001// Licensed under the Apache License, Version 2.0 (the "License");
002// you may not use this file except in compliance with the License.
003// You may obtain a copy of the License at
004//
005//     http://www.apache.org/licenses/LICENSE-2.0
006//
007// Unless required by applicable law or agreed to in writing, software
008// distributed under the License is distributed on an "AS IS" BASIS,
009// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
010// See the License for the specific language governing permissions and
011// limitations under the License.
012
013package org.apache.tapestry5.internal.services;
014
015import org.apache.tapestry5.internal.parser.*;
016import org.apache.tapestry5.ioc.Location;
017import org.apache.tapestry5.ioc.Resource;
018import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
019import org.apache.tapestry5.ioc.internal.util.InternalUtils;
020import org.apache.tapestry5.ioc.internal.util.TapestryException;
021import org.apache.tapestry5.ioc.util.ExceptionUtils;
022
023import javax.xml.namespace.QName;
024import java.net.URL;
025import java.util.List;
026import java.util.Map;
027import java.util.Set;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030
031import static org.apache.tapestry5.internal.services.SaxTemplateParser.Version.*;
032
033/**
034 * SAX-based template parser logic, taking a {@link Resource} to a Tapestry
035 * template file and returning
036 * a {@link ComponentTemplate}.
037 *
038 * Earlier versions of this code used the StAX (streaming XML parser), but that
039 * was really, really bad for Google App Engine. This version uses SAX under the
040 * covers, but kind of replicates the important bits of the StAX API as
041 * {@link XMLTokenStream}.
042 *
043 * @since 5.2.0
044 */
045@SuppressWarnings(
046        {"JavaDoc"})
047public class SaxTemplateParser
048{
    /**
     * Local name of the Tapestry-namespace attribute that lists the mixins
     * for a component element.
     */
    private static final String MIXINS_ATTRIBUTE_NAME = "mixins";

    /**
     * Local name of the Tapestry-namespace attribute that supplies a
     * component's type.
     */
    private static final String TYPE_ATTRIBUTE_NAME = "type";

    /**
     * Local name of the Tapestry-namespace attribute that supplies a
     * component's id.
     */
    private static final String ID_ATTRIBUTE_NAME = "id";

    /**
     * Namespace URI of the built-in "xml" prefix; used by this parser to
     * recognize the xml:space attribute.
     */
    public static final String XML_NAMESPACE_URI = "http://www.w3.org/XML/1998/namespace";
056
057    private static final Map<String, Version> NAMESPACE_URI_TO_VERSION = CollectionFactory.newMap();
058
059    {
060        NAMESPACE_URI_TO_VERSION.put("http://tapestry.apache.org/schema/tapestry_5_0_0.xsd", T_5_0);
061        NAMESPACE_URI_TO_VERSION.put("http://tapestry.apache.org/schema/tapestry_5_1_0.xsd", T_5_1);
062        // 5.2 didn't change the schmea, so the 5_1_0.xsd was still used.
063        // 5.3 fixes an incorrect element name in the XSD ("replacement" should be "replace")
064        // The parser code here always expected "replace".
065        NAMESPACE_URI_TO_VERSION.put("http://tapestry.apache.org/schema/tapestry_5_3.xsd", T_5_3);
066        // 5.4 is pretty much the same as 5.3, but allows block inside extend
067        // as per TAP5-1847
068        NAMESPACE_URI_TO_VERSION.put("http://tapestry.apache.org/schema/tapestry_5_4.xsd", T_5_4);
069    }
070
    /**
     * Special namespace used to denote Block parameters to components, as a
     * (preferred) alternative to the t:parameter
     * element. The simple element name is the name of the parameter.
     */
    private static final String TAPESTRY_PARAMETERS_URI = "tapestry:parameter";

    /**
     * URI prefix used to identify a Tapestry library, the remainder of the URI
     * becomes a prefix on the element name.
     */
    private static final String LIB_NAMESPACE_URI_PREFIX = "tapestry-library:";

    /**
     * Pattern used to parse the path portion of the library namespace URI. A
     * series of simple identifiers with slashes
     * allowed as separators.
     */

    private static final Pattern LIBRARY_PATH_PATTERN = Pattern.compile("^[a-z]\\w*(/[a-z]\\w*)*$",
            Pattern.CASE_INSENSITIVE);

    /**
     * Pattern for a valid component or block id: a letter followed by any
     * number of word characters (letters, digits, underscores), matched
     * ignoring case.
     */
    private static final Pattern ID_PATTERN = Pattern.compile("^[a-z]\\w*$",
            Pattern.CASE_INSENSITIVE);

    /**
     * Any amount of mixed simple whitespace (space, tab, form feed) mixed with
     * at least one carriage return or line
     * feed, followed by any amount of whitespace. Will be reduced to a single
     * linefeed.
     */
    private static final Pattern REDUCE_LINEBREAKS_PATTERN = Pattern.compile(
            "[ \\t\\f]*[\\r\\n]\\s*", Pattern.MULTILINE);

    /**
     * Used when compressing whitespace, matches any sequence of simple
     * whitespace (space, tab, formfeed). Applied after
     * REDUCE_LINEBREAKS_PATTERN.
     */
    private static final Pattern REDUCE_WHITESPACE_PATTERN = Pattern.compile("[ \\t\\f]+",
            Pattern.MULTILINE);

    // Note the negative lookahead inside the captured group: no character
    // consumed as part of the expansion body may begin another "${". This
    // prevents the pattern from merging multiple expansions on the same text
    // line into a single large but invalid expansion.

    private static final Pattern EXPANSION_PATTERN = Pattern.compile("\\$\\{\\s*(((?!\\$\\{).)*)\\s*}");
    private static final char EXPANSION_STRING_DELIMITTER = '\'';
    private static final char OPEN_BRACE = '{';
    private static final char CLOSE_BRACE = '}';

    /**
     * Local names of the Tapestry elements that are only valid as the root
     * element of a template.
     */
    private static final Set<String> MUST_BE_ROOT = CollectionFactory.newSet("extend", "container");
124
    /** The template being parsed; also handed to the resulting template. */
    private final Resource resource;

    /** Source of the XML parse events that drive every method of this parser. */
    private final XMLTokenStream tokenStream;

    // NOTE(review): presumably accumulates character data until it is flushed
    // by processTextBuffer(); the code that uses it is further down the file.
    private final StringBuilder textBuffer = new StringBuilder();

    /** The template's main token list, passed to the resulting template. */
    private final List<TemplateToken> tokens = CollectionFactory.newList();

    // This starts pointing at tokens but occasionally shifts to a list inside
    // the overrides Map.
    private List<TemplateToken> tokenAccumulator = tokens;

    /**
     * Primarily used as a set of componentIds (to check for duplicates and
     * conflicts).
     */
    private final Map<String, Location> componentIds = CollectionFactory.newCaseInsensitiveMap();

    /**
     * Map from override id to a list of tokens; this actually works both for
     * overrides defined by this template and
     * overrides provided by this template.
     */
    private Map<String, List<TemplateToken>> overrides;

    /** Set to true once an &lt;extend&gt; root element has been seen. */
    private boolean extension;

    // NOTE(review): not referenced in the visible part of the file; presumably
    // records where the text currently in textBuffer started.
    private Location textStartLocation;

    /**
     * Checked by every parsing loop; cleared when the end of a &lt;content&gt;
     * element is reached, which stops the parse.
     */
    private boolean active = true;

    /**
     * Set to true when an attribute in a Tapestry schema namespace of version
     * 5.4 or later is seen on an element; passed to the resulting template.
     */
    private boolean strictMixinParameters = false;

    /**
     * Ids of the extension points already seen in this template (only the keys
     * matter); used to reject duplicate ids.
     */
    private final Map<String, Boolean> extensionPointIdSet = CollectionFactory.newCaseInsensitiveMap();
159
160    public SaxTemplateParser(Resource resource, Map<String, URL> publicIdToURL)
161    {
162        this.resource = resource;
163        this.tokenStream = new XMLTokenStream(resource, publicIdToURL);
164    }
165
166    public ComponentTemplate parse(boolean compressWhitespace)
167    {
168        try
169        {
170            tokenStream.parse();
171
172            TemplateParserState initialParserState = new TemplateParserState()
173                    .compressWhitespace(compressWhitespace);
174
175            root(initialParserState);
176
177            return new ComponentTemplateImpl(resource, tokens, componentIds, extension, strictMixinParameters, overrides);
178        } catch (Exception ex)
179        {
180            throw new TapestryException(String.format("Failure parsing template %s: %s", resource,
181                    ExceptionUtils.toMessage(ex)), tokenStream.getLocation(), ex);
182        }
183
184    }
185
186    void root(TemplateParserState state)
187    {
188        while (active && tokenStream.hasNext())
189        {
190            switch (tokenStream.next())
191            {
192                case DTD:
193
194                    dtd();
195
196                    break;
197
198                case START_ELEMENT:
199
200                    rootElement(state);
201
202                    break;
203
204                case END_DOCUMENT:
205                    // Ignore it.
206                    break;
207
208                default:
209                    textContent(state);
210            }
211        }
212    }
213
214    private void rootElement(TemplateParserState initialState)
215    {
216        TemplateParserState state = setupForElement(initialState);
217
218        String uri = tokenStream.getNamespaceURI();
219        String name = tokenStream.getLocalName();
220        Version version = NAMESPACE_URI_TO_VERSION.get(uri);
221
222        if (T_5_1.sameOrEarlier(version))
223        {
224            if (name.equalsIgnoreCase("extend"))
225            {
226                extend(state);
227                return;
228            }
229        }
230
231        if (version != null)
232        {
233            if (name.equalsIgnoreCase("container"))
234            {
235                container(state);
236                return;
237            }
238        }
239
240        element(state);
241    }
242
243    private void extend(TemplateParserState state)
244    {
245        extension = true;
246
247        while (active)
248        {
249            switch (tokenStream.next())
250            {
251                case START_ELEMENT:
252
253                    if (isTemplateVersion(Version.T_5_1) && isElementName("replace"))
254                    {
255                        replace(state);
256                        break;
257                    }
258
259                    boolean is54 = isTemplateVersion(Version.T_5_4);
260
261                    if (is54 && isElementName("block"))
262                    {
263                        block(state);
264                        break;
265                    }
266
267                    throw new RuntimeException(
268                            is54
269                                    ? "Child element of <extend> must be <replace> or <block>."
270                                    : "Child element of <extend> must be <replace>.");
271
272                case END_ELEMENT:
273
274                    return;
275
276                // Ignore spaces and comments directly inside <extend>.
277
278                case COMMENT:
279                case SPACE:
280                    break;
281
282                // Other non-whitespace content (characters, etc.) are forbidden.
283
284                case CHARACTERS:
285                    if (InternalUtils.isBlank(tokenStream.getText()))
286                        break;
287
288                default:
289                    unexpectedEventType();
290            }
291        }
292    }
293
294    /**
295     * Returns true if the <em>local name</em> is the element name (ignoring case).
296     */
297    private boolean isElementName(String elementName)
298    {
299        return tokenStream.getLocalName().equalsIgnoreCase(elementName);
300    }
301
302    /**
303     * Returns true if the template version is at least the required version.
304     */
305    private boolean isTemplateVersion(Version requiredVersion)
306    {
307        Version templateVersion = NAMESPACE_URI_TO_VERSION.get(tokenStream.getNamespaceURI());
308
309        return requiredVersion.sameOrEarlier(templateVersion);
310    }
311
312    private void replace(TemplateParserState state)
313    {
314        String id = getRequiredIdAttribute();
315
316        addContentToOverride(setupForElement(state), id);
317    }
318
319    private void unexpectedEventType()
320    {
321        XMLTokenType eventType = tokenStream.getEventType();
322
323        throw new IllegalStateException(String.format("Unexpected XML parse event %s.", eventType
324                .name()));
325    }
326
327    private void dtd()
328    {
329        DTDData dtdInfo = tokenStream.getDTDInfo();
330
331        tokenAccumulator.add(new DTDToken(dtdInfo.rootName, dtdInfo.publicId, dtdInfo
332                .systemId, getLocation()));
333    }
334
335    private Location getLocation()
336    {
337        return tokenStream.getLocation();
338    }
339
340    /**
341     * Processes an element through to its matching end tag.
342     *
343     * An element can be:
344     *
345     * a Tapestry component via &lt;t:type&gt;
346     *
347     * a Tapestry component via t:type="type" and/or t:id="id"
348     *
349     * a Tapestry component via a library namespace
350     *
351     * A parameter element via &lt;t:parameter&gt;
352     *
353     * A parameter element via &lt;p:name&gt;
354     *
355     * A &lt;t:remove&gt; element (in the 5.1 schema)
356     *
357     * A &lt;t:content&gt; element (in the 5.1 schema)
358     *
359     * A &lt;t:block&gt; element
360     *
361     * The body &lt;t:body&gt;
362     *
363     * An ordinary element
364     */
365    void element(TemplateParserState initialState)
366    {
367        TemplateParserState state = setupForElement(initialState);
368
369        String uri = tokenStream.getNamespaceURI();
370        String name = tokenStream.getLocalName();
371        Version version = NAMESPACE_URI_TO_VERSION.get(uri);
372
373        if (T_5_1.sameOrEarlier(version))
374        {
375
376            if (name.equalsIgnoreCase("remove"))
377            {
378                removeContent();
379
380                return;
381            }
382
383            if (name.equalsIgnoreCase("content"))
384            {
385                limitContent(state);
386
387                return;
388            }
389
390            if (name.equalsIgnoreCase("extension-point"))
391            {
392                extensionPoint(state);
393
394                return;
395            }
396
397            if (name.equalsIgnoreCase("replace"))
398            {
399                throw new RuntimeException(
400                        "The <replace> element may only appear directly within an extend element.");
401            }
402
403            if (MUST_BE_ROOT.contains(name))
404                mustBeRoot(name);
405        }
406
407        if (version != null)
408        {
409
410            if (name.equalsIgnoreCase("body"))
411            {
412                body();
413                return;
414            }
415
416            if (name.equalsIgnoreCase("container"))
417            {
418                mustBeRoot(name);
419            }
420
421            if (name.equalsIgnoreCase("block"))
422            {
423                block(state);
424                return;
425            }
426
427            if (name.equalsIgnoreCase("parameter"))
428            {
429                if (T_5_3.sameOrEarlier(version))
430                {
431                    throw new RuntimeException(
432                            String.format("The <parameter> element has been deprecated in Tapestry 5.3 in favour of '%s' namespace.", TAPESTRY_PARAMETERS_URI));
433                }
434
435                classicParameter(state);
436
437                return;
438            }
439
440            possibleTapestryComponent(state, null, tokenStream.getLocalName().replace('.', '/'));
441
442            return;
443        }
444
445        if (uri != null && uri.startsWith(LIB_NAMESPACE_URI_PREFIX))
446        {
447            libraryNamespaceComponent(state);
448
449            return;
450        }
451
452        if (TAPESTRY_PARAMETERS_URI.equals(uri))
453        {
454            parameterElement(state);
455
456            return;
457        }
458
459        // Just an ordinary element ... unless it has t:id or t:type
460
461        possibleTapestryComponent(state, tokenStream.getLocalName(), null);
462    }
463
464    /**
465     * Processes a body of an element including text and (recursively) nested
466     * elements. Adds an
467     * {@link org.apache.tapestry5.internal.parser.TokenType#END_ELEMENT} token
468     * before returning.
469     *
470     * @param state
471     */
472    private void processBody(TemplateParserState state)
473    {
474        while (active)
475        {
476            switch (tokenStream.next())
477            {
478                case START_ELEMENT:
479
480                    // The recursive part: when we see a new element start.
481
482                    element(state);
483                    break;
484
485                case END_ELEMENT:
486
487                    // At the end of an element, we're done and can return.
488                    // This is the matching end element for the start element
489                    // that invoked this method.
490
491                    endElement(state);
492
493                    return;
494
495                default:
496                    textContent(state);
497            }
498        }
499    }
500
501    private TemplateParserState setupForElement(TemplateParserState initialState)
502    {
503        processTextBuffer(initialState);
504
505        return checkForXMLSpaceAttribute(initialState);
506    }
507
508    /**
509     * Handles an extension point, putting a RenderExtension token in position
510     * in the template.
511     *
512     * @param state
513     */
514    private void extensionPoint(TemplateParserState state)
515    {
516        // An extension point adds a token that represents where the override
517        // (either the default
518        // provided in the parent template, or the true override from a child
519        // template) is positioned.
520
521        String id = getRequiredIdAttribute();
522
523        if (extensionPointIdSet.containsKey(id))
524        {
525            throw new TapestryException(String.format("Extension point '%s' is already defined for this template. Extension point ids must be unique.", id), getLocation(), null);
526        } else
527        {
528            extensionPointIdSet.put(id, true);
529        }
530
531        tokenAccumulator.add(new ExtensionPointToken(id, getLocation()));
532
533        addContentToOverride(state.insideComponent(false), id);
534    }
535
536    private String getRequiredIdAttribute()
537    {
538        String id = getSingleParameter("id");
539
540        if (InternalUtils.isBlank(id))
541            throw new RuntimeException(String.format("The <%s> element must have an id attribute.",
542                    tokenStream.getLocalName()));
543
544        return id;
545    }
546
547    private void addContentToOverride(TemplateParserState state, String id)
548
549    {
550        List<TemplateToken> savedTokenAccumulator = tokenAccumulator;
551
552        tokenAccumulator = CollectionFactory.newList();
553
554        // TODO: id should probably be unique; i.e., you either define an
555        // override or you
556        // provide an override, but you don't do both in the same template.
557
558        if (overrides == null)
559            overrides = CollectionFactory.newCaseInsensitiveMap();
560
561        overrides.put(id, tokenAccumulator);
562
563        while (active)
564        {
565            switch (tokenStream.next())
566            {
567                case START_ELEMENT:
568                    element(state);
569                    break;
570
571                case END_ELEMENT:
572
573                    processTextBuffer(state);
574
575                    // Restore everthing to how it was before the
576                    // extention-point was reached.
577
578                    tokenAccumulator = savedTokenAccumulator;
579                    return;
580
581                default:
582                    textContent(state);
583            }
584        }
585    }
586
587    private void mustBeRoot(String name)
588    {
589        throw new RuntimeException(String.format(
590                "Element <%s> is only valid as the root element of a template.", name));
591    }
592
593    /**
594     * Triggered by &lt;t:content&gt; element; limits template content to just
595     * what's inside.
596     */
597
598    private void limitContent(TemplateParserState state)
599    {
600        if (state.isCollectingContent())
601            throw new IllegalStateException(
602                    "The <content> element may not be nested within another <content> element.");
603
604        TemplateParserState newState = state.collectingContent().insideComponent(false);
605
606        // Clear out any tokens that precede the <t:content> element
607
608        tokens.clear();
609
610        // I'm not happy about this; you really shouldn't define overrides just
611        // to clear them out,
612        // but it is consistent. Perhaps this should be an error if overrides is
613        // non-empty.
614
615        overrides = null;
616
617        // Make sure that if the <t:content> appears inside a <t:replace> or
618        // <t:extension-point>, that
619        // it is still handled correctly.
620
621        tokenAccumulator = tokens;
622
623        while (active)
624        {
625            switch (tokenStream.next())
626            {
627                case START_ELEMENT:
628                    element(newState);
629                    break;
630
631                case END_ELEMENT:
632
633                    // The active flag is global, once we hit it, the entire
634                    // parse is aborted, leaving
635                    // tokens with just tokens defined inside <t:content>.
636
637                    processTextBuffer(newState);
638
639                    active = false;
640
641                    break;
642
643                default:
644                    textContent(state);
645            }
646        }
647
648    }
649
650    private void removeContent()
651    {
652        int depth = 1;
653
654        while (active)
655        {
656            switch (tokenStream.next())
657            {
658                case START_ELEMENT:
659                    depth++;
660                    break;
661
662                // The matching end element.
663
664                case END_ELEMENT:
665                    depth--;
666
667                    if (depth == 0)
668                        return;
669
670                    break;
671
672                default:
673                    // Ignore anything else (text, comments, etc.)
674            }
675        }
676    }
677
678    private String nullForBlank(String input)
679    {
680        return InternalUtils.isBlank(input) ? null : input;
681    }
682
683    /**
684     * Added in release 5.1.
685     */
686    private void libraryNamespaceComponent(TemplateParserState state)
687    {
688        String uri = tokenStream.getNamespaceURI();
689
690        // The library path is encoded into the namespace URI.
691
692        String path = uri.substring(LIB_NAMESPACE_URI_PREFIX.length());
693
694        if (!LIBRARY_PATH_PATTERN.matcher(path).matches())
695            throw new RuntimeException(String.format("The path portion of library namespace URI '%s' is not valid: it must be a simple identifier, or a series of identifiers seperated by slashes.", uri));
696
697        possibleTapestryComponent(state, null, path + "/" + tokenStream.getLocalName());
698    }
699
700    /**
701     * @param elementName
702     * @param identifiedType
703     *         the type of the element, usually null, but may be the
704     *         component type derived from element
705     */
706    private void possibleTapestryComponent(TemplateParserState state, String elementName,
707                                           String identifiedType)
708    {
709        String id = null;
710        String type = identifiedType;
711        String mixins = null;
712
713        int count = tokenStream.getAttributeCount();
714
715        Location location = getLocation();
716
717        List<TemplateToken> attributeTokens = CollectionFactory.newList();
718
719        for (int i = 0; i < count; i++)
720        {
721            QName qname = tokenStream.getAttributeName(i);
722
723            if (isXMLSpaceAttribute(qname))
724                continue;
725
726            // The name will be blank for an xmlns: attribute
727
728            String localName = qname.getLocalPart();
729
730            if (InternalUtils.isBlank(localName))
731                continue;
732
733            String uri = qname.getNamespaceURI();
734
735            String value = tokenStream.getAttributeValue(i);
736
737
738            Version version = NAMESPACE_URI_TO_VERSION.get(uri);
739
740            if (version != null)
741            {
742                // We are kind of assuming that the namespace URI appears once, in the outermost element of the template.
743                // And we don't and can't handle the case that it appears multiple times in the template.
744
745                if (T_5_4.sameOrEarlier(version)) {
746                    strictMixinParameters = true;
747                }
748
749                if (localName.equalsIgnoreCase(ID_ATTRIBUTE_NAME))
750                {
751                    id = nullForBlank(value);
752
753                    validateId(id, "Component id '%s' is not valid; component ids must be valid Java identifiers: start with a letter, and consist of letters, numbers and underscores.");
754
755                    continue;
756                }
757
758                if (type == null && localName.equalsIgnoreCase(TYPE_ATTRIBUTE_NAME))
759                {
760                    type = nullForBlank(value);
761                    continue;
762                }
763
764                if (localName.equalsIgnoreCase(MIXINS_ATTRIBUTE_NAME))
765                {
766                    mixins = nullForBlank(value);
767                    continue;
768                }
769
770                // Anything else is the name of a Tapestry component parameter
771                // that is simply
772                // not part of the template's doctype for the element being
773                // instrumented.
774            }
775
776            attributeTokens.add(new AttributeToken(uri, localName, value, location));
777        }
778
779        boolean isComponent = (id != null || type != null);
780
781        // If provided t:mixins but not t:id or t:type, then its not quite a
782        // component
783
784        if (mixins != null && !isComponent)
785            throw new TapestryException(String.format("You may not specify mixins for element <%s> because it does not represent a component (which requires either an id attribute or a type attribute).", elementName),
786                    location, null);
787
788        if (isComponent)
789        {
790            tokenAccumulator.add(new StartComponentToken(elementName, id, type, mixins, location));
791        } else
792        {
793            tokenAccumulator.add(new StartElementToken(tokenStream.getNamespaceURI(), elementName,
794                    location));
795        }
796
797        addDefineNamespaceTokens();
798
799        tokenAccumulator.addAll(attributeTokens);
800
801        if (id != null)
802            componentIds.put(id, location);
803
804        processBody(state.insideComponent(isComponent));
805    }
806
807    private void addDefineNamespaceTokens()
808    {
809        for (int i = 0; i < tokenStream.getNamespaceCount(); i++)
810        {
811            String uri = tokenStream.getNamespaceURI(i);
812
813            // These URIs are strictly part of the server-side Tapestry template
814            // and are not ever sent to the client.
815
816            if (NAMESPACE_URI_TO_VERSION.containsKey(uri))
817                continue;
818
819            if (uri.equals(TAPESTRY_PARAMETERS_URI))
820                continue;
821
822            if (uri.startsWith(LIB_NAMESPACE_URI_PREFIX))
823                continue;
824
825            tokenAccumulator.add(new DefineNamespacePrefixToken(uri, tokenStream
826                    .getNamespacePrefix(i), getLocation()));
827        }
828    }
829
830    private TemplateParserState checkForXMLSpaceAttribute(TemplateParserState state)
831    {
832        for (int i = 0; i < tokenStream.getAttributeCount(); i++)
833        {
834            QName qName = tokenStream.getAttributeName(i);
835
836            if (isXMLSpaceAttribute(qName))
837            {
838                boolean compress = !"preserve".equals(tokenStream.getAttributeValue(i));
839
840                return state.compressWhitespace(compress);
841            }
842        }
843
844        return state;
845    }
846
847    /**
848     * Processes the text buffer and then adds an end element token.
849     */
850    private void endElement(TemplateParserState state)
851    {
852        processTextBuffer(state);
853
854        tokenAccumulator.add(new EndElementToken(getLocation()));
855    }
856
857    /**
858     * Handler for Tapestry 5.0's "classic" &lt;t:parameter&gt; element. This
859     * turns into a {@link org.apache.tapestry5.internal.parser.ParameterToken}
860     * and the body and end element are provided normally.
861     */
862    private void classicParameter(TemplateParserState state)
863    {
864        String parameterName = getSingleParameter("name");
865
866        if (InternalUtils.isBlank(parameterName))
867            throw new TapestryException("The name attribute of the <parameter> element must be specified.",
868                    getLocation(), null);
869
870        ensureParameterWithinComponent(state);
871
872        tokenAccumulator.add(new ParameterToken(parameterName, getLocation()));
873
874        processBody(state.insideComponent(false));
875    }
876
877    private void ensureParameterWithinComponent(TemplateParserState state)
878    {
879        if (!state.isInsideComponent())
880            throw new RuntimeException(
881                    "Block parameters are only allowed directly within component elements.");
882    }
883
884    /**
885     * Tapestry 5.1 uses a special namespace (usually mapped to "p:") and the
886     * name becomes the parameter element.
887     */
888    private void parameterElement(TemplateParserState state)
889    {
890        ensureParameterWithinComponent(state);
891
892        if (tokenStream.getAttributeCount() > 0)
893            throw new TapestryException("A block parameter element does not allow any additional attributes. The element name defines the parameter name.",
894                    getLocation(), null);
895
896        tokenAccumulator.add(new ParameterToken(tokenStream.getLocalName(), getLocation()));
897
898        processBody(state.insideComponent(false));
899    }
900
901    /**
902     * Checks that a body element is empty. Returns after the body's close
903     * element. Adds a single body token (but not an
904     * end token).
905     */
906    private void body()
907    {
908        tokenAccumulator.add(new BodyToken(getLocation()));
909
910        while (active)
911        {
912            switch (tokenStream.next())
913            {
914                case END_ELEMENT:
915                    return;
916
917                default:
918                    throw new IllegalStateException(String.format("Content inside a Tapestry body element is not allowed (at %s). The content has been ignored.", getLocation()));
919            }
920        }
921    }
922
923    /**
924     * Driven by the &lt;t:container&gt; element, this state adds elements for
925     * its body but not its start or end tags.
926     *
927     * @param state
928     */
929    private void container(TemplateParserState state)
930    {
931        while (active)
932        {
933            switch (tokenStream.next())
934            {
935                case START_ELEMENT:
936                    element(state);
937                    break;
938
939                // The matching end-element for the container. Don't add a
940                // token.
941
942                case END_ELEMENT:
943
944                    processTextBuffer(state);
945
946                    return;
947
948                default:
949                    textContent(state);
950            }
951        }
952    }
953
954    /**
955     * A block adds a token for its start tag and end tag and allows any content
956     * within.
957     */
958    private void block(TemplateParserState state)
959    {
960        String blockId = getSingleParameter("id");
961
962        validateId(blockId, "Block id '%s' is not valid; block ids must be valid Java identifiers: start with a letter, and consist of letters, numbers and underscores.");
963
964        tokenAccumulator.add(new BlockToken(blockId, getLocation()));
965
966        processBody(state.insideComponent(false));
967    }
968
969    private String getSingleParameter(String attributeName)
970    {
971        String result = null;
972
973        for (int i = 0; i < tokenStream.getAttributeCount(); i++)
974        {
975            QName qName = tokenStream.getAttributeName(i);
976
977            if (isXMLSpaceAttribute(qName))
978                continue;
979
980            if (qName.getLocalPart().equalsIgnoreCase(attributeName))
981            {
982                result = tokenStream.getAttributeValue(i);
983                continue;
984            }
985
986            // Only the named attribute is allowed.
987
988            throw new TapestryException(String.format("Element <%s> does not support an attribute named '%s'. The only allowed attribute name is '%s'.", tokenStream
989                    .getLocalName(), qName.toString(), attributeName), getLocation(), null);
990        }
991
992        return result;
993    }
994
995    private void validateId(String id, String messageKey)
996    {
997        if (id == null)
998            return;
999
1000        if (ID_PATTERN.matcher(id).matches())
1001            return;
1002
1003        // Not a match.
1004
1005        throw new TapestryException(String.format(messageKey, id), getLocation(), null);
1006    }
1007
1008    private boolean isXMLSpaceAttribute(QName qName)
1009    {
1010        return XML_NAMESPACE_URI.equals(qName.getNamespaceURI())
1011                && "space".equals(qName.getLocalPart());
1012    }
1013
1014    /**
1015     * Processes text content if in the correct state, or throws an exception.
1016     * This is used as a default for matching
1017     * case statements.
1018     *
1019     * @param state
1020     */
1021    private void textContent(TemplateParserState state)
1022    {
1023        switch (tokenStream.getEventType())
1024        {
1025            case COMMENT:
1026                comment(state);
1027                break;
1028
1029            case CDATA:
1030                cdata(state);
1031                break;
1032
1033            case CHARACTERS:
1034            case SPACE:
1035                characters();
1036                break;
1037
1038            default:
1039                unexpectedEventType();
1040        }
1041    }
1042
1043    private void characters()
1044    {
1045        if (textStartLocation == null)
1046            textStartLocation = getLocation();
1047
1048        textBuffer.append(tokenStream.getText());
1049    }
1050
1051    private void cdata(TemplateParserState state)
1052    {
1053        processTextBuffer(state);
1054
1055        tokenAccumulator.add(new CDATAToken(tokenStream.getText(), getLocation()));
1056    }
1057
1058    private void comment(TemplateParserState state)
1059    {
1060        processTextBuffer(state);
1061
1062        String comment = tokenStream.getText();
1063
1064        tokenAccumulator.add(new CommentToken(comment, getLocation()));
1065    }
1066
1067    /**
1068     * Processes the accumulated text in the text buffer as a text token.
1069     */
1070    private void processTextBuffer(TemplateParserState state)
1071    {
1072        if (textBuffer.length() != 0)
1073            convertTextBufferToTokens(state);
1074
1075        textStartLocation = null;
1076    }
1077
1078    private void convertTextBufferToTokens(TemplateParserState state)
1079    {
1080        String text = textBuffer.toString();
1081
1082        textBuffer.setLength(0);
1083
1084        if (state.isCompressWhitespace())
1085        {
1086            text = compressWhitespaceInText(text);
1087
1088            if (InternalUtils.isBlank(text))
1089                return;
1090        }
1091
1092        addTokensForText(text);
1093    }
1094
1095    /**
1096     * Reduces vertical whitespace to a single newline, then reduces horizontal
1097     * whitespace to a single space.
1098     *
1099     * @param text
1100     * @return compressed version of text
1101     */
1102    private String compressWhitespaceInText(String text)
1103    {
1104        String linebreaksReduced = REDUCE_LINEBREAKS_PATTERN.matcher(text).replaceAll("\n");
1105
1106        return REDUCE_WHITESPACE_PATTERN.matcher(linebreaksReduced).replaceAll(" ");
1107    }
1108
1109    /**
1110     * Scans the text, using a regular expression pattern, for expansion
1111     * patterns, and adds appropriate tokens for what
1112     * it finds.
1113     *
1114     * @param text
1115     *         to add as
1116     *         {@link org.apache.tapestry5.internal.parser.TextToken}s and
1117     *         {@link org.apache.tapestry5.internal.parser.ExpansionToken}s
1118     */
1119    private void addTokensForText(String text)
1120    {
1121        Matcher matcher = EXPANSION_PATTERN.matcher(text);
1122
1123        int startx = 0;
1124
1125        // The big problem with all this code is that everything gets assigned
1126        // to the
1127        // start of the text block, even if there are line breaks leading up to
1128        // it.
1129        // That's going to take a lot more work and there are bigger fish to
1130        // fry. In addition,
1131        // TAPESTRY-2028 means that the whitespace has likely been stripped out
1132        // of the text
1133        // already anyway.
1134        while (matcher.find())
1135        {
1136            int matchStart = matcher.start();
1137
1138            if (matchStart != startx)
1139            {
1140                String prefix = text.substring(startx, matchStart);
1141                tokenAccumulator.add(new TextToken(prefix, textStartLocation));
1142            }
1143
1144            // Group 1 includes the real text of the expansion, with whitespace
1145            // around the
1146            // expression (but inside the curly braces) excluded.
1147            // But note that we run into a problem.  The original 
1148            // EXPANSION_PATTERN used a reluctant quantifier to match the 
1149            // smallest instance of ${} possible.  But if you have ${'}'} or 
1150            // ${{'key': 'value'}} (maps, cf TAP5-1605) then you run into issues
1151            // b/c the expansion becomes {'key': 'value' which is wrong.
1152            // A fix to use greedy matching with negative lookahead to prevent 
1153            // ${...}...${...} all matching a single expansion is close, but 
1154            // has issues when an expansion is used inside a javascript function
1155            // (see TAP5-1620). The solution is to use the greedy 
1156            // EXPANSION_PATTERN as before to bound the search for a single 
1157            // expansion, then check for {} consistency, ignoring opening and 
1158            // closing braces that occur within '' (the property expression 
1159            // language doesn't support "" for strings). That should include: 
1160            // 'This string has a } in it' and 'This string has a { in it.'
1161            // Note also that the property expression language doesn't support
1162            // escaping the string character ('), so we don't have to worry 
1163            // about that. 
1164            String expression = matcher.group(1);
1165            //count of 'open' braces. Expression ends when it hits 0. In most cases,
1166            // it should end up as 1 b/c "expression" is everything inside ${}, so 
1167            // the following will typically not find the end of the expression.
1168            int openBraceCount = 1;
1169            int expressionEnd = expression.length();
1170            boolean inQuote = false;
1171            for (int i = 0; i < expression.length(); i++)
1172            {
1173                char c = expression.charAt(i);
1174                //basically, if we're inQuote, we ignore everything until we hit the quote end, so we only care if the character matches the quote start (meaning we're at the end of the quote).
1175                //note that I don't believe expression support escaped quotes...
1176                if (c == EXPANSION_STRING_DELIMITTER)
1177                {
1178                    inQuote = !inQuote;
1179                    continue;
1180                } else if (inQuote)
1181                {
1182                    continue;
1183                } else if (c == CLOSE_BRACE)
1184                {
1185                    openBraceCount--;
1186                    if (openBraceCount == 0)
1187                    {
1188                        expressionEnd = i;
1189                        break;
1190                    }
1191                } else if (c == OPEN_BRACE)
1192                {
1193                    openBraceCount++;
1194                }
1195            }
1196            if (expressionEnd < expression.length())
1197            {
1198                //then we gobbled up some } that we shouldn't have... like the closing } of a javascript
1199                //function.
1200                tokenAccumulator.add(new ExpansionToken(expression.substring(0, expressionEnd), textStartLocation));
1201                //can't just assign to 
1202                startx = matcher.start(1) + expressionEnd + 1;
1203            } else
1204            {
1205                tokenAccumulator.add(new ExpansionToken(expression.trim(), textStartLocation));
1206
1207                startx = matcher.end();
1208            }
1209        }
1210
1211        // Catch anything after the final regexp match.
1212
1213        if (startx < text.length())
1214            tokenAccumulator.add(new TextToken(text.substring(startx, text.length()),
1215                    textStartLocation));
1216    }
1217
1218    static enum Version
1219    {
1220        T_5_0(5, 0), T_5_1(5, 1), T_5_3(5, 3), T_5_4(5, 4);
1221
1222        private int major;
1223        private int minor;
1224
1225
1226        private Version(int major, int minor)
1227        {
1228            this.major = major;
1229            this.minor = minor;
1230        }
1231
1232        /**
1233         * Returns true if this Version is the same as, or ordered before the other Version. This is often used to enable new
1234         * template features for a specific version.
1235         */
1236        public boolean sameOrEarlier(Version other)
1237        {
1238            if (other == null)
1239                return false;
1240
1241            if (this == other)
1242                return true;
1243
1244            return major <= other.major && minor <= other.minor;
1245        }
1246    }
1247
1248}