001    // Copyright 2009, 2011, 2012 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    //     http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry5.internal.services;
016    
017    import org.apache.tapestry5.ioc.Location;
018    import org.apache.tapestry5.ioc.Resource;
019    import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
020    import org.apache.tapestry5.ioc.internal.util.InternalUtils;
021    import org.apache.tapestry5.ioc.internal.util.LocationImpl;
022    import org.xml.sax.*;
023    import org.xml.sax.ext.Attributes2;
024    import org.xml.sax.ext.LexicalHandler;
025    import org.xml.sax.helpers.XMLReaderFactory;
026    
027    import javax.xml.namespace.QName;
028    import java.io.*;
029    import java.net.URL;
030    import java.util.Collections;
031    import java.util.List;
032    import java.util.Map;
033    
034    /**
035     * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>})
036     * as if it were the XHTML transitional doctype
037     * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}).
038     */
039    public class XMLTokenStream
040    {
041    
042        public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
043    
044        private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null);
045    
046        private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler
047        {
048            private Locator locator;
049    
050            private int currentLine = -1;
051    
052            private Location cachedLocation;
053    
054            private Location textLocation;
055    
056            private final StringBuilder builder = new StringBuilder();
057    
058            private boolean inCDATA, insideDTD;
059    
060            private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList();
061    
062            private Location getLocation()
063            {
064                int line = locator.getLineNumber();
065    
066                if (currentLine != line)
067                    cachedLocation = null;
068    
069                if (cachedLocation == null)
070                {
071                    // lineOffset accounts for the extra line when a doctype is injected. The line number reported
072                    // from the XML parser inlcudes the phantom doctype line, the lineOffset is used to subtract one
073                    // to get the real line number.
074                    cachedLocation = new LocationImpl(resource, line + lineOffset);
075                }
076    
077                return cachedLocation;
078            }
079    
080            private XMLToken add(XMLTokenType type)
081            {
082                XMLToken token = new XMLToken(type, getLocation());
083    
084                tokens.add(token);
085    
086                return token;
087            }
088    
089            public InputSource resolveEntity(String publicId, String systemId) throws SAXException,
090                    IOException
091            {
092                URL url = publicIdToURL.get(publicId);
093    
094                try
095                {
096                    if (url != null)
097                        return new InputSource(url.openStream());
098                } catch (IOException ex)
099                {
100                    throw new SAXException(String.format("Unable to open stream for resource %s: %s",
101                            url, InternalUtils.toMessage(ex)), ex);
102                }
103    
104                return null;
105            }
106    
107            public void comment(char[] ch, int start, int length) throws SAXException
108            {
109                if (insideDTD)
110                    return;
111    
112                // TODO: Coalesce?
113                add(XMLTokenType.COMMENT).text = new String(ch, start, length);
114            }
115    
116            public void startCDATA() throws SAXException
117            {
118                // TODO: Flush characters?
119    
120                inCDATA = true;
121            }
122    
123            public void endCDATA() throws SAXException
124            {
125                if (builder.length() != 0)
126                {
127                    add(XMLTokenType.CDATA).text = builder.toString();
128                }
129    
130                builder.setLength(0);
131                inCDATA = false;
132            }
133    
134            public void characters(char[] ch, int start, int length) throws SAXException
135            {
136                if (inCDATA)
137                {
138                    builder.append(ch, start, length);
139                    return;
140                }
141    
142                XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation);
143                token.text = new String(ch, start, length);
144    
145                tokens.add(token);
146            }
147    
148            public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
149            {
150                characters(ch, start, length);
151            }
152    
153            public void startDTD(final String name, final String publicId, final String systemId)
154                    throws SAXException
155            {
156                insideDTD = true;
157    
158                if (!ignoreDTD)
159                {
160                    DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId);
161    
162                    add(XMLTokenType.DTD).dtdData = data;
163                }
164            }
165    
166            public void endDocument() throws SAXException
167            {
168                add(XMLTokenType.END_DOCUMENT);
169            }
170    
171            public void endElement(String uri, String localName, String qName) throws SAXException
172            {
173                add(XMLTokenType.END_ELEMENT);
174            }
175    
176            public void setDocumentLocator(Locator locator)
177            {
178                this.locator = locator;
179            }
180    
181            /**
182             * Checks for the extra namespace injected when the transitional doctype is injected (which
183             * occurs when the template contains no doctype).
184             */
185            private boolean ignoreURI(String uri)
186            {
187                return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml");
188            }
189    
190            public void startElement(String uri, String localName, String qName, Attributes attributes)
191                    throws SAXException
192            {
193                XMLToken token = add(XMLTokenType.START_ELEMENT);
194    
195                token.uri = ignoreURI(uri) ? "" : uri;
196                token.localName = localName;
197                token.qName = qName;
198    
199                // The XML parser tends to reuse the same Attributes object, so
200                // capture the data out of it.
201    
202                Attributes2 a2 = (attributes instanceof Attributes2) ? (Attributes2) attributes : null;
203    
204                if (attributes.getLength() == 0)
205                {
206                    token.attributes = Collections.emptyList();
207                } else
208                {
209                    token.attributes = CollectionFactory.newList();
210    
211                    for (int i = 0; i < attributes.getLength(); i++)
212                    {
213                        // Filter out attributes that are not present in the XML input stream, but were
214                        // instead provided by DTD defaulting.
215    
216                        if (a2 != null && !a2.isSpecified(i))
217                        {
218                            continue;
219                        }
220    
221                        String prefixedName = attributes.getQName(i);
222    
223                        int lastColon = prefixedName.lastIndexOf(':');
224    
225                        String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : "";
226    
227                        QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i),
228                                prefix);
229    
230                        token.attributes.add(new AttributeInfo(qname, attributes.getValue(i)));
231                    }
232                }
233    
234                token.namespaceMappings = CollectionFactory.newList(namespaceMappings);
235    
236                namespaceMappings.clear();
237    
238                // Any text collected starts here as well:
239    
240                textLocation = getLocation();
241            }
242    
243            public void startPrefixMapping(String prefix, String uri) throws SAXException
244            {
245                if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml"))
246                {
247                    return;
248                }
249    
250                namespaceMappings.add(new NamespaceMapping(prefix, uri));
251            }
252    
253            public void endDTD() throws SAXException
254            {
255                insideDTD = false;
256            }
257    
258            public void endEntity(String name) throws SAXException
259            {
260            }
261    
262            public void startEntity(String name) throws SAXException
263            {
264            }
265    
266            public void endPrefixMapping(String prefix) throws SAXException
267            {
268            }
269    
270            public void processingInstruction(String target, String data) throws SAXException
271            {
272            }
273    
274            public void skippedEntity(String name) throws SAXException
275            {
276            }
277    
278            public void startDocument() throws SAXException
279            {
280            }
281        }
282    
283        private int cursor = -1;
284    
285        private final List<XMLToken> tokens = CollectionFactory.newList();
286    
287        private final Resource resource;
288    
289        private final Map<String, URL> publicIdToURL;
290    
291        private Location exceptionLocation;
292    
293        private boolean html5DTD, ignoreDTD;
294    
295        private int lineOffset;
296    
297        public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL)
298        {
299            this.resource = resource;
300            this.publicIdToURL = publicIdToURL;
301        }
302    
303        public void parse() throws SAXException, IOException
304        {
305            SaxHandler handler = new SaxHandler();
306    
307            XMLReader reader = XMLReaderFactory.createXMLReader();
308    
309            reader.setContentHandler(handler);
310            reader.setEntityResolver(handler);
311            reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
312    
313            InputStream stream = openStream();
314    
315            try
316            {
317                reader.parse(new InputSource(stream));
318            } catch (IOException ex)
319            {
320                this.exceptionLocation = handler.getLocation();
321    
322                throw ex;
323            } catch (SAXException ex)
324            {
325                this.exceptionLocation = handler.getLocation();
326    
327                throw ex;
328            } catch (RuntimeException ex)
329            {
330                this.exceptionLocation = handler.getLocation();
331    
332                throw ex;
333            } finally
334            {
335                InternalUtils.close(stream);
336            }
337        }
338    
339        enum State
340        {
341            MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY
342        }
343    
344        private InputStream openStream() throws IOException
345        {
346            InputStream rawStream = resource.openStream();
347    
348            InputStreamReader rawReader = new InputStreamReader(rawStream);
349            LineNumberReader reader = new LineNumberReader(rawReader);
350    
351            ByteArrayOutputStream bos = new ByteArrayOutputStream(5000);
352            PrintWriter writer = new PrintWriter(bos);
353    
354            State state = State.MAYBE_XML;
355    
356            try
357            {
358                while (true)
359                {
360                    String line = reader.readLine();
361    
362                    if (line == null)
363                    {
364                        break;
365                    }
366    
367                    switch (state)
368                    {
369    
370                        case MAYBE_XML:
371    
372                            if (line.toLowerCase().startsWith("<?xml"))
373                            {
374                                writer.println(line);
375                                state = State.MAYBE_DOCTYPE;
376                                continue;
377                            }
378    
379                        case MAYBE_DOCTYPE:
380    
381                            if (line.trim().length() == 0)
382                            {
383                                writer.println(line);
384                                continue;
385                            }
386    
387                            String lineLower = line.toLowerCase();
388    
389                            if (lineLower.equals("<!doctype html>"))
390                            {
391                                html5DTD = true;
392                                writer.println(TRANSITIONAL_DOCTYPE);
393                                state = State.JUST_COPY;
394                                continue;
395                            }
396    
397    
398                            if (lineLower.startsWith("<!doctype"))
399                            {
400                                writer.println(line);
401                                state = State.JUST_COPY;
402                                continue;
403                            }
404    
405                            // No doctype, let's provide one.
406    
407                            ignoreDTD = true;
408                            lineOffset = -1;
409                            writer.println(TRANSITIONAL_DOCTYPE);
410    
411                            state = State.JUST_COPY;
412    
413                            // And drop down to writing out the actual line, and all following lines.
414    
415                        case JUST_COPY:
416                            writer.println(line);
417                    }
418                }
419            } finally
420            {
421                writer.close();
422                reader.close();
423            }
424    
425            return new ByteArrayInputStream(bos.toByteArray());
426        }
427    
428        private XMLToken token()
429        {
430            return tokens.get(cursor);
431        }
432    
433        /**
434         * Returns the type of the next token.
435         */
436        public XMLTokenType next()
437        {
438            cursor++;
439    
440            // TODO: Check for overflow?
441    
442            return getEventType();
443        }
444    
445        public int getAttributeCount()
446        {
447            return token().attributes.size();
448        }
449    
450        public QName getAttributeName(int i)
451        {
452            return token().attributes.get(i).attributeName;
453        }
454    
455        public DTDData getDTDInfo()
456        {
457            return token().dtdData;
458        }
459    
460        public XMLTokenType getEventType()
461        {
462            return token().type;
463        }
464    
465        public String getLocalName()
466        {
467            return token().localName;
468        }
469    
470        public Location getLocation()
471        {
472            if (exceptionLocation != null)
473                return exceptionLocation;
474    
475            return token().getLocation();
476        }
477    
478        public int getNamespaceCount()
479        {
480            return token().namespaceMappings.size();
481        }
482    
483        public String getNamespacePrefix(int i)
484        {
485            return token().namespaceMappings.get(i).prefix;
486        }
487    
488        public String getNamespaceURI()
489        {
490            return token().uri;
491        }
492    
493        public String getNamespaceURI(int i)
494        {
495            return token().namespaceMappings.get(i).uri;
496        }
497    
498        public String getText()
499        {
500            return token().text;
501        }
502    
503        public boolean hasNext()
504        {
505            return cursor < tokens.size() - 1;
506        }
507    
508        public String getAttributeValue(int i)
509        {
510            return token().attributes.get(i).value;
511        }
512    
513    }