001 // Copyright 2009, 2011, 2012 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry5.internal.services;
016
017 import org.apache.tapestry5.ioc.Location;
018 import org.apache.tapestry5.ioc.Resource;
019 import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
020 import org.apache.tapestry5.ioc.internal.util.InternalUtils;
021 import org.apache.tapestry5.ioc.internal.util.LocationImpl;
022 import org.xml.sax.*;
023 import org.xml.sax.ext.Attributes2;
024 import org.xml.sax.ext.LexicalHandler;
025 import org.xml.sax.helpers.XMLReaderFactory;
026
027 import javax.xml.namespace.QName;
028 import java.io.*;
029 import java.net.URL;
030 import java.util.Collections;
031 import java.util.List;
032 import java.util.Map;
033
034 /**
035 * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>})
036 * as if it were the XHTML transitional doctype
037 * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}).
038 */
039 public class XMLTokenStream
040 {
041
042 public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
043
044 private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null);
045
046 private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler
047 {
048 private Locator locator;
049
050 private int currentLine = -1;
051
052 private Location cachedLocation;
053
054 private Location textLocation;
055
056 private final StringBuilder builder = new StringBuilder();
057
058 private boolean inCDATA, insideDTD;
059
060 private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList();
061
062 private Location getLocation()
063 {
064 int line = locator.getLineNumber();
065
066 if (currentLine != line)
067 cachedLocation = null;
068
069 if (cachedLocation == null)
070 {
071 // lineOffset accounts for the extra line when a doctype is injected. The line number reported
072 // from the XML parser inlcudes the phantom doctype line, the lineOffset is used to subtract one
073 // to get the real line number.
074 cachedLocation = new LocationImpl(resource, line + lineOffset);
075 }
076
077 return cachedLocation;
078 }
079
080 private XMLToken add(XMLTokenType type)
081 {
082 XMLToken token = new XMLToken(type, getLocation());
083
084 tokens.add(token);
085
086 return token;
087 }
088
089 public InputSource resolveEntity(String publicId, String systemId) throws SAXException,
090 IOException
091 {
092 URL url = publicIdToURL.get(publicId);
093
094 try
095 {
096 if (url != null)
097 return new InputSource(url.openStream());
098 } catch (IOException ex)
099 {
100 throw new SAXException(String.format("Unable to open stream for resource %s: %s",
101 url, InternalUtils.toMessage(ex)), ex);
102 }
103
104 return null;
105 }
106
107 public void comment(char[] ch, int start, int length) throws SAXException
108 {
109 if (insideDTD)
110 return;
111
112 // TODO: Coalesce?
113 add(XMLTokenType.COMMENT).text = new String(ch, start, length);
114 }
115
116 public void startCDATA() throws SAXException
117 {
118 // TODO: Flush characters?
119
120 inCDATA = true;
121 }
122
123 public void endCDATA() throws SAXException
124 {
125 if (builder.length() != 0)
126 {
127 add(XMLTokenType.CDATA).text = builder.toString();
128 }
129
130 builder.setLength(0);
131 inCDATA = false;
132 }
133
134 public void characters(char[] ch, int start, int length) throws SAXException
135 {
136 if (inCDATA)
137 {
138 builder.append(ch, start, length);
139 return;
140 }
141
142 XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation);
143 token.text = new String(ch, start, length);
144
145 tokens.add(token);
146 }
147
148 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
149 {
150 characters(ch, start, length);
151 }
152
153 public void startDTD(final String name, final String publicId, final String systemId)
154 throws SAXException
155 {
156 insideDTD = true;
157
158 if (!ignoreDTD)
159 {
160 DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId);
161
162 add(XMLTokenType.DTD).dtdData = data;
163 }
164 }
165
166 public void endDocument() throws SAXException
167 {
168 add(XMLTokenType.END_DOCUMENT);
169 }
170
171 public void endElement(String uri, String localName, String qName) throws SAXException
172 {
173 add(XMLTokenType.END_ELEMENT);
174 }
175
176 public void setDocumentLocator(Locator locator)
177 {
178 this.locator = locator;
179 }
180
181 /**
182 * Checks for the extra namespace injected when the transitional doctype is injected (which
183 * occurs when the template contains no doctype).
184 */
185 private boolean ignoreURI(String uri)
186 {
187 return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml");
188 }
189
190 public void startElement(String uri, String localName, String qName, Attributes attributes)
191 throws SAXException
192 {
193 XMLToken token = add(XMLTokenType.START_ELEMENT);
194
195 token.uri = ignoreURI(uri) ? "" : uri;
196 token.localName = localName;
197 token.qName = qName;
198
199 // The XML parser tends to reuse the same Attributes object, so
200 // capture the data out of it.
201
202 Attributes2 a2 = (attributes instanceof Attributes2) ? (Attributes2) attributes : null;
203
204 if (attributes.getLength() == 0)
205 {
206 token.attributes = Collections.emptyList();
207 } else
208 {
209 token.attributes = CollectionFactory.newList();
210
211 for (int i = 0; i < attributes.getLength(); i++)
212 {
213 // Filter out attributes that are not present in the XML input stream, but were
214 // instead provided by DTD defaulting.
215
216 if (a2 != null && !a2.isSpecified(i))
217 {
218 continue;
219 }
220
221 String prefixedName = attributes.getQName(i);
222
223 int lastColon = prefixedName.lastIndexOf(':');
224
225 String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : "";
226
227 QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i),
228 prefix);
229
230 token.attributes.add(new AttributeInfo(qname, attributes.getValue(i)));
231 }
232 }
233
234 token.namespaceMappings = CollectionFactory.newList(namespaceMappings);
235
236 namespaceMappings.clear();
237
238 // Any text collected starts here as well:
239
240 textLocation = getLocation();
241 }
242
243 public void startPrefixMapping(String prefix, String uri) throws SAXException
244 {
245 if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml"))
246 {
247 return;
248 }
249
250 namespaceMappings.add(new NamespaceMapping(prefix, uri));
251 }
252
253 public void endDTD() throws SAXException
254 {
255 insideDTD = false;
256 }
257
258 public void endEntity(String name) throws SAXException
259 {
260 }
261
262 public void startEntity(String name) throws SAXException
263 {
264 }
265
266 public void endPrefixMapping(String prefix) throws SAXException
267 {
268 }
269
270 public void processingInstruction(String target, String data) throws SAXException
271 {
272 }
273
274 public void skippedEntity(String name) throws SAXException
275 {
276 }
277
278 public void startDocument() throws SAXException
279 {
280 }
281 }
282
283 private int cursor = -1;
284
285 private final List<XMLToken> tokens = CollectionFactory.newList();
286
287 private final Resource resource;
288
289 private final Map<String, URL> publicIdToURL;
290
291 private Location exceptionLocation;
292
293 private boolean html5DTD, ignoreDTD;
294
295 private int lineOffset;
296
297 public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL)
298 {
299 this.resource = resource;
300 this.publicIdToURL = publicIdToURL;
301 }
302
303 public void parse() throws SAXException, IOException
304 {
305 SaxHandler handler = new SaxHandler();
306
307 XMLReader reader = XMLReaderFactory.createXMLReader();
308
309 reader.setContentHandler(handler);
310 reader.setEntityResolver(handler);
311 reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
312
313 InputStream stream = openStream();
314
315 try
316 {
317 reader.parse(new InputSource(stream));
318 } catch (IOException ex)
319 {
320 this.exceptionLocation = handler.getLocation();
321
322 throw ex;
323 } catch (SAXException ex)
324 {
325 this.exceptionLocation = handler.getLocation();
326
327 throw ex;
328 } catch (RuntimeException ex)
329 {
330 this.exceptionLocation = handler.getLocation();
331
332 throw ex;
333 } finally
334 {
335 InternalUtils.close(stream);
336 }
337 }
338
339 enum State
340 {
341 MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY
342 }
343
344 private InputStream openStream() throws IOException
345 {
346 InputStream rawStream = resource.openStream();
347
348 InputStreamReader rawReader = new InputStreamReader(rawStream);
349 LineNumberReader reader = new LineNumberReader(rawReader);
350
351 ByteArrayOutputStream bos = new ByteArrayOutputStream(5000);
352 PrintWriter writer = new PrintWriter(bos);
353
354 State state = State.MAYBE_XML;
355
356 try
357 {
358 while (true)
359 {
360 String line = reader.readLine();
361
362 if (line == null)
363 {
364 break;
365 }
366
367 switch (state)
368 {
369
370 case MAYBE_XML:
371
372 if (line.toLowerCase().startsWith("<?xml"))
373 {
374 writer.println(line);
375 state = State.MAYBE_DOCTYPE;
376 continue;
377 }
378
379 case MAYBE_DOCTYPE:
380
381 if (line.trim().length() == 0)
382 {
383 writer.println(line);
384 continue;
385 }
386
387 String lineLower = line.toLowerCase();
388
389 if (lineLower.equals("<!doctype html>"))
390 {
391 html5DTD = true;
392 writer.println(TRANSITIONAL_DOCTYPE);
393 state = State.JUST_COPY;
394 continue;
395 }
396
397
398 if (lineLower.startsWith("<!doctype"))
399 {
400 writer.println(line);
401 state = State.JUST_COPY;
402 continue;
403 }
404
405 // No doctype, let's provide one.
406
407 ignoreDTD = true;
408 lineOffset = -1;
409 writer.println(TRANSITIONAL_DOCTYPE);
410
411 state = State.JUST_COPY;
412
413 // And drop down to writing out the actual line, and all following lines.
414
415 case JUST_COPY:
416 writer.println(line);
417 }
418 }
419 } finally
420 {
421 writer.close();
422 reader.close();
423 }
424
425 return new ByteArrayInputStream(bos.toByteArray());
426 }
427
428 private XMLToken token()
429 {
430 return tokens.get(cursor);
431 }
432
433 /**
434 * Returns the type of the next token.
435 */
436 public XMLTokenType next()
437 {
438 cursor++;
439
440 // TODO: Check for overflow?
441
442 return getEventType();
443 }
444
445 public int getAttributeCount()
446 {
447 return token().attributes.size();
448 }
449
450 public QName getAttributeName(int i)
451 {
452 return token().attributes.get(i).attributeName;
453 }
454
455 public DTDData getDTDInfo()
456 {
457 return token().dtdData;
458 }
459
460 public XMLTokenType getEventType()
461 {
462 return token().type;
463 }
464
465 public String getLocalName()
466 {
467 return token().localName;
468 }
469
470 public Location getLocation()
471 {
472 if (exceptionLocation != null)
473 return exceptionLocation;
474
475 return token().getLocation();
476 }
477
478 public int getNamespaceCount()
479 {
480 return token().namespaceMappings.size();
481 }
482
483 public String getNamespacePrefix(int i)
484 {
485 return token().namespaceMappings.get(i).prefix;
486 }
487
488 public String getNamespaceURI()
489 {
490 return token().uri;
491 }
492
493 public String getNamespaceURI(int i)
494 {
495 return token().namespaceMappings.get(i).uri;
496 }
497
498 public String getText()
499 {
500 return token().text;
501 }
502
503 public boolean hasNext()
504 {
505 return cursor < tokens.size() - 1;
506 }
507
508 public String getAttributeValue(int i)
509 {
510 return token().attributes.get(i).value;
511 }
512
513 }