// Copyright 2009, 2011, 2012 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.tapestry5.internal.services;

import org.apache.tapestry5.ioc.Location;
import org.apache.tapestry5.ioc.Resource;
import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
import org.apache.tapestry5.ioc.internal.util.InternalUtils;
import org.apache.tapestry5.ioc.internal.util.LocationImpl;
import org.xml.sax.*;
import org.xml.sax.ext.Attributes2;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import javax.xml.namespace.QName;
import java.io.*;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>})
 * as if it were the XHTML transitional doctype
 * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}).
 * <p>
 * Usage: construct, call {@link #parse()} once to fill the token list, then walk the tokens with
 * {@link #hasNext()} / {@link #next()} and read the current token through the accessor methods.
 */
public class XMLTokenStream
{

    public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";

    private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null);

    /**
     * Charset used to shuttle the template through the line-based pre-processing in {@link #openStream()}.
     * ISO-8859-1 maps every byte to exactly one char and back, so the bytes handed to the XML parser are the
     * bytes of the original resource; the parser then applies its own encoding detection.
     */
    private static final String PASS_THROUGH_CHARSET = "ISO-8859-1";

    /**
     * Receives SAX callbacks and converts them into {@link XMLToken}s appended to the enclosing stream's
     * token list.
     */
    private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler
    {
        private Locator locator;

        // Line number for which cachedLocation was built; -1 until the first location is created.
        private int currentLine = -1;

        private Location cachedLocation;

        // Location assigned to CHARACTERS tokens; refreshed at each startElement().
        private Location textLocation;

        // Accumulates text between startCDATA() and endCDATA().
        private final StringBuilder builder = new StringBuilder();

        private boolean inCDATA, insideDTD;

        // Prefix mappings reported since the previous startElement(); attached to the next START_ELEMENT token.
        private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList();

        /**
         * Returns the location for the parser's current line, reusing the previous instance while the
         * line number is unchanged.
         */
        private Location getLocation()
        {
            if (locator == null)
            {
                // The parser has not supplied a Locator (yet); report the resource without a line number
                // rather than failing with a NullPointerException that would hide the real error.
                if (cachedLocation == null)
                {
                    cachedLocation = new LocationImpl(resource);
                }

                return cachedLocation;
            }

            int line = locator.getLineNumber();

            if (currentLine != line)
            {
                // Remember the line so that the cached location is actually reused for later tokens
                // on the same line.
                currentLine = line;
                cachedLocation = null;
            }

            if (cachedLocation == null)
            {
                // lineOffset accounts for the extra line when a doctype is injected. The line number reported
                // from the XML parser includes the phantom doctype line, the lineOffset is used to subtract one
                // to get the real line number.
                cachedLocation = new LocationImpl(resource, line + lineOffset);
            }

            return cachedLocation;
        }

        /**
         * Creates a token of the given type at the current location and appends it to the token list.
         */
        private XMLToken add(XMLTokenType type)
        {
            XMLToken token = new XMLToken(type, getLocation());

            tokens.add(token);

            return token;
        }

        /**
         * Resolves an external entity by looking up its public id in the configured map. Returns null
         * (leaving resolution to the parser) when the public id is not mapped.
         */
        public InputSource resolveEntity(String publicId, String systemId) throws SAXException,
                IOException
        {
            URL url = publicIdToURL.get(publicId);

            try
            {
                if (url != null)
                    return new InputSource(url.openStream());
            } catch (IOException ex)
            {
                throw new SAXException(String.format("Unable to open stream for resource %s: %s",
                        url, InternalUtils.toMessage(ex)), ex);
            }

            return null;
        }

        /**
         * Records a COMMENT token, except for comments that appear inside the DTD.
         */
        public void comment(char[] ch, int start, int length) throws SAXException
        {
            if (insideDTD)
                return;

            // TODO: Coalesce?
            add(XMLTokenType.COMMENT).text = new String(ch, start, length);
        }

        public void startCDATA() throws SAXException
        {
            // TODO: Flush characters?

            inCDATA = true;
        }

        /**
         * Emits the text accumulated since startCDATA() as a single CDATA token (nothing is emitted for an
         * empty section) and resets the accumulator.
         */
        public void endCDATA() throws SAXException
        {
            if (builder.length() != 0)
            {
                add(XMLTokenType.CDATA).text = builder.toString();
            }

            builder.setLength(0);
            inCDATA = false;
        }

        /**
         * Inside a CDATA section the text is accumulated; otherwise a CHARACTERS token is added, located at
         * the most recent start element.
         */
        public void characters(char[] ch, int start, int length) throws SAXException
        {
            if (inCDATA)
            {
                builder.append(ch, start, length);
                return;
            }

            XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation);
            token.text = new String(ch, start, length);

            tokens.add(token);
        }

        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
        {
            // Ignorable whitespace is treated exactly like ordinary character data.
            characters(ch, start, length);
        }

        /**
         * Records a DTD token unless the doctype was injected by {@link XMLTokenStream#openStream()}. When the
         * template used the HTML5 doctype, the token reports that doctype rather than the substituted
         * transitional one.
         */
        public void startDTD(final String name, final String publicId, final String systemId)
                throws SAXException
        {
            insideDTD = true;

            if (!ignoreDTD)
            {
                DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId);

                add(XMLTokenType.DTD).dtdData = data;
            }
        }

        public void endDocument() throws SAXException
        {
            add(XMLTokenType.END_DOCUMENT);
        }

        public void endElement(String uri, String localName, String qName) throws SAXException
        {
            add(XMLTokenType.END_ELEMENT);
        }

        public void setDocumentLocator(Locator locator)
        {
            this.locator = locator;
        }

        /**
         * Checks for the extra namespace injected when the transitional doctype is injected (which
         * occurs when the template contains no doctype).
         */
        private boolean ignoreURI(String uri)
        {
            return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml");
        }

        /**
         * Records a START_ELEMENT token carrying the element's names, its explicitly specified attributes,
         * and the namespace mappings declared since the previous start element.
         */
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException
        {
            XMLToken token = add(XMLTokenType.START_ELEMENT);

            token.uri = ignoreURI(uri) ? "" : uri;
            token.localName = localName;
            token.qName = qName;

            // The XML parser tends to reuse the same Attributes object, so
            // capture the data out of it.

            Attributes2 a2 = (attributes instanceof Attributes2) ? (Attributes2) attributes : null;

            if (attributes.getLength() == 0)
            {
                token.attributes = Collections.emptyList();
            } else
            {
                token.attributes = CollectionFactory.newList();

                for (int i = 0; i < attributes.getLength(); i++)
                {
                    // Filter out attributes that are not present in the XML input stream, but were
                    // instead provided by DTD defaulting.

                    if (a2 != null && !a2.isSpecified(i))
                    {
                        continue;
                    }

                    String prefixedName = attributes.getQName(i);

                    int lastColon = prefixedName.lastIndexOf(':');

                    String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : "";

                    QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i),
                            prefix);

                    token.attributes.add(new AttributeInfo(qname, attributes.getValue(i)));
                }
            }

            // Hand the token its own copy, then reset the shared list for the next element.
            token.namespaceMappings = CollectionFactory.newList(namespaceMappings);

            namespaceMappings.clear();

            // Any text collected starts here as well:

            textLocation = getLocation();
        }

        /**
         * Queues a namespace mapping for the next start element, skipping the default XHTML namespace that
         * appears as a side effect of the injected doctype.
         */
        public void startPrefixMapping(String prefix, String uri) throws SAXException
        {
            if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml"))
            {
                return;
            }

            namespaceMappings.add(new NamespaceMapping(prefix, uri));
        }

        public void endDTD() throws SAXException
        {
            insideDTD = false;
        }

        // The remaining callbacks produce no tokens.

        public void endEntity(String name) throws SAXException
        {
        }

        public void startEntity(String name) throws SAXException
        {
        }

        public void endPrefixMapping(String prefix) throws SAXException
        {
        }

        public void processingInstruction(String target, String data) throws SAXException
        {
        }

        public void skippedEntity(String name) throws SAXException
        {
        }

        public void startDocument() throws SAXException
        {
        }
    }

    // Index of the current token; -1 until next() is first invoked.
    private int cursor = -1;

    private final List<XMLToken> tokens = CollectionFactory.newList();

    private final Resource resource;

    private final Map<String, URL> publicIdToURL;

    // Set when parse() fails; takes precedence in getLocation().
    private Location exceptionLocation;

    // html5DTD: the template used <!DOCTYPE html>, which was swapped for the transitional doctype.
    // ignoreDTD: the template had no doctype and the transitional doctype was injected.
    private boolean html5DTD, ignoreDTD;

    // Added to parser-reported line numbers; -1 when an extra doctype line was injected.
    private int lineOffset;

    /**
     * @param resource
     *         the document to parse
     * @param publicIdToURL
     *         maps DTD public ids to the URLs from which those DTDs are read
     */
    public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL)
    {
        this.resource = resource;
        this.publicIdToURL = publicIdToURL;
    }

    /**
     * Parses the resource, filling the token list. If parsing fails, the location of the failure is
     * retained (and subsequently reported by {@link #getLocation()}) and the exception is rethrown.
     *
     * @throws SAXException
     *         if the document is not well formed or a mapped entity cannot be opened
     * @throws IOException
     *         if the resource cannot be read
     */
    public void parse() throws SAXException, IOException
    {
        SaxHandler handler = new SaxHandler();

        XMLReader reader = XMLReaderFactory.createXMLReader();

        reader.setContentHandler(handler);
        reader.setEntityResolver(handler);
        reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);

        InputStream stream = openStream();

        try
        {
            reader.parse(new InputSource(stream));
        } catch (IOException ex)
        {
            this.exceptionLocation = handler.getLocation();

            throw ex;
        } catch (SAXException ex)
        {
            this.exceptionLocation = handler.getLocation();

            throw ex;
        } catch (RuntimeException ex)
        {
            this.exceptionLocation = handler.getLocation();

            throw ex;
        } finally
        {
            InternalUtils.close(stream);
        }
    }

    /**
     * Pre-processing state used by {@link #openStream()}: still looking for the XML declaration, looking
     * for the doctype, or copying the rest of the document unchanged.
     */
    enum State
    {
        MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY
    }

    /**
     * Reads the resource line by line into memory, rewriting the prolog as needed: the HTML5 doctype is
     * replaced by the XHTML transitional doctype, and a template with no doctype gets the transitional
     * doctype injected ahead of its first non-blank line (after any XML declaration).
     *
     * @return a stream over the rewritten document
     */
    private InputStream openStream() throws IOException
    {
        InputStream rawStream = resource.openStream();

        // Decode and re-encode with the same single-byte charset so that the content bytes reach the XML
        // parser unchanged, whatever the platform default charset or the template's real encoding is.
        InputStreamReader rawReader = new InputStreamReader(rawStream, PASS_THROUGH_CHARSET);
        LineNumberReader reader = new LineNumberReader(rawReader);

        ByteArrayOutputStream bos = new ByteArrayOutputStream(5000);
        PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, PASS_THROUGH_CHARSET));

        State state = State.MAYBE_XML;

        try
        {
            String line;

            while ((line = reader.readLine()) != null)
            {
                switch (state)
                {

                    case MAYBE_XML:

                        if (line.toLowerCase().startsWith("<?xml"))
                        {
                            writer.println(line);
                            state = State.MAYBE_DOCTYPE;
                            continue;
                        }

                        // Not an XML declaration: fall through and examine the line as a possible doctype.

                    case MAYBE_DOCTYPE:

                        // Compare against the trimmed line so that indentation or trailing whitespace
                        // around the doctype does not defeat detection.
                        String candidate = line.trim().toLowerCase();

                        if (candidate.length() == 0)
                        {
                            writer.println(line);
                            continue;
                        }

                        if (candidate.equals("<!doctype html>"))
                        {
                            // One line replaces one line, so line numbers are unaffected.
                            html5DTD = true;
                            writer.println(TRANSITIONAL_DOCTYPE);
                            state = State.JUST_COPY;
                            continue;
                        }

                        if (candidate.startsWith("<!doctype"))
                        {
                            writer.println(line);
                            state = State.JUST_COPY;
                            continue;
                        }

                        // No doctype, let's provide one.

                        ignoreDTD = true;
                        lineOffset = -1;
                        writer.println(TRANSITIONAL_DOCTYPE);

                        state = State.JUST_COPY;

                        // And drop down to writing out the actual line, and all following lines.

                    case JUST_COPY:
                        writer.println(line);
                }
            }
        } finally
        {
            writer.close();
            reader.close();
        }

        return new ByteArrayInputStream(bos.toByteArray());
    }

    /**
     * Returns the token at the cursor.
     */
    private XMLToken token()
    {
        return tokens.get(cursor);
    }

    /**
     * Returns the type of the next token.
     */
    public XMLTokenType next()
    {
        cursor++;

        // TODO: Check for overflow?

        return getEventType();
    }

    /**
     * Returns the number of attributes recorded for the current token.
     */
    public int getAttributeCount()
    {
        return token().attributes.size();
    }

    /**
     * Returns the qualified name of the i-th attribute of the current token.
     */
    public QName getAttributeName(int i)
    {
        return token().attributes.get(i).attributeName;
    }

    /**
     * Returns the DTD data of the current token.
     */
    public DTDData getDTDInfo()
    {
        return token().dtdData;
    }

    /**
     * Returns the type of the current token.
     */
    public XMLTokenType getEventType()
    {
        return token().type;
    }

    /**
     * Returns the local name of the current token.
     */
    public String getLocalName()
    {
        return token().localName;
    }

    /**
     * Returns the location at which parsing failed, if {@link #parse()} threw an exception; otherwise the
     * location of the current token.
     */
    public Location getLocation()
    {
        if (exceptionLocation != null)
            return exceptionLocation;

        return token().getLocation();
    }

    /**
     * Returns the number of namespace mappings attached to the current token.
     */
    public int getNamespaceCount()
    {
        return token().namespaceMappings.size();
    }

    /**
     * Returns the prefix of the i-th namespace mapping of the current token.
     */
    public String getNamespacePrefix(int i)
    {
        return token().namespaceMappings.get(i).prefix;
    }

    /**
     * Returns the namespace URI of the current token.
     */
    public String getNamespaceURI()
    {
        return token().uri;
    }

    /**
     * Returns the URI of the i-th namespace mapping of the current token.
     */
    public String getNamespaceURI(int i)
    {
        return token().namespaceMappings.get(i).uri;
    }

    /**
     * Returns the text of the current token.
     */
    public String getText()
    {
        return token().text;
    }

    /**
     * Returns true if at least one more token follows the current one.
     */
    public boolean hasNext()
    {
        return cursor < tokens.size() - 1;
    }

    /**
     * Returns the value of the i-th attribute of the current token.
     */
    public String getAttributeValue(int i)
    {
        return token().attributes.get(i).value;
    }

}