001// Copyright 2009-2013 The Apache Software Foundation 002// 003// Licensed under the Apache License, Version 2.0 (the "License"); 004// you may not use this file except in compliance with the License. 005// You may obtain a copy of the License at 006// 007// http://www.apache.org/licenses/LICENSE-2.0 008// 009// Unless required by applicable law or agreed to in writing, software 010// distributed under the License is distributed on an "AS IS" BASIS, 011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012// See the License for the specific language governing permissions and 013// limitations under the License. 014 015package org.apache.tapestry5.internal.services; 016 017import org.apache.tapestry5.ioc.Location; 018import org.apache.tapestry5.ioc.Resource; 019import org.apache.tapestry5.ioc.internal.util.CollectionFactory; 020import org.apache.tapestry5.ioc.internal.util.InternalUtils; 021import org.apache.tapestry5.ioc.internal.util.LocationImpl; 022import org.apache.tapestry5.ioc.util.ExceptionUtils; 023import org.xml.sax.*; 024import org.xml.sax.ext.Attributes2; 025import org.xml.sax.ext.LexicalHandler; 026import org.xml.sax.helpers.XMLReaderFactory; 027 028import javax.xml.namespace.QName; 029import java.io.*; 030import java.net.URL; 031import java.util.Collections; 032import java.util.List; 033import java.util.Map; 034 035/** 036 * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>}) 037 * as if it were the XHTML transitional doctype 038 * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}). 039 */ 040public class XMLTokenStream 041{ 042 043 public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"; 044 045 private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null); 046 047 private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler 048 { 049 private Locator locator; 050 051 private int currentLine = -1; 052 053 private Location cachedLocation; 054 055 private Location textLocation; 056 057 private final StringBuilder builder = new StringBuilder(); 058 059 private boolean inCDATA, insideDTD; 060 061 private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList(); 062 063 private Location getLocation() 064 { 065 if (locator == null) 066 { 067 if (cachedLocation == null) 068 { 069 cachedLocation = new LocationImpl(resource); 070 } 071 } else { 072 int line = locator.getLineNumber(); 073 074 if (currentLine != line) 075 cachedLocation = null; 076 077 if (cachedLocation == null) 078 { 079 // lineOffset accounts for the extra line when a doctype is injected. The line number reported 080 // from the XML parser inlcudes the phantom doctype line, the lineOffset is used to subtract one 081 // to get the real line number. 082 cachedLocation = new LocationImpl(resource, line + lineOffset); 083 } 084 } 085 086 return cachedLocation; 087 } 088 089 private XMLToken add(XMLTokenType type) 090 { 091 XMLToken token = new XMLToken(type, getLocation()); 092 093 tokens.add(token); 094 095 return token; 096 } 097 098 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, 099 IOException 100 { 101 URL url = publicIdToURL.get(publicId); 102 103 try 104 { 105 if (url != null) 106 return new InputSource(url.openStream()); 107 } catch (IOException ex) 108 { 109 throw new SAXException(String.format("Unable to open stream for resource %s: %s", 110 url, ExceptionUtils.toMessage(ex)), ex); 111 } 112 113 return null; 114 } 115 116 public void comment(char[] ch, int start, int length) throws SAXException 117 { 118 if (insideDTD) 119 return; 120 121 // TODO: Coalesce? 122 add(XMLTokenType.COMMENT).text = new String(ch, start, length); 123 } 124 125 public void startCDATA() throws SAXException 126 { 127 // TODO: Flush characters? 128 129 inCDATA = true; 130 } 131 132 public void endCDATA() throws SAXException 133 { 134 if (builder.length() != 0) 135 { 136 add(XMLTokenType.CDATA).text = builder.toString(); 137 } 138 139 builder.setLength(0); 140 inCDATA = false; 141 } 142 143 public void characters(char[] ch, int start, int length) throws SAXException 144 { 145 if (inCDATA) 146 { 147 builder.append(ch, start, length); 148 return; 149 } 150 151 XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation); 152 token.text = new String(ch, start, length); 153 154 tokens.add(token); 155 } 156 157 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException 158 { 159 characters(ch, start, length); 160 } 161 162 public void startDTD(final String name, final String publicId, final String systemId) 163 throws SAXException 164 { 165 insideDTD = true; 166 167 if (!ignoreDTD) 168 { 169 DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId); 170 171 add(XMLTokenType.DTD).dtdData = data; 172 } 173 } 174 175 public void endDocument() throws SAXException 176 { 177 add(XMLTokenType.END_DOCUMENT); 178 } 179 180 public void endElement(String uri, String localName, String qName) throws SAXException 181 { 182 add(XMLTokenType.END_ELEMENT); 183 } 184 185 public void setDocumentLocator(Locator locator) 186 { 187 this.locator = locator; 188 } 189 190 /** 191 * Checks for the extra namespace injected when the transitional doctype is injected (which 192 * occurs when the template contains no doctype). 193 */ 194 private boolean ignoreURI(String uri) 195 { 196 return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml"); 197 } 198 199 public void startElement(String uri, String localName, String qName, Attributes attributes) 200 throws SAXException 201 { 202 XMLToken token = add(XMLTokenType.START_ELEMENT); 203 204 token.uri = ignoreURI(uri) ? "" : uri; 205 token.localName = localName; 206 token.qName = qName; 207 208 // The XML parser tends to reuse the same Attributes object, so 209 // capture the data out of it. 210 211 Attributes2 a2 = (attributes instanceof Attributes2) ? (Attributes2) attributes : null; 212 213 if (attributes.getLength() == 0) 214 { 215 token.attributes = Collections.emptyList(); 216 } else 217 { 218 token.attributes = CollectionFactory.newList(); 219 220 for (int i = 0; i < attributes.getLength(); i++) 221 { 222 // Filter out attributes that are not present in the XML input stream, but were 223 // instead provided by DTD defaulting. 224 225 if (a2 != null && !a2.isSpecified(i)) 226 { 227 continue; 228 } 229 230 String prefixedName = attributes.getQName(i); 231 232 int lastColon = prefixedName.lastIndexOf(':'); 233 234 String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : ""; 235 236 QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i), 237 prefix); 238 239 token.attributes.add(new AttributeInfo(qname, attributes.getValue(i))); 240 } 241 } 242 243 token.namespaceMappings = CollectionFactory.newList(namespaceMappings); 244 245 namespaceMappings.clear(); 246 247 // Any text collected starts here as well: 248 249 textLocation = getLocation(); 250 } 251 252 public void startPrefixMapping(String prefix, String uri) throws SAXException 253 { 254 if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml")) 255 { 256 return; 257 } 258 259 namespaceMappings.add(new NamespaceMapping(prefix, uri)); 260 } 261 262 public void endDTD() throws SAXException 263 { 264 insideDTD = false; 265 } 266 267 public void endEntity(String name) throws SAXException 268 { 269 } 270 271 public void startEntity(String name) throws SAXException 272 { 273 } 274 275 public void endPrefixMapping(String prefix) throws SAXException 276 { 277 } 278 279 public void processingInstruction(String target, String data) throws SAXException 280 { 281 } 282 283 public void skippedEntity(String name) throws SAXException 284 { 285 } 286 287 public void startDocument() throws SAXException 288 { 289 } 290 } 291 292 private int cursor = -1; 293 294 private final List<XMLToken> tokens = CollectionFactory.newList(); 295 296 private final Resource resource; 297 298 private final Map<String, URL> publicIdToURL; 299 300 private Location exceptionLocation; 301 302 private boolean html5DTD, ignoreDTD; 303 304 private int lineOffset; 305 306 public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL) 307 { 308 this.resource = resource; 309 this.publicIdToURL = publicIdToURL; 310 } 311 312 public void parse() throws SAXException, IOException 313 { 314 SaxHandler handler = new SaxHandler(); 315 316 XMLReader reader = XMLReaderFactory.createXMLReader(); 317 318 reader.setContentHandler(handler); 319 reader.setEntityResolver(handler); 320 reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler); 321 322 InputStream stream = null; 323 324 try 325 { 326 stream = openStream(); 327 reader.parse(new InputSource(stream)); 328 } catch (IOException ex) 329 { 330 this.exceptionLocation = handler.getLocation(); 331 332 throw ex; 333 } catch (SAXException ex) 334 { 335 this.exceptionLocation = handler.getLocation(); 336 337 throw ex; 338 } catch (RuntimeException ex) 339 { 340 this.exceptionLocation = handler.getLocation(); 341 342 throw ex; 343 } finally 344 { 345 InternalUtils.close(stream); 346 } 347 } 348 349 enum State 350 { 351 MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY 352 } 353 354 private InputStream openStream() throws IOException 355 { 356 InputStream rawStream = resource.openStream(); 357 358 String transformationEncoding = "UTF8"; 359 360 InputStreamReader rawReader = new InputStreamReader(rawStream, transformationEncoding); 361 LineNumberReader reader = new LineNumberReader(rawReader); 362 363 ByteArrayOutputStream bos = new ByteArrayOutputStream(5000); 364 PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, transformationEncoding)); 365 366 State state = State.MAYBE_XML; 367 368 try 369 { 370 while (true) 371 { 372 String line = reader.readLine(); 373 374 if (line == null) 375 { 376 break; 377 } 378 379 switch (state) 380 { 381 382 case MAYBE_XML: 383 384 if (line.toLowerCase().startsWith("<?xml")) 385 { 386 writer.println(line); 387 state = State.MAYBE_DOCTYPE; 388 continue; 389 } 390 391 case MAYBE_DOCTYPE: 392 393 if (line.trim().length() == 0) 394 { 395 writer.println(line); 396 continue; 397 } 398 399 String lineLower = line.toLowerCase(); 400 401 if (lineLower.equals("<!doctype html>")) 402 { 403 html5DTD = true; 404 writer.println(TRANSITIONAL_DOCTYPE); 405 state = State.JUST_COPY; 406 continue; 407 } 408 409 410 if (lineLower.startsWith("<!doctype")) 411 { 412 writer.println(line); 413 state = State.JUST_COPY; 414 continue; 415 } 416 417 // No doctype, let's provide one. 418 419 ignoreDTD = true; 420 lineOffset = -1; 421 writer.println(TRANSITIONAL_DOCTYPE); 422 423 state = State.JUST_COPY; 424 425 // And drop down to writing out the actual line, and all following lines. 426 427 case JUST_COPY: 428 writer.println(line); 429 } 430 } 431 } finally 432 { 433 writer.close(); 434 reader.close(); 435 } 436 437 return new ByteArrayInputStream(bos.toByteArray()); 438 } 439 440 private XMLToken token() 441 { 442 return cursor == -1 ? null : tokens.get(cursor); 443 } 444 445 /** 446 * Returns the type of the next token. 447 */ 448 public XMLTokenType next() 449 { 450 cursor++; 451 452 // TODO: Check for overflow? 453 454 return getEventType(); 455 } 456 457 public int getAttributeCount() 458 { 459 return token().attributes.size(); 460 } 461 462 public QName getAttributeName(int i) 463 { 464 return token().attributes.get(i).attributeName; 465 } 466 467 public DTDData getDTDInfo() 468 { 469 return token().dtdData; 470 } 471 472 public XMLTokenType getEventType() 473 { 474 return token().type; 475 } 476 477 public String getLocalName() 478 { 479 return token().localName; 480 } 481 482 public Location getLocation() 483 { 484 if (exceptionLocation != null) 485 return exceptionLocation; 486 487 return token().getLocation(); 488 } 489 490 public int getNamespaceCount() 491 { 492 return token().namespaceMappings.size(); 493 } 494 495 public String getNamespacePrefix(int i) 496 { 497 return token().namespaceMappings.get(i).prefix; 498 } 499 500 public String getNamespaceURI() 501 { 502 return token().uri; 503 } 504 505 public String getNamespaceURI(int i) 506 { 507 return token().namespaceMappings.get(i).uri; 508 } 509 510 public String getText() 511 { 512 return token().text; 513 } 514 515 public boolean hasNext() 516 { 517 return cursor < tokens.size() - 1; 518 } 519 520 public String getAttributeValue(int i) 521 { 522 return token().attributes.get(i).value; 523 } 524 525}