// Copyright 2009, 2011 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.apache.tapestry5.internal.services;

import org.apache.tapestry5.ioc.Location;
import org.apache.tapestry5.ioc.Resource;
import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
import org.apache.tapestry5.ioc.internal.util.InternalUtils;
import org.apache.tapestry5.ioc.internal.util.LocationImpl;
import org.xml.sax.*;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import javax.xml.namespace.QName;
import java.io.*;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>})
 * as if it were the XHTML transitional doctype
 * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}).
 * <p/>
 * Usage: call {@link #parse()} once to tokenize the whole resource, then walk the tokens with {@link #hasNext()} and
 * {@link #next()}; the remaining accessors describe the token the cursor currently points at.
 */
public class XMLTokenStream
{

    public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";

    private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null);

    /**
     * Encoding used by {@link #openStream()} to shuttle the template through the line-based doctype filter.
     * ISO-8859-1 maps every byte value to exactly one character and back, so the bytes handed to the XML parser
     * are the bytes of the original resource (apart from the injected/replaced doctype line and normalized line
     * terminators) and the parser remains free to apply the encoding declared by the document itself.
     */
    private static final String PASS_THROUGH_ENCODING = "ISO-8859-1";

    /**
     * Receives the SAX callbacks and converts them into {@link XMLToken}s appended to the enclosing stream's
     * token list.
     */
    private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler
    {
        private Locator locator;

        /** Line number for which {@link #cachedLocation} was built; -1 until the first location is created. */
        private int currentLine = -1;

        private Location cachedLocation;

        /** Location assigned to CHARACTERS tokens; updated each time an element starts. */
        private Location textLocation;

        /** Accumulates the content of a CDATA section, which SAX may deliver in several chunks. */
        private final StringBuilder builder = new StringBuilder();

        private boolean inCDATA, insideDTD;

        /** Prefix mappings reported since the last start element; they belong to the next start element. */
        private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList();

        /**
         * Returns the location of the parser's current position, reusing the previous instance while the parser
         * stays on the same line.
         */
        private Location getLocation()
        {
            if (locator == null)
            {
                // The parser has not supplied a locator (for example, it failed before doing so). This method
                // is invoked from the exception handlers in parse(), so it must not fail itself and hide the
                // real error. -1 follows the SAX Locator convention for "line number not available".
                return new LocationImpl(resource, -1);
            }

            int line = locator.getLineNumber();

            if (currentLine != line)
            {
                cachedLocation = null;

                // Remember which line the cache is for; without this the cache is discarded on every call.
                currentLine = line;
            }

            if (cachedLocation == null)
            {
                // lineOffset accounts for the extra line when a doctype is injected. The line number reported
                // from the XML parser includes the phantom doctype line, the lineOffset is used to subtract one
                // to get the real line number.
                cachedLocation = new LocationImpl(resource, line + lineOffset);
            }

            return cachedLocation;
        }

        /**
         * Creates a token of the given type at the current location and appends it to the token list.
         *
         * @return the new token, so the caller can fill in type-specific fields
         */
        private XMLToken add(XMLTokenType type)
        {
            XMLToken token = new XMLToken(type, getLocation());

            tokens.add(token);

            return token;
        }

        /**
         * Resolves an external entity through the configured public-id-to-URL map.
         *
         * @return an input source for the mapped URL, or null (leaving resolution to the parser) when the
         *         public id is not mapped
         * @throws SAXException if the mapped URL can not be opened
         */
        public InputSource resolveEntity(String publicId, String systemId) throws SAXException,
                IOException
        {
            URL url = publicIdToURL.get(publicId);

            try
            {
                if (url != null)
                    return new InputSource(url.openStream());
            } catch (IOException ex)
            {
                throw new SAXException(String.format("Unable to open stream for resource %s: %s",
                        url, InternalUtils.toMessage(ex)), ex);
            }

            return null;
        }

        /** Records a COMMENT token, except for comments that appear inside the DTD. */
        public void comment(char[] ch, int start, int length) throws SAXException
        {
            if (insideDTD)
                return;

            // TODO: Coalesce?
            add(XMLTokenType.COMMENT).text = new String(ch, start, length);
        }

        public void startCDATA() throws SAXException
        {
            // TODO: Flush characters?

            inCDATA = true;
        }

        /** Emits a single CDATA token for the accumulated content (none if the section was empty). */
        public void endCDATA() throws SAXException
        {
            if (builder.length() != 0)
            {
                add(XMLTokenType.CDATA).text = builder.toString();
            }

            builder.setLength(0);
            inCDATA = false;
        }

        /**
         * Inside a CDATA section the text is buffered until {@link #endCDATA()}; otherwise a CHARACTERS token
         * is recorded immediately, located at the most recent start element.
         */
        public void characters(char[] ch, int start, int length) throws SAXException
        {
            if (inCDATA)
            {
                builder.append(ch, start, length);
                return;
            }

            XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation);
            token.text = new String(ch, start, length);

            tokens.add(token);
        }

        /** Ignorable whitespace is treated exactly like ordinary character data. */
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
        {
            characters(ch, start, length);
        }

        /**
         * Records a DTD token unless the doctype was injected by {@link XMLTokenStream#openStream()}. When the
         * template used the HTML5 doctype, the token reports that doctype rather than the transitional one
         * that was substituted for parsing.
         */
        public void startDTD(final String name, final String publicId, final String systemId)
                throws SAXException
        {
            insideDTD = true;

            if (!ignoreDTD)
            {
                DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId);

                add(XMLTokenType.DTD).dtdData = data;
            }
        }

        public void endDocument() throws SAXException
        {
            add(XMLTokenType.END_DOCUMENT);
        }

        public void endElement(String uri, String localName, String qName) throws SAXException
        {
            add(XMLTokenType.END_ELEMENT);
        }

        public void setDocumentLocator(Locator locator)
        {
            this.locator = locator;
        }

        /**
         * Checks for the extra namespace injected when the transitional doctype is injected (which
         * occurs when the template contains no doctype).
         */
        private boolean ignoreURI(String uri)
        {
            return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml");
        }

        /**
         * Records a START_ELEMENT token carrying the element's names, a copy of its attributes and the
         * namespace mappings reported since the previous start element.
         */
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException
        {
            XMLToken token = add(XMLTokenType.START_ELEMENT);

            token.uri = ignoreURI(uri) ? "" : uri;
            token.localName = localName;
            token.qName = qName;

            // The XML parser tends to reuse the same Attributes object, so
            // capture the data out of it.

            if (attributes.getLength() == 0)
            {
                token.attributes = Collections.emptyList();
            } else
            {
                token.attributes = CollectionFactory.newList();

                for (int i = 0; i < attributes.getLength(); i++)
                {
                    String prefixedName = attributes.getQName(i);

                    int lastColon = prefixedName.lastIndexOf(':');

                    String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : "";

                    QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i),
                            prefix);

                    token.attributes.add(new AttributeInfo(qname, attributes.getValue(i)));
                }
            }

            // Hand the pending mappings to this element (as a copy) and start collecting afresh.
            token.namespaceMappings = CollectionFactory.newList(namespaceMappings);

            namespaceMappings.clear();

            // Any text collected starts here as well:

            textLocation = getLocation();
        }

        /**
         * Queues a namespace mapping for the next start element, skipping the default XHTML namespace that
         * shows up only because a doctype was injected.
         */
        public void startPrefixMapping(String prefix, String uri) throws SAXException
        {
            if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml"))
            {
                return;
            }

            namespaceMappings.add(new NamespaceMapping(prefix, uri));
        }

        public void endDTD() throws SAXException
        {
            insideDTD = false;
        }

        public void endEntity(String name) throws SAXException
        {
        }

        public void startEntity(String name) throws SAXException
        {
        }

        public void endPrefixMapping(String prefix) throws SAXException
        {
        }

        public void processingInstruction(String target, String data) throws SAXException
        {
        }

        public void skippedEntity(String name) throws SAXException
        {
        }

        public void startDocument() throws SAXException
        {
        }
    }

    /** Index of the current token; -1 until {@link #next()} is first called. */
    private int cursor = -1;

    private final List<XMLToken> tokens = CollectionFactory.newList();

    private final Resource resource;

    private final Map<String, URL> publicIdToURL;

    /** Parser position captured when {@link #parse()} fails; null otherwise. */
    private Location exceptionLocation;

    /**
     * html5DTD: the template used {@code <!DOCTYPE html>}, which was swapped for the transitional doctype.
     * ignoreDTD: the template had no doctype, so the transitional doctype was injected and must be hidden.
     */
    private boolean html5DTD, ignoreDTD;

    /** Added to parser line numbers to compensate for an injected doctype line (0 or -1). */
    private int lineOffset;

    /**
     * @param resource      the template to parse
     * @param publicIdToURL maps DTD public ids to URLs from which the corresponding entity is read
     */
    public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL)
    {
        this.resource = resource;
        this.publicIdToURL = publicIdToURL;
    }

    /**
     * Parses the resource, collecting all of its tokens. If parsing fails, the parser's position is recorded
     * (and subsequently reported by {@link #getLocation()}) before the exception is rethrown.
     *
     * @throws SAXException if the document is not well formed, or an entity can not be opened
     * @throws IOException  if the resource can not be read
     */
    public void parse() throws SAXException, IOException
    {
        SaxHandler handler = new SaxHandler();

        XMLReader reader = XMLReaderFactory.createXMLReader();

        reader.setContentHandler(handler);
        reader.setEntityResolver(handler);
        reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);

        InputStream stream = openStream();

        try
        {
            reader.parse(new InputSource(stream));
        } catch (IOException ex)
        {
            this.exceptionLocation = handler.getLocation();

            throw ex;
        } catch (SAXException ex)
        {
            this.exceptionLocation = handler.getLocation();

            throw ex;
        } catch (RuntimeException ex)
        {
            this.exceptionLocation = handler.getLocation();

            throw ex;
        } finally
        {
            InternalUtils.close(stream);
        }
    }

    /** Progress of the line-by-line doctype filter in {@link #openStream()}. */
    enum State
    {
        MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY
    }

    /**
     * Reads the whole resource into memory, line by line, making sure the result carries the XHTML transitional
     * doctype when the template has the HTML5 doctype or no doctype at all:
     * <ul>
     * <li>a leading {@code <?xml ...} line and blank lines are copied as is;</li>
     * <li>a line that is exactly {@code <!doctype html>} (ignoring case) is replaced by the transitional
     * doctype and {@link #html5DTD} is set;</li>
     * <li>any other line starting with {@code <!doctype} is copied as is;</li>
     * <li>otherwise the transitional doctype is inserted before the first content line, {@link #ignoreDTD} is
     * set and {@link #lineOffset} becomes -1 to compensate for the extra line.</li>
     * </ul>
     * The bytes are passed through using {@link #PASS_THROUGH_ENCODING} rather than the platform default
     * charset, so non-ASCII content is not altered before the XML parser decodes it.
     *
     * @return a stream over the filtered, in-memory copy of the resource
     */
    private InputStream openStream() throws IOException
    {
        InputStream rawStream = resource.openStream();

        InputStreamReader rawReader = new InputStreamReader(rawStream, PASS_THROUGH_ENCODING);
        LineNumberReader reader = new LineNumberReader(rawReader);

        ByteArrayOutputStream bos = new ByteArrayOutputStream(5000);
        PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, PASS_THROUGH_ENCODING));

        State state = State.MAYBE_XML;

        try
        {
            while (true)
            {
                String line = reader.readLine();

                if (line == null)
                {
                    break;
                }

                switch (state)
                {

                    case MAYBE_XML:

                        if (line.toLowerCase().startsWith("<?xml"))
                        {
                            writer.println(line);
                            state = State.MAYBE_DOCTYPE;
                            continue;
                        }

                        // Not an XML declaration: deliberately fall through and test the same line for a doctype.

                    case MAYBE_DOCTYPE:

                        if (line.trim().length() == 0)
                        {
                            writer.println(line);
                            continue;
                        }

                        String lineLower = line.toLowerCase();

                        if (lineLower.equals("<!doctype html>"))
                        {
                            html5DTD = true;
                            writer.println(TRANSITIONAL_DOCTYPE);
                            state = State.JUST_COPY;
                            continue;
                        }


                        if (lineLower.startsWith("<!doctype"))
                        {
                            writer.println(line);
                            state = State.JUST_COPY;
                            continue;
                        }

                        // No doctype, let's provide one.

                        ignoreDTD = true;
                        lineOffset = -1;
                        writer.println(TRANSITIONAL_DOCTYPE);

                        state = State.JUST_COPY;

                        // And drop down to writing out the actual line, and all following lines.

                    case JUST_COPY:
                        writer.println(line);
                }
            }
        } finally
        {
            writer.close();
            reader.close();
        }

        return new ByteArrayInputStream(bos.toByteArray());
    }

    /** Returns the token the cursor currently points at. */
    private XMLToken token()
    {
        return tokens.get(cursor);
    }

    /**
     * Advances to the next token and returns its type.
     */
    public XMLTokenType next()
    {
        cursor++;

        // TODO: Check for overflow?

        return getEventType();
    }

    /** Returns the number of attributes of the current (start element) token. */
    public int getAttributeCount()
    {
        return token().attributes.size();
    }

    /** Returns the qualified name of the i-th attribute of the current token. */
    public QName getAttributeName(int i)
    {
        return token().attributes.get(i).attributeName;
    }

    /** Returns the doctype data of the current (DTD) token. */
    public DTDData getDTDInfo()
    {
        return token().dtdData;
    }

    /** Returns the type of the current token. */
    public XMLTokenType getEventType()
    {
        return token().type;
    }

    /** Returns the local name of the current (start element) token. */
    public String getLocalName()
    {
        return token().localName;
    }

    /**
     * Returns the position at which {@link #parse()} failed, if it did; otherwise the location of the current
     * token.
     */
    public Location getLocation()
    {
        if (exceptionLocation != null)
            return exceptionLocation;

        return token().getLocation();
    }

    /** Returns the number of namespace mappings attached to the current (start element) token. */
    public int getNamespaceCount()
    {
        return token().namespaceMappings.size();
    }

    /** Returns the prefix of the i-th namespace mapping of the current token. */
    public String getNamespacePrefix(int i)
    {
        return token().namespaceMappings.get(i).prefix;
    }

    /** Returns the namespace URI of the current (start element) token. */
    public String getNamespaceURI()
    {
        return token().uri;
    }

    /** Returns the URI of the i-th namespace mapping of the current token. */
    public String getNamespaceURI(int i)
    {
        return token().namespaceMappings.get(i).uri;
    }

    /** Returns the text of the current token (set for comment, CDATA and character tokens). */
    public String getText()
    {
        return token().text;
    }

    /** Returns true if at least one token follows the current one. */
    public boolean hasNext()
    {
        return cursor < tokens.size() - 1;
    }

    /** Returns the value of the i-th attribute of the current token. */
    public String getAttributeValue(int i)
    {
        return token().attributes.get(i).value;
    }

}