001 // Copyright 2009, 2011 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry5.internal.services;
016
017 import org.apache.tapestry5.ioc.Location;
018 import org.apache.tapestry5.ioc.Resource;
019 import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
020 import org.apache.tapestry5.ioc.internal.util.InternalUtils;
021 import org.apache.tapestry5.ioc.internal.util.LocationImpl;
022 import org.xml.sax.*;
023 import org.xml.sax.ext.LexicalHandler;
024 import org.xml.sax.helpers.XMLReaderFactory;
025
026 import javax.xml.namespace.QName;
027 import java.io.*;
028 import java.net.URL;
029 import java.util.Collections;
030 import java.util.List;
031 import java.util.Map;
032
033 /**
034 * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>})
035 * as if it were the XHTML transitional doctype
036 * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}).
037 */
038 public class XMLTokenStream
039 {
040
041 public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
042
043 private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null);
044
045 private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler
046 {
047 private Locator locator;
048
049 private int currentLine = -1;
050
051 private Location cachedLocation;
052
053 private Location textLocation;
054
055 private final StringBuilder builder = new StringBuilder();
056
057 private boolean inCDATA, insideDTD;
058
059 private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList();
060
061 private Location getLocation()
062 {
063 int line = locator.getLineNumber();
064
065 if (currentLine != line)
066 cachedLocation = null;
067
068 if (cachedLocation == null)
069 {
070 // lineOffset accounts for the extra line when a doctype is injected. The line number reported
071 // from the XML parser inlcudes the phantom doctype line, the lineOffset is used to subtract one
072 // to get the real line number.
073 cachedLocation = new LocationImpl(resource, line + lineOffset);
074 }
075
076 return cachedLocation;
077 }
078
079 private XMLToken add(XMLTokenType type)
080 {
081 XMLToken token = new XMLToken(type, getLocation());
082
083 tokens.add(token);
084
085 return token;
086 }
087
088 public InputSource resolveEntity(String publicId, String systemId) throws SAXException,
089 IOException
090 {
091 URL url = publicIdToURL.get(publicId);
092
093 try
094 {
095 if (url != null)
096 return new InputSource(url.openStream());
097 } catch (IOException ex)
098 {
099 throw new SAXException(String.format("Unable to open stream for resource %s: %s",
100 url, InternalUtils.toMessage(ex)), ex);
101 }
102
103 return null;
104 }
105
106 public void comment(char[] ch, int start, int length) throws SAXException
107 {
108 if (insideDTD)
109 return;
110
111 // TODO: Coalesce?
112 add(XMLTokenType.COMMENT).text = new String(ch, start, length);
113 }
114
115 public void startCDATA() throws SAXException
116 {
117 // TODO: Flush characters?
118
119 inCDATA = true;
120 }
121
122 public void endCDATA() throws SAXException
123 {
124 if (builder.length() != 0)
125 {
126 add(XMLTokenType.CDATA).text = builder.toString();
127 }
128
129 builder.setLength(0);
130 inCDATA = false;
131 }
132
133 public void characters(char[] ch, int start, int length) throws SAXException
134 {
135 if (inCDATA)
136 {
137 builder.append(ch, start, length);
138 return;
139 }
140
141 XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation);
142 token.text = new String(ch, start, length);
143
144 tokens.add(token);
145 }
146
147 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
148 {
149 characters(ch, start, length);
150 }
151
152 public void startDTD(final String name, final String publicId, final String systemId)
153 throws SAXException
154 {
155 insideDTD = true;
156
157 if (!ignoreDTD)
158 {
159 DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId);
160
161 add(XMLTokenType.DTD).dtdData = data;
162 }
163 }
164
165 public void endDocument() throws SAXException
166 {
167 add(XMLTokenType.END_DOCUMENT);
168 }
169
170 public void endElement(String uri, String localName, String qName) throws SAXException
171 {
172 add(XMLTokenType.END_ELEMENT);
173 }
174
175 public void setDocumentLocator(Locator locator)
176 {
177 this.locator = locator;
178 }
179
180 /**
181 * Checks for the extra namespace injected when the transitional doctype is injected (which
182 * occurs when the template contains no doctype).
183 */
184 private boolean ignoreURI(String uri)
185 {
186 return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml");
187 }
188
189 public void startElement(String uri, String localName, String qName, Attributes attributes)
190 throws SAXException
191 {
192 XMLToken token = add(XMLTokenType.START_ELEMENT);
193
194 token.uri = ignoreURI(uri) ? "" : uri;
195 token.localName = localName;
196 token.qName = qName;
197
198 // The XML parser tends to reuse the same Attributes object, so
199 // capture the data out of it.
200
201 if (attributes.getLength() == 0)
202 {
203 token.attributes = Collections.emptyList();
204 } else
205 {
206 token.attributes = CollectionFactory.newList();
207
208 for (int i = 0; i < attributes.getLength(); i++)
209 {
210 String prefixedName = attributes.getQName(i);
211
212 int lastColon = prefixedName.lastIndexOf(':');
213
214 String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : "";
215
216 QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i),
217 prefix);
218
219 token.attributes.add(new AttributeInfo(qname, attributes.getValue(i)));
220 }
221 }
222
223 token.namespaceMappings = CollectionFactory.newList(namespaceMappings);
224
225 namespaceMappings.clear();
226
227 // Any text collected starts here as well:
228
229 textLocation = getLocation();
230 }
231
232 public void startPrefixMapping(String prefix, String uri) throws SAXException
233 {
234 if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml"))
235 {
236 return;
237 }
238
239 namespaceMappings.add(new NamespaceMapping(prefix, uri));
240 }
241
242 public void endDTD() throws SAXException
243 {
244 insideDTD = false;
245 }
246
247 public void endEntity(String name) throws SAXException
248 {
249 }
250
251 public void startEntity(String name) throws SAXException
252 {
253 }
254
255 public void endPrefixMapping(String prefix) throws SAXException
256 {
257 }
258
259 public void processingInstruction(String target, String data) throws SAXException
260 {
261 }
262
263 public void skippedEntity(String name) throws SAXException
264 {
265 }
266
267 public void startDocument() throws SAXException
268 {
269 }
270 }
271
272 private int cursor = -1;
273
274 private final List<XMLToken> tokens = CollectionFactory.newList();
275
276 private final Resource resource;
277
278 private final Map<String, URL> publicIdToURL;
279
280 private Location exceptionLocation;
281
282 private boolean html5DTD, ignoreDTD;
283
284 private int lineOffset;
285
286 public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL)
287 {
288 this.resource = resource;
289 this.publicIdToURL = publicIdToURL;
290 }
291
292 public void parse() throws SAXException, IOException
293 {
294 SaxHandler handler = new SaxHandler();
295
296 XMLReader reader = XMLReaderFactory.createXMLReader();
297
298 reader.setContentHandler(handler);
299 reader.setEntityResolver(handler);
300 reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
301
302 InputStream stream = openStream();
303
304 try
305 {
306 reader.parse(new InputSource(stream));
307 } catch (IOException ex)
308 {
309 this.exceptionLocation = handler.getLocation();
310
311 throw ex;
312 } catch (SAXException ex)
313 {
314 this.exceptionLocation = handler.getLocation();
315
316 throw ex;
317 } catch (RuntimeException ex)
318 {
319 this.exceptionLocation = handler.getLocation();
320
321 throw ex;
322 } finally
323 {
324 InternalUtils.close(stream);
325 }
326 }
327
328 enum State
329 {
330 MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY
331 }
332
333 private InputStream openStream() throws IOException
334 {
335 InputStream rawStream = resource.openStream();
336
337 InputStreamReader rawReader = new InputStreamReader(rawStream);
338 LineNumberReader reader = new LineNumberReader(rawReader);
339
340 ByteArrayOutputStream bos = new ByteArrayOutputStream(5000);
341 PrintWriter writer = new PrintWriter(bos);
342
343 State state = State.MAYBE_XML;
344
345 try
346 {
347 while (true)
348 {
349 String line = reader.readLine();
350
351 if (line == null)
352 {
353 break;
354 }
355
356 switch (state)
357 {
358
359 case MAYBE_XML:
360
361 if (line.toLowerCase().startsWith("<?xml"))
362 {
363 writer.println(line);
364 state = State.MAYBE_DOCTYPE;
365 continue;
366 }
367
368 case MAYBE_DOCTYPE:
369
370 if (line.trim().length() == 0)
371 {
372 writer.println(line);
373 continue;
374 }
375
376 String lineLower = line.toLowerCase();
377
378 if (lineLower.equals("<!doctype html>"))
379 {
380 html5DTD = true;
381 writer.println(TRANSITIONAL_DOCTYPE);
382 state = State.JUST_COPY;
383 continue;
384 }
385
386
387 if (lineLower.startsWith("<!doctype"))
388 {
389 writer.println(line);
390 state = State.JUST_COPY;
391 continue;
392 }
393
394 // No doctype, let's provide one.
395
396 ignoreDTD = true;
397 lineOffset = -1;
398 writer.println(TRANSITIONAL_DOCTYPE);
399
400 state = State.JUST_COPY;
401
402 // And drop down to writing out the actual line, and all following lines.
403
404 case JUST_COPY:
405 writer.println(line);
406 }
407 }
408 } finally
409 {
410 writer.close();
411 reader.close();
412 }
413
414 return new ByteArrayInputStream(bos.toByteArray());
415 }
416
417 private XMLToken token()
418 {
419 return tokens.get(cursor);
420 }
421
422 /**
423 * Returns the type of the next token.
424 */
425 public XMLTokenType next()
426 {
427 cursor++;
428
429 // TODO: Check for overflow?
430
431 return getEventType();
432 }
433
434 public int getAttributeCount()
435 {
436 return token().attributes.size();
437 }
438
439 public QName getAttributeName(int i)
440 {
441 return token().attributes.get(i).attributeName;
442 }
443
444 public DTDData getDTDInfo()
445 {
446 return token().dtdData;
447 }
448
449 public XMLTokenType getEventType()
450 {
451 return token().type;
452 }
453
454 public String getLocalName()
455 {
456 return token().localName;
457 }
458
459 public Location getLocation()
460 {
461 if (exceptionLocation != null)
462 return exceptionLocation;
463
464 return token().getLocation();
465 }
466
467 public int getNamespaceCount()
468 {
469 return token().namespaceMappings.size();
470 }
471
472 public String getNamespacePrefix(int i)
473 {
474 return token().namespaceMappings.get(i).prefix;
475 }
476
477 public String getNamespaceURI()
478 {
479 return token().uri;
480 }
481
482 public String getNamespaceURI(int i)
483 {
484 return token().namespaceMappings.get(i).uri;
485 }
486
487 public String getText()
488 {
489 return token().text;
490 }
491
492 public boolean hasNext()
493 {
494 return cursor < tokens.size() - 1;
495 }
496
497 public String getAttributeValue(int i)
498 {
499 return token().attributes.get(i).value;
500 }
501
502 }