001    // Copyright 2009, 2011 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    //     http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry5.internal.services;
016    
017    import org.apache.tapestry5.ioc.Location;
018    import org.apache.tapestry5.ioc.Resource;
019    import org.apache.tapestry5.ioc.internal.util.CollectionFactory;
020    import org.apache.tapestry5.ioc.internal.util.InternalUtils;
021    import org.apache.tapestry5.ioc.internal.util.LocationImpl;
022    import org.xml.sax.*;
023    import org.xml.sax.ext.LexicalHandler;
024    import org.xml.sax.helpers.XMLReaderFactory;
025    
026    import javax.xml.namespace.QName;
027    import java.io.*;
028    import java.net.URL;
029    import java.util.Collections;
030    import java.util.List;
031    import java.util.Map;
032    
033    /**
034     * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>})
035     * as if it were the XHTML transitional doctype
036     * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}).
037     */
038    public class XMLTokenStream
039    {
040    
041        public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">";
042    
043        private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null);
044    
045        private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler
046        {
047            private Locator locator;
048    
049            private int currentLine = -1;
050    
051            private Location cachedLocation;
052    
053            private Location textLocation;
054    
055            private final StringBuilder builder = new StringBuilder();
056    
057            private boolean inCDATA, insideDTD;
058    
059            private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList();
060    
061            private Location getLocation()
062            {
063                int line = locator.getLineNumber();
064    
065                if (currentLine != line)
066                    cachedLocation = null;
067    
068                if (cachedLocation == null)
069                {
070                    // lineOffset accounts for the extra line when a doctype is injected. The line number reported
071                    // from the XML parser inlcudes the phantom doctype line, the lineOffset is used to subtract one
072                    // to get the real line number.
073                    cachedLocation = new LocationImpl(resource, line + lineOffset);
074                }
075    
076                return cachedLocation;
077            }
078    
079            private XMLToken add(XMLTokenType type)
080            {
081                XMLToken token = new XMLToken(type, getLocation());
082    
083                tokens.add(token);
084    
085                return token;
086            }
087    
088            public InputSource resolveEntity(String publicId, String systemId) throws SAXException,
089                    IOException
090            {
091                URL url = publicIdToURL.get(publicId);
092    
093                try
094                {
095                    if (url != null)
096                        return new InputSource(url.openStream());
097                } catch (IOException ex)
098                {
099                    throw new SAXException(String.format("Unable to open stream for resource %s: %s",
100                            url, InternalUtils.toMessage(ex)), ex);
101                }
102    
103                return null;
104            }
105    
106            public void comment(char[] ch, int start, int length) throws SAXException
107            {
108                if (insideDTD)
109                    return;
110    
111                // TODO: Coalesce?
112                add(XMLTokenType.COMMENT).text = new String(ch, start, length);
113            }
114    
115            public void startCDATA() throws SAXException
116            {
117                // TODO: Flush characters?
118    
119                inCDATA = true;
120            }
121    
122            public void endCDATA() throws SAXException
123            {
124                if (builder.length() != 0)
125                {
126                    add(XMLTokenType.CDATA).text = builder.toString();
127                }
128    
129                builder.setLength(0);
130                inCDATA = false;
131            }
132    
133            public void characters(char[] ch, int start, int length) throws SAXException
134            {
135                if (inCDATA)
136                {
137                    builder.append(ch, start, length);
138                    return;
139                }
140    
141                XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation);
142                token.text = new String(ch, start, length);
143    
144                tokens.add(token);
145            }
146    
147            public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
148            {
149                characters(ch, start, length);
150            }
151    
152            public void startDTD(final String name, final String publicId, final String systemId)
153                    throws SAXException
154            {
155                insideDTD = true;
156    
157                if (!ignoreDTD)
158                {
159                    DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId);
160    
161                    add(XMLTokenType.DTD).dtdData = data;
162                }
163            }
164    
165            public void endDocument() throws SAXException
166            {
167                add(XMLTokenType.END_DOCUMENT);
168            }
169    
170            public void endElement(String uri, String localName, String qName) throws SAXException
171            {
172                add(XMLTokenType.END_ELEMENT);
173            }
174    
175            public void setDocumentLocator(Locator locator)
176            {
177                this.locator = locator;
178            }
179    
180            /**
181             * Checks for the extra namespace injected when the transitional doctype is injected (which
182             * occurs when the template contains no doctype).
183             */
184            private boolean ignoreURI(String uri)
185            {
186                return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml");
187            }
188    
189            public void startElement(String uri, String localName, String qName, Attributes attributes)
190                    throws SAXException
191            {
192                XMLToken token = add(XMLTokenType.START_ELEMENT);
193    
194                token.uri = ignoreURI(uri) ? "" : uri;
195                token.localName = localName;
196                token.qName = qName;
197    
198                // The XML parser tends to reuse the same Attributes object, so
199                // capture the data out of it.
200    
201                if (attributes.getLength() == 0)
202                {
203                    token.attributes = Collections.emptyList();
204                } else
205                {
206                    token.attributes = CollectionFactory.newList();
207    
208                    for (int i = 0; i < attributes.getLength(); i++)
209                    {
210                        String prefixedName = attributes.getQName(i);
211    
212                        int lastColon = prefixedName.lastIndexOf(':');
213    
214                        String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : "";
215    
216                        QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i),
217                                prefix);
218    
219                        token.attributes.add(new AttributeInfo(qname, attributes.getValue(i)));
220                    }
221                }
222    
223                token.namespaceMappings = CollectionFactory.newList(namespaceMappings);
224    
225                namespaceMappings.clear();
226    
227                // Any text collected starts here as well:
228    
229                textLocation = getLocation();
230            }
231    
232            public void startPrefixMapping(String prefix, String uri) throws SAXException
233            {
234                if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml"))
235                {
236                    return;
237                }
238    
239                namespaceMappings.add(new NamespaceMapping(prefix, uri));
240            }
241    
242            public void endDTD() throws SAXException
243            {
244                insideDTD = false;
245            }
246    
247            public void endEntity(String name) throws SAXException
248            {
249            }
250    
251            public void startEntity(String name) throws SAXException
252            {
253            }
254    
255            public void endPrefixMapping(String prefix) throws SAXException
256            {
257            }
258    
259            public void processingInstruction(String target, String data) throws SAXException
260            {
261            }
262    
263            public void skippedEntity(String name) throws SAXException
264            {
265            }
266    
267            public void startDocument() throws SAXException
268            {
269            }
270        }
271    
272        private int cursor = -1;
273    
274        private final List<XMLToken> tokens = CollectionFactory.newList();
275    
276        private final Resource resource;
277    
278        private final Map<String, URL> publicIdToURL;
279    
280        private Location exceptionLocation;
281    
282        private boolean html5DTD, ignoreDTD;
283    
284        private int lineOffset;
285    
286        public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL)
287        {
288            this.resource = resource;
289            this.publicIdToURL = publicIdToURL;
290        }
291    
292        public void parse() throws SAXException, IOException
293        {
294            SaxHandler handler = new SaxHandler();
295    
296            XMLReader reader = XMLReaderFactory.createXMLReader();
297    
298            reader.setContentHandler(handler);
299            reader.setEntityResolver(handler);
300            reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
301    
302            InputStream stream = openStream();
303    
304            try
305            {
306                reader.parse(new InputSource(stream));
307            } catch (IOException ex)
308            {
309                this.exceptionLocation = handler.getLocation();
310    
311                throw ex;
312            } catch (SAXException ex)
313            {
314                this.exceptionLocation = handler.getLocation();
315    
316                throw ex;
317            } catch (RuntimeException ex)
318            {
319                this.exceptionLocation = handler.getLocation();
320    
321                throw ex;
322            } finally
323            {
324                InternalUtils.close(stream);
325            }
326        }
327    
328        enum State
329        {
330            MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY
331        }
332    
333        private InputStream openStream() throws IOException
334        {
335            InputStream rawStream = resource.openStream();
336    
337            InputStreamReader rawReader = new InputStreamReader(rawStream);
338            LineNumberReader reader = new LineNumberReader(rawReader);
339    
340            ByteArrayOutputStream bos = new ByteArrayOutputStream(5000);
341            PrintWriter writer = new PrintWriter(bos);
342    
343            State state = State.MAYBE_XML;
344    
345            try
346            {
347                while (true)
348                {
349                    String line = reader.readLine();
350    
351                    if (line == null)
352                    {
353                        break;
354                    }
355    
356                    switch (state)
357                    {
358    
359                        case MAYBE_XML:
360    
361                            if (line.toLowerCase().startsWith("<?xml"))
362                            {
363                                writer.println(line);
364                                state = State.MAYBE_DOCTYPE;
365                                continue;
366                            }
367    
368                        case MAYBE_DOCTYPE:
369    
370                            if (line.trim().length() == 0)
371                            {
372                                writer.println(line);
373                                continue;
374                            }
375    
376                            String lineLower = line.toLowerCase();
377    
378                            if (lineLower.equals("<!doctype html>"))
379                            {
380                                html5DTD = true;
381                                writer.println(TRANSITIONAL_DOCTYPE);
382                                state = State.JUST_COPY;
383                                continue;
384                            }
385    
386    
387                            if (lineLower.startsWith("<!doctype"))
388                            {
389                                writer.println(line);
390                                state = State.JUST_COPY;
391                                continue;
392                            }
393    
394                            // No doctype, let's provide one.
395    
396                            ignoreDTD = true;
397                            lineOffset = -1;
398                            writer.println(TRANSITIONAL_DOCTYPE);
399    
400                            state = State.JUST_COPY;
401    
402                            // And drop down to writing out the actual line, and all following lines.
403    
404                        case JUST_COPY:
405                            writer.println(line);
406                    }
407                }
408            } finally
409            {
410                writer.close();
411                reader.close();
412            }
413    
414            return new ByteArrayInputStream(bos.toByteArray());
415        }
416    
417        private XMLToken token()
418        {
419            return tokens.get(cursor);
420        }
421    
422        /**
423         * Returns the type of the next token.
424         */
425        public XMLTokenType next()
426        {
427            cursor++;
428    
429            // TODO: Check for overflow?
430    
431            return getEventType();
432        }
433    
434        public int getAttributeCount()
435        {
436            return token().attributes.size();
437        }
438    
439        public QName getAttributeName(int i)
440        {
441            return token().attributes.get(i).attributeName;
442        }
443    
444        public DTDData getDTDInfo()
445        {
446            return token().dtdData;
447        }
448    
449        public XMLTokenType getEventType()
450        {
451            return token().type;
452        }
453    
454        public String getLocalName()
455        {
456            return token().localName;
457        }
458    
459        public Location getLocation()
460        {
461            if (exceptionLocation != null)
462                return exceptionLocation;
463    
464            return token().getLocation();
465        }
466    
467        public int getNamespaceCount()
468        {
469            return token().namespaceMappings.size();
470        }
471    
472        public String getNamespacePrefix(int i)
473        {
474            return token().namespaceMappings.get(i).prefix;
475        }
476    
477        public String getNamespaceURI()
478        {
479            return token().uri;
480        }
481    
482        public String getNamespaceURI(int i)
483        {
484            return token().namespaceMappings.get(i).uri;
485        }
486    
487        public String getText()
488        {
489            return token().text;
490        }
491    
492        public boolean hasNext()
493        {
494            return cursor < tokens.size() - 1;
495        }
496    
497        public String getAttributeValue(int i)
498        {
499            return token().attributes.get(i).value;
500        }
501    
502    }