001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.net;
019    
020    import java.io.ByteArrayOutputStream;
021    import java.io.UnsupportedEncodingException;
022    import java.util.BitSet;
023    
024    import org.apache.commons.codec.BinaryDecoder;
025    import org.apache.commons.codec.BinaryEncoder;
026    import org.apache.commons.codec.DecoderException;
027    import org.apache.commons.codec.EncoderException;
028    import org.apache.commons.codec.CharEncoding;
029    import org.apache.commons.codec.StringDecoder;
030    import org.apache.commons.codec.StringEncoder;
031    import org.apache.commons.codec.binary.StringUtils;
032    
033    /**
034     * <p>
035     * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
036     * </p>
037     * <p>
038     * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
039     * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
040     * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
041     * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
042     * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
043     * gateway.
044     * </p>
045     * 
046     * <p>
047     * Note:
048     * </p>
049     * <p>
050     * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
051     * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the steamable codec
052     * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
053     * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
054     * </p>
055     * 
056     * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
057     *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
058     * 
059     * @author Apache Software Foundation
060     * @since 1.3
061     * @version $Id: QuotedPrintableCodec.java 798333 2009-07-27 23:41:58Z ggregory $
062     */
063    public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
064        /**
065         * The default charset used for string decoding and encoding.
066         */
067        private final String charset;
068    
069        /**
070         * BitSet of printable characters as defined in RFC 1521.
071         */
072        private static final BitSet PRINTABLE_CHARS = new BitSet(256);
073    
074        private static final byte ESCAPE_CHAR = '=';
075    
076        private static final byte TAB = 9;
077    
078        private static final byte SPACE = 32;
079        // Static initializer for printable chars collection
080        static {
081            // alpha characters
082            for (int i = 33; i <= 60; i++) {
083                PRINTABLE_CHARS.set(i);
084            }
085            for (int i = 62; i <= 126; i++) {
086                PRINTABLE_CHARS.set(i);
087            }
088            PRINTABLE_CHARS.set(TAB);
089            PRINTABLE_CHARS.set(SPACE);
090        }
091    
092        /**
093         * Default constructor.
094         */
095        public QuotedPrintableCodec() {
096            this(CharEncoding.UTF_8);
097        }
098    
099        /**
100         * Constructor which allows for the selection of a default charset
101         * 
102         * @param charset
103         *                  the default string charset to use.
104         */
105        public QuotedPrintableCodec(String charset) {
106            super();
107            this.charset = charset;
108        }
109    
110        /**
111         * Encodes byte into its quoted-printable representation.
112         * 
113         * @param b
114         *                  byte to encode
115         * @param buffer
116         *                  the buffer to write to
117         */
118        private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
119            buffer.write(ESCAPE_CHAR);
120            char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
121            char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
122            buffer.write(hex1);
123            buffer.write(hex2);
124        }
125    
126        /**
127         * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
128         * 
129         * <p>
130         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
131         * RFC 1521 and is suitable for encoding binary data and unformatted text.
132         * </p>
133         * 
134         * @param printable
135         *                  bitset of characters deemed quoted-printable
136         * @param bytes
137         *                  array of bytes to be encoded
138         * @return array of bytes containing quoted-printable data
139         */
140        public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
141            if (bytes == null) {
142                return null;
143            }
144            if (printable == null) {
145                printable = PRINTABLE_CHARS;
146            }
147            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
148            for (int i = 0; i < bytes.length; i++) {
149                int b = bytes[i];
150                if (b < 0) {
151                    b = 256 + b;
152                }
153                if (printable.get(b)) {
154                    buffer.write(b);
155                } else {
156                    encodeQuotedPrintable(b, buffer);
157                }
158            }
159            return buffer.toByteArray();
160        }
161    
162        /**
163         * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
164         * back to their original representation.
165         * 
166         * <p>
167         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
168         * RFC 1521.
169         * </p>
170         * 
171         * @param bytes
172         *                  array of quoted-printable characters
173         * @return array of original bytes
174         * @throws DecoderException
175         *                  Thrown if quoted-printable decoding is unsuccessful
176         */
177        public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
178            if (bytes == null) {
179                return null;
180            }
181            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
182            for (int i = 0; i < bytes.length; i++) {
183                int b = bytes[i];
184                if (b == ESCAPE_CHAR) {
185                    try {
186                        int u = Utils.digit16(bytes[++i]);
187                        int l = Utils.digit16(bytes[++i]);
188                        buffer.write((char) ((u << 4) + l));
189                    } catch (ArrayIndexOutOfBoundsException e) {
190                        throw new DecoderException("Invalid quoted-printable encoding", e);
191                    }
192                } else {
193                    buffer.write(b);
194                }
195            }
196            return buffer.toByteArray();
197        }
198    
199        /**
200         * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
201         * 
202         * <p>
203         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
204         * RFC 1521 and is suitable for encoding binary data and unformatted text.
205         * </p>
206         * 
207         * @param bytes
208         *                  array of bytes to be encoded
209         * @return array of bytes containing quoted-printable data
210         */
211        public byte[] encode(byte[] bytes) {
212            return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
213        }
214    
215        /**
216         * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
217         * back to their original representation.
218         * 
219         * <p>
220         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
221         * RFC 1521.
222         * </p>
223         * 
224         * @param bytes
225         *                  array of quoted-printable characters
226         * @return array of original bytes
227         * @throws DecoderException
228         *                  Thrown if quoted-printable decoding is unsuccessful
229         */
230        public byte[] decode(byte[] bytes) throws DecoderException {
231            return decodeQuotedPrintable(bytes);
232        }
233    
234        /**
235         * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
236         * 
237         * <p>
238         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
239         * RFC 1521 and is suitable for encoding binary data.
240         * </p>
241         * 
242         * @param pString
243         *                  string to convert to quoted-printable form
244         * @return quoted-printable string
245         * 
246         * @throws EncoderException
247         *                  Thrown if quoted-printable encoding is unsuccessful
248         * 
249         * @see #getDefaultCharset()
250         */
251        public String encode(String pString) throws EncoderException {
252            if (pString == null) {
253                return null;
254            }
255            try {
256                return encode(pString, getDefaultCharset());
257            } catch (UnsupportedEncodingException e) {
258                throw new EncoderException(e.getMessage(), e);
259            }
260        }
261    
262        /**
263         * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
264         * are converted back to their original representation.
265         * 
266         * @param pString
267         *                  quoted-printable string to convert into its original form
268         * @param charset
269         *                  the original string charset
270         * @return original string
271         * @throws DecoderException
272         *                  Thrown if quoted-printable decoding is unsuccessful
273         * @throws UnsupportedEncodingException
274         *                  Thrown if charset is not supported
275         */
276        public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
277            if (pString == null) {
278                return null;
279            }
280            return new String(decode(StringUtils.getBytesUsAscii(pString)), charset);
281        }
282    
283        /**
284         * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
285         * converted back to their original representation.
286         * 
287         * @param pString
288         *                  quoted-printable string to convert into its original form
289         * @return original string
290         * @throws DecoderException
291         *                  Thrown if quoted-printable decoding is unsuccessful.
292         *                  Thrown if charset is not supported.
293         * @see #getDefaultCharset()
294         */
295        public String decode(String pString) throws DecoderException {
296            if (pString == null) {
297                return null;
298            }
299            try {
300                return decode(pString, getDefaultCharset());
301            } catch (UnsupportedEncodingException e) {
302                throw new DecoderException(e.getMessage(), e);
303            }
304        }
305    
306        /**
307         * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
308         * 
309         * @param pObject
310         *                  string to convert to a quoted-printable form
311         * @return quoted-printable object
312         * @throws EncoderException
313         *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
314         *                  unsuccessful
315         */
316        public Object encode(Object pObject) throws EncoderException {
317            if (pObject == null) {
318                return null;
319            } else if (pObject instanceof byte[]) {
320                return encode((byte[]) pObject);
321            } else if (pObject instanceof String) {
322                return encode((String) pObject);
323            } else {
324                throw new EncoderException("Objects of type " + 
325                      pObject.getClass().getName() + 
326                      " cannot be quoted-printable encoded");
327            }
328        }
329    
330        /**
331         * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
332         * representation.
333         * 
334         * @param pObject
335         *                  quoted-printable object to convert into its original form
336         * @return original object
337         * @throws DecoderException
338         *                  Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
339         *                  encountered during the decode process.
340         */
341        public Object decode(Object pObject) throws DecoderException {
342            if (pObject == null) {
343                return null;
344            } else if (pObject instanceof byte[]) {
345                return decode((byte[]) pObject);
346            } else if (pObject instanceof String) {
347                return decode((String) pObject);
348            } else {
349                throw new DecoderException("Objects of type " + 
350                      pObject.getClass().getName() + 
351                      " cannot be quoted-printable decoded");
352            }
353        }
354    
355        /**
356         * Returns the default charset used for string decoding and encoding.
357         * 
358         * @return the default string charset.
359         */
360        public String getDefaultCharset() {
361            return this.charset;
362        }
363    
364        /**
365         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
366         * 
367         * <p>
368         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
369         * RFC 1521 and is suitable for encoding binary data and unformatted text.
370         * </p>
371         * 
372         * @param pString
373         *                  string to convert to quoted-printable form
374         * @param charset
375         *                  the charset for pString
376         * @return quoted-printable string
377         * 
378         * @throws UnsupportedEncodingException
379         *                  Thrown if the charset is not supported
380         */
381        public String encode(String pString, String charset) throws UnsupportedEncodingException {
382            if (pString == null) {
383                return null;
384            }
385            return StringUtils.newStringUsAscii(encode(pString.getBytes(charset)));
386        }
387    }