001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.net;
019
020 import java.io.ByteArrayOutputStream;
021 import java.io.UnsupportedEncodingException;
022 import java.util.BitSet;
023
024 import org.apache.commons.codec.BinaryDecoder;
025 import org.apache.commons.codec.BinaryEncoder;
026 import org.apache.commons.codec.DecoderException;
027 import org.apache.commons.codec.EncoderException;
028 import org.apache.commons.codec.CharEncoding;
029 import org.apache.commons.codec.StringDecoder;
030 import org.apache.commons.codec.StringEncoder;
031 import org.apache.commons.codec.binary.StringUtils;
032
033 /**
034 * <p>
035 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
036 * </p>
037 * <p>
038 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
039 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
040 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
041 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
042 * to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
043 * gateway.
044 * </p>
045 *
046 * <p>
047 * Note:
048 * </p>
049 * <p>
050 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
051 * does not lend itself well to the byte[] oriented codec framework. Complete the codec once the streamable codec
052 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
053 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
054 * </p>
055 *
056 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
057 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
058 *
059 * @author Apache Software Foundation
060 * @since 1.3
061 * @version $Id: QuotedPrintableCodec.java 798333 2009-07-27 23:41:58Z ggregory $
062 */
063 public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
064 /**
065 * The default charset used for string decoding and encoding.
066 */
067 private final String charset;
068
069 /**
070 * BitSet of printable characters as defined in RFC 1521.
071 */
072 private static final BitSet PRINTABLE_CHARS = new BitSet(256);
073
074 private static final byte ESCAPE_CHAR = '=';
075
076 private static final byte TAB = 9;
077
078 private static final byte SPACE = 32;
079 // Static initializer for printable chars collection
080 static {
081 // alpha characters
082 for (int i = 33; i <= 60; i++) {
083 PRINTABLE_CHARS.set(i);
084 }
085 for (int i = 62; i <= 126; i++) {
086 PRINTABLE_CHARS.set(i);
087 }
088 PRINTABLE_CHARS.set(TAB);
089 PRINTABLE_CHARS.set(SPACE);
090 }
091
092 /**
093 * Default constructor.
094 */
095 public QuotedPrintableCodec() {
096 this(CharEncoding.UTF_8);
097 }
098
099 /**
100 * Constructor which allows for the selection of a default charset
101 *
102 * @param charset
103 * the default string charset to use.
104 */
105 public QuotedPrintableCodec(String charset) {
106 super();
107 this.charset = charset;
108 }
109
110 /**
111 * Encodes byte into its quoted-printable representation.
112 *
113 * @param b
114 * byte to encode
115 * @param buffer
116 * the buffer to write to
117 */
118 private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
119 buffer.write(ESCAPE_CHAR);
120 char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
121 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
122 buffer.write(hex1);
123 buffer.write(hex2);
124 }
125
126 /**
127 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
128 *
129 * <p>
130 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
131 * RFC 1521 and is suitable for encoding binary data and unformatted text.
132 * </p>
133 *
134 * @param printable
135 * bitset of characters deemed quoted-printable
136 * @param bytes
137 * array of bytes to be encoded
138 * @return array of bytes containing quoted-printable data
139 */
140 public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
141 if (bytes == null) {
142 return null;
143 }
144 if (printable == null) {
145 printable = PRINTABLE_CHARS;
146 }
147 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
148 for (int i = 0; i < bytes.length; i++) {
149 int b = bytes[i];
150 if (b < 0) {
151 b = 256 + b;
152 }
153 if (printable.get(b)) {
154 buffer.write(b);
155 } else {
156 encodeQuotedPrintable(b, buffer);
157 }
158 }
159 return buffer.toByteArray();
160 }
161
162 /**
163 * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
164 * back to their original representation.
165 *
166 * <p>
167 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
168 * RFC 1521.
169 * </p>
170 *
171 * @param bytes
172 * array of quoted-printable characters
173 * @return array of original bytes
174 * @throws DecoderException
175 * Thrown if quoted-printable decoding is unsuccessful
176 */
177 public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
178 if (bytes == null) {
179 return null;
180 }
181 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
182 for (int i = 0; i < bytes.length; i++) {
183 int b = bytes[i];
184 if (b == ESCAPE_CHAR) {
185 try {
186 int u = Utils.digit16(bytes[++i]);
187 int l = Utils.digit16(bytes[++i]);
188 buffer.write((char) ((u << 4) + l));
189 } catch (ArrayIndexOutOfBoundsException e) {
190 throw new DecoderException("Invalid quoted-printable encoding", e);
191 }
192 } else {
193 buffer.write(b);
194 }
195 }
196 return buffer.toByteArray();
197 }
198
199 /**
200 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
201 *
202 * <p>
203 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
204 * RFC 1521 and is suitable for encoding binary data and unformatted text.
205 * </p>
206 *
207 * @param bytes
208 * array of bytes to be encoded
209 * @return array of bytes containing quoted-printable data
210 */
211 public byte[] encode(byte[] bytes) {
212 return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
213 }
214
215 /**
216 * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
217 * back to their original representation.
218 *
219 * <p>
220 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
221 * RFC 1521.
222 * </p>
223 *
224 * @param bytes
225 * array of quoted-printable characters
226 * @return array of original bytes
227 * @throws DecoderException
228 * Thrown if quoted-printable decoding is unsuccessful
229 */
230 public byte[] decode(byte[] bytes) throws DecoderException {
231 return decodeQuotedPrintable(bytes);
232 }
233
234 /**
235 * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
236 *
237 * <p>
238 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
239 * RFC 1521 and is suitable for encoding binary data.
240 * </p>
241 *
242 * @param pString
243 * string to convert to quoted-printable form
244 * @return quoted-printable string
245 *
246 * @throws EncoderException
247 * Thrown if quoted-printable encoding is unsuccessful
248 *
249 * @see #getDefaultCharset()
250 */
251 public String encode(String pString) throws EncoderException {
252 if (pString == null) {
253 return null;
254 }
255 try {
256 return encode(pString, getDefaultCharset());
257 } catch (UnsupportedEncodingException e) {
258 throw new EncoderException(e.getMessage(), e);
259 }
260 }
261
262 /**
263 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
264 * are converted back to their original representation.
265 *
266 * @param pString
267 * quoted-printable string to convert into its original form
268 * @param charset
269 * the original string charset
270 * @return original string
271 * @throws DecoderException
272 * Thrown if quoted-printable decoding is unsuccessful
273 * @throws UnsupportedEncodingException
274 * Thrown if charset is not supported
275 */
276 public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
277 if (pString == null) {
278 return null;
279 }
280 return new String(decode(StringUtils.getBytesUsAscii(pString)), charset);
281 }
282
283 /**
284 * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
285 * converted back to their original representation.
286 *
287 * @param pString
288 * quoted-printable string to convert into its original form
289 * @return original string
290 * @throws DecoderException
291 * Thrown if quoted-printable decoding is unsuccessful.
292 * Thrown if charset is not supported.
293 * @see #getDefaultCharset()
294 */
295 public String decode(String pString) throws DecoderException {
296 if (pString == null) {
297 return null;
298 }
299 try {
300 return decode(pString, getDefaultCharset());
301 } catch (UnsupportedEncodingException e) {
302 throw new DecoderException(e.getMessage(), e);
303 }
304 }
305
306 /**
307 * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
308 *
309 * @param pObject
310 * string to convert to a quoted-printable form
311 * @return quoted-printable object
312 * @throws EncoderException
313 * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
314 * unsuccessful
315 */
316 public Object encode(Object pObject) throws EncoderException {
317 if (pObject == null) {
318 return null;
319 } else if (pObject instanceof byte[]) {
320 return encode((byte[]) pObject);
321 } else if (pObject instanceof String) {
322 return encode((String) pObject);
323 } else {
324 throw new EncoderException("Objects of type " +
325 pObject.getClass().getName() +
326 " cannot be quoted-printable encoded");
327 }
328 }
329
330 /**
331 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
332 * representation.
333 *
334 * @param pObject
335 * quoted-printable object to convert into its original form
336 * @return original object
337 * @throws DecoderException
338 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
339 * encountered during the decode process.
340 */
341 public Object decode(Object pObject) throws DecoderException {
342 if (pObject == null) {
343 return null;
344 } else if (pObject instanceof byte[]) {
345 return decode((byte[]) pObject);
346 } else if (pObject instanceof String) {
347 return decode((String) pObject);
348 } else {
349 throw new DecoderException("Objects of type " +
350 pObject.getClass().getName() +
351 " cannot be quoted-printable decoded");
352 }
353 }
354
355 /**
356 * Returns the default charset used for string decoding and encoding.
357 *
358 * @return the default string charset.
359 */
360 public String getDefaultCharset() {
361 return this.charset;
362 }
363
364 /**
365 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
366 *
367 * <p>
368 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
369 * RFC 1521 and is suitable for encoding binary data and unformatted text.
370 * </p>
371 *
372 * @param pString
373 * string to convert to quoted-printable form
374 * @param charset
375 * the charset for pString
376 * @return quoted-printable string
377 *
378 * @throws UnsupportedEncodingException
379 * Thrown if the charset is not supported
380 */
381 public String encode(String pString, String charset) throws UnsupportedEncodingException {
382 if (pString == null) {
383 return null;
384 }
385 return StringUtils.newStringUsAscii(encode(pString.getBytes(charset)));
386 }
387 }