001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.binary;
019
020 import java.math.BigInteger;
021
022 import org.apache.commons.codec.BinaryDecoder;
023 import org.apache.commons.codec.BinaryEncoder;
024 import org.apache.commons.codec.DecoderException;
025 import org.apache.commons.codec.EncoderException;
026
027 /**
028 * Provides Base64 encoding and decoding as defined by RFC 2045.
029 *
030 * <p>
031 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
032 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
033 * </p>
034 * <p>
035 * The class can be parameterized in the following manner with various constructors:
036 * <ul>
037 * <li>URL-safe mode: Default off.</li>
 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 are effectively rounded down to a multiple
 * of 4 in the encoded data.</li>
040 * <li>Line separator: Default is CRLF ("\r\n")</li>
041 * </ul>
042 * </p>
043 * <p>
 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode
 * character encodings which are compatible with the lower 127 characters of ASCII (ISO-8859-1, Windows-1252, UTF-8, etc).
046 * </p>
047 *
048 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
049 * @author Apache Software Foundation
050 * @since 1.0
051 * @version $Id: Base64.java 801706 2009-08-06 16:27:06Z niallp $
052 */
053 public class Base64 implements BinaryEncoder, BinaryDecoder {
/** Factor by which the streaming buffer's length is multiplied each time it has to grow. */
private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

/** Length, in bytes, of the streaming buffer when this class has to allocate one itself. */
private static final int DEFAULT_BUFFER_SIZE = 8192;
057
/**
 * Chunk size per RFC 2045 section 6.8. Used as the default line length when encoding.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
 * equal signs.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
 */
static final int CHUNK_SIZE = 76;

/**
 * Chunk separator per RFC 2045 section 2.1: a carriage return followed by a line feed. Used as the default line
 * separator when encoding.
 *
 * <p>
 * N.B. The next major release may break compatibility and make this field private.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
 */
static final byte[] CHUNK_SEPARATOR = {'\r', '\n'};
080
/**
 * Lookup table that translates 6-bit non-negative integer index values (0-63) into their "Base64 Alphabet"
 * equivalents as specified in Table 1 of RFC 2045.
 *
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 */
private static final byte[] STANDARD_ENCODE_TABLE = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};

/**
 * Copy of the STANDARD_ENCODE_TABLE above, but with the last two entries, + and /,
 * changed to - and _ to make the encoded Base64 results URL-safe.
 * This table is only used when the Base64 instance is in URL-safe mode.
 */
private static final byte[] URL_SAFE_ENCODE_TABLE = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
};
108
/**
 * Byte used to pad output.
 */
private static final byte PAD = '=';

/**
 * Lookup table, indexed by byte value, that translates bytes drawn from the "Base64 Alphabet" (as specified in
 * Table 1 of RFC 2045) into their 6-bit non-negative integer equivalents. Bytes that are not in the Base64
 * alphabet but fall within the bounds of the array are translated to -1. The table ends at the entry for 'z', so
 * callers must range-check a byte before using it as an index.
 *
 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means the decoder seamlessly handles
 * both URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit).
 *
 * Thanks to "commons" project in ws.apache.org for this code.
 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 */
private static final byte[] DECODE_TABLE = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54,
    55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4,
    5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34,
    35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};

/** Mask used to extract 6 bits, used when encoding */
private static final int MASK_6BITS = 0x3f;

/** Mask used to extract 8 bits, used in decoding base64 bytes */
private static final int MASK_8BITS = 0xff;
140
141 // The static final fields above are used for the original static byte[] methods on Base64.
142 // The private member fields below are used with the new streaming approach, which requires
143 // some state be preserved between calls of encode() and decode().
144
/**
 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able
 * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch
 * between the two modes.
 */
private final byte[] encodeTable;

/**
 * Line length for encoding, stored as a multiple of 4. Not used when decoding. A value of zero implies no chunking
 * of the base64 encoded data.
 */
private final int lineLength;

/**
 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0.
 */
private final byte[] lineSeparator;

/**
 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing while
 * decoding. Always <code>encodeSize - 1</code>, i.e. <code>3 + lineSeparator.length</code> when chunking is
 * requested and <code>3</code> otherwise.
 */
private final int decodeSize;

/**
 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing while
 * encoding. <code>4 + lineSeparator.length</code> when chunking is requested and <code>4</code> otherwise.
 */
private final int encodeSize;

/**
 * Buffer for streaming. <code>null</code> when there is no buffered data to read.
 */
private byte[] buffer;

/**
 * Position where next character should be written in the buffer.
 */
private int pos;

/**
 * Position where next character should be read from the buffer.
 */
private int readPos;

/**
 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to
 * make sure each encoded line never goes beyond lineLength (if lineLength > 0).
 */
private int currentLinePos;

/**
 * Writes to the buffer only occur after every 3 reads when encoding, and every 4 reads when decoding. This variable
 * helps track that.
 */
private int modulus;

/**
 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, the streaming encode and decode
 * methods return immediately without processing any further input.
 */
private boolean eof;

/**
 * Place holder for the bytes we're dealing with for our base64 logic. Bitwise operations store and extract the
 * base64 encoding or decoding from this variable.
 */
private int x;
213
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 */
public Base64() {
    // Delegates with URL-safe mode switched off.
    this(false);
}
227
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
 * <p>
 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is
 * URL_SAFE_ENCODE_TABLE if <code>urlSafe</code> is <code>true</code>, STANDARD_ENCODE_TABLE otherwise.
 * </p>
 *
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param urlSafe
 *            if <code>true</code>, URL-safe encoding is used. In most cases this should be set to
 *            <code>false</code>.
 * @since 1.4
 */
public Base64(boolean urlSafe) {
    // Default RFC 2045 chunking: 76-character lines separated by CRLF.
    this(CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
246
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is
 * STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4).
 *            If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when decoding.
 * @since 1.4
 */
public Base64(int lineLength) {
    // Delegates with the default CRLF separator.
    this(lineLength, CHUNK_SEPARATOR);
}
268
/**
 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length and line separator are given in the constructor, and the encoding table is
 * STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength
 *            Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4).
 *            If lineLength &lt;= 0, then the output will not be divided into lines (chunks). Ignored when decoding.
 * @param lineSeparator
 *            Each line of encoded data will end with this sequence of bytes. A <code>null</code> value disables
 *            chunking.
 * @throws IllegalArgumentException
 *             Thrown when the provided lineSeparator included some base64 characters.
 * @since 1.4
 */
public Base64(int lineLength, byte[] lineSeparator) {
    // Delegates with URL-safe mode switched off.
    this(lineLength, lineSeparator, false);
}
294
295 /**
296 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
297 * <p>
298 * When encoding the line length and line separator are given in the constructor, and the encoding table is
299 * STANDARD_ENCODE_TABLE.
300 * </p>
301 * <p>
302 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
303 * </p>
304 * <p>
305 * When decoding all variants are supported.
306 * </p>
307 *
308 * @param lineLength
309 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4).
310 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding.
311 * @param lineSeparator
312 * Each line of encoded data will end with this sequence of bytes.
313 * @param urlSafe
314 * Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
315 * operations. Decoding seamlessly handles both modes.
316 * @throws IllegalArgumentException
317 * The provided lineSeparator included some base64 characters. That's not going to work!
318 * @since 1.4
319 */
320 public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) {
321 if (lineSeparator == null) {
322 lineLength = 0; // disable chunk-separating
323 lineSeparator = CHUNK_SEPARATOR; // this just gets ignored
324 }
325 this.lineLength = lineLength > 0 ? (lineLength / 4) * 4 : 0;
326 this.lineSeparator = new byte[lineSeparator.length];
327 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length);
328 if (lineLength > 0) {
329 this.encodeSize = 4 + lineSeparator.length;
330 } else {
331 this.encodeSize = 4;
332 }
333 this.decodeSize = this.encodeSize - 1;
334 if (containsBase64Byte(lineSeparator)) {
335 String sep = StringUtils.newStringUtf8(lineSeparator);
336 throw new IllegalArgumentException("lineSeperator must not contain base64 characters: [" + sep + "]");
337 }
338 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
339 }
340
341 /**
342 * Returns our current encode mode. True if we're URL-SAFE, false otherwise.
343 *
344 * @return true if we're in URL-SAFE mode, false otherwise.
345 * @since 1.4
346 */
347 public boolean isUrlSafe() {
348 return this.encodeTable == URL_SAFE_ENCODE_TABLE;
349 }
350
351 /**
352 * Returns true if this Base64 object has buffered data for reading.
353 *
354 * @return true if there is Base64 object still available for reading.
355 */
356 boolean hasData() {
357 return this.buffer != null;
358 }
359
360 /**
361 * Returns the amount of buffered data available for reading.
362 *
363 * @return The amount of buffered data available for reading.
364 */
365 int avail() {
366 return buffer != null ? pos - readPos : 0;
367 }
368
369 /** Doubles our buffer. */
370 private void resizeBuffer() {
371 if (buffer == null) {
372 buffer = new byte[DEFAULT_BUFFER_SIZE];
373 pos = 0;
374 readPos = 0;
375 } else {
376 byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
377 System.arraycopy(buffer, 0, b, 0, buffer.length);
378 buffer = b;
379 }
380 }
381
382 /**
383 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
384 * bytes. Returns how many bytes were actually extracted.
385 *
386 * @param b
387 * byte[] array to extract the buffered data into.
388 * @param bPos
389 * position in byte[] array to start extraction at.
390 * @param bAvail
391 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
392 * @return The number of bytes successfully extracted into the provided byte[] array.
393 */
394 int readResults(byte[] b, int bPos, int bAvail) {
395 if (buffer != null) {
396 int len = Math.min(avail(), bAvail);
397 if (buffer != b) {
398 System.arraycopy(buffer, readPos, b, bPos, len);
399 readPos += len;
400 if (readPos >= pos) {
401 buffer = null;
402 }
403 } else {
404 // Re-using the original consumer's output array is only
405 // allowed for one round.
406 buffer = null;
407 }
408 return len;
409 }
410 return eof ? -1 : 0;
411 }
412
413 /**
414 * Sets the streaming buffer. This is a small optimization where we try to buffer directly to the consumer's output
415 * array for one round (if the consumer calls this method first) instead of starting our own buffer.
416 *
417 * @param out
418 * byte[] array to buffer directly to.
419 * @param outPos
420 * Position to start buffering into.
421 * @param outAvail
422 * Amount of bytes available for direct buffering.
423 */
424 void setInitialBuffer(byte[] out, int outPos, int outAvail) {
425 // We can re-use consumer's original output array under
426 // special circumstances, saving on some System.arraycopy().
427 if (out != null && out.length == outAvail) {
428 buffer = out;
429 pos = outPos;
430 readPos = outPos;
431 }
432 }
433
434 /**
435 * <p>
436 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with
437 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last
438 * remaining bytes (if not multiple of 3).
439 * </p>
440 * <p>
441 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
442 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
443 * </p>
444 *
445 * @param in
446 * byte[] array of binary data to base64 encode.
447 * @param inPos
448 * Position to start reading data from.
449 * @param inAvail
450 * Amount of bytes available from input for encoding.
451 */
452 void encode(byte[] in, int inPos, int inAvail) {
453 if (eof) {
454 return;
455 }
456 // inAvail < 0 is how we're informed of EOF in the underlying data we're
457 // encoding.
458 if (inAvail < 0) {
459 eof = true;
460 if (buffer == null || buffer.length - pos < encodeSize) {
461 resizeBuffer();
462 }
463 switch (modulus) {
464 case 1 :
465 buffer[pos++] = encodeTable[(x >> 2) & MASK_6BITS];
466 buffer[pos++] = encodeTable[(x << 4) & MASK_6BITS];
467 // URL-SAFE skips the padding to further reduce size.
468 if (encodeTable == STANDARD_ENCODE_TABLE) {
469 buffer[pos++] = PAD;
470 buffer[pos++] = PAD;
471 }
472 break;
473
474 case 2 :
475 buffer[pos++] = encodeTable[(x >> 10) & MASK_6BITS];
476 buffer[pos++] = encodeTable[(x >> 4) & MASK_6BITS];
477 buffer[pos++] = encodeTable[(x << 2) & MASK_6BITS];
478 // URL-SAFE skips the padding to further reduce size.
479 if (encodeTable == STANDARD_ENCODE_TABLE) {
480 buffer[pos++] = PAD;
481 }
482 break;
483 }
484 if (lineLength > 0 && pos > 0) {
485 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length);
486 pos += lineSeparator.length;
487 }
488 } else {
489 for (int i = 0; i < inAvail; i++) {
490 if (buffer == null || buffer.length - pos < encodeSize) {
491 resizeBuffer();
492 }
493 modulus = (++modulus) % 3;
494 int b = in[inPos++];
495 if (b < 0) {
496 b += 256;
497 }
498 x = (x << 8) + b;
499 if (0 == modulus) {
500 buffer[pos++] = encodeTable[(x >> 18) & MASK_6BITS];
501 buffer[pos++] = encodeTable[(x >> 12) & MASK_6BITS];
502 buffer[pos++] = encodeTable[(x >> 6) & MASK_6BITS];
503 buffer[pos++] = encodeTable[x & MASK_6BITS];
504 currentLinePos += 4;
505 if (lineLength > 0 && lineLength <= currentLinePos) {
506 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length);
507 pos += lineSeparator.length;
508 currentLinePos = 0;
509 }
510 }
511 }
512 }
513 }
514
515 /**
516 * <p>
517 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once
518 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1"
519 * call is not necessary when decoding, but it doesn't hurt, either.
520 * </p>
521 * <p>
522 * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are
523 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in,
524 * garbage-out philosophy: it will not check the provided data for validity.
525 * </p>
526 * <p>
527 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
528 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
529 * </p>
530 *
531 * @param in
532 * byte[] array of ascii data to base64 decode.
533 * @param inPos
534 * Position to start reading data from.
535 * @param inAvail
536 * Amount of bytes available from input for encoding.
537 */
538 void decode(byte[] in, int inPos, int inAvail) {
539 if (eof) {
540 return;
541 }
542 if (inAvail < 0) {
543 eof = true;
544 }
545 for (int i = 0; i < inAvail; i++) {
546 if (buffer == null || buffer.length - pos < decodeSize) {
547 resizeBuffer();
548 }
549 byte b = in[inPos++];
550 if (b == PAD) {
551 // We're done.
552 eof = true;
553 break;
554 } else {
555 if (b >= 0 && b < DECODE_TABLE.length) {
556 int result = DECODE_TABLE[b];
557 if (result >= 0) {
558 modulus = (++modulus) % 4;
559 x = (x << 6) + result;
560 if (modulus == 0) {
561 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS);
562 buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS);
563 buffer[pos++] = (byte) (x & MASK_8BITS);
564 }
565 }
566 }
567 }
568 }
569
570 // Two forms of EOF as far as base64 decoder is concerned: actual
571 // EOF (-1) and first time '=' character is encountered in stream.
572 // This approach makes the '=' padding characters completely optional.
573 if (eof && modulus != 0) {
574 x = x << 6;
575 switch (modulus) {
576 case 2 :
577 x = x << 6;
578 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS);
579 break;
580 case 3 :
581 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS);
582 buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS);
583 break;
584 }
585 }
586 }
587
588 /**
589 * Returns whether or not the <code>octet</code> is in the base 64 alphabet.
590 *
591 * @param octet
592 * The value to test
593 * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
594 * @since 1.4
595 */
596 public static boolean isBase64(byte octet) {
597 return octet == PAD || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1);
598 }
599
600 /**
601 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
602 * method treats whitespace as valid.
603 *
604 * @param arrayOctet
605 * byte array to test
606 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
607 * false, otherwise
608 */
609 public static boolean isArrayByteBase64(byte[] arrayOctet) {
610 for (int i = 0; i < arrayOctet.length; i++) {
611 if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) {
612 return false;
613 }
614 }
615 return true;
616 }
617
618 /**
619 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
620 *
621 * @param arrayOctet
622 * byte array to test
623 * @return <code>true</code> if any byte is a valid character in the Base64 alphabet; false herwise
624 */
625 private static boolean containsBase64Byte(byte[] arrayOctet) {
626 for (int i = 0; i < arrayOctet.length; i++) {
627 if (isBase64(arrayOctet[i])) {
628 return true;
629 }
630 }
631 return false;
632 }
633
634 /**
635 * Encodes binary data using the base64 algorithm but does not chunk the output.
636 *
637 * @param binaryData
638 * binary data to encode
639 * @return byte[] containing Base64 characters in their UTF-8 representation.
640 */
641 public static byte[] encodeBase64(byte[] binaryData) {
642 return encodeBase64(binaryData, false);
643 }
644
645 /**
646 * Encodes binary data using the base64 algorithm into 76 character blocks separated by CRLF.
647 *
648 * @param binaryData
649 * binary data to encode
650 * @return String containing Base64 characters.
651 * @since 1.4
652 */
653 public static String encodeBase64String(byte[] binaryData) {
654 return StringUtils.newStringUtf8(encodeBase64(binaryData, true));
655 }
656
657 /**
658 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
659 * url-safe variation emits - and _ instead of + and / characters.
660 *
661 * @param binaryData
662 * binary data to encode
663 * @return byte[] containing Base64 characters in their UTF-8 representation.
664 * @since 1.4
665 */
666 public static byte[] encodeBase64URLSafe(byte[] binaryData) {
667 return encodeBase64(binaryData, false, true);
668 }
669
670 /**
671 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The
672 * url-safe variation emits - and _ instead of + and / characters.
673 *
674 * @param binaryData
675 * binary data to encode
676 * @return String containing Base64 characters
677 * @since 1.4
678 */
679 public static String encodeBase64URLSafeString(byte[] binaryData) {
680 return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
681 }
682
683 /**
684 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
685 *
686 * @param binaryData
687 * binary data to encode
688 * @return Base64 characters chunked in 76 character blocks
689 */
690 public static byte[] encodeBase64Chunked(byte[] binaryData) {
691 return encodeBase64(binaryData, true);
692 }
693
694 /**
695 * Decodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
696 * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
697 *
698 * @param pObject
699 * Object to decode
700 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String supplied.
701 * @throws DecoderException
702 * if the parameter supplied is not of type byte[]
703 */
704 public Object decode(Object pObject) throws DecoderException {
705 if (pObject instanceof byte[]) {
706 return decode((byte[]) pObject);
707 } else if (pObject instanceof String) {
708 return decode((String) pObject);
709 } else {
710 throw new DecoderException("Parameter supplied to Base64 decode is not a byte[] or a String");
711 }
712 }
713
714 /**
715 * Decodes a String containing containing characters in the Base64 alphabet.
716 *
717 * @param pArray
718 * A String containing Base64 character data
719 * @return a byte array containing binary data
720 * @since 1.4
721 */
722 public byte[] decode(String pArray) {
723 return decode(StringUtils.getBytesUtf8(pArray));
724 }
725
726 /**
727 * Decodes a byte[] containing containing characters in the Base64 alphabet.
728 *
729 * @param pArray
730 * A byte array containing Base64 character data
731 * @return a byte array containing binary data
732 */
733 public byte[] decode(byte[] pArray) {
734 reset();
735 if (pArray == null || pArray.length == 0) {
736 return pArray;
737 }
738 long len = (pArray.length * 3) / 4;
739 byte[] buf = new byte[(int) len];
740 setInitialBuffer(buf, 0, buf.length);
741 decode(pArray, 0, pArray.length);
742 decode(pArray, 0, -1); // Notify decoder of EOF.
743
744 // Would be nice to just return buf (like we sometimes do in the encode
745 // logic), but we have no idea what the line-length was (could even be
746 // variable). So we cannot determine ahead of time exactly how big an
747 // array is necessary. Hence the need to construct a 2nd byte array to
748 // hold the final result:
749
750 byte[] result = new byte[pos];
751 readResults(result, 0, result.length);
752 return result;
753 }
754
755 /**
756 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
757 *
758 * @param binaryData
759 * Array containing binary data to encode.
760 * @param isChunked
761 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
762 * @return Base64-encoded data.
763 * @throws IllegalArgumentException
764 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
765 */
766 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
767 return encodeBase64(binaryData, isChunked, false);
768 }
769
770 /**
771 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
772 *
773 * @param binaryData
774 * Array containing binary data to encode.
775 * @param isChunked
776 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
777 * @param urlSafe
778 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
779 * @return Base64-encoded data.
780 * @throws IllegalArgumentException
781 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
782 * @since 1.4
783 */
784 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) {
785 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
786 }
787
788 /**
789 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
790 *
791 * @param binaryData
792 * Array containing binary data to encode.
793 * @param isChunked
794 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
795 * @param urlSafe
796 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters.
797 * @param maxResultSize
798 * The maximum result size to accept.
799 * @return Base64-encoded data.
800 * @throws IllegalArgumentException
801 * Thrown when the input array needs an output array bigger than maxResultSize
802 * @since 1.4
803 */
804 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) {
805 if (binaryData == null || binaryData.length == 0) {
806 return binaryData;
807 }
808
809 long len = getEncodeLength(binaryData, CHUNK_SIZE, CHUNK_SEPARATOR);
810 if (len > maxResultSize) {
811 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
812 len +
813 ") than the specified maxium size of " +
814 maxResultSize);
815 }
816
817 Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
818 return b64.encode(binaryData);
819 }
820
821 /**
822 * Decodes a Base64 String into octets
823 *
824 * @param base64String
825 * String containing Base64 data
826 * @return Array containing decoded data.
827 * @since 1.4
828 */
829 public static byte[] decodeBase64(String base64String) {
830 return new Base64().decode(base64String);
831 }
832
833 /**
834 * Decodes Base64 data into octets
835 *
836 * @param base64Data
837 * Byte array containing Base64 data
838 * @return Array containing decoded data.
839 */
840 public static byte[] decodeBase64(byte[] base64Data) {
841 return new Base64().decode(base64Data);
842 }
843
844 /**
845 * Discards any whitespace from a base-64 encoded block.
846 *
847 * @param data
848 * The base-64 encoded data to discard the whitespace from.
849 * @return The data, less whitespace (see RFC 2045).
850 * @deprecated This method is no longer needed
851 */
852 static byte[] discardWhitespace(byte[] data) {
853 byte groomedData[] = new byte[data.length];
854 int bytesCopied = 0;
855 for (int i = 0; i < data.length; i++) {
856 switch (data[i]) {
857 case ' ' :
858 case '\n' :
859 case '\r' :
860 case '\t' :
861 break;
862 default :
863 groomedData[bytesCopied++] = data[i];
864 }
865 }
866 byte packedData[] = new byte[bytesCopied];
867 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
868 return packedData;
869 }
870
871 /**
872 * Checks if a byte value is whitespace or not.
873 *
874 * @param byteToCheck
875 * the byte to check
876 * @return true if byte is whitespace, false otherwise
877 */
878 private static boolean isWhiteSpace(byte byteToCheck) {
879 switch (byteToCheck) {
880 case ' ' :
881 case '\n' :
882 case '\r' :
883 case '\t' :
884 return true;
885 default :
886 return false;
887 }
888 }
889
890 // Implementation of the Encoder Interface
891
892 /**
893 * Encodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
894 * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
895 *
896 * @param pObject
897 * Object to encode
898 * @return An object (of type byte[]) containing the base64 encoded data which corresponds to the byte[] supplied.
899 * @throws EncoderException
900 * if the parameter supplied is not of type byte[]
901 */
902 public Object encode(Object pObject) throws EncoderException {
903 if (!(pObject instanceof byte[])) {
904 throw new EncoderException("Parameter supplied to Base64 encode is not a byte[]");
905 }
906 return encode((byte[]) pObject);
907 }
908
909 /**
910 * Encodes a byte[] containing binary data, into a String containing characters in the Base64 alphabet.
911 *
912 * @param pArray
913 * a byte array containing binary data
914 * @return A String containing only Base64 character data
915 * @since 1.4
916 */
917 public String encodeToString(byte[] pArray) {
918 return StringUtils.newStringUtf8(encode(pArray));
919 }
920
921 /**
922 * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
923 *
924 * @param pArray
925 * a byte array containing binary data
926 * @return A byte array containing only Base64 character data
927 */
928 public byte[] encode(byte[] pArray) {
929 reset();
930 if (pArray == null || pArray.length == 0) {
931 return pArray;
932 }
933 long len = getEncodeLength(pArray, lineLength, lineSeparator);
934 byte[] buf = new byte[(int) len];
935 setInitialBuffer(buf, 0, buf.length);
936 encode(pArray, 0, pArray.length);
937 encode(pArray, 0, -1); // Notify encoder of EOF.
938 // Encoder might have resized, even though it was unnecessary.
939 if (buffer != buf) {
940 readResults(buf, 0, buf.length);
941 }
942 // In URL-SAFE mode we skip the padding characters, so sometimes our
943 // final length is a bit smaller.
944 if (isUrlSafe() && pos < buf.length) {
945 byte[] smallerBuf = new byte[pos];
946 System.arraycopy(buf, 0, smallerBuf, 0, pos);
947 buf = smallerBuf;
948 }
949 return buf;
950 }
951
952 /**
953 * Pre-calculates the amount of space needed to base64-encode the supplied array.
954 *
955 * @param pArray byte[] array which will later be encoded
956 * @param chunkSize line-length of the output (<= 0 means no chunking) between each
957 * chunkSeparator (e.g. CRLF).
958 * @param chunkSeparator the sequence of bytes used to separate chunks of output (e.g. CRLF).
959 *
960 * @return amount of space needed to encoded the supplied array. Returns
961 * a long since a max-len array will require Integer.MAX_VALUE + 33%.
962 */
963 private static long getEncodeLength(byte[] pArray, int chunkSize, byte[] chunkSeparator) {
964 // base64 always encodes to multiples of 4.
965 chunkSize = (chunkSize / 4) * 4;
966
967 long len = (pArray.length * 4) / 3;
968 long mod = len % 4;
969 if (mod != 0) {
970 len += 4 - mod;
971 }
972 if (chunkSize > 0) {
973 boolean lenChunksPerfectly = len % chunkSize == 0;
974 len += (len / chunkSize) * chunkSeparator.length;
975 if (!lenChunksPerfectly) {
976 len += chunkSeparator.length;
977 }
978 }
979 return len;
980 }
981
982 // Implementation of integer encoding used for crypto
983 /**
984 * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature
985 *
986 * @param pArray
987 * a byte array containing base64 character data
988 * @return A BigInteger
989 * @since 1.4
990 */
991 public static BigInteger decodeInteger(byte[] pArray) {
992 return new BigInteger(1, decodeBase64(pArray));
993 }
994
995 /**
996 * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature
997 *
998 * @param bigInt
999 * a BigInteger
1000 * @return A byte array containing base64 character data
1001 * @throws NullPointerException
1002 * if null is passed in
1003 * @since 1.4
1004 */
1005 public static byte[] encodeInteger(BigInteger bigInt) {
1006 if (bigInt == null) {
1007 throw new NullPointerException("encodeInteger called with null parameter");
1008 }
1009 return encodeBase64(toIntegerBytes(bigInt), false);
1010 }
1011
1012 /**
1013 * Returns a byte-array representation of a <code>BigInteger</code> without sign bit.
1014 *
1015 * @param bigInt
1016 * <code>BigInteger</code> to be converted
1017 * @return a byte array representation of the BigInteger parameter
1018 */
1019 static byte[] toIntegerBytes(BigInteger bigInt) {
1020 int bitlen = bigInt.bitLength();
1021 // round bitlen
1022 bitlen = ((bitlen + 7) >> 3) << 3;
1023 byte[] bigBytes = bigInt.toByteArray();
1024
1025 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) {
1026 return bigBytes;
1027 }
1028 // set up params for copying everything but sign bit
1029 int startSrc = 0;
1030 int len = bigBytes.length;
1031
1032 // if bigInt is exactly byte-aligned, just skip signbit in copy
1033 if ((bigInt.bitLength() % 8) == 0) {
1034 startSrc = 1;
1035 len--;
1036 }
1037 int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
1038 byte[] resizedBytes = new byte[bitlen / 8];
1039 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
1040 return resizedBytes;
1041 }
1042
1043 /**
1044 * Resets this Base64 object to its initial newly constructed state.
1045 */
1046 private void reset() {
1047 buffer = null;
1048 pos = 0;
1049 readPos = 0;
1050 currentLinePos = 0;
1051 modulus = 0;
1052 eof = false;
1053 }
1054
1055 }