001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.binary; 019 020 import java.math.BigInteger; 021 022 import org.apache.commons.codec.BinaryDecoder; 023 import org.apache.commons.codec.BinaryEncoder; 024 import org.apache.commons.codec.DecoderException; 025 import org.apache.commons.codec.EncoderException; 026 027 /** 028 * Provides Base64 encoding and decoding as defined by RFC 2045. 029 * 030 * <p> 031 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose 032 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. 033 * </p> 034 * <p> 035 * The class can be parameterized in the following manner with various constructors: 036 * <ul> 037 * <li>URL-safe mode: Default off.</li> 038 * <li>Line length: Default 76. Line length that aren't multiples of 4 will still essentially end up being multiples of 039 * 4 in the encoded data. 040 * <li>Line separator: Default is CRLF ("\r\n")</li> 041 * </ul> 042 * </p> 043 * <p> 044 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode 045 * character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). 046 * </p> 047 * 048 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> 049 * @author Apache Software Foundation 050 * @since 1.0 051 * @version $Id: Base64.java 801706 2009-08-06 16:27:06Z niallp $ 052 */ 053 public class Base64 implements BinaryEncoder, BinaryDecoder { 054 private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; 055 056 private static final int DEFAULT_BUFFER_SIZE = 8192; 057 058 /** 059 * Chunk size per RFC 2045 section 6.8. 060 * 061 * <p> 062 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any 063 * equal signs. 064 * </p> 065 * 066 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> 067 */ 068 static final int CHUNK_SIZE = 76; 069 070 /** 071 * Chunk separator per RFC 2045 section 2.1. 072 * 073 * <p> 074 * N.B. The next major release may break compatibility and make this field private. 075 * </p> 076 * 077 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> 078 */ 079 static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; 080 081 /** 082 * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" 083 * equivalents as specified in Table 1 of RFC 2045. 084 * 085 * Thanks to "commons" project in ws.apache.org for this code. 086 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 087 */ 088 private static final byte[] STANDARD_ENCODE_TABLE = { 089 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 090 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 091 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 092 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 093 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' 094 }; 095 096 /** 097 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / 098 * changed to - and _ to make the encoded Base64 results more URL-SAFE. 099 * This table is only used when the Base64's mode is set to URL-SAFE. 100 */ 101 private static final byte[] URL_SAFE_ENCODE_TABLE = { 102 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 103 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 104 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 105 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 106 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' 107 }; 108 109 /** 110 * Byte used to pad output. 111 */ 112 private static final byte PAD = '='; 113 114 /** 115 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified in 116 * Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64 117 * alphabet but fall within the bounds of the array are translated to -1. 118 * 119 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both 120 * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit). 121 * 122 * Thanks to "commons" project in ws.apache.org for this code. 123 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 124 */ 125 private static final byte[] DECODE_TABLE = { 126 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 127 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 128 -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, 52, 53, 54, 129 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 130 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 131 24, 25, -1, -1, -1, -1, 63, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 132 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 133 }; 134 135 /** Mask used to extract 6 bits, used when encoding */ 136 private static final int MASK_6BITS = 0x3f; 137 138 /** Mask used to extract 8 bits, used in decoding base64 bytes */ 139 private static final int MASK_8BITS = 0xff; 140 141 // The static final fields above are used for the original static byte[] methods on Base64. 142 // The private member fields below are used with the new streaming approach, which requires 143 // some state be preserved between calls of encode() and decode(). 144 145 /** 146 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able 147 * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch 148 * between the two modes. 149 */ 150 private final byte[] encodeTable; 151 152 /** 153 * Line length for encoding. Not used when decoding. A value of zero or less implies no chunking of the base64 154 * encoded data. 155 */ 156 private final int lineLength; 157 158 /** 159 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. 160 */ 161 private final byte[] lineSeparator; 162 163 /** 164 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 165 * <code>decodeSize = 3 + lineSeparator.length;</code> 166 */ 167 private final int decodeSize; 168 169 /** 170 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 171 * <code>encodeSize = 4 + lineSeparator.length;</code> 172 */ 173 private final int encodeSize; 174 175 /** 176 * Buffer for streaming. 177 */ 178 private byte[] buffer; 179 180 /** 181 * Position where next character should be written in the buffer. 182 */ 183 private int pos; 184 185 /** 186 * Position where next character should be read from the buffer. 187 */ 188 private int readPos; 189 190 /** 191 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use it to 192 * make sure each encoded line never goes beyond lineLength (if lineLength > 0). 193 */ 194 private int currentLinePos; 195 196 /** 197 * Writes to the buffer only occur after every 3 reads when encoding, an every 4 reads when decoding. This variable 198 * helps track that. 199 */ 200 private int modulus; 201 202 /** 203 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this Base64 object becomes useless, 204 * and must be thrown away. 205 */ 206 private boolean eof; 207 208 /** 209 * Place holder for the 3 bytes we're dealing with for our base64 logic. Bitwise operations store and extract the 210 * base64 encoding or decoding from this variable. 211 */ 212 private int x; 213 214 /** 215 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 216 * <p> 217 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. 218 * </p> 219 * 220 * <p> 221 * When decoding all variants are supported. 222 * </p> 223 */ 224 public Base64() { 225 this(false); 226 } 227 228 /** 229 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode. 230 * <p> 231 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. 232 * </p> 233 * 234 * <p> 235 * When decoding all variants are supported. 236 * </p> 237 * 238 * @param urlSafe 239 * if <code>true</code>, URL-safe encoding is used. In most cases this should be set to 240 * <code>false</code>. 241 * @since 1.4 242 */ 243 public Base64(boolean urlSafe) { 244 this(CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe); 245 } 246 247 /** 248 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 249 * <p> 250 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is 251 * STANDARD_ENCODE_TABLE. 252 * </p> 253 * <p> 254 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 255 * </p> 256 * <p> 257 * When decoding all variants are supported. 258 * </p> 259 * 260 * @param lineLength 261 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4). 262 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 263 * @since 1.4 264 */ 265 public Base64(int lineLength) { 266 this(lineLength, CHUNK_SEPARATOR); 267 } 268 269 /** 270 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 271 * <p> 272 * When encoding the line length and line separator are given in the constructor, and the encoding table is 273 * STANDARD_ENCODE_TABLE. 274 * </p> 275 * <p> 276 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 277 * </p> 278 * <p> 279 * When decoding all variants are supported. 280 * </p> 281 * 282 * @param lineLength 283 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4). 284 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 285 * @param lineSeparator 286 * Each line of encoded data will end with this sequence of bytes. 287 * @throws IllegalArgumentException 288 * Thrown when the provided lineSeparator included some base64 characters. 289 * @since 1.4 290 */ 291 public Base64(int lineLength, byte[] lineSeparator) { 292 this(lineLength, lineSeparator, false); 293 } 294 295 /** 296 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. 297 * <p> 298 * When encoding the line length and line separator are given in the constructor, and the encoding table is 299 * STANDARD_ENCODE_TABLE. 300 * </p> 301 * <p> 302 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. 303 * </p> 304 * <p> 305 * When decoding all variants are supported. 306 * </p> 307 * 308 * @param lineLength 309 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of 4). 310 * If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when decoding. 311 * @param lineSeparator 312 * Each line of encoded data will end with this sequence of bytes. 313 * @param urlSafe 314 * Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode 315 * operations. Decoding seamlessly handles both modes. 316 * @throws IllegalArgumentException 317 * The provided lineSeparator included some base64 characters. That's not going to work! 318 * @since 1.4 319 */ 320 public Base64(int lineLength, byte[] lineSeparator, boolean urlSafe) { 321 if (lineSeparator == null) { 322 lineLength = 0; // disable chunk-separating 323 lineSeparator = CHUNK_SEPARATOR; // this just gets ignored 324 } 325 this.lineLength = lineLength > 0 ? (lineLength / 4) * 4 : 0; 326 this.lineSeparator = new byte[lineSeparator.length]; 327 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); 328 if (lineLength > 0) { 329 this.encodeSize = 4 + lineSeparator.length; 330 } else { 331 this.encodeSize = 4; 332 } 333 this.decodeSize = this.encodeSize - 1; 334 if (containsBase64Byte(lineSeparator)) { 335 String sep = StringUtils.newStringUtf8(lineSeparator); 336 throw new IllegalArgumentException("lineSeperator must not contain base64 characters: [" + sep + "]"); 337 } 338 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE; 339 } 340 341 /** 342 * Returns our current encode mode. True if we're URL-SAFE, false otherwise. 343 * 344 * @return true if we're in URL-SAFE mode, false otherwise. 345 * @since 1.4 346 */ 347 public boolean isUrlSafe() { 348 return this.encodeTable == URL_SAFE_ENCODE_TABLE; 349 } 350 351 /** 352 * Returns true if this Base64 object has buffered data for reading. 353 * 354 * @return true if there is Base64 object still available for reading. 355 */ 356 boolean hasData() { 357 return this.buffer != null; 358 } 359 360 /** 361 * Returns the amount of buffered data available for reading. 362 * 363 * @return The amount of buffered data available for reading. 364 */ 365 int avail() { 366 return buffer != null ? pos - readPos : 0; 367 } 368 369 /** Doubles our buffer. */ 370 private void resizeBuffer() { 371 if (buffer == null) { 372 buffer = new byte[DEFAULT_BUFFER_SIZE]; 373 pos = 0; 374 readPos = 0; 375 } else { 376 byte[] b = new byte[buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; 377 System.arraycopy(buffer, 0, b, 0, buffer.length); 378 buffer = b; 379 } 380 } 381 382 /** 383 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail 384 * bytes. Returns how many bytes were actually extracted. 385 * 386 * @param b 387 * byte[] array to extract the buffered data into. 388 * @param bPos 389 * position in byte[] array to start extraction at. 390 * @param bAvail 391 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). 392 * @return The number of bytes successfully extracted into the provided byte[] array. 393 */ 394 int readResults(byte[] b, int bPos, int bAvail) { 395 if (buffer != null) { 396 int len = Math.min(avail(), bAvail); 397 if (buffer != b) { 398 System.arraycopy(buffer, readPos, b, bPos, len); 399 readPos += len; 400 if (readPos >= pos) { 401 buffer = null; 402 } 403 } else { 404 // Re-using the original consumer's output array is only 405 // allowed for one round. 406 buffer = null; 407 } 408 return len; 409 } 410 return eof ? -1 : 0; 411 } 412 413 /** 414 * Sets the streaming buffer. This is a small optimization where we try to buffer directly to the consumer's output 415 * array for one round (if the consumer calls this method first) instead of starting our own buffer. 416 * 417 * @param out 418 * byte[] array to buffer directly to. 419 * @param outPos 420 * Position to start buffering into. 421 * @param outAvail 422 * Amount of bytes available for direct buffering. 423 */ 424 void setInitialBuffer(byte[] out, int outPos, int outAvail) { 425 // We can re-use consumer's original output array under 426 // special circumstances, saving on some System.arraycopy(). 427 if (out != null && out.length == outAvail) { 428 buffer = out; 429 pos = outPos; 430 readPos = outPos; 431 } 432 } 433 434 /** 435 * <p> 436 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with 437 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last 438 * remaining bytes (if not multiple of 3). 439 * </p> 440 * <p> 441 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 442 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 443 * </p> 444 * 445 * @param in 446 * byte[] array of binary data to base64 encode. 447 * @param inPos 448 * Position to start reading data from. 449 * @param inAvail 450 * Amount of bytes available from input for encoding. 451 */ 452 void encode(byte[] in, int inPos, int inAvail) { 453 if (eof) { 454 return; 455 } 456 // inAvail < 0 is how we're informed of EOF in the underlying data we're 457 // encoding. 458 if (inAvail < 0) { 459 eof = true; 460 if (buffer == null || buffer.length - pos < encodeSize) { 461 resizeBuffer(); 462 } 463 switch (modulus) { 464 case 1 : 465 buffer[pos++] = encodeTable[(x >> 2) & MASK_6BITS]; 466 buffer[pos++] = encodeTable[(x << 4) & MASK_6BITS]; 467 // URL-SAFE skips the padding to further reduce size. 468 if (encodeTable == STANDARD_ENCODE_TABLE) { 469 buffer[pos++] = PAD; 470 buffer[pos++] = PAD; 471 } 472 break; 473 474 case 2 : 475 buffer[pos++] = encodeTable[(x >> 10) & MASK_6BITS]; 476 buffer[pos++] = encodeTable[(x >> 4) & MASK_6BITS]; 477 buffer[pos++] = encodeTable[(x << 2) & MASK_6BITS]; 478 // URL-SAFE skips the padding to further reduce size. 479 if (encodeTable == STANDARD_ENCODE_TABLE) { 480 buffer[pos++] = PAD; 481 } 482 break; 483 } 484 if (lineLength > 0 && pos > 0) { 485 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 486 pos += lineSeparator.length; 487 } 488 } else { 489 for (int i = 0; i < inAvail; i++) { 490 if (buffer == null || buffer.length - pos < encodeSize) { 491 resizeBuffer(); 492 } 493 modulus = (++modulus) % 3; 494 int b = in[inPos++]; 495 if (b < 0) { 496 b += 256; 497 } 498 x = (x << 8) + b; 499 if (0 == modulus) { 500 buffer[pos++] = encodeTable[(x >> 18) & MASK_6BITS]; 501 buffer[pos++] = encodeTable[(x >> 12) & MASK_6BITS]; 502 buffer[pos++] = encodeTable[(x >> 6) & MASK_6BITS]; 503 buffer[pos++] = encodeTable[x & MASK_6BITS]; 504 currentLinePos += 4; 505 if (lineLength > 0 && lineLength <= currentLinePos) { 506 System.arraycopy(lineSeparator, 0, buffer, pos, lineSeparator.length); 507 pos += lineSeparator.length; 508 currentLinePos = 0; 509 } 510 } 511 } 512 } 513 } 514 515 /** 516 * <p> 517 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once 518 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" 519 * call is not necessary when decoding, but it doesn't hurt, either. 520 * </p> 521 * <p> 522 * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are 523 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, 524 * garbage-out philosophy: it will not check the provided data for validity. 525 * </p> 526 * <p> 527 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. 528 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ 529 * </p> 530 * 531 * @param in 532 * byte[] array of ascii data to base64 decode. 533 * @param inPos 534 * Position to start reading data from. 535 * @param inAvail 536 * Amount of bytes available from input for encoding. 537 */ 538 void decode(byte[] in, int inPos, int inAvail) { 539 if (eof) { 540 return; 541 } 542 if (inAvail < 0) { 543 eof = true; 544 } 545 for (int i = 0; i < inAvail; i++) { 546 if (buffer == null || buffer.length - pos < decodeSize) { 547 resizeBuffer(); 548 } 549 byte b = in[inPos++]; 550 if (b == PAD) { 551 // We're done. 552 eof = true; 553 break; 554 } else { 555 if (b >= 0 && b < DECODE_TABLE.length) { 556 int result = DECODE_TABLE[b]; 557 if (result >= 0) { 558 modulus = (++modulus) % 4; 559 x = (x << 6) + result; 560 if (modulus == 0) { 561 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS); 562 buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS); 563 buffer[pos++] = (byte) (x & MASK_8BITS); 564 } 565 } 566 } 567 } 568 } 569 570 // Two forms of EOF as far as base64 decoder is concerned: actual 571 // EOF (-1) and first time '=' character is encountered in stream. 572 // This approach makes the '=' padding characters completely optional. 573 if (eof && modulus != 0) { 574 x = x << 6; 575 switch (modulus) { 576 case 2 : 577 x = x << 6; 578 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS); 579 break; 580 case 3 : 581 buffer[pos++] = (byte) ((x >> 16) & MASK_8BITS); 582 buffer[pos++] = (byte) ((x >> 8) & MASK_8BITS); 583 break; 584 } 585 } 586 } 587 588 /** 589 * Returns whether or not the <code>octet</code> is in the base 64 alphabet. 590 * 591 * @param octet 592 * The value to test 593 * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise. 594 * @since 1.4 595 */ 596 public static boolean isBase64(byte octet) { 597 return octet == PAD || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1); 598 } 599 600 /** 601 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the 602 * method treats whitespace as valid. 603 * 604 * @param arrayOctet 605 * byte array to test 606 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; 607 * false, otherwise 608 */ 609 public static boolean isArrayByteBase64(byte[] arrayOctet) { 610 for (int i = 0; i < arrayOctet.length; i++) { 611 if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) { 612 return false; 613 } 614 } 615 return true; 616 } 617 618 /** 619 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. 620 * 621 * @param arrayOctet 622 * byte array to test 623 * @return <code>true</code> if any byte is a valid character in the Base64 alphabet; false herwise 624 */ 625 private static boolean containsBase64Byte(byte[] arrayOctet) { 626 for (int i = 0; i < arrayOctet.length; i++) { 627 if (isBase64(arrayOctet[i])) { 628 return true; 629 } 630 } 631 return false; 632 } 633 634 /** 635 * Encodes binary data using the base64 algorithm but does not chunk the output. 636 * 637 * @param binaryData 638 * binary data to encode 639 * @return byte[] containing Base64 characters in their UTF-8 representation. 640 */ 641 public static byte[] encodeBase64(byte[] binaryData) { 642 return encodeBase64(binaryData, false); 643 } 644 645 /** 646 * Encodes binary data using the base64 algorithm into 76 character blocks separated by CRLF. 647 * 648 * @param binaryData 649 * binary data to encode 650 * @return String containing Base64 characters. 651 * @since 1.4 652 */ 653 public static String encodeBase64String(byte[] binaryData) { 654 return StringUtils.newStringUtf8(encodeBase64(binaryData, true)); 655 } 656 657 /** 658 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The 659 * url-safe variation emits - and _ instead of + and / characters. 660 * 661 * @param binaryData 662 * binary data to encode 663 * @return byte[] containing Base64 characters in their UTF-8 representation. 664 * @since 1.4 665 */ 666 public static byte[] encodeBase64URLSafe(byte[] binaryData) { 667 return encodeBase64(binaryData, false, true); 668 } 669 670 /** 671 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The 672 * url-safe variation emits - and _ instead of + and / characters. 673 * 674 * @param binaryData 675 * binary data to encode 676 * @return String containing Base64 characters 677 * @since 1.4 678 */ 679 public static String encodeBase64URLSafeString(byte[] binaryData) { 680 return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true)); 681 } 682 683 /** 684 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks 685 * 686 * @param binaryData 687 * binary data to encode 688 * @return Base64 characters chunked in 76 character blocks 689 */ 690 public static byte[] encodeBase64Chunked(byte[] binaryData) { 691 return encodeBase64(binaryData, true); 692 } 693 694 /** 695 * Decodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the 696 * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String. 697 * 698 * @param pObject 699 * Object to decode 700 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String supplied. 701 * @throws DecoderException 702 * if the parameter supplied is not of type byte[] 703 */ 704 public Object decode(Object pObject) throws DecoderException { 705 if (pObject instanceof byte[]) { 706 return decode((byte[]) pObject); 707 } else if (pObject instanceof String) { 708 return decode((String) pObject); 709 } else { 710 throw new DecoderException("Parameter supplied to Base64 decode is not a byte[] or a String"); 711 } 712 } 713 714 /** 715 * Decodes a String containing containing characters in the Base64 alphabet. 716 * 717 * @param pArray 718 * A String containing Base64 character data 719 * @return a byte array containing binary data 720 * @since 1.4 721 */ 722 public byte[] decode(String pArray) { 723 return decode(StringUtils.getBytesUtf8(pArray)); 724 } 725 726 /** 727 * Decodes a byte[] containing containing characters in the Base64 alphabet. 728 * 729 * @param pArray 730 * A byte array containing Base64 character data 731 * @return a byte array containing binary data 732 */ 733 public byte[] decode(byte[] pArray) { 734 reset(); 735 if (pArray == null || pArray.length == 0) { 736 return pArray; 737 } 738 long len = (pArray.length * 3) / 4; 739 byte[] buf = new byte[(int) len]; 740 setInitialBuffer(buf, 0, buf.length); 741 decode(pArray, 0, pArray.length); 742 decode(pArray, 0, -1); // Notify decoder of EOF. 743 744 // Would be nice to just return buf (like we sometimes do in the encode 745 // logic), but we have no idea what the line-length was (could even be 746 // variable). So we cannot determine ahead of time exactly how big an 747 // array is necessary. Hence the need to construct a 2nd byte array to 748 // hold the final result: 749 750 byte[] result = new byte[pos]; 751 readResults(result, 0, result.length); 752 return result; 753 } 754 755 /** 756 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 757 * 758 * @param binaryData 759 * Array containing binary data to encode. 760 * @param isChunked 761 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks 762 * @return Base64-encoded data. 763 * @throws IllegalArgumentException 764 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} 765 */ 766 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) { 767 return encodeBase64(binaryData, isChunked, false); 768 } 769 770 /** 771 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 772 * 773 * @param binaryData 774 * Array containing binary data to encode. 775 * @param isChunked 776 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks 777 * @param urlSafe 778 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. 779 * @return Base64-encoded data. 780 * @throws IllegalArgumentException 781 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} 782 * @since 1.4 783 */ 784 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe) { 785 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE); 786 } 787 788 /** 789 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. 790 * 791 * @param binaryData 792 * Array containing binary data to encode. 793 * @param isChunked 794 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks 795 * @param urlSafe 796 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. 797 * @param maxResultSize 798 * The maximum result size to accept. 799 * @return Base64-encoded data. 800 * @throws IllegalArgumentException 801 * Thrown when the input array needs an output array bigger than maxResultSize 802 * @since 1.4 803 */ 804 public static byte[] encodeBase64(byte[] binaryData, boolean isChunked, boolean urlSafe, int maxResultSize) { 805 if (binaryData == null || binaryData.length == 0) { 806 return binaryData; 807 } 808 809 long len = getEncodeLength(binaryData, CHUNK_SIZE, CHUNK_SEPARATOR); 810 if (len > maxResultSize) { 811 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + 812 len + 813 ") than the specified maxium size of " + 814 maxResultSize); 815 } 816 817 Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe); 818 return b64.encode(binaryData); 819 } 820 821 /** 822 * Decodes a Base64 String into octets 823 * 824 * @param base64String 825 * String containing Base64 data 826 * @return Array containing decoded data. 827 * @since 1.4 828 */ 829 public static byte[] decodeBase64(String base64String) { 830 return new Base64().decode(base64String); 831 } 832 833 /** 834 * Decodes Base64 data into octets 835 * 836 * @param base64Data 837 * Byte array containing Base64 data 838 * @return Array containing decoded data. 839 */ 840 public static byte[] decodeBase64(byte[] base64Data) { 841 return new Base64().decode(base64Data); 842 } 843 844 /** 845 * Discards any whitespace from a base-64 encoded block. 846 * 847 * @param data 848 * The base-64 encoded data to discard the whitespace from. 849 * @return The data, less whitespace (see RFC 2045). 850 * @deprecated This method is no longer needed 851 */ 852 static byte[] discardWhitespace(byte[] data) { 853 byte groomedData[] = new byte[data.length]; 854 int bytesCopied = 0; 855 for (int i = 0; i < data.length; i++) { 856 switch (data[i]) { 857 case ' ' : 858 case '\n' : 859 case '\r' : 860 case '\t' : 861 break; 862 default : 863 groomedData[bytesCopied++] = data[i]; 864 } 865 } 866 byte packedData[] = new byte[bytesCopied]; 867 System.arraycopy(groomedData, 0, packedData, 0, bytesCopied); 868 return packedData; 869 } 870 871 /** 872 * Checks if a byte value is whitespace or not. 873 * 874 * @param byteToCheck 875 * the byte to check 876 * @return true if byte is whitespace, false otherwise 877 */ 878 private static boolean isWhiteSpace(byte byteToCheck) { 879 switch (byteToCheck) { 880 case ' ' : 881 case '\n' : 882 case '\r' : 883 case '\t' : 884 return true; 885 default : 886 return false; 887 } 888 } 889 890 // Implementation of the Encoder Interface 891 892 /** 893 * Encodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the 894 * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[]. 895 * 896 * @param pObject 897 * Object to encode 898 * @return An object (of type byte[]) containing the base64 encoded data which corresponds to the byte[] supplied. 899 * @throws EncoderException 900 * if the parameter supplied is not of type byte[] 901 */ 902 public Object encode(Object pObject) throws EncoderException { 903 if (!(pObject instanceof byte[])) { 904 throw new EncoderException("Parameter supplied to Base64 encode is not a byte[]"); 905 } 906 return encode((byte[]) pObject); 907 } 908 909 /** 910 * Encodes a byte[] containing binary data, into a String containing characters in the Base64 alphabet. 911 * 912 * @param pArray 913 * a byte array containing binary data 914 * @return A String containing only Base64 character data 915 * @since 1.4 916 */ 917 public String encodeToString(byte[] pArray) { 918 return StringUtils.newStringUtf8(encode(pArray)); 919 } 920 921 /** 922 * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet. 923 * 924 * @param pArray 925 * a byte array containing binary data 926 * @return A byte array containing only Base64 character data 927 */ 928 public byte[] encode(byte[] pArray) { 929 reset(); 930 if (pArray == null || pArray.length == 0) { 931 return pArray; 932 } 933 long len = getEncodeLength(pArray, lineLength, lineSeparator); 934 byte[] buf = new byte[(int) len]; 935 setInitialBuffer(buf, 0, buf.length); 936 encode(pArray, 0, pArray.length); 937 encode(pArray, 0, -1); // Notify encoder of EOF. 938 // Encoder might have resized, even though it was unnecessary. 939 if (buffer != buf) { 940 readResults(buf, 0, buf.length); 941 } 942 // In URL-SAFE mode we skip the padding characters, so sometimes our 943 // final length is a bit smaller. 944 if (isUrlSafe() && pos < buf.length) { 945 byte[] smallerBuf = new byte[pos]; 946 System.arraycopy(buf, 0, smallerBuf, 0, pos); 947 buf = smallerBuf; 948 } 949 return buf; 950 } 951 952 /** 953 * Pre-calculates the amount of space needed to base64-encode the supplied array. 954 * 955 * @param pArray byte[] array which will later be encoded 956 * @param chunkSize line-length of the output (<= 0 means no chunking) between each 957 * chunkSeparator (e.g. CRLF). 958 * @param chunkSeparator the sequence of bytes used to separate chunks of output (e.g. CRLF). 959 * 960 * @return amount of space needed to encoded the supplied array. Returns 961 * a long since a max-len array will require Integer.MAX_VALUE + 33%. 962 */ 963 private static long getEncodeLength(byte[] pArray, int chunkSize, byte[] chunkSeparator) { 964 // base64 always encodes to multiples of 4. 965 chunkSize = (chunkSize / 4) * 4; 966 967 long len = (pArray.length * 4) / 3; 968 long mod = len % 4; 969 if (mod != 0) { 970 len += 4 - mod; 971 } 972 if (chunkSize > 0) { 973 boolean lenChunksPerfectly = len % chunkSize == 0; 974 len += (len / chunkSize) * chunkSeparator.length; 975 if (!lenChunksPerfectly) { 976 len += chunkSeparator.length; 977 } 978 } 979 return len; 980 } 981 982 // Implementation of integer encoding used for crypto 983 /** 984 * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature 985 * 986 * @param pArray 987 * a byte array containing base64 character data 988 * @return A BigInteger 989 * @since 1.4 990 */ 991 public static BigInteger decodeInteger(byte[] pArray) { 992 return new BigInteger(1, decodeBase64(pArray)); 993 } 994 995 /** 996 * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature 997 * 998 * @param bigInt 999 * a BigInteger 1000 * @return A byte array containing base64 character data 1001 * @throws NullPointerException 1002 * if null is passed in 1003 * @since 1.4 1004 */ 1005 public static byte[] encodeInteger(BigInteger bigInt) { 1006 if (bigInt == null) { 1007 throw new NullPointerException("encodeInteger called with null parameter"); 1008 } 1009 return encodeBase64(toIntegerBytes(bigInt), false); 1010 } 1011 1012 /** 1013 * Returns a byte-array representation of a <code>BigInteger</code> without sign bit. 1014 * 1015 * @param bigInt 1016 * <code>BigInteger</code> to be converted 1017 * @return a byte array representation of the BigInteger parameter 1018 */ 1019 static byte[] toIntegerBytes(BigInteger bigInt) { 1020 int bitlen = bigInt.bitLength(); 1021 // round bitlen 1022 bitlen = ((bitlen + 7) >> 3) << 3; 1023 byte[] bigBytes = bigInt.toByteArray(); 1024 1025 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) { 1026 return bigBytes; 1027 } 1028 // set up params for copying everything but sign bit 1029 int startSrc = 0; 1030 int len = bigBytes.length; 1031 1032 // if bigInt is exactly byte-aligned, just skip signbit in copy 1033 if ((bigInt.bitLength() % 8) == 0) { 1034 startSrc = 1; 1035 len--; 1036 } 1037 int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec 1038 byte[] resizedBytes = new byte[bitlen / 8]; 1039 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len); 1040 return resizedBytes; 1041 } 1042 1043 /** 1044 * Resets this Base64 object to its initial newly constructed state. 1045 */ 1046 private void reset() { 1047 buffer = null; 1048 pos = 0; 1049 readPos = 0; 1050 currentLinePos = 0; 1051 modulus = 0; 1052 eof = false; 1053 } 1054 1055 }