001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.net; 019 020 import java.io.ByteArrayOutputStream; 021 import java.io.UnsupportedEncodingException; 022 import java.util.BitSet; 023 024 import org.apache.commons.codec.BinaryDecoder; 025 import org.apache.commons.codec.BinaryEncoder; 026 import org.apache.commons.codec.DecoderException; 027 import org.apache.commons.codec.EncoderException; 028 import org.apache.commons.codec.CharEncoding; 029 import org.apache.commons.codec.StringDecoder; 030 import org.apache.commons.codec.StringEncoder; 031 import org.apache.commons.codec.binary.StringUtils; 032 033 /** 034 * <p>Implements the 'www-form-urlencoded' encoding scheme, 035 * also misleadingly known as URL encoding.</p> 036 * 037 * <p>For more detailed information please refer to 038 * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1"> 039 * Chapter 17.13.4 'Form content types'</a> of the 040 * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p> 041 * 042 * <p> 043 * This codec is meant to be a replacement for standard Java classes 044 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} 045 * on older Java platforms, as these classes in Java versions below 046 * 1.4 rely on the platform's default charset encoding. 047 * </p> 048 * 049 * @author Apache Software Foundation 050 * @since 1.2 051 * @version $Id: URLCodec.java 798416 2009-07-28 06:35:58Z ggregory $ 052 */ 053 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { 054 055 /** 056 * Radix used in encoding and decoding. 057 */ 058 static final int RADIX = 16; 059 060 /** 061 * The default charset used for string decoding and encoding. Consider this field final. The next major release may 062 * break compatibility and make this field be final. 063 */ 064 protected String charset; 065 066 /** 067 * Consider this field final. The next major release may break compatibility and make this field be final. 068 */ 069 protected static byte ESCAPE_CHAR = '%'; 070 /** 071 * BitSet of www-form-url safe characters. 072 */ 073 protected static final BitSet WWW_FORM_URL = new BitSet(256); 074 075 // Static initializer for www_form_url 076 static { 077 // alpha characters 078 for (int i = 'a'; i <= 'z'; i++) { 079 WWW_FORM_URL.set(i); 080 } 081 for (int i = 'A'; i <= 'Z'; i++) { 082 WWW_FORM_URL.set(i); 083 } 084 // numeric characters 085 for (int i = '0'; i <= '9'; i++) { 086 WWW_FORM_URL.set(i); 087 } 088 // special chars 089 WWW_FORM_URL.set('-'); 090 WWW_FORM_URL.set('_'); 091 WWW_FORM_URL.set('.'); 092 WWW_FORM_URL.set('*'); 093 // blank to be replaced with + 094 WWW_FORM_URL.set(' '); 095 } 096 097 098 /** 099 * Default constructor. 100 */ 101 public URLCodec() { 102 this(CharEncoding.UTF_8); 103 } 104 105 /** 106 * Constructor which allows for the selection of a default charset 107 * 108 * @param charset the default string charset to use. 109 */ 110 public URLCodec(String charset) { 111 super(); 112 this.charset = charset; 113 } 114 115 /** 116 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 117 * 118 * @param urlsafe 119 * bitset of characters deemed URL safe 120 * @param bytes 121 * array of bytes to convert to URL safe characters 122 * @return array of bytes containing URL safe characters 123 */ 124 public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) { 125 if (bytes == null) { 126 return null; 127 } 128 if (urlsafe == null) { 129 urlsafe = WWW_FORM_URL; 130 } 131 132 ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 133 for (int i = 0; i < bytes.length; i++) { 134 int b = bytes[i]; 135 if (b < 0) { 136 b = 256 + b; 137 } 138 if (urlsafe.get(b)) { 139 if (b == ' ') { 140 b = '+'; 141 } 142 buffer.write(b); 143 } else { 144 buffer.write(ESCAPE_CHAR); 145 char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)); 146 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); 147 buffer.write(hex1); 148 buffer.write(hex2); 149 } 150 } 151 return buffer.toByteArray(); 152 } 153 154 /** 155 * Decodes an array of URL safe 7-bit characters into an array of 156 * original bytes. Escaped characters are converted back to their 157 * original representation. 158 * 159 * @param bytes array of URL safe characters 160 * @return array of original bytes 161 * @throws DecoderException Thrown if URL decoding is unsuccessful 162 */ 163 public static final byte[] decodeUrl(byte[] bytes) throws DecoderException { 164 if (bytes == null) { 165 return null; 166 } 167 ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 168 for (int i = 0; i < bytes.length; i++) { 169 int b = bytes[i]; 170 if (b == '+') { 171 buffer.write(' '); 172 } else if (b == ESCAPE_CHAR) { 173 try { 174 int u = Utils.digit16(bytes[++i]); 175 int l = Utils.digit16(bytes[++i]); 176 buffer.write((char) ((u << 4) + l)); 177 } catch (ArrayIndexOutOfBoundsException e) { 178 throw new DecoderException("Invalid URL encoding: ", e); 179 } 180 } else { 181 buffer.write(b); 182 } 183 } 184 return buffer.toByteArray(); 185 } 186 187 /** 188 * Encodes an array of bytes into an array of URL safe 7-bit 189 * characters. Unsafe characters are escaped. 190 * 191 * @param bytes array of bytes to convert to URL safe characters 192 * @return array of bytes containing URL safe characters 193 */ 194 public byte[] encode(byte[] bytes) { 195 return encodeUrl(WWW_FORM_URL, bytes); 196 } 197 198 199 /** 200 * Decodes an array of URL safe 7-bit characters into an array of 201 * original bytes. Escaped characters are converted back to their 202 * original representation. 203 * 204 * @param bytes array of URL safe characters 205 * @return array of original bytes 206 * @throws DecoderException Thrown if URL decoding is unsuccessful 207 */ 208 public byte[] decode(byte[] bytes) throws DecoderException { 209 return decodeUrl(bytes); 210 } 211 212 /** 213 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. 214 * 215 * @param pString 216 * string to convert to a URL safe form 217 * @param charset 218 * the charset for pString 219 * @return URL safe string 220 * @throws UnsupportedEncodingException 221 * Thrown if charset is not supported 222 */ 223 public String encode(String pString, String charset) throws UnsupportedEncodingException { 224 if (pString == null) { 225 return null; 226 } 227 return StringUtils.newStringUsAscii(encode(pString.getBytes(charset))); 228 } 229 230 /** 231 * Encodes a string into its URL safe form using the default string 232 * charset. Unsafe characters are escaped. 233 * 234 * @param pString string to convert to a URL safe form 235 * @return URL safe string 236 * @throws EncoderException Thrown if URL encoding is unsuccessful 237 * 238 * @see #getDefaultCharset() 239 */ 240 public String encode(String pString) throws EncoderException { 241 if (pString == null) { 242 return null; 243 } 244 try { 245 return encode(pString, getDefaultCharset()); 246 } catch (UnsupportedEncodingException e) { 247 throw new EncoderException(e.getMessage(), e); 248 } 249 } 250 251 252 /** 253 * Decodes a URL safe string into its original form using the 254 * specified encoding. Escaped characters are converted back 255 * to their original representation. 256 * 257 * @param pString URL safe string to convert into its original form 258 * @param charset the original string charset 259 * @return original string 260 * @throws DecoderException Thrown if URL decoding is unsuccessful 261 * @throws UnsupportedEncodingException Thrown if charset is not 262 * supported 263 */ 264 public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException { 265 if (pString == null) { 266 return null; 267 } 268 return new String(decode(StringUtils.getBytesUsAscii(pString)), charset); 269 } 270 271 /** 272 * Decodes a URL safe string into its original form using the default 273 * string charset. Escaped characters are converted back to their 274 * original representation. 275 * 276 * @param pString URL safe string to convert into its original form 277 * @return original string 278 * @throws DecoderException Thrown if URL decoding is unsuccessful 279 * 280 * @see #getDefaultCharset() 281 */ 282 public String decode(String pString) throws DecoderException { 283 if (pString == null) { 284 return null; 285 } 286 try { 287 return decode(pString, getDefaultCharset()); 288 } catch (UnsupportedEncodingException e) { 289 throw new DecoderException(e.getMessage(), e); 290 } 291 } 292 293 /** 294 * Encodes an object into its URL safe form. Unsafe characters are 295 * escaped. 296 * 297 * @param pObject string to convert to a URL safe form 298 * @return URL safe object 299 * @throws EncoderException Thrown if URL encoding is not 300 * applicable to objects of this type or 301 * if encoding is unsuccessful 302 */ 303 public Object encode(Object pObject) throws EncoderException { 304 if (pObject == null) { 305 return null; 306 } else if (pObject instanceof byte[]) { 307 return encode((byte[])pObject); 308 } else if (pObject instanceof String) { 309 return encode((String)pObject); 310 } else { 311 throw new EncoderException("Objects of type " + 312 pObject.getClass().getName() + " cannot be URL encoded"); 313 314 } 315 } 316 317 /** 318 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original 319 * representation. 320 * 321 * @param pObject 322 * URL safe object to convert into its original form 323 * @return original object 324 * @throws DecoderException 325 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is 326 * encountered during the decode process. 327 */ 328 public Object decode(Object pObject) throws DecoderException { 329 if (pObject == null) { 330 return null; 331 } else if (pObject instanceof byte[]) { 332 return decode((byte[]) pObject); 333 } else if (pObject instanceof String) { 334 return decode((String) pObject); 335 } else { 336 throw new DecoderException("Objects of type " + pObject.getClass().getName() + " cannot be URL decoded"); 337 338 } 339 } 340 341 /** 342 * The <code>String</code> encoding used for decoding and encoding. 343 * 344 * @return Returns the encoding. 345 * 346 * @deprecated use #getDefaultCharset() 347 */ 348 public String getEncoding() { 349 return this.charset; 350 } 351 352 /** 353 * The default charset used for string decoding and encoding. 354 * 355 * @return the default string charset. 356 */ 357 public String getDefaultCharset() { 358 return this.charset; 359 } 360 361 }