001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.net;
019
020 import java.io.ByteArrayOutputStream;
021 import java.io.UnsupportedEncodingException;
022 import java.util.BitSet;
023
024 import org.apache.commons.codec.BinaryDecoder;
025 import org.apache.commons.codec.BinaryEncoder;
026 import org.apache.commons.codec.DecoderException;
027 import org.apache.commons.codec.EncoderException;
028 import org.apache.commons.codec.CharEncoding;
029 import org.apache.commons.codec.StringDecoder;
030 import org.apache.commons.codec.StringEncoder;
031 import org.apache.commons.codec.binary.StringUtils;
032
033 /**
034 * <p>Implements the 'www-form-urlencoded' encoding scheme,
035 * also misleadingly known as URL encoding.</p>
036 *
037 * <p>For more detailed information please refer to
038 * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
039 * Chapter 17.13.4 'Form content types'</a> of the
040 * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a></p>
041 *
042 * <p>
043 * This codec is meant to be a replacement for standard Java classes
044 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
045 * on older Java platforms, as these classes in Java versions below
046 * 1.4 rely on the platform's default charset encoding.
047 * </p>
048 *
049 * @author Apache Software Foundation
050 * @since 1.2
051 * @version $Id: URLCodec.java 798416 2009-07-28 06:35:58Z ggregory $
052 */
053 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
054
055 /**
056 * Radix used in encoding and decoding.
057 */
058 static final int RADIX = 16;
059
060 /**
061 * The default charset used for string decoding and encoding. Consider this field final. The next major release may
062 * break compatibility and make this field be final.
063 */
064 protected String charset;
065
066 /**
067 * Consider this field final. The next major release may break compatibility and make this field be final.
068 */
069 protected static byte ESCAPE_CHAR = '%';
070 /**
071 * BitSet of www-form-url safe characters.
072 */
073 protected static final BitSet WWW_FORM_URL = new BitSet(256);
074
075 // Static initializer for www_form_url
076 static {
077 // alpha characters
078 for (int i = 'a'; i <= 'z'; i++) {
079 WWW_FORM_URL.set(i);
080 }
081 for (int i = 'A'; i <= 'Z'; i++) {
082 WWW_FORM_URL.set(i);
083 }
084 // numeric characters
085 for (int i = '0'; i <= '9'; i++) {
086 WWW_FORM_URL.set(i);
087 }
088 // special chars
089 WWW_FORM_URL.set('-');
090 WWW_FORM_URL.set('_');
091 WWW_FORM_URL.set('.');
092 WWW_FORM_URL.set('*');
093 // blank to be replaced with +
094 WWW_FORM_URL.set(' ');
095 }
096
097
098 /**
099 * Default constructor.
100 */
101 public URLCodec() {
102 this(CharEncoding.UTF_8);
103 }
104
105 /**
106 * Constructor which allows for the selection of a default charset
107 *
108 * @param charset the default string charset to use.
109 */
110 public URLCodec(String charset) {
111 super();
112 this.charset = charset;
113 }
114
115 /**
116 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
117 *
118 * @param urlsafe
119 * bitset of characters deemed URL safe
120 * @param bytes
121 * array of bytes to convert to URL safe characters
122 * @return array of bytes containing URL safe characters
123 */
124 public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
125 if (bytes == null) {
126 return null;
127 }
128 if (urlsafe == null) {
129 urlsafe = WWW_FORM_URL;
130 }
131
132 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
133 for (int i = 0; i < bytes.length; i++) {
134 int b = bytes[i];
135 if (b < 0) {
136 b = 256 + b;
137 }
138 if (urlsafe.get(b)) {
139 if (b == ' ') {
140 b = '+';
141 }
142 buffer.write(b);
143 } else {
144 buffer.write(ESCAPE_CHAR);
145 char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
146 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
147 buffer.write(hex1);
148 buffer.write(hex2);
149 }
150 }
151 return buffer.toByteArray();
152 }
153
154 /**
155 * Decodes an array of URL safe 7-bit characters into an array of
156 * original bytes. Escaped characters are converted back to their
157 * original representation.
158 *
159 * @param bytes array of URL safe characters
160 * @return array of original bytes
161 * @throws DecoderException Thrown if URL decoding is unsuccessful
162 */
163 public static final byte[] decodeUrl(byte[] bytes) throws DecoderException {
164 if (bytes == null) {
165 return null;
166 }
167 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
168 for (int i = 0; i < bytes.length; i++) {
169 int b = bytes[i];
170 if (b == '+') {
171 buffer.write(' ');
172 } else if (b == ESCAPE_CHAR) {
173 try {
174 int u = Utils.digit16(bytes[++i]);
175 int l = Utils.digit16(bytes[++i]);
176 buffer.write((char) ((u << 4) + l));
177 } catch (ArrayIndexOutOfBoundsException e) {
178 throw new DecoderException("Invalid URL encoding: ", e);
179 }
180 } else {
181 buffer.write(b);
182 }
183 }
184 return buffer.toByteArray();
185 }
186
187 /**
188 * Encodes an array of bytes into an array of URL safe 7-bit
189 * characters. Unsafe characters are escaped.
190 *
191 * @param bytes array of bytes to convert to URL safe characters
192 * @return array of bytes containing URL safe characters
193 */
194 public byte[] encode(byte[] bytes) {
195 return encodeUrl(WWW_FORM_URL, bytes);
196 }
197
198
199 /**
200 * Decodes an array of URL safe 7-bit characters into an array of
201 * original bytes. Escaped characters are converted back to their
202 * original representation.
203 *
204 * @param bytes array of URL safe characters
205 * @return array of original bytes
206 * @throws DecoderException Thrown if URL decoding is unsuccessful
207 */
208 public byte[] decode(byte[] bytes) throws DecoderException {
209 return decodeUrl(bytes);
210 }
211
212 /**
213 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
214 *
215 * @param pString
216 * string to convert to a URL safe form
217 * @param charset
218 * the charset for pString
219 * @return URL safe string
220 * @throws UnsupportedEncodingException
221 * Thrown if charset is not supported
222 */
223 public String encode(String pString, String charset) throws UnsupportedEncodingException {
224 if (pString == null) {
225 return null;
226 }
227 return StringUtils.newStringUsAscii(encode(pString.getBytes(charset)));
228 }
229
230 /**
231 * Encodes a string into its URL safe form using the default string
232 * charset. Unsafe characters are escaped.
233 *
234 * @param pString string to convert to a URL safe form
235 * @return URL safe string
236 * @throws EncoderException Thrown if URL encoding is unsuccessful
237 *
238 * @see #getDefaultCharset()
239 */
240 public String encode(String pString) throws EncoderException {
241 if (pString == null) {
242 return null;
243 }
244 try {
245 return encode(pString, getDefaultCharset());
246 } catch (UnsupportedEncodingException e) {
247 throw new EncoderException(e.getMessage(), e);
248 }
249 }
250
251
252 /**
253 * Decodes a URL safe string into its original form using the
254 * specified encoding. Escaped characters are converted back
255 * to their original representation.
256 *
257 * @param pString URL safe string to convert into its original form
258 * @param charset the original string charset
259 * @return original string
260 * @throws DecoderException Thrown if URL decoding is unsuccessful
261 * @throws UnsupportedEncodingException Thrown if charset is not
262 * supported
263 */
264 public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
265 if (pString == null) {
266 return null;
267 }
268 return new String(decode(StringUtils.getBytesUsAscii(pString)), charset);
269 }
270
271 /**
272 * Decodes a URL safe string into its original form using the default
273 * string charset. Escaped characters are converted back to their
274 * original representation.
275 *
276 * @param pString URL safe string to convert into its original form
277 * @return original string
278 * @throws DecoderException Thrown if URL decoding is unsuccessful
279 *
280 * @see #getDefaultCharset()
281 */
282 public String decode(String pString) throws DecoderException {
283 if (pString == null) {
284 return null;
285 }
286 try {
287 return decode(pString, getDefaultCharset());
288 } catch (UnsupportedEncodingException e) {
289 throw new DecoderException(e.getMessage(), e);
290 }
291 }
292
293 /**
294 * Encodes an object into its URL safe form. Unsafe characters are
295 * escaped.
296 *
297 * @param pObject string to convert to a URL safe form
298 * @return URL safe object
299 * @throws EncoderException Thrown if URL encoding is not
300 * applicable to objects of this type or
301 * if encoding is unsuccessful
302 */
303 public Object encode(Object pObject) throws EncoderException {
304 if (pObject == null) {
305 return null;
306 } else if (pObject instanceof byte[]) {
307 return encode((byte[])pObject);
308 } else if (pObject instanceof String) {
309 return encode((String)pObject);
310 } else {
311 throw new EncoderException("Objects of type " +
312 pObject.getClass().getName() + " cannot be URL encoded");
313
314 }
315 }
316
317 /**
318 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
319 * representation.
320 *
321 * @param pObject
322 * URL safe object to convert into its original form
323 * @return original object
324 * @throws DecoderException
325 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
326 * encountered during the decode process.
327 */
328 public Object decode(Object pObject) throws DecoderException {
329 if (pObject == null) {
330 return null;
331 } else if (pObject instanceof byte[]) {
332 return decode((byte[]) pObject);
333 } else if (pObject instanceof String) {
334 return decode((String) pObject);
335 } else {
336 throw new DecoderException("Objects of type " + pObject.getClass().getName() + " cannot be URL decoded");
337
338 }
339 }
340
341 /**
342 * The <code>String</code> encoding used for decoding and encoding.
343 *
344 * @return Returns the encoding.
345 *
346 * @deprecated use #getDefaultCharset()
347 */
348 public String getEncoding() {
349 return this.charset;
350 }
351
352 /**
353 * The default charset used for string decoding and encoding.
354 *
355 * @return the default string charset.
356 */
357 public String getDefaultCharset() {
358 return this.charset;
359 }
360
361 }