001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.binary;
019
020 import java.io.UnsupportedEncodingException;
021
022 import org.apache.commons.codec.CharEncoding;
023
024 /**
025 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a
026 * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
027 *
028 * @see CharEncoding
029 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
030 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
031 * @version $Id: StringUtils.java 801391 2009-08-05 19:55:54Z ggregory $
032 * @since 1.4
033 */
034 public class StringUtils {
035
036 /**
037 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
038 * byte array.
039 *
040 * @param string
041 * the String to encode
042 * @return encoded bytes
043 * @throws IllegalStateException
044 * Thrown when the charset is missing, which should be never according the the Java specification.
045 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
046 * @see #getBytesUnchecked(String, String)
047 */
048 public static byte[] getBytesIso8859_1(String string) {
049 return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
050 }
051
052 /**
053 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
054 * array.
055 *
056 * @param string
057 * the String to encode
058 * @return encoded bytes
059 * @throws IllegalStateException
060 * Thrown when the charset is missing, which should be never according the the Java specification.
061 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
062 * @see #getBytesUnchecked(String, String)
063 */
064 public static byte[] getBytesUsAscii(String string) {
065 return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
066 }
067
068 /**
069 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
070 * array.
071 *
072 * @param string
073 * the String to encode
074 * @return encoded bytes
075 * @throws IllegalStateException
076 * Thrown when the charset is missing, which should be never according the the Java specification.
077 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
078 * @see #getBytesUnchecked(String, String)
079 */
080 public static byte[] getBytesUtf16(String string) {
081 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
082 }
083
084 /**
085 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
086 * array.
087 *
088 * @param string
089 * the String to encode
090 * @return encoded bytes
091 * @throws IllegalStateException
092 * Thrown when the charset is missing, which should be never according the the Java specification.
093 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
094 * @see #getBytesUnchecked(String, String)
095 */
096 public static byte[] getBytesUtf16Be(String string) {
097 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
098 }
099
100 /**
101 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
102 * array.
103 *
104 * @param string
105 * the String to encode
106 * @return encoded bytes
107 * @throws IllegalStateException
108 * Thrown when the charset is missing, which should be never according the the Java specification.
109 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
110 * @see #getBytesUnchecked(String, String)
111 */
112 public static byte[] getBytesUtf16Le(String string) {
113 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
114 }
115
116 /**
117 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
118 * array.
119 *
120 * @param string
121 * the String to encode
122 * @return encoded bytes
123 * @throws IllegalStateException
124 * Thrown when the charset is missing, which should be never according the the Java specification.
125 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126 * @see #getBytesUnchecked(String, String)
127 */
128 public static byte[] getBytesUtf8(String string) {
129 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
130 }
131
132 /**
133 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
134 * array.
135 * <p>
136 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
137 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
138 * </p>
139 *
140 * @param string
141 * the String to encode
142 * @param charsetName
143 * The name of a required {@link java.nio.charset.Charset}
144 * @return encoded bytes
145 * @throws IllegalStateException
146 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
147 * required charset name.
148 * @see CharEncoding
149 * @see String#getBytes(String)
150 */
151 public static byte[] getBytesUnchecked(String string, String charsetName) {
152 if (string == null) {
153 return null;
154 }
155 try {
156 return string.getBytes(charsetName);
157 } catch (UnsupportedEncodingException e) {
158 throw StringUtils.newIllegalStateException(charsetName, e);
159 }
160 }
161
162 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
163 return new IllegalStateException(charsetName + ": " + e);
164 }
165
166 /**
167 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
168 * <p>
169 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
170 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
171 * </p>
172 *
173 * @param bytes
174 * The bytes to be decoded into characters
175 * @param charsetName
176 * The name of a required {@link java.nio.charset.Charset}
177 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
178 * @throws IllegalStateException
179 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
180 * required charset name.
181 * @see CharEncoding
182 * @see String#String(byte[], String)
183 */
184 public static String newString(byte[] bytes, String charsetName) {
185 if (bytes == null) {
186 return null;
187 }
188 try {
189 return new String(bytes, charsetName);
190 } catch (UnsupportedEncodingException e) {
191 throw StringUtils.newIllegalStateException(charsetName, e);
192 }
193 }
194
195 /**
196 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
197 *
198 * @param bytes
199 * The bytes to be decoded into characters
200 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
201 * @throws IllegalStateException
202 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
203 * charset is required.
204 */
205 public static String newStringIso8859_1(byte[] bytes) {
206 return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
207 }
208
209 /**
210 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
211 *
212 * @param bytes
213 * The bytes to be decoded into characters
214 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
215 * @throws IllegalStateException
216 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
217 * charset is required.
218 */
219 public static String newStringUsAscii(byte[] bytes) {
220 return StringUtils.newString(bytes, CharEncoding.US_ASCII);
221 }
222
223 /**
224 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
225 *
226 * @param bytes
227 * The bytes to be decoded into characters
228 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
229 * @throws IllegalStateException
230 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
231 * charset is required.
232 */
233 public static String newStringUtf16(byte[] bytes) {
234 return StringUtils.newString(bytes, CharEncoding.UTF_16);
235 }
236
237 /**
238 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
239 *
240 * @param bytes
241 * The bytes to be decoded into characters
242 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
243 * @throws IllegalStateException
244 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
245 * charset is required.
246 */
247 public static String newStringUtf16Be(byte[] bytes) {
248 return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
249 }
250
251 /**
252 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
253 *
254 * @param bytes
255 * The bytes to be decoded into characters
256 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
257 * @throws IllegalStateException
258 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
259 * charset is required.
260 */
261 public static String newStringUtf16Le(byte[] bytes) {
262 return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
263 }
264
265 /**
266 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
267 *
268 * @param bytes
269 * The bytes to be decoded into characters
270 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
271 * @throws IllegalStateException
272 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
273 * charset is required.
274 */
275 public static String newStringUtf8(byte[] bytes) {
276 return StringUtils.newString(bytes, CharEncoding.UTF_8);
277 }
278
279 }