001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import java.io.UnsupportedEncodingException;
021    
022    import org.apache.commons.codec.CharEncoding;
023    
024    /**
025     * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a
026     * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
027     * 
028     * @see CharEncoding
029     * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
030     * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
031     * @version $Id: StringUtils.java 801391 2009-08-05 19:55:54Z ggregory $
032     * @since 1.4
033     */
034    public class StringUtils {
035    
036        /**
037         * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
038         * byte array.
039         * 
040         * @param string
041         *            the String to encode
042         * @return encoded bytes
043         * @throws IllegalStateException
044         *             Thrown when the charset is missing, which should be never according the the Java specification.
045         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
046         * @see #getBytesUnchecked(String, String)
047         */
048        public static byte[] getBytesIso8859_1(String string) {
049            return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
050        }
051    
052        /**
053         * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
054         * array.
055         * 
056         * @param string
057         *            the String to encode
058         * @return encoded bytes
059         * @throws IllegalStateException
060         *             Thrown when the charset is missing, which should be never according the the Java specification.
061         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
062         * @see #getBytesUnchecked(String, String)
063         */
064        public static byte[] getBytesUsAscii(String string) {
065            return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
066        }
067    
068        /**
069         * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
070         * array.
071         * 
072         * @param string
073         *            the String to encode
074         * @return encoded bytes
075         * @throws IllegalStateException
076         *             Thrown when the charset is missing, which should be never according the the Java specification.
077         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
078         * @see #getBytesUnchecked(String, String)
079         */
080        public static byte[] getBytesUtf16(String string) {
081            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
082        }
083    
084        /**
085         * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
086         * array.
087         * 
088         * @param string
089         *            the String to encode
090         * @return encoded bytes
091         * @throws IllegalStateException
092         *             Thrown when the charset is missing, which should be never according the the Java specification.
093         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
094         * @see #getBytesUnchecked(String, String)
095         */
096        public static byte[] getBytesUtf16Be(String string) {
097            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
098        }
099    
100        /**
101         * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
102         * array.
103         * 
104         * @param string
105         *            the String to encode
106         * @return encoded bytes
107         * @throws IllegalStateException
108         *             Thrown when the charset is missing, which should be never according the the Java specification.
109         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
110         * @see #getBytesUnchecked(String, String)
111         */
112        public static byte[] getBytesUtf16Le(String string) {
113            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
114        }
115    
116        /**
117         * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
118         * array.
119         * 
120         * @param string
121         *            the String to encode
122         * @return encoded bytes
123         * @throws IllegalStateException
124         *             Thrown when the charset is missing, which should be never according the the Java specification.
125         * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126         * @see #getBytesUnchecked(String, String)
127         */
128        public static byte[] getBytesUtf8(String string) {
129            return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
130        }
131    
132        /**
133         * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
134         * array.
135         * <p>
136         * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
137         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
138         * </p>
139         * 
140         * @param string
141         *            the String to encode
142         * @param charsetName
143         *            The name of a required {@link java.nio.charset.Charset}
144         * @return encoded bytes
145         * @throws IllegalStateException
146         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
147         *             required charset name.
148         * @see CharEncoding
149         * @see String#getBytes(String)
150         */
151        public static byte[] getBytesUnchecked(String string, String charsetName) {
152            if (string == null) {
153                return null;
154            }
155            try {
156                return string.getBytes(charsetName);
157            } catch (UnsupportedEncodingException e) {
158                throw StringUtils.newIllegalStateException(charsetName, e);
159            }
160        }
161    
162        private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
163            return new IllegalStateException(charsetName + ": " + e);
164        }
165    
166        /**
167         * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
168         * <p>
169         * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
170         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
171         * </p>
172         * 
173         * @param bytes
174         *            The bytes to be decoded into characters
175         * @param charsetName
176         *            The name of a required {@link java.nio.charset.Charset}
177         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
178         * @throws IllegalStateException
179         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
180         *             required charset name.
181         * @see CharEncoding
182         * @see String#String(byte[], String)
183         */
184        public static String newString(byte[] bytes, String charsetName) {
185            if (bytes == null) {
186                return null;
187            }
188            try {
189                return new String(bytes, charsetName);
190            } catch (UnsupportedEncodingException e) {
191                throw StringUtils.newIllegalStateException(charsetName, e);
192            }
193        }
194    
195        /**
196         * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
197         * 
198         * @param bytes
199         *            The bytes to be decoded into characters
200         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
201         * @throws IllegalStateException
202         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
203         *             charset is required.
204         */
205        public static String newStringIso8859_1(byte[] bytes) {
206            return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
207        }
208    
209        /**
210         * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
211         * 
212         * @param bytes
213         *            The bytes to be decoded into characters
214         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
215         * @throws IllegalStateException
216         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
217         *             charset is required.
218         */
219        public static String newStringUsAscii(byte[] bytes) {
220            return StringUtils.newString(bytes, CharEncoding.US_ASCII);
221        }
222    
223        /**
224         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
225         * 
226         * @param bytes
227         *            The bytes to be decoded into characters
228         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
229         * @throws IllegalStateException
230         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
231         *             charset is required.
232         */
233        public static String newStringUtf16(byte[] bytes) {
234            return StringUtils.newString(bytes, CharEncoding.UTF_16);
235        }
236    
237        /**
238         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
239         * 
240         * @param bytes
241         *            The bytes to be decoded into characters
242         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
243         * @throws IllegalStateException
244         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
245         *             charset is required.
246         */
247        public static String newStringUtf16Be(byte[] bytes) {
248            return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
249        }
250    
251        /**
252         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
253         * 
254         * @param bytes
255         *            The bytes to be decoded into characters
256         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
257         * @throws IllegalStateException
258         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
259         *             charset is required.
260         */
261        public static String newStringUtf16Le(byte[] bytes) {
262            return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
263        }
264    
265        /**
266         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
267         * 
268         * @param bytes
269         *            The bytes to be decoded into characters
270         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
271         * @throws IllegalStateException
272         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
273         *             charset is required.
274         */
275        public static String newStringUtf8(byte[] bytes) {
276            return StringUtils.newString(bytes, CharEncoding.UTF_8);
277        }
278    
279    }