001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.binary; 019 020 import java.io.UnsupportedEncodingException; 021 022 import org.apache.commons.codec.CharEncoding; 023 024 /** 025 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a 026 * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 027 * 028 * @see CharEncoding 029 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 030 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a> 031 * @version $Id: StringUtils.java 801391 2009-08-05 19:55:54Z ggregory $ 032 * @since 1.4 033 */ 034 public class StringUtils { 035 036 /** 037 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new 038 * byte array. 039 * 040 * @param string 041 * the String to encode 042 * @return encoded bytes 043 * @throws IllegalStateException 044 * Thrown when the charset is missing, which should be never according the the Java specification. 045 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 046 * @see #getBytesUnchecked(String, String) 047 */ 048 public static byte[] getBytesIso8859_1(String string) { 049 return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1); 050 } 051 052 /** 053 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte 054 * array. 055 * 056 * @param string 057 * the String to encode 058 * @return encoded bytes 059 * @throws IllegalStateException 060 * Thrown when the charset is missing, which should be never according the the Java specification. 061 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 062 * @see #getBytesUnchecked(String, String) 063 */ 064 public static byte[] getBytesUsAscii(String string) { 065 return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII); 066 } 067 068 /** 069 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte 070 * array. 071 * 072 * @param string 073 * the String to encode 074 * @return encoded bytes 075 * @throws IllegalStateException 076 * Thrown when the charset is missing, which should be never according the the Java specification. 077 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 078 * @see #getBytesUnchecked(String, String) 079 */ 080 public static byte[] getBytesUtf16(String string) { 081 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16); 082 } 083 084 /** 085 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte 086 * array. 087 * 088 * @param string 089 * the String to encode 090 * @return encoded bytes 091 * @throws IllegalStateException 092 * Thrown when the charset is missing, which should be never according the the Java specification. 093 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 094 * @see #getBytesUnchecked(String, String) 095 */ 096 public static byte[] getBytesUtf16Be(String string) { 097 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE); 098 } 099 100 /** 101 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte 102 * array. 103 * 104 * @param string 105 * the String to encode 106 * @return encoded bytes 107 * @throws IllegalStateException 108 * Thrown when the charset is missing, which should be never according the the Java specification. 109 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 110 * @see #getBytesUnchecked(String, String) 111 */ 112 public static byte[] getBytesUtf16Le(String string) { 113 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE); 114 } 115 116 /** 117 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte 118 * array. 119 * 120 * @param string 121 * the String to encode 122 * @return encoded bytes 123 * @throws IllegalStateException 124 * Thrown when the charset is missing, which should be never according the the Java specification. 125 * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 126 * @see #getBytesUnchecked(String, String) 127 */ 128 public static byte[] getBytesUtf8(String string) { 129 return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8); 130 } 131 132 /** 133 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte 134 * array. 135 * <p> 136 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which 137 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 138 * </p> 139 * 140 * @param string 141 * the String to encode 142 * @param charsetName 143 * The name of a required {@link java.nio.charset.Charset} 144 * @return encoded bytes 145 * @throws IllegalStateException 146 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 147 * required charset name. 148 * @see CharEncoding 149 * @see String#getBytes(String) 150 */ 151 public static byte[] getBytesUnchecked(String string, String charsetName) { 152 if (string == null) { 153 return null; 154 } 155 try { 156 return string.getBytes(charsetName); 157 } catch (UnsupportedEncodingException e) { 158 throw StringUtils.newIllegalStateException(charsetName, e); 159 } 160 } 161 162 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) { 163 return new IllegalStateException(charsetName + ": " + e); 164 } 165 166 /** 167 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. 168 * <p> 169 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which 170 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 171 * </p> 172 * 173 * @param bytes 174 * The bytes to be decoded into characters 175 * @param charsetName 176 * The name of a required {@link java.nio.charset.Charset} 177 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 178 * @throws IllegalStateException 179 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 180 * required charset name. 181 * @see CharEncoding 182 * @see String#String(byte[], String) 183 */ 184 public static String newString(byte[] bytes, String charsetName) { 185 if (bytes == null) { 186 return null; 187 } 188 try { 189 return new String(bytes, charsetName); 190 } catch (UnsupportedEncodingException e) { 191 throw StringUtils.newIllegalStateException(charsetName, e); 192 } 193 } 194 195 /** 196 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset. 197 * 198 * @param bytes 199 * The bytes to be decoded into characters 200 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 201 * @throws IllegalStateException 202 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 203 * charset is required. 204 */ 205 public static String newStringIso8859_1(byte[] bytes) { 206 return StringUtils.newString(bytes, CharEncoding.ISO_8859_1); 207 } 208 209 /** 210 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset. 211 * 212 * @param bytes 213 * The bytes to be decoded into characters 214 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 215 * @throws IllegalStateException 216 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 217 * charset is required. 218 */ 219 public static String newStringUsAscii(byte[] bytes) { 220 return StringUtils.newString(bytes, CharEncoding.US_ASCII); 221 } 222 223 /** 224 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset. 225 * 226 * @param bytes 227 * The bytes to be decoded into characters 228 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 229 * @throws IllegalStateException 230 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 231 * charset is required. 232 */ 233 public static String newStringUtf16(byte[] bytes) { 234 return StringUtils.newString(bytes, CharEncoding.UTF_16); 235 } 236 237 /** 238 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset. 239 * 240 * @param bytes 241 * The bytes to be decoded into characters 242 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 243 * @throws IllegalStateException 244 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 245 * charset is required. 246 */ 247 public static String newStringUtf16Be(byte[] bytes) { 248 return StringUtils.newString(bytes, CharEncoding.UTF_16BE); 249 } 250 251 /** 252 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset. 253 * 254 * @param bytes 255 * The bytes to be decoded into characters 256 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 257 * @throws IllegalStateException 258 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 259 * charset is required. 260 */ 261 public static String newStringUtf16Le(byte[] bytes) { 262 return StringUtils.newString(bytes, CharEncoding.UTF_16LE); 263 } 264 265 /** 266 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset. 267 * 268 * @param bytes 269 * The bytes to be decoded into characters 270 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset. 271 * @throws IllegalStateException 272 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the 273 * charset is required. 274 */ 275 public static String newStringUtf8(byte[] bytes) { 276 return StringUtils.newString(bytes, CharEncoding.UTF_8); 277 } 278 279 }