001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import org.apache.commons.codec.BinaryDecoder;
021    import org.apache.commons.codec.BinaryEncoder;
022    import org.apache.commons.codec.DecoderException;
023    import org.apache.commons.codec.EncoderException;
024    
025    /**
026     * Translates between byte arrays and strings of "0"s and "1"s.
027     * 
028     * TODO: may want to add more bit vector functions like and/or/xor/nand 
029     * TODO: also might be good to generate boolean[] from byte[] et. cetera.
030     * 
031     * @author Apache Software Foundation
032     * @since 1.3
033     * @version $Id: BinaryCodec.java 798433 2009-07-28 07:53:10Z ggregory $
034     */
035    public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
036        /*
037         * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
038         * it.
039         */
040        /** Empty char array. */
041        private static final char[] EMPTY_CHAR_ARRAY = new char[0];
042    
043        /** Empty byte array. */
044        private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
045    
046        /** Mask for bit 0 of a byte. */
047        private static final int BIT_0 = 1;
048    
049        /** Mask for bit 1 of a byte. */
050        private static final int BIT_1 = 0x02;
051    
052        /** Mask for bit 2 of a byte. */
053        private static final int BIT_2 = 0x04;
054    
055        /** Mask for bit 3 of a byte. */
056        private static final int BIT_3 = 0x08;
057    
058        /** Mask for bit 4 of a byte. */
059        private static final int BIT_4 = 0x10;
060    
061        /** Mask for bit 5 of a byte. */
062        private static final int BIT_5 = 0x20;
063    
064        /** Mask for bit 6 of a byte. */
065        private static final int BIT_6 = 0x40;
066    
067        /** Mask for bit 7 of a byte. */
068        private static final int BIT_7 = 0x80;
069    
070        private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
071    
072        /**
073         * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
074         * 
075         * @param raw
076         *                  the raw binary data to convert
077         * @return 0 and 1 ASCII character bytes one for each bit of the argument
078         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
079         */
080        public byte[] encode(byte[] raw) {
081            return toAsciiBytes(raw);
082        }
083    
084        /**
085         * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
086         * 
087         * @param raw
088         *                  the raw binary data to convert
089         * @return 0 and 1 ASCII character chars one for each bit of the argument
090         * @throws EncoderException
091         *                  if the argument is not a byte[]
092         * @see org.apache.commons.codec.Encoder#encode(Object)
093         */
094        public Object encode(Object raw) throws EncoderException {
095            if (!(raw instanceof byte[])) {
096                throw new EncoderException("argument not a byte array");
097            }
098            return toAsciiChars((byte[]) raw);
099        }
100    
101        /**
102         * Decodes a byte array where each byte represents an ASCII '0' or '1'.
103         * 
104         * @param ascii
105         *                  each byte represents an ASCII '0' or '1'
106         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
107         * @throws DecoderException
108         *                  if argument is not a byte[], char[] or String
109         * @see org.apache.commons.codec.Decoder#decode(Object)
110         */
111        public Object decode(Object ascii) throws DecoderException {
112            if (ascii == null) {
113                return EMPTY_BYTE_ARRAY;
114            }
115            if (ascii instanceof byte[]) {
116                return fromAscii((byte[]) ascii);
117            }
118            if (ascii instanceof char[]) {
119                return fromAscii((char[]) ascii);
120            }
121            if (ascii instanceof String) {
122                return fromAscii(((String) ascii).toCharArray());
123            }
124            throw new DecoderException("argument not a byte array");
125        }
126    
127        /**
128         * Decodes a byte array where each byte represents an ASCII '0' or '1'.
129         * 
130         * @param ascii
131         *                  each byte represents an ASCII '0' or '1'
132         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
133         * @see org.apache.commons.codec.Decoder#decode(Object)
134         */
135        public byte[] decode(byte[] ascii) {
136            return fromAscii(ascii);
137        }
138    
139        /**
140         * Decodes a String where each char of the String represents an ASCII '0' or '1'.
141         * 
142         * @param ascii
143         *                  String of '0' and '1' characters
144         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
145         * @see org.apache.commons.codec.Decoder#decode(Object)
146         */
147        public byte[] toByteArray(String ascii) {
148            if (ascii == null) {
149                return EMPTY_BYTE_ARRAY;
150            }
151            return fromAscii(ascii.toCharArray());
152        }
153    
154        // ------------------------------------------------------------------------
155        //
156        // static codec operations
157        //
158        // ------------------------------------------------------------------------
159        /**
160         * Decodes a char array where each char represents an ASCII '0' or '1'.
161         * 
162         * @param ascii
163         *                  each char represents an ASCII '0' or '1'
164         * @return the raw encoded binary where each bit corresponds to a char in the char array argument
165         */
166        public static byte[] fromAscii(char[] ascii) {
167            if (ascii == null || ascii.length == 0) {
168                return EMPTY_BYTE_ARRAY;
169            }
170            // get length/8 times bytes with 3 bit shifts to the right of the length
171            byte[] l_raw = new byte[ascii.length >> 3];
172            /*
173             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
174             * loop.
175             */
176            for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
177                for (int bits = 0; bits < BITS.length; ++bits) {
178                    if (ascii[jj - bits] == '1') {
179                        l_raw[ii] |= BITS[bits];
180                    }
181                }
182            }
183            return l_raw;
184        }
185    
186        /**
187         * Decodes a byte array where each byte represents an ASCII '0' or '1'.
188         * 
189         * @param ascii
190         *                  each byte represents an ASCII '0' or '1'
191         * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
192         */
193        public static byte[] fromAscii(byte[] ascii) {
194            if (isEmpty(ascii)) {
195                return EMPTY_BYTE_ARRAY;
196            }
197            // get length/8 times bytes with 3 bit shifts to the right of the length
198            byte[] l_raw = new byte[ascii.length >> 3];
199            /*
200             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
201             * loop.
202             */
203            for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) {
204                for (int bits = 0; bits < BITS.length; ++bits) {
205                    if (ascii[jj - bits] == '1') {
206                        l_raw[ii] |= BITS[bits];
207                    }
208                }
209            }
210            return l_raw;
211        }
212    
213        /**
214         * Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.)
215         * 
216         * @param array
217         *            the source array
218         * @return <code>true</code> if the given array is <code>null</code> or empty (size 0.)
219         */
220        private static boolean isEmpty(byte[] array) {
221            return array == null || array.length == 0;
222        }
223    
224        /**
225         * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
226         * char.
227         * 
228         * @param raw
229         *                  the raw binary data to convert
230         * @return an array of 0 and 1 character bytes for each bit of the argument
231         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
232         */
233        public static byte[] toAsciiBytes(byte[] raw) {
234            if (isEmpty(raw)) {
235                return EMPTY_BYTE_ARRAY;
236            }
237            // get 8 times the bytes with 3 bit shifts to the left of the length
238            byte[] l_ascii = new byte[raw.length << 3];
239            /*
240             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
241             * loop.
242             */
243            for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
244                for (int bits = 0; bits < BITS.length; ++bits) {
245                    if ((raw[ii] & BITS[bits]) == 0) {
246                        l_ascii[jj - bits] = '0';
247                    } else {
248                        l_ascii[jj - bits] = '1';
249                    }
250                }
251            }
252            return l_ascii;
253        }
254    
255        /**
256         * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
257         * 
258         * @param raw
259         *                  the raw binary data to convert
260         * @return an array of 0 and 1 characters for each bit of the argument
261         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
262         */
263        public static char[] toAsciiChars(byte[] raw) {
264            if (isEmpty(raw)) {
265                return EMPTY_CHAR_ARRAY;
266            }
267            // get 8 times the bytes with 3 bit shifts to the left of the length
268            char[] l_ascii = new char[raw.length << 3];
269            /*
270             * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
271             * loop.
272             */
273            for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) {
274                for (int bits = 0; bits < BITS.length; ++bits) {
275                    if ((raw[ii] & BITS[bits]) == 0) {
276                        l_ascii[jj - bits] = '0';
277                    } else {
278                        l_ascii[jj - bits] = '1';
279                    }
280                }
281            }
282            return l_ascii;
283        }
284    
285        /**
286         * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
287         * 
288         * @param raw
289         *                  the raw binary data to convert
290         * @return a String of 0 and 1 characters representing the binary data
291         * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
292         */
293        public static String toAsciiString(byte[] raw) {
294            return new String(toAsciiChars(raw));
295        }
296    }