001 /* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017 package com.google.common.primitives; 018 019 import static com.google.common.base.Preconditions.checkArgument; 020 import static com.google.common.base.Preconditions.checkNotNull; 021 022 import com.google.common.annotations.VisibleForTesting; 023 024 import sun.misc.Unsafe; 025 026 import java.lang.reflect.Field; 027 import java.nio.ByteOrder; 028 import java.security.AccessController; 029 import java.security.PrivilegedAction; 030 import java.util.Comparator; 031 032 /** 033 * Static utility methods pertaining to {@code byte} primitives that interpret 034 * values as <i>unsigned</i> (that is, any negative value {@code b} is treated 035 * as the positive value {@code 256 + b}). The corresponding methods that treat 036 * the values as signed are found in {@link SignedBytes}, and the methods for 037 * which signedness is not an issue are in {@link Bytes}. 038 * 039 * @author Kevin Bourrillion 040 * @author Martin Buchholz 041 * @author Hiroshi Yamauchi 042 * @since 1.0 043 */ 044 public final class UnsignedBytes { 045 private UnsignedBytes() {} 046 047 /** 048 * The largest power of two that can be represented as an unsigned {@code byte}. 049 * 050 * @since 10.0 051 */ 052 public static final byte MAX_POWER_OF_TWO = (byte) (1 << 7); 053 054 /** 055 * Returns the value of the given byte as an integer, when treated as 056 * unsigned. That is, returns {@code value + 256} if {@code value} is 057 * negative; {@code value} itself otherwise. 058 * 059 * @since 6.0 060 */ 061 public static int toInt(byte value) { 062 return value & 0xFF; 063 } 064 065 /** 066 * Returns the {@code byte} value that, when treated as unsigned, is equal to 067 * {@code value}, if possible. 068 * 069 * @param value a value between 0 and 255 inclusive 070 * @return the {@code byte} value that, when treated as unsigned, equals 071 * {@code value} 072 * @throws IllegalArgumentException if {@code value} is negative or greater 073 * than 255 074 */ 075 public static byte checkedCast(long value) { 076 checkArgument(value >> 8 == 0, "out of range: %s", value); 077 return (byte) value; 078 } 079 080 /** 081 * Returns the {@code byte} value that, when treated as unsigned, is nearest 082 * in value to {@code value}. 083 * 084 * @param value any {@code long} value 085 * @return {@code (byte) 255} if {@code value >= 255}, {@code (byte) 0} if 086 * {@code value <= 0}, and {@code value} cast to {@code byte} otherwise 087 */ 088 public static byte saturatedCast(long value) { 089 if (value > 255) { 090 return (byte) 255; // -1 091 } 092 if (value < 0) { 093 return (byte) 0; 094 } 095 return (byte) value; 096 } 097 098 /** 099 * Compares the two specified {@code byte} values, treating them as unsigned 100 * values between 0 and 255 inclusive. For example, {@code (byte) -127} is 101 * considered greater than {@code (byte) 127} because it is seen as having 102 * the value of positive {@code 129}. 103 * 104 * @param a the first {@code byte} to compare 105 * @param b the second {@code byte} to compare 106 * @return a negative value if {@code a} is less than {@code b}; a positive 107 * value if {@code a} is greater than {@code b}; or zero if they are equal 108 */ 109 public static int compare(byte a, byte b) { 110 return toInt(a) - toInt(b); 111 } 112 113 /** 114 * Returns the least value present in {@code array}. 115 * 116 * @param array a <i>nonempty</i> array of {@code byte} values 117 * @return the value present in {@code array} that is less than or equal to 118 * every other value in the array 119 * @throws IllegalArgumentException if {@code array} is empty 120 */ 121 public static byte min(byte... array) { 122 checkArgument(array.length > 0); 123 int min = toInt(array[0]); 124 for (int i = 1; i < array.length; i++) { 125 int next = toInt(array[i]); 126 if (next < min) { 127 min = next; 128 } 129 } 130 return (byte) min; 131 } 132 133 /** 134 * Returns the greatest value present in {@code array}. 135 * 136 * @param array a <i>nonempty</i> array of {@code byte} values 137 * @return the value present in {@code array} that is greater than or equal 138 * to every other value in the array 139 * @throws IllegalArgumentException if {@code array} is empty 140 */ 141 public static byte max(byte... array) { 142 checkArgument(array.length > 0); 143 int max = toInt(array[0]); 144 for (int i = 1; i < array.length; i++) { 145 int next = toInt(array[i]); 146 if (next > max) { 147 max = next; 148 } 149 } 150 return (byte) max; 151 } 152 153 /** 154 * Returns a string containing the supplied {@code byte} values separated by 155 * {@code separator}. For example, {@code join(":", (byte) 1, (byte) 2, 156 * (byte) 255)} returns the string {@code "1:2:255"}. 157 * 158 * @param separator the text that should appear between consecutive values in 159 * the resulting string (but not at the start or end) 160 * @param array an array of {@code byte} values, possibly empty 161 */ 162 public static String join(String separator, byte... array) { 163 checkNotNull(separator); 164 if (array.length == 0) { 165 return ""; 166 } 167 168 // For pre-sizing a builder, just get the right order of magnitude 169 StringBuilder builder = new StringBuilder(array.length * 5); 170 builder.append(toInt(array[0])); 171 for (int i = 1; i < array.length; i++) { 172 builder.append(separator).append(toInt(array[i])); 173 } 174 return builder.toString(); 175 } 176 177 /** 178 * Returns a comparator that compares two {@code byte} arrays 179 * lexicographically. That is, it compares, using {@link 180 * #compare(byte, byte)}), the first pair of values that follow any common 181 * prefix, or when one array is a prefix of the other, treats the shorter 182 * array as the lesser. For example, {@code [] < [0x01] < [0x01, 0x7F] < 183 * [0x01, 0x80] < [0x02]}. Values are treated as unsigned. 184 * 185 * <p>The returned comparator is inconsistent with {@link 186 * Object#equals(Object)} (since arrays support only identity equality), but 187 * it is consistent with {@link java.util.Arrays#equals(byte[], byte[])}. 188 * 189 * @see <a href="http://en.wikipedia.org/wiki/Lexicographical_order"> 190 * Lexicographical order article at Wikipedia</a> 191 * @since 2.0 192 */ 193 public static Comparator<byte[]> lexicographicalComparator() { 194 return LexicographicalComparatorHolder.BEST_COMPARATOR; 195 } 196 197 @VisibleForTesting 198 static Comparator<byte[]> lexicographicalComparatorJavaImpl() { 199 return LexicographicalComparatorHolder.PureJavaComparator.INSTANCE; 200 } 201 202 /** 203 * Provides a lexicographical comparator implementation; either a Java 204 * implementation or a faster implementation based on {@link Unsafe}. 205 * 206 * <p>Uses reflection to gracefully fall back to the Java implementation if 207 * {@code Unsafe} isn't available. 208 */ 209 @VisibleForTesting 210 static class LexicographicalComparatorHolder { 211 static final String UNSAFE_COMPARATOR_NAME = 212 LexicographicalComparatorHolder.class.getName() + "$UnsafeComparator"; 213 214 static final Comparator<byte[]> BEST_COMPARATOR = getBestComparator(); 215 216 @VisibleForTesting 217 enum UnsafeComparator implements Comparator<byte[]> { 218 INSTANCE; 219 220 static final boolean littleEndian = 221 ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); 222 223 /* 224 * The following static final fields exist for performance reasons. 225 * 226 * In UnsignedBytesBenchmark, accessing the following objects via static 227 * final fields is the fastest (more than twice as fast as the Java 228 * implementation, vs ~1.5x with non-final static fields, on x86_32) 229 * under the Hotspot server compiler. The reason is obviously that the 230 * non-final fields need to be reloaded inside the loop. 231 * 232 * And, no, defining (final or not) local variables out of the loop still 233 * isn't as good because the null check on the theUnsafe object remains 234 * inside the loop and BYTE_ARRAY_BASE_OFFSET doesn't get 235 * constant-folded. 236 * 237 * The compiler can treat static final fields as compile-time constants 238 * and can constant-fold them while (final or not) local variables are 239 * run time values. 240 */ 241 242 static final Unsafe theUnsafe; 243 244 /** The offset to the first element in a byte array. */ 245 static final int BYTE_ARRAY_BASE_OFFSET; 246 247 static { 248 theUnsafe = (Unsafe) AccessController.doPrivileged( 249 new PrivilegedAction<Object>() { 250 @Override 251 public Object run() { 252 try { 253 Field f = Unsafe.class.getDeclaredField("theUnsafe"); 254 f.setAccessible(true); 255 return f.get(null); 256 } catch (NoSuchFieldException e) { 257 // It doesn't matter what we throw; 258 // it's swallowed in getBestComparator(). 259 throw new Error(); 260 } catch (IllegalAccessException e) { 261 throw new Error(); 262 } 263 } 264 }); 265 266 BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class); 267 268 // sanity check - this should never fail 269 if (theUnsafe.arrayIndexScale(byte[].class) != 1) { 270 throw new AssertionError(); 271 } 272 } 273 274 @Override public int compare(byte[] left, byte[] right) { 275 int minLength = Math.min(left.length, right.length); 276 int minWords = minLength / Longs.BYTES; 277 278 /* 279 * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a 280 * time is no slower than comparing 4 bytes at a time even on 32-bit. 281 * On the other hand, it is substantially faster on 64-bit. 282 */ 283 for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) { 284 long lw = theUnsafe.getLong(left, BYTE_ARRAY_BASE_OFFSET + (long) i); 285 long rw = theUnsafe.getLong(right, BYTE_ARRAY_BASE_OFFSET + (long) i); 286 long diff = lw ^ rw; 287 288 if (diff != 0) { 289 if (!littleEndian) { 290 return UnsignedLongs.compare(lw, rw); 291 } 292 293 // Use binary search 294 int n = 0; 295 int y; 296 int x = (int) diff; 297 if (x == 0) { 298 x = (int) (diff >>> 32); 299 n = 32; 300 } 301 302 y = x << 16; 303 if (y == 0) { 304 n += 16; 305 } else { 306 x = y; 307 } 308 309 y = x << 8; 310 if (y == 0) { 311 n += 8; 312 } 313 return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL)); 314 } 315 } 316 317 // The epilogue to cover the last (minLength % 8) elements. 318 for (int i = minWords * Longs.BYTES; i < minLength; i++) { 319 int result = UnsignedBytes.compare(left[i], right[i]); 320 if (result != 0) { 321 return result; 322 } 323 } 324 return left.length - right.length; 325 } 326 } 327 328 enum PureJavaComparator implements Comparator<byte[]> { 329 INSTANCE; 330 331 @Override public int compare(byte[] left, byte[] right) { 332 int minLength = Math.min(left.length, right.length); 333 for (int i = 0; i < minLength; i++) { 334 int result = UnsignedBytes.compare(left[i], right[i]); 335 if (result != 0) { 336 return result; 337 } 338 } 339 return left.length - right.length; 340 } 341 } 342 343 /** 344 * Returns the Unsafe-using Comparator, or falls back to the pure-Java 345 * implementation if unable to do so. 346 */ 347 static Comparator<byte[]> getBestComparator() { 348 try { 349 Class<?> theClass = Class.forName(UNSAFE_COMPARATOR_NAME); 350 351 // yes, UnsafeComparator does implement Comparator<byte[]> 352 @SuppressWarnings("unchecked") 353 Comparator<byte[]> comparator = 354 (Comparator<byte[]>) theClass.getEnumConstants()[0]; 355 return comparator; 356 } catch (Throwable t) { // ensure we really catch *everything* 357 return lexicographicalComparatorJavaImpl(); 358 } 359 } 360 } 361 } 362