2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.util
;
20 import static org
.apache
.hbase
.thirdparty
.com
.google
.common
.base
.Preconditions
.checkArgument
;
21 import static org
.apache
.hbase
.thirdparty
.com
.google
.common
.base
.Preconditions
.checkNotNull
;
22 import static org
.apache
.hbase
.thirdparty
.com
.google
.common
.base
.Preconditions
.checkPositionIndex
;
24 import java
.io
.DataInput
;
25 import java
.io
.DataOutput
;
26 import java
.io
.IOException
;
27 import java
.io
.UnsupportedEncodingException
;
28 import java
.math
.BigDecimal
;
29 import java
.math
.BigInteger
;
30 import java
.nio
.ByteBuffer
;
31 import java
.nio
.charset
.StandardCharsets
;
32 import java
.security
.SecureRandom
;
33 import java
.util
.ArrayList
;
34 import java
.util
.Arrays
;
35 import java
.util
.Collection
;
36 import java
.util
.Collections
;
37 import java
.util
.Comparator
;
38 import java
.util
.Iterator
;
39 import java
.util
.List
;
41 import org
.apache
.hadoop
.hbase
.Cell
;
42 import org
.apache
.hadoop
.hbase
.CellComparator
;
43 import org
.apache
.hadoop
.io
.RawComparator
;
44 import org
.apache
.hadoop
.io
.WritableComparator
;
45 import org
.apache
.hadoop
.io
.WritableUtils
;
46 import org
.apache
.yetus
.audience
.InterfaceAudience
;
47 import org
.slf4j
.Logger
;
48 import org
.slf4j
.LoggerFactory
;
50 import sun
.misc
.Unsafe
;
52 import org
.apache
.hbase
.thirdparty
.com
.google
.common
.annotations
.VisibleForTesting
;
53 import org
.apache
.hbase
.thirdparty
.org
.apache
.commons
.collections4
.CollectionUtils
;
56 * Utility class that handles byte arrays, conversions to/from other types,
57 * comparisons, hash code generation, manufacturing keys for HashMaps or
58 * HashSets, and can be used as key in maps or trees.
60 @SuppressWarnings("restriction")
61 @InterfaceAudience.Public
62 @edu.umd
.cs
.findbugs
.annotations
.SuppressWarnings(
63 value
="EQ_CHECK_FOR_OPERAND_NOT_COMPATIBLE_WITH_THIS",
64 justification
="It has been like this forever")
65 public class Bytes
implements Comparable
<Bytes
> {
67 // Using the charset canonical name for String/byte[] conversions is much
68 // more efficient due to use of cached encoders/decoders.
69 private static final String UTF8_CSN
= StandardCharsets
.UTF_8
.name();
71 //HConstants.EMPTY_BYTE_ARRAY should be updated if this changed
72 private static final byte [] EMPTY_BYTE_ARRAY
= new byte [0];
74 private static final Logger LOG
= LoggerFactory
.getLogger(Bytes
.class);
77 * Size of boolean in bytes
79 public static final int SIZEOF_BOOLEAN
= Byte
.SIZE
/ Byte
.SIZE
;
82 * Size of byte in bytes
84 public static final int SIZEOF_BYTE
= SIZEOF_BOOLEAN
;
87 * Size of char in bytes
89 public static final int SIZEOF_CHAR
= Character
.SIZE
/ Byte
.SIZE
;
92 * Size of double in bytes
94 public static final int SIZEOF_DOUBLE
= Double
.SIZE
/ Byte
.SIZE
;
97 * Size of float in bytes
99 public static final int SIZEOF_FLOAT
= Float
.SIZE
/ Byte
.SIZE
;
102 * Size of int in bytes
104 public static final int SIZEOF_INT
= Integer
.SIZE
/ Byte
.SIZE
;
107 * Size of long in bytes
109 public static final int SIZEOF_LONG
= Long
.SIZE
/ Byte
.SIZE
;
112 * Size of short in bytes
114 public static final int SIZEOF_SHORT
= Short
.SIZE
/ Byte
.SIZE
;
117 * Mask to apply to a long to reveal the lower int only. Use like this:
118 * int i = (int)(0xFFFFFFFF00000000L ^ some_long_value);
120 public static final long MASK_FOR_LOWER_INT_IN_LONG
= 0xFFFFFFFF00000000L
;
123 * Estimate of size cost to pay beyond payload in jvm for instance of byte [].
124 * Estimate based on study of jhat and jprofiler numbers.
126 // JHat says BU is 56 bytes.
127 // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
128 public static final int ESTIMATED_HEAP_TAX
= 16;
131 static final boolean UNSAFE_UNALIGNED
= UnsafeAvailChecker
.unaligned();
134 * Returns length of the byte array, returning 0 if the array is null.
135 * Useful for calculating sizes.
136 * @param b byte array, which can be null
137 * @return 0 if b is null, otherwise returns length
139 final public static int len(byte[] b
) {
140 return b
== null ?
0 : b
.length
;
143 private byte[] bytes
;
148 * Create a zero-size sequence.
155 * Create a Bytes using the byte array as the initial value.
156 * @param bytes This array becomes the backing storage for the object.
158 public Bytes(byte[] bytes
) {
159 this(bytes
, 0, bytes
.length
);
163 * Set the new Bytes to the contents of the passed
165 * @param ibw the value to set this Bytes to.
167 public Bytes(final Bytes ibw
) {
168 this(ibw
.get(), ibw
.getOffset(), ibw
.getLength());
172 * Set the value to a given byte range
173 * @param bytes the new byte range to set to
174 * @param offset the offset in newData to start at
175 * @param length the number of bytes in the range
177 public Bytes(final byte[] bytes
, final int offset
,
180 this.offset
= offset
;
181 this.length
= length
;
185 * Get the data from the Bytes.
186 * @return The data is only valid between offset and offset+length.
188 public byte [] get() {
189 if (this.bytes
== null) {
190 throw new IllegalStateException("Uninitialiized. Null constructor " +
191 "called w/o accompaying readFields invocation");
197 * @param b Use passed bytes as backing array for this instance.
199 public void set(final byte [] b
) {
204 * @param b Use passed bytes as backing array for this instance.
208 public void set(final byte [] b
, final int offset
, final int length
) {
210 this.offset
= offset
;
211 this.length
= length
;
215 * @return the number of valid bytes in the buffer
217 public int getLength() {
218 if (this.bytes
== null) {
219 throw new IllegalStateException("Uninitialiized. Null constructor " +
220 "called w/o accompaying readFields invocation");
228 public int getOffset(){
233 public int hashCode() {
234 return Bytes
.hashCode(bytes
, offset
, length
);
238 * Define the sort order of the Bytes.
239 * @param that The other bytes writable
240 * @return Positive if left is bigger than right, 0 if they are equal, and
241 * negative if left is smaller than right.
244 public int compareTo(Bytes that
) {
245 return BYTES_RAWCOMPARATOR
.compare(
246 this.bytes
, this.offset
, this.length
,
247 that
.bytes
, that
.offset
, that
.length
);
251 * Compares the bytes in this object to the specified byte array
253 * @return Positive if left is bigger than right, 0 if they are equal, and
254 * negative if left is smaller than right.
256 public int compareTo(final byte [] that
) {
257 return BYTES_RAWCOMPARATOR
.compare(
258 this.bytes
, this.offset
, this.length
,
259 that
, 0, that
.length
);
263 * @see Object#equals(Object)
266 public boolean equals(Object right_obj
) {
267 if (right_obj
instanceof byte []) {
268 return compareTo((byte [])right_obj
) == 0;
270 if (right_obj
instanceof Bytes
) {
271 return compareTo((Bytes
)right_obj
) == 0;
277 * @see Object#toString()
280 public String
toString() {
281 return Bytes
.toString(bytes
, offset
, length
);
285 * @param array List of byte [].
286 * @return Array of byte [].
288 public static byte [][] toArray(final List
<byte []> array
) {
289 // List#toArray doesn't work on lists of byte [].
290 byte[][] results
= new byte[array
.size()][];
291 for (int i
= 0; i
< array
.size(); i
++) {
292 results
[i
] = array
.get(i
);
298 * Returns a copy of the bytes referred to by this writable
300 public byte[] copyBytes() {
301 return Arrays
.copyOfRange(bytes
, offset
, offset
+length
);
304 * Byte array comparator class.
306 @InterfaceAudience.Public
307 public static class ByteArrayComparator
implements RawComparator
<byte []> {
311 public ByteArrayComparator() {
315 public int compare(byte [] left
, byte [] right
) {
316 return compareTo(left
, right
);
319 public int compare(byte [] b1
, int s1
, int l1
, byte [] b2
, int s2
, int l2
) {
320 return LexicographicalComparerHolder
.BEST_COMPARER
.
321 compareTo(b1
, s1
, l1
, b2
, s2
, l2
);
326 * A {@link ByteArrayComparator} that treats the empty array as the largest value.
327 * This is useful for comparing row end keys for regions.
329 // TODO: unfortunately, HBase uses byte[0] as both start and end keys for region
330 // boundaries. Thus semantically, we should treat empty byte array as the smallest value
331 // while comparing row keys, start keys etc; but as the largest value for comparing
332 // region boundaries for endKeys.
333 @InterfaceAudience.Public
334 public static class RowEndKeyComparator
extends ByteArrayComparator
{
336 public int compare(byte[] left
, byte[] right
) {
337 return compare(left
, 0, left
.length
, right
, 0, right
.length
);
340 public int compare(byte[] b1
, int s1
, int l1
, byte[] b2
, int s2
, int l2
) {
341 if (b1
== b2
&& s1
== s2
&& l1
== l2
) {
345 return l2
; //0 or positive
350 return super.compare(b1
, s1
, l1
, b2
, s2
, l2
);
355 * Pass this to TreeMaps where byte [] are keys.
357 public final static Comparator
<byte []> BYTES_COMPARATOR
= new ByteArrayComparator();
360 * Use comparing byte arrays, byte-by-byte
362 public final static RawComparator
<byte []> BYTES_RAWCOMPARATOR
= new ByteArrayComparator();
365 * Read byte-array written with a WritableableUtils.vint prefix.
366 * @param in Input to read from.
367 * @return byte array read off <code>in</code>
368 * @throws IOException e
370 public static byte [] readByteArray(final DataInput in
)
372 int len
= WritableUtils
.readVInt(in
);
374 throw new NegativeArraySizeException(Integer
.toString(len
));
376 byte [] result
= new byte[len
];
377 in
.readFully(result
, 0, len
);
382 * Read byte-array written with a WritableableUtils.vint prefix.
383 * IOException is converted to a RuntimeException.
384 * @param in Input to read from.
385 * @return byte array read off <code>in</code>
387 public static byte [] readByteArrayThrowsRuntime(final DataInput in
) {
389 return readByteArray(in
);
390 } catch (Exception e
) {
391 throw new RuntimeException(e
);
396 * Write byte-array with a WritableableUtils.vint prefix.
397 * @param out output stream to be written to
398 * @param b array to write
399 * @throws IOException e
401 public static void writeByteArray(final DataOutput out
, final byte [] b
)
404 WritableUtils
.writeVInt(out
, 0);
406 writeByteArray(out
, b
, 0, b
.length
);
411 * Write byte-array to out with a vint length prefix.
412 * @param out output stream
414 * @param offset offset into array
415 * @param length length past offset
416 * @throws IOException e
418 public static void writeByteArray(final DataOutput out
, final byte [] b
,
419 final int offset
, final int length
)
421 WritableUtils
.writeVInt(out
, length
);
422 out
.write(b
, offset
, length
);
426 * Write byte-array from src to tgt with a vint length prefix.
427 * @param tgt target array
428 * @param tgtOffset offset into target array
429 * @param src source array
430 * @param srcOffset source offset
431 * @param srcLength source length
432 * @return New offset in src array.
434 public static int writeByteArray(final byte [] tgt
, final int tgtOffset
,
435 final byte [] src
, final int srcOffset
, final int srcLength
) {
436 byte [] vint
= vintToBytes(srcLength
);
437 System
.arraycopy(vint
, 0, tgt
, tgtOffset
, vint
.length
);
438 int offset
= tgtOffset
+ vint
.length
;
439 System
.arraycopy(src
, srcOffset
, tgt
, offset
, srcLength
);
440 return offset
+ srcLength
;
444 * Put bytes at the specified byte array position.
445 * @param tgtBytes the byte array
446 * @param tgtOffset position in the array
447 * @param srcBytes array to write out
448 * @param srcOffset source offset
449 * @param srcLength source length
450 * @return incremented offset
452 public static int putBytes(byte[] tgtBytes
, int tgtOffset
, byte[] srcBytes
,
453 int srcOffset
, int srcLength
) {
454 System
.arraycopy(srcBytes
, srcOffset
, tgtBytes
, tgtOffset
, srcLength
);
455 return tgtOffset
+ srcLength
;
459 * Write a single byte out to the specified byte array position.
460 * @param bytes the byte array
461 * @param offset position in the array
462 * @param b byte to write out
463 * @return incremented offset
465 public static int putByte(byte[] bytes
, int offset
, byte b
) {
471 * Add the whole content of the ByteBuffer to the bytes arrays. The ByteBuffer is modified.
472 * @param bytes the byte array
473 * @param offset position in the array
474 * @param buf ByteBuffer to write out
475 * @return incremented offset
477 public static int putByteBuffer(byte[] bytes
, int offset
, ByteBuffer buf
) {
478 int len
= buf
.remaining();
479 buf
.get(bytes
, offset
, len
);
484 * Returns a new byte array, copied from the given {@code buf},
485 * from the index 0 (inclusive) to the limit (exclusive),
486 * regardless of the current position.
487 * The position and the other index parameters are not changed.
489 * @param buf a byte buffer
490 * @return the byte array
491 * @see #getBytes(ByteBuffer)
493 public static byte[] toBytes(ByteBuffer buf
) {
494 ByteBuffer dup
= buf
.duplicate();
496 return readBytes(dup
);
499 private static byte[] readBytes(ByteBuffer buf
) {
500 byte [] result
= new byte[buf
.remaining()];
506 * @param b Presumed UTF-8 encoded byte array.
507 * @return String made from <code>b</code>
509 public static String
toString(final byte [] b
) {
513 return toString(b
, 0, b
.length
);
517 * Joins two byte arrays together using a separator.
518 * @param b1 The first byte array.
519 * @param sep The separator to use.
520 * @param b2 The second byte array.
522 public static String
toString(final byte [] b1
,
525 return toString(b1
, 0, b1
.length
) + sep
+ toString(b2
, 0, b2
.length
);
529 * This method will convert utf8 encoded bytes into a string. If
530 * the given byte array is null, this method will return null.
532 * @param b Presumed UTF-8 encoded byte array.
533 * @param off offset into array
534 * @return String made from <code>b</code> or null
536 public static String
toString(final byte[] b
, int off
) {
540 int len
= b
.length
- off
;
545 return new String(b
, off
, len
, UTF8_CSN
);
546 } catch (UnsupportedEncodingException e
) {
547 // should never happen!
548 throw new IllegalArgumentException("UTF8 encoding is not supported", e
);
553 * This method will convert utf8 encoded bytes into a string. If
554 * the given byte array is null, this method will return null.
556 * @param b Presumed UTF-8 encoded byte array.
557 * @param off offset into array
558 * @param len length of utf-8 sequence
559 * @return String made from <code>b</code> or null
561 public static String
toString(final byte[] b
, int off
, int len
) {
569 return new String(b
, off
, len
, UTF8_CSN
);
570 } catch (UnsupportedEncodingException e
) {
571 // should never happen!
572 throw new IllegalArgumentException("UTF8 encoding is not supported", e
);
577 * Write a printable representation of a byte array.
579 * @param b byte array
581 * @see #toStringBinary(byte[], int, int)
583 public static String
toStringBinary(final byte [] b
) {
586 return toStringBinary(b
, 0, b
.length
);
590 * Converts the given byte buffer to a printable representation,
591 * from the index 0 (inclusive) to the limit (exclusive),
592 * regardless of the current position.
593 * The position and the other index parameters are not changed.
595 * @param buf a byte buffer
596 * @return a string representation of the buffer's binary contents
597 * @see #toBytes(ByteBuffer)
598 * @see #getBytes(ByteBuffer)
600 public static String
toStringBinary(ByteBuffer buf
) {
603 if (buf
.hasArray()) {
604 return toStringBinary(buf
.array(), buf
.arrayOffset(), buf
.limit());
606 return toStringBinary(toBytes(buf
));
609 private static final char[] HEX_CHARS_UPPER
= {
610 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
614 * Write a printable representation of a byte array. Non-printable
615 * characters are hex escaped in the format \\x%02X, eg:
618 * @param b array to write out
619 * @param off offset to start at
620 * @param len length to write
621 * @return string output
623 public static String
toStringBinary(final byte [] b
, int off
, int len
) {
624 StringBuilder result
= new StringBuilder();
625 // Just in case we are passed a 'len' that is > buffer length...
626 if (off
>= b
.length
) return result
.toString();
627 if (off
+ len
> b
.length
) len
= b
.length
- off
;
628 for (int i
= off
; i
< off
+ len
; ++i
) {
629 int ch
= b
[i
] & 0xFF;
630 if (ch
>= ' ' && ch
<= '~' && ch
!= '\\') {
631 result
.append((char)ch
);
633 result
.append("\\x");
634 result
.append(HEX_CHARS_UPPER
[ch
/ 0x10]);
635 result
.append(HEX_CHARS_UPPER
[ch
% 0x10]);
638 return result
.toString();
641 private static boolean isHexDigit(char c
) {
643 (c
>= 'A' && c
<= 'F') ||
644 (c
>= '0' && c
<= '9');
648 * Takes a ASCII digit in the range A-F0-9 and returns
649 * the corresponding integer/ordinal value.
650 * @param ch The hex digit.
651 * @return The converted hex value as a byte.
653 public static byte toBinaryFromHex(byte ch
) {
654 if (ch
>= 'A' && ch
<= 'F')
655 return (byte) ((byte)10 + (byte) (ch
- 'A'));
657 return (byte) (ch
- '0');
660 public static byte [] toBytesBinary(String in
) {
661 // this may be bigger than we need, but let's be safe.
662 byte [] b
= new byte[in
.length()];
664 for (int i
= 0; i
< in
.length(); ++i
) {
665 char ch
= in
.charAt(i
);
666 if (ch
== '\\' && in
.length() > i
+1 && in
.charAt(i
+1) == 'x') {
667 // ok, take next 2 hex digits.
668 char hd1
= in
.charAt(i
+2);
669 char hd2
= in
.charAt(i
+3);
671 // they need to be A-F0-9:
672 if (!isHexDigit(hd1
) ||
674 // bogus escape code, ignore:
677 // turn hex ASCII digit -> number
678 byte d
= (byte) ((toBinaryFromHex((byte)hd1
) << 4) + toBinaryFromHex((byte)hd2
));
683 b
[size
++] = (byte) ch
;
687 byte [] b2
= new byte[size
];
688 System
.arraycopy(b
, 0, b2
, 0, size
);
693 * Converts a string to a UTF-8 byte array.
695 * @return the byte array
697 public static byte[] toBytes(String s
) {
699 return s
.getBytes(UTF8_CSN
);
700 } catch (UnsupportedEncodingException e
) {
701 // should never happen!
702 throw new IllegalArgumentException("UTF8 decoding is not supported", e
);
707 * Convert a boolean to a byte array. True becomes -1
708 * and false becomes 0.
711 * @return <code>b</code> encoded in a byte array.
713 public static byte [] toBytes(final boolean b
) {
714 return new byte[] { b ?
(byte) -1 : (byte) 0 };
718 * Reverses {@link #toBytes(boolean)}
720 * @return True or false.
722 public static boolean toBoolean(final byte [] b
) {
724 throw new IllegalArgumentException("Array has wrong size: " + b
.length
);
726 return b
[0] != (byte) 0;
730 * Convert a long value to a byte array using big-endian.
732 * @param val value to convert
733 * @return the byte array
735 public static byte[] toBytes(long val
) {
736 byte [] b
= new byte[8];
737 for (int i
= 7; i
> 0; i
--) {
746 * Converts a byte array to a long value. Reverses
747 * {@link #toBytes(long)}
749 * @return the long value
751 public static long toLong(byte[] bytes
) {
752 return toLong(bytes
, 0, SIZEOF_LONG
);
756 * Converts a byte array to a long value. Assumes there will be
757 * {@link #SIZEOF_LONG} bytes available.
760 * @param offset offset
761 * @return the long value
763 public static long toLong(byte[] bytes
, int offset
) {
764 return toLong(bytes
, offset
, SIZEOF_LONG
);
768 * Converts a byte array to a long value.
770 * @param bytes array of bytes
771 * @param offset offset into array
772 * @param length length of data (must be {@link #SIZEOF_LONG})
773 * @return the long value
774 * @throws IllegalArgumentException if length is not {@link #SIZEOF_LONG} or
775 * if there's not enough room in the array at the offset indicated.
777 public static long toLong(byte[] bytes
, int offset
, final int length
) {
778 if (length
!= SIZEOF_LONG
|| offset
+ length
> bytes
.length
) {
779 throw explainWrongLengthOrOffset(bytes
, offset
, length
, SIZEOF_LONG
);
781 return ConverterHolder
.BEST_CONVERTER
.toLong(bytes
, offset
, length
);
784 private static IllegalArgumentException
785 explainWrongLengthOrOffset(final byte[] bytes
,
788 final int expectedLength
) {
790 if (length
!= expectedLength
) {
791 reason
= "Wrong length: " + length
+ ", expected " + expectedLength
;
793 reason
= "offset (" + offset
+ ") + length (" + length
+ ") exceed the"
794 + " capacity of the array: " + bytes
.length
;
796 return new IllegalArgumentException(reason
);
800 * Put a long value out to the specified byte array position.
801 * @param bytes the byte array
802 * @param offset position in the array
803 * @param val long to write out
804 * @return incremented offset
805 * @throws IllegalArgumentException if the byte array given doesn't have
806 * enough room at the offset specified.
808 public static int putLong(byte[] bytes
, int offset
, long val
) {
809 if (bytes
.length
- offset
< SIZEOF_LONG
) {
810 throw new IllegalArgumentException("Not enough room to put a long at"
811 + " offset " + offset
+ " in a " + bytes
.length
+ " byte array");
813 return ConverterHolder
.BEST_CONVERTER
.putLong(bytes
, offset
, val
);
817 * Presumes float encoded as IEEE 754 floating-point "single format"
818 * @param bytes byte array
819 * @return Float made from passed byte array.
821 public static float toFloat(byte [] bytes
) {
822 return toFloat(bytes
, 0);
826 * Presumes float encoded as IEEE 754 floating-point "single format"
827 * @param bytes array to convert
828 * @param offset offset into array
829 * @return Float made from passed byte array.
831 public static float toFloat(byte [] bytes
, int offset
) {
832 return Float
.intBitsToFloat(toInt(bytes
, offset
, SIZEOF_INT
));
836 * @param bytes byte array
837 * @param offset offset to write to
838 * @param f float value
839 * @return New offset in <code>bytes</code>
841 public static int putFloat(byte [] bytes
, int offset
, float f
) {
842 return putInt(bytes
, offset
, Float
.floatToRawIntBits(f
));
846 * @param f float value
847 * @return the float represented as byte []
849 public static byte [] toBytes(final float f
) {
851 return Bytes
.toBytes(Float
.floatToRawIntBits(f
));
855 * @param bytes byte array
856 * @return Return double made from passed bytes.
858 public static double toDouble(final byte [] bytes
) {
859 return toDouble(bytes
, 0);
863 * @param bytes byte array
864 * @param offset offset where double is
865 * @return Return double made from passed bytes.
867 public static double toDouble(final byte [] bytes
, final int offset
) {
868 return Double
.longBitsToDouble(toLong(bytes
, offset
, SIZEOF_LONG
));
872 * @param bytes byte array
873 * @param offset offset to write to
875 * @return New offset into array <code>bytes</code>
877 public static int putDouble(byte [] bytes
, int offset
, double d
) {
878 return putLong(bytes
, offset
, Double
.doubleToLongBits(d
));
882 * Serialize a double as the IEEE 754 double format output. The resultant
883 * array will be 8 bytes long.
886 * @return the double represented as byte []
888 public static byte [] toBytes(final double d
) {
889 // Encode it as a long
890 return Bytes
.toBytes(Double
.doubleToRawLongBits(d
));
894 * Convert an int value to a byte array. Big-endian. Same as what DataOutputStream.writeInt
898 * @return the byte array
900 public static byte[] toBytes(int val
) {
901 byte [] b
= new byte[4];
902 for(int i
= 3; i
> 0; i
--) {
911 * Converts a byte array to an int value
912 * @param bytes byte array
913 * @return the int value
915 public static int toInt(byte[] bytes
) {
916 return toInt(bytes
, 0, SIZEOF_INT
);
920 * Converts a byte array to an int value
921 * @param bytes byte array
922 * @param offset offset into array
923 * @return the int value
925 public static int toInt(byte[] bytes
, int offset
) {
926 return toInt(bytes
, offset
, SIZEOF_INT
);
930 * Converts a byte array to an int value
931 * @param bytes byte array
932 * @param offset offset into array
933 * @param length length of int (has to be {@link #SIZEOF_INT})
934 * @return the int value
935 * @throws IllegalArgumentException if length is not {@link #SIZEOF_INT} or
936 * if there's not enough room in the array at the offset indicated.
938 public static int toInt(byte[] bytes
, int offset
, final int length
) {
939 if (length
!= SIZEOF_INT
|| offset
+ length
> bytes
.length
) {
940 throw explainWrongLengthOrOffset(bytes
, offset
, length
, SIZEOF_INT
);
942 return ConverterHolder
.BEST_CONVERTER
.toInt(bytes
, offset
, length
);
946 * Converts a byte array to an int value
947 * @param bytes byte array
948 * @param offset offset into array
949 * @param length how many bytes should be considered for creating int
950 * @return the int value
951 * @throws IllegalArgumentException if there's not enough room in the array at the offset
954 public static int readAsInt(byte[] bytes
, int offset
, final int length
) {
955 if (offset
+ length
> bytes
.length
) {
956 throw new IllegalArgumentException("offset (" + offset
+ ") + length (" + length
957 + ") exceed the" + " capacity of the array: " + bytes
.length
);
960 for(int i
= offset
; i
< (offset
+ length
); i
++) {
962 n ^
= bytes
[i
] & 0xFF;
968 * Put an int value out to the specified byte array position.
969 * @param bytes the byte array
970 * @param offset position in the array
971 * @param val int to write out
972 * @return incremented offset
973 * @throws IllegalArgumentException if the byte array given doesn't have
974 * enough room at the offset specified.
976 public static int putInt(byte[] bytes
, int offset
, int val
) {
977 if (bytes
.length
- offset
< SIZEOF_INT
) {
978 throw new IllegalArgumentException("Not enough room to put an int at"
979 + " offset " + offset
+ " in a " + bytes
.length
+ " byte array");
981 return ConverterHolder
.BEST_CONVERTER
.putInt(bytes
, offset
, val
);
985 * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long.
987 * @return the byte array
989 public static byte[] toBytes(short val
) {
990 byte[] b
= new byte[SIZEOF_SHORT
];
998 * Converts a byte array to a short value
999 * @param bytes byte array
1000 * @return the short value
1002 public static short toShort(byte[] bytes
) {
1003 return toShort(bytes
, 0, SIZEOF_SHORT
);
1007 * Converts a byte array to a short value
1008 * @param bytes byte array
1009 * @param offset offset into array
1010 * @return the short value
1012 public static short toShort(byte[] bytes
, int offset
) {
1013 return toShort(bytes
, offset
, SIZEOF_SHORT
);
1017 * Converts a byte array to a short value
1018 * @param bytes byte array
1019 * @param offset offset into array
1020 * @param length length, has to be {@link #SIZEOF_SHORT}
1021 * @return the short value
1022 * @throws IllegalArgumentException if length is not {@link #SIZEOF_SHORT}
1023 * or if there's not enough room in the array at the offset indicated.
1025 public static short toShort(byte[] bytes
, int offset
, final int length
) {
1026 if (length
!= SIZEOF_SHORT
|| offset
+ length
> bytes
.length
) {
1027 throw explainWrongLengthOrOffset(bytes
, offset
, length
, SIZEOF_SHORT
);
1029 return ConverterHolder
.BEST_CONVERTER
.toShort(bytes
, offset
, length
);
1033 * Returns a new byte array, copied from the given {@code buf},
1034 * from the position (inclusive) to the limit (exclusive).
1035 * The position and the other index parameters are not changed.
1037 * @param buf a byte buffer
1038 * @return the byte array
1039 * @see #toBytes(ByteBuffer)
1041 public static byte[] getBytes(ByteBuffer buf
) {
1042 return readBytes(buf
.duplicate());
1046 * Put a short value out to the specified byte array position.
1047 * @param bytes the byte array
1048 * @param offset position in the array
1049 * @param val short to write out
1050 * @return incremented offset
1051 * @throws IllegalArgumentException if the byte array given doesn't have
1052 * enough room at the offset specified.
1054 public static int putShort(byte[] bytes
, int offset
, short val
) {
1055 if (bytes
.length
- offset
< SIZEOF_SHORT
) {
1056 throw new IllegalArgumentException("Not enough room to put a short at"
1057 + " offset " + offset
+ " in a " + bytes
.length
+ " byte array");
1059 return ConverterHolder
.BEST_CONVERTER
.putShort(bytes
, offset
, val
);
1063 * Put an int value as short out to the specified byte array position. Only the lower 2 bytes of
1064 * the short will be put into the array. The caller of the API need to make sure they will not
1065 * loose the value by doing so. This is useful to store an unsigned short which is represented as
1066 * int in other parts.
1067 * @param bytes the byte array
1068 * @param offset position in the array
1069 * @param val value to write out
1070 * @return incremented offset
1071 * @throws IllegalArgumentException if the byte array given doesn't have
1072 * enough room at the offset specified.
1074 public static int putAsShort(byte[] bytes
, int offset
, int val
) {
1075 if (bytes
.length
- offset
< SIZEOF_SHORT
) {
1076 throw new IllegalArgumentException("Not enough room to put a short at"
1077 + " offset " + offset
+ " in a " + bytes
.length
+ " byte array");
1079 bytes
[offset
+1] = (byte) val
;
1081 bytes
[offset
] = (byte) val
;
1082 return offset
+ SIZEOF_SHORT
;
1086 * Convert a BigDecimal value to a byte array
1089 * @return the byte array
1091 public static byte[] toBytes(BigDecimal val
) {
1092 byte[] valueBytes
= val
.unscaledValue().toByteArray();
1093 byte[] result
= new byte[valueBytes
.length
+ SIZEOF_INT
];
1094 int offset
= putInt(result
, 0, val
.scale());
1095 putBytes(result
, offset
, valueBytes
, 0, valueBytes
.length
);
1101 * Converts a byte array to a BigDecimal
1104 * @return the char value
1106 public static BigDecimal
toBigDecimal(byte[] bytes
) {
1107 return toBigDecimal(bytes
, 0, bytes
.length
);
1111 * Converts a byte array to a BigDecimal value
1116 * @return the char value
1118 public static BigDecimal
toBigDecimal(byte[] bytes
, int offset
, final int length
) {
1119 if (bytes
== null || length
< SIZEOF_INT
+ 1 ||
1120 (offset
+ length
> bytes
.length
)) {
1124 int scale
= toInt(bytes
, offset
);
1125 byte[] tcBytes
= new byte[length
- SIZEOF_INT
];
1126 System
.arraycopy(bytes
, offset
+ SIZEOF_INT
, tcBytes
, 0, length
- SIZEOF_INT
);
1127 return new BigDecimal(new BigInteger(tcBytes
), scale
);
1131 * Put a BigDecimal value out to the specified byte array position.
1133 * @param bytes the byte array
1134 * @param offset position in the array
1135 * @param val BigDecimal to write out
1136 * @return incremented offset
1138 public static int putBigDecimal(byte[] bytes
, int offset
, BigDecimal val
) {
1139 if (bytes
== null) {
1143 byte[] valueBytes
= val
.unscaledValue().toByteArray();
1144 byte[] result
= new byte[valueBytes
.length
+ SIZEOF_INT
];
1145 offset
= putInt(result
, offset
, val
.scale());
1146 return putBytes(result
, offset
, valueBytes
, 0, valueBytes
.length
);
1150 * @param vint Integer to make a vint of.
1151 * @return Vint as bytes array.
1153 public static byte [] vintToBytes(final long vint
) {
1155 int size
= WritableUtils
.getVIntSize(i
);
1156 byte [] result
= new byte[size
];
1158 if (i
>= -112 && i
<= 127) {
1159 result
[offset
] = (byte) i
;
1165 i ^
= -1L; // take one's complement'
1175 result
[offset
++] = (byte) len
;
1177 len
= (len
< -120) ?
-(len
+ 120) : -(len
+ 112);
1179 for (int idx
= len
; idx
!= 0; idx
--) {
1180 int shiftbits
= (idx
- 1) * 8;
1181 long mask
= 0xFFL
<< shiftbits
;
1182 result
[offset
++] = (byte)((i
& mask
) >> shiftbits
);
1188 * @param buffer buffer to convert
1189 * @return vint bytes as an integer.
1191 public static long bytesToVint(final byte [] buffer
) {
1193 byte firstByte
= buffer
[offset
++];
1194 int len
= WritableUtils
.decodeVIntSize(firstByte
);
1199 for (int idx
= 0; idx
< len
-1; idx
++) {
1200 byte b
= buffer
[offset
++];
1204 return (WritableUtils
.isNegativeVInt(firstByte
) ? ~i
: i
);
1208 * Reads a zero-compressed encoded long from input buffer and returns it.
1209 * @param buffer Binary array
1210 * @param offset Offset into array at which vint begins.
1211 * @return deserialized long from buffer.
1213 public static long readAsVLong(final byte [] buffer
, final int offset
) {
1214 byte firstByte
= buffer
[offset
];
1215 int len
= WritableUtils
.decodeVIntSize(firstByte
);
1220 for (int idx
= 0; idx
< len
-1; idx
++) {
1221 byte b
= buffer
[offset
+ 1 + idx
];
1225 return (WritableUtils
.isNegativeVInt(firstByte
) ? ~i
: i
);
1229 * @param left left operand
1230 * @param right right operand
1231 * @return 0 if equal, < 0 if left is less than right, etc.
1233 public static int compareTo(final byte [] left
, final byte [] right
) {
1234 return LexicographicalComparerHolder
.BEST_COMPARER
.
1235 compareTo(left
, 0, left
== null?
0: left
.length
, right
, 0, right
== null?
0: right
.length
);
1239 * Lexicographically compare two arrays.
1241 * @param buffer1 left operand
1242 * @param buffer2 right operand
1243 * @param offset1 Where to start comparing in the left buffer
1244 * @param offset2 Where to start comparing in the right buffer
1245 * @param length1 How much to compare from the left buffer
1246 * @param length2 How much to compare from the right buffer
1247 * @return 0 if equal, < 0 if left is less than right, etc.
1249 public static int compareTo(byte[] buffer1
, int offset1
, int length1
,
1250 byte[] buffer2
, int offset2
, int length2
) {
1251 return LexicographicalComparerHolder
.BEST_COMPARER
.
1252 compareTo(buffer1
, offset1
, length1
, buffer2
, offset2
, length2
);
1255 interface Comparer
<T
> {
1257 T buffer1
, int offset1
, int length1
, T buffer2
, int offset2
, int length2
1261 static abstract class Converter
{
1262 abstract long toLong(byte[] bytes
, int offset
, int length
);
1263 abstract int putLong(byte[] bytes
, int offset
, long val
);
1265 abstract int toInt(byte[] bytes
, int offset
, final int length
);
1266 abstract int putInt(byte[] bytes
, int offset
, int val
);
1268 abstract short toShort(byte[] bytes
, int offset
, final int length
);
1269 abstract int putShort(byte[] bytes
, int offset
, short val
);
1274 static Comparer
<byte[]> lexicographicalComparerJavaImpl() {
1275 return LexicographicalComparerHolder
.PureJavaComparer
.INSTANCE
;
1278 static class ConverterHolder
{
1279 static final String UNSAFE_CONVERTER_NAME
=
1280 ConverterHolder
.class.getName() + "$UnsafeConverter";
1282 static final Converter BEST_CONVERTER
= getBestConverter();
1284 * Returns the Unsafe-using Converter, or falls back to the pure-Java
1285 * implementation if unable to do so.
1287 static Converter
getBestConverter() {
1289 Class
<?
> theClass
= Class
.forName(UNSAFE_CONVERTER_NAME
);
1291 // yes, UnsafeComparer does implement Comparer<byte[]>
1292 @SuppressWarnings("unchecked")
1293 Converter converter
= (Converter
) theClass
.getConstructor().newInstance();
1295 } catch (Throwable t
) { // ensure we really catch *everything*
1296 return PureJavaConverter
.INSTANCE
;
1300 protected static final class PureJavaConverter
extends Converter
{
1301 static final PureJavaConverter INSTANCE
= new PureJavaConverter();
1303 private PureJavaConverter() {}
1306 long toLong(byte[] bytes
, int offset
, int length
) {
1308 for(int i
= offset
; i
< offset
+ length
; i
++) {
1310 l ^
= bytes
[i
] & 0xFF;
1316 int putLong(byte[] bytes
, int offset
, long val
) {
1317 for(int i
= offset
+ 7; i
> offset
; i
--) {
1318 bytes
[i
] = (byte) val
;
1321 bytes
[offset
] = (byte) val
;
1322 return offset
+ SIZEOF_LONG
;
1326 int toInt(byte[] bytes
, int offset
, int length
) {
1328 for(int i
= offset
; i
< (offset
+ length
); i
++) {
1330 n ^
= bytes
[i
] & 0xFF;
1336 int putInt(byte[] bytes
, int offset
, int val
) {
1337 for(int i
= offset
+ 3; i
> offset
; i
--) {
1338 bytes
[i
] = (byte) val
;
1341 bytes
[offset
] = (byte) val
;
1342 return offset
+ SIZEOF_INT
;
1346 short toShort(byte[] bytes
, int offset
, int length
) {
1348 n
= (short) ((n ^ bytes
[offset
]) & 0xFF);
1349 n
= (short) (n
<< 8);
1350 n ^
= (short) (bytes
[offset
+1] & 0xFF);
1355 int putShort(byte[] bytes
, int offset
, short val
) {
1356 bytes
[offset
+1] = (byte) val
;
1358 bytes
[offset
] = (byte) val
;
1359 return offset
+ SIZEOF_SHORT
;
1363 protected static final class UnsafeConverter
extends Converter
{
1365 static final Unsafe theUnsafe
;
1367 public UnsafeConverter() {}
1370 if (UNSAFE_UNALIGNED
) {
1371 theUnsafe
= UnsafeAccess
.theUnsafe
;
1373 // It doesn't matter what we throw;
1374 // it's swallowed in getBestComparer().
1378 // sanity check - this should never fail
1379 if (theUnsafe
.arrayIndexScale(byte[].class) != 1) {
1380 throw new AssertionError();
1385 long toLong(byte[] bytes
, int offset
, int length
) {
1386 return UnsafeAccess
.toLong(bytes
, offset
);
1390 int putLong(byte[] bytes
, int offset
, long val
) {
1391 return UnsafeAccess
.putLong(bytes
, offset
, val
);
1395 int toInt(byte[] bytes
, int offset
, int length
) {
1396 return UnsafeAccess
.toInt(bytes
, offset
);
1400 int putInt(byte[] bytes
, int offset
, int val
) {
1401 return UnsafeAccess
.putInt(bytes
, offset
, val
);
1405 short toShort(byte[] bytes
, int offset
, int length
) {
1406 return UnsafeAccess
.toShort(bytes
, offset
);
1410 int putShort(byte[] bytes
, int offset
, short val
) {
1411 return UnsafeAccess
.putShort(bytes
, offset
, val
);
1417 * Provides a lexicographical comparer implementation; either a Java
1418 * implementation or a faster implementation based on {@link Unsafe}.
1420 * <p>Uses reflection to gracefully fall back to the Java implementation if
1421 * {@code Unsafe} isn't available.
1424 static class LexicographicalComparerHolder
{
1425 static final String UNSAFE_COMPARER_NAME
=
1426 LexicographicalComparerHolder
.class.getName() + "$UnsafeComparer";
1428 static final Comparer
<byte[]> BEST_COMPARER
= getBestComparer();
1430 * Returns the Unsafe-using Comparer, or falls back to the pure-Java
1431 * implementation if unable to do so.
1433 static Comparer
<byte[]> getBestComparer() {
1435 Class
<?
> theClass
= Class
.forName(UNSAFE_COMPARER_NAME
);
1437 // yes, UnsafeComparer does implement Comparer<byte[]>
1438 @SuppressWarnings("unchecked")
1439 Comparer
<byte[]> comparer
=
1440 (Comparer
<byte[]>) theClass
.getEnumConstants()[0];
1442 } catch (Throwable t
) { // ensure we really catch *everything*
1443 return lexicographicalComparerJavaImpl();
1447 enum PureJavaComparer
implements Comparer
<byte[]> {
1451 public int compareTo(byte[] buffer1
, int offset1
, int length1
,
1452 byte[] buffer2
, int offset2
, int length2
) {
1453 // Short circuit equal case
1454 if (buffer1
== buffer2
&&
1455 offset1
== offset2
&&
1456 length1
== length2
) {
1459 // Bring WritableComparator code local
1460 int end1
= offset1
+ length1
;
1461 int end2
= offset2
+ length2
;
1462 for (int i
= offset1
, j
= offset2
; i
< end1
&& j
< end2
; i
++, j
++) {
1463 int a
= (buffer1
[i
] & 0xff);
1464 int b
= (buffer2
[j
] & 0xff);
1469 return length1
- length2
;
1474 enum UnsafeComparer
implements Comparer
<byte[]> {
1477 static final Unsafe theUnsafe
;
1479 if (UNSAFE_UNALIGNED
) {
1480 theUnsafe
= UnsafeAccess
.theUnsafe
;
1482 // It doesn't matter what we throw;
1483 // it's swallowed in getBestComparer().
1487 // sanity check - this should never fail
1488 if (theUnsafe
.arrayIndexScale(byte[].class) != 1) {
1489 throw new AssertionError();
1494 * Lexicographically compare two arrays.
1496 * @param buffer1 left operand
1497 * @param buffer2 right operand
1498 * @param offset1 Where to start comparing in the left buffer
1499 * @param offset2 Where to start comparing in the right buffer
1500 * @param length1 How much to compare from the left buffer
1501 * @param length2 How much to compare from the right buffer
1502 * @return 0 if equal, < 0 if left is less than right, etc.
1505 public int compareTo(byte[] buffer1
, int offset1
, int length1
,
1506 byte[] buffer2
, int offset2
, int length2
) {
1508 // Short circuit equal case
1509 if (buffer1
== buffer2
&&
1510 offset1
== offset2
&&
1511 length1
== length2
) {
1514 final int stride
= 8;
1515 final int minLength
= Math
.min(length1
, length2
);
1516 int strideLimit
= minLength
& ~
(stride
- 1);
1517 final long offset1Adj
= offset1
+ UnsafeAccess
.BYTE_ARRAY_BASE_OFFSET
;
1518 final long offset2Adj
= offset2
+ UnsafeAccess
.BYTE_ARRAY_BASE_OFFSET
;
1522 * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower
1523 * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit.
1525 for (i
= 0; i
< strideLimit
; i
+= stride
) {
1526 long lw
= theUnsafe
.getLong(buffer1
, offset1Adj
+ i
);
1527 long rw
= theUnsafe
.getLong(buffer2
, offset2Adj
+ i
);
1529 if(!UnsafeAccess
.LITTLE_ENDIAN
) {
1530 return ((lw
+ Long
.MIN_VALUE
) < (rw
+ Long
.MIN_VALUE
)) ?
-1 : 1;
1534 * We want to compare only the first index where left[index] != right[index]. This
1535 * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are
1536 * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant
1537 * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get
1538 * that least significant nonzero byte. This comparison logic is based on UnsignedBytes
1539 * comparator from guava v21
1541 int n
= Long
.numberOfTrailingZeros(lw ^ rw
) & ~
0x7;
1542 return ((int) ((lw
>>> n
) & 0xFF)) - ((int) ((rw
>>> n
) & 0xFF));
1546 // The epilogue to cover the last (minLength % stride) elements.
1547 for (; i
< minLength
; i
++) {
1548 int a
= (buffer1
[offset1
+ i
] & 0xFF);
1549 int b
= (buffer2
[offset2
+ i
] & 0xFF);
1554 return length1
- length2
;
1560 * @param left left operand
1561 * @param right right operand
1562 * @return True if equal
1564 public static boolean equals(final byte [] left
, final byte [] right
) {
1565 // Could use Arrays.equals?
1566 //noinspection SimplifiableConditionalExpression
1567 if (left
== right
) return true;
1568 if (left
== null || right
== null) return false;
1569 if (left
.length
!= right
.length
) return false;
1570 if (left
.length
== 0) return true;
1572 // Since we're often comparing adjacent sorted data,
1573 // it's usual to have equal arrays except for the very last byte
1574 // so check that first
1575 if (left
[left
.length
- 1] != right
[right
.length
- 1]) return false;
1577 return compareTo(left
, right
) == 0;
1580 public static boolean equals(final byte[] left
, int leftOffset
, int leftLen
,
1581 final byte[] right
, int rightOffset
, int rightLen
) {
1582 // short circuit case
1583 if (left
== right
&&
1584 leftOffset
== rightOffset
&&
1585 leftLen
== rightLen
) {
1588 // different lengths fast check
1589 if (leftLen
!= rightLen
) {
1596 // Since we're often comparing adjacent sorted data,
1597 // it's usual to have equal arrays except for the very last byte
1598 // so check that first
1599 if (left
[leftOffset
+ leftLen
- 1] != right
[rightOffset
+ rightLen
- 1]) return false;
1601 return LexicographicalComparerHolder
.BEST_COMPARER
.
1602 compareTo(left
, leftOffset
, leftLen
, right
, rightOffset
, rightLen
) == 0;
1607 * @param a left operand
1608 * @param buf right operand
1609 * @return True if equal
1611 public static boolean equals(byte[] a
, ByteBuffer buf
) {
1612 if (a
== null) return buf
== null;
1613 if (buf
== null) return false;
1614 if (a
.length
!= buf
.remaining()) return false;
1616 // Thou shalt not modify the original byte buffer in what should be read only operations.
1617 ByteBuffer b
= buf
.duplicate();
1618 for (byte anA
: a
) {
1619 if (anA
!= b
.get()) {
1628 * Return true if the byte array on the right is a prefix of the byte
1629 * array on the left.
1631 public static boolean startsWith(byte[] bytes
, byte[] prefix
) {
1632 return bytes
!= null && prefix
!= null &&
1633 bytes
.length
>= prefix
.length
&&
1634 LexicographicalComparerHolder
.BEST_COMPARER
.
1635 compareTo(bytes
, 0, prefix
.length
, prefix
, 0, prefix
.length
) == 0;
1639 * @param b bytes to hash
1640 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
1641 * passed in array. This method is what {@link org.apache.hadoop.io.Text}
1642 * use calculating hash code.
1644 public static int hashCode(final byte [] b
) {
1645 return hashCode(b
, b
.length
);
1650 * @param length length of the value
1651 * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
1652 * passed in array. This method is what {@link org.apache.hadoop.io.Text}
1653 * use calculating hash code.
1655 public static int hashCode(final byte [] b
, final int length
) {
1656 return WritableComparator
.hashBytes(b
, length
);
1660 * @param b bytes to hash
1661 * @return A hash of <code>b</code> as an Integer that can be used as key in
1664 public static Integer
mapKey(final byte [] b
) {
1669 * @param b bytes to hash
1670 * @param length length to hash
1671 * @return A hash of <code>b</code> as an Integer that can be used as key in
1674 public static Integer
mapKey(final byte [] b
, final int length
) {
1675 return hashCode(b
, length
);
1679 * @param a lower half
1680 * @param b upper half
1681 * @return New array that has a in lower half and b in upper half.
1683 public static byte [] add(final byte [] a
, final byte [] b
) {
1684 return add(a
, b
, EMPTY_BYTE_ARRAY
);
1688 * @param a first third
1689 * @param b second third
1690 * @param c third third
1691 * @return New array made from a, b and c
1693 public static byte [] add(final byte [] a
, final byte [] b
, final byte [] c
) {
1694 byte [] result
= new byte[a
.length
+ b
.length
+ c
.length
];
1695 System
.arraycopy(a
, 0, result
, 0, a
.length
);
1696 System
.arraycopy(b
, 0, result
, a
.length
, b
.length
);
1697 System
.arraycopy(c
, 0, result
, a
.length
+ b
.length
, c
.length
);
1702 * @param arrays all the arrays to concatenate together.
1703 * @return New array made from the concatenation of the given arrays.
1705 public static byte [] add(final byte [][] arrays
) {
1707 for (int i
= 0; i
< arrays
.length
; i
++) {
1708 length
+= arrays
[i
].length
;
1710 byte [] result
= new byte[length
];
1712 for (int i
= 0; i
< arrays
.length
; i
++) {
1713 System
.arraycopy(arrays
[i
], 0, result
, index
, arrays
[i
].length
);
1714 index
+= arrays
[i
].length
;
1721 * @param length amount of bytes to grab
1722 * @return First <code>length</code> bytes from <code>a</code>
1724 public static byte [] head(final byte [] a
, final int length
) {
1725 if (a
.length
< length
) {
1728 byte [] result
= new byte[length
];
1729 System
.arraycopy(a
, 0, result
, 0, length
);
1735 * @param length amount of bytes to snarf
1736 * @return Last <code>length</code> bytes from <code>a</code>
1738 public static byte [] tail(final byte [] a
, final int length
) {
1739 if (a
.length
< length
) {
1742 byte [] result
= new byte[length
];
1743 System
.arraycopy(a
, a
.length
- length
, result
, 0, length
);
1749 * @param length new array size
1750 * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
1752 public static byte [] padHead(final byte [] a
, final int length
) {
1753 byte [] padding
= new byte[length
];
1754 for (int i
= 0; i
< length
; i
++) {
1757 return add(padding
,a
);
1762 * @param length new array size
1763 * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
1765 public static byte [] padTail(final byte [] a
, final int length
) {
1766 byte [] padding
= new byte[length
];
1767 for (int i
= 0; i
< length
; i
++) {
1770 return add(a
,padding
);
1774 * Split passed range. Expensive operation relatively. Uses BigInteger math.
1775 * Useful splitting ranges for MapReduce jobs.
1776 * @param a Beginning of range
1777 * @param b End of range
1778 * @param num Number of times to split range. Pass 1 if you want to split
1779 * the range in two; i.e. one split.
1780 * @return Array of dividing values
1782 public static byte [][] split(final byte [] a
, final byte [] b
, final int num
) {
1783 return split(a
, b
, false, num
);
1787 * Split passed range. Expensive operation relatively. Uses BigInteger math.
1788 * Useful splitting ranges for MapReduce jobs.
1789 * @param a Beginning of range
1790 * @param b End of range
1791 * @param inclusive Whether the end of range is prefix-inclusive or is
1792 * considered an exclusive boundary. Automatic splits are generally exclusive
1793 * and manual splits with an explicit range utilize an inclusive end of range.
1794 * @param num Number of times to split range. Pass 1 if you want to split
1795 * the range in two; i.e. one split.
1796 * @return Array of dividing values
1798 public static byte[][] split(final byte[] a
, final byte[] b
,
1799 boolean inclusive
, final int num
) {
1800 byte[][] ret
= new byte[num
+ 2][];
1802 Iterable
<byte[]> iter
= iterateOnSplits(a
, b
, inclusive
, num
);
1805 for (byte[] elem
: iter
) {
1812 * Iterate over keys within the passed range, splitting at an [a,b) boundary.
1814 public static Iterable
<byte[]> iterateOnSplits(final byte[] a
,
1815 final byte[] b
, final int num
)
1817 return iterateOnSplits(a
, b
, false, num
);
1821 * Iterate over keys within the passed range.
1823 public static Iterable
<byte[]> iterateOnSplits(
1824 final byte[] a
, final byte[]b
, boolean inclusive
, final int num
)
1828 if (a
.length
< b
.length
) {
1829 aPadded
= padTail(a
, b
.length
- a
.length
);
1831 } else if (b
.length
< a
.length
) {
1833 bPadded
= padTail(b
, a
.length
- b
.length
);
1838 if (compareTo(aPadded
,bPadded
) >= 0) {
1839 throw new IllegalArgumentException("b <= a");
1842 throw new IllegalArgumentException("num cannot be <= 0");
1844 byte [] prependHeader
= {1, 0};
1845 final BigInteger startBI
= new BigInteger(add(prependHeader
, aPadded
));
1846 final BigInteger stopBI
= new BigInteger(add(prependHeader
, bPadded
));
1847 BigInteger diffBI
= stopBI
.subtract(startBI
);
1849 diffBI
= diffBI
.add(BigInteger
.ONE
);
1851 final BigInteger splitsBI
= BigInteger
.valueOf(num
+ 1);
1852 //when diffBI < splitBI, use an additional byte to increase diffBI
1853 if(diffBI
.compareTo(splitsBI
) < 0) {
1854 byte[] aPaddedAdditional
= new byte[aPadded
.length
+1];
1855 byte[] bPaddedAdditional
= new byte[bPadded
.length
+1];
1856 for (int i
= 0; i
< aPadded
.length
; i
++){
1857 aPaddedAdditional
[i
] = aPadded
[i
];
1859 for (int j
= 0; j
< bPadded
.length
; j
++){
1860 bPaddedAdditional
[j
] = bPadded
[j
];
1862 aPaddedAdditional
[aPadded
.length
] = 0;
1863 bPaddedAdditional
[bPadded
.length
] = 0;
1864 return iterateOnSplits(aPaddedAdditional
, bPaddedAdditional
, inclusive
, num
);
1866 final BigInteger intervalBI
;
1868 intervalBI
= diffBI
.divide(splitsBI
);
1869 } catch(Exception e
) {
1870 LOG
.error("Exception caught during division", e
);
1874 final Iterator
<byte[]> iterator
= new Iterator
<byte[]>() {
1878 public boolean hasNext() {
1883 public byte[] next() {
1885 if (i
== 0) return a
;
1886 if (i
== num
+ 1) return b
;
1888 BigInteger curBI
= startBI
.add(intervalBI
.multiply(BigInteger
.valueOf(i
)));
1889 byte [] padded
= curBI
.toByteArray();
1891 padded
= tail(padded
, padded
.length
- 2);
1893 padded
= tail(padded
, padded
.length
- 1);
1898 public void remove() {
1899 throw new UnsupportedOperationException();
1904 return new Iterable
<byte[]>() {
1906 public Iterator
<byte[]> iterator() {
1913 * @param bytes array to hash
1914 * @param offset offset to start from
1915 * @param length length to hash
1917 public static int hashCode(byte[] bytes
, int offset
, int length
) {
1919 for (int i
= offset
; i
< offset
+ length
; i
++)
1920 hash
= (31 * hash
) + bytes
[i
];
1926 * @return Array of byte arrays made from passed array of Text
1928 public static byte [][] toByteArrays(final String
[] t
) {
1929 byte [][] result
= new byte[t
.length
][];
1930 for (int i
= 0; i
< t
.length
; i
++) {
1931 result
[i
] = Bytes
.toBytes(t
[i
]);
1938 * @return Array of binary byte arrays made from passed array of binary strings
1940 public static byte[][] toBinaryByteArrays(final String
[] t
) {
1941 byte[][] result
= new byte[t
.length
][];
1942 for (int i
= 0; i
< t
.length
; i
++) {
1943 result
[i
] = Bytes
.toBytesBinary(t
[i
]);
1949 * @param column operand
1950 * @return A byte array of a byte array where first and only entry is
1951 * <code>column</code>
1953 public static byte [][] toByteArrays(final String column
) {
1954 return toByteArrays(toBytes(column
));
1958 * @param column operand
1959 * @return A byte array of a byte array where first and only entry is
1960 * <code>column</code>
1962 public static byte [][] toByteArrays(final byte [] column
) {
1963 byte [][] result
= new byte[1][];
1969 * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR.
1971 * @param arr array of byte arrays to search for
1972 * @param key the key you want to find
1973 * @param offset the offset in the key you want to find
1974 * @param length the length of the key
1975 * @return zero-based index of the key, if the key is present in the array.
1976 * Otherwise, a value -(i + 1) such that the key is between arr[i -
1977 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define
1978 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
1979 * means that this function can return 2N + 1 different values
1980 * ranging from -(N + 1) to N - 1.
1982 public static int binarySearch(byte[][] arr
, byte[] key
, int offset
, int length
) {
1984 int high
= arr
.length
- 1;
1986 while (low
<= high
) {
1987 int mid
= low
+ ((high
- low
) >> 1);
1988 // we have to compare in this order, because the comparator order
1989 // has special logic when the 'left side' is a special key.
1990 int cmp
= Bytes
.BYTES_RAWCOMPARATOR
1991 .compare(key
, offset
, length
, arr
[mid
], 0, arr
[mid
].length
);
1992 // key lives above the midpoint
1995 // key lives below the midpoint
1998 // BAM. how often does this really happen?
2006 * Binary search for keys in indexes.
2008 * @param arr array of byte arrays to search for
2009 * @param key the key you want to find
2010 * @param comparator a comparator to compare.
2011 * @return zero-based index of the key, if the key is present in the array.
2012 * Otherwise, a value -(i + 1) such that the key is between arr[i -
2013 * 1] and arr[i] non-inclusively, where i is in [0, i], if we define
2014 * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above
2015 * means that this function can return 2N + 1 different values
2016 * ranging from -(N + 1) to N - 1.
2017 * @return the index of the block
2019 public static int binarySearch(Cell
[] arr
, Cell key
, CellComparator comparator
) {
2021 int high
= arr
.length
- 1;
2022 while (low
<= high
) {
2023 int mid
= low
+ ((high
- low
) >> 1);
2024 // we have to compare in this order, because the comparator order
2025 // has special logic when the 'left side' is a special key.
2026 int cmp
= comparator
.compare(key
, arr
[mid
]);
2027 // key lives above the midpoint
2030 // key lives below the midpoint
2033 // BAM. how often does this really happen?
2041 * Bytewise binary increment/deincrement of long contained in byte array
2044 * @param value - array of bytes containing long (length <= SIZEOF_LONG)
2045 * @param amount value will be incremented on (deincremented if negative)
2046 * @return array of bytes containing incremented long (length == SIZEOF_LONG)
2048 public static byte [] incrementBytes(byte[] value
, long amount
)
2051 if (val
.length
< SIZEOF_LONG
) {
2052 // Hopefully this doesn't happen too often.
2055 newvalue
= new byte[]{-1, -1, -1, -1, -1, -1, -1, -1};
2057 newvalue
= new byte[SIZEOF_LONG
];
2059 System
.arraycopy(val
, 0, newvalue
, newvalue
.length
- val
.length
,
2062 } else if (val
.length
> SIZEOF_LONG
) {
2063 throw new IllegalArgumentException("Increment Bytes - value too big: " +
2066 if(amount
== 0) return val
;
2068 return binaryIncrementNeg(val
, amount
);
2070 return binaryIncrementPos(val
, amount
);
2073 /* increment/deincrement for positive value */
2074 private static byte [] binaryIncrementPos(byte [] value
, long amount
) {
2081 for(int i
=0;i
<value
.length
;i
++) {
2082 int cur
= ((int)amo
% 256) * sign
;
2084 int val
= value
[value
.length
-i
-1] & 0x0ff;
2085 int total
= val
+ cur
;
2089 } else if (total
< 0) {
2092 value
[value
.length
-i
-1] = (byte)total
;
2093 if (amo
== 0) return value
;
2098 /* increment/deincrement for negative value */
2099 private static byte [] binaryIncrementNeg(byte [] value
, long amount
) {
2106 for(int i
=0;i
<value
.length
;i
++) {
2107 int cur
= ((int)amo
% 256) * sign
;
2109 int val
= ((~value
[value
.length
-i
-1]) & 0x0ff) + 1;
2110 int total
= cur
- val
;
2113 } else if (total
< -256) {
2117 value
[value
.length
-i
-1] = (byte)total
;
2118 if (amo
== 0) return value
;
2124 * Writes a string as a fixed-size field, padded with zeros.
2126 public static void writeStringFixedSize(final DataOutput out
, String s
,
2127 int size
) throws IOException
{
2128 byte[] b
= toBytes(s
);
2129 if (b
.length
> size
) {
2130 throw new IOException("Trying to write " + b
.length
+ " bytes (" +
2131 toStringBinary(b
) + ") into a field of length " + size
);
2135 for (int i
= 0; i
< size
- s
.length(); ++i
)
2140 * Reads a fixed-size field and interprets it as a string padded with zeros.
2142 public static String
readStringFixedSize(final DataInput in
, int size
)
2143 throws IOException
{
2144 byte[] b
= new byte[size
];
2147 while (n
> 0 && b
[n
- 1] == 0)
2150 return toString(b
, 0, n
);
2154 * Copy the byte array given in parameter and return an instance
2155 * of a new byte array with the same length and the same content.
2156 * @param bytes the byte array to duplicate
2157 * @return a copy of the given byte array
2159 public static byte [] copy(byte [] bytes
) {
2160 if (bytes
== null) return null;
2161 byte [] result
= new byte[bytes
.length
];
2162 System
.arraycopy(bytes
, 0, result
, 0, bytes
.length
);
2167 * Copy the byte array given in parameter and return an instance
2168 * of a new byte array with the same length and the same content.
2169 * @param bytes the byte array to copy from
2170 * @return a copy of the given designated byte array
2174 public static byte [] copy(byte [] bytes
, final int offset
, final int length
) {
2175 if (bytes
== null) return null;
2176 byte [] result
= new byte[length
];
2177 System
.arraycopy(bytes
, offset
, result
, 0, length
);
2182 * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from
2183 * somewhere. (mcorgan)
2184 * @param a Array to search. Entries must be sorted and unique.
2185 * @param fromIndex First index inclusive of "a" to include in the search.
2186 * @param toIndex Last index exclusive of "a" to include in the search.
2187 * @param key The byte to search for.
2188 * @return The index of key if found. If not found, return -(index + 1), where negative indicates
2189 * "not found" and the "index + 1" handles the "-0" case.
2191 public static int unsignedBinarySearch(byte[] a
, int fromIndex
, int toIndex
, byte key
) {
2192 int unsignedKey
= key
& 0xff;
2193 int low
= fromIndex
;
2194 int high
= toIndex
- 1;
2196 while (low
<= high
) {
2197 int mid
= low
+ ((high
- low
) >> 1);
2198 int midVal
= a
[mid
] & 0xff;
2200 if (midVal
< unsignedKey
) {
2202 } else if (midVal
> unsignedKey
) {
2205 return mid
; // key found
2208 return -(low
+ 1); // key not found.
2212 * Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding
2213 * 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes.
2215 * @param input The byte[] to increment.
2216 * @return The incremented copy of "in". May be same length or 1 byte longer.
2218 public static byte[] unsignedCopyAndIncrement(final byte[] input
) {
2219 byte[] copy
= copy(input
);
2221 throw new IllegalArgumentException("cannot increment null array");
2223 for (int i
= copy
.length
- 1; i
>= 0; --i
) {
2224 if (copy
[i
] == -1) {// -1 is all 1-bits, which is the unsigned maximum
2231 // we maxed out the array
2232 byte[] out
= new byte[copy
.length
+ 1];
2234 System
.arraycopy(copy
, 0, out
, 1, copy
.length
);
2238 public static boolean equals(List
<byte[]> a
, List
<byte[]> b
) {
2248 if (a
.size() != b
.size()) {
2251 for (int i
= 0; i
< a
.size(); ++i
) {
2252 if (!Bytes
.equals(a
.get(i
), b
.get(i
))) {
2259 public static boolean isSorted(Collection
<byte[]> arrays
) {
2260 if (!CollectionUtils
.isEmpty(arrays
)) {
2261 byte[] previous
= new byte[0];
2262 for (byte[] array
: arrays
) {
2263 if (Bytes
.compareTo(previous
, array
) > 0) {
2272 public static List
<byte[]> getUtf8ByteArrays(List
<String
> strings
) {
2273 if (CollectionUtils
.isEmpty(strings
)) {
2274 return Collections
.emptyList();
2276 List
<byte[]> byteArrays
= new ArrayList
<>(strings
.size());
2277 strings
.forEach(s
-> byteArrays
.add(Bytes
.toBytes(s
)));
2282 * Returns the index of the first appearance of the value {@code target} in
2285 * @param array an array of {@code byte} values, possibly empty
2286 * @param target a primitive {@code byte} value
2287 * @return the least index {@code i} for which {@code array[i] == target}, or
2288 * {@code -1} if no such index exists.
2290 public static int indexOf(byte[] array
, byte target
) {
2291 for (int i
= 0; i
< array
.length
; i
++) {
2292 if (array
[i
] == target
) {
2300 * Returns the start position of the first occurrence of the specified {@code
2301 * target} within {@code array}, or {@code -1} if there is no such occurrence.
2303 * <p>More formally, returns the lowest index {@code i} such that {@code
2304 * java.util.Arrays.copyOfRange(array, i, i + target.length)} contains exactly
2305 * the same elements as {@code target}.
2307 * @param array the array to search for the sequence {@code target}
2308 * @param target the array to search for as a sub-sequence of {@code array}
2310 public static int indexOf(byte[] array
, byte[] target
) {
2311 checkNotNull(array
, "array");
2312 checkNotNull(target
, "target");
2313 if (target
.length
== 0) {
2318 for (int i
= 0; i
< array
.length
- target
.length
+ 1; i
++) {
2319 for (int j
= 0; j
< target
.length
; j
++) {
2320 if (array
[i
+ j
] != target
[j
]) {
2330 * @param array an array of {@code byte} values, possibly empty
2331 * @param target a primitive {@code byte} value
2332 * @return {@code true} if {@code target} is present as an element anywhere in {@code array}.
2334 public static boolean contains(byte[] array
, byte target
) {
2335 return indexOf(array
, target
) > -1;
2339 * @param array an array of {@code byte} values, possibly empty
2340 * @param target an array of {@code byte}
2341 * @return {@code true} if {@code target} is present anywhere in {@code array}
2343 public static boolean contains(byte[] array
, byte[] target
) {
2344 return indexOf(array
, target
) > -1;
2348 * Fill given array with zeros.
2349 * @param b array which needs to be filled with zeros
2351 public static void zero(byte[] b
) {
2352 zero(b
, 0, b
.length
);
2356 * Fill given array with zeros at the specified position.
2361 public static void zero(byte[] b
, int offset
, int length
) {
2362 checkPositionIndex(offset
, b
.length
, "offset");
2363 checkArgument(length
> 0, "length must be greater than 0");
2364 checkPositionIndex(offset
+ length
, b
.length
, "offset + length");
2365 Arrays
.fill(b
, offset
, offset
+ length
, (byte) 0);
2368 private static final SecureRandom RNG
= new SecureRandom();
2371 * Fill given array with random bytes.
2372 * @param b array which needs to be filled with random bytes
2374 public static void random(byte[] b
) {
2379 * Fill given array with random bytes at the specified position.
2384 public static void random(byte[] b
, int offset
, int length
) {
2385 checkPositionIndex(offset
, b
.length
, "offset");
2386 checkArgument(length
> 0, "length must be greater than 0");
2387 checkPositionIndex(offset
+ length
, b
.length
, "offset + length");
2388 byte[] buf
= new byte[length
];
2390 System
.arraycopy(buf
, 0, b
, offset
, length
);
2394 * Create a max byte array with the specified max byte count
2395 * @param maxByteCount the length of returned byte array
2396 * @return the created max byte array
2398 public static byte[] createMaxByteArray(int maxByteCount
) {
2399 byte[] maxByteArray
= new byte[maxByteCount
];
2400 for (int i
= 0; i
< maxByteArray
.length
; i
++) {
2401 maxByteArray
[i
] = (byte) 0xff;
2403 return maxByteArray
;
2407 * Create a byte array which is multiple given bytes
2410 * @return byte array
2412 public static byte[] multiple(byte[] srcBytes
, int multiNum
) {
2413 if (multiNum
<= 0) {
2416 byte[] result
= new byte[srcBytes
.length
* multiNum
];
2417 for (int i
= 0; i
< multiNum
; i
++) {
2418 System
.arraycopy(srcBytes
, 0, result
, i
* srcBytes
.length
,
2424 private static final char[] HEX_CHARS
= {
2425 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
2429 * Convert a byte range into a hex string
2431 public static String
toHex(byte[] b
, int offset
, int length
) {
2432 checkArgument(length
<= Integer
.MAX_VALUE
/ 2);
2433 int numChars
= length
* 2;
2434 char[] ch
= new char[numChars
];
2435 for (int i
= 0; i
< numChars
; i
+= 2)
2437 byte d
= b
[offset
+ i
/2];
2438 ch
[i
] = HEX_CHARS
[(d
>> 4) & 0x0F];
2439 ch
[i
+1] = HEX_CHARS
[d
& 0x0F];
2441 return new String(ch
);
2445 * Convert a byte array into a hex string
2447 public static String
toHex(byte[] b
) {
2448 return toHex(b
, 0, b
.length
);
2451 private static int hexCharToNibble(char ch
) {
2452 if (ch
<= '9' && ch
>= '0') {
2454 } else if (ch
>= 'a' && ch
<= 'f') {
2455 return ch
- 'a' + 10;
2456 } else if (ch
>= 'A' && ch
<= 'F') {
2457 return ch
- 'A' + 10;
2459 throw new IllegalArgumentException("Invalid hex char: " + ch
);
2462 private static byte hexCharsToByte(char c1
, char c2
) {
2463 return (byte) ((hexCharToNibble(c1
) << 4) | hexCharToNibble(c2
));
2467 * Create a byte array from a string of hash digits. The length of the
2468 * string must be a multiple of 2
2471 public static byte[] fromHex(String hex
) {
2472 checkArgument(hex
.length() % 2 == 0, "length must be a multiple of 2");
2473 int len
= hex
.length();
2474 byte[] b
= new byte[len
/ 2];
2475 for (int i
= 0; i
< len
; i
+= 2) {
2476 b
[i
/ 2] = hexCharsToByte(hex
.charAt(i
),hex
.charAt(i
+1));
2484 * @return Index of delimiter having started from start of <code>b</code> moving rightward.
2486 public static int searchDelimiterIndex(final byte[] b
, int offset
, final int length
,
2487 final int delimiter
) {
2489 throw new IllegalArgumentException("Passed buffer is null");
2492 for (int i
= offset
; i
< length
+ offset
; i
++) {
2493 if (b
[i
] == delimiter
) {
2502 * Find index of passed delimiter walking from end of buffer backwards.
2506 * @return Index of delimiter
2508 public static int searchDelimiterIndexInReverse(final byte[] b
, final int offset
,
2509 final int length
, final int delimiter
) {
2511 throw new IllegalArgumentException("Passed buffer is null");
2514 for (int i
= (offset
+ length
) - 1; i
>= offset
; i
--) {
2515 if (b
[i
] == delimiter
) {
2523 public static int findCommonPrefix(byte[] left
, byte[] right
, int leftLength
, int rightLength
,
2524 int leftOffset
, int rightOffset
) {
2525 int length
= Math
.min(leftLength
, rightLength
);
2528 while (result
< length
&& left
[leftOffset
+ result
] == right
[rightOffset
+ result
]) {