2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
;
20 import java
.util
.Comparator
;
22 import org
.apache
.hadoop
.hbase
.util
.ByteBufferUtils
;
23 import org
.apache
.yetus
.audience
.InterfaceAudience
;
24 import org
.slf4j
.Logger
;
25 import org
.slf4j
.LoggerFactory
;
27 import org
.apache
.hbase
.thirdparty
.com
.google
.common
.primitives
.Longs
;
30 * A comparator for case where {@link ByteBufferKeyValue} is prevalent type (BBKV
31 * is base-type in hbase2). Takes a general comparator as fallback in case types are NOT the
32 * expected ByteBufferKeyValue.
34 * <p>This is a tricked-out Comparator at heart of hbase read and write. It is in
35 * the HOT path so we try all sorts of ugly stuff so we can go faster. See below
36 * in this javadoc comment for the list.
38 * <p>Apply this comparator narrowly so it is fed exclusively ByteBufferKeyValues
39 * as much as is possible so JIT can settle (e.g. make one per ConcurrentSkipListMap
42 * <p>Exploits specially added methods in BBKV to save on deserializations of shorts,
43 * longs, etc: i.e. calculating the family length requires row length; pass it in
44 * rather than recalculate it, and so on.
46 * <p>This comparator does static dispatch to private final methods so hotspot is comfortable
49 * <p>Measurement has it that we almost have it so all inlines from memstore
50 * ConcurrentSkipListMap on down to the (unsafe) intrinisics that do byte compare
51 * and deserialize shorts and ints; needs a bit more work.
53 * <p>Does not take a Type to compare: i.e. it is not a Comparator<Cell> or
54 * CellComparator<Cell> or Comparator<ByteBufferKeyValue> because that adds
55 * another method to the hierarchy -- from compare(Object, Object)
56 * to dynamic compare(Cell, Cell) to static private compare -- and inlining doesn't happen if
57 * hierarchy is too deep (it is the case here).
59 * <p>Be careful making changes. Compare perf before and after and look at what
60 * hotspot ends up generating before committing change (jitwatch is helpful here).
61 * Changing this one class doubled write throughput (HBASE-20483).
63 @InterfaceAudience.Private
64 public class BBKVComparator
implements Comparator
{
65 protected static final Logger LOG
= LoggerFactory
.getLogger(BBKVComparator
.class);
66 private final Comparator fallback
;
68 public BBKVComparator(Comparator fallback
) {
69 this.fallback
= fallback
;
73 public int compare(Object l
, Object r
) {
74 if ((l
instanceof ByteBufferKeyValue
) && (r
instanceof ByteBufferKeyValue
)) {
75 return compare((ByteBufferKeyValue
)l
, (ByteBufferKeyValue
)r
, false);
77 // Skip calling compare(Object, Object) and go direct to compare(Cell, Cell)
78 return this.fallback
.compare((Cell
)l
, (Cell
)r
);
81 // TODO: Come back here. We get a few percentage points extra of throughput if this is a
83 static int compare(ByteBufferKeyValue left
, ByteBufferKeyValue right
,
84 boolean ignoreSequenceid
) {
85 // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not
86 // sharing gets us a few percent more throughput in compares. If changes here or there, make
87 // sure done in both places.
89 // Compare Rows. Cache row length.
90 int leftRowLength
= left
.getRowLength();
91 int rightRowLength
= right
.getRowLength();
92 int diff
= ByteBufferUtils
.compareTo(left
.getRowByteBuffer(), left
.getRowPosition(),
94 right
.getRowByteBuffer(), right
.getRowPosition(), rightRowLength
);
99 // If the column is not specified, the "minimum" key type appears as latest in the sorted
100 // order, regardless of the timestamp. This is used for specifying the last key/value in a
101 // given row, because there is no "lexicographically last column" (it would be infinitely long).
102 // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that
103 // we can't do memcmp w/ special rules like this.
104 // TODO: Is there a test for this behavior?
105 int leftFamilyLengthPosition
= left
.getFamilyLengthPosition(leftRowLength
);
106 int leftFamilyLength
= left
.getFamilyLength(leftFamilyLengthPosition
);
107 int leftKeyLength
= left
.getKeyLength();
108 int leftQualifierLength
= left
.getQualifierLength(leftKeyLength
, leftRowLength
,
111 // No need of left row length below here.
113 byte leftType
= left
.getTypeByte(leftKeyLength
);
114 if (leftFamilyLength
+ leftQualifierLength
== 0 &&
115 leftType
== KeyValue
.Type
.Minimum
.getCode()) {
116 // left is "bigger", i.e. it appears later in the sorted order
120 int rightFamilyLengthPosition
= right
.getFamilyLengthPosition(rightRowLength
);
121 int rightFamilyLength
= right
.getFamilyLength(rightFamilyLengthPosition
);
122 int rightKeyLength
= right
.getKeyLength();
123 int rightQualifierLength
= right
.getQualifierLength(rightKeyLength
, rightRowLength
,
126 // No need of right row length below here.
128 byte rightType
= right
.getTypeByte(rightKeyLength
);
129 if (rightFamilyLength
+ rightQualifierLength
== 0 &&
130 rightType
== KeyValue
.Type
.Minimum
.getCode()) {
135 int leftFamilyPosition
= left
.getFamilyPosition(leftFamilyLengthPosition
);
136 int rightFamilyPosition
= right
.getFamilyPosition(rightFamilyLengthPosition
);
137 diff
= ByteBufferUtils
.compareTo(left
.getFamilyByteBuffer(), leftFamilyPosition
,
139 right
.getFamilyByteBuffer(), rightFamilyPosition
, rightFamilyLength
);
144 // Compare qualifiers
145 diff
= ByteBufferUtils
.compareTo(left
.getQualifierByteBuffer(),
146 left
.getQualifierPosition(leftFamilyPosition
, leftFamilyLength
), leftQualifierLength
,
147 right
.getQualifierByteBuffer(),
148 right
.getQualifierPosition(rightFamilyPosition
, rightFamilyLength
),
149 rightQualifierLength
);
155 // Swap order we pass into compare so we get DESCENDING order.
156 diff
= Long
.compare(right
.getTimestamp(rightKeyLength
), left
.getTimestamp(leftKeyLength
));
161 // Compare types. Let the delete types sort ahead of puts; i.e. types
162 // of higher numbers sort before those of lesser numbers. Maximum (255)
163 // appears ahead of everything, and minimum (0) appears after
165 diff
= (0xff & rightType
) - (0xff & leftType
);
170 // Negate following comparisons so later edits show up first mvccVersion: later sorts first
171 return ignoreSequenceid ? diff
: Longs
.compare(right
.getSequenceId(), left
.getSequenceId());