2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.regionserver
;
20 import static org
.junit
.Assert
.assertTrue
;
22 import java
.io
.IOException
;
23 import java
.util
.ArrayList
;
24 import java
.util
.Arrays
;
25 import java
.util
.Collection
;
26 import java
.util
.Collections
;
27 import java
.util
.HashMap
;
28 import java
.util
.HashSet
;
29 import java
.util
.List
;
31 import java
.util
.Random
;
33 import org
.apache
.hadoop
.hbase
.Cell
;
34 import org
.apache
.hadoop
.hbase
.CellComparatorImpl
;
35 import org
.apache
.hadoop
.hbase
.CellUtil
;
36 import org
.apache
.hadoop
.hbase
.HBaseClassTestRule
;
37 import org
.apache
.hadoop
.hbase
.HBaseTestingUtil
;
38 import org
.apache
.hadoop
.hbase
.HConstants
;
39 import org
.apache
.hadoop
.hbase
.KeyValue
;
40 import org
.apache
.hadoop
.hbase
.PrivateCellUtil
;
41 import org
.apache
.hadoop
.hbase
.client
.ColumnFamilyDescriptor
;
42 import org
.apache
.hadoop
.hbase
.client
.ColumnFamilyDescriptorBuilder
;
43 import org
.apache
.hadoop
.hbase
.client
.Delete
;
44 import org
.apache
.hadoop
.hbase
.client
.Put
;
45 import org
.apache
.hadoop
.hbase
.client
.Scan
;
46 import org
.apache
.hadoop
.hbase
.io
.compress
.Compression
;
47 import org
.apache
.hadoop
.hbase
.testclassification
.MediumTests
;
48 import org
.apache
.hadoop
.hbase
.testclassification
.RegionServerTests
;
49 import org
.apache
.hadoop
.hbase
.util
.BloomFilterUtil
;
50 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
51 import org
.junit
.After
;
52 import org
.junit
.Before
;
53 import org
.junit
.ClassRule
;
54 import org
.junit
.Test
;
55 import org
.junit
.experimental
.categories
.Category
;
56 import org
.junit
.runner
.RunWith
;
57 import org
.junit
.runners
.Parameterized
;
58 import org
.junit
.runners
.Parameterized
.Parameters
;
59 import org
.slf4j
.Logger
;
60 import org
.slf4j
.LoggerFactory
;
63 * Test various seek optimizations for correctness and check if they are
64 * actually saving I/O operations.
66 @RunWith(Parameterized
.class)
67 @Category({RegionServerTests
.class, MediumTests
.class})
68 public class TestSeekOptimizations
{
71 public static final HBaseClassTestRule CLASS_RULE
=
72 HBaseClassTestRule
.forClass(TestSeekOptimizations
.class);
74 private static final Logger LOG
=
75 LoggerFactory
.getLogger(TestSeekOptimizations
.class);
78 private static final String FAMILY
= "myCF";
79 private static final byte[] FAMILY_BYTES
= Bytes
.toBytes(FAMILY
);
81 private static final int PUTS_PER_ROW_COL
= 50;
82 private static final int DELETES_PER_ROW_COL
= 10;
84 private static final int NUM_ROWS
= 3;
85 private static final int NUM_COLS
= 3;
87 private static final boolean VERBOSE
= false;
90 * Disable this when this test fails hopelessly and you need to debug a
93 private static final boolean USE_MANY_STORE_FILES
= true;
95 private static final int[][] COLUMN_SETS
= new int[][] {
104 // Both start row and end row are inclusive here for the purposes of this
106 private static final int[][] ROW_RANGES
= new int[][] {
114 private static final int[] MAX_VERSIONS_VALUES
= new int[] { 1, 2 };
116 // Instance variables
117 private HRegion region
;
121 private Set
<Long
> putTimestamps
= new HashSet
<>();
122 private Set
<Long
> delTimestamps
= new HashSet
<>();
123 private List
<Cell
> expectedKVs
= new ArrayList
<>();
125 private Compression
.Algorithm comprAlgo
;
126 private BloomType bloomType
;
128 private long totalSeekDiligent
, totalSeekLazy
;
130 private final static HBaseTestingUtil TEST_UTIL
= new HBaseTestingUtil();
133 public static final Collection
<Object
[]> parameters() {
134 return HBaseTestingUtil
.BLOOM_AND_COMPRESSION_COMBINATIONS
;
137 public TestSeekOptimizations(Compression
.Algorithm comprAlgo
,
138 BloomType bloomType
) {
139 this.comprAlgo
= comprAlgo
;
140 this.bloomType
= bloomType
;
144 public void setUp() {
145 rand
= new Random(91238123L);
147 TEST_UTIL
.getConfiguration().setInt(BloomFilterUtil
.PREFIX_LENGTH_KEY
, 10);
151 public void testMultipleTimestampRanges() throws IOException
{
152 // enable seek counting
153 StoreFileScanner
.instrument();
154 ColumnFamilyDescriptor columnFamilyDescriptor
=
155 ColumnFamilyDescriptorBuilder
.newBuilder(Bytes
.toBytes(FAMILY
))
156 .setCompressionType(comprAlgo
)
157 .setBloomFilterType(bloomType
)
161 region
= TEST_UTIL
.createTestRegion("testMultipleTimestampRanges", columnFamilyDescriptor
);
163 // Delete the given timestamp and everything before.
164 final long latestDelTS
= USE_MANY_STORE_FILES ?
1397 : -1;
166 createTimestampRange(1, 50, -1);
167 createTimestampRange(51, 100, -1);
168 if (USE_MANY_STORE_FILES
) {
169 createTimestampRange(100, 500, 127);
170 createTimestampRange(900, 1300, -1);
171 createTimestampRange(1301, 2500, latestDelTS
);
172 createTimestampRange(2502, 2598, -1);
173 createTimestampRange(2599, 2999, -1);
176 prepareExpectedKVs(latestDelTS
);
178 for (int[] columnArr
: COLUMN_SETS
) {
179 for (int[] rowRange
: ROW_RANGES
) {
180 for (int maxVersions
: MAX_VERSIONS_VALUES
) {
181 for (boolean lazySeekEnabled
: new boolean[] { false, true }) {
182 testScan(columnArr
, lazySeekEnabled
, rowRange
[0], rowRange
[1],
189 final double seekSavings
= 1 - totalSeekLazy
* 1.0 / totalSeekDiligent
;
190 System
.err
.println("For bloom=" + bloomType
+ ", compr=" + comprAlgo
+
191 " total seeks without optimization: " + totalSeekDiligent
192 + ", with optimization: " + totalSeekLazy
+ " (" +
193 String
.format("%.2f%%", totalSeekLazy
* 100.0 / totalSeekDiligent
) +
194 "), savings: " + String
.format("%.2f%%",
195 100.0 * seekSavings
) + "\n");
197 // Test that lazy seeks are buying us something. Without the actual
198 // implementation of the lazy seek optimization this will be 0.
199 final double expectedSeekSavings
= 0.0;
200 assertTrue("Lazy seek is only saving " +
201 String
.format("%.2f%%", seekSavings
* 100) + " seeks but should " +
202 "save at least " + String
.format("%.2f%%", expectedSeekSavings
* 100),
203 seekSavings
>= expectedSeekSavings
);
206 private void testScan(final int[] columnArr
, final boolean lazySeekEnabled
,
207 final int startRow
, final int endRow
, int maxVersions
)
209 StoreScanner
.enableLazySeekGlobally(lazySeekEnabled
);
210 final Scan scan
= new Scan();
211 final Set
<String
> qualSet
= new HashSet
<>();
212 for (int iColumn
: columnArr
) {
213 String qualStr
= getQualStr(iColumn
);
214 scan
.addColumn(FAMILY_BYTES
, Bytes
.toBytes(qualStr
));
215 qualSet
.add(qualStr
);
217 scan
.readVersions(maxVersions
);
218 scan
.withStartRow(rowBytes(startRow
));
220 // Adjust for the fact that for multi-row queries the end row is exclusive.
221 if (startRow
!= endRow
) {
222 scan
.withStopRow(rowBytes(endRow
+ 1));
224 scan
.withStopRow(rowBytes(endRow
), true);
227 final long initialSeekCount
= StoreFileScanner
.getSeekCount();
228 final InternalScanner scanner
= region
.getScanner(scan
);
229 final List
<Cell
> results
= new ArrayList
<>();
230 final List
<Cell
> actualKVs
= new ArrayList
<>();
232 // Such a clumsy do-while loop appears to be the official way to use an
233 // internalScanner. scanner.next() return value refers to the _next_
234 // result, not to the one already returned in results.
237 hasNext
= scanner
.next(results
);
238 actualKVs
.addAll(results
);
242 List
<Cell
> filteredKVs
= filterExpectedResults(qualSet
,
243 rowBytes(startRow
), rowBytes(endRow
), maxVersions
);
244 final String rowRestrictionStr
=
245 (startRow
== -1 && endRow
== -1) ?
"all rows" : (
246 startRow
== endRow ?
("row=" + startRow
) : ("startRow="
247 + startRow
+ ", " + "endRow=" + endRow
));
248 final String columnRestrictionStr
=
249 columnArr
.length
== 0 ?
"all columns"
250 : ("columns=" + Arrays
.toString(columnArr
));
251 final String testDesc
=
252 "Bloom=" + bloomType
+ ", compr=" + comprAlgo
+ ", "
253 + (scan
.isGetScan() ?
"Get" : "Scan") + ": "
254 + columnRestrictionStr
+ ", " + rowRestrictionStr
255 + ", maxVersions=" + maxVersions
+ ", lazySeek=" + lazySeekEnabled
;
256 long seekCount
= StoreFileScanner
.getSeekCount() - initialSeekCount
;
258 System
.err
.println("Seek count: " + seekCount
+ ", KVs returned: "
259 + actualKVs
.size() + ". " + testDesc
+
260 (lazySeekEnabled ?
"\n" : ""));
262 if (lazySeekEnabled
) {
263 totalSeekLazy
+= seekCount
;
265 totalSeekDiligent
+= seekCount
;
267 assertKVListsEqual(testDesc
, filteredKVs
, actualKVs
);
270 private List
<Cell
> filterExpectedResults(Set
<String
> qualSet
,
271 byte[] startRow
, byte[] endRow
, int maxVersions
) {
272 final List
<Cell
> filteredKVs
= new ArrayList
<>();
273 final Map
<String
, Integer
> verCount
= new HashMap
<>();
274 for (Cell kv
: expectedKVs
) {
275 if (startRow
.length
> 0 &&
276 Bytes
.compareTo(kv
.getRowArray(), kv
.getRowOffset(), kv
.getRowLength(),
277 startRow
, 0, startRow
.length
) < 0) {
281 // In this unit test the end row is always inclusive.
282 if (endRow
.length
> 0 &&
283 Bytes
.compareTo(kv
.getRowArray(), kv
.getRowOffset(), kv
.getRowLength(),
284 endRow
, 0, endRow
.length
) > 0) {
288 if (!qualSet
.isEmpty() && (!CellUtil
.matchingFamily(kv
, FAMILY_BYTES
)
289 || !qualSet
.contains(Bytes
.toString(CellUtil
.cloneQualifier(kv
))))) {
293 final String rowColStr
=
294 Bytes
.toStringBinary(CellUtil
.cloneRow(kv
)) + "/"
295 + Bytes
.toStringBinary(CellUtil
.cloneFamily(kv
)) + ":"
296 + Bytes
.toStringBinary(CellUtil
.cloneQualifier(kv
));
297 final Integer curNumVer
= verCount
.get(rowColStr
);
298 final int newNumVer
= curNumVer
!= null ?
(curNumVer
+ 1) : 1;
299 if (newNumVer
<= maxVersions
) {
301 verCount
.put(rowColStr
, newNumVer
);
308 private void prepareExpectedKVs(long latestDelTS
) {
309 final List
<Cell
> filteredKVs
= new ArrayList
<>();
310 for (Cell kv
: expectedKVs
) {
311 if (kv
.getTimestamp() > latestDelTS
|| latestDelTS
== -1) {
315 expectedKVs
= filteredKVs
;
316 Collections
.sort(expectedKVs
, CellComparatorImpl
.COMPARATOR
);
319 public void put(String qual
, long ts
) {
320 if (!putTimestamps
.contains(ts
)) {
321 put
.addColumn(FAMILY_BYTES
, Bytes
.toBytes(qual
), ts
, createValue(ts
));
322 putTimestamps
.add(ts
);
325 LOG
.info("put: row " + Bytes
.toStringBinary(put
.getRow())
326 + ", cf " + FAMILY
+ ", qualifier " + qual
+ ", ts " + ts
);
330 private byte[] createValue(long ts
) {
331 return Bytes
.toBytes("value" + ts
);
334 public void delAtTimestamp(String qual
, long ts
) {
335 del
.addColumn(FAMILY_BYTES
, Bytes
.toBytes(qual
), ts
);
336 logDelete(qual
, ts
, "at");
339 private void logDelete(String qual
, long ts
, String delType
) {
341 LOG
.info("del " + delType
+ ": row "
342 + Bytes
.toStringBinary(put
.getRow()) + ", cf " + FAMILY
343 + ", qualifier " + qual
+ ", ts " + ts
);
347 private void delUpToTimestamp(String qual
, long upToTS
) {
348 del
.addColumns(FAMILY_BYTES
, Bytes
.toBytes(qual
), upToTS
);
349 logDelete(qual
, upToTS
, "up to and including");
352 private long randLong(long n
) {
353 long l
= rand
.nextLong();
354 if (l
== Long
.MIN_VALUE
)
356 return Math
.abs(l
) % n
;
359 private long randBetween(long a
, long b
) {
360 long x
= a
+ randLong(b
- a
+ 1);
361 assertTrue(a
<= x
&& x
<= b
);
365 private final String
rowStr(int i
) {
366 return ("row" + i
).intern();
369 private final byte[] rowBytes(int i
) {
371 return HConstants
.EMPTY_BYTE_ARRAY
;
373 return Bytes
.toBytes(rowStr(i
));
376 private final String
getQualStr(int i
) {
377 return ("qual" + i
).intern();
380 public void createTimestampRange(long minTS
, long maxTS
,
381 long deleteUpToTS
) throws IOException
{
382 assertTrue(minTS
< maxTS
);
383 assertTrue(deleteUpToTS
== -1
384 || (minTS
<= deleteUpToTS
&& deleteUpToTS
<= maxTS
));
386 for (int iRow
= 0; iRow
< NUM_ROWS
; ++iRow
) {
387 final String row
= rowStr(iRow
);
388 final byte[] rowBytes
= Bytes
.toBytes(row
);
389 for (int iCol
= 0; iCol
< NUM_COLS
; ++iCol
) {
390 final String qual
= getQualStr(iCol
);
391 final byte[] qualBytes
= Bytes
.toBytes(qual
);
392 put
= new Put(rowBytes
);
394 putTimestamps
.clear();
397 for (int i
= 0; i
< PUTS_PER_ROW_COL
; ++i
) {
398 put(qual
, randBetween(minTS
, maxTS
));
401 long[] putTimestampList
= new long[putTimestamps
.size()];
404 for (long ts
: putTimestamps
) {
405 putTimestampList
[i
++] = ts
;
409 // Delete a predetermined number of particular timestamps
410 delTimestamps
.clear();
411 assertTrue(putTimestampList
.length
>= DELETES_PER_ROW_COL
);
412 int numToDel
= DELETES_PER_ROW_COL
;
413 int tsRemaining
= putTimestampList
.length
;
414 del
= new Delete(rowBytes
);
415 for (long ts
: putTimestampList
) {
416 if (rand
.nextInt(tsRemaining
) < numToDel
) {
417 delAtTimestamp(qual
, ts
);
418 putTimestamps
.remove(ts
);
422 if (--tsRemaining
== 0) {
427 // Another type of delete: everything up to the given timestamp.
428 if (deleteUpToTS
!= -1) {
429 delUpToTimestamp(qual
, deleteUpToTS
);
433 if (!del
.isEmpty()) {
437 // Add remaining timestamps (those we have not deleted) to expected
439 for (long ts
: putTimestamps
) {
440 expectedKVs
.add(new KeyValue(rowBytes
, FAMILY_BYTES
, qualBytes
, ts
,
450 public void tearDown() throws IOException
{
451 if (region
!= null) {
452 HBaseTestingUtil
.closeRegionAndWAL(region
);
455 // We have to re-set the lazy seek flag back to the default so that other
456 // unit tests are not affected.
457 StoreScanner
.enableLazySeekGlobally(
458 StoreScanner
.LAZY_SEEK_ENABLED_BY_DEFAULT
);
462 public void assertKVListsEqual(String additionalMsg
,
463 final List
<?
extends Cell
> expected
,
464 final List
<?
extends Cell
> actual
) {
465 final int eLen
= expected
.size();
466 final int aLen
= actual
.size();
467 final int minLen
= Math
.min(eLen
, aLen
);
470 for (i
= 0; i
< minLen
471 && PrivateCellUtil
.compareKeyIgnoresMvcc(CellComparatorImpl
.COMPARATOR
, expected
.get(i
),
472 actual
.get(i
)) == 0; ++i
) {
475 if (additionalMsg
== null) {
478 if (!additionalMsg
.isEmpty()) {
479 additionalMsg
= ". " + additionalMsg
;
482 if (eLen
!= aLen
|| i
!= minLen
) {
483 throw new AssertionError(
484 "Expected and actual KV arrays differ at position " + i
+ ": " +
485 HBaseTestingUtil
.safeGetAsStr(expected
, i
) + " (length " + eLen
+") vs. " +
486 HBaseTestingUtil
.safeGetAsStr(actual
, i
) + " (length " + aLen
+ ")" + additionalMsg
);