HBASE-26567 Remove IndexType from ChunkCreator (#3947)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / regionserver / TestSeekOptimizations.java
bloba49d2e4bc194685492db6cf1693c71f5ca92975a
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.regionserver;
20 import static org.junit.Assert.assertTrue;
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collection;
26 import java.util.Collections;
27 import java.util.HashMap;
28 import java.util.HashSet;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Random;
32 import java.util.Set;
33 import org.apache.hadoop.hbase.Cell;
34 import org.apache.hadoop.hbase.CellComparatorImpl;
35 import org.apache.hadoop.hbase.CellUtil;
36 import org.apache.hadoop.hbase.HBaseClassTestRule;
37 import org.apache.hadoop.hbase.HBaseTestingUtil;
38 import org.apache.hadoop.hbase.HConstants;
39 import org.apache.hadoop.hbase.KeyValue;
40 import org.apache.hadoop.hbase.PrivateCellUtil;
41 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
42 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
43 import org.apache.hadoop.hbase.client.Delete;
44 import org.apache.hadoop.hbase.client.Put;
45 import org.apache.hadoop.hbase.client.Scan;
46 import org.apache.hadoop.hbase.io.compress.Compression;
47 import org.apache.hadoop.hbase.testclassification.MediumTests;
48 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
49 import org.apache.hadoop.hbase.util.BloomFilterUtil;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.junit.After;
52 import org.junit.Before;
53 import org.junit.ClassRule;
54 import org.junit.Test;
55 import org.junit.experimental.categories.Category;
56 import org.junit.runner.RunWith;
57 import org.junit.runners.Parameterized;
58 import org.junit.runners.Parameterized.Parameters;
59 import org.slf4j.Logger;
60 import org.slf4j.LoggerFactory;
62 /**
63 * Test various seek optimizations for correctness and check if they are
64 * actually saving I/O operations.
66 @RunWith(Parameterized.class)
67 @Category({RegionServerTests.class, MediumTests.class})
68 public class TestSeekOptimizations {
70 @ClassRule
71 public static final HBaseClassTestRule CLASS_RULE =
72 HBaseClassTestRule.forClass(TestSeekOptimizations.class);
74 private static final Logger LOG =
75 LoggerFactory.getLogger(TestSeekOptimizations.class);
77 // Constants
78 private static final String FAMILY = "myCF";
79 private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
81 private static final int PUTS_PER_ROW_COL = 50;
82 private static final int DELETES_PER_ROW_COL = 10;
84 private static final int NUM_ROWS = 3;
85 private static final int NUM_COLS = 3;
87 private static final boolean VERBOSE = false;
89 /**
90 * Disable this when this test fails hopelessly and you need to debug a
91 * simpler case.
93 private static final boolean USE_MANY_STORE_FILES = true;
95 private static final int[][] COLUMN_SETS = new int[][] {
96 {}, // All columns
97 {0},
98 {1},
99 {0, 2},
100 {1, 2},
101 {0, 1, 2},
104 // Both start row and end row are inclusive here for the purposes of this
105 // test.
106 private static final int[][] ROW_RANGES = new int[][] {
107 {-1, -1},
108 {0, 1},
109 {1, 1},
110 {1, 2},
111 {0, 2}
114 private static final int[] MAX_VERSIONS_VALUES = new int[] { 1, 2 };
116 // Instance variables
117 private HRegion region;
118 private Put put;
119 private Delete del;
120 private Random rand;
121 private Set<Long> putTimestamps = new HashSet<>();
122 private Set<Long> delTimestamps = new HashSet<>();
123 private List<Cell> expectedKVs = new ArrayList<>();
125 private Compression.Algorithm comprAlgo;
126 private BloomType bloomType;
128 private long totalSeekDiligent, totalSeekLazy;
130 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
132 @Parameters
133 public static final Collection<Object[]> parameters() {
134 return HBaseTestingUtil.BLOOM_AND_COMPRESSION_COMBINATIONS;
137 public TestSeekOptimizations(Compression.Algorithm comprAlgo,
138 BloomType bloomType) {
139 this.comprAlgo = comprAlgo;
140 this.bloomType = bloomType;
143 @Before
144 public void setUp() {
145 rand = new Random(91238123L);
146 expectedKVs.clear();
147 TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
150 @Test
151 public void testMultipleTimestampRanges() throws IOException {
152 // enable seek counting
153 StoreFileScanner.instrument();
154 ColumnFamilyDescriptor columnFamilyDescriptor =
155 ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(FAMILY))
156 .setCompressionType(comprAlgo)
157 .setBloomFilterType(bloomType)
158 .setMaxVersions(3)
159 .build();
161 region = TEST_UTIL.createTestRegion("testMultipleTimestampRanges", columnFamilyDescriptor);
163 // Delete the given timestamp and everything before.
164 final long latestDelTS = USE_MANY_STORE_FILES ? 1397 : -1;
166 createTimestampRange(1, 50, -1);
167 createTimestampRange(51, 100, -1);
168 if (USE_MANY_STORE_FILES) {
169 createTimestampRange(100, 500, 127);
170 createTimestampRange(900, 1300, -1);
171 createTimestampRange(1301, 2500, latestDelTS);
172 createTimestampRange(2502, 2598, -1);
173 createTimestampRange(2599, 2999, -1);
176 prepareExpectedKVs(latestDelTS);
178 for (int[] columnArr : COLUMN_SETS) {
179 for (int[] rowRange : ROW_RANGES) {
180 for (int maxVersions : MAX_VERSIONS_VALUES) {
181 for (boolean lazySeekEnabled : new boolean[] { false, true }) {
182 testScan(columnArr, lazySeekEnabled, rowRange[0], rowRange[1],
183 maxVersions);
189 final double seekSavings = 1 - totalSeekLazy * 1.0 / totalSeekDiligent;
190 System.err.println("For bloom=" + bloomType + ", compr=" + comprAlgo +
191 " total seeks without optimization: " + totalSeekDiligent
192 + ", with optimization: " + totalSeekLazy + " (" +
193 String.format("%.2f%%", totalSeekLazy * 100.0 / totalSeekDiligent) +
194 "), savings: " + String.format("%.2f%%",
195 100.0 * seekSavings) + "\n");
197 // Test that lazy seeks are buying us something. Without the actual
198 // implementation of the lazy seek optimization this will be 0.
199 final double expectedSeekSavings = 0.0;
200 assertTrue("Lazy seek is only saving " +
201 String.format("%.2f%%", seekSavings * 100) + " seeks but should " +
202 "save at least " + String.format("%.2f%%", expectedSeekSavings * 100),
203 seekSavings >= expectedSeekSavings);
206 private void testScan(final int[] columnArr, final boolean lazySeekEnabled,
207 final int startRow, final int endRow, int maxVersions)
208 throws IOException {
209 StoreScanner.enableLazySeekGlobally(lazySeekEnabled);
210 final Scan scan = new Scan();
211 final Set<String> qualSet = new HashSet<>();
212 for (int iColumn : columnArr) {
213 String qualStr = getQualStr(iColumn);
214 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qualStr));
215 qualSet.add(qualStr);
217 scan.readVersions(maxVersions);
218 scan.withStartRow(rowBytes(startRow));
220 // Adjust for the fact that for multi-row queries the end row is exclusive.
221 if (startRow != endRow) {
222 scan.withStopRow(rowBytes(endRow + 1));
223 } else {
224 scan.withStopRow(rowBytes(endRow), true);
227 final long initialSeekCount = StoreFileScanner.getSeekCount();
228 final InternalScanner scanner = region.getScanner(scan);
229 final List<Cell> results = new ArrayList<>();
230 final List<Cell> actualKVs = new ArrayList<>();
232 // Such a clumsy do-while loop appears to be the official way to use an
233 // internalScanner. scanner.next() return value refers to the _next_
234 // result, not to the one already returned in results.
235 boolean hasNext;
236 do {
237 hasNext = scanner.next(results);
238 actualKVs.addAll(results);
239 results.clear();
240 } while (hasNext);
242 List<Cell> filteredKVs = filterExpectedResults(qualSet,
243 rowBytes(startRow), rowBytes(endRow), maxVersions);
244 final String rowRestrictionStr =
245 (startRow == -1 && endRow == -1) ? "all rows" : (
246 startRow == endRow ? ("row=" + startRow) : ("startRow="
247 + startRow + ", " + "endRow=" + endRow));
248 final String columnRestrictionStr =
249 columnArr.length == 0 ? "all columns"
250 : ("columns=" + Arrays.toString(columnArr));
251 final String testDesc =
252 "Bloom=" + bloomType + ", compr=" + comprAlgo + ", "
253 + (scan.isGetScan() ? "Get" : "Scan") + ": "
254 + columnRestrictionStr + ", " + rowRestrictionStr
255 + ", maxVersions=" + maxVersions + ", lazySeek=" + lazySeekEnabled;
256 long seekCount = StoreFileScanner.getSeekCount() - initialSeekCount;
257 if (VERBOSE) {
258 System.err.println("Seek count: " + seekCount + ", KVs returned: "
259 + actualKVs.size() + ". " + testDesc +
260 (lazySeekEnabled ? "\n" : ""));
262 if (lazySeekEnabled) {
263 totalSeekLazy += seekCount;
264 } else {
265 totalSeekDiligent += seekCount;
267 assertKVListsEqual(testDesc, filteredKVs, actualKVs);
270 private List<Cell> filterExpectedResults(Set<String> qualSet,
271 byte[] startRow, byte[] endRow, int maxVersions) {
272 final List<Cell> filteredKVs = new ArrayList<>();
273 final Map<String, Integer> verCount = new HashMap<>();
274 for (Cell kv : expectedKVs) {
275 if (startRow.length > 0 &&
276 Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
277 startRow, 0, startRow.length) < 0) {
278 continue;
281 // In this unit test the end row is always inclusive.
282 if (endRow.length > 0 &&
283 Bytes.compareTo(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
284 endRow, 0, endRow.length) > 0) {
285 continue;
288 if (!qualSet.isEmpty() && (!CellUtil.matchingFamily(kv, FAMILY_BYTES)
289 || !qualSet.contains(Bytes.toString(CellUtil.cloneQualifier(kv))))) {
290 continue;
293 final String rowColStr =
294 Bytes.toStringBinary(CellUtil.cloneRow(kv)) + "/"
295 + Bytes.toStringBinary(CellUtil.cloneFamily(kv)) + ":"
296 + Bytes.toStringBinary(CellUtil.cloneQualifier(kv));
297 final Integer curNumVer = verCount.get(rowColStr);
298 final int newNumVer = curNumVer != null ? (curNumVer + 1) : 1;
299 if (newNumVer <= maxVersions) {
300 filteredKVs.add(kv);
301 verCount.put(rowColStr, newNumVer);
305 return filteredKVs;
308 private void prepareExpectedKVs(long latestDelTS) {
309 final List<Cell> filteredKVs = new ArrayList<>();
310 for (Cell kv : expectedKVs) {
311 if (kv.getTimestamp() > latestDelTS || latestDelTS == -1) {
312 filteredKVs.add(kv);
315 expectedKVs = filteredKVs;
316 Collections.sort(expectedKVs, CellComparatorImpl.COMPARATOR);
319 public void put(String qual, long ts) {
320 if (!putTimestamps.contains(ts)) {
321 put.addColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts, createValue(ts));
322 putTimestamps.add(ts);
324 if (VERBOSE) {
325 LOG.info("put: row " + Bytes.toStringBinary(put.getRow())
326 + ", cf " + FAMILY + ", qualifier " + qual + ", ts " + ts);
330 private byte[] createValue(long ts) {
331 return Bytes.toBytes("value" + ts);
334 public void delAtTimestamp(String qual, long ts) {
335 del.addColumn(FAMILY_BYTES, Bytes.toBytes(qual), ts);
336 logDelete(qual, ts, "at");
339 private void logDelete(String qual, long ts, String delType) {
340 if (VERBOSE) {
341 LOG.info("del " + delType + ": row "
342 + Bytes.toStringBinary(put.getRow()) + ", cf " + FAMILY
343 + ", qualifier " + qual + ", ts " + ts);
347 private void delUpToTimestamp(String qual, long upToTS) {
348 del.addColumns(FAMILY_BYTES, Bytes.toBytes(qual), upToTS);
349 logDelete(qual, upToTS, "up to and including");
352 private long randLong(long n) {
353 long l = rand.nextLong();
354 if (l == Long.MIN_VALUE)
355 l = Long.MAX_VALUE;
356 return Math.abs(l) % n;
359 private long randBetween(long a, long b) {
360 long x = a + randLong(b - a + 1);
361 assertTrue(a <= x && x <= b);
362 return x;
365 private final String rowStr(int i) {
366 return ("row" + i).intern();
369 private final byte[] rowBytes(int i) {
370 if (i == -1) {
371 return HConstants.EMPTY_BYTE_ARRAY;
373 return Bytes.toBytes(rowStr(i));
376 private final String getQualStr(int i) {
377 return ("qual" + i).intern();
380 public void createTimestampRange(long minTS, long maxTS,
381 long deleteUpToTS) throws IOException {
382 assertTrue(minTS < maxTS);
383 assertTrue(deleteUpToTS == -1
384 || (minTS <= deleteUpToTS && deleteUpToTS <= maxTS));
386 for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
387 final String row = rowStr(iRow);
388 final byte[] rowBytes = Bytes.toBytes(row);
389 for (int iCol = 0; iCol < NUM_COLS; ++iCol) {
390 final String qual = getQualStr(iCol);
391 final byte[] qualBytes = Bytes.toBytes(qual);
392 put = new Put(rowBytes);
394 putTimestamps.clear();
395 put(qual, minTS);
396 put(qual, maxTS);
397 for (int i = 0; i < PUTS_PER_ROW_COL; ++i) {
398 put(qual, randBetween(minTS, maxTS));
401 long[] putTimestampList = new long[putTimestamps.size()];
403 int i = 0;
404 for (long ts : putTimestamps) {
405 putTimestampList[i++] = ts;
409 // Delete a predetermined number of particular timestamps
410 delTimestamps.clear();
411 assertTrue(putTimestampList.length >= DELETES_PER_ROW_COL);
412 int numToDel = DELETES_PER_ROW_COL;
413 int tsRemaining = putTimestampList.length;
414 del = new Delete(rowBytes);
415 for (long ts : putTimestampList) {
416 if (rand.nextInt(tsRemaining) < numToDel) {
417 delAtTimestamp(qual, ts);
418 putTimestamps.remove(ts);
419 --numToDel;
422 if (--tsRemaining == 0) {
423 break;
427 // Another type of delete: everything up to the given timestamp.
428 if (deleteUpToTS != -1) {
429 delUpToTimestamp(qual, deleteUpToTS);
432 region.put(put);
433 if (!del.isEmpty()) {
434 region.delete(del);
437 // Add remaining timestamps (those we have not deleted) to expected
438 // results
439 for (long ts : putTimestamps) {
440 expectedKVs.add(new KeyValue(rowBytes, FAMILY_BYTES, qualBytes, ts,
441 KeyValue.Type.Put));
446 region.flush(true);
449 @After
450 public void tearDown() throws IOException {
451 if (region != null) {
452 HBaseTestingUtil.closeRegionAndWAL(region);
455 // We have to re-set the lazy seek flag back to the default so that other
456 // unit tests are not affected.
457 StoreScanner.enableLazySeekGlobally(
458 StoreScanner.LAZY_SEEK_ENABLED_BY_DEFAULT);
462 public void assertKVListsEqual(String additionalMsg,
463 final List<? extends Cell> expected,
464 final List<? extends Cell> actual) {
465 final int eLen = expected.size();
466 final int aLen = actual.size();
467 final int minLen = Math.min(eLen, aLen);
469 int i;
470 for (i = 0; i < minLen
471 && PrivateCellUtil.compareKeyIgnoresMvcc(CellComparatorImpl.COMPARATOR, expected.get(i),
472 actual.get(i)) == 0; ++i) {
475 if (additionalMsg == null) {
476 additionalMsg = "";
478 if (!additionalMsg.isEmpty()) {
479 additionalMsg = ". " + additionalMsg;
482 if (eLen != aLen || i != minLen) {
483 throw new AssertionError(
484 "Expected and actual KV arrays differ at position " + i + ": " +
485 HBaseTestingUtil.safeGetAsStr(expected, i) + " (length " + eLen +") vs. " +
486 HBaseTestingUtil.safeGetAsStr(actual, i) + " (length " + aLen + ")" + additionalMsg);