2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.regionserver
;
20 import static org
.junit
.Assert
.assertEquals
;
21 import static org
.junit
.Assert
.assertFalse
;
22 import static org
.junit
.Assert
.assertTrue
;
24 import java
.io
.IOException
;
25 import java
.util
.ArrayList
;
26 import java
.util
.Collection
;
27 import java
.util
.Collections
;
28 import java
.util
.HashMap
;
29 import java
.util
.HashSet
;
30 import java
.util
.List
;
32 import java
.util
.Random
;
34 import java
.util
.TreeSet
;
35 import org
.apache
.hadoop
.hbase
.Cell
;
36 import org
.apache
.hadoop
.hbase
.CellComparatorImpl
;
37 import org
.apache
.hadoop
.hbase
.CellUtil
;
38 import org
.apache
.hadoop
.hbase
.HBaseTestingUtil
;
39 import org
.apache
.hadoop
.hbase
.KeyValue
;
40 import org
.apache
.hadoop
.hbase
.KeyValueTestUtil
;
41 import org
.apache
.hadoop
.hbase
.PrivateCellUtil
;
42 import org
.apache
.hadoop
.hbase
.client
.ColumnFamilyDescriptorBuilder
;
43 import org
.apache
.hadoop
.hbase
.client
.Delete
;
44 import org
.apache
.hadoop
.hbase
.client
.Put
;
45 import org
.apache
.hadoop
.hbase
.client
.Scan
;
46 import org
.apache
.hadoop
.hbase
.io
.compress
.Compression
;
47 import org
.apache
.hadoop
.hbase
.io
.encoding
.DataBlockEncoding
;
48 import org
.apache
.hadoop
.hbase
.io
.hfile
.BlockCacheFactory
;
49 import org
.apache
.hadoop
.hbase
.util
.BloomFilterUtil
;
50 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
51 import org
.junit
.Test
;
52 import org
.junit
.runners
.Parameterized
.Parameter
;
53 import org
.slf4j
.Logger
;
54 import org
.slf4j
.LoggerFactory
;
/**
 * Tests optimized scanning of multiple columns. <br>
 * We separated the big test into several sub-class UTs because, with the ROWCOL bloom type, the
 * row-col bloom filter is tested frequently (to save an HDFS seek) every time the scan switches
 * from one column to another in our UT. This is CPU-time consuming (~45s for each case), so the
 * ROWCOL case was moved into a separate LargeTests class to avoid timeout failures. <br>
 * <br>
 * To be clear: in TestMultiColumnScanner we flush 10 (NUM_FLUSHES=10) HFiles, and the table
 * holds ~1000 cells (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000). Each full table scan
 * checks the ROWCOL bloom filter 20 (rows) * 8 (columns) * 10 (hfiles) = 1600 times; in addition,
 * the full table is scanned 6*2^8=1536 times, so in total there are 1600*1536=2457600 bloom
 * filter checks. (See HBASE-21520)
 */
69 public abstract class TestMultiColumnScanner
{
71 private static final Logger LOG
= LoggerFactory
.getLogger(TestMultiColumnScanner
.class);
73 private static final String TABLE_NAME
=
74 TestMultiColumnScanner
.class.getSimpleName();
76 static final int MAX_VERSIONS
= 50;
78 private static final String FAMILY
= "CF";
79 private static final byte[] FAMILY_BYTES
= Bytes
.toBytes(FAMILY
);
82 * The size of the column qualifier set used. Increasing this parameter
83 * exponentially increases test time.
85 private static final int NUM_COLUMNS
= 8;
87 private static final int MAX_COLUMN_BIT_MASK
= 1 << NUM_COLUMNS
- 1;
88 private static final int NUM_FLUSHES
= 10;
89 private static final int NUM_ROWS
= 20;
91 /** A large value of type long for use as a timestamp */
92 private static final long BIG_LONG
= 9111222333444555666L;
95 * Timestamps to test with. Cannot use {@link Long#MAX_VALUE} here, because
96 * it will be replaced by an timestamp auto-generated based on the time.
98 private static final long[] TIMESTAMPS
= new long[] { 1, 3, 5,
99 Integer
.MAX_VALUE
, BIG_LONG
, Long
.MAX_VALUE
- 1 };
101 /** The probability that a column is skipped in a store file. */
102 private static final double COLUMN_SKIP_IN_STORE_FILE_PROB
= 0.7;
104 /** The probability to delete a row/column pair */
105 private static final double DELETE_PROBABILITY
= 0.02;
107 private final static HBaseTestingUtil TEST_UTIL
= new HBaseTestingUtil();
110 public Compression
.Algorithm comprAlgo
;
113 public BloomType bloomType
;
116 public DataBlockEncoding dataBlockEncoding
;
118 // Some static sanity-checking.
120 assertTrue(BIG_LONG
> 0.9 * Long
.MAX_VALUE
); // Guard against typos.
122 // Ensure TIMESTAMPS are sorted.
123 for (int i
= 0; i
< TIMESTAMPS
.length
- 1; ++i
)
124 assertTrue(TIMESTAMPS
[i
] < TIMESTAMPS
[i
+ 1]);
127 public static Collection
<Object
[]> generateParams(Compression
.Algorithm algo
,
128 boolean useDataBlockEncoding
) {
129 List
<Object
[]> parameters
= new ArrayList
<>();
130 for (BloomType bloomType
: BloomType
.values()) {
131 DataBlockEncoding dataBlockEncoding
=
132 useDataBlockEncoding ? DataBlockEncoding
.PREFIX
: DataBlockEncoding
.NONE
;
133 parameters
.add(new Object
[] { algo
, bloomType
, dataBlockEncoding
});
139 public void testMultiColumnScanner() throws IOException
{
140 TEST_UTIL
.getConfiguration().setInt(BloomFilterUtil
.PREFIX_LENGTH_KEY
, 10);
141 HRegion region
= TEST_UTIL
.createTestRegion(TABLE_NAME
,
142 ColumnFamilyDescriptorBuilder
.newBuilder(FAMILY_BYTES
).setCompressionType(comprAlgo
)
143 .setBloomFilterType(bloomType
).setMaxVersions(MAX_VERSIONS
)
144 .setDataBlockEncoding(dataBlockEncoding
).build(),
145 BlockCacheFactory
.createBlockCache(TEST_UTIL
.getConfiguration()));
146 List
<String
> rows
= sequentialStrings("row", NUM_ROWS
);
147 List
<String
> qualifiers
= sequentialStrings("qual", NUM_COLUMNS
);
148 List
<KeyValue
> kvs
= new ArrayList
<>();
149 Set
<String
> keySet
= new HashSet
<>();
151 // A map from <row>_<qualifier> to the most recent delete timestamp for
153 Map
<String
, Long
> lastDelTimeMap
= new HashMap
<>();
155 Random rand
= new Random(29372937L);
157 for (int iFlush
= 0; iFlush
< NUM_FLUSHES
; ++iFlush
) {
158 for (String qual
: qualifiers
) {
159 // This is where we decide to include or not include this column into
160 // this store file, regardless of row and timestamp.
161 if (rand
.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB
)
164 byte[] qualBytes
= Bytes
.toBytes(qual
);
165 for (String row
: rows
) {
166 Put p
= new Put(Bytes
.toBytes(row
));
167 for (long ts
: TIMESTAMPS
) {
168 String value
= createValue(row
, qual
, ts
);
169 KeyValue kv
= KeyValueTestUtil
.create(row
, FAMILY
, qual
, ts
,
171 assertEquals(kv
.getTimestamp(), ts
);
173 String keyAsString
= kv
.toString();
174 if (!keySet
.contains(keyAsString
)) {
175 keySet
.add(keyAsString
);
181 Delete d
= new Delete(Bytes
.toBytes(row
));
182 boolean deletedSomething
= false;
183 for (long ts
: TIMESTAMPS
)
184 if (rand
.nextDouble() < DELETE_PROBABILITY
) {
185 d
.addColumns(FAMILY_BYTES
, qualBytes
, ts
);
186 String rowAndQual
= row
+ "_" + qual
;
187 Long whenDeleted
= lastDelTimeMap
.get(rowAndQual
);
188 lastDelTimeMap
.put(rowAndQual
, whenDeleted
== null ? ts
189 : Math
.max(ts
, whenDeleted
));
190 deletedSomething
= true;
192 if (deletedSomething
)
199 Collections
.sort(kvs
, CellComparatorImpl
.COMPARATOR
);
200 for (int maxVersions
= 1; maxVersions
<= TIMESTAMPS
.length
; ++maxVersions
) {
201 for (int columnBitMask
= 1; columnBitMask
<= MAX_COLUMN_BIT_MASK
; ++columnBitMask
) {
202 Scan scan
= new Scan();
203 scan
.readVersions(maxVersions
);
204 Set
<String
> qualSet
= new TreeSet
<>();
206 int columnMaskTmp
= columnBitMask
;
207 for (String qual
: qualifiers
) {
208 if ((columnMaskTmp
& 1) != 0) {
209 scan
.addColumn(FAMILY_BYTES
, Bytes
.toBytes(qual
));
214 assertEquals(0, columnMaskTmp
);
217 InternalScanner scanner
= region
.getScanner(scan
);
218 List
<Cell
> results
= new ArrayList
<>();
222 String queryInfo
= "columns queried: " + qualSet
+ " (columnBitMask="
223 + columnBitMask
+ "), maxVersions=" + maxVersions
;
225 while (scanner
.next(results
) || results
.size() > 0) {
226 for (Cell kv
: results
) {
227 while (kvPos
< kvs
.size()
228 && !matchesQuery(kvs
.get(kvPos
), qualSet
, maxVersions
,
232 String rowQual
= getRowQualStr(kv
);
233 String deleteInfo
= "";
234 Long lastDelTS
= lastDelTimeMap
.get(rowQual
);
235 if (lastDelTS
!= null) {
236 deleteInfo
= "; last timestamp when row/column " + rowQual
237 + " was deleted: " + lastDelTS
;
239 assertTrue("Scanner returned additional key/value: " + kv
+ ", "
240 + queryInfo
+ deleteInfo
+ ";", kvPos
< kvs
.size());
241 assertTrue("Scanner returned wrong key/value; " + queryInfo
+ deleteInfo
+ ";",
242 PrivateCellUtil
.equalsIgnoreMvccVersion(kvs
.get(kvPos
), (kv
)));
248 for (; kvPos
< kvs
.size(); ++kvPos
) {
249 KeyValue remainingKV
= kvs
.get(kvPos
);
250 assertFalse("Matching column not returned by scanner: "
251 + remainingKV
+ ", " + queryInfo
+ ", results returned: "
252 + numResults
, matchesQuery(remainingKV
, qualSet
, maxVersions
,
257 assertTrue("This test is supposed to delete at least some row/column " +
258 "pairs", lastDelTimeMap
.size() > 0);
259 LOG
.info("Number of row/col pairs deleted at least once: " +
260 lastDelTimeMap
.size());
261 HBaseTestingUtil
.closeRegionAndWAL(region
);
264 private static String
getRowQualStr(Cell kv
) {
265 String rowStr
= Bytes
.toString(CellUtil
.cloneRow(kv
));
266 String qualStr
= Bytes
.toString(CellUtil
.cloneQualifier(kv
));
267 return rowStr
+ "_" + qualStr
;
270 private static boolean matchesQuery(KeyValue kv
, Set
<String
> qualSet
,
271 int maxVersions
, Map
<String
, Long
> lastDelTimeMap
) {
272 Long lastDelTS
= lastDelTimeMap
.get(getRowQualStr(kv
));
273 long ts
= kv
.getTimestamp();
274 return qualSet
.contains(qualStr(kv
))
275 && ts
>= TIMESTAMPS
[TIMESTAMPS
.length
- maxVersions
]
276 && (lastDelTS
== null || ts
> lastDelTS
);
279 private static String
qualStr(KeyValue kv
) {
280 return Bytes
.toString(kv
.getQualifierArray(), kv
.getQualifierOffset(),
281 kv
.getQualifierLength());
284 static String
createValue(String row
, String qual
, long ts
) {
285 return "value_for_" + row
+ "_" + qual
+ "_" + ts
;
288 private static List
<String
> sequentialStrings(String prefix
, int n
) {
289 List
<String
> lst
= new ArrayList
<>();
290 for (int i
= 0; i
< n
; ++i
) {
291 StringBuilder sb
= new StringBuilder();
292 sb
.append(prefix
+ i
);
294 // Make column length depend on i.
296 while (iBitShifted
!= 0) {
297 sb
.append((iBitShifted
& 1) == 0 ?
'a' : 'b');
301 lst
.add(sb
.toString());