/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
56 * Test performance improvement of joined scanners optimization:
57 * https://issues.apache.org/jira/browse/HBASE-5416
59 @Category({RegionServerTests
.class, LargeTests
.class})
60 public class TestJoinedScanners
{
61 private static final Log LOG
= LogFactory
.getLog(TestJoinedScanners
.class);
63 private static final HBaseTestingUtility TEST_UTIL
= new HBaseTestingUtility();
64 private static final String DIR
= TEST_UTIL
.getDataTestDir("TestJoinedScanners").toString();
66 private static final byte[] cf_essential
= Bytes
.toBytes("essential");
67 private static final byte[] cf_joined
= Bytes
.toBytes("joined");
68 private static final byte[] col_name
= Bytes
.toBytes("a");
69 private static final byte[] flag_yes
= Bytes
.toBytes("Y");
70 private static final byte[] flag_no
= Bytes
.toBytes("N");
72 private static DataBlockEncoding blockEncoding
= DataBlockEncoding
.FAST_DIFF
;
73 private static int selectionRatio
= 30;
74 private static int valueWidth
= 128 * 1024;
77 public TestName name
= new TestName();
80 public void testJoinedScanners() throws Exception
{
81 String dataNodeHosts
[] = new String
[] { "host1", "host2", "host3" };
82 int regionServersCount
= 3;
84 HBaseTestingUtility htu
= new HBaseTestingUtility();
86 final int DEFAULT_BLOCK_SIZE
= 1024*1024;
87 htu
.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE
);
88 htu
.getConfiguration().setInt("dfs.replication", 1);
89 htu
.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
90 MiniHBaseCluster cluster
= null;
93 cluster
= htu
.startMiniCluster(1, regionServersCount
, dataNodeHosts
);
94 byte [][] families
= {cf_essential
, cf_joined
};
96 final TableName tableName
= TableName
.valueOf(name
.getMethodName());
97 HTableDescriptor desc
= new HTableDescriptor(tableName
);
98 for(byte[] family
: families
) {
99 HColumnDescriptor hcd
= new HColumnDescriptor(family
);
100 hcd
.setDataBlockEncoding(blockEncoding
);
103 htu
.getAdmin().createTable(desc
);
104 Table ht
= htu
.getConnection().getTable(tableName
);
106 long rows_to_insert
= 1000;
107 int insert_batch
= 20;
108 long time
= System
.nanoTime();
109 Random rand
= new Random(time
);
111 LOG
.info("Make " + Long
.toString(rows_to_insert
) + " rows, total size = "
112 + Float
.toString(rows_to_insert
* valueWidth
/ 1024 / 1024) + " MB");
114 byte [] val_large
= new byte[valueWidth
];
116 List
<Put
> puts
= new ArrayList
<>();
118 for (long i
= 0; i
< rows_to_insert
; i
++) {
119 Put put
= new Put(Bytes
.toBytes(Long
.toString (i
)));
120 if (rand
.nextInt(100) <= selectionRatio
) {
121 put
.addColumn(cf_essential
, col_name
, flag_yes
);
123 put
.addColumn(cf_essential
, col_name
, flag_no
);
125 put
.addColumn(cf_joined
, col_name
, val_large
);
127 if (puts
.size() >= insert_batch
) {
132 if (puts
.size() >= 0) {
137 LOG
.info("Data generated in "
138 + Double
.toString((System
.nanoTime() - time
) / 1000000000.0) + " seconds");
141 for (int i
= 0; i
< 10; ++i
) {
142 runScanner(ht
, slow
);
148 if (cluster
!= null) {
149 htu
.shutdownMiniCluster();
154 private void runScanner(Table table
, boolean slow
) throws Exception
{
155 long time
= System
.nanoTime();
156 Scan scan
= new Scan();
157 scan
.addColumn(cf_essential
, col_name
);
158 scan
.addColumn(cf_joined
, col_name
);
160 SingleColumnValueFilter filter
= new SingleColumnValueFilter(
161 cf_essential
, col_name
, CompareFilter
.CompareOp
.EQUAL
, flag_yes
);
162 filter
.setFilterIfMissing(true);
163 scan
.setFilter(filter
);
164 scan
.setLoadColumnFamiliesOnDemand(!slow
);
166 ResultScanner result_scanner
= table
.getScanner(scan
);
169 while ((res
= result_scanner
.next()) != null) {
173 double timeSec
= (System
.nanoTime() - time
) / 1000000000.0;
174 result_scanner
.close();
175 LOG
.info((slow ?
"Slow" : "Joined") + " scanner finished in " + Double
.toString(timeSec
)
176 + " seconds, got " + Long
.toString(rows_count
/2) + " rows");
179 private static Options options
= new Options();
182 * Command line interface:
184 * @throws IOException if there is a bug while reading from disk
186 public static void main(final String
[] args
) throws Exception
{
187 Option encodingOption
= new Option("e", "blockEncoding", true,
188 "Data block encoding; Default: FAST_DIFF");
189 encodingOption
.setRequired(false);
190 options
.addOption(encodingOption
);
192 Option ratioOption
= new Option("r", "selectionRatio", true,
193 "Ratio of selected rows using essential column family");
194 ratioOption
.setRequired(false);
195 options
.addOption(ratioOption
);
197 Option widthOption
= new Option("w", "valueWidth", true,
198 "Width of value for non-essential column family");
199 widthOption
.setRequired(false);
200 options
.addOption(widthOption
);
202 CommandLineParser parser
= new GnuParser();
203 CommandLine cmd
= parser
.parse(options
, args
);
204 if (args
.length
< 1) {
205 HelpFormatter formatter
= new HelpFormatter();
206 formatter
.printHelp("TestJoinedScanners", options
, true);
209 if (cmd
.hasOption("e")) {
210 blockEncoding
= DataBlockEncoding
.valueOf(cmd
.getOptionValue("e"));
212 if (cmd
.hasOption("r")) {
213 selectionRatio
= Integer
.parseInt(cmd
.getOptionValue("r"));
215 if (cmd
.hasOption("w")) {
216 valueWidth
= Integer
.parseInt(cmd
.getOptionValue("w"));
219 TestJoinedScanners test
= new TestJoinedScanners();
220 test
.testJoinedScanners();