HBASE-17532 Replaced explicit type with diamond operator
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / regionserver / TestJoinedScanners.java
blob 83810f2024516d68339c650291c8f821909d3f3a
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
/**
 * Test performance improvement of joined scanners optimization:
 * https://issues.apache.org/jira/browse/HBASE-5416
 */
@Category({RegionServerTests.class, LargeTests.class})
public class TestJoinedScanners {
  private static final Log LOG = LogFactory.getLog(TestJoinedScanners.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private static final String DIR = TEST_UTIL.getDataTestDir("TestJoinedScanners").toString();
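
  // Two column families: the small "essential" family holds the flag the
  // filter evaluates, while the large "joined" family is only fetched on
  // demand for rows that pass the filter (the HBASE-5416 optimization under test).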
  private static final byte[] cf_essential = Bytes.toBytes("essential");
  private static final byte[] cf_joined = Bytes.toBytes("joined");
  private static final byte[] col_name = Bytes.toBytes("a");
  private static final byte[] flag_yes = Bytes.toBytes("Y");
  private static final byte[] flag_no = Bytes.toBytes("N");
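
  // Defaults; each can be overridden from the command line in main() below.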
  private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF;
  private static int selectionRatio = 30;
  private static int valueWidth = 128 * 1024;
  @Rule
  public TestName name = new TestName();
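
  /*
   * Loads a table whose joined family carries large values, then alternately
   * runs the same scan with and without on-demand column family loading so
   * the two timings can be compared in the log output.
   */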
  @Test
  public void testJoinedScanners() throws Exception {
    String[] dataNodeHosts = new String[] { "host1", "host2", "host3" };
    int regionServersCount = 3;

    HBaseTestingUtility htu = new HBaseTestingUtility();
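
    // 1 MB DFS blocks and single replication keep the mini DFS light, while
    // the 300 GB region max file size prevents splits during the test.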
    final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
    htu.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
    htu.getConfiguration().setInt("dfs.replication", 1);
    htu.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);
    MiniHBaseCluster cluster = null;
    try {
      cluster = htu.startMiniCluster(1, regionServersCount, dataNodeHosts);
      byte[][] families = { cf_essential, cf_joined };

      final TableName tableName = TableName.valueOf(name.getMethodName());
      HTableDescriptor desc = new HTableDescriptor(tableName);
      for (byte[] family : families) {
        HColumnDescriptor hcd = new HColumnDescriptor(family);
        hcd.setDataBlockEncoding(blockEncoding);
        desc.addFamily(hcd);
      }
      htu.getAdmin().createTable(desc);
      Table ht = htu.getConnection().getTable(tableName);
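
      // Generate rows_to_insert rows: every row gets a one-byte Y/N flag in
      // the essential family and a valueWidth-byte blob in the joined family,
      // with roughly selectionRatio percent of the flags set to Y.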
      long rows_to_insert = 1000;
      int insert_batch = 20;
      long time = System.nanoTime();
      Random rand = new Random(time);

      LOG.info("Make " + Long.toString(rows_to_insert) + " rows, total size = "
        + Float.toString(rows_to_insert * valueWidth / 1024 / 1024) + " MB");

      byte[] val_large = new byte[valueWidth];

      List<Put> puts = new ArrayList<>();

      for (long i = 0; i < rows_to_insert; i++) {
        Put put = new Put(Bytes.toBytes(Long.toString(i)));
        if (rand.nextInt(100) <= selectionRatio) {
          put.addColumn(cf_essential, col_name, flag_yes);
        } else {
          put.addColumn(cf_essential, col_name, flag_no);
        }
        put.addColumn(cf_joined, col_name, val_large);
        puts.add(put);
        if (puts.size() >= insert_batch) {
          ht.put(puts);
          puts.clear();
        }
      }
      if (puts.size() > 0) {
        ht.put(puts);
        puts.clear();
      }

      LOG.info("Data generated in "
        + Double.toString((System.nanoTime() - time) / 1000000000.0) + " seconds");
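
      // Alternate the slow (full-load) and joined (on-demand) scans so both
      // code paths run five times each against the same data.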
      boolean slow = true;
      for (int i = 0; i < 10; ++i) {
        runScanner(ht, slow);
        slow = !slow;
      }

      ht.close();
    } finally {
      if (cluster != null) {
        htu.shutdownMiniCluster();
      }
    }
  }
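
  /*
   * Runs one scan over the table. With slow == true both families are loaded
   * for every row; with slow == false, setLoadColumnFamiliesOnDemand defers
   * loading of the joined family until the essential-family filter has
   * accepted the row.
   */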
  private void runScanner(Table table, boolean slow) throws Exception {
    long time = System.nanoTime();
    Scan scan = new Scan();
    scan.addColumn(cf_essential, col_name);
    scan.addColumn(cf_joined, col_name);
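
    // Only rows whose essential flag equals "Y" are returned; setFilterIfMissing
    // also drops rows that lack the flag column entirely.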
    SingleColumnValueFilter filter = new SingleColumnValueFilter(
        cf_essential, col_name, CompareFilter.CompareOp.EQUAL, flag_yes);
    filter.setFilterIfMissing(true);
    scan.setFilter(filter);
    scan.setLoadColumnFamiliesOnDemand(!slow);

    ResultScanner result_scanner = table.getScanner(scan);
    Result res;
    long rows_count = 0;
    while ((res = result_scanner.next()) != null) {
      rows_count++;
    }

    double timeSec = (System.nanoTime() - time) / 1000000000.0;
    result_scanner.close();
    LOG.info((slow ? "Slow" : "Joined") + " scanner finished in " + Double.toString(timeSec)
      + " seconds, got " + Long.toString(rows_count) + " rows");
  }
  private static Options options = new Options();
  /**
   * Command line interface:
   * @param args
   * @throws IOException if reading from disk fails
   */
  public static void main(final String[] args) throws Exception {
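    // A hypothetical invocation (all flags are optional; values shown are the
    // defaults): TestJoinedScanners -e FAST_DIFF -r 30 -w 131072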
    Option encodingOption = new Option("e", "blockEncoding", true,
      "Data block encoding; Default: FAST_DIFF");
    encodingOption.setRequired(false);
    options.addOption(encodingOption);

    Option ratioOption = new Option("r", "selectionRatio", true,
      "Ratio of selected rows using essential column family");
    ratioOption.setRequired(false);
    options.addOption(ratioOption);

    Option widthOption = new Option("w", "valueWidth", true,
      "Width of value for non-essential column family");
    widthOption.setRequired(false);
    options.addOption(widthOption);

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
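    // Print usage when invoked without arguments; the defaults above apply to
    // any option that is not set.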
    if (args.length < 1) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("TestJoinedScanners", options, true);
    }

    if (cmd.hasOption("e")) {
      blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e"));
    }
    if (cmd.hasOption("r")) {
      selectionRatio = Integer.parseInt(cmd.getOptionValue("r"));
    }
    if (cmd.hasOption("w")) {
      valueWidth = Integer.parseInt(cmd.getOptionValue("w"));
    }
    // run the test
    TestJoinedScanners test = new TestJoinedScanners();
    test.testJoinedScanners();
  }
}