3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org
.apache
.hadoop
.hbase
;
21 import java
.io
.IOException
;
22 import java
.security
.SecureRandom
;
23 import java
.util
.Random
;
25 import org
.apache
.commons
.math3
.random
.RandomData
;
26 import org
.apache
.commons
.math3
.random
.RandomDataImpl
;
27 import org
.apache
.hadoop
.conf
.Configuration
;
28 import org
.apache
.hadoop
.fs
.FileSystem
;
29 import org
.apache
.hadoop
.fs
.Path
;
30 import org
.apache
.yetus
.audience
.InterfaceAudience
;
31 import org
.slf4j
.Logger
;
32 import org
.slf4j
.LoggerFactory
;
33 import org
.apache
.hadoop
.hbase
.io
.ImmutableBytesWritable
;
34 import org
.apache
.hadoop
.hbase
.io
.crypto
.CryptoCipherProvider
;
35 import org
.apache
.hadoop
.hbase
.io
.crypto
.DefaultCipherProvider
;
36 import org
.apache
.hadoop
.hbase
.io
.crypto
.Encryption
;
37 import org
.apache
.hadoop
.hbase
.io
.crypto
.KeyProviderForTesting
;
38 import org
.apache
.hadoop
.hbase
.io
.crypto
.aes
.AES
;
39 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileWriterImpl
;
40 import org
.apache
.hadoop
.hbase
.io
.hfile
.CacheConfig
;
41 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFile
;
42 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileContext
;
43 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileContextBuilder
;
44 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileScanner
;
45 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
48 * This class runs performance benchmarks for {@link HFile}.
50 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience
.TOOLS
)
51 public class HFilePerformanceEvaluation
{
52 private static final int ROW_LENGTH
= 10;
53 private static final int ROW_COUNT
= 1000000;
54 private static final int RFILE_BLOCKSIZE
= 8 * 1024;
55 private static StringBuilder testSummary
= new StringBuilder();
57 // Disable verbose INFO logging from org.apache.hadoop.io.compress.CodecPool
59 System
.setProperty("org.apache.commons.logging.Log",
60 "org.apache.commons.logging.impl.SimpleLog");
61 System
.setProperty("org.apache.commons.logging.simplelog.log.org.apache.hadoop.io.compress.CodecPool",
65 private static final Logger LOG
=
66 LoggerFactory
.getLogger(HFilePerformanceEvaluation
.class.getName());
68 static byte [] format(final int i
) {
69 String v
= Integer
.toString(i
);
70 return Bytes
.toBytes("0000000000".substring(v
.length()) + v
);
73 static ImmutableBytesWritable
format(final int i
, ImmutableBytesWritable w
) {
78 static Cell
createCell(final int i
) {
79 return createCell(i
, HConstants
.EMPTY_BYTE_ARRAY
);
83 * HFile is Cell-based. It used to be byte arrays. Doing this test, pass Cells. All Cells
84 * intentionally have same coordinates in all fields but row.
85 * @param i Integer to format as a row Key.
86 * @param value Value to use
87 * @return Created Cell.
89 static Cell
createCell(final int i
, final byte [] value
) {
90 return createCell(format(i
), value
);
93 static Cell
createCell(final byte [] keyRow
) {
94 return ExtendedCellBuilderFactory
.create(CellBuilderType
.DEEP_COPY
)
96 .setFamily(HConstants
.EMPTY_BYTE_ARRAY
)
97 .setQualifier(HConstants
.EMPTY_BYTE_ARRAY
)
98 .setTimestamp(HConstants
.LATEST_TIMESTAMP
)
99 .setType(KeyValue
.Type
.Maximum
.getCode())
100 .setValue(HConstants
.EMPTY_BYTE_ARRAY
)
104 static Cell
createCell(final byte [] keyRow
, final byte [] value
) {
105 return ExtendedCellBuilderFactory
.create(CellBuilderType
.DEEP_COPY
)
107 .setFamily(HConstants
.EMPTY_BYTE_ARRAY
)
108 .setQualifier(HConstants
.EMPTY_BYTE_ARRAY
)
109 .setTimestamp(HConstants
.LATEST_TIMESTAMP
)
110 .setType(KeyValue
.Type
.Maximum
.getCode())
116 * Add any supported codec or cipher to test the HFile read/write performance.
117 * Specify "none" to disable codec or cipher or both.
120 private void runBenchmarks() throws Exception
{
121 final Configuration conf
= new Configuration();
122 final FileSystem fs
= FileSystem
.get(conf
);
123 final Path mf
= fs
.makeQualified(new Path("performanceevaluation.mapfile"));
125 // codec=none cipher=none
126 runWriteBenchmark(conf
, fs
, mf
, "none", "none");
127 runReadBenchmark(conf
, fs
, mf
, "none", "none");
129 // codec=gz cipher=none
130 runWriteBenchmark(conf
, fs
, mf
, "gz", "none");
131 runReadBenchmark(conf
, fs
, mf
, "gz", "none");
133 // Add configuration for AES cipher
134 final Configuration aesconf
= new Configuration();
135 aesconf
.set(HConstants
.CRYPTO_KEYPROVIDER_CONF_KEY
, KeyProviderForTesting
.class.getName());
136 aesconf
.set(HConstants
.CRYPTO_MASTERKEY_NAME_CONF_KEY
, "hbase");
137 aesconf
.setInt("hfile.format.version", 3);
138 final FileSystem aesfs
= FileSystem
.get(aesconf
);
139 final Path aesmf
= aesfs
.makeQualified(new Path("performanceevaluation.aes.mapfile"));
141 // codec=none cipher=aes
142 runWriteBenchmark(aesconf
, aesfs
, aesmf
, "none", "aes");
143 runReadBenchmark(aesconf
, aesfs
, aesmf
, "none", "aes");
145 // codec=gz cipher=aes
146 runWriteBenchmark(aesconf
, aesfs
, aesmf
, "gz", "aes");
147 runReadBenchmark(aesconf
, aesfs
, aesmf
, "gz", "aes");
149 // Add configuration for Commons cipher
150 final Configuration cryptoconf
= new Configuration();
151 cryptoconf
.set(HConstants
.CRYPTO_KEYPROVIDER_CONF_KEY
, KeyProviderForTesting
.class.getName());
152 cryptoconf
.set(HConstants
.CRYPTO_MASTERKEY_NAME_CONF_KEY
, "hbase");
153 cryptoconf
.setInt("hfile.format.version", 3);
154 cryptoconf
.set(HConstants
.CRYPTO_CIPHERPROVIDER_CONF_KEY
, CryptoCipherProvider
.class.getName());
155 final FileSystem cryptofs
= FileSystem
.get(cryptoconf
);
156 final Path cryptof
= cryptofs
.makeQualified(new Path("performanceevaluation.aes.mapfile"));
158 // codec=none cipher=aes
159 runWriteBenchmark(cryptoconf
, cryptofs
, aesmf
, "none", "aes");
160 runReadBenchmark(cryptoconf
, cryptofs
, aesmf
, "none", "aes");
162 // codec=gz cipher=aes
163 runWriteBenchmark(cryptoconf
, aesfs
, aesmf
, "gz", "aes");
164 runReadBenchmark(cryptoconf
, aesfs
, aesmf
, "gz", "aes");
166 // cleanup test files
170 if (aesfs
.exists(aesmf
)) {
171 aesfs
.delete(aesmf
, true);
173 if (cryptofs
.exists(aesmf
)) {
174 cryptofs
.delete(cryptof
, true);
177 // Print Result Summary
178 LOG
.info("\n***************\n" + "Result Summary" + "\n***************\n");
179 LOG
.info(testSummary
.toString());
184 * Write a test HFile with the given codec & cipher
188 * @param codec "none", "lzo", "gz", "snappy"
189 * @param cipher "none", "aes"
192 private void runWriteBenchmark(Configuration conf
, FileSystem fs
, Path mf
, String codec
,
193 String cipher
) throws Exception
{
198 runBenchmark(new SequentialWriteBenchmark(conf
, fs
, mf
, ROW_COUNT
, codec
, cipher
),
199 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
204 * Run all the read benchmarks for the test HFile
208 * @param codec "none", "lzo", "gz", "snappy"
209 * @param cipher "none", "aes"
211 private void runReadBenchmark(final Configuration conf
, final FileSystem fs
, final Path mf
,
212 final String codec
, final String cipher
) {
213 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
217 runBenchmark(new UniformRandomSmallScan(conf
, fs
, mf
, ROW_COUNT
),
218 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
219 } catch (Exception e
) {
220 testSummary
.append("UniformRandomSmallScan failed " + e
.getMessage());
226 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
230 runBenchmark(new UniformRandomReadBenchmark(conf
, fs
, mf
, ROW_COUNT
),
231 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
232 } catch (Exception e
) {
233 testSummary
.append("UniformRandomReadBenchmark failed " + e
.getMessage());
239 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
243 runBenchmark(new GaussianRandomReadBenchmark(conf
, fs
, mf
, ROW_COUNT
),
244 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
245 } catch (Exception e
) {
246 testSummary
.append("GaussianRandomReadBenchmark failed " + e
.getMessage());
252 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
256 runBenchmark(new SequentialReadBenchmark(conf
, fs
, mf
, ROW_COUNT
),
257 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
258 } catch (Exception e
) {
259 testSummary
.append("SequentialReadBenchmark failed " + e
.getMessage());
267 protected void runBenchmark(RowOrientedBenchmark benchmark
, int rowCount
,
268 String codec
, String cipher
) throws Exception
{
269 LOG
.info("Running " + benchmark
.getClass().getSimpleName() + " with codec[" +
270 codec
+ "] " + "cipher[" + cipher
+ "] for " + rowCount
+ " rows.");
272 long elapsedTime
= benchmark
.run();
274 LOG
.info("Running " + benchmark
.getClass().getSimpleName() + " with codec[" +
275 codec
+ "] " + "cipher[" + cipher
+ "] for " + rowCount
+ " rows took " +
276 elapsedTime
+ "ms.");
278 // Store results to print summary at the end
279 testSummary
.append("Running ").append(benchmark
.getClass().getSimpleName())
280 .append(" with codec[").append(codec
).append("] cipher[").append(cipher
)
281 .append("] for ").append(rowCount
).append(" rows took ").append(elapsedTime
)
282 .append("ms.").append("\n");
285 static abstract class RowOrientedBenchmark
{
287 protected final Configuration conf
;
288 protected final FileSystem fs
;
289 protected final Path mf
;
290 protected final int totalRows
;
291 protected String codec
= "none";
292 protected String cipher
= "none";
294 public RowOrientedBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
295 int totalRows
, String codec
, String cipher
) {
299 this.totalRows
= totalRows
;
301 this.cipher
= cipher
;
304 public RowOrientedBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
309 this.totalRows
= totalRows
;
312 void setUp() throws Exception
{
316 abstract void doRow(int i
) throws Exception
;
318 protected int getReportingPeriod() {
319 return this.totalRows
/ 10;
322 void tearDown() throws Exception
{
328 * @return elapsed time.
331 long run() throws Exception
{
334 long startTime
= System
.currentTimeMillis();
336 for (int i
= 0; i
< totalRows
; i
++) {
337 if (i
> 0 && i
% getReportingPeriod() == 0) {
338 LOG
.info("Processed " + i
+ " rows.");
342 elapsedTime
= System
.currentTimeMillis() - startTime
;
351 static class SequentialWriteBenchmark
extends RowOrientedBenchmark
{
352 protected HFile
.Writer writer
;
353 private Random random
= new Random();
354 private byte[] bytes
= new byte[ROW_LENGTH
];
356 public SequentialWriteBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
357 int totalRows
, String codec
, String cipher
) {
358 super(conf
, fs
, mf
, totalRows
, codec
, cipher
);
362 void setUp() throws Exception
{
364 HFileContextBuilder builder
= new HFileContextBuilder()
365 .withCompression(HFileWriterImpl
.compressionByName(codec
))
366 .withBlockSize(RFILE_BLOCKSIZE
);
// Compare by value: == on Strings tests reference identity and only happens to work
// when the caller passes an interned literal.
if ("aes".equals(cipher)) {
369 byte[] cipherKey
= new byte[AES
.KEY_LENGTH
];
370 new SecureRandom().nextBytes(cipherKey
);
371 builder
.withEncryptionContext(Encryption
.newContext(conf
)
372 .setCipher(Encryption
.getCipher(conf
, cipher
))
374 } else if (!"none".equals(cipher
)) {
375 throw new IOException("Cipher " + cipher
+ " not supported.");
378 HFileContext hFileContext
= builder
.build();
380 writer
= HFile
.getWriterFactoryNoCache(conf
)
382 .withFileContext(hFileContext
)
387 void doRow(int i
) throws Exception
{
388 writer
.append(createCell(i
, generateValue()));
391 private byte[] generateValue() {
392 random
.nextBytes(bytes
);
397 protected int getReportingPeriod() {
398 return this.totalRows
; // don't report progress
402 void tearDown() throws Exception
{
408 static abstract class ReadBenchmark
extends RowOrientedBenchmark
{
410 protected HFile
.Reader reader
;
412 public ReadBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
414 super(conf
, fs
, mf
, totalRows
);
418 void setUp() throws Exception
{
419 reader
= HFile
.createReader(this.fs
, this.mf
, new CacheConfig(this.conf
), true, this.conf
);
423 void tearDown() throws Exception
{
429 static class SequentialReadBenchmark
extends ReadBenchmark
{
430 private HFileScanner scanner
;
432 public SequentialReadBenchmark(Configuration conf
, FileSystem fs
,
433 Path mf
, int totalRows
) {
434 super(conf
, fs
, mf
, totalRows
);
438 void setUp() throws Exception
{
440 this.scanner
= this.reader
.getScanner(false, false);
441 this.scanner
.seekTo();
445 void doRow(int i
) throws Exception
{
446 if (this.scanner
.next()) {
447 // TODO: Fix. Make Scanner do Cells.
448 Cell c
= this.scanner
.getCell();
449 PerformanceEvaluationCommons
.assertKey(format(i
+ 1), c
);
450 PerformanceEvaluationCommons
.assertValueSize(c
.getValueLength(), ROW_LENGTH
);
455 protected int getReportingPeriod() {
456 return this.totalRows
; // don't report progress
461 static class UniformRandomReadBenchmark
extends ReadBenchmark
{
463 private Random random
= new Random();
465 public UniformRandomReadBenchmark(Configuration conf
, FileSystem fs
,
466 Path mf
, int totalRows
) {
467 super(conf
, fs
, mf
, totalRows
);
471 void doRow(int i
) throws Exception
{
472 HFileScanner scanner
= this.reader
.getScanner(false, true);
473 byte [] b
= getRandomRow();
474 if (scanner
.seekTo(createCell(b
)) < 0) {
475 LOG
.info("Not able to seekTo " + new String(b
));
478 // TODO: Fix scanner so it does Cells
479 Cell c
= scanner
.getCell();
480 PerformanceEvaluationCommons
.assertKey(b
, c
);
481 PerformanceEvaluationCommons
.assertValueSize(c
.getValueLength(), ROW_LENGTH
);
484 private byte [] getRandomRow() {
485 return format(random
.nextInt(totalRows
));
489 static class UniformRandomSmallScan
extends ReadBenchmark
{
490 private Random random
= new Random();
492 public UniformRandomSmallScan(Configuration conf
, FileSystem fs
,
493 Path mf
, int totalRows
) {
494 super(conf
, fs
, mf
, totalRows
/10);
498 void doRow(int i
) throws Exception
{
499 HFileScanner scanner
= this.reader
.getScanner(false, false);
500 byte [] b
= getRandomRow();
501 // System.out.println("Random row: " + new String(b));
502 Cell c
= createCell(b
);
503 if (scanner
.seekTo(c
) != 0) {
504 LOG
.info("Nonexistent row: " + new String(b
));
507 // TODO: HFileScanner doesn't do Cells yet. Temporary fix.
508 c
= scanner
.getCell();
509 // System.out.println("Found row: " +
510 // new String(c.getRowArray(), c.getRowOffset(), c.getRowLength()));
511 PerformanceEvaluationCommons
.assertKey(b
, c
);
512 for (int ii
= 0; ii
< 30; ii
++) {
513 if (!scanner
.next()) {
514 LOG
.info("NOTHING FOLLOWS");
517 c
= scanner
.getCell();
518 PerformanceEvaluationCommons
.assertValueSize(c
.getValueLength(), ROW_LENGTH
);
522 private byte [] getRandomRow() {
523 return format(random
.nextInt(totalRows
));
527 static class GaussianRandomReadBenchmark
extends ReadBenchmark
{
529 private RandomData randomData
= new RandomDataImpl();
531 public GaussianRandomReadBenchmark(Configuration conf
, FileSystem fs
,
532 Path mf
, int totalRows
) {
533 super(conf
, fs
, mf
, totalRows
);
537 void doRow(int i
) throws Exception
{
538 HFileScanner scanner
= this.reader
.getScanner(false, true);
539 byte[] gaussianRandomRowBytes
= getGaussianRandomRowBytes();
540 scanner
.seekTo(createCell(gaussianRandomRowBytes
));
541 for (int ii
= 0; ii
< 30; ii
++) {
542 if (!scanner
.next()) {
543 LOG
.info("NOTHING FOLLOWS");
546 // TODO: Fix. Make scanner do Cells.
551 private byte [] getGaussianRandomRowBytes() {
552 int r
= (int) randomData
.nextGaussian((double)totalRows
/ 2.0,
553 (double)totalRows
/ 10.0);
554 // make sure r falls into [0,totalRows)
555 return format(Math
.min(totalRows
, Math
.max(r
,0)));
562 * @throws IOException
564 public static void main(String
[] args
) throws Exception
{
565 new HFilePerformanceEvaluation().runBenchmarks();
568 private String
getCipherName(Configuration conf
, String cipherName
) {
569 if (cipherName
.equals("aes")) {
570 String provider
= conf
.get(HConstants
.CRYPTO_CIPHERPROVIDER_CONF_KEY
);
571 if (provider
== null || provider
.equals("")
572 || provider
.equals(DefaultCipherProvider
.class.getName())) {
573 return "aes-default";
574 } else if (provider
.equals(CryptoCipherProvider
.class.getName())) {
575 return "aes-commons";