3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org
.apache
.hadoop
.hbase
;
21 import java
.io
.IOException
;
22 import java
.security
.SecureRandom
;
23 import java
.util
.Random
;
25 import org
.apache
.commons
.math3
.random
.RandomData
;
26 import org
.apache
.commons
.math3
.random
.RandomDataImpl
;
27 import org
.apache
.hadoop
.conf
.Configuration
;
28 import org
.apache
.hadoop
.fs
.FileSystem
;
29 import org
.apache
.hadoop
.fs
.Path
;
30 import org
.apache
.yetus
.audience
.InterfaceAudience
;
31 import org
.slf4j
.Logger
;
32 import org
.slf4j
.LoggerFactory
;
33 import org
.apache
.hadoop
.hbase
.io
.ImmutableBytesWritable
;
34 import org
.apache
.hadoop
.hbase
.io
.crypto
.CryptoCipherProvider
;
35 import org
.apache
.hadoop
.hbase
.io
.crypto
.DefaultCipherProvider
;
36 import org
.apache
.hadoop
.hbase
.io
.crypto
.Encryption
;
37 import org
.apache
.hadoop
.hbase
.io
.crypto
.KeyProviderForTesting
;
38 import org
.apache
.hadoop
.hbase
.io
.crypto
.aes
.AES
;
39 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileWriterImpl
;
40 import org
.apache
.hadoop
.hbase
.io
.hfile
.CacheConfig
;
41 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFile
;
42 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileContext
;
43 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileContextBuilder
;
44 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileScanner
;
45 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
46 import org
.apache
.hadoop
.hbase
.util
.EnvironmentEdgeManager
;
/**
 * This class runs performance benchmarks for {@link HFile}.
 */
51 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience
.TOOLS
)
52 public class HFilePerformanceEvaluation
{
53 private static final int ROW_LENGTH
= 10;
54 private static final int ROW_COUNT
= 1000000;
55 private static final int RFILE_BLOCKSIZE
= 8 * 1024;
56 private static StringBuilder testSummary
= new StringBuilder();
58 // Disable verbose INFO logging from org.apache.hadoop.io.compress.CodecPool
60 System
.setProperty("org.apache.commons.logging.Log",
61 "org.apache.commons.logging.impl.SimpleLog");
62 System
.setProperty("org.apache.commons.logging.simplelog.log.org.apache.hadoop.io.compress.CodecPool",
66 private static final Logger LOG
=
67 LoggerFactory
.getLogger(HFilePerformanceEvaluation
.class.getName());
69 static byte [] format(final int i
) {
70 String v
= Integer
.toString(i
);
71 return Bytes
.toBytes("0000000000".substring(v
.length()) + v
);
74 static ImmutableBytesWritable
format(final int i
, ImmutableBytesWritable w
) {
79 static Cell
createCell(final int i
) {
80 return createCell(i
, HConstants
.EMPTY_BYTE_ARRAY
);
84 * HFile is Cell-based. It used to be byte arrays. Doing this test, pass Cells. All Cells
85 * intentionally have same coordinates in all fields but row.
86 * @param i Integer to format as a row Key.
87 * @param value Value to use
88 * @return Created Cell.
90 static Cell
createCell(final int i
, final byte [] value
) {
91 return createCell(format(i
), value
);
94 static Cell
createCell(final byte [] keyRow
) {
95 return ExtendedCellBuilderFactory
.create(CellBuilderType
.DEEP_COPY
)
97 .setFamily(HConstants
.EMPTY_BYTE_ARRAY
)
98 .setQualifier(HConstants
.EMPTY_BYTE_ARRAY
)
99 .setTimestamp(HConstants
.LATEST_TIMESTAMP
)
100 .setType(KeyValue
.Type
.Maximum
.getCode())
101 .setValue(HConstants
.EMPTY_BYTE_ARRAY
)
105 static Cell
createCell(final byte [] keyRow
, final byte [] value
) {
106 return ExtendedCellBuilderFactory
.create(CellBuilderType
.DEEP_COPY
)
108 .setFamily(HConstants
.EMPTY_BYTE_ARRAY
)
109 .setQualifier(HConstants
.EMPTY_BYTE_ARRAY
)
110 .setTimestamp(HConstants
.LATEST_TIMESTAMP
)
111 .setType(KeyValue
.Type
.Maximum
.getCode())
117 * Add any supported codec or cipher to test the HFile read/write performance.
118 * Specify "none" to disable codec or cipher or both.
121 private void runBenchmarks() throws Exception
{
122 final Configuration conf
= new Configuration();
123 final FileSystem fs
= FileSystem
.get(conf
);
124 final Path mf
= fs
.makeQualified(new Path("performanceevaluation.mapfile"));
126 // codec=none cipher=none
127 runWriteBenchmark(conf
, fs
, mf
, "none", "none");
128 runReadBenchmark(conf
, fs
, mf
, "none", "none");
130 // codec=gz cipher=none
131 runWriteBenchmark(conf
, fs
, mf
, "gz", "none");
132 runReadBenchmark(conf
, fs
, mf
, "gz", "none");
134 // Add configuration for AES cipher
135 final Configuration aesconf
= new Configuration();
136 aesconf
.set(HConstants
.CRYPTO_KEYPROVIDER_CONF_KEY
, KeyProviderForTesting
.class.getName());
137 aesconf
.set(HConstants
.CRYPTO_MASTERKEY_NAME_CONF_KEY
, "hbase");
138 aesconf
.setInt("hfile.format.version", 3);
139 final FileSystem aesfs
= FileSystem
.get(aesconf
);
140 final Path aesmf
= aesfs
.makeQualified(new Path("performanceevaluation.aes.mapfile"));
142 // codec=none cipher=aes
143 runWriteBenchmark(aesconf
, aesfs
, aesmf
, "none", "aes");
144 runReadBenchmark(aesconf
, aesfs
, aesmf
, "none", "aes");
146 // codec=gz cipher=aes
147 runWriteBenchmark(aesconf
, aesfs
, aesmf
, "gz", "aes");
148 runReadBenchmark(aesconf
, aesfs
, aesmf
, "gz", "aes");
150 // Add configuration for Commons cipher
151 final Configuration cryptoconf
= new Configuration();
152 cryptoconf
.set(HConstants
.CRYPTO_KEYPROVIDER_CONF_KEY
, KeyProviderForTesting
.class.getName());
153 cryptoconf
.set(HConstants
.CRYPTO_MASTERKEY_NAME_CONF_KEY
, "hbase");
154 cryptoconf
.setInt("hfile.format.version", 3);
155 cryptoconf
.set(HConstants
.CRYPTO_CIPHERPROVIDER_CONF_KEY
, CryptoCipherProvider
.class.getName());
156 final FileSystem cryptofs
= FileSystem
.get(cryptoconf
);
157 final Path cryptof
= cryptofs
.makeQualified(new Path("performanceevaluation.aes.mapfile"));
159 // codec=none cipher=aes
160 runWriteBenchmark(cryptoconf
, cryptofs
, aesmf
, "none", "aes");
161 runReadBenchmark(cryptoconf
, cryptofs
, aesmf
, "none", "aes");
163 // codec=gz cipher=aes
164 runWriteBenchmark(cryptoconf
, aesfs
, aesmf
, "gz", "aes");
165 runReadBenchmark(cryptoconf
, aesfs
, aesmf
, "gz", "aes");
167 // cleanup test files
171 if (aesfs
.exists(aesmf
)) {
172 aesfs
.delete(aesmf
, true);
174 if (cryptofs
.exists(aesmf
)) {
175 cryptofs
.delete(cryptof
, true);
178 // Print Result Summary
179 LOG
.info("\n***************\n" + "Result Summary" + "\n***************\n");
180 LOG
.info(testSummary
.toString());
185 * Write a test HFile with the given codec & cipher
189 * @param codec "none", "lzo", "gz", "snappy"
190 * @param cipher "none", "aes"
193 private void runWriteBenchmark(Configuration conf
, FileSystem fs
, Path mf
, String codec
,
194 String cipher
) throws Exception
{
199 runBenchmark(new SequentialWriteBenchmark(conf
, fs
, mf
, ROW_COUNT
, codec
, cipher
),
200 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
205 * Run all the read benchmarks for the test HFile
209 * @param codec "none", "lzo", "gz", "snappy"
210 * @param cipher "none", "aes"
212 private void runReadBenchmark(final Configuration conf
, final FileSystem fs
, final Path mf
,
213 final String codec
, final String cipher
) {
214 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
218 runBenchmark(new UniformRandomSmallScan(conf
, fs
, mf
, ROW_COUNT
),
219 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
220 } catch (Exception e
) {
221 testSummary
.append("UniformRandomSmallScan failed " + e
.getMessage());
227 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
231 runBenchmark(new UniformRandomReadBenchmark(conf
, fs
, mf
, ROW_COUNT
),
232 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
233 } catch (Exception e
) {
234 testSummary
.append("UniformRandomReadBenchmark failed " + e
.getMessage());
240 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
244 runBenchmark(new GaussianRandomReadBenchmark(conf
, fs
, mf
, ROW_COUNT
),
245 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
246 } catch (Exception e
) {
247 testSummary
.append("GaussianRandomReadBenchmark failed " + e
.getMessage());
253 PerformanceEvaluationCommons
.concurrentReads(new Runnable() {
257 runBenchmark(new SequentialReadBenchmark(conf
, fs
, mf
, ROW_COUNT
),
258 ROW_COUNT
, codec
, getCipherName(conf
, cipher
));
259 } catch (Exception e
) {
260 testSummary
.append("SequentialReadBenchmark failed " + e
.getMessage());
268 protected void runBenchmark(RowOrientedBenchmark benchmark
, int rowCount
,
269 String codec
, String cipher
) throws Exception
{
270 LOG
.info("Running " + benchmark
.getClass().getSimpleName() + " with codec[" +
271 codec
+ "] " + "cipher[" + cipher
+ "] for " + rowCount
+ " rows.");
273 long elapsedTime
= benchmark
.run();
275 LOG
.info("Running " + benchmark
.getClass().getSimpleName() + " with codec[" +
276 codec
+ "] " + "cipher[" + cipher
+ "] for " + rowCount
+ " rows took " +
277 elapsedTime
+ "ms.");
279 // Store results to print summary at the end
280 testSummary
.append("Running ").append(benchmark
.getClass().getSimpleName())
281 .append(" with codec[").append(codec
).append("] cipher[").append(cipher
)
282 .append("] for ").append(rowCount
).append(" rows took ").append(elapsedTime
)
283 .append("ms.").append("\n");
286 static abstract class RowOrientedBenchmark
{
288 protected final Configuration conf
;
289 protected final FileSystem fs
;
290 protected final Path mf
;
291 protected final int totalRows
;
292 protected String codec
= "none";
293 protected String cipher
= "none";
295 public RowOrientedBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
296 int totalRows
, String codec
, String cipher
) {
300 this.totalRows
= totalRows
;
302 this.cipher
= cipher
;
305 public RowOrientedBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
310 this.totalRows
= totalRows
;
313 void setUp() throws Exception
{
317 abstract void doRow(int i
) throws Exception
;
319 protected int getReportingPeriod() {
320 return this.totalRows
/ 10;
323 void tearDown() throws Exception
{
329 * @return elapsed time.
332 long run() throws Exception
{
335 long startTime
= EnvironmentEdgeManager
.currentTime();
337 for (int i
= 0; i
< totalRows
; i
++) {
338 if (i
> 0 && i
% getReportingPeriod() == 0) {
339 LOG
.info("Processed " + i
+ " rows.");
343 elapsedTime
= EnvironmentEdgeManager
.currentTime() - startTime
;
352 static class SequentialWriteBenchmark
extends RowOrientedBenchmark
{
353 protected HFile
.Writer writer
;
354 private Random random
= new Random();
355 private byte[] bytes
= new byte[ROW_LENGTH
];
357 public SequentialWriteBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
358 int totalRows
, String codec
, String cipher
) {
359 super(conf
, fs
, mf
, totalRows
, codec
, cipher
);
363 void setUp() throws Exception
{
365 HFileContextBuilder builder
= new HFileContextBuilder()
366 .withCompression(HFileWriterImpl
.compressionByName(codec
))
367 .withBlockSize(RFILE_BLOCKSIZE
);
369 if (cipher
== "aes") {
370 byte[] cipherKey
= new byte[AES
.KEY_LENGTH
];
371 new SecureRandom().nextBytes(cipherKey
);
372 builder
.withEncryptionContext(Encryption
.newContext(conf
)
373 .setCipher(Encryption
.getCipher(conf
, cipher
))
375 } else if (!"none".equals(cipher
)) {
376 throw new IOException("Cipher " + cipher
+ " not supported.");
379 HFileContext hFileContext
= builder
.build();
381 writer
= HFile
.getWriterFactoryNoCache(conf
)
383 .withFileContext(hFileContext
)
388 void doRow(int i
) throws Exception
{
389 writer
.append(createCell(i
, generateValue()));
392 private byte[] generateValue() {
393 random
.nextBytes(bytes
);
398 protected int getReportingPeriod() {
399 return this.totalRows
; // don't report progress
403 void tearDown() throws Exception
{
409 static abstract class ReadBenchmark
extends RowOrientedBenchmark
{
411 protected HFile
.Reader reader
;
413 public ReadBenchmark(Configuration conf
, FileSystem fs
, Path mf
,
415 super(conf
, fs
, mf
, totalRows
);
419 void setUp() throws Exception
{
420 reader
= HFile
.createReader(this.fs
, this.mf
, new CacheConfig(this.conf
), true, this.conf
);
424 void tearDown() throws Exception
{
430 static class SequentialReadBenchmark
extends ReadBenchmark
{
431 private HFileScanner scanner
;
433 public SequentialReadBenchmark(Configuration conf
, FileSystem fs
,
434 Path mf
, int totalRows
) {
435 super(conf
, fs
, mf
, totalRows
);
439 void setUp() throws Exception
{
441 this.scanner
= this.reader
.getScanner(conf
, false, false);
442 this.scanner
.seekTo();
446 void doRow(int i
) throws Exception
{
447 if (this.scanner
.next()) {
448 // TODO: Fix. Make Scanner do Cells.
449 Cell c
= this.scanner
.getCell();
450 PerformanceEvaluationCommons
.assertKey(format(i
+ 1), c
);
451 PerformanceEvaluationCommons
.assertValueSize(ROW_LENGTH
, c
.getValueLength());
456 protected int getReportingPeriod() {
457 return this.totalRows
; // don't report progress
462 static class UniformRandomReadBenchmark
extends ReadBenchmark
{
464 private Random random
= new Random();
466 public UniformRandomReadBenchmark(Configuration conf
, FileSystem fs
,
467 Path mf
, int totalRows
) {
468 super(conf
, fs
, mf
, totalRows
);
472 void doRow(int i
) throws Exception
{
473 HFileScanner scanner
= this.reader
.getScanner(conf
, false, true);
474 byte [] b
= getRandomRow();
475 if (scanner
.seekTo(createCell(b
)) < 0) {
476 LOG
.info("Not able to seekTo " + new String(b
));
479 // TODO: Fix scanner so it does Cells
480 Cell c
= scanner
.getCell();
481 PerformanceEvaluationCommons
.assertKey(b
, c
);
482 PerformanceEvaluationCommons
.assertValueSize(ROW_LENGTH
, c
.getValueLength());
485 private byte [] getRandomRow() {
486 return format(random
.nextInt(totalRows
));
490 static class UniformRandomSmallScan
extends ReadBenchmark
{
491 private Random random
= new Random();
493 public UniformRandomSmallScan(Configuration conf
, FileSystem fs
,
494 Path mf
, int totalRows
) {
495 super(conf
, fs
, mf
, totalRows
/10);
499 void doRow(int i
) throws Exception
{
500 HFileScanner scanner
= this.reader
.getScanner(conf
, false, false);
501 byte [] b
= getRandomRow();
502 // System.out.println("Random row: " + new String(b));
503 Cell c
= createCell(b
);
504 if (scanner
.seekTo(c
) != 0) {
505 LOG
.info("Nonexistent row: " + new String(b
));
508 // TODO: HFileScanner doesn't do Cells yet. Temporary fix.
509 c
= scanner
.getCell();
510 // System.out.println("Found row: " +
511 // new String(c.getRowArray(), c.getRowOffset(), c.getRowLength()));
512 PerformanceEvaluationCommons
.assertKey(b
, c
);
513 for (int ii
= 0; ii
< 30; ii
++) {
514 if (!scanner
.next()) {
515 LOG
.info("NOTHING FOLLOWS");
518 c
= scanner
.getCell();
519 PerformanceEvaluationCommons
.assertValueSize(ROW_LENGTH
, c
.getValueLength());
523 private byte [] getRandomRow() {
524 return format(random
.nextInt(totalRows
));
528 static class GaussianRandomReadBenchmark
extends ReadBenchmark
{
530 private RandomData randomData
= new RandomDataImpl();
532 public GaussianRandomReadBenchmark(Configuration conf
, FileSystem fs
,
533 Path mf
, int totalRows
) {
534 super(conf
, fs
, mf
, totalRows
);
538 void doRow(int i
) throws Exception
{
539 HFileScanner scanner
= this.reader
.getScanner(conf
, false, true);
540 byte[] gaussianRandomRowBytes
= getGaussianRandomRowBytes();
541 scanner
.seekTo(createCell(gaussianRandomRowBytes
));
542 for (int ii
= 0; ii
< 30; ii
++) {
543 if (!scanner
.next()) {
544 LOG
.info("NOTHING FOLLOWS");
547 // TODO: Fix. Make scanner do Cells.
552 private byte [] getGaussianRandomRowBytes() {
553 int r
= (int) randomData
.nextGaussian((double)totalRows
/ 2.0,
554 (double)totalRows
/ 10.0);
555 // make sure r falls into [0,totalRows)
556 return format(Math
.min(totalRows
, Math
.max(r
,0)));
563 * @throws IOException
565 public static void main(String
[] args
) throws Exception
{
566 new HFilePerformanceEvaluation().runBenchmarks();
569 private String
getCipherName(Configuration conf
, String cipherName
) {
570 if (cipherName
.equals("aes")) {
571 String provider
= conf
.get(HConstants
.CRYPTO_CIPHERPROVIDER_CONF_KEY
);
572 if (provider
== null || provider
.equals("")
573 || provider
.equals(DefaultCipherProvider
.class.getName())) {
574 return "aes-default";
575 } else if (provider
.equals(CryptoCipherProvider
.class.getName())) {
576 return "aes-commons";