HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / HFilePerformanceEvaluation.java
blob2c4209ce8db8c6e1387b9ad9a3eaa6dc59e8ee60
1 /**
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
19 package org.apache.hadoop.hbase;
21 import java.io.IOException;
22 import java.security.SecureRandom;
23 import java.util.Random;
25 import org.apache.commons.math3.random.RandomData;
26 import org.apache.commons.math3.random.RandomDataImpl;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.fs.FileSystem;
29 import org.apache.hadoop.fs.Path;
30 import org.apache.yetus.audience.InterfaceAudience;
31 import org.slf4j.Logger;
32 import org.slf4j.LoggerFactory;
33 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
34 import org.apache.hadoop.hbase.io.crypto.CryptoCipherProvider;
35 import org.apache.hadoop.hbase.io.crypto.DefaultCipherProvider;
36 import org.apache.hadoop.hbase.io.crypto.Encryption;
37 import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting;
38 import org.apache.hadoop.hbase.io.crypto.aes.AES;
39 import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
40 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
41 import org.apache.hadoop.hbase.io.hfile.HFile;
42 import org.apache.hadoop.hbase.io.hfile.HFileContext;
43 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
44 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
45 import org.apache.hadoop.hbase.util.Bytes;
47 /**
48 * This class runs performance benchmarks for {@link HFile}.
50 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
51 public class HFilePerformanceEvaluation {
52 private static final int ROW_LENGTH = 10;
53 private static final int ROW_COUNT = 1000000;
54 private static final int RFILE_BLOCKSIZE = 8 * 1024;
55 private static StringBuilder testSummary = new StringBuilder();
57 // Disable verbose INFO logging from org.apache.hadoop.io.compress.CodecPool
58 static {
59 System.setProperty("org.apache.commons.logging.Log",
60 "org.apache.commons.logging.impl.SimpleLog");
61 System.setProperty("org.apache.commons.logging.simplelog.log.org.apache.hadoop.io.compress.CodecPool",
62 "WARN");
65 private static final Logger LOG =
66 LoggerFactory.getLogger(HFilePerformanceEvaluation.class.getName());
68 static byte [] format(final int i) {
69 String v = Integer.toString(i);
70 return Bytes.toBytes("0000000000".substring(v.length()) + v);
73 static ImmutableBytesWritable format(final int i, ImmutableBytesWritable w) {
74 w.set(format(i));
75 return w;
78 static Cell createCell(final int i) {
79 return createCell(i, HConstants.EMPTY_BYTE_ARRAY);
82 /**
83 * HFile is Cell-based. It used to be byte arrays. Doing this test, pass Cells. All Cells
84 * intentionally have same coordinates in all fields but row.
85 * @param i Integer to format as a row Key.
86 * @param value Value to use
87 * @return Created Cell.
89 static Cell createCell(final int i, final byte [] value) {
90 return createCell(format(i), value);
93 static Cell createCell(final byte [] keyRow) {
94 return ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
95 .setRow(keyRow)
96 .setFamily(HConstants.EMPTY_BYTE_ARRAY)
97 .setQualifier(HConstants.EMPTY_BYTE_ARRAY)
98 .setTimestamp(HConstants.LATEST_TIMESTAMP)
99 .setType(KeyValue.Type.Maximum.getCode())
100 .setValue(HConstants.EMPTY_BYTE_ARRAY)
101 .build();
104 static Cell createCell(final byte [] keyRow, final byte [] value) {
105 return ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
106 .setRow(keyRow)
107 .setFamily(HConstants.EMPTY_BYTE_ARRAY)
108 .setQualifier(HConstants.EMPTY_BYTE_ARRAY)
109 .setTimestamp(HConstants.LATEST_TIMESTAMP)
110 .setType(KeyValue.Type.Maximum.getCode())
111 .setValue(value)
112 .build();
116 * Add any supported codec or cipher to test the HFile read/write performance.
117 * Specify "none" to disable codec or cipher or both.
118 * @throws Exception
120 private void runBenchmarks() throws Exception {
121 final Configuration conf = new Configuration();
122 final FileSystem fs = FileSystem.get(conf);
123 final Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
125 // codec=none cipher=none
126 runWriteBenchmark(conf, fs, mf, "none", "none");
127 runReadBenchmark(conf, fs, mf, "none", "none");
129 // codec=gz cipher=none
130 runWriteBenchmark(conf, fs, mf, "gz", "none");
131 runReadBenchmark(conf, fs, mf, "gz", "none");
133 // Add configuration for AES cipher
134 final Configuration aesconf = new Configuration();
135 aesconf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
136 aesconf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
137 aesconf.setInt("hfile.format.version", 3);
138 final FileSystem aesfs = FileSystem.get(aesconf);
139 final Path aesmf = aesfs.makeQualified(new Path("performanceevaluation.aes.mapfile"));
141 // codec=none cipher=aes
142 runWriteBenchmark(aesconf, aesfs, aesmf, "none", "aes");
143 runReadBenchmark(aesconf, aesfs, aesmf, "none", "aes");
145 // codec=gz cipher=aes
146 runWriteBenchmark(aesconf, aesfs, aesmf, "gz", "aes");
147 runReadBenchmark(aesconf, aesfs, aesmf, "gz", "aes");
149 // Add configuration for Commons cipher
150 final Configuration cryptoconf = new Configuration();
151 cryptoconf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
152 cryptoconf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
153 cryptoconf.setInt("hfile.format.version", 3);
154 cryptoconf.set(HConstants.CRYPTO_CIPHERPROVIDER_CONF_KEY, CryptoCipherProvider.class.getName());
155 final FileSystem cryptofs = FileSystem.get(cryptoconf);
156 final Path cryptof = cryptofs.makeQualified(new Path("performanceevaluation.aes.mapfile"));
158 // codec=none cipher=aes
159 runWriteBenchmark(cryptoconf, cryptofs, aesmf, "none", "aes");
160 runReadBenchmark(cryptoconf, cryptofs, aesmf, "none", "aes");
162 // codec=gz cipher=aes
163 runWriteBenchmark(cryptoconf, aesfs, aesmf, "gz", "aes");
164 runReadBenchmark(cryptoconf, aesfs, aesmf, "gz", "aes");
166 // cleanup test files
167 if (fs.exists(mf)) {
168 fs.delete(mf, true);
170 if (aesfs.exists(aesmf)) {
171 aesfs.delete(aesmf, true);
173 if (cryptofs.exists(aesmf)) {
174 cryptofs.delete(cryptof, true);
177 // Print Result Summary
178 LOG.info("\n***************\n" + "Result Summary" + "\n***************\n");
179 LOG.info(testSummary.toString());
184 * Write a test HFile with the given codec & cipher
185 * @param conf
186 * @param fs
187 * @param mf
188 * @param codec "none", "lzo", "gz", "snappy"
189 * @param cipher "none", "aes"
190 * @throws Exception
192 private void runWriteBenchmark(Configuration conf, FileSystem fs, Path mf, String codec,
193 String cipher) throws Exception {
194 if (fs.exists(mf)) {
195 fs.delete(mf, true);
198 runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT, codec, cipher),
199 ROW_COUNT, codec, getCipherName(conf, cipher));
204 * Run all the read benchmarks for the test HFile
205 * @param conf
206 * @param fs
207 * @param mf
208 * @param codec "none", "lzo", "gz", "snappy"
209 * @param cipher "none", "aes"
211 private void runReadBenchmark(final Configuration conf, final FileSystem fs, final Path mf,
212 final String codec, final String cipher) {
213 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
214 @Override
215 public void run() {
216 try {
217 runBenchmark(new UniformRandomSmallScan(conf, fs, mf, ROW_COUNT),
218 ROW_COUNT, codec, getCipherName(conf, cipher));
219 } catch (Exception e) {
220 testSummary.append("UniformRandomSmallScan failed " + e.getMessage());
221 e.printStackTrace();
226 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
227 @Override
228 public void run() {
229 try {
230 runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
231 ROW_COUNT, codec, getCipherName(conf, cipher));
232 } catch (Exception e) {
233 testSummary.append("UniformRandomReadBenchmark failed " + e.getMessage());
234 e.printStackTrace();
239 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
240 @Override
241 public void run() {
242 try {
243 runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
244 ROW_COUNT, codec, getCipherName(conf, cipher));
245 } catch (Exception e) {
246 testSummary.append("GaussianRandomReadBenchmark failed " + e.getMessage());
247 e.printStackTrace();
252 PerformanceEvaluationCommons.concurrentReads(new Runnable() {
253 @Override
254 public void run() {
255 try {
256 runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT),
257 ROW_COUNT, codec, getCipherName(conf, cipher));
258 } catch (Exception e) {
259 testSummary.append("SequentialReadBenchmark failed " + e.getMessage());
260 e.printStackTrace();
263 });
267 protected void runBenchmark(RowOrientedBenchmark benchmark, int rowCount,
268 String codec, String cipher) throws Exception {
269 LOG.info("Running " + benchmark.getClass().getSimpleName() + " with codec[" +
270 codec + "] " + "cipher[" + cipher + "] for " + rowCount + " rows.");
272 long elapsedTime = benchmark.run();
274 LOG.info("Running " + benchmark.getClass().getSimpleName() + " with codec[" +
275 codec + "] " + "cipher[" + cipher + "] for " + rowCount + " rows took " +
276 elapsedTime + "ms.");
278 // Store results to print summary at the end
279 testSummary.append("Running ").append(benchmark.getClass().getSimpleName())
280 .append(" with codec[").append(codec).append("] cipher[").append(cipher)
281 .append("] for ").append(rowCount).append(" rows took ").append(elapsedTime)
282 .append("ms.").append("\n");
285 static abstract class RowOrientedBenchmark {
287 protected final Configuration conf;
288 protected final FileSystem fs;
289 protected final Path mf;
290 protected final int totalRows;
291 protected String codec = "none";
292 protected String cipher = "none";
294 public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
295 int totalRows, String codec, String cipher) {
296 this.conf = conf;
297 this.fs = fs;
298 this.mf = mf;
299 this.totalRows = totalRows;
300 this.codec = codec;
301 this.cipher = cipher;
304 public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
305 int totalRows) {
306 this.conf = conf;
307 this.fs = fs;
308 this.mf = mf;
309 this.totalRows = totalRows;
312 void setUp() throws Exception {
313 // do nothing
316 abstract void doRow(int i) throws Exception;
318 protected int getReportingPeriod() {
319 return this.totalRows / 10;
322 void tearDown() throws Exception {
323 // do nothing
327 * Run benchmark
328 * @return elapsed time.
329 * @throws Exception
331 long run() throws Exception {
332 long elapsedTime;
333 setUp();
334 long startTime = System.currentTimeMillis();
335 try {
336 for (int i = 0; i < totalRows; i++) {
337 if (i > 0 && i % getReportingPeriod() == 0) {
338 LOG.info("Processed " + i + " rows.");
340 doRow(i);
342 elapsedTime = System.currentTimeMillis() - startTime;
343 } finally {
344 tearDown();
346 return elapsedTime;
351 static class SequentialWriteBenchmark extends RowOrientedBenchmark {
352 protected HFile.Writer writer;
353 private Random random = new Random();
354 private byte[] bytes = new byte[ROW_LENGTH];
356 public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf,
357 int totalRows, String codec, String cipher) {
358 super(conf, fs, mf, totalRows, codec, cipher);
361 @Override
362 void setUp() throws Exception {
364 HFileContextBuilder builder = new HFileContextBuilder()
365 .withCompression(HFileWriterImpl.compressionByName(codec))
366 .withBlockSize(RFILE_BLOCKSIZE);
368 if (cipher == "aes") {
369 byte[] cipherKey = new byte[AES.KEY_LENGTH];
370 new SecureRandom().nextBytes(cipherKey);
371 builder.withEncryptionContext(Encryption.newContext(conf)
372 .setCipher(Encryption.getCipher(conf, cipher))
373 .setKey(cipherKey));
374 } else if (!"none".equals(cipher)) {
375 throw new IOException("Cipher " + cipher + " not supported.");
378 HFileContext hFileContext = builder.build();
380 writer = HFile.getWriterFactoryNoCache(conf)
381 .withPath(fs, mf)
382 .withFileContext(hFileContext)
383 .create();
386 @Override
387 void doRow(int i) throws Exception {
388 writer.append(createCell(i, generateValue()));
391 private byte[] generateValue() {
392 random.nextBytes(bytes);
393 return bytes;
396 @Override
397 protected int getReportingPeriod() {
398 return this.totalRows; // don't report progress
401 @Override
402 void tearDown() throws Exception {
403 writer.close();
408 static abstract class ReadBenchmark extends RowOrientedBenchmark {
410 protected HFile.Reader reader;
412 public ReadBenchmark(Configuration conf, FileSystem fs, Path mf,
413 int totalRows) {
414 super(conf, fs, mf, totalRows);
417 @Override
418 void setUp() throws Exception {
419 reader = HFile.createReader(this.fs, this.mf, new CacheConfig(this.conf), true, this.conf);
422 @Override
423 void tearDown() throws Exception {
424 reader.close();
429 static class SequentialReadBenchmark extends ReadBenchmark {
430 private HFileScanner scanner;
432 public SequentialReadBenchmark(Configuration conf, FileSystem fs,
433 Path mf, int totalRows) {
434 super(conf, fs, mf, totalRows);
437 @Override
438 void setUp() throws Exception {
439 super.setUp();
440 this.scanner = this.reader.getScanner(false, false);
441 this.scanner.seekTo();
444 @Override
445 void doRow(int i) throws Exception {
446 if (this.scanner.next()) {
447 // TODO: Fix. Make Scanner do Cells.
448 Cell c = this.scanner.getCell();
449 PerformanceEvaluationCommons.assertKey(format(i + 1), c);
450 PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
454 @Override
455 protected int getReportingPeriod() {
456 return this.totalRows; // don't report progress
461 static class UniformRandomReadBenchmark extends ReadBenchmark {
463 private Random random = new Random();
465 public UniformRandomReadBenchmark(Configuration conf, FileSystem fs,
466 Path mf, int totalRows) {
467 super(conf, fs, mf, totalRows);
470 @Override
471 void doRow(int i) throws Exception {
472 HFileScanner scanner = this.reader.getScanner(false, true);
473 byte [] b = getRandomRow();
474 if (scanner.seekTo(createCell(b)) < 0) {
475 LOG.info("Not able to seekTo " + new String(b));
476 return;
478 // TODO: Fix scanner so it does Cells
479 Cell c = scanner.getCell();
480 PerformanceEvaluationCommons.assertKey(b, c);
481 PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
484 private byte [] getRandomRow() {
485 return format(random.nextInt(totalRows));
489 static class UniformRandomSmallScan extends ReadBenchmark {
490 private Random random = new Random();
492 public UniformRandomSmallScan(Configuration conf, FileSystem fs,
493 Path mf, int totalRows) {
494 super(conf, fs, mf, totalRows/10);
497 @Override
498 void doRow(int i) throws Exception {
499 HFileScanner scanner = this.reader.getScanner(false, false);
500 byte [] b = getRandomRow();
501 // System.out.println("Random row: " + new String(b));
502 Cell c = createCell(b);
503 if (scanner.seekTo(c) != 0) {
504 LOG.info("Nonexistent row: " + new String(b));
505 return;
507 // TODO: HFileScanner doesn't do Cells yet. Temporary fix.
508 c = scanner.getCell();
509 // System.out.println("Found row: " +
510 // new String(c.getRowArray(), c.getRowOffset(), c.getRowLength()));
511 PerformanceEvaluationCommons.assertKey(b, c);
512 for (int ii = 0; ii < 30; ii++) {
513 if (!scanner.next()) {
514 LOG.info("NOTHING FOLLOWS");
515 return;
517 c = scanner.getCell();
518 PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
522 private byte [] getRandomRow() {
523 return format(random.nextInt(totalRows));
527 static class GaussianRandomReadBenchmark extends ReadBenchmark {
529 private RandomData randomData = new RandomDataImpl();
531 public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs,
532 Path mf, int totalRows) {
533 super(conf, fs, mf, totalRows);
536 @Override
537 void doRow(int i) throws Exception {
538 HFileScanner scanner = this.reader.getScanner(false, true);
539 byte[] gaussianRandomRowBytes = getGaussianRandomRowBytes();
540 scanner.seekTo(createCell(gaussianRandomRowBytes));
541 for (int ii = 0; ii < 30; ii++) {
542 if (!scanner.next()) {
543 LOG.info("NOTHING FOLLOWS");
544 return;
546 // TODO: Fix. Make scanner do Cells.
547 scanner.getCell();
551 private byte [] getGaussianRandomRowBytes() {
552 int r = (int) randomData.nextGaussian((double)totalRows / 2.0,
553 (double)totalRows / 10.0);
554 // make sure r falls into [0,totalRows)
555 return format(Math.min(totalRows, Math.max(r,0)));
560 * @param args
561 * @throws Exception
562 * @throws IOException
564 public static void main(String[] args) throws Exception {
565 new HFilePerformanceEvaluation().runBenchmarks();
568 private String getCipherName(Configuration conf, String cipherName) {
569 if (cipherName.equals("aes")) {
570 String provider = conf.get(HConstants.CRYPTO_CIPHERPROVIDER_CONF_KEY);
571 if (provider == null || provider.equals("")
572 || provider.equals(DefaultCipherProvider.class.getName())) {
573 return "aes-default";
574 } else if (provider.equals(CryptoCipherProvider.class.getName())) {
575 return "aes-commons";
578 return cipherName;