HBASE-24033 Add ut for loading the corrupt recovered hfiles (#1322)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / tool / TestBulkLoadHFiles.java
blob78a82ed89cc1c33d314aa00280cdc7afeb2c3388
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 package org.apache.hadoop.hbase.tool;
20 import static org.apache.hadoop.hbase.HBaseTestingUtility.countRows;
21 import static org.junit.Assert.assertArrayEquals;
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertTrue;
24 import static org.junit.Assert.fail;
26 import java.io.IOException;
27 import java.nio.ByteBuffer;
28 import java.util.ArrayList;
29 import java.util.List;
30 import java.util.Locale;
31 import java.util.Map;
32 import java.util.TreeMap;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.fs.FSDataOutputStream;
35 import org.apache.hadoop.fs.FileStatus;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.HBaseClassTestRule;
39 import org.apache.hadoop.hbase.HBaseTestingUtility;
40 import org.apache.hadoop.hbase.HConstants;
41 import org.apache.hadoop.hbase.NamespaceDescriptor;
42 import org.apache.hadoop.hbase.TableName;
43 import org.apache.hadoop.hbase.TableNotFoundException;
44 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
45 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
46 import org.apache.hadoop.hbase.client.Table;
47 import org.apache.hadoop.hbase.client.TableDescriptor;
48 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
49 import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
50 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
51 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
52 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
53 import org.apache.hadoop.hbase.io.hfile.HFile;
54 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
55 import org.apache.hadoop.hbase.regionserver.BloomType;
56 import org.apache.hadoop.hbase.testclassification.LargeTests;
57 import org.apache.hadoop.hbase.testclassification.MiscTests;
58 import org.apache.hadoop.hbase.util.Bytes;
59 import org.apache.hadoop.hbase.util.FSUtils;
60 import org.apache.hadoop.hbase.util.HFileTestUtil;
61 import org.junit.AfterClass;
62 import org.junit.BeforeClass;
63 import org.junit.ClassRule;
64 import org.junit.Rule;
65 import org.junit.Test;
66 import org.junit.experimental.categories.Category;
67 import org.junit.rules.TestName;
69 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load functionality. These tests run
 * faster than the full MR cluster tests in TestHFileOutputFormat
 */
75 @Category({ MiscTests.class, LargeTests.class })
76 public class TestBulkLoadHFiles {
  /** Registers this large test with HBase's class-level test timeout machinery. */
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestBulkLoadHFiles.class);

  /** Exposes the current test method name; used to derive table and directory names. */
  @Rule
  public TestName tn = new TestName();

  // Column family / qualifier every generated test HFile is written under.
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");
  // Non-default namespace that the bulk loads are repeated against.
  private static final String NAMESPACE = "bulkNS";

  // Substring expected in the error when HFiles name a family the table does not have.
  static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
  // Per-region/per-family hfile cap configured in setUpBeforeClass; exceeded on purpose
  // by testLoadTooMayHFiles.
  static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

  // Split points for tests that pre-create a multi-region table.
  private static final byte[][] SPLIT_KEYS =
    new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ppp") };

  // Shared mini-cluster harness; started once for the whole class.
  static HBaseTestingUtility util = new HBaseTestingUtility();
  /** Starts the mini cluster with bulk-load-friendly settings and creates the test namespace. */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // Disable region coprocessors so nothing intercepts the bulk load path.
    util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
    util.getConfiguration().setInt(BulkLoadHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
      MAX_FILES_PER_REGION_PER_FAMILY);
    // change default behavior so that tag values are returned with normal rpcs
    util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
      KeyValueCodecWithTags.class.getCanonicalName());
    util.startMiniCluster();

    setupNamespace();
  }
  /** Creates the extra namespace targeted by the namespace-qualified bulk loads. */
  protected static void setupNamespace() throws Exception {
    util.getAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
  }
  /** Shuts down the shared mini cluster once all tests in this class have run. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }
119 @Test
120 public void testSimpleLoadWithMap() throws Exception {
121 runTest("testSimpleLoadWithMap", BloomType.NONE,
122 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
123 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
124 true);
128 * Test case that creates some regions and loads HFiles that fit snugly inside those regions
130 @Test
131 public void testSimpleLoad() throws Exception {
132 runTest("testSimpleLoad", BloomType.NONE,
133 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
134 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, });
137 @Test
138 public void testSimpleLoadWithFileCopy() throws Exception {
139 String testName = tn.getMethodName();
140 final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
141 runTest(testName, buildHTD(TableName.valueOf(TABLE_NAME), BloomType.NONE), false, null,
142 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
143 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
144 false, true, 2);
148 * Test case that creates some regions and loads HFiles that cross the boundaries of those regions
150 @Test
151 public void testRegionCrossingLoad() throws Exception {
152 runTest("testRegionCrossingLoad", BloomType.NONE,
153 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
154 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
158 * Test loading into a column family that has a ROW bloom filter.
160 @Test
161 public void testRegionCrossingRowBloom() throws Exception {
162 runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
163 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
164 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
168 * Test loading into a column family that has a ROWCOL bloom filter.
170 @Test
171 public void testRegionCrossingRowColBloom() throws Exception {
172 runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
173 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
174 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
178 * Test case that creates some regions and loads HFiles that have different region boundaries than
179 * the table pre-split.
181 @Test
182 public void testSimpleHFileSplit() throws Exception {
183 runTest("testHFileSplit", BloomType.NONE,
184 new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
185 Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
186 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
187 new byte[][] { Bytes.toBytes("mmm"), Bytes.toBytes("zzz") }, });
  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries and have
   * different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplit() throws Exception {
    testRegionCrossingHFileSplit(BloomType.NONE);
  }
  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries have a ROW
   * bloom filter and a different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROW);
  }
  /**
   * Test case that creates some regions and loads HFiles that cross the boundaries have a ROWCOL
   * bloom filter and a different region boundaries than the table pre-split.
   */
  @Test
  public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
    testRegionCrossingHFileSplit(BloomType.ROWCOL);
  }
  /**
   * Pre-splits the table into ~20 tiny regions, then loads one hfile spanning the whole key
   * space, forcing the loader to split it at every region boundary.
   */
  @Test
  public void testSplitALot() throws Exception {
    runTest("testSplitALot", BloomType.NONE,
      new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("bbb"), Bytes.toBytes("ccc"),
        Bytes.toBytes("ddd"), Bytes.toBytes("eee"), Bytes.toBytes("fff"), Bytes.toBytes("ggg"),
        Bytes.toBytes("hhh"), Bytes.toBytes("iii"), Bytes.toBytes("lll"), Bytes.toBytes("mmm"),
        Bytes.toBytes("nnn"), Bytes.toBytes("ooo"), Bytes.toBytes("ppp"), Bytes.toBytes("qqq"),
        Bytes.toBytes("rrr"), Bytes.toBytes("sss"), Bytes.toBytes("ttt"), Bytes.toBytes("uuu"),
        Bytes.toBytes("vvv"), Bytes.toBytes("zzz"), },
      new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("zzz") }, });
  }
229 private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
230 runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
231 new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
232 Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
233 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
234 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
237 private TableDescriptor buildHTD(TableName tableName, BloomType bloomType) {
238 return TableDescriptorBuilder.newBuilder(tableName)
239 .setColumnFamily(
240 ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setBloomFilterType(bloomType).build())
241 .build();
  /** Convenience overload: no pre-split keys, path-based (non-map) load. */
  private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges)
      throws Exception {
    runTest(testName, bloomType, null, hfileRanges);
  }
  /** Convenience overload: no pre-split keys, caller chooses map-based vs path-based load. */
  private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges, boolean useMap)
      throws Exception {
    runTest(testName, bloomType, null, hfileRanges, useMap);
  }
  /** Convenience overload: path-based load with optional pre-split keys. */
  private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
      byte[][][] hfileRanges) throws Exception {
    runTest(testName, bloomType, tableSplitKeys, hfileRanges, false);
  }
  /**
   * Main private driver: runs the load scenario against the default namespace, optionally again
   * from a depth-3 directory layout, and once more against {@link #NAMESPACE}.
   */
  private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap) throws Exception {
    final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
    final boolean preCreateTable = tableSplitKeys != null;

    // Run the test bulkloading the table to the default namespace
    final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
    runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, 2);

    /*
     * Run the test bulkloading the table from a depth of 3 directory structure is now
     * baseDirectory -- regionDir -- familyDir -- storeFileDir
     */
    if (preCreateTable) {
      // NOTE(review): the trailing depth argument was lost in this source extract; reconstructed
      // as 3 because this branch exists to exercise the depth==3 path of loadHFiles — confirm.
      runTest(testName + 2, TABLE_WITHOUT_NS, bloomType, true, tableSplitKeys, hfileRanges, false,
        3);
    }

    // Run the test bulkloading the table to the specified namespace
    final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
    runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges, useMap,
      2);
  }
  /** Builds the table descriptor for {@code tableName}/{@code bloomType} and delegates. */
  private void runTest(String testName, TableName tableName, BloomType bloomType,
      boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap,
      int depth) throws Exception {
    TableDescriptor htd = buildHTD(tableName, bloomType);
    runTest(testName, htd, preCreateTable, tableSplitKeys, hfileRanges, useMap, false, depth);
  }
  /** Overload of {@code loadHFiles} using the default directory depth of 2 (family/hfile). */
  public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
      byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
      int initRowCount, int factor) throws Exception {
    return loadHFiles(testName, htd, util, fam, qual, preCreateTable, tableSplitKeys, hfileRanges,
      useMap, deleteFile, copyFiles, initRowCount, factor, 2);
  }
  /**
   * Generates one HFile per range, bulk loads them (via map API or command line), and verifies
   * the resulting row count.
   *
   * @param hfileRanges   [from,to] key pairs, one generated hfile each, {@code factor} rows each
   * @param useMap        load through {@code bulkLoad(table, map)} instead of the CLI runner
   * @param deleteFile    (map mode only) delete the last hfile before loading to test resilience
   * @param copyFiles     set ALWAYS_COPY_FILES so source files survive the load
   * @param depth         2 = family/hfile layout; 3 = region/family/hfile layout (uses -loadTable)
   * @return number of rows expected to have been loaded
   */
  public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtility util,
      byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
      byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
      int initRowCount, int factor, int depth) throws Exception {
    Path baseDirectory = util.getDataTestDirOnTestFS(testName);
    FileSystem fs = util.getTestFileSystem();
    baseDirectory = baseDirectory.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    Path parentDir = baseDirectory;
    if (depth == 3) {
      // Depth-3 layout inserts a fake region directory; map-based loading does not support it.
      assert !useMap;
      parentDir = new Path(baseDirectory, "someRegion");
    }
    Path familyDir = new Path(parentDir, Bytes.toString(fam));

    int hfileIdx = 0;
    Map<byte[], List<Path>> map = null;
    List<Path> list = null;
    if (useMap || copyFiles) {
      list = new ArrayList<>();
    }
    if (useMap) {
      map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      map.put(fam, list);
    }
    // Write one hfile per requested [from,to] range; remember the last one for deleteFile mode.
    Path last = null;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      Path path = new Path(familyDir, "hfile_" + hfileIdx++);
      HFileTestUtil.createHFile(util.getConfiguration(), fs, path, fam, qual, from, to, factor);
      if (useMap) {
        last = path;
        list.add(path);
      }
    }
    int expectedRows = hfileIdx * factor;

    TableName tableName = htd.getTableName();
    if (!util.getAdmin().tableExists(tableName) && (preCreateTable || map != null)) {
      if (tableSplitKeys != null) {
        util.getAdmin().createTable(htd, tableSplitKeys);
      } else {
        util.getAdmin().createTable(htd);
      }
    }

    Configuration conf = util.getConfiguration();
    if (copyFiles) {
      conf.setBoolean(BulkLoadHFiles.ALWAYS_COPY_FILES, true);
    }
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
    List<String> args = Lists.newArrayList(baseDirectory.toString(), tableName.toString());
    if (depth == 3) {
      args.add("-loadTable");
    }

    if (useMap) {
      if (deleteFile) {
        fs.delete(last, true);
      }
      Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> loaded = loader.bulkLoad(tableName, map);
      if (deleteFile) {
        // The deleted file's rows must be absent and it must not appear among the loaded items.
        expectedRows -= 1000;
        for (BulkLoadHFiles.LoadQueueItem item : loaded.keySet()) {
          if (item.getFilePath().getName().equals(last.getName())) {
            fail(last + " should be missing");
          }
        }
      }
    } else {
      loader.run(args.toArray(new String[] {}));
    }

    if (copyFiles) {
      // ALWAYS_COPY_FILES means the originals are copied, not moved: they must still exist.
      for (Path p : list) {
        assertTrue(p + " should exist", fs.exists(p));
      }
    }

    try (Table table = util.getConnection().getTable(tableName)) {
      assertEquals(initRowCount + expectedRows, countRows(table));
    }

    return expectedRows;
  }
385 private void runTest(String testName, TableDescriptor htd, boolean preCreateTable,
386 byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap, boolean copyFiles, int depth)
387 throws Exception {
388 loadHFiles(testName, htd, util, FAMILY, QUALIFIER, preCreateTable, tableSplitKeys, hfileRanges,
389 useMap, true, copyFiles, 0, 1000, depth);
391 final TableName tableName = htd.getTableName();
392 // verify staging folder has been cleaned up
393 Path stagingBasePath =
394 new Path(FSUtils.getRootDir(util.getConfiguration()), HConstants.BULKLOAD_STAGING_DIR_NAME);
395 FileSystem fs = util.getTestFileSystem();
396 if (fs.exists(stagingBasePath)) {
397 FileStatus[] files = fs.listStatus(stagingBasePath);
398 for (FileStatus file : files) {
399 assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
400 file.getPath().getName() != "DONOTERASE");
404 util.deleteTable(tableName);
408 * Test that tags survive through a bulk load that needs to split hfiles. This test depends on the
409 * "hbase.client.rpc.codec" = KeyValueCodecWithTags so that the client can get tags in the
410 * responses.
412 @Test
413 public void testTagsSurviveBulkLoadSplit() throws Exception {
414 Path dir = util.getDataTestDirOnTestFS(tn.getMethodName());
415 FileSystem fs = util.getTestFileSystem();
416 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
417 Path familyDir = new Path(dir, Bytes.toString(FAMILY));
418 // table has these split points
419 byte[][] tableSplitKeys = new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"),
420 Bytes.toBytes("jjj"), Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), };
422 // creating an hfile that has values that span the split points.
423 byte[] from = Bytes.toBytes("ddd");
424 byte[] to = Bytes.toBytes("ooo");
425 HFileTestUtil.createHFileWithTags(util.getConfiguration(), fs,
426 new Path(familyDir, tn.getMethodName() + "_hfile"), FAMILY, QUALIFIER, from, to, 1000);
427 int expectedRows = 1000;
429 TableName tableName = TableName.valueOf(tn.getMethodName());
430 TableDescriptor htd = buildHTD(tableName, BloomType.NONE);
431 util.getAdmin().createTable(htd, tableSplitKeys);
433 BulkLoadHFiles.create(util.getConfiguration()).bulkLoad(tableName, dir);
435 Table table = util.getConnection().getTable(tableName);
436 try {
437 assertEquals(expectedRows, countRows(table));
438 HFileTestUtil.verifyTags(table);
439 } finally {
440 table.close();
443 util.deleteTable(tableName);
447 * Test loading into a column family that does not exist.
449 @Test
450 public void testNonexistentColumnFamilyLoad() throws Exception {
451 String testName = tn.getMethodName();
452 byte[][][] hFileRanges =
453 new byte[][][] { new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
454 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, };
456 byte[] TABLE = Bytes.toBytes("mytable_" + testName);
457 // set real family name to upper case in purpose to simulate the case that
458 // family name in HFiles is invalid
459 TableDescriptor htd = TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE))
460 .setColumnFamily(ColumnFamilyDescriptorBuilder
461 .of(Bytes.toBytes(new String(FAMILY).toUpperCase(Locale.ROOT))))
462 .build();
464 try {
465 runTest(testName, htd, true, SPLIT_KEYS, hFileRanges, false, false, 2);
466 assertTrue("Loading into table with non-existent family should have failed", false);
467 } catch (Exception e) {
468 assertTrue("IOException expected", e instanceof IOException);
469 // further check whether the exception message is correct
470 String errMsg = e.getMessage();
471 assertTrue(
472 "Incorrect exception message, expected message: [" + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY +
473 "], current message: [" + errMsg + "]",
474 errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
  /** Non-hfile junk alongside a pre-created table: the load must still succeed. */
  @Test
  public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
    testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
  }
  /** Same as above but without pre-creating the table. */
  @Test
  public void testNonHfileFolder() throws Exception {
    testNonHfileFolder("testNonHfileFolder", false);
  }
  /**
   * Write a random data file and a non-file in a dir with a valid family name but not part of the
   * table families. we should we able to bulkload without getting the unmatched family exception.
   * HBASE-13037/HBASE-13227
   */
  private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
    Path dir = util.getDataTestDirOnTestFS(tableName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());

    // One real hfile (500 rows) plus a junk data file inside the genuine family directory.
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));
    HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"), FAMILY,
      QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
    createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);

    // A non-family folder (underscore prefix) containing a directory and a junk file.
    final String NON_FAMILY_FOLDER = "_logs";
    Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
    fs.mkdirs(nonFamilyDir);
    fs.mkdirs(new Path(nonFamilyDir, "non-file"));
    createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);

    Table table = null;
    try {
      if (preCreateTable) {
        table = util.createTable(TableName.valueOf(tableName), FAMILY);
      } else {
        table = util.getConnection().getTable(TableName.valueOf(tableName));
      }
      BulkLoadHFiles.create(util.getConfiguration()).bulkLoad(TableName.valueOf(tableName), dir);
      // Only the 500 rows from the genuine hfile should have been loaded.
      assertEquals(500, countRows(table));
    } finally {
      // table may still be null if createTable/getTable itself threw.
      if (table != null) {
        table.close();
      }
      fs.delete(dir, true);
    }
  }
526 private static void createRandomDataFile(FileSystem fs, Path path, int size) throws IOException {
527 FSDataOutputStream stream = fs.create(path);
528 try {
529 byte[] data = new byte[1024];
530 for (int i = 0; i < data.length; ++i) {
531 data[i] = (byte) (i & 0xff);
533 while (size >= data.length) {
534 stream.write(data, 0, data.length);
535 size -= data.length;
537 if (size > 0) {
538 stream.write(data, 0, size);
540 } finally {
541 stream.close();
545 @Test
546 public void testSplitStoreFile() throws IOException {
547 Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
548 FileSystem fs = util.getTestFileSystem();
549 Path testIn = new Path(dir, "testhfile");
550 ColumnFamilyDescriptor familyDesc = ColumnFamilyDescriptorBuilder.of(FAMILY);
551 HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
552 Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
554 Path bottomOut = new Path(dir, "bottom.out");
555 Path topOut = new Path(dir, "top.out");
557 BulkLoadHFilesTool.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
558 Bytes.toBytes("ggg"), bottomOut, topOut);
560 int rowCount = verifyHFile(bottomOut);
561 rowCount += verifyHFile(topOut);
562 assertEquals(1000, rowCount);
  /** Split an unencoded hfile into an unencoded family. */
  @Test
  public void testSplitStoreFileWithNoneToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.NONE);
  }
  /** Split a DIFF-encoded hfile into a DIFF-encoded family. */
  @Test
  public void testSplitStoreFileWithEncodedToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.DIFF);
  }
  /** Split a DIFF-encoded hfile into an unencoded family. */
  @Test
  public void testSplitStoreFileWithEncodedToNone() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.NONE);
  }
  /** Split an unencoded hfile into a DIFF-encoded family. */
  @Test
  public void testSplitStoreFileWithNoneToEncoded() throws IOException {
    testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.DIFF);
  }
585 private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding,
586 DataBlockEncoding cfEncoding) throws IOException {
587 Path dir = util.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
588 FileSystem fs = util.getTestFileSystem();
589 Path testIn = new Path(dir, "testhfile");
590 ColumnFamilyDescriptor familyDesc =
591 ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setDataBlockEncoding(cfEncoding).build();
592 HFileTestUtil.createHFileWithDataBlockEncoding(util.getConfiguration(), fs, testIn,
593 bulkloadEncoding, FAMILY, QUALIFIER, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
595 Path bottomOut = new Path(dir, "bottom.out");
596 Path topOut = new Path(dir, "top.out");
598 BulkLoadHFilesTool.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
599 Bytes.toBytes("ggg"), bottomOut, topOut);
601 int rowCount = verifyHFile(bottomOut);
602 rowCount += verifyHFile(topOut);
603 assertEquals(1000, rowCount);
606 private int verifyHFile(Path p) throws IOException {
607 Configuration conf = util.getConfiguration();
608 HFile.Reader reader =
609 HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
610 HFileScanner scanner = reader.getScanner(false, false);
611 scanner.seekTo();
612 int count = 0;
613 do {
614 count++;
615 } while (scanner.next());
616 assertTrue(count > 0);
617 reader.close();
618 return count;
621 private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
622 Integer value = map.containsKey(first) ? map.get(first) : 0;
623 map.put(first, value + 1);
625 value = map.containsKey(last) ? map.get(last) : 0;
626 map.put(last, value - 1);
629 @Test
630 public void testInferBoundaries() {
631 TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
634 * Toy example c---------i o------p s---------t v------x a------e g-----k m-------------q r----s
635 * u----w Should be inferred as: a-----------------k m-------------q r--------------t
636 * u---------x The output should be (m,r,u)
639 String first;
640 String last;
642 first = "a";
643 last = "e";
644 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
646 first = "r";
647 last = "s";
648 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
650 first = "o";
651 last = "p";
652 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
654 first = "g";
655 last = "k";
656 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
658 first = "v";
659 last = "x";
660 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
662 first = "c";
663 last = "i";
664 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
666 first = "m";
667 last = "q";
668 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
670 first = "s";
671 last = "t";
672 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
674 first = "u";
675 last = "w";
676 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
678 byte[][] keysArray = BulkLoadHFilesTool.inferBoundaries(map);
679 byte[][] compare = new byte[3][];
680 compare[0] = Bytes.toBytes("m");
681 compare[1] = Bytes.toBytes("r");
682 compare[2] = Bytes.toBytes("u");
684 assertEquals(3, keysArray.length);
686 for (int row = 0; row < keysArray.length; row++) {
687 assertArrayEquals(keysArray[row], compare[row]);
691 @Test
692 public void testLoadTooMayHFiles() throws Exception {
693 Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
694 FileSystem fs = util.getTestFileSystem();
695 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
696 Path familyDir = new Path(dir, Bytes.toString(FAMILY));
698 byte[] from = Bytes.toBytes("begin");
699 byte[] to = Bytes.toBytes("end");
700 for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
701 HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_" + i),
702 FAMILY, QUALIFIER, from, to, 1000);
705 try {
706 BulkLoadHFiles.create(util.getConfiguration())
707 .bulkLoad(TableName.valueOf("mytable_testLoadTooMayHFiles"), dir);
708 fail("Bulk loading too many files should fail");
709 } catch (IOException ie) {
710 assertTrue(ie.getMessage()
711 .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
  /** With table auto-creation disabled, loading into a missing table must throw. */
  @Test(expected = TableNotFoundException.class)
  public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.set(BulkLoadHFiles.CREATE_TABLE_CONF_KEY, "no");
    BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
    String[] args = { "directory", "nonExistingTable" };
    loader.run(args);
  }
724 @Test
725 public void testTableWithCFNameStartWithUnderScore() throws Exception {
726 Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
727 FileSystem fs = util.getTestFileSystem();
728 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
729 String family = "_cf";
730 Path familyDir = new Path(dir, family);
732 byte[] from = Bytes.toBytes("begin");
733 byte[] to = Bytes.toBytes("end");
734 Configuration conf = util.getConfiguration();
735 String tableName = tn.getMethodName();
736 try (Table table = util.createTable(TableName.valueOf(tableName), family)) {
737 HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
738 QUALIFIER, from, to, 1000);
739 BulkLoadHFiles.create(conf).bulkLoad(table.getName(), dir);
740 assertEquals(1000, countRows(table));