HBASE-26921 Rewrite the counting cells part in TestMultiVersions (#4316)
[hbase.git] / hbase-server / src / test / java / org / apache / hadoop / hbase / tool / TestBulkLoadHFiles.java
blobffc09bccd6be945f9e40df0b383995d175cfef85
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.tool;
20 import static org.apache.hadoop.hbase.HBaseTestingUtil.countRows;
21 import static org.junit.Assert.assertArrayEquals;
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertTrue;
24 import static org.junit.Assert.fail;
26 import java.io.IOException;
27 import java.nio.ByteBuffer;
28 import java.util.ArrayList;
29 import java.util.Collection;
30 import java.util.List;
31 import java.util.Locale;
32 import java.util.Map;
33 import java.util.TreeMap;
34 import java.util.concurrent.CompletableFuture;
35 import java.util.concurrent.atomic.AtomicInteger;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.fs.FSDataOutputStream;
38 import org.apache.hadoop.fs.FileStatus;
39 import org.apache.hadoop.fs.FileSystem;
40 import org.apache.hadoop.fs.Path;
41 import org.apache.hadoop.hbase.HBaseClassTestRule;
42 import org.apache.hadoop.hbase.HBaseTestingUtil;
43 import org.apache.hadoop.hbase.HConstants;
44 import org.apache.hadoop.hbase.NamespaceDescriptor;
45 import org.apache.hadoop.hbase.TableName;
46 import org.apache.hadoop.hbase.TableNotFoundException;
47 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
48 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
49 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
50 import org.apache.hadoop.hbase.client.Table;
51 import org.apache.hadoop.hbase.client.TableDescriptor;
52 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
53 import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
54 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
55 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
56 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
57 import org.apache.hadoop.hbase.io.hfile.HFile;
58 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
59 import org.apache.hadoop.hbase.regionserver.BloomType;
60 import org.apache.hadoop.hbase.testclassification.LargeTests;
61 import org.apache.hadoop.hbase.testclassification.MiscTests;
62 import org.apache.hadoop.hbase.util.Bytes;
63 import org.apache.hadoop.hbase.util.CommonFSUtils;
64 import org.apache.hadoop.hbase.util.HFileTestUtil;
65 import org.junit.AfterClass;
66 import org.junit.BeforeClass;
67 import org.junit.ClassRule;
68 import org.junit.Rule;
69 import org.junit.Test;
70 import org.junit.experimental.categories.Category;
71 import org.junit.rules.TestName;
73 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load functionality. These tests run
 * faster than the full MR cluster tests in TestHFileOutputFormat.
 */
79 @Category({ MiscTests.class, LargeTests.class })
80 public class TestBulkLoadHFiles {
/** Class-level JUnit rule bound to this test class. */
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
  HBaseClassTestRule.forClass(TestBulkLoadHFiles.class);

/** Exposes the name of the running test method; several tests derive table and dir names from it. */
@Rule
public TestName tn = new TestName();

/** Column qualifier passed to the HFile generators used by these tests. */
private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
/** Column family of the generated HFiles and of the tables built by buildHTD(). */
private static final byte[] FAMILY = Bytes.toBytes("myfam");
/** Namespace created by setupNamespace() and used for the namespaced bulk load runs. */
private static final String NAMESPACE = "bulkNS";

/** Fragment that testNonexistentColumnFamilyLoad() expects in the failure message. */
static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY = "Unmatched family names found";
/** Value stored under BulkLoadHFiles.MAX_FILES_PER_REGION_PER_FAMILY by setUpBeforeClass(). */
static final int MAX_FILES_PER_REGION_PER_FAMILY = 4;

/** Split keys used when testNonexistentColumnFamilyLoad() pre-creates its table. */
private static final byte[][] SPLIT_KEYS =
  new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ppp") };

/** Shared testing utility; its mini cluster is started in setUpBeforeClass(). */
static HBaseTestingUtil util = new HBaseTestingUtil();
101 @BeforeClass
102 public static void setUpBeforeClass() throws Exception {
103 util.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, "");
104 util.getConfiguration().setInt(BulkLoadHFiles.MAX_FILES_PER_REGION_PER_FAMILY,
105 MAX_FILES_PER_REGION_PER_FAMILY);
106 // change default behavior so that tag values are returned with normal rpcs
107 util.getConfiguration().set(HConstants.RPC_CODEC_CONF_KEY,
108 KeyValueCodecWithTags.class.getCanonicalName());
109 util.startMiniCluster();
111 setupNamespace();
114 protected static void setupNamespace() throws Exception {
115 util.getAdmin().createNamespace(NamespaceDescriptor.create(NAMESPACE).build());
118 @AfterClass
119 public static void tearDownAfterClass() throws Exception {
120 util.shutdownMiniCluster();
123 @Test
124 public void testSimpleLoadWithMap() throws Exception {
125 runTest("testSimpleLoadWithMap", BloomType.NONE,
126 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
127 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
128 true);
132 * Test case that creates some regions and loads HFiles that fit snugly inside those regions
134 @Test
135 public void testSimpleLoad() throws Exception {
136 runTest("testSimpleLoad", BloomType.NONE,
137 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
138 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, });
141 @Test
142 public void testSimpleLoadWithFileCopy() throws Exception {
143 String testName = tn.getMethodName();
144 final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
145 runTest(testName, buildHTD(TableName.valueOf(TABLE_NAME), BloomType.NONE), false, null,
146 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
147 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, },
148 false, true, 2);
152 * Test case that creates some regions and loads HFiles that cross the boundaries of those regions
154 @Test
155 public void testRegionCrossingLoad() throws Exception {
156 runTest("testRegionCrossingLoad", BloomType.NONE,
157 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
158 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
162 * Test loading into a column family that has a ROW bloom filter.
164 @Test
165 public void testRegionCrossingRowBloom() throws Exception {
166 runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
167 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
168 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
172 * Test loading into a column family that has a ROWCOL bloom filter.
174 @Test
175 public void testRegionCrossingRowColBloom() throws Exception {
176 runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
177 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
178 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
182 * Test case that creates some regions and loads HFiles that have different region boundaries than
183 * the table pre-split.
185 @Test
186 public void testSimpleHFileSplit() throws Exception {
187 runTest("testHFileSplit", BloomType.NONE,
188 new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
189 Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
190 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("lll") },
191 new byte[][] { Bytes.toBytes("mmm"), Bytes.toBytes("zzz") }, });
195 * Test case that creates some regions and loads HFiles that cross the boundaries and have
196 * different region boundaries than the table pre-split.
198 @Test
199 public void testRegionCrossingHFileSplit() throws Exception {
200 testRegionCrossingHFileSplit(BloomType.NONE);
204 * Test case that creates some regions and loads HFiles that cross the boundaries have a ROW bloom
205 * filter and a different region boundaries than the table pre-split.
207 @Test
208 public void testRegionCrossingHFileSplitRowBloom() throws Exception {
209 testRegionCrossingHFileSplit(BloomType.ROW);
213 * Test case that creates some regions and loads HFiles that cross the boundaries have a ROWCOL
214 * bloom filter and a different region boundaries than the table pre-split.
216 @Test
217 public void testRegionCrossingHFileSplitRowColBloom() throws Exception {
218 testRegionCrossingHFileSplit(BloomType.ROWCOL);
221 @Test
222 public void testSplitALot() throws Exception {
223 runTest("testSplitALot", BloomType.NONE,
224 new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("bbb"), Bytes.toBytes("ccc"),
225 Bytes.toBytes("ddd"), Bytes.toBytes("eee"), Bytes.toBytes("fff"), Bytes.toBytes("ggg"),
226 Bytes.toBytes("hhh"), Bytes.toBytes("iii"), Bytes.toBytes("lll"), Bytes.toBytes("mmm"),
227 Bytes.toBytes("nnn"), Bytes.toBytes("ooo"), Bytes.toBytes("ppp"), Bytes.toBytes("qqq"),
228 Bytes.toBytes("rrr"), Bytes.toBytes("sss"), Bytes.toBytes("ttt"), Bytes.toBytes("uuu"),
229 Bytes.toBytes("vvv"), Bytes.toBytes("zzz"), },
230 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("zzz") }, });
233 private void testRegionCrossingHFileSplit(BloomType bloomType) throws Exception {
234 runTest("testHFileSplit" + bloomType + "Bloom", bloomType,
235 new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"), Bytes.toBytes("jjj"),
236 Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), },
237 new byte[][][] { new byte[][] { Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
238 new byte[][] { Bytes.toBytes("fff"), Bytes.toBytes("zzz") }, });
241 private TableDescriptor buildHTD(TableName tableName, BloomType bloomType) {
242 return TableDescriptorBuilder.newBuilder(tableName)
243 .setColumnFamily(
244 ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setBloomFilterType(bloomType).build())
245 .build();
248 private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges)
249 throws Exception {
250 runTest(testName, bloomType, null, hfileRanges);
253 private void runTest(String testName, BloomType bloomType, byte[][][] hfileRanges, boolean useMap)
254 throws Exception {
255 runTest(testName, bloomType, null, hfileRanges, useMap);
258 private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
259 byte[][][] hfileRanges) throws Exception {
260 runTest(testName, bloomType, tableSplitKeys, hfileRanges, false);
263 private void runTest(String testName, BloomType bloomType, byte[][] tableSplitKeys,
264 byte[][][] hfileRanges, boolean useMap) throws Exception {
265 final byte[] TABLE_NAME = Bytes.toBytes("mytable_" + testName);
266 final boolean preCreateTable = tableSplitKeys != null;
268 // Run the test bulkloading the table to the default namespace
269 final TableName TABLE_WITHOUT_NS = TableName.valueOf(TABLE_NAME);
270 runTest(testName, TABLE_WITHOUT_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges,
271 useMap, 2);
274 * Run the test bulkloading the table from a depth of 3 directory structure is now baseDirectory
275 * -- regionDir -- familyDir -- storeFileDir
277 if (preCreateTable) {
278 runTest(testName + 2, TABLE_WITHOUT_NS, bloomType, true, tableSplitKeys, hfileRanges, false,
282 // Run the test bulkloading the table to the specified namespace
283 final TableName TABLE_WITH_NS = TableName.valueOf(Bytes.toBytes(NAMESPACE), TABLE_NAME);
284 runTest(testName, TABLE_WITH_NS, bloomType, preCreateTable, tableSplitKeys, hfileRanges, useMap,
288 private void runTest(String testName, TableName tableName, BloomType bloomType,
289 boolean preCreateTable, byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap,
290 int depth) throws Exception {
291 TableDescriptor htd = buildHTD(tableName, bloomType);
292 runTest(testName, htd, preCreateTable, tableSplitKeys, hfileRanges, useMap, false, depth);
295 public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtil util,
296 byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
297 byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
298 int initRowCount, int factor) throws Exception {
299 return loadHFiles(testName, htd, util, fam, qual, preCreateTable, tableSplitKeys, hfileRanges,
300 useMap, deleteFile, copyFiles, initRowCount, factor, 2);
303 public static int loadHFiles(String testName, TableDescriptor htd, HBaseTestingUtil util,
304 byte[] fam, byte[] qual, boolean preCreateTable, byte[][] tableSplitKeys,
305 byte[][][] hfileRanges, boolean useMap, boolean deleteFile, boolean copyFiles,
306 int initRowCount, int factor, int depth) throws Exception {
307 Path baseDirectory = util.getDataTestDirOnTestFS(testName);
308 FileSystem fs = util.getTestFileSystem();
309 baseDirectory = baseDirectory.makeQualified(fs.getUri(), fs.getWorkingDirectory());
310 Path parentDir = baseDirectory;
311 if (depth == 3) {
312 assert !useMap;
313 parentDir = new Path(baseDirectory, "someRegion");
315 Path familyDir = new Path(parentDir, Bytes.toString(fam));
317 int hfileIdx = 0;
318 Map<byte[], List<Path>> map = null;
319 List<Path> list = null;
320 if (useMap || copyFiles) {
321 list = new ArrayList<>();
323 if (useMap) {
324 map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
325 map.put(fam, list);
327 Path last = null;
328 for (byte[][] range : hfileRanges) {
329 byte[] from = range[0];
330 byte[] to = range[1];
331 Path path = new Path(familyDir, "hfile_" + hfileIdx++);
332 HFileTestUtil.createHFile(util.getConfiguration(), fs, path, fam, qual, from, to, factor);
333 if (useMap) {
334 last = path;
335 list.add(path);
338 int expectedRows = hfileIdx * factor;
340 TableName tableName = htd.getTableName();
341 if (!util.getAdmin().tableExists(tableName) && (preCreateTable || map != null)) {
342 if (tableSplitKeys != null) {
343 util.getAdmin().createTable(htd, tableSplitKeys);
344 } else {
345 util.getAdmin().createTable(htd);
349 Configuration conf = util.getConfiguration();
350 if (copyFiles) {
351 conf.setBoolean(BulkLoadHFiles.ALWAYS_COPY_FILES, true);
353 BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
354 List<String> args = Lists.newArrayList(baseDirectory.toString(), tableName.toString());
355 if (depth == 3) {
356 args.add("-loadTable");
359 if (useMap) {
360 if (deleteFile) {
361 fs.delete(last, true);
363 Map<BulkLoadHFiles.LoadQueueItem, ByteBuffer> loaded = loader.bulkLoad(tableName, map);
364 if (deleteFile) {
365 expectedRows -= 1000;
366 for (BulkLoadHFiles.LoadQueueItem item : loaded.keySet()) {
367 if (item.getFilePath().getName().equals(last.getName())) {
368 fail(last + " should be missing");
372 } else {
373 loader.run(args.toArray(new String[] {}));
376 if (copyFiles) {
377 for (Path p : list) {
378 assertTrue(p + " should exist", fs.exists(p));
382 try (Table table = util.getConnection().getTable(tableName)) {
383 assertEquals(initRowCount + expectedRows, countRows(table));
386 return expectedRows;
389 private void runTest(String testName, TableDescriptor htd, boolean preCreateTable,
390 byte[][] tableSplitKeys, byte[][][] hfileRanges, boolean useMap, boolean copyFiles, int depth)
391 throws Exception {
392 loadHFiles(testName, htd, util, FAMILY, QUALIFIER, preCreateTable, tableSplitKeys, hfileRanges,
393 useMap, true, copyFiles, 0, 1000, depth);
395 final TableName tableName = htd.getTableName();
396 // verify staging folder has been cleaned up
397 Path stagingBasePath = new Path(CommonFSUtils.getRootDir(util.getConfiguration()),
398 HConstants.BULKLOAD_STAGING_DIR_NAME);
399 FileSystem fs = util.getTestFileSystem();
400 if (fs.exists(stagingBasePath)) {
401 FileStatus[] files = fs.listStatus(stagingBasePath);
402 for (FileStatus file : files) {
403 assertTrue("Folder=" + file.getPath() + " is not cleaned up.",
404 file.getPath().getName() != "DONOTERASE");
408 util.deleteTable(tableName);
412 * Test that tags survive through a bulk load that needs to split hfiles. This test depends on the
413 * "hbase.client.rpc.codec" = KeyValueCodecWithTags so that the client can get tags in the
414 * responses.
416 @Test
417 public void testTagsSurviveBulkLoadSplit() throws Exception {
418 Path dir = util.getDataTestDirOnTestFS(tn.getMethodName());
419 FileSystem fs = util.getTestFileSystem();
420 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
421 Path familyDir = new Path(dir, Bytes.toString(FAMILY));
422 // table has these split points
423 byte[][] tableSplitKeys = new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("fff"),
424 Bytes.toBytes("jjj"), Bytes.toBytes("ppp"), Bytes.toBytes("uuu"), Bytes.toBytes("zzz"), };
426 // creating an hfile that has values that span the split points.
427 byte[] from = Bytes.toBytes("ddd");
428 byte[] to = Bytes.toBytes("ooo");
429 HFileTestUtil.createHFileWithTags(util.getConfiguration(), fs,
430 new Path(familyDir, tn.getMethodName() + "_hfile"), FAMILY, QUALIFIER, from, to, 1000);
431 int expectedRows = 1000;
433 TableName tableName = TableName.valueOf(tn.getMethodName());
434 TableDescriptor htd = buildHTD(tableName, BloomType.NONE);
435 util.getAdmin().createTable(htd, tableSplitKeys);
437 BulkLoadHFiles.create(util.getConfiguration()).bulkLoad(tableName, dir);
439 Table table = util.getConnection().getTable(tableName);
440 try {
441 assertEquals(expectedRows, countRows(table));
442 HFileTestUtil.verifyTags(table);
443 } finally {
444 table.close();
447 util.deleteTable(tableName);
451 * Test loading into a column family that does not exist.
453 @Test
454 public void testNonexistentColumnFamilyLoad() throws Exception {
455 String testName = tn.getMethodName();
456 byte[][][] hFileRanges =
457 new byte[][][] { new byte[][] { Bytes.toBytes("aaa"), Bytes.toBytes("ccc") },
458 new byte[][] { Bytes.toBytes("ddd"), Bytes.toBytes("ooo") }, };
460 byte[] TABLE = Bytes.toBytes("mytable_" + testName);
461 // set real family name to upper case in purpose to simulate the case that
462 // family name in HFiles is invalid
463 TableDescriptor htd = TableDescriptorBuilder.newBuilder(TableName.valueOf(TABLE))
464 .setColumnFamily(ColumnFamilyDescriptorBuilder
465 .of(Bytes.toBytes(new String(FAMILY).toUpperCase(Locale.ROOT))))
466 .build();
468 try {
469 runTest(testName, htd, true, SPLIT_KEYS, hFileRanges, false, false, 2);
470 assertTrue("Loading into table with non-existent family should have failed", false);
471 } catch (Exception e) {
472 assertTrue("IOException expected", e instanceof IOException);
473 // further check whether the exception message is correct
474 String errMsg = e.getMessage();
475 assertTrue(
476 "Incorrect exception message, expected message: [" + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY +
477 "], current message: [" + errMsg + "]",
478 errMsg.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY));
482 @Test
483 public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception {
484 testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
487 @Test
488 public void testNonHfileFolder() throws Exception {
489 testNonHfileFolder("testNonHfileFolder", false);
493 * Write a random data file and a non-file in a dir with a valid family name but not part of the
494 * table families. we should we able to bulkload without getting the unmatched family exception.
495 * HBASE-13037/HBASE-13227
497 private void testNonHfileFolder(String tableName, boolean preCreateTable) throws Exception {
498 Path dir = util.getDataTestDirOnTestFS(tableName);
499 FileSystem fs = util.getTestFileSystem();
500 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
502 Path familyDir = new Path(dir, Bytes.toString(FAMILY));
503 HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_0"), FAMILY,
504 QUALIFIER, Bytes.toBytes("begin"), Bytes.toBytes("end"), 500);
505 createRandomDataFile(fs, new Path(familyDir, "012356789"), 16 * 1024);
507 final String NON_FAMILY_FOLDER = "_logs";
508 Path nonFamilyDir = new Path(dir, NON_FAMILY_FOLDER);
509 fs.mkdirs(nonFamilyDir);
510 fs.mkdirs(new Path(nonFamilyDir, "non-file"));
511 createRandomDataFile(fs, new Path(nonFamilyDir, "012356789"), 16 * 1024);
513 Table table = null;
514 try {
515 if (preCreateTable) {
516 table = util.createTable(TableName.valueOf(tableName), FAMILY);
517 } else {
518 table = util.getConnection().getTable(TableName.valueOf(tableName));
520 BulkLoadHFiles.create(util.getConfiguration()).bulkLoad(TableName.valueOf(tableName), dir);
521 assertEquals(500, countRows(table));
522 } finally {
523 if (table != null) {
524 table.close();
526 fs.delete(dir, true);
530 private static void createRandomDataFile(FileSystem fs, Path path, int size) throws IOException {
531 FSDataOutputStream stream = fs.create(path);
532 try {
533 byte[] data = new byte[1024];
534 for (int i = 0; i < data.length; ++i) {
535 data[i] = (byte) (i & 0xff);
537 while (size >= data.length) {
538 stream.write(data, 0, data.length);
539 size -= data.length;
541 if (size > 0) {
542 stream.write(data, 0, size);
544 } finally {
545 stream.close();
549 @Test
550 public void testSplitStoreFile() throws IOException {
551 Path dir = util.getDataTestDirOnTestFS("testSplitHFile");
552 FileSystem fs = util.getTestFileSystem();
553 Path testIn = new Path(dir, "testhfile");
554 ColumnFamilyDescriptor familyDesc = ColumnFamilyDescriptorBuilder.of(FAMILY);
555 HFileTestUtil.createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
556 Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
558 Path bottomOut = new Path(dir, "bottom.out");
559 Path topOut = new Path(dir, "top.out");
561 BulkLoadHFilesTool.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
562 Bytes.toBytes("ggg"), bottomOut, topOut);
564 int rowCount = verifyHFile(bottomOut);
565 rowCount += verifyHFile(topOut);
566 assertEquals(1000, rowCount);
569 @Test
570 public void testSplitStoreFileWithNoneToNone() throws IOException {
571 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.NONE);
574 @Test
575 public void testSplitStoreFileWithEncodedToEncoded() throws IOException {
576 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.DIFF);
579 @Test
580 public void testSplitStoreFileWithEncodedToNone() throws IOException {
581 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.DIFF, DataBlockEncoding.NONE);
584 @Test
585 public void testSplitStoreFileWithNoneToEncoded() throws IOException {
586 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding.NONE, DataBlockEncoding.DIFF);
589 private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding,
590 DataBlockEncoding cfEncoding) throws IOException {
591 Path dir = util.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
592 FileSystem fs = util.getTestFileSystem();
593 Path testIn = new Path(dir, "testhfile");
594 ColumnFamilyDescriptor familyDesc =
595 ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).setDataBlockEncoding(cfEncoding).build();
596 HFileTestUtil.createHFileWithDataBlockEncoding(util.getConfiguration(), fs, testIn,
597 bulkloadEncoding, FAMILY, QUALIFIER, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
599 Path bottomOut = new Path(dir, "bottom.out");
600 Path topOut = new Path(dir, "top.out");
602 BulkLoadHFilesTool.splitStoreFile(util.getConfiguration(), testIn, familyDesc,
603 Bytes.toBytes("ggg"), bottomOut, topOut);
605 int rowCount = verifyHFile(bottomOut);
606 rowCount += verifyHFile(topOut);
607 assertEquals(1000, rowCount);
610 private int verifyHFile(Path p) throws IOException {
611 Configuration conf = util.getConfiguration();
612 HFile.Reader reader =
613 HFile.createReader(p.getFileSystem(conf), p, new CacheConfig(conf), true, conf);
614 HFileScanner scanner = reader.getScanner(conf, false, false);
615 scanner.seekTo();
616 int count = 0;
617 do {
618 count++;
619 } while (scanner.next());
620 assertTrue(count > 0);
621 reader.close();
622 return count;
625 private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
626 Integer value = map.containsKey(first) ? map.get(first) : 0;
627 map.put(first, value + 1);
629 value = map.containsKey(last) ? map.get(last) : 0;
630 map.put(last, value - 1);
633 @Test
634 public void testInferBoundaries() {
635 TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
638 * Toy example c---------i o------p s---------t v------x a------e g-----k m-------------q r----s
639 * u----w Should be inferred as: a-----------------k m-------------q r--------------t
640 * u---------x The output should be (m,r,u)
643 String first;
644 String last;
646 first = "a";
647 last = "e";
648 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
650 first = "r";
651 last = "s";
652 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
654 first = "o";
655 last = "p";
656 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
658 first = "g";
659 last = "k";
660 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
662 first = "v";
663 last = "x";
664 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
666 first = "c";
667 last = "i";
668 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
670 first = "m";
671 last = "q";
672 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
674 first = "s";
675 last = "t";
676 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
678 first = "u";
679 last = "w";
680 addStartEndKeysForTest(map, Bytes.toBytes(first), Bytes.toBytes(last));
682 byte[][] keysArray = BulkLoadHFilesTool.inferBoundaries(map);
683 byte[][] compare = new byte[3][];
684 compare[0] = Bytes.toBytes("m");
685 compare[1] = Bytes.toBytes("r");
686 compare[2] = Bytes.toBytes("u");
688 assertEquals(3, keysArray.length);
690 for (int row = 0; row < keysArray.length; row++) {
691 assertArrayEquals(keysArray[row], compare[row]);
695 @Test
696 public void testLoadTooMayHFiles() throws Exception {
697 Path dir = util.getDataTestDirOnTestFS("testLoadTooMayHFiles");
698 FileSystem fs = util.getTestFileSystem();
699 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
700 Path familyDir = new Path(dir, Bytes.toString(FAMILY));
702 byte[] from = Bytes.toBytes("begin");
703 byte[] to = Bytes.toBytes("end");
704 for (int i = 0; i <= MAX_FILES_PER_REGION_PER_FAMILY; i++) {
705 HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_" + i),
706 FAMILY, QUALIFIER, from, to, 1000);
709 try {
710 BulkLoadHFiles.create(util.getConfiguration())
711 .bulkLoad(TableName.valueOf("mytable_testLoadTooMayHFiles"), dir);
712 fail("Bulk loading too many files should fail");
713 } catch (IOException ie) {
714 assertTrue(ie.getMessage()
715 .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY + " hfiles"));
719 @Test(expected = TableNotFoundException.class)
720 public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
721 Configuration conf = util.getConfiguration();
722 conf.set(BulkLoadHFiles.CREATE_TABLE_CONF_KEY, "no");
723 BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf);
724 String[] args = { "directory", "nonExistingTable" };
725 loader.run(args);
728 @Test
729 public void testTableWithCFNameStartWithUnderScore() throws Exception {
730 Path dir = util.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
731 FileSystem fs = util.getTestFileSystem();
732 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
733 String family = "_cf";
734 Path familyDir = new Path(dir, family);
736 byte[] from = Bytes.toBytes("begin");
737 byte[] to = Bytes.toBytes("end");
738 Configuration conf = util.getConfiguration();
739 String tableName = tn.getMethodName();
740 try (Table table = util.createTable(TableName.valueOf(tableName), family)) {
741 HFileTestUtil.createHFile(conf, fs, new Path(familyDir, "hfile"), Bytes.toBytes(family),
742 QUALIFIER, from, to, 1000);
743 BulkLoadHFiles.create(conf).bulkLoad(table.getName(), dir);
744 assertEquals(1000, countRows(table));
748 @Test
749 public void testBulkLoadByFamily() throws Exception {
750 Path dir = util.getDataTestDirOnTestFS("testBulkLoadByFamily");
751 FileSystem fs = util.getTestFileSystem();
752 dir = dir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
753 String tableName = tn.getMethodName();
754 String[] families = { "cf1", "cf2", "cf3" };
755 for (int i = 0; i < families.length; i++) {
756 byte[] from = Bytes.toBytes(i + "begin");
757 byte[] to = Bytes.toBytes(i + "end");
758 Path familyDir = new Path(dir, families[i]);
759 HFileTestUtil.createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile"),
760 Bytes.toBytes(families[i]), QUALIFIER, from, to, 1000);
762 Table table = util.createTable(TableName.valueOf(tableName), families);
763 final AtomicInteger attmptedCalls = new AtomicInteger();
764 util.getConfiguration().setBoolean(BulkLoadHFilesTool.BULK_LOAD_HFILES_BY_FAMILY, true);
765 BulkLoadHFiles loader = new BulkLoadHFilesTool(util.getConfiguration()) {
766 @Override
767 protected CompletableFuture<Collection<LoadQueueItem>> tryAtomicRegionLoad(
768 final AsyncClusterConnection conn, final TableName tableName, boolean copyFiles,
769 final byte[] first, Collection<LoadQueueItem> lqis) {
770 attmptedCalls.incrementAndGet();
771 return super.tryAtomicRegionLoad(conn, tableName, copyFiles, first, lqis);
774 try {
775 loader.bulkLoad(table.getName(), dir);
776 assertEquals(families.length, attmptedCalls.get());
777 assertEquals(1000 * families.length, HBaseTestingUtil.countRows(table));
778 } finally {
779 if (null != table) {
780 table.close();
782 util.getConfiguration().setBoolean(BulkLoadHFilesTool.BULK_LOAD_HFILES_BY_FAMILY, false);