2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.tool
;
20 import static org
.apache
.hadoop
.hbase
.HBaseTestingUtil
.countRows
;
21 import static org
.junit
.Assert
.assertArrayEquals
;
22 import static org
.junit
.Assert
.assertEquals
;
23 import static org
.junit
.Assert
.assertTrue
;
24 import static org
.junit
.Assert
.fail
;
26 import java
.io
.IOException
;
27 import java
.nio
.ByteBuffer
;
28 import java
.util
.ArrayList
;
29 import java
.util
.Collection
;
30 import java
.util
.List
;
31 import java
.util
.Locale
;
33 import java
.util
.TreeMap
;
34 import java
.util
.concurrent
.CompletableFuture
;
35 import java
.util
.concurrent
.atomic
.AtomicInteger
;
36 import org
.apache
.hadoop
.conf
.Configuration
;
37 import org
.apache
.hadoop
.fs
.FSDataOutputStream
;
38 import org
.apache
.hadoop
.fs
.FileStatus
;
39 import org
.apache
.hadoop
.fs
.FileSystem
;
40 import org
.apache
.hadoop
.fs
.Path
;
41 import org
.apache
.hadoop
.hbase
.HBaseClassTestRule
;
42 import org
.apache
.hadoop
.hbase
.HBaseTestingUtil
;
43 import org
.apache
.hadoop
.hbase
.HConstants
;
44 import org
.apache
.hadoop
.hbase
.NamespaceDescriptor
;
45 import org
.apache
.hadoop
.hbase
.TableName
;
46 import org
.apache
.hadoop
.hbase
.TableNotFoundException
;
47 import org
.apache
.hadoop
.hbase
.client
.AsyncClusterConnection
;
48 import org
.apache
.hadoop
.hbase
.client
.ColumnFamilyDescriptor
;
49 import org
.apache
.hadoop
.hbase
.client
.ColumnFamilyDescriptorBuilder
;
50 import org
.apache
.hadoop
.hbase
.client
.Table
;
51 import org
.apache
.hadoop
.hbase
.client
.TableDescriptor
;
52 import org
.apache
.hadoop
.hbase
.client
.TableDescriptorBuilder
;
53 import org
.apache
.hadoop
.hbase
.codec
.KeyValueCodecWithTags
;
54 import org
.apache
.hadoop
.hbase
.coprocessor
.CoprocessorHost
;
55 import org
.apache
.hadoop
.hbase
.io
.encoding
.DataBlockEncoding
;
56 import org
.apache
.hadoop
.hbase
.io
.hfile
.CacheConfig
;
57 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFile
;
58 import org
.apache
.hadoop
.hbase
.io
.hfile
.HFileScanner
;
59 import org
.apache
.hadoop
.hbase
.regionserver
.BloomType
;
60 import org
.apache
.hadoop
.hbase
.testclassification
.LargeTests
;
61 import org
.apache
.hadoop
.hbase
.testclassification
.MiscTests
;
62 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
63 import org
.apache
.hadoop
.hbase
.util
.CommonFSUtils
;
64 import org
.apache
.hadoop
.hbase
.util
.HFileTestUtil
;
65 import org
.junit
.AfterClass
;
66 import org
.junit
.BeforeClass
;
67 import org
.junit
.ClassRule
;
68 import org
.junit
.Rule
;
69 import org
.junit
.Test
;
70 import org
.junit
.experimental
.categories
.Category
;
71 import org
.junit
.rules
.TestName
;
73 import org
.apache
.hbase
.thirdparty
.com
.google
.common
.collect
.Lists
;
/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load functionality. These tests run
 * faster than the full MR cluster tests in TestHFileOutputFormat.
 */
79 @Category({ MiscTests
.class, LargeTests
.class })
80 public class TestBulkLoadHFiles
{
83 public static final HBaseClassTestRule CLASS_RULE
=
84 HBaseClassTestRule
.forClass(TestBulkLoadHFiles
.class);
87 public TestName tn
= new TestName();
89 private static final byte[] QUALIFIER
= Bytes
.toBytes("myqual");
90 private static final byte[] FAMILY
= Bytes
.toBytes("myfam");
91 private static final String NAMESPACE
= "bulkNS";
93 static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY
= "Unmatched family names found";
94 static final int MAX_FILES_PER_REGION_PER_FAMILY
= 4;
96 private static final byte[][] SPLIT_KEYS
=
97 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ppp") };
99 static HBaseTestingUtil util
= new HBaseTestingUtil();
102 public static void setUpBeforeClass() throws Exception
{
103 util
.getConfiguration().set(CoprocessorHost
.REGION_COPROCESSOR_CONF_KEY
, "");
104 util
.getConfiguration().setInt(BulkLoadHFiles
.MAX_FILES_PER_REGION_PER_FAMILY
,
105 MAX_FILES_PER_REGION_PER_FAMILY
);
106 // change default behavior so that tag values are returned with normal rpcs
107 util
.getConfiguration().set(HConstants
.RPC_CODEC_CONF_KEY
,
108 KeyValueCodecWithTags
.class.getCanonicalName());
109 util
.startMiniCluster();
114 protected static void setupNamespace() throws Exception
{
115 util
.getAdmin().createNamespace(NamespaceDescriptor
.create(NAMESPACE
).build());
119 public static void tearDownAfterClass() throws Exception
{
120 util
.shutdownMiniCluster();
124 public void testSimpleLoadWithMap() throws Exception
{
125 runTest("testSimpleLoadWithMap", BloomType
.NONE
,
126 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("cccc") },
127 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, },
132 * Test case that creates some regions and loads HFiles that fit snugly inside those regions
135 public void testSimpleLoad() throws Exception
{
136 runTest("testSimpleLoad", BloomType
.NONE
,
137 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("cccc") },
138 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, });
142 public void testSimpleLoadWithFileCopy() throws Exception
{
143 String testName
= tn
.getMethodName();
144 final byte[] TABLE_NAME
= Bytes
.toBytes("mytable_" + testName
);
145 runTest(testName
, buildHTD(TableName
.valueOf(TABLE_NAME
), BloomType
.NONE
), false, null,
146 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("cccc") },
147 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, },
152 * Test case that creates some regions and loads HFiles that cross the boundaries of those regions
155 public void testRegionCrossingLoad() throws Exception
{
156 runTest("testRegionCrossingLoad", BloomType
.NONE
,
157 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
158 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
162 * Test loading into a column family that has a ROW bloom filter.
165 public void testRegionCrossingRowBloom() throws Exception
{
166 runTest("testRegionCrossingLoadRowBloom", BloomType
.ROW
,
167 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
168 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
172 * Test loading into a column family that has a ROWCOL bloom filter.
175 public void testRegionCrossingRowColBloom() throws Exception
{
176 runTest("testRegionCrossingLoadRowColBloom", BloomType
.ROWCOL
,
177 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
178 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
182 * Test case that creates some regions and loads HFiles that have different region boundaries than
183 * the table pre-split.
186 public void testSimpleHFileSplit() throws Exception
{
187 runTest("testHFileSplit", BloomType
.NONE
,
188 new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("fff"), Bytes
.toBytes("jjj"),
189 Bytes
.toBytes("ppp"), Bytes
.toBytes("uuu"), Bytes
.toBytes("zzz"), },
190 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("lll") },
191 new byte[][] { Bytes
.toBytes("mmm"), Bytes
.toBytes("zzz") }, });
195 * Test case that creates some regions and loads HFiles that cross the boundaries and have
196 * different region boundaries than the table pre-split.
199 public void testRegionCrossingHFileSplit() throws Exception
{
200 testRegionCrossingHFileSplit(BloomType
.NONE
);
204 * Test case that creates some regions and loads HFiles that cross the boundaries have a ROW bloom
205 * filter and a different region boundaries than the table pre-split.
208 public void testRegionCrossingHFileSplitRowBloom() throws Exception
{
209 testRegionCrossingHFileSplit(BloomType
.ROW
);
213 * Test case that creates some regions and loads HFiles that cross the boundaries have a ROWCOL
214 * bloom filter and a different region boundaries than the table pre-split.
217 public void testRegionCrossingHFileSplitRowColBloom() throws Exception
{
218 testRegionCrossingHFileSplit(BloomType
.ROWCOL
);
222 public void testSplitALot() throws Exception
{
223 runTest("testSplitALot", BloomType
.NONE
,
224 new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("bbb"), Bytes
.toBytes("ccc"),
225 Bytes
.toBytes("ddd"), Bytes
.toBytes("eee"), Bytes
.toBytes("fff"), Bytes
.toBytes("ggg"),
226 Bytes
.toBytes("hhh"), Bytes
.toBytes("iii"), Bytes
.toBytes("lll"), Bytes
.toBytes("mmm"),
227 Bytes
.toBytes("nnn"), Bytes
.toBytes("ooo"), Bytes
.toBytes("ppp"), Bytes
.toBytes("qqq"),
228 Bytes
.toBytes("rrr"), Bytes
.toBytes("sss"), Bytes
.toBytes("ttt"), Bytes
.toBytes("uuu"),
229 Bytes
.toBytes("vvv"), Bytes
.toBytes("zzz"), },
230 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("zzz") }, });
233 private void testRegionCrossingHFileSplit(BloomType bloomType
) throws Exception
{
234 runTest("testHFileSplit" + bloomType
+ "Bloom", bloomType
,
235 new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("fff"), Bytes
.toBytes("jjj"),
236 Bytes
.toBytes("ppp"), Bytes
.toBytes("uuu"), Bytes
.toBytes("zzz"), },
237 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
238 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
241 private TableDescriptor
buildHTD(TableName tableName
, BloomType bloomType
) {
242 return TableDescriptorBuilder
.newBuilder(tableName
)
244 ColumnFamilyDescriptorBuilder
.newBuilder(FAMILY
).setBloomFilterType(bloomType
).build())
248 private void runTest(String testName
, BloomType bloomType
, byte[][][] hfileRanges
)
250 runTest(testName
, bloomType
, null, hfileRanges
);
253 private void runTest(String testName
, BloomType bloomType
, byte[][][] hfileRanges
, boolean useMap
)
255 runTest(testName
, bloomType
, null, hfileRanges
, useMap
);
258 private void runTest(String testName
, BloomType bloomType
, byte[][] tableSplitKeys
,
259 byte[][][] hfileRanges
) throws Exception
{
260 runTest(testName
, bloomType
, tableSplitKeys
, hfileRanges
, false);
263 private void runTest(String testName
, BloomType bloomType
, byte[][] tableSplitKeys
,
264 byte[][][] hfileRanges
, boolean useMap
) throws Exception
{
265 final byte[] TABLE_NAME
= Bytes
.toBytes("mytable_" + testName
);
266 final boolean preCreateTable
= tableSplitKeys
!= null;
268 // Run the test bulkloading the table to the default namespace
269 final TableName TABLE_WITHOUT_NS
= TableName
.valueOf(TABLE_NAME
);
270 runTest(testName
, TABLE_WITHOUT_NS
, bloomType
, preCreateTable
, tableSplitKeys
, hfileRanges
,
274 * Run the test bulkloading the table from a depth of 3 directory structure is now baseDirectory
275 * -- regionDir -- familyDir -- storeFileDir
277 if (preCreateTable
) {
278 runTest(testName
+ 2, TABLE_WITHOUT_NS
, bloomType
, true, tableSplitKeys
, hfileRanges
, false,
282 // Run the test bulkloading the table to the specified namespace
283 final TableName TABLE_WITH_NS
= TableName
.valueOf(Bytes
.toBytes(NAMESPACE
), TABLE_NAME
);
284 runTest(testName
, TABLE_WITH_NS
, bloomType
, preCreateTable
, tableSplitKeys
, hfileRanges
, useMap
,
288 private void runTest(String testName
, TableName tableName
, BloomType bloomType
,
289 boolean preCreateTable
, byte[][] tableSplitKeys
, byte[][][] hfileRanges
, boolean useMap
,
290 int depth
) throws Exception
{
291 TableDescriptor htd
= buildHTD(tableName
, bloomType
);
292 runTest(testName
, htd
, preCreateTable
, tableSplitKeys
, hfileRanges
, useMap
, false, depth
);
295 public static int loadHFiles(String testName
, TableDescriptor htd
, HBaseTestingUtil util
,
296 byte[] fam
, byte[] qual
, boolean preCreateTable
, byte[][] tableSplitKeys
,
297 byte[][][] hfileRanges
, boolean useMap
, boolean deleteFile
, boolean copyFiles
,
298 int initRowCount
, int factor
) throws Exception
{
299 return loadHFiles(testName
, htd
, util
, fam
, qual
, preCreateTable
, tableSplitKeys
, hfileRanges
,
300 useMap
, deleteFile
, copyFiles
, initRowCount
, factor
, 2);
303 public static int loadHFiles(String testName
, TableDescriptor htd
, HBaseTestingUtil util
,
304 byte[] fam
, byte[] qual
, boolean preCreateTable
, byte[][] tableSplitKeys
,
305 byte[][][] hfileRanges
, boolean useMap
, boolean deleteFile
, boolean copyFiles
,
306 int initRowCount
, int factor
, int depth
) throws Exception
{
307 Path baseDirectory
= util
.getDataTestDirOnTestFS(testName
);
308 FileSystem fs
= util
.getTestFileSystem();
309 baseDirectory
= baseDirectory
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
310 Path parentDir
= baseDirectory
;
313 parentDir
= new Path(baseDirectory
, "someRegion");
315 Path familyDir
= new Path(parentDir
, Bytes
.toString(fam
));
318 Map
<byte[], List
<Path
>> map
= null;
319 List
<Path
> list
= null;
320 if (useMap
|| copyFiles
) {
321 list
= new ArrayList
<>();
324 map
= new TreeMap
<>(Bytes
.BYTES_COMPARATOR
);
328 for (byte[][] range
: hfileRanges
) {
329 byte[] from
= range
[0];
330 byte[] to
= range
[1];
331 Path path
= new Path(familyDir
, "hfile_" + hfileIdx
++);
332 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, path
, fam
, qual
, from
, to
, factor
);
338 int expectedRows
= hfileIdx
* factor
;
340 TableName tableName
= htd
.getTableName();
341 if (!util
.getAdmin().tableExists(tableName
) && (preCreateTable
|| map
!= null)) {
342 if (tableSplitKeys
!= null) {
343 util
.getAdmin().createTable(htd
, tableSplitKeys
);
345 util
.getAdmin().createTable(htd
);
349 Configuration conf
= util
.getConfiguration();
351 conf
.setBoolean(BulkLoadHFiles
.ALWAYS_COPY_FILES
, true);
353 BulkLoadHFilesTool loader
= new BulkLoadHFilesTool(conf
);
354 List
<String
> args
= Lists
.newArrayList(baseDirectory
.toString(), tableName
.toString());
356 args
.add("-loadTable");
361 fs
.delete(last
, true);
363 Map
<BulkLoadHFiles
.LoadQueueItem
, ByteBuffer
> loaded
= loader
.bulkLoad(tableName
, map
);
365 expectedRows
-= 1000;
366 for (BulkLoadHFiles
.LoadQueueItem item
: loaded
.keySet()) {
367 if (item
.getFilePath().getName().equals(last
.getName())) {
368 fail(last
+ " should be missing");
373 loader
.run(args
.toArray(new String
[] {}));
377 for (Path p
: list
) {
378 assertTrue(p
+ " should exist", fs
.exists(p
));
382 try (Table table
= util
.getConnection().getTable(tableName
)) {
383 assertEquals(initRowCount
+ expectedRows
, countRows(table
));
389 private void runTest(String testName
, TableDescriptor htd
, boolean preCreateTable
,
390 byte[][] tableSplitKeys
, byte[][][] hfileRanges
, boolean useMap
, boolean copyFiles
, int depth
)
392 loadHFiles(testName
, htd
, util
, FAMILY
, QUALIFIER
, preCreateTable
, tableSplitKeys
, hfileRanges
,
393 useMap
, true, copyFiles
, 0, 1000, depth
);
395 final TableName tableName
= htd
.getTableName();
396 // verify staging folder has been cleaned up
397 Path stagingBasePath
= new Path(CommonFSUtils
.getRootDir(util
.getConfiguration()),
398 HConstants
.BULKLOAD_STAGING_DIR_NAME
);
399 FileSystem fs
= util
.getTestFileSystem();
400 if (fs
.exists(stagingBasePath
)) {
401 FileStatus
[] files
= fs
.listStatus(stagingBasePath
);
402 for (FileStatus file
: files
) {
403 assertTrue("Folder=" + file
.getPath() + " is not cleaned up.",
404 file
.getPath().getName() != "DONOTERASE");
408 util
.deleteTable(tableName
);
412 * Test that tags survive through a bulk load that needs to split hfiles. This test depends on the
413 * "hbase.client.rpc.codec" = KeyValueCodecWithTags so that the client can get tags in the
417 public void testTagsSurviveBulkLoadSplit() throws Exception
{
418 Path dir
= util
.getDataTestDirOnTestFS(tn
.getMethodName());
419 FileSystem fs
= util
.getTestFileSystem();
420 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
421 Path familyDir
= new Path(dir
, Bytes
.toString(FAMILY
));
422 // table has these split points
423 byte[][] tableSplitKeys
= new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("fff"),
424 Bytes
.toBytes("jjj"), Bytes
.toBytes("ppp"), Bytes
.toBytes("uuu"), Bytes
.toBytes("zzz"), };
426 // creating an hfile that has values that span the split points.
427 byte[] from
= Bytes
.toBytes("ddd");
428 byte[] to
= Bytes
.toBytes("ooo");
429 HFileTestUtil
.createHFileWithTags(util
.getConfiguration(), fs
,
430 new Path(familyDir
, tn
.getMethodName() + "_hfile"), FAMILY
, QUALIFIER
, from
, to
, 1000);
431 int expectedRows
= 1000;
433 TableName tableName
= TableName
.valueOf(tn
.getMethodName());
434 TableDescriptor htd
= buildHTD(tableName
, BloomType
.NONE
);
435 util
.getAdmin().createTable(htd
, tableSplitKeys
);
437 BulkLoadHFiles
.create(util
.getConfiguration()).bulkLoad(tableName
, dir
);
439 Table table
= util
.getConnection().getTable(tableName
);
441 assertEquals(expectedRows
, countRows(table
));
442 HFileTestUtil
.verifyTags(table
);
447 util
.deleteTable(tableName
);
451 * Test loading into a column family that does not exist.
454 public void testNonexistentColumnFamilyLoad() throws Exception
{
455 String testName
= tn
.getMethodName();
456 byte[][][] hFileRanges
=
457 new byte[][][] { new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("ccc") },
458 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, };
460 byte[] TABLE
= Bytes
.toBytes("mytable_" + testName
);
461 // set real family name to upper case in purpose to simulate the case that
462 // family name in HFiles is invalid
463 TableDescriptor htd
= TableDescriptorBuilder
.newBuilder(TableName
.valueOf(TABLE
))
464 .setColumnFamily(ColumnFamilyDescriptorBuilder
465 .of(Bytes
.toBytes(new String(FAMILY
).toUpperCase(Locale
.ROOT
))))
469 runTest(testName
, htd
, true, SPLIT_KEYS
, hFileRanges
, false, false, 2);
470 assertTrue("Loading into table with non-existent family should have failed", false);
471 } catch (Exception e
) {
472 assertTrue("IOException expected", e
instanceof IOException
);
473 // further check whether the exception message is correct
474 String errMsg
= e
.getMessage();
476 "Incorrect exception message, expected message: [" + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY
+
477 "], current message: [" + errMsg
+ "]",
478 errMsg
.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY
));
483 public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception
{
484 testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
488 public void testNonHfileFolder() throws Exception
{
489 testNonHfileFolder("testNonHfileFolder", false);
493 * Write a random data file and a non-file in a dir with a valid family name but not part of the
494 * table families. we should we able to bulkload without getting the unmatched family exception.
495 * HBASE-13037/HBASE-13227
497 private void testNonHfileFolder(String tableName
, boolean preCreateTable
) throws Exception
{
498 Path dir
= util
.getDataTestDirOnTestFS(tableName
);
499 FileSystem fs
= util
.getTestFileSystem();
500 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
502 Path familyDir
= new Path(dir
, Bytes
.toString(FAMILY
));
503 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, new Path(familyDir
, "hfile_0"), FAMILY
,
504 QUALIFIER
, Bytes
.toBytes("begin"), Bytes
.toBytes("end"), 500);
505 createRandomDataFile(fs
, new Path(familyDir
, "012356789"), 16 * 1024);
507 final String NON_FAMILY_FOLDER
= "_logs";
508 Path nonFamilyDir
= new Path(dir
, NON_FAMILY_FOLDER
);
509 fs
.mkdirs(nonFamilyDir
);
510 fs
.mkdirs(new Path(nonFamilyDir
, "non-file"));
511 createRandomDataFile(fs
, new Path(nonFamilyDir
, "012356789"), 16 * 1024);
515 if (preCreateTable
) {
516 table
= util
.createTable(TableName
.valueOf(tableName
), FAMILY
);
518 table
= util
.getConnection().getTable(TableName
.valueOf(tableName
));
520 BulkLoadHFiles
.create(util
.getConfiguration()).bulkLoad(TableName
.valueOf(tableName
), dir
);
521 assertEquals(500, countRows(table
));
526 fs
.delete(dir
, true);
530 private static void createRandomDataFile(FileSystem fs
, Path path
, int size
) throws IOException
{
531 FSDataOutputStream stream
= fs
.create(path
);
533 byte[] data
= new byte[1024];
534 for (int i
= 0; i
< data
.length
; ++i
) {
535 data
[i
] = (byte) (i
& 0xff);
537 while (size
>= data
.length
) {
538 stream
.write(data
, 0, data
.length
);
542 stream
.write(data
, 0, size
);
550 public void testSplitStoreFile() throws IOException
{
551 Path dir
= util
.getDataTestDirOnTestFS("testSplitHFile");
552 FileSystem fs
= util
.getTestFileSystem();
553 Path testIn
= new Path(dir
, "testhfile");
554 ColumnFamilyDescriptor familyDesc
= ColumnFamilyDescriptorBuilder
.of(FAMILY
);
555 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, testIn
, FAMILY
, QUALIFIER
,
556 Bytes
.toBytes("aaa"), Bytes
.toBytes("zzz"), 1000);
558 Path bottomOut
= new Path(dir
, "bottom.out");
559 Path topOut
= new Path(dir
, "top.out");
561 BulkLoadHFilesTool
.splitStoreFile(util
.getConfiguration(), testIn
, familyDesc
,
562 Bytes
.toBytes("ggg"), bottomOut
, topOut
);
564 int rowCount
= verifyHFile(bottomOut
);
565 rowCount
+= verifyHFile(topOut
);
566 assertEquals(1000, rowCount
);
570 public void testSplitStoreFileWithNoneToNone() throws IOException
{
571 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.NONE
, DataBlockEncoding
.NONE
);
575 public void testSplitStoreFileWithEncodedToEncoded() throws IOException
{
576 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.DIFF
, DataBlockEncoding
.DIFF
);
580 public void testSplitStoreFileWithEncodedToNone() throws IOException
{
581 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.DIFF
, DataBlockEncoding
.NONE
);
585 public void testSplitStoreFileWithNoneToEncoded() throws IOException
{
586 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.NONE
, DataBlockEncoding
.DIFF
);
589 private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding
,
590 DataBlockEncoding cfEncoding
) throws IOException
{
591 Path dir
= util
.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
592 FileSystem fs
= util
.getTestFileSystem();
593 Path testIn
= new Path(dir
, "testhfile");
594 ColumnFamilyDescriptor familyDesc
=
595 ColumnFamilyDescriptorBuilder
.newBuilder(FAMILY
).setDataBlockEncoding(cfEncoding
).build();
596 HFileTestUtil
.createHFileWithDataBlockEncoding(util
.getConfiguration(), fs
, testIn
,
597 bulkloadEncoding
, FAMILY
, QUALIFIER
, Bytes
.toBytes("aaa"), Bytes
.toBytes("zzz"), 1000);
599 Path bottomOut
= new Path(dir
, "bottom.out");
600 Path topOut
= new Path(dir
, "top.out");
602 BulkLoadHFilesTool
.splitStoreFile(util
.getConfiguration(), testIn
, familyDesc
,
603 Bytes
.toBytes("ggg"), bottomOut
, topOut
);
605 int rowCount
= verifyHFile(bottomOut
);
606 rowCount
+= verifyHFile(topOut
);
607 assertEquals(1000, rowCount
);
610 private int verifyHFile(Path p
) throws IOException
{
611 Configuration conf
= util
.getConfiguration();
612 HFile
.Reader reader
=
613 HFile
.createReader(p
.getFileSystem(conf
), p
, new CacheConfig(conf
), true, conf
);
614 HFileScanner scanner
= reader
.getScanner(conf
, false, false);
619 } while (scanner
.next());
620 assertTrue(count
> 0);
625 private void addStartEndKeysForTest(TreeMap
<byte[], Integer
> map
, byte[] first
, byte[] last
) {
626 Integer value
= map
.containsKey(first
) ? map
.get(first
) : 0;
627 map
.put(first
, value
+ 1);
629 value
= map
.containsKey(last
) ? map
.get(last
) : 0;
630 map
.put(last
, value
- 1);
634 public void testInferBoundaries() {
635 TreeMap
<byte[], Integer
> map
= new TreeMap
<>(Bytes
.BYTES_COMPARATOR
);
638 * Toy example c---------i o------p s---------t v------x a------e g-----k m-------------q r----s
639 * u----w Should be inferred as: a-----------------k m-------------q r--------------t
640 * u---------x The output should be (m,r,u)
648 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
652 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
656 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
660 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
664 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
668 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
672 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
676 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
680 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
682 byte[][] keysArray
= BulkLoadHFilesTool
.inferBoundaries(map
);
683 byte[][] compare
= new byte[3][];
684 compare
[0] = Bytes
.toBytes("m");
685 compare
[1] = Bytes
.toBytes("r");
686 compare
[2] = Bytes
.toBytes("u");
688 assertEquals(3, keysArray
.length
);
690 for (int row
= 0; row
< keysArray
.length
; row
++) {
691 assertArrayEquals(keysArray
[row
], compare
[row
]);
696 public void testLoadTooMayHFiles() throws Exception
{
697 Path dir
= util
.getDataTestDirOnTestFS("testLoadTooMayHFiles");
698 FileSystem fs
= util
.getTestFileSystem();
699 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
700 Path familyDir
= new Path(dir
, Bytes
.toString(FAMILY
));
702 byte[] from
= Bytes
.toBytes("begin");
703 byte[] to
= Bytes
.toBytes("end");
704 for (int i
= 0; i
<= MAX_FILES_PER_REGION_PER_FAMILY
; i
++) {
705 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, new Path(familyDir
, "hfile_" + i
),
706 FAMILY
, QUALIFIER
, from
, to
, 1000);
710 BulkLoadHFiles
.create(util
.getConfiguration())
711 .bulkLoad(TableName
.valueOf("mytable_testLoadTooMayHFiles"), dir
);
712 fail("Bulk loading too many files should fail");
713 } catch (IOException ie
) {
714 assertTrue(ie
.getMessage()
715 .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY
+ " hfiles"));
719 @Test(expected
= TableNotFoundException
.class)
720 public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception
{
721 Configuration conf
= util
.getConfiguration();
722 conf
.set(BulkLoadHFiles
.CREATE_TABLE_CONF_KEY
, "no");
723 BulkLoadHFilesTool loader
= new BulkLoadHFilesTool(conf
);
724 String
[] args
= { "directory", "nonExistingTable" };
729 public void testTableWithCFNameStartWithUnderScore() throws Exception
{
730 Path dir
= util
.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
731 FileSystem fs
= util
.getTestFileSystem();
732 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
733 String family
= "_cf";
734 Path familyDir
= new Path(dir
, family
);
736 byte[] from
= Bytes
.toBytes("begin");
737 byte[] to
= Bytes
.toBytes("end");
738 Configuration conf
= util
.getConfiguration();
739 String tableName
= tn
.getMethodName();
740 try (Table table
= util
.createTable(TableName
.valueOf(tableName
), family
)) {
741 HFileTestUtil
.createHFile(conf
, fs
, new Path(familyDir
, "hfile"), Bytes
.toBytes(family
),
742 QUALIFIER
, from
, to
, 1000);
743 BulkLoadHFiles
.create(conf
).bulkLoad(table
.getName(), dir
);
744 assertEquals(1000, countRows(table
));
749 public void testBulkLoadByFamily() throws Exception
{
750 Path dir
= util
.getDataTestDirOnTestFS("testBulkLoadByFamily");
751 FileSystem fs
= util
.getTestFileSystem();
752 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
753 String tableName
= tn
.getMethodName();
754 String
[] families
= { "cf1", "cf2", "cf3" };
755 for (int i
= 0; i
< families
.length
; i
++) {
756 byte[] from
= Bytes
.toBytes(i
+ "begin");
757 byte[] to
= Bytes
.toBytes(i
+ "end");
758 Path familyDir
= new Path(dir
, families
[i
]);
759 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, new Path(familyDir
, "hfile"),
760 Bytes
.toBytes(families
[i
]), QUALIFIER
, from
, to
, 1000);
762 Table table
= util
.createTable(TableName
.valueOf(tableName
), families
);
763 final AtomicInteger attmptedCalls
= new AtomicInteger();
764 util
.getConfiguration().setBoolean(BulkLoadHFilesTool
.BULK_LOAD_HFILES_BY_FAMILY
, true);
765 BulkLoadHFiles loader
= new BulkLoadHFilesTool(util
.getConfiguration()) {
767 protected CompletableFuture
<Collection
<LoadQueueItem
>> tryAtomicRegionLoad(
768 final AsyncClusterConnection conn
, final TableName tableName
, boolean copyFiles
,
769 final byte[] first
, Collection
<LoadQueueItem
> lqis
) {
770 attmptedCalls
.incrementAndGet();
771 return super.tryAtomicRegionLoad(conn
, tableName
, copyFiles
, first
, lqis
);
775 loader
.bulkLoad(table
.getName(), dir
);
776 assertEquals(families
.length
, attmptedCalls
.get());
777 assertEquals(1000 * families
.length
, HBaseTestingUtil
.countRows(table
));
782 util
.getConfiguration().setBoolean(BulkLoadHFilesTool
.BULK_LOAD_HFILES_BY_FAMILY
, false);