/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.tool;

import static org.apache.hadoop.hbase.HBaseTestingUtility.countRows;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.codec.KeyValueCodecWithTags;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load functionality. These tests run
 * faster than the full MR cluster tests in TestHFileOutputFormat.
 */
75 @Category({ MiscTests
.class, LargeTests
.class })
76 public class TestBulkLoadHFiles
{
79 public static final HBaseClassTestRule CLASS_RULE
=
80 HBaseClassTestRule
.forClass(TestBulkLoadHFiles
.class);
83 public TestName tn
= new TestName();
85 private static final byte[] QUALIFIER
= Bytes
.toBytes("myqual");
86 private static final byte[] FAMILY
= Bytes
.toBytes("myfam");
87 private static final String NAMESPACE
= "bulkNS";
89 static final String EXPECTED_MSG_FOR_NON_EXISTING_FAMILY
= "Unmatched family names found";
90 static final int MAX_FILES_PER_REGION_PER_FAMILY
= 4;
92 private static final byte[][] SPLIT_KEYS
=
93 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ppp") };
95 static HBaseTestingUtility util
= new HBaseTestingUtility();
98 public static void setUpBeforeClass() throws Exception
{
99 util
.getConfiguration().set(CoprocessorHost
.REGION_COPROCESSOR_CONF_KEY
, "");
100 util
.getConfiguration().setInt(BulkLoadHFiles
.MAX_FILES_PER_REGION_PER_FAMILY
,
101 MAX_FILES_PER_REGION_PER_FAMILY
);
102 // change default behavior so that tag values are returned with normal rpcs
103 util
.getConfiguration().set(HConstants
.RPC_CODEC_CONF_KEY
,
104 KeyValueCodecWithTags
.class.getCanonicalName());
105 util
.startMiniCluster();
110 protected static void setupNamespace() throws Exception
{
111 util
.getAdmin().createNamespace(NamespaceDescriptor
.create(NAMESPACE
).build());
115 public static void tearDownAfterClass() throws Exception
{
116 util
.shutdownMiniCluster();
120 public void testSimpleLoadWithMap() throws Exception
{
121 runTest("testSimpleLoadWithMap", BloomType
.NONE
,
122 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("cccc") },
123 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, },
128 * Test case that creates some regions and loads HFiles that fit snugly inside those regions
131 public void testSimpleLoad() throws Exception
{
132 runTest("testSimpleLoad", BloomType
.NONE
,
133 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("cccc") },
134 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, });
138 public void testSimpleLoadWithFileCopy() throws Exception
{
139 String testName
= tn
.getMethodName();
140 final byte[] TABLE_NAME
= Bytes
.toBytes("mytable_" + testName
);
141 runTest(testName
, buildHTD(TableName
.valueOf(TABLE_NAME
), BloomType
.NONE
), false, null,
142 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("cccc") },
143 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, },
148 * Test case that creates some regions and loads HFiles that cross the boundaries of those regions
151 public void testRegionCrossingLoad() throws Exception
{
152 runTest("testRegionCrossingLoad", BloomType
.NONE
,
153 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
154 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
158 * Test loading into a column family that has a ROW bloom filter.
161 public void testRegionCrossingRowBloom() throws Exception
{
162 runTest("testRegionCrossingLoadRowBloom", BloomType
.ROW
,
163 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
164 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
168 * Test loading into a column family that has a ROWCOL bloom filter.
171 public void testRegionCrossingRowColBloom() throws Exception
{
172 runTest("testRegionCrossingLoadRowColBloom", BloomType
.ROWCOL
,
173 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
174 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
178 * Test case that creates some regions and loads HFiles that have different region boundaries than
179 * the table pre-split.
182 public void testSimpleHFileSplit() throws Exception
{
183 runTest("testHFileSplit", BloomType
.NONE
,
184 new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("fff"), Bytes
.toBytes("jjj"),
185 Bytes
.toBytes("ppp"), Bytes
.toBytes("uuu"), Bytes
.toBytes("zzz"), },
186 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("lll") },
187 new byte[][] { Bytes
.toBytes("mmm"), Bytes
.toBytes("zzz") }, });
191 * Test case that creates some regions and loads HFiles that cross the boundaries and have
192 * different region boundaries than the table pre-split.
195 public void testRegionCrossingHFileSplit() throws Exception
{
196 testRegionCrossingHFileSplit(BloomType
.NONE
);
200 * Test case that creates some regions and loads HFiles that cross the boundaries have a ROW bloom
201 * filter and a different region boundaries than the table pre-split.
204 public void testRegionCrossingHFileSplitRowBloom() throws Exception
{
205 testRegionCrossingHFileSplit(BloomType
.ROW
);
209 * Test case that creates some regions and loads HFiles that cross the boundaries have a ROWCOL
210 * bloom filter and a different region boundaries than the table pre-split.
213 public void testRegionCrossingHFileSplitRowColBloom() throws Exception
{
214 testRegionCrossingHFileSplit(BloomType
.ROWCOL
);
218 public void testSplitALot() throws Exception
{
219 runTest("testSplitALot", BloomType
.NONE
,
220 new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("bbb"), Bytes
.toBytes("ccc"),
221 Bytes
.toBytes("ddd"), Bytes
.toBytes("eee"), Bytes
.toBytes("fff"), Bytes
.toBytes("ggg"),
222 Bytes
.toBytes("hhh"), Bytes
.toBytes("iii"), Bytes
.toBytes("lll"), Bytes
.toBytes("mmm"),
223 Bytes
.toBytes("nnn"), Bytes
.toBytes("ooo"), Bytes
.toBytes("ppp"), Bytes
.toBytes("qqq"),
224 Bytes
.toBytes("rrr"), Bytes
.toBytes("sss"), Bytes
.toBytes("ttt"), Bytes
.toBytes("uuu"),
225 Bytes
.toBytes("vvv"), Bytes
.toBytes("zzz"), },
226 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("zzz") }, });
229 private void testRegionCrossingHFileSplit(BloomType bloomType
) throws Exception
{
230 runTest("testHFileSplit" + bloomType
+ "Bloom", bloomType
,
231 new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("fff"), Bytes
.toBytes("jjj"),
232 Bytes
.toBytes("ppp"), Bytes
.toBytes("uuu"), Bytes
.toBytes("zzz"), },
233 new byte[][][] { new byte[][] { Bytes
.toBytes("aaaa"), Bytes
.toBytes("eee") },
234 new byte[][] { Bytes
.toBytes("fff"), Bytes
.toBytes("zzz") }, });
237 private TableDescriptor
buildHTD(TableName tableName
, BloomType bloomType
) {
238 return TableDescriptorBuilder
.newBuilder(tableName
)
240 ColumnFamilyDescriptorBuilder
.newBuilder(FAMILY
).setBloomFilterType(bloomType
).build())
244 private void runTest(String testName
, BloomType bloomType
, byte[][][] hfileRanges
)
246 runTest(testName
, bloomType
, null, hfileRanges
);
249 private void runTest(String testName
, BloomType bloomType
, byte[][][] hfileRanges
, boolean useMap
)
251 runTest(testName
, bloomType
, null, hfileRanges
, useMap
);
254 private void runTest(String testName
, BloomType bloomType
, byte[][] tableSplitKeys
,
255 byte[][][] hfileRanges
) throws Exception
{
256 runTest(testName
, bloomType
, tableSplitKeys
, hfileRanges
, false);
259 private void runTest(String testName
, BloomType bloomType
, byte[][] tableSplitKeys
,
260 byte[][][] hfileRanges
, boolean useMap
) throws Exception
{
261 final byte[] TABLE_NAME
= Bytes
.toBytes("mytable_" + testName
);
262 final boolean preCreateTable
= tableSplitKeys
!= null;
264 // Run the test bulkloading the table to the default namespace
265 final TableName TABLE_WITHOUT_NS
= TableName
.valueOf(TABLE_NAME
);
266 runTest(testName
, TABLE_WITHOUT_NS
, bloomType
, preCreateTable
, tableSplitKeys
, hfileRanges
,
270 * Run the test bulkloading the table from a depth of 3 directory structure is now baseDirectory
271 * -- regionDir -- familyDir -- storeFileDir
273 if (preCreateTable
) {
274 runTest(testName
+ 2, TABLE_WITHOUT_NS
, bloomType
, true, tableSplitKeys
, hfileRanges
, false,
278 // Run the test bulkloading the table to the specified namespace
279 final TableName TABLE_WITH_NS
= TableName
.valueOf(Bytes
.toBytes(NAMESPACE
), TABLE_NAME
);
280 runTest(testName
, TABLE_WITH_NS
, bloomType
, preCreateTable
, tableSplitKeys
, hfileRanges
, useMap
,
284 private void runTest(String testName
, TableName tableName
, BloomType bloomType
,
285 boolean preCreateTable
, byte[][] tableSplitKeys
, byte[][][] hfileRanges
, boolean useMap
,
286 int depth
) throws Exception
{
287 TableDescriptor htd
= buildHTD(tableName
, bloomType
);
288 runTest(testName
, htd
, preCreateTable
, tableSplitKeys
, hfileRanges
, useMap
, false, depth
);
291 public static int loadHFiles(String testName
, TableDescriptor htd
, HBaseTestingUtility util
,
292 byte[] fam
, byte[] qual
, boolean preCreateTable
, byte[][] tableSplitKeys
,
293 byte[][][] hfileRanges
, boolean useMap
, boolean deleteFile
, boolean copyFiles
,
294 int initRowCount
, int factor
) throws Exception
{
295 return loadHFiles(testName
, htd
, util
, fam
, qual
, preCreateTable
, tableSplitKeys
, hfileRanges
,
296 useMap
, deleteFile
, copyFiles
, initRowCount
, factor
, 2);
299 public static int loadHFiles(String testName
, TableDescriptor htd
, HBaseTestingUtility util
,
300 byte[] fam
, byte[] qual
, boolean preCreateTable
, byte[][] tableSplitKeys
,
301 byte[][][] hfileRanges
, boolean useMap
, boolean deleteFile
, boolean copyFiles
,
302 int initRowCount
, int factor
, int depth
) throws Exception
{
303 Path baseDirectory
= util
.getDataTestDirOnTestFS(testName
);
304 FileSystem fs
= util
.getTestFileSystem();
305 baseDirectory
= baseDirectory
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
306 Path parentDir
= baseDirectory
;
309 parentDir
= new Path(baseDirectory
, "someRegion");
311 Path familyDir
= new Path(parentDir
, Bytes
.toString(fam
));
314 Map
<byte[], List
<Path
>> map
= null;
315 List
<Path
> list
= null;
316 if (useMap
|| copyFiles
) {
317 list
= new ArrayList
<>();
320 map
= new TreeMap
<>(Bytes
.BYTES_COMPARATOR
);
324 for (byte[][] range
: hfileRanges
) {
325 byte[] from
= range
[0];
326 byte[] to
= range
[1];
327 Path path
= new Path(familyDir
, "hfile_" + hfileIdx
++);
328 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, path
, fam
, qual
, from
, to
, factor
);
334 int expectedRows
= hfileIdx
* factor
;
336 TableName tableName
= htd
.getTableName();
337 if (!util
.getAdmin().tableExists(tableName
) && (preCreateTable
|| map
!= null)) {
338 if (tableSplitKeys
!= null) {
339 util
.getAdmin().createTable(htd
, tableSplitKeys
);
341 util
.getAdmin().createTable(htd
);
345 Configuration conf
= util
.getConfiguration();
347 conf
.setBoolean(BulkLoadHFiles
.ALWAYS_COPY_FILES
, true);
349 BulkLoadHFilesTool loader
= new BulkLoadHFilesTool(conf
);
350 List
<String
> args
= Lists
.newArrayList(baseDirectory
.toString(), tableName
.toString());
352 args
.add("-loadTable");
357 fs
.delete(last
, true);
359 Map
<BulkLoadHFiles
.LoadQueueItem
, ByteBuffer
> loaded
= loader
.bulkLoad(tableName
, map
);
361 expectedRows
-= 1000;
362 for (BulkLoadHFiles
.LoadQueueItem item
: loaded
.keySet()) {
363 if (item
.getFilePath().getName().equals(last
.getName())) {
364 fail(last
+ " should be missing");
369 loader
.run(args
.toArray(new String
[] {}));
373 for (Path p
: list
) {
374 assertTrue(p
+ " should exist", fs
.exists(p
));
378 try (Table table
= util
.getConnection().getTable(tableName
)) {
379 assertEquals(initRowCount
+ expectedRows
, countRows(table
));
385 private void runTest(String testName
, TableDescriptor htd
, boolean preCreateTable
,
386 byte[][] tableSplitKeys
, byte[][][] hfileRanges
, boolean useMap
, boolean copyFiles
, int depth
)
388 loadHFiles(testName
, htd
, util
, FAMILY
, QUALIFIER
, preCreateTable
, tableSplitKeys
, hfileRanges
,
389 useMap
, true, copyFiles
, 0, 1000, depth
);
391 final TableName tableName
= htd
.getTableName();
392 // verify staging folder has been cleaned up
393 Path stagingBasePath
=
394 new Path(FSUtils
.getRootDir(util
.getConfiguration()), HConstants
.BULKLOAD_STAGING_DIR_NAME
);
395 FileSystem fs
= util
.getTestFileSystem();
396 if (fs
.exists(stagingBasePath
)) {
397 FileStatus
[] files
= fs
.listStatus(stagingBasePath
);
398 for (FileStatus file
: files
) {
399 assertTrue("Folder=" + file
.getPath() + " is not cleaned up.",
400 file
.getPath().getName() != "DONOTERASE");
404 util
.deleteTable(tableName
);
408 * Test that tags survive through a bulk load that needs to split hfiles. This test depends on the
409 * "hbase.client.rpc.codec" = KeyValueCodecWithTags so that the client can get tags in the
413 public void testTagsSurviveBulkLoadSplit() throws Exception
{
414 Path dir
= util
.getDataTestDirOnTestFS(tn
.getMethodName());
415 FileSystem fs
= util
.getTestFileSystem();
416 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
417 Path familyDir
= new Path(dir
, Bytes
.toString(FAMILY
));
418 // table has these split points
419 byte[][] tableSplitKeys
= new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("fff"),
420 Bytes
.toBytes("jjj"), Bytes
.toBytes("ppp"), Bytes
.toBytes("uuu"), Bytes
.toBytes("zzz"), };
422 // creating an hfile that has values that span the split points.
423 byte[] from
= Bytes
.toBytes("ddd");
424 byte[] to
= Bytes
.toBytes("ooo");
425 HFileTestUtil
.createHFileWithTags(util
.getConfiguration(), fs
,
426 new Path(familyDir
, tn
.getMethodName() + "_hfile"), FAMILY
, QUALIFIER
, from
, to
, 1000);
427 int expectedRows
= 1000;
429 TableName tableName
= TableName
.valueOf(tn
.getMethodName());
430 TableDescriptor htd
= buildHTD(tableName
, BloomType
.NONE
);
431 util
.getAdmin().createTable(htd
, tableSplitKeys
);
433 BulkLoadHFiles
.create(util
.getConfiguration()).bulkLoad(tableName
, dir
);
435 Table table
= util
.getConnection().getTable(tableName
);
437 assertEquals(expectedRows
, countRows(table
));
438 HFileTestUtil
.verifyTags(table
);
443 util
.deleteTable(tableName
);
447 * Test loading into a column family that does not exist.
450 public void testNonexistentColumnFamilyLoad() throws Exception
{
451 String testName
= tn
.getMethodName();
452 byte[][][] hFileRanges
=
453 new byte[][][] { new byte[][] { Bytes
.toBytes("aaa"), Bytes
.toBytes("ccc") },
454 new byte[][] { Bytes
.toBytes("ddd"), Bytes
.toBytes("ooo") }, };
456 byte[] TABLE
= Bytes
.toBytes("mytable_" + testName
);
457 // set real family name to upper case in purpose to simulate the case that
458 // family name in HFiles is invalid
459 TableDescriptor htd
= TableDescriptorBuilder
.newBuilder(TableName
.valueOf(TABLE
))
460 .setColumnFamily(ColumnFamilyDescriptorBuilder
461 .of(Bytes
.toBytes(new String(FAMILY
).toUpperCase(Locale
.ROOT
))))
465 runTest(testName
, htd
, true, SPLIT_KEYS
, hFileRanges
, false, false, 2);
466 assertTrue("Loading into table with non-existent family should have failed", false);
467 } catch (Exception e
) {
468 assertTrue("IOException expected", e
instanceof IOException
);
469 // further check whether the exception message is correct
470 String errMsg
= e
.getMessage();
472 "Incorrect exception message, expected message: [" + EXPECTED_MSG_FOR_NON_EXISTING_FAMILY
+
473 "], current message: [" + errMsg
+ "]",
474 errMsg
.contains(EXPECTED_MSG_FOR_NON_EXISTING_FAMILY
));
479 public void testNonHfileFolderWithUnmatchedFamilyName() throws Exception
{
480 testNonHfileFolder("testNonHfileFolderWithUnmatchedFamilyName", true);
484 public void testNonHfileFolder() throws Exception
{
485 testNonHfileFolder("testNonHfileFolder", false);
489 * Write a random data file and a non-file in a dir with a valid family name but not part of the
490 * table families. we should we able to bulkload without getting the unmatched family exception.
491 * HBASE-13037/HBASE-13227
493 private void testNonHfileFolder(String tableName
, boolean preCreateTable
) throws Exception
{
494 Path dir
= util
.getDataTestDirOnTestFS(tableName
);
495 FileSystem fs
= util
.getTestFileSystem();
496 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
498 Path familyDir
= new Path(dir
, Bytes
.toString(FAMILY
));
499 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, new Path(familyDir
, "hfile_0"), FAMILY
,
500 QUALIFIER
, Bytes
.toBytes("begin"), Bytes
.toBytes("end"), 500);
501 createRandomDataFile(fs
, new Path(familyDir
, "012356789"), 16 * 1024);
503 final String NON_FAMILY_FOLDER
= "_logs";
504 Path nonFamilyDir
= new Path(dir
, NON_FAMILY_FOLDER
);
505 fs
.mkdirs(nonFamilyDir
);
506 fs
.mkdirs(new Path(nonFamilyDir
, "non-file"));
507 createRandomDataFile(fs
, new Path(nonFamilyDir
, "012356789"), 16 * 1024);
511 if (preCreateTable
) {
512 table
= util
.createTable(TableName
.valueOf(tableName
), FAMILY
);
514 table
= util
.getConnection().getTable(TableName
.valueOf(tableName
));
516 BulkLoadHFiles
.create(util
.getConfiguration()).bulkLoad(TableName
.valueOf(tableName
), dir
);
517 assertEquals(500, countRows(table
));
522 fs
.delete(dir
, true);
526 private static void createRandomDataFile(FileSystem fs
, Path path
, int size
) throws IOException
{
527 FSDataOutputStream stream
= fs
.create(path
);
529 byte[] data
= new byte[1024];
530 for (int i
= 0; i
< data
.length
; ++i
) {
531 data
[i
] = (byte) (i
& 0xff);
533 while (size
>= data
.length
) {
534 stream
.write(data
, 0, data
.length
);
538 stream
.write(data
, 0, size
);
546 public void testSplitStoreFile() throws IOException
{
547 Path dir
= util
.getDataTestDirOnTestFS("testSplitHFile");
548 FileSystem fs
= util
.getTestFileSystem();
549 Path testIn
= new Path(dir
, "testhfile");
550 ColumnFamilyDescriptor familyDesc
= ColumnFamilyDescriptorBuilder
.of(FAMILY
);
551 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, testIn
, FAMILY
, QUALIFIER
,
552 Bytes
.toBytes("aaa"), Bytes
.toBytes("zzz"), 1000);
554 Path bottomOut
= new Path(dir
, "bottom.out");
555 Path topOut
= new Path(dir
, "top.out");
557 BulkLoadHFilesTool
.splitStoreFile(util
.getConfiguration(), testIn
, familyDesc
,
558 Bytes
.toBytes("ggg"), bottomOut
, topOut
);
560 int rowCount
= verifyHFile(bottomOut
);
561 rowCount
+= verifyHFile(topOut
);
562 assertEquals(1000, rowCount
);
566 public void testSplitStoreFileWithNoneToNone() throws IOException
{
567 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.NONE
, DataBlockEncoding
.NONE
);
571 public void testSplitStoreFileWithEncodedToEncoded() throws IOException
{
572 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.DIFF
, DataBlockEncoding
.DIFF
);
576 public void testSplitStoreFileWithEncodedToNone() throws IOException
{
577 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.DIFF
, DataBlockEncoding
.NONE
);
581 public void testSplitStoreFileWithNoneToEncoded() throws IOException
{
582 testSplitStoreFileWithDifferentEncoding(DataBlockEncoding
.NONE
, DataBlockEncoding
.DIFF
);
585 private void testSplitStoreFileWithDifferentEncoding(DataBlockEncoding bulkloadEncoding
,
586 DataBlockEncoding cfEncoding
) throws IOException
{
587 Path dir
= util
.getDataTestDirOnTestFS("testSplitHFileWithDifferentEncoding");
588 FileSystem fs
= util
.getTestFileSystem();
589 Path testIn
= new Path(dir
, "testhfile");
590 ColumnFamilyDescriptor familyDesc
=
591 ColumnFamilyDescriptorBuilder
.newBuilder(FAMILY
).setDataBlockEncoding(cfEncoding
).build();
592 HFileTestUtil
.createHFileWithDataBlockEncoding(util
.getConfiguration(), fs
, testIn
,
593 bulkloadEncoding
, FAMILY
, QUALIFIER
, Bytes
.toBytes("aaa"), Bytes
.toBytes("zzz"), 1000);
595 Path bottomOut
= new Path(dir
, "bottom.out");
596 Path topOut
= new Path(dir
, "top.out");
598 BulkLoadHFilesTool
.splitStoreFile(util
.getConfiguration(), testIn
, familyDesc
,
599 Bytes
.toBytes("ggg"), bottomOut
, topOut
);
601 int rowCount
= verifyHFile(bottomOut
);
602 rowCount
+= verifyHFile(topOut
);
603 assertEquals(1000, rowCount
);
606 private int verifyHFile(Path p
) throws IOException
{
607 Configuration conf
= util
.getConfiguration();
608 HFile
.Reader reader
=
609 HFile
.createReader(p
.getFileSystem(conf
), p
, new CacheConfig(conf
), true, conf
);
610 HFileScanner scanner
= reader
.getScanner(false, false);
615 } while (scanner
.next());
616 assertTrue(count
> 0);
621 private void addStartEndKeysForTest(TreeMap
<byte[], Integer
> map
, byte[] first
, byte[] last
) {
622 Integer value
= map
.containsKey(first
) ? map
.get(first
) : 0;
623 map
.put(first
, value
+ 1);
625 value
= map
.containsKey(last
) ? map
.get(last
) : 0;
626 map
.put(last
, value
- 1);
630 public void testInferBoundaries() {
631 TreeMap
<byte[], Integer
> map
= new TreeMap
<>(Bytes
.BYTES_COMPARATOR
);
634 * Toy example c---------i o------p s---------t v------x a------e g-----k m-------------q r----s
635 * u----w Should be inferred as: a-----------------k m-------------q r--------------t
636 * u---------x The output should be (m,r,u)
644 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
648 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
652 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
656 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
660 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
664 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
668 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
672 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
676 addStartEndKeysForTest(map
, Bytes
.toBytes(first
), Bytes
.toBytes(last
));
678 byte[][] keysArray
= BulkLoadHFilesTool
.inferBoundaries(map
);
679 byte[][] compare
= new byte[3][];
680 compare
[0] = Bytes
.toBytes("m");
681 compare
[1] = Bytes
.toBytes("r");
682 compare
[2] = Bytes
.toBytes("u");
684 assertEquals(3, keysArray
.length
);
686 for (int row
= 0; row
< keysArray
.length
; row
++) {
687 assertArrayEquals(keysArray
[row
], compare
[row
]);
692 public void testLoadTooMayHFiles() throws Exception
{
693 Path dir
= util
.getDataTestDirOnTestFS("testLoadTooMayHFiles");
694 FileSystem fs
= util
.getTestFileSystem();
695 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
696 Path familyDir
= new Path(dir
, Bytes
.toString(FAMILY
));
698 byte[] from
= Bytes
.toBytes("begin");
699 byte[] to
= Bytes
.toBytes("end");
700 for (int i
= 0; i
<= MAX_FILES_PER_REGION_PER_FAMILY
; i
++) {
701 HFileTestUtil
.createHFile(util
.getConfiguration(), fs
, new Path(familyDir
, "hfile_" + i
),
702 FAMILY
, QUALIFIER
, from
, to
, 1000);
706 BulkLoadHFiles
.create(util
.getConfiguration())
707 .bulkLoad(TableName
.valueOf("mytable_testLoadTooMayHFiles"), dir
);
708 fail("Bulk loading too many files should fail");
709 } catch (IOException ie
) {
710 assertTrue(ie
.getMessage()
711 .contains("Trying to load more than " + MAX_FILES_PER_REGION_PER_FAMILY
+ " hfiles"));
715 @Test(expected
= TableNotFoundException
.class)
716 public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception
{
717 Configuration conf
= util
.getConfiguration();
718 conf
.set(BulkLoadHFiles
.CREATE_TABLE_CONF_KEY
, "no");
719 BulkLoadHFilesTool loader
= new BulkLoadHFilesTool(conf
);
720 String
[] args
= { "directory", "nonExistingTable" };
725 public void testTableWithCFNameStartWithUnderScore() throws Exception
{
726 Path dir
= util
.getDataTestDirOnTestFS("cfNameStartWithUnderScore");
727 FileSystem fs
= util
.getTestFileSystem();
728 dir
= dir
.makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
729 String family
= "_cf";
730 Path familyDir
= new Path(dir
, family
);
732 byte[] from
= Bytes
.toBytes("begin");
733 byte[] to
= Bytes
.toBytes("end");
734 Configuration conf
= util
.getConfiguration();
735 String tableName
= tn
.getMethodName();
736 try (Table table
= util
.createTable(TableName
.valueOf(tableName
), family
)) {
737 HFileTestUtil
.createHFile(conf
, fs
, new Path(familyDir
, "hfile"), Bytes
.toBytes(family
),
738 QUALIFIER
, from
, to
, 1000);
739 BulkLoadHFiles
.create(conf
).bulkLoad(table
.getName(), dir
);
740 assertEquals(1000, countRows(table
));