2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org
.apache
.hadoop
.hbase
.regionserver
;
20 import static org
.apache
.hadoop
.hbase
.io
.HFileLink
.LINK_NAME_PATTERN
;
21 import edu
.umd
.cs
.findbugs
.annotations
.Nullable
;
22 import java
.io
.FileNotFoundException
;
23 import java
.io
.IOException
;
24 import java
.io
.InterruptedIOException
;
25 import java
.util
.ArrayList
;
26 import java
.util
.Collection
;
27 import java
.util
.HashMap
;
28 import java
.util
.List
;
30 import java
.util
.Objects
;
31 import java
.util
.Optional
;
32 import java
.util
.UUID
;
33 import java
.util
.regex
.Matcher
;
34 import org
.apache
.hadoop
.conf
.Configuration
;
35 import org
.apache
.hadoop
.fs
.FSDataInputStream
;
36 import org
.apache
.hadoop
.fs
.FSDataOutputStream
;
37 import org
.apache
.hadoop
.fs
.FileStatus
;
38 import org
.apache
.hadoop
.fs
.FileSystem
;
39 import org
.apache
.hadoop
.fs
.FileUtil
;
40 import org
.apache
.hadoop
.fs
.LocatedFileStatus
;
41 import org
.apache
.hadoop
.fs
.Path
;
42 import org
.apache
.hadoop
.fs
.permission
.FsPermission
;
43 import org
.apache
.hadoop
.hbase
.Cell
;
44 import org
.apache
.hadoop
.hbase
.HConstants
;
45 import org
.apache
.hadoop
.hbase
.PrivateCellUtil
;
46 import org
.apache
.hadoop
.hbase
.TableName
;
47 import org
.apache
.hadoop
.hbase
.backup
.HFileArchiver
;
48 import org
.apache
.hadoop
.hbase
.client
.ColumnFamilyDescriptor
;
49 import org
.apache
.hadoop
.hbase
.client
.RegionInfo
;
50 import org
.apache
.hadoop
.hbase
.client
.TableDescriptor
;
51 import org
.apache
.hadoop
.hbase
.fs
.HFileSystem
;
52 import org
.apache
.hadoop
.hbase
.io
.HFileLink
;
53 import org
.apache
.hadoop
.hbase
.io
.Reference
;
54 import org
.apache
.hadoop
.hbase
.master
.procedure
.MasterProcedureEnv
;
55 import org
.apache
.hadoop
.hbase
.regionserver
.storefiletracker
.StoreFileTracker
;
56 import org
.apache
.hadoop
.hbase
.regionserver
.storefiletracker
.StoreFileTrackerFactory
;
57 import org
.apache
.hadoop
.hbase
.util
.Bytes
;
58 import org
.apache
.hadoop
.hbase
.util
.CommonFSUtils
;
59 import org
.apache
.hadoop
.hbase
.util
.FSUtils
;
60 import org
.apache
.hadoop
.hbase
.util
.Pair
;
61 import org
.apache
.hadoop
.hbase
.util
.ServerRegionReplicaUtil
;
62 import org
.apache
.yetus
.audience
.InterfaceAudience
;
63 import org
.slf4j
.Logger
;
64 import org
.slf4j
.LoggerFactory
;
65 import org
.apache
.hbase
.thirdparty
.com
.google
.common
.collect
.Lists
;
68 * View to an on-disk Region.
69 * Provides the set of methods necessary to interact with the on-disk region data.
71 @InterfaceAudience.Private
72 public class HRegionFileSystem
{
73 private static final Logger LOG
= LoggerFactory
.getLogger(HRegionFileSystem
.class);
75 /** Name of the region info file that resides just under the region directory. */
76 public final static String REGION_INFO_FILE
= ".regioninfo";
78 /** Temporary subdirectory of the region directory used for merges. */
79 public static final String REGION_MERGES_DIR
= ".merges";
81 /** Temporary subdirectory of the region directory used for splits. */
82 public static final String REGION_SPLITS_DIR
= ".splits";
84 /** Temporary subdirectory of the region directory used for compaction output. */
85 static final String REGION_TEMP_DIR
= ".tmp";
87 private final RegionInfo regionInfo
;
88 //regionInfo for interacting with FS (getting encodedName, etc)
89 final RegionInfo regionInfoForFs
;
90 final Configuration conf
;
91 private final Path tableDir
;
93 private final Path regionDir
;
// In order to handle NN connectivity hiccups, one needs to retry non-idempotent operations at the
// client level.
99 private final int hdfsClientRetriesNumber
;
100 private final int baseSleepBeforeRetries
;
101 private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER
= 10;
102 private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES
= 1000;
105 * Create a view to the on-disk region
106 * @param conf the {@link Configuration} to use
107 * @param fs {@link FileSystem} that contains the region
108 * @param tableDir {@link Path} to where the table is being stored
109 * @param regionInfo {@link RegionInfo} for region
111 HRegionFileSystem(final Configuration conf
, final FileSystem fs
, final Path tableDir
,
112 final RegionInfo regionInfo
) {
115 this.tableDir
= Objects
.requireNonNull(tableDir
, "tableDir is null");
116 this.regionInfo
= Objects
.requireNonNull(regionInfo
, "regionInfo is null");
117 this.regionInfoForFs
= ServerRegionReplicaUtil
.getRegionInfoForFs(regionInfo
);
118 this.regionDir
= FSUtils
.getRegionDirFromTableDir(tableDir
, regionInfo
);
119 this.hdfsClientRetriesNumber
= conf
.getInt("hdfs.client.retries.number",
120 DEFAULT_HDFS_CLIENT_RETRIES_NUMBER
);
121 this.baseSleepBeforeRetries
= conf
.getInt("hdfs.client.sleep.before.retries",
122 DEFAULT_BASE_SLEEP_BEFORE_RETRIES
);
125 /** @return the underlying {@link FileSystem} */
126 public FileSystem
getFileSystem() {
130 /** @return the {@link RegionInfo} that describe this on-disk region view */
131 public RegionInfo
getRegionInfo() {
132 return this.regionInfo
;
135 public RegionInfo
getRegionInfoForFS() {
136 return this.regionInfoForFs
;
139 /** @return {@link Path} to the region's root directory. */
140 public Path
getTableDir() {
141 return this.tableDir
;
144 /** @return {@link Path} to the region directory. */
145 public Path
getRegionDir() {
149 // ===========================================================================
151 // ===========================================================================
152 /** @return {@link Path} to the region's temp directory, used for file creations */
153 public Path
getTempDir() {
154 return new Path(getRegionDir(), REGION_TEMP_DIR
);
158 * Clean up any temp detritus that may have been left around from previous operation attempts.
160 void cleanupTempDir() throws IOException
{
161 deleteDir(getTempDir());
164 // ===========================================================================
165 // Store/StoreFile Helpers
166 // ===========================================================================
168 * Returns the directory path of the specified family
169 * @param familyName Column Family Name
170 * @return {@link Path} to the directory of the specified family
172 public Path
getStoreDir(final String familyName
) {
173 return new Path(this.getRegionDir(), familyName
);
177 * @param tabledir {@link Path} to where the table is being stored
178 * @param hri {@link RegionInfo} for the region.
179 * @param family {@link ColumnFamilyDescriptor} describing the column family
180 * @return Path to family/Store home directory.
182 public static Path
getStoreHomedir(final Path tabledir
,
183 final RegionInfo hri
, final byte[] family
) {
184 return getStoreHomedir(tabledir
, hri
.getEncodedName(), family
);
188 * @param tabledir {@link Path} to where the table is being stored
189 * @param encodedName Encoded region name.
190 * @param family {@link ColumnFamilyDescriptor} describing the column family
191 * @return Path to family/Store home directory.
193 public static Path
getStoreHomedir(final Path tabledir
,
194 final String encodedName
, final byte[] family
) {
195 return new Path(tabledir
, new Path(encodedName
, Bytes
.toString(family
)));
199 * Create the store directory for the specified family name
200 * @param familyName Column Family Name
201 * @return {@link Path} to the directory of the specified family
202 * @throws IOException if the directory creation fails.
204 Path
createStoreDir(final String familyName
) throws IOException
{
205 Path storeDir
= getStoreDir(familyName
);
206 if(!fs
.exists(storeDir
) && !createDir(storeDir
))
207 throw new IOException("Failed creating "+storeDir
);
212 * Set the directory of CF to the specified storage policy. <br>
213 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>,
216 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details.
217 * @param familyName The name of column family.
218 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc.
219 * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g
220 * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
222 public void setStoragePolicy(String familyName
, String policyName
) {
223 CommonFSUtils
.setStoragePolicy(this.fs
, getStoreDir(familyName
), policyName
);
227 * Get the storage policy of the directory of CF.
228 * @param familyName The name of column family.
229 * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception
230 * thrown when trying to get policy
233 public String
getStoragePolicyName(String familyName
) {
234 if (this.fs
instanceof HFileSystem
) {
235 Path storeDir
= getStoreDir(familyName
);
236 return ((HFileSystem
) this.fs
).getStoragePolicyName(storeDir
);
243 * Returns the store files available for the family.
244 * This methods performs the filtering based on the valid store files.
245 * @param familyName Column Family Name
246 * @return a set of {@link StoreFileInfo} for the specified family.
248 public List
<StoreFileInfo
> getStoreFiles(final String familyName
) throws IOException
{
249 return getStoreFiles(familyName
, true);
253 * Returns the store files available for the family.
254 * This methods performs the filtering based on the valid store files.
255 * @param familyName Column Family Name
256 * @return a set of {@link StoreFileInfo} for the specified family.
258 public List
<StoreFileInfo
> getStoreFiles(final String familyName
, final boolean validate
)
260 Path familyDir
= getStoreDir(familyName
);
261 FileStatus
[] files
= CommonFSUtils
.listStatus(this.fs
, familyDir
);
263 if (LOG
.isTraceEnabled()) {
264 LOG
.trace("No StoreFiles for: " + familyDir
);
269 ArrayList
<StoreFileInfo
> storeFiles
= new ArrayList
<>(files
.length
);
270 for (FileStatus status
: files
) {
271 if (validate
&& !StoreFileInfo
.isValid(status
)) {
272 // recovered.hfiles directory is expected inside CF path when hbase.wal.split.to.hfile to
273 // true, refer HBASE-23740
274 if (!HConstants
.RECOVERED_HFILES_DIR
.equals(status
.getPath().getName())) {
275 LOG
.warn("Invalid StoreFile: {}", status
.getPath());
279 StoreFileInfo info
= ServerRegionReplicaUtil
.getStoreFileInfo(conf
, fs
, regionInfo
,
280 regionInfoForFs
, familyName
, status
.getPath());
281 storeFiles
.add(info
);
288 * Returns the store files' LocatedFileStatus which available for the family.
289 * This methods performs the filtering based on the valid store files.
290 * @param familyName Column Family Name
291 * @return a list of store files' LocatedFileStatus for the specified family.
293 public static List
<LocatedFileStatus
> getStoreFilesLocatedStatus(
294 final HRegionFileSystem regionfs
, final String familyName
,
295 final boolean validate
) throws IOException
{
296 Path familyDir
= regionfs
.getStoreDir(familyName
);
297 List
<LocatedFileStatus
> locatedFileStatuses
= CommonFSUtils
.listLocatedStatus(
298 regionfs
.getFileSystem(), familyDir
);
299 if (locatedFileStatuses
== null) {
300 if (LOG
.isTraceEnabled()) {
301 LOG
.trace("No StoreFiles for: " + familyDir
);
306 List
<LocatedFileStatus
> validStoreFiles
= Lists
.newArrayList();
307 for (LocatedFileStatus status
: locatedFileStatuses
) {
308 if (validate
&& !StoreFileInfo
.isValid(status
)) {
309 // recovered.hfiles directory is expected inside CF path when hbase.wal.split.to.hfile to
310 // true, refer HBASE-23740
311 if (!HConstants
.RECOVERED_HFILES_DIR
.equals(status
.getPath().getName())) {
312 LOG
.warn("Invalid StoreFile: {}", status
.getPath());
315 validStoreFiles
.add(status
);
318 return validStoreFiles
;
322 * Return Qualified Path of the specified family/file
324 * @param familyName Column Family Name
325 * @param fileName File Name
326 * @return The qualified Path for the specified family/file
328 Path
getStoreFilePath(final String familyName
, final String fileName
) {
329 Path familyDir
= getStoreDir(familyName
);
330 return new Path(familyDir
, fileName
).makeQualified(fs
.getUri(), fs
.getWorkingDirectory());
334 * Return the store file information of the specified family/file.
336 * @param familyName Column Family Name
337 * @param fileName File Name
338 * @return The {@link StoreFileInfo} for the specified family/file
340 StoreFileInfo
getStoreFileInfo(final String familyName
, final String fileName
)
342 Path familyDir
= getStoreDir(familyName
);
343 return ServerRegionReplicaUtil
.getStoreFileInfo(conf
, fs
, regionInfo
,
344 regionInfoForFs
, familyName
, new Path(familyDir
, fileName
));
348 * Returns true if the specified family has reference files
349 * @param familyName Column Family Name
350 * @return true if family contains reference files
351 * @throws IOException
353 public boolean hasReferences(final String familyName
) throws IOException
{
354 Path storeDir
= getStoreDir(familyName
);
355 FileStatus
[] files
= CommonFSUtils
.listStatus(fs
, storeDir
);
357 for(FileStatus stat
: files
) {
358 if(stat
.isDirectory()) {
361 if (StoreFileInfo
.isReference(stat
.getPath())) {
362 LOG
.trace("Reference {}", stat
.getPath());
371 * Check whether region has Reference file
 * @param htd table descriptor of the region
373 * @return true if region has reference file
374 * @throws IOException
376 public boolean hasReferences(final TableDescriptor htd
) throws IOException
{
377 for (ColumnFamilyDescriptor family
: htd
.getColumnFamilies()) {
378 if (hasReferences(family
.getNameAsString())) {
386 * @return the set of families present on disk
387 * @throws IOException
389 public Collection
<String
> getFamilies() throws IOException
{
391 CommonFSUtils
.listStatus(fs
, getRegionDir(), new FSUtils
.FamilyDirFilter(fs
));
392 if (fds
== null) return null;
394 ArrayList
<String
> families
= new ArrayList
<>(fds
.length
);
395 for (FileStatus status
: fds
) {
396 families
.add(status
.getPath().getName());
403 * Remove the region family from disk, archiving the store files.
404 * @param familyName Column Family Name
405 * @throws IOException if an error occours during the archiving
407 public void deleteFamily(final String familyName
) throws IOException
{
408 // archive family store files
409 HFileArchiver
.archiveFamily(fs
, conf
, regionInfoForFs
, tableDir
, Bytes
.toBytes(familyName
));
411 // delete the family folder
412 Path familyDir
= getStoreDir(familyName
);
413 if(fs
.exists(familyDir
) && !deleteDir(familyDir
))
414 throw new IOException("Could not delete family " + familyName
415 + " from FileSystem for region " + regionInfoForFs
.getRegionNameAsString() + "("
416 + regionInfoForFs
.getEncodedName() + ")");
420 * Generate a unique file name, used by createTempName() and commitStoreFile()
421 * @param suffix extra information to append to the generated name
422 * @return Unique file name
424 private static String
generateUniqueName(final String suffix
) {
425 String name
= UUID
.randomUUID().toString().replaceAll("-", "");
426 if (suffix
!= null) name
+= suffix
;
431 * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
432 * to get a safer file creation.
434 * Path file = fs.createTempName();
435 * ...StoreFile.Writer(file)...
436 * fs.commitStoreFile("family", file);
439 * @return Unique {@link Path} of the temporary file
441 public Path
createTempName() {
442 return createTempName(null);
446 * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
447 * to get a safer file creation.
449 * Path file = fs.createTempName();
450 * ...StoreFile.Writer(file)...
451 * fs.commitStoreFile("family", file);
454 * @param suffix extra information to append to the generated name
455 * @return Unique {@link Path} of the temporary file
457 public Path
createTempName(final String suffix
) {
458 return new Path(getTempDir(), generateUniqueName(suffix
));
462 * Move the file from a build/temp location to the main family store directory.
463 * @param familyName Family that will gain the file
464 * @param buildPath {@link Path} to the file to commit.
465 * @return The new {@link Path} of the committed file
466 * @throws IOException
468 public Path
commitStoreFile(final String familyName
, final Path buildPath
) throws IOException
{
469 Path dstPath
= preCommitStoreFile(familyName
, buildPath
, -1, false);
470 return commitStoreFile(buildPath
, dstPath
);
474 * Generate the filename in the main family store directory for moving the file from a build/temp
476 * @param familyName Family that will gain the file
477 * @param buildPath {@link Path} to the file to commit.
 * @param seqNum Sequence Number to append to the file name (less than 0 if no sequence number)
479 * @param generateNewName False if you want to keep the buildPath name
480 * @return The new {@link Path} of the to be committed file
481 * @throws IOException
483 private Path
preCommitStoreFile(final String familyName
, final Path buildPath
,
484 final long seqNum
, final boolean generateNewName
) throws IOException
{
485 Path storeDir
= getStoreDir(familyName
);
486 if(!fs
.exists(storeDir
) && !createDir(storeDir
))
487 throw new IOException("Failed creating " + storeDir
);
489 String name
= buildPath
.getName();
490 if (generateNewName
) {
491 name
= generateUniqueName((seqNum
< 0) ?
null : "_SeqId_" + seqNum
+ "_");
493 Path dstPath
= new Path(storeDir
, name
);
494 if (!fs
.exists(buildPath
)) {
495 throw new FileNotFoundException(buildPath
.toString());
497 if (LOG
.isDebugEnabled()) {
498 LOG
.debug("Committing " + buildPath
+ " as " + dstPath
);
504 * Moves file from staging dir to region dir
505 * @param buildPath {@link Path} to the file to commit.
506 * @param dstPath {@link Path} to the file under region dir
507 * @return The {@link Path} of the committed file
508 * @throws IOException
510 Path
commitStoreFile(final Path buildPath
, Path dstPath
) throws IOException
{
511 // buildPath exists, therefore not doing an exists() check.
512 if (!rename(buildPath
, dstPath
)) {
513 throw new IOException("Failed rename of " + buildPath
+ " to " + dstPath
);
519 * Archives the specified store file from the specified family.
520 * @param familyName Family that contains the store files
521 * @param filePath {@link Path} to the store file to remove
522 * @throws IOException if the archiving fails
524 public void removeStoreFile(final String familyName
, final Path filePath
)
526 HFileArchiver
.archiveStoreFile(this.conf
, this.fs
, this.regionInfoForFs
,
527 this.tableDir
, Bytes
.toBytes(familyName
), filePath
);
531 * Closes and archives the specified store files from the specified family.
532 * @param familyName Family that contains the store files
533 * @param storeFiles set of store files to remove
534 * @throws IOException if the archiving fails
536 public void removeStoreFiles(String familyName
, Collection
<HStoreFile
> storeFiles
)
538 HFileArchiver
.archiveStoreFiles(this.conf
, this.fs
, this.regionInfoForFs
,
539 this.tableDir
, Bytes
.toBytes(familyName
), storeFiles
);
543 * Bulk load: Add a specified store file to the specified family.
 * If the source file is on the same file-system, it is moved from the
 * source location to the destination location; otherwise it is copied over.
547 * @param familyName Family that will gain the file
548 * @param srcPath {@link Path} to the file to import
549 * @param seqNum Bulk Load sequence number
550 * @return The destination {@link Path} of the bulk loaded file
551 * @throws IOException
553 Pair
<Path
, Path
> bulkLoadStoreFile(final String familyName
, Path srcPath
, long seqNum
)
555 // Copy the file if it's on another filesystem
556 FileSystem srcFs
= srcPath
.getFileSystem(conf
);
557 srcPath
= srcFs
.resolvePath(srcPath
);
558 FileSystem realSrcFs
= srcPath
.getFileSystem(conf
);
559 FileSystem desFs
= fs
instanceof HFileSystem ?
((HFileSystem
)fs
).getBackingFs() : fs
;
561 // We can't compare FileSystem instances as equals() includes UGI instance
562 // as part of the comparison and won't work when doing SecureBulkLoad
563 // TODO deal with viewFS
564 if (!FSUtils
.isSameHdfs(conf
, realSrcFs
, desFs
)) {
565 LOG
.info("Bulk-load file " + srcPath
+ " is on different filesystem than " +
566 "the destination store. Copying file over to destination filesystem.");
567 Path tmpPath
= createTempName();
568 FileUtil
.copy(realSrcFs
, srcPath
, fs
, tmpPath
, false, conf
);
569 LOG
.info("Copied " + srcPath
+ " to temporary path on destination filesystem: " + tmpPath
);
573 return new Pair
<>(srcPath
, preCommitStoreFile(familyName
, srcPath
, seqNum
, true));
576 // ===========================================================================
578 // ===========================================================================
580 public Path
getSplitsDir(final RegionInfo hri
) {
581 return new Path(getTableDir(), hri
.getEncodedName());
585 * Remove daughter region
586 * @param regionInfo daughter {@link RegionInfo}
587 * @throws IOException
589 void cleanupDaughterRegion(final RegionInfo regionInfo
) throws IOException
{
590 Path regionDir
= new Path(this.tableDir
, regionInfo
.getEncodedName());
591 if (this.fs
.exists(regionDir
) && !deleteDir(regionDir
)) {
592 throw new IOException("Failed delete of " + regionDir
);
597 * Commit a daughter region, moving it from the split temporary directory
598 * to the proper location in the filesystem.
600 * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo}
602 public Path
commitDaughterRegion(final RegionInfo regionInfo
, List
<Path
> allRegionFiles
,
603 MasterProcedureEnv env
) throws IOException
{
604 Path regionDir
= this.getSplitsDir(regionInfo
);
605 if (fs
.exists(regionDir
)) {
606 // Write HRI to a file in case we need to recover hbase:meta
607 Path regionInfoFile
= new Path(regionDir
, REGION_INFO_FILE
);
608 byte[] regionInfoContent
= getRegionInfoFileContent(regionInfo
);
609 writeRegionInfoFileContent(conf
, fs
, regionInfoFile
, regionInfoContent
);
610 HRegionFileSystem regionFs
= HRegionFileSystem
.openRegionFromFileSystem(
611 env
.getMasterConfiguration(), fs
, getTableDir(), regionInfo
, false);
612 insertRegionFilesIntoStoreTracker(allRegionFiles
, env
, regionFs
);
617 private void insertRegionFilesIntoStoreTracker(List
<Path
> allFiles
, MasterProcedureEnv env
,
618 HRegionFileSystem regionFs
) throws IOException
{
619 TableDescriptor tblDesc
= env
.getMasterServices().getTableDescriptors().
620 get(regionInfo
.getTable());
621 //we need to map trackers per store
622 Map
<String
, StoreFileTracker
> trackerMap
= new HashMap
<>();
623 //we need to map store files per store
624 Map
<String
, List
<StoreFileInfo
>> fileInfoMap
= new HashMap
<>();
625 for(Path file
: allFiles
) {
626 String familyName
= file
.getParent().getName();
627 trackerMap
.computeIfAbsent(familyName
, t
-> StoreFileTrackerFactory
.create(conf
, tblDesc
,
628 tblDesc
.getColumnFamily(Bytes
.toBytes(familyName
)), regionFs
));
629 fileInfoMap
.computeIfAbsent(familyName
, l
-> new ArrayList
<>());
630 List
<StoreFileInfo
> infos
= fileInfoMap
.get(familyName
);
631 infos
.add(new StoreFileInfo(conf
, fs
, file
, true));
633 for(Map
.Entry
<String
, StoreFileTracker
> entry
: trackerMap
.entrySet()) {
634 entry
.getValue().add(fileInfoMap
.get(entry
.getKey()));
639 * Creates region split daughter directories under the table dir. If the daughter regions already
640 * exist, for example, in the case of a recovery from a previous failed split procedure, this
641 * method deletes the given region dir recursively, then recreates it again.
643 public void createSplitsDir(RegionInfo daughterA
, RegionInfo daughterB
) throws IOException
{
644 Path daughterADir
= getSplitsDir(daughterA
);
645 if (fs
.exists(daughterADir
) && !deleteDir(daughterADir
)) {
646 throw new IOException("Failed deletion of " + daughterADir
+ " before creating them again.");
649 if (!createDir(daughterADir
)) {
650 throw new IOException("Failed create of " + daughterADir
);
652 Path daughterBDir
= getSplitsDir(daughterB
);
653 if (fs
.exists(daughterBDir
) && !deleteDir(daughterBDir
)) {
654 throw new IOException("Failed deletion of " + daughterBDir
+ " before creating them again.");
657 if (!createDir(daughterBDir
)) {
658 throw new IOException("Failed create of " + daughterBDir
);
663 * Write out a split reference. Package local so it doesnt leak out of
665 * @param hri {@link RegionInfo} of the destination
666 * @param familyName Column Family Name
667 * @param f File to split.
668 * @param splitRow Split Row
669 * @param top True if we are referring to the top half of the hfile.
670 * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if
671 * this method is invoked on the Master side, then the RegionSplitPolicy will
672 * NOT have a reference to a Region.
673 * @return Path to created reference.
675 public Path
splitStoreFile(RegionInfo hri
, String familyName
, HStoreFile f
, byte[] splitRow
,
676 boolean top
, RegionSplitPolicy splitPolicy
) throws IOException
{
677 Path splitDir
= new Path(getSplitsDir(hri
), familyName
);
678 // Add the referred-to regions name as a dot separated suffix.
679 // See REF_NAME_REGEX regex above. The referred-to regions name is
680 // up in the path of the passed in <code>f</code> -- parentdir is family,
681 // then the directory above is the region name.
682 String parentRegionName
= regionInfoForFs
.getEncodedName();
683 // Write reference with same file id only with the other region name as
684 // suffix and into the new region location (under same family).
685 Path p
= new Path(splitDir
, f
.getPath().getName() + "." + parentRegionName
);
687 LOG
.warn("Found an already existing split file for {}. Assuming this is a recovery.", p
);
690 boolean createLinkFile
= false;
691 if (splitPolicy
== null || !splitPolicy
.skipStoreFileRangeCheck(familyName
)) {
692 // Check whether the split row lies in the range of the store file
693 // If it is outside the range, return directly.
696 Cell splitKey
= PrivateCellUtil
.createFirstOnRow(splitRow
);
697 Optional
<Cell
> lastKey
= f
.getLastKey();
698 Optional
<Cell
> firstKey
= f
.getFirstKey();
700 //check if larger than last key.
701 // If lastKey is null means storefile is empty.
702 if (!lastKey
.isPresent()) {
705 if (f
.getComparator().compare(splitKey
, lastKey
.get()) > 0) {
708 if (firstKey
.isPresent() && f
.getComparator().compare(splitKey
, firstKey
.get()) <= 0) {
709 LOG
.debug("Will create HFileLink file for {}, top=true", f
.getPath());
710 createLinkFile
= true;
713 //check if smaller than first key
714 // If firstKey is null means storefile is empty.
715 if (!firstKey
.isPresent()) {
718 if (f
.getComparator().compare(splitKey
, firstKey
.get()) < 0) {
721 if (lastKey
.isPresent() && f
.getComparator().compare(splitKey
, lastKey
.get()) >= 0) {
722 LOG
.debug("Will create HFileLink file for {}, top=false", f
.getPath());
723 createLinkFile
= true;
727 f
.closeStoreFile(f
.getCacheConf() != null ? f
.getCacheConf().shouldEvictOnClose() : true);
730 if (createLinkFile
) {
731 // create HFileLink file instead of Reference file for child
732 String hfileName
= f
.getPath().getName();
733 TableName linkedTable
= regionInfoForFs
.getTable();
734 String linkedRegion
= regionInfoForFs
.getEncodedName();
736 if (HFileLink
.isHFileLink(hfileName
)) {
737 Matcher m
= LINK_NAME_PATTERN
.matcher(hfileName
);
739 throw new IllegalArgumentException(hfileName
+ " is not a valid HFileLink name!");
741 linkedTable
= TableName
.valueOf(m
.group(1), m
.group(2));
742 linkedRegion
= m
.group(3);
743 hfileName
= m
.group(4);
745 // must create back reference here
746 HFileLink
.create(conf
, fs
, splitDir
, familyName
, hri
.getTable().getNameAsString(),
747 hri
.getEncodedName(), linkedTable
, linkedRegion
, hfileName
, true);
749 new Path(splitDir
, HFileLink
.createHFileLinkName(linkedTable
, linkedRegion
, hfileName
));
750 LOG
.info("Created linkFile:" + path
.toString() + " for child: " + hri
.getEncodedName()
751 + ", parent: " + regionInfoForFs
.getEncodedName());
753 } catch (IOException e
) {
754 // if create HFileLink file failed, then just skip the error and create Reference file
755 LOG
.error("Create link file for " + hfileName
+ " for child " + hri
.getEncodedName()
756 + "failed, will create Reference file", e
);
759 // A reference to the bottom half of the hsf store file.
761 top ? Reference
.createTopReference(splitRow
): Reference
.createBottomReference(splitRow
);
762 return r
.write(fs
, p
);
765 // ===========================================================================
767 // ===========================================================================
769 Path
getMergesDir(final RegionInfo hri
) {
770 return new Path(getTableDir(), hri
.getEncodedName());
775 * Remove merged region
776 * @param mergedRegion {@link RegionInfo}
777 * @throws IOException
779 public void cleanupMergedRegion(final RegionInfo mergedRegion
) throws IOException
{
780 Path regionDir
= new Path(this.tableDir
, mergedRegion
.getEncodedName());
781 if (this.fs
.exists(regionDir
) && !this.fs
.delete(regionDir
, true)) {
782 throw new IOException("Failed delete of " + regionDir
);
786 static boolean mkdirs(FileSystem fs
, Configuration conf
, Path dir
) throws IOException
{
787 if (FSUtils
.isDistributedFileSystem(fs
) ||
788 !conf
.getBoolean(HConstants
.ENABLE_DATA_FILE_UMASK
, false)) {
789 return fs
.mkdirs(dir
);
791 FsPermission perms
= CommonFSUtils
.getFilePermissions(fs
, conf
, HConstants
.DATA_FILE_UMASK_KEY
);
792 return fs
.mkdirs(dir
, perms
);
796 * Write out a merge reference under the given merges directory.
797 * @param mergingRegion {@link RegionInfo} for one of the regions being merged.
798 * @param familyName Column Family Name
799 * @param f File to create reference.
800 * @return Path to created reference.
801 * @throws IOException if the merge write fails.
803 public Path
mergeStoreFile(RegionInfo mergingRegion
, String familyName
, HStoreFile f
)
805 Path referenceDir
= new Path(getMergesDir(regionInfoForFs
), familyName
);
806 // A whole reference to the store file.
807 Reference r
= Reference
.createTopReference(mergingRegion
.getStartKey());
808 // Add the referred-to regions name as a dot separated suffix.
809 // See REF_NAME_REGEX regex above. The referred-to regions name is
810 // up in the path of the passed in <code>f</code> -- parentdir is family,
811 // then the directory above is the region name.
812 String mergingRegionName
= mergingRegion
.getEncodedName();
813 // Write reference with same file id only with the other region name as
814 // suffix and into the new region location (under same family).
815 Path p
= new Path(referenceDir
, f
.getPath().getName() + "."
816 + mergingRegionName
);
817 return r
.write(fs
, p
);
821 * Commit a merged region, making it ready for use.
822 * @throws IOException
824 public void commitMergedRegion(List
<Path
> allMergedFiles
, MasterProcedureEnv env
)
826 Path regionDir
= getMergesDir(regionInfoForFs
);
827 if (regionDir
!= null && fs
.exists(regionDir
)) {
828 // Write HRI to a file in case we need to recover hbase:meta
829 Path regionInfoFile
= new Path(regionDir
, REGION_INFO_FILE
);
830 byte[] regionInfoContent
= getRegionInfoFileContent(regionInfo
);
831 writeRegionInfoFileContent(conf
, fs
, regionInfoFile
, regionInfoContent
);
832 insertRegionFilesIntoStoreTracker(allMergedFiles
, env
, this);
// ===========================================================================
// Create/Open/Delete Helpers
// ===========================================================================
842 * @return Content of the file we write out to the filesystem under a region
843 * @throws IOException
845 private static byte[] getRegionInfoFileContent(final RegionInfo hri
) throws IOException
{
846 return RegionInfo
.toDelimitedByteArray(hri
);
850 * Create a {@link RegionInfo} from the serialized version on-disk.
851 * @param fs {@link FileSystem} that contains the Region Info file
852 * @param regionDir {@link Path} to the Region Directory that contains the Info file
853 * @return An {@link RegionInfo} instance gotten from the Region Info file.
854 * @throws IOException if an error occurred during file open/read operation.
856 public static RegionInfo
loadRegionInfoFileContent(final FileSystem fs
, final Path regionDir
)
858 FSDataInputStream in
= fs
.open(new Path(regionDir
, REGION_INFO_FILE
));
860 return RegionInfo
.parseFrom(in
);
867 * Write the .regioninfo file on-disk.
869 * Overwrites if exists already.
871 private static void writeRegionInfoFileContent(final Configuration conf
, final FileSystem fs
,
872 final Path regionInfoFile
, final byte[] content
) throws IOException
{
873 // First check to get the permissions
874 FsPermission perms
= CommonFSUtils
.getFilePermissions(fs
, conf
, HConstants
.DATA_FILE_UMASK_KEY
);
875 // Write the RegionInfo file content
876 try (FSDataOutputStream out
= FSUtils
.create(conf
, fs
, regionInfoFile
, perms
, null)) {
882 * Write out an info file under the stored region directory. Useful recovering mangled regions.
883 * If the regionInfo already exists on-disk, then we fast exit.
885 void checkRegionInfoOnFilesystem() throws IOException
{
886 // Compose the content of the file so we can compare to length in filesystem. If not same,
887 // rewrite it (it may have been written in the old format using Writables instead of pb). The
888 // pb version is much shorter -- we write now w/o the toString version -- so checking length
889 // only should be sufficient. I don't want to read the file every time to check if it pb
891 byte[] content
= getRegionInfoFileContent(regionInfoForFs
);
893 // Verify if the region directory exists before opening a region. We need to do this since if
894 // the region directory doesn't exist we will re-create the region directory and a new HRI
895 // when HRegion.openHRegion() is called.
897 FileStatus status
= fs
.getFileStatus(getRegionDir());
898 } catch (FileNotFoundException e
) {
899 LOG
.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs
.getEncodedName() +
900 " on table " + regionInfo
.getTable());
904 Path regionInfoFile
= new Path(getRegionDir(), REGION_INFO_FILE
);
905 FileStatus status
= fs
.getFileStatus(regionInfoFile
);
906 if (status
!= null && status
.getLen() == content
.length
) {
907 // Then assume the content good and move on.
908 // NOTE: that the length is not sufficient to define the the content matches.
912 LOG
.info("Rewriting .regioninfo file at: " + regionInfoFile
);
913 if (!fs
.delete(regionInfoFile
, false)) {
914 throw new IOException("Unable to remove existing " + regionInfoFile
);
916 } catch (FileNotFoundException e
) {
917 LOG
.warn(REGION_INFO_FILE
+ " file not found for region: " + regionInfoForFs
.getEncodedName() +
918 " on table " + regionInfo
.getTable());
921 // Write HRI to a file in case we need to recover hbase:meta
922 writeRegionInfoOnFilesystem(content
, true);
926 * Write out an info file under the region directory. Useful recovering mangled regions.
927 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
929 private void writeRegionInfoOnFilesystem(boolean useTempDir
) throws IOException
{
930 byte[] content
= getRegionInfoFileContent(regionInfoForFs
);
931 writeRegionInfoOnFilesystem(content
, useTempDir
);
935 * Write out an info file under the region directory. Useful recovering mangled regions.
936 * @param regionInfoContent serialized version of the {@link RegionInfo}
937 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
939 private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent
,
940 final boolean useTempDir
) throws IOException
{
941 Path regionInfoFile
= new Path(getRegionDir(), REGION_INFO_FILE
);
943 // Create in tmpDir and then move into place in case we crash after
944 // create but before close. If we don't successfully close the file,
945 // subsequent region reopens will fail the below because create is
948 // And then create the file
949 Path tmpPath
= new Path(getTempDir(), REGION_INFO_FILE
);
951 // If datanode crashes or if the RS goes down just before the close is called while trying to
952 // close the created regioninfo file in the .tmp directory then on next
953 // creation we will be getting AlreadyCreatedException.
954 // Hence delete and create the file if exists.
955 if (CommonFSUtils
.isExists(fs
, tmpPath
)) {
956 CommonFSUtils
.delete(fs
, tmpPath
, true);
959 // Write HRI to a file in case we need to recover hbase:meta
960 writeRegionInfoFileContent(conf
, fs
, tmpPath
, regionInfoContent
);
962 // Move the created file to the original path
963 if (fs
.exists(tmpPath
) && !rename(tmpPath
, regionInfoFile
)) {
964 throw new IOException("Unable to rename " + tmpPath
+ " to " + regionInfoFile
);
967 // Write HRI to a file in case we need to recover hbase:meta
968 writeRegionInfoFileContent(conf
, fs
, regionInfoFile
, regionInfoContent
);
973 * Create a new Region on file-system.
974 * @param conf the {@link Configuration} to use
975 * @param fs {@link FileSystem} from which to add the region
976 * @param tableDir {@link Path} to where the table is being stored
977 * @param regionInfo {@link RegionInfo} for region to be added
978 * @throws IOException if the region creation fails due to a FileSystem exception.
980 public static HRegionFileSystem
createRegionOnFileSystem(final Configuration conf
,
981 final FileSystem fs
, final Path tableDir
, final RegionInfo regionInfo
) throws IOException
{
982 HRegionFileSystem regionFs
= new HRegionFileSystem(conf
, fs
, tableDir
, regionInfo
);
984 // We only create a .regioninfo and the region directory if this is the default region replica
985 if (regionInfo
.getReplicaId() == RegionInfo
.DEFAULT_REPLICA_ID
) {
986 Path regionDir
= regionFs
.getRegionDir();
987 if (fs
.exists(regionDir
)) {
988 LOG
.warn("Trying to create a region that already exists on disk: " + regionDir
);
990 // Create the region directory
991 if (!createDirOnFileSystem(fs
, conf
, regionDir
)) {
992 LOG
.warn("Unable to create the region directory: " + regionDir
);
993 throw new IOException("Unable to create region directory: " + regionDir
);
997 // Write HRI to a file in case we need to recover hbase:meta
998 regionFs
.writeRegionInfoOnFilesystem(false);
1000 if (LOG
.isDebugEnabled())
1001 LOG
.debug("Skipping creation of .regioninfo file for " + regionInfo
);
1007 * Open Region from file-system.
1008 * @param conf the {@link Configuration} to use
1009 * @param fs {@link FileSystem} from which to add the region
1010 * @param tableDir {@link Path} to where the table is being stored
1011 * @param regionInfo {@link RegionInfo} for region to be added
1012 * @param readOnly True if you don't want to edit the region data
1013 * @throws IOException if the region creation fails due to a FileSystem exception.
1015 public static HRegionFileSystem
openRegionFromFileSystem(final Configuration conf
,
1016 final FileSystem fs
, final Path tableDir
, final RegionInfo regionInfo
, boolean readOnly
)
1017 throws IOException
{
1018 HRegionFileSystem regionFs
= new HRegionFileSystem(conf
, fs
, tableDir
, regionInfo
);
1019 Path regionDir
= regionFs
.getRegionDir();
1021 if (!fs
.exists(regionDir
)) {
1022 LOG
.warn("Trying to open a region that do not exists on disk: " + regionDir
);
1023 throw new IOException("The specified region do not exists on disk: " + regionDir
);
1027 // Cleanup temporary directories
1028 regionFs
.cleanupTempDir();
1030 // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
1031 // Only create HRI if we are the default replica
1032 if (regionInfo
.getReplicaId() == RegionInfo
.DEFAULT_REPLICA_ID
) {
1033 regionFs
.checkRegionInfoOnFilesystem();
1035 if (LOG
.isDebugEnabled()) {
1036 LOG
.debug("Skipping creation of .regioninfo file for " + regionInfo
);
1045 * Remove the region from the table directory, archiving the region's hfiles.
1046 * @param conf the {@link Configuration} to use
1047 * @param fs {@link FileSystem} from which to remove the region
1048 * @param tableDir {@link Path} to where the table is being stored
1049 * @param regionInfo {@link RegionInfo} for region to be deleted
1050 * @throws IOException if the request cannot be completed
1052 public static void deleteRegionFromFileSystem(final Configuration conf
,
1053 final FileSystem fs
, final Path tableDir
, final RegionInfo regionInfo
) throws IOException
{
1054 HRegionFileSystem regionFs
= new HRegionFileSystem(conf
, fs
, tableDir
, regionInfo
);
1055 Path regionDir
= regionFs
.getRegionDir();
1057 if (!fs
.exists(regionDir
)) {
1058 LOG
.warn("Trying to delete a region that do not exists on disk: " + regionDir
);
1062 if (LOG
.isDebugEnabled()) {
1063 LOG
.debug("DELETING region " + regionDir
);
1067 Path rootDir
= CommonFSUtils
.getRootDir(conf
);
1068 HFileArchiver
.archiveRegion(fs
, rootDir
, tableDir
, regionDir
);
1070 // Delete empty region dir
1071 if (!fs
.delete(regionDir
, true)) {
1072 LOG
.warn("Failed delete of " + regionDir
);
1077 * Creates a directory. Assumes the user has already checked for this directory existence.
1079 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1080 * whether the directory exists or not, and returns true if it exists.
1081 * @throws IOException
1083 boolean createDir(Path dir
) throws IOException
{
1085 IOException lastIOE
= null;
1088 return mkdirs(fs
, conf
, dir
);
1089 } catch (IOException ioe
) {
1091 if (fs
.exists(dir
)) return true; // directory is present
1093 sleepBeforeRetry("Create Directory", i
+1);
1094 } catch (InterruptedException e
) {
1095 throw (InterruptedIOException
)new InterruptedIOException().initCause(e
);
1098 } while (++i
<= hdfsClientRetriesNumber
);
1099 throw new IOException("Exception in createDir", lastIOE
);
1103 * Renames a directory. Assumes the user has already checked for this directory existence.
1106 * @return true if rename is successful.
1107 * @throws IOException
1109 boolean rename(Path srcpath
, Path dstPath
) throws IOException
{
1110 IOException lastIOE
= null;
1114 return fs
.rename(srcpath
, dstPath
);
1115 } catch (IOException ioe
) {
1117 if (!fs
.exists(srcpath
) && fs
.exists(dstPath
)) return true; // successful move
1118 // dir is not there, retry after some time.
1120 sleepBeforeRetry("Rename Directory", i
+1);
1121 } catch (InterruptedException e
) {
1122 throw (InterruptedIOException
)new InterruptedIOException().initCause(e
);
1125 } while (++i
<= hdfsClientRetriesNumber
);
1127 throw new IOException("Exception in rename", lastIOE
);
1131 * Deletes a directory. Assumes the user has already checked for this directory existence.
1133 * @return true if the directory is deleted.
1134 * @throws IOException
1136 boolean deleteDir(Path dir
) throws IOException
{
1137 IOException lastIOE
= null;
1141 return fs
.delete(dir
, true);
1142 } catch (IOException ioe
) {
1144 if (!fs
.exists(dir
)) return true;
1145 // dir is there, retry deleting after some time.
1147 sleepBeforeRetry("Delete Directory", i
+1);
1148 } catch (InterruptedException e
) {
1149 throw (InterruptedIOException
)new InterruptedIOException().initCause(e
);
1152 } while (++i
<= hdfsClientRetriesNumber
);
1154 throw new IOException("Exception in DeleteDir", lastIOE
);
1158 * sleeping logic; handles the interrupt exception.
1160 private void sleepBeforeRetry(String msg
, int sleepMultiplier
) throws InterruptedException
{
1161 sleepBeforeRetry(msg
, sleepMultiplier
, baseSleepBeforeRetries
, hdfsClientRetriesNumber
);
1165 * Creates a directory for a filesystem and configuration object. Assumes the user has already
1166 * checked for this directory existence.
1170 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1171 * whether the directory exists or not, and returns true if it exists.
1172 * @throws IOException
1174 private static boolean createDirOnFileSystem(FileSystem fs
, Configuration conf
, Path dir
)
1175 throws IOException
{
1177 IOException lastIOE
= null;
1178 int hdfsClientRetriesNumber
= conf
.getInt("hdfs.client.retries.number",
1179 DEFAULT_HDFS_CLIENT_RETRIES_NUMBER
);
1180 int baseSleepBeforeRetries
= conf
.getInt("hdfs.client.sleep.before.retries",
1181 DEFAULT_BASE_SLEEP_BEFORE_RETRIES
);
1184 return fs
.mkdirs(dir
);
1185 } catch (IOException ioe
) {
1187 if (fs
.exists(dir
)) return true; // directory is present
1189 sleepBeforeRetry("Create Directory", i
+1, baseSleepBeforeRetries
, hdfsClientRetriesNumber
);
1190 } catch (InterruptedException e
) {
1191 throw (InterruptedIOException
)new InterruptedIOException().initCause(e
);
1194 } while (++i
<= hdfsClientRetriesNumber
);
1196 throw new IOException("Exception in createDir", lastIOE
);
1200 * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1201 * for this to avoid re-looking for the integer values.
1203 private static void sleepBeforeRetry(String msg
, int sleepMultiplier
, int baseSleepBeforeRetries
,
1204 int hdfsClientRetriesNumber
) throws InterruptedException
{
1205 if (sleepMultiplier
> hdfsClientRetriesNumber
) {
1206 if (LOG
.isDebugEnabled()) {
1207 LOG
.debug(msg
+ ", retries exhausted");
1211 if (LOG
.isDebugEnabled()) {
1212 LOG
.debug(msg
+ ", sleeping " + baseSleepBeforeRetries
+ " times " + sleepMultiplier
);
1214 Thread
.sleep((long)baseSleepBeforeRetries
* sleepMultiplier
);