HBASE-26280 Use store file tracker when snapshoting (#3685)
[hbase.git] / hbase-server / src / main / java / org / apache / hadoop / hbase / regionserver / HRegionFileSystem.java
blob811002561f0a8d5900b2fac23f082e5f0e6a953a
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 package org.apache.hadoop.hbase.regionserver;
20 import static org.apache.hadoop.hbase.io.HFileLink.LINK_NAME_PATTERN;
21 import edu.umd.cs.findbugs.annotations.Nullable;
22 import java.io.FileNotFoundException;
23 import java.io.IOException;
24 import java.io.InterruptedIOException;
25 import java.util.ArrayList;
26 import java.util.Collection;
27 import java.util.HashMap;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Objects;
31 import java.util.Optional;
32 import java.util.UUID;
33 import java.util.regex.Matcher;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.fs.FSDataInputStream;
36 import org.apache.hadoop.fs.FSDataOutputStream;
37 import org.apache.hadoop.fs.FileStatus;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.FileUtil;
40 import org.apache.hadoop.fs.LocatedFileStatus;
41 import org.apache.hadoop.fs.Path;
42 import org.apache.hadoop.fs.permission.FsPermission;
43 import org.apache.hadoop.hbase.Cell;
44 import org.apache.hadoop.hbase.HConstants;
45 import org.apache.hadoop.hbase.PrivateCellUtil;
46 import org.apache.hadoop.hbase.TableName;
47 import org.apache.hadoop.hbase.backup.HFileArchiver;
48 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
49 import org.apache.hadoop.hbase.client.RegionInfo;
50 import org.apache.hadoop.hbase.client.TableDescriptor;
51 import org.apache.hadoop.hbase.fs.HFileSystem;
52 import org.apache.hadoop.hbase.io.HFileLink;
53 import org.apache.hadoop.hbase.io.Reference;
54 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
55 import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
56 import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
57 import org.apache.hadoop.hbase.util.Bytes;
58 import org.apache.hadoop.hbase.util.CommonFSUtils;
59 import org.apache.hadoop.hbase.util.FSUtils;
60 import org.apache.hadoop.hbase.util.Pair;
61 import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
62 import org.apache.yetus.audience.InterfaceAudience;
63 import org.slf4j.Logger;
64 import org.slf4j.LoggerFactory;
65 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
67 /**
68 * View to an on-disk Region.
69 * Provides the set of methods necessary to interact with the on-disk region data.
71 @InterfaceAudience.Private
72 public class HRegionFileSystem {
73 private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class);
75 /** Name of the region info file that resides just under the region directory. */
76 public final static String REGION_INFO_FILE = ".regioninfo";
78 /** Temporary subdirectory of the region directory used for merges. */
79 public static final String REGION_MERGES_DIR = ".merges";
81 /** Temporary subdirectory of the region directory used for splits. */
82 public static final String REGION_SPLITS_DIR = ".splits";
84 /** Temporary subdirectory of the region directory used for compaction output. */
85 static final String REGION_TEMP_DIR = ".tmp";
87 private final RegionInfo regionInfo;
88 //regionInfo for interacting with FS (getting encodedName, etc)
89 final RegionInfo regionInfoForFs;
90 final Configuration conf;
91 private final Path tableDir;
92 final FileSystem fs;
93 private final Path regionDir;
95 /**
96 * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
97 * client level.
99 private final int hdfsClientRetriesNumber;
100 private final int baseSleepBeforeRetries;
101 private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
102 private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
/**
 * Builds a view onto a region stored on the filesystem.
 * @param conf the {@link Configuration} to use; also supplies the retry settings
 *          "hdfs.client.retries.number" and "hdfs.client.sleep.before.retries"
 * @param fs {@link FileSystem} that contains the region
 * @param tableDir {@link Path} to where the table is being stored; must not be null
 * @param regionInfo {@link RegionInfo} for the region; must not be null
 */
HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
    final RegionInfo regionInfo) {
  // Validate in the same order as the assignments below so the reported failure is stable.
  Objects.requireNonNull(tableDir, "tableDir is null");
  Objects.requireNonNull(regionInfo, "regionInfo is null");

  this.conf = conf;
  this.fs = fs;
  this.tableDir = tableDir;
  this.regionInfo = regionInfo;
  this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
  this.regionDir = FSUtils.getRegionDirFromTableDir(tableDir, regionInfo);

  this.hdfsClientRetriesNumber =
    conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
  this.baseSleepBeforeRetries =
    conf.getInt("hdfs.client.sleep.before.retries", DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
}
125 /** @return the underlying {@link FileSystem} */
126 public FileSystem getFileSystem() {
127 return this.fs;
130 /** @return the {@link RegionInfo} that describe this on-disk region view */
131 public RegionInfo getRegionInfo() {
132 return this.regionInfo;
135 public RegionInfo getRegionInfoForFS() {
136 return this.regionInfoForFs;
139 /** @return {@link Path} to the region's root directory. */
140 public Path getTableDir() {
141 return this.tableDir;
144 /** @return {@link Path} to the region directory. */
145 public Path getRegionDir() {
146 return regionDir;
149 // ===========================================================================
150 // Temp Helpers
151 // ===========================================================================
152 /** @return {@link Path} to the region's temp directory, used for file creations */
153 public Path getTempDir() {
154 return new Path(getRegionDir(), REGION_TEMP_DIR);
158 * Clean up any temp detritus that may have been left around from previous operation attempts.
160 void cleanupTempDir() throws IOException {
161 deleteDir(getTempDir());
164 // ===========================================================================
165 // Store/StoreFile Helpers
166 // ===========================================================================
168 * Returns the directory path of the specified family
169 * @param familyName Column Family Name
170 * @return {@link Path} to the directory of the specified family
172 public Path getStoreDir(final String familyName) {
173 return new Path(this.getRegionDir(), familyName);
177 * @param tabledir {@link Path} to where the table is being stored
178 * @param hri {@link RegionInfo} for the region.
179 * @param family {@link ColumnFamilyDescriptor} describing the column family
180 * @return Path to family/Store home directory.
182 public static Path getStoreHomedir(final Path tabledir,
183 final RegionInfo hri, final byte[] family) {
184 return getStoreHomedir(tabledir, hri.getEncodedName(), family);
188 * @param tabledir {@link Path} to where the table is being stored
189 * @param encodedName Encoded region name.
190 * @param family {@link ColumnFamilyDescriptor} describing the column family
191 * @return Path to family/Store home directory.
193 public static Path getStoreHomedir(final Path tabledir,
194 final String encodedName, final byte[] family) {
195 return new Path(tabledir, new Path(encodedName, Bytes.toString(family)));
199 * Create the store directory for the specified family name
200 * @param familyName Column Family Name
201 * @return {@link Path} to the directory of the specified family
202 * @throws IOException if the directory creation fails.
204 Path createStoreDir(final String familyName) throws IOException {
205 Path storeDir = getStoreDir(familyName);
206 if(!fs.exists(storeDir) && !createDir(storeDir))
207 throw new IOException("Failed creating "+storeDir);
208 return storeDir;
212 * Set the directory of CF to the specified storage policy. <br>
213 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>,
214 * <i>"COLD"</i> <br>
215 * <br>
216 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details.
217 * @param familyName The name of column family.
218 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc.
219 * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g
220 * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
222 public void setStoragePolicy(String familyName, String policyName) {
223 CommonFSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName);
227 * Get the storage policy of the directory of CF.
228 * @param familyName The name of column family.
229 * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception
230 * thrown when trying to get policy
232 @Nullable
233 public String getStoragePolicyName(String familyName) {
234 if (this.fs instanceof HFileSystem) {
235 Path storeDir = getStoreDir(familyName);
236 return ((HFileSystem) this.fs).getStoragePolicyName(storeDir);
239 return null;
243 * Returns the store files available for the family.
244 * This methods performs the filtering based on the valid store files.
245 * @param familyName Column Family Name
246 * @return a set of {@link StoreFileInfo} for the specified family.
248 public List<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
249 return getStoreFiles(familyName, true);
253 * Returns the store files available for the family.
254 * This methods performs the filtering based on the valid store files.
255 * @param familyName Column Family Name
256 * @return a set of {@link StoreFileInfo} for the specified family.
258 public List<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
259 throws IOException {
260 Path familyDir = getStoreDir(familyName);
261 FileStatus[] files = CommonFSUtils.listStatus(this.fs, familyDir);
262 if (files == null) {
263 if (LOG.isTraceEnabled()) {
264 LOG.trace("No StoreFiles for: " + familyDir);
266 return null;
269 ArrayList<StoreFileInfo> storeFiles = new ArrayList<>(files.length);
270 for (FileStatus status: files) {
271 if (validate && !StoreFileInfo.isValid(status)) {
272 // recovered.hfiles directory is expected inside CF path when hbase.wal.split.to.hfile to
273 // true, refer HBASE-23740
274 if (!HConstants.RECOVERED_HFILES_DIR.equals(status.getPath().getName())) {
275 LOG.warn("Invalid StoreFile: {}", status.getPath());
277 continue;
279 StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
280 regionInfoForFs, familyName, status.getPath());
281 storeFiles.add(info);
284 return storeFiles;
288 * Returns the store files' LocatedFileStatus which available for the family.
289 * This methods performs the filtering based on the valid store files.
290 * @param familyName Column Family Name
291 * @return a list of store files' LocatedFileStatus for the specified family.
293 public static List<LocatedFileStatus> getStoreFilesLocatedStatus(
294 final HRegionFileSystem regionfs, final String familyName,
295 final boolean validate) throws IOException {
296 Path familyDir = regionfs.getStoreDir(familyName);
297 List<LocatedFileStatus> locatedFileStatuses = CommonFSUtils.listLocatedStatus(
298 regionfs.getFileSystem(), familyDir);
299 if (locatedFileStatuses == null) {
300 if (LOG.isTraceEnabled()) {
301 LOG.trace("No StoreFiles for: " + familyDir);
303 return null;
306 List<LocatedFileStatus> validStoreFiles = Lists.newArrayList();
307 for (LocatedFileStatus status : locatedFileStatuses) {
308 if (validate && !StoreFileInfo.isValid(status)) {
309 // recovered.hfiles directory is expected inside CF path when hbase.wal.split.to.hfile to
310 // true, refer HBASE-23740
311 if (!HConstants.RECOVERED_HFILES_DIR.equals(status.getPath().getName())) {
312 LOG.warn("Invalid StoreFile: {}", status.getPath());
314 } else {
315 validStoreFiles.add(status);
318 return validStoreFiles;
322 * Return Qualified Path of the specified family/file
324 * @param familyName Column Family Name
325 * @param fileName File Name
326 * @return The qualified Path for the specified family/file
328 Path getStoreFilePath(final String familyName, final String fileName) {
329 Path familyDir = getStoreDir(familyName);
330 return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory());
334 * Return the store file information of the specified family/file.
336 * @param familyName Column Family Name
337 * @param fileName File Name
338 * @return The {@link StoreFileInfo} for the specified family/file
340 StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
341 throws IOException {
342 Path familyDir = getStoreDir(familyName);
343 return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
344 regionInfoForFs, familyName, new Path(familyDir, fileName));
348 * Returns true if the specified family has reference files
349 * @param familyName Column Family Name
350 * @return true if family contains reference files
351 * @throws IOException
353 public boolean hasReferences(final String familyName) throws IOException {
354 Path storeDir = getStoreDir(familyName);
355 FileStatus[] files = CommonFSUtils.listStatus(fs, storeDir);
356 if (files != null) {
357 for(FileStatus stat: files) {
358 if(stat.isDirectory()) {
359 continue;
361 if (StoreFileInfo.isReference(stat.getPath())) {
362 LOG.trace("Reference {}", stat.getPath());
363 return true;
367 return false;
371 * Check whether region has Reference file
372 * @param htd table desciptor of the region
373 * @return true if region has reference file
374 * @throws IOException
376 public boolean hasReferences(final TableDescriptor htd) throws IOException {
377 for (ColumnFamilyDescriptor family : htd.getColumnFamilies()) {
378 if (hasReferences(family.getNameAsString())) {
379 return true;
382 return false;
386 * @return the set of families present on disk
387 * @throws IOException
389 public Collection<String> getFamilies() throws IOException {
390 FileStatus[] fds =
391 CommonFSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
392 if (fds == null) return null;
394 ArrayList<String> families = new ArrayList<>(fds.length);
395 for (FileStatus status : fds) {
396 families.add(status.getPath().getName());
399 return families;
403 * Remove the region family from disk, archiving the store files.
404 * @param familyName Column Family Name
405 * @throws IOException if an error occours during the archiving
407 public void deleteFamily(final String familyName) throws IOException {
408 // archive family store files
409 HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
411 // delete the family folder
412 Path familyDir = getStoreDir(familyName);
413 if(fs.exists(familyDir) && !deleteDir(familyDir))
414 throw new IOException("Could not delete family " + familyName
415 + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
416 + regionInfoForFs.getEncodedName() + ")");
420 * Generate a unique file name, used by createTempName() and commitStoreFile()
421 * @param suffix extra information to append to the generated name
422 * @return Unique file name
424 private static String generateUniqueName(final String suffix) {
425 String name = UUID.randomUUID().toString().replaceAll("-", "");
426 if (suffix != null) name += suffix;
427 return name;
431 * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
432 * to get a safer file creation.
433 * <code>
434 * Path file = fs.createTempName();
435 * ...StoreFile.Writer(file)...
436 * fs.commitStoreFile("family", file);
437 * </code>
439 * @return Unique {@link Path} of the temporary file
441 public Path createTempName() {
442 return createTempName(null);
446 * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
447 * to get a safer file creation.
448 * <code>
449 * Path file = fs.createTempName();
450 * ...StoreFile.Writer(file)...
451 * fs.commitStoreFile("family", file);
452 * </code>
454 * @param suffix extra information to append to the generated name
455 * @return Unique {@link Path} of the temporary file
457 public Path createTempName(final String suffix) {
458 return new Path(getTempDir(), generateUniqueName(suffix));
462 * Move the file from a build/temp location to the main family store directory.
463 * @param familyName Family that will gain the file
464 * @param buildPath {@link Path} to the file to commit.
465 * @return The new {@link Path} of the committed file
466 * @throws IOException
468 public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
469 Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false);
470 return commitStoreFile(buildPath, dstPath);
474 * Generate the filename in the main family store directory for moving the file from a build/temp
475 * location.
476 * @param familyName Family that will gain the file
477 * @param buildPath {@link Path} to the file to commit.
478 * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
479 * @param generateNewName False if you want to keep the buildPath name
480 * @return The new {@link Path} of the to be committed file
481 * @throws IOException
483 private Path preCommitStoreFile(final String familyName, final Path buildPath,
484 final long seqNum, final boolean generateNewName) throws IOException {
485 Path storeDir = getStoreDir(familyName);
486 if(!fs.exists(storeDir) && !createDir(storeDir))
487 throw new IOException("Failed creating " + storeDir);
489 String name = buildPath.getName();
490 if (generateNewName) {
491 name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
493 Path dstPath = new Path(storeDir, name);
494 if (!fs.exists(buildPath)) {
495 throw new FileNotFoundException(buildPath.toString());
497 if (LOG.isDebugEnabled()) {
498 LOG.debug("Committing " + buildPath + " as " + dstPath);
500 return dstPath;
504 * Moves file from staging dir to region dir
505 * @param buildPath {@link Path} to the file to commit.
506 * @param dstPath {@link Path} to the file under region dir
507 * @return The {@link Path} of the committed file
508 * @throws IOException
510 Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException {
511 // buildPath exists, therefore not doing an exists() check.
512 if (!rename(buildPath, dstPath)) {
513 throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
515 return dstPath;
519 * Archives the specified store file from the specified family.
520 * @param familyName Family that contains the store files
521 * @param filePath {@link Path} to the store file to remove
522 * @throws IOException if the archiving fails
524 public void removeStoreFile(final String familyName, final Path filePath)
525 throws IOException {
526 HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
527 this.tableDir, Bytes.toBytes(familyName), filePath);
531 * Closes and archives the specified store files from the specified family.
532 * @param familyName Family that contains the store files
533 * @param storeFiles set of store files to remove
534 * @throws IOException if the archiving fails
536 public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles)
537 throws IOException {
538 HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
539 this.tableDir, Bytes.toBytes(familyName), storeFiles);
543 * Bulk load: Add a specified store file to the specified family.
544 * If the source file is on the same different file-system is moved from the
545 * source location to the destination location, otherwise is copied over.
547 * @param familyName Family that will gain the file
548 * @param srcPath {@link Path} to the file to import
549 * @param seqNum Bulk Load sequence number
550 * @return The destination {@link Path} of the bulk loaded file
551 * @throws IOException
553 Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
554 throws IOException {
555 // Copy the file if it's on another filesystem
556 FileSystem srcFs = srcPath.getFileSystem(conf);
557 srcPath = srcFs.resolvePath(srcPath);
558 FileSystem realSrcFs = srcPath.getFileSystem(conf);
559 FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
561 // We can't compare FileSystem instances as equals() includes UGI instance
562 // as part of the comparison and won't work when doing SecureBulkLoad
563 // TODO deal with viewFS
564 if (!FSUtils.isSameHdfs(conf, realSrcFs, desFs)) {
565 LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
566 "the destination store. Copying file over to destination filesystem.");
567 Path tmpPath = createTempName();
568 FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf);
569 LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
570 srcPath = tmpPath;
573 return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true));
576 // ===========================================================================
577 // Splits Helpers
578 // ===========================================================================
580 public Path getSplitsDir(final RegionInfo hri) {
581 return new Path(getTableDir(), hri.getEncodedName());
585 * Remove daughter region
586 * @param regionInfo daughter {@link RegionInfo}
587 * @throws IOException
589 void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException {
590 Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
591 if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
592 throw new IOException("Failed delete of " + regionDir);
597 * Commit a daughter region, moving it from the split temporary directory
598 * to the proper location in the filesystem.
600 * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo}
602 public Path commitDaughterRegion(final RegionInfo regionInfo, List<Path> allRegionFiles,
603 MasterProcedureEnv env) throws IOException {
604 Path regionDir = this.getSplitsDir(regionInfo);
605 if (fs.exists(regionDir)) {
606 // Write HRI to a file in case we need to recover hbase:meta
607 Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE);
608 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
609 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
610 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
611 env.getMasterConfiguration(), fs, getTableDir(), regionInfo, false);
612 insertRegionFilesIntoStoreTracker(allRegionFiles, env, regionFs);
614 return regionDir;
617 private void insertRegionFilesIntoStoreTracker(List<Path> allFiles, MasterProcedureEnv env,
618 HRegionFileSystem regionFs) throws IOException {
619 TableDescriptor tblDesc = env.getMasterServices().getTableDescriptors().
620 get(regionInfo.getTable());
621 //we need to map trackers per store
622 Map<String, StoreFileTracker> trackerMap = new HashMap<>();
623 //we need to map store files per store
624 Map<String, List<StoreFileInfo>> fileInfoMap = new HashMap<>();
625 for(Path file : allFiles) {
626 String familyName = file.getParent().getName();
627 trackerMap.computeIfAbsent(familyName, t -> StoreFileTrackerFactory.create(conf, tblDesc,
628 tblDesc.getColumnFamily(Bytes.toBytes(familyName)), regionFs));
629 fileInfoMap.computeIfAbsent(familyName, l -> new ArrayList<>());
630 List<StoreFileInfo> infos = fileInfoMap.get(familyName);
631 infos.add(new StoreFileInfo(conf, fs, file, true));
633 for(Map.Entry<String, StoreFileTracker> entry : trackerMap.entrySet()) {
634 entry.getValue().add(fileInfoMap.get(entry.getKey()));
639 * Creates region split daughter directories under the table dir. If the daughter regions already
640 * exist, for example, in the case of a recovery from a previous failed split procedure, this
641 * method deletes the given region dir recursively, then recreates it again.
643 public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) throws IOException {
644 Path daughterADir = getSplitsDir(daughterA);
645 if (fs.exists(daughterADir) && !deleteDir(daughterADir)) {
646 throw new IOException("Failed deletion of " + daughterADir + " before creating them again.");
649 if (!createDir(daughterADir)) {
650 throw new IOException("Failed create of " + daughterADir);
652 Path daughterBDir = getSplitsDir(daughterB);
653 if (fs.exists(daughterBDir) && !deleteDir(daughterBDir)) {
654 throw new IOException("Failed deletion of " + daughterBDir + " before creating them again.");
657 if (!createDir(daughterBDir)) {
658 throw new IOException("Failed create of " + daughterBDir);
663 * Write out a split reference. Package local so it doesnt leak out of
664 * regionserver.
665 * @param hri {@link RegionInfo} of the destination
666 * @param familyName Column Family Name
667 * @param f File to split.
668 * @param splitRow Split Row
669 * @param top True if we are referring to the top half of the hfile.
670 * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if
671 * this method is invoked on the Master side, then the RegionSplitPolicy will
672 * NOT have a reference to a Region.
673 * @return Path to created reference.
675 public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow,
676 boolean top, RegionSplitPolicy splitPolicy) throws IOException {
677 Path splitDir = new Path(getSplitsDir(hri), familyName);
678 // Add the referred-to regions name as a dot separated suffix.
679 // See REF_NAME_REGEX regex above. The referred-to regions name is
680 // up in the path of the passed in <code>f</code> -- parentdir is family,
681 // then the directory above is the region name.
682 String parentRegionName = regionInfoForFs.getEncodedName();
683 // Write reference with same file id only with the other region name as
684 // suffix and into the new region location (under same family).
685 Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
686 if(fs.exists(p)){
687 LOG.warn("Found an already existing split file for {}. Assuming this is a recovery.", p);
688 return p;
690 boolean createLinkFile = false;
691 if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
692 // Check whether the split row lies in the range of the store file
693 // If it is outside the range, return directly.
694 f.initReader();
695 try {
696 Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow);
697 Optional<Cell> lastKey = f.getLastKey();
698 Optional<Cell> firstKey = f.getFirstKey();
699 if (top) {
700 //check if larger than last key.
701 // If lastKey is null means storefile is empty.
702 if (!lastKey.isPresent()) {
703 return null;
705 if (f.getComparator().compare(splitKey, lastKey.get()) > 0) {
706 return null;
708 if (firstKey.isPresent() && f.getComparator().compare(splitKey, firstKey.get()) <= 0) {
709 LOG.debug("Will create HFileLink file for {}, top=true", f.getPath());
710 createLinkFile = true;
712 } else {
713 //check if smaller than first key
714 // If firstKey is null means storefile is empty.
715 if (!firstKey.isPresent()) {
716 return null;
718 if (f.getComparator().compare(splitKey, firstKey.get()) < 0) {
719 return null;
721 if (lastKey.isPresent() && f.getComparator().compare(splitKey, lastKey.get()) >= 0) {
722 LOG.debug("Will create HFileLink file for {}, top=false", f.getPath());
723 createLinkFile = true;
726 } finally {
727 f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
730 if (createLinkFile) {
731 // create HFileLink file instead of Reference file for child
732 String hfileName = f.getPath().getName();
733 TableName linkedTable = regionInfoForFs.getTable();
734 String linkedRegion = regionInfoForFs.getEncodedName();
735 try {
736 if (HFileLink.isHFileLink(hfileName)) {
737 Matcher m = LINK_NAME_PATTERN.matcher(hfileName);
738 if (!m.matches()) {
739 throw new IllegalArgumentException(hfileName + " is not a valid HFileLink name!");
741 linkedTable = TableName.valueOf(m.group(1), m.group(2));
742 linkedRegion = m.group(3);
743 hfileName = m.group(4);
745 // must create back reference here
746 HFileLink.create(conf, fs, splitDir, familyName, hri.getTable().getNameAsString(),
747 hri.getEncodedName(), linkedTable, linkedRegion, hfileName, true);
748 Path path =
749 new Path(splitDir, HFileLink.createHFileLinkName(linkedTable, linkedRegion, hfileName));
750 LOG.info("Created linkFile:" + path.toString() + " for child: " + hri.getEncodedName()
751 + ", parent: " + regionInfoForFs.getEncodedName());
752 return path;
753 } catch (IOException e) {
754 // if create HFileLink file failed, then just skip the error and create Reference file
755 LOG.error("Create link file for " + hfileName + " for child " + hri.getEncodedName()
756 + "failed, will create Reference file", e);
759 // A reference to the bottom half of the hsf store file.
760 Reference r =
761 top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
762 return r.write(fs, p);
765 // ===========================================================================
766 // Merge Helpers
767 // ===========================================================================
769 Path getMergesDir(final RegionInfo hri) {
770 return new Path(getTableDir(), hri.getEncodedName());
775 * Remove merged region
776 * @param mergedRegion {@link RegionInfo}
777 * @throws IOException
779 public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException {
780 Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
781 if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
782 throw new IOException("Failed delete of " + regionDir);
786 static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException {
787 if (FSUtils.isDistributedFileSystem(fs) ||
788 !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)) {
789 return fs.mkdirs(dir);
791 FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
792 return fs.mkdirs(dir, perms);
796 * Write out a merge reference under the given merges directory.
797 * @param mergingRegion {@link RegionInfo} for one of the regions being merged.
798 * @param familyName Column Family Name
799 * @param f File to create reference.
800 * @return Path to created reference.
801 * @throws IOException if the merge write fails.
803 public Path mergeStoreFile(RegionInfo mergingRegion, String familyName, HStoreFile f)
804 throws IOException {
805 Path referenceDir = new Path(getMergesDir(regionInfoForFs), familyName);
806 // A whole reference to the store file.
807 Reference r = Reference.createTopReference(mergingRegion.getStartKey());
808 // Add the referred-to regions name as a dot separated suffix.
809 // See REF_NAME_REGEX regex above. The referred-to regions name is
810 // up in the path of the passed in <code>f</code> -- parentdir is family,
811 // then the directory above is the region name.
812 String mergingRegionName = mergingRegion.getEncodedName();
813 // Write reference with same file id only with the other region name as
814 // suffix and into the new region location (under same family).
815 Path p = new Path(referenceDir, f.getPath().getName() + "."
816 + mergingRegionName);
817 return r.write(fs, p);
821 * Commit a merged region, making it ready for use.
822 * @throws IOException
824 public void commitMergedRegion(List<Path> allMergedFiles, MasterProcedureEnv env)
825 throws IOException {
826 Path regionDir = getMergesDir(regionInfoForFs);
827 if (regionDir != null && fs.exists(regionDir)) {
828 // Write HRI to a file in case we need to recover hbase:meta
829 Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE);
830 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
831 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
832 insertRegionFilesIntoStoreTracker(allMergedFiles, env, this);
836 // ===========================================================================
837 // Create/Open/Delete Helpers
838 // ===========================================================================
841 * @param hri
842 * @return Content of the file we write out to the filesystem under a region
843 * @throws IOException
845 private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException {
846 return RegionInfo.toDelimitedByteArray(hri);
850 * Create a {@link RegionInfo} from the serialized version on-disk.
851 * @param fs {@link FileSystem} that contains the Region Info file
852 * @param regionDir {@link Path} to the Region Directory that contains the Info file
853 * @return An {@link RegionInfo} instance gotten from the Region Info file.
854 * @throws IOException if an error occurred during file open/read operation.
856 public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
857 throws IOException {
858 FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
859 try {
860 return RegionInfo.parseFrom(in);
861 } finally {
862 in.close();
867 * Write the .regioninfo file on-disk.
868 * <p/>
869 * Overwrites if exists already.
871 private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
872 final Path regionInfoFile, final byte[] content) throws IOException {
873 // First check to get the permissions
874 FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
875 // Write the RegionInfo file content
876 try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) {
877 out.write(content);
882 * Write out an info file under the stored region directory. Useful recovering mangled regions.
883 * If the regionInfo already exists on-disk, then we fast exit.
885 void checkRegionInfoOnFilesystem() throws IOException {
886 // Compose the content of the file so we can compare to length in filesystem. If not same,
887 // rewrite it (it may have been written in the old format using Writables instead of pb). The
888 // pb version is much shorter -- we write now w/o the toString version -- so checking length
889 // only should be sufficient. I don't want to read the file every time to check if it pb
890 // serialized.
891 byte[] content = getRegionInfoFileContent(regionInfoForFs);
893 // Verify if the region directory exists before opening a region. We need to do this since if
894 // the region directory doesn't exist we will re-create the region directory and a new HRI
895 // when HRegion.openHRegion() is called.
896 try {
897 FileStatus status = fs.getFileStatus(getRegionDir());
898 } catch (FileNotFoundException e) {
899 LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
900 " on table " + regionInfo.getTable());
903 try {
904 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
905 FileStatus status = fs.getFileStatus(regionInfoFile);
906 if (status != null && status.getLen() == content.length) {
907 // Then assume the content good and move on.
908 // NOTE: that the length is not sufficient to define the the content matches.
909 return;
912 LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
913 if (!fs.delete(regionInfoFile, false)) {
914 throw new IOException("Unable to remove existing " + regionInfoFile);
916 } catch (FileNotFoundException e) {
917 LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
918 " on table " + regionInfo.getTable());
921 // Write HRI to a file in case we need to recover hbase:meta
922 writeRegionInfoOnFilesystem(content, true);
926 * Write out an info file under the region directory. Useful recovering mangled regions.
927 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
929 private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
930 byte[] content = getRegionInfoFileContent(regionInfoForFs);
931 writeRegionInfoOnFilesystem(content, useTempDir);
935 * Write out an info file under the region directory. Useful recovering mangled regions.
936 * @param regionInfoContent serialized version of the {@link RegionInfo}
937 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
939 private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
940 final boolean useTempDir) throws IOException {
941 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
942 if (useTempDir) {
943 // Create in tmpDir and then move into place in case we crash after
944 // create but before close. If we don't successfully close the file,
945 // subsequent region reopens will fail the below because create is
946 // registered in NN.
948 // And then create the file
949 Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
951 // If datanode crashes or if the RS goes down just before the close is called while trying to
952 // close the created regioninfo file in the .tmp directory then on next
953 // creation we will be getting AlreadyCreatedException.
954 // Hence delete and create the file if exists.
955 if (CommonFSUtils.isExists(fs, tmpPath)) {
956 CommonFSUtils.delete(fs, tmpPath, true);
959 // Write HRI to a file in case we need to recover hbase:meta
960 writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
962 // Move the created file to the original path
963 if (fs.exists(tmpPath) && !rename(tmpPath, regionInfoFile)) {
964 throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
966 } else {
967 // Write HRI to a file in case we need to recover hbase:meta
968 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
973 * Create a new Region on file-system.
974 * @param conf the {@link Configuration} to use
975 * @param fs {@link FileSystem} from which to add the region
976 * @param tableDir {@link Path} to where the table is being stored
977 * @param regionInfo {@link RegionInfo} for region to be added
978 * @throws IOException if the region creation fails due to a FileSystem exception.
980 public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
981 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
982 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
984 // We only create a .regioninfo and the region directory if this is the default region replica
985 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
986 Path regionDir = regionFs.getRegionDir();
987 if (fs.exists(regionDir)) {
988 LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
989 } else {
990 // Create the region directory
991 if (!createDirOnFileSystem(fs, conf, regionDir)) {
992 LOG.warn("Unable to create the region directory: " + regionDir);
993 throw new IOException("Unable to create region directory: " + regionDir);
997 // Write HRI to a file in case we need to recover hbase:meta
998 regionFs.writeRegionInfoOnFilesystem(false);
999 } else {
1000 if (LOG.isDebugEnabled())
1001 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
1003 return regionFs;
1007 * Open Region from file-system.
1008 * @param conf the {@link Configuration} to use
1009 * @param fs {@link FileSystem} from which to add the region
1010 * @param tableDir {@link Path} to where the table is being stored
1011 * @param regionInfo {@link RegionInfo} for region to be added
1012 * @param readOnly True if you don't want to edit the region data
1013 * @throws IOException if the region creation fails due to a FileSystem exception.
1015 public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
1016 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly)
1017 throws IOException {
1018 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1019 Path regionDir = regionFs.getRegionDir();
1021 if (!fs.exists(regionDir)) {
1022 LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
1023 throw new IOException("The specified region do not exists on disk: " + regionDir);
1026 if (!readOnly) {
1027 // Cleanup temporary directories
1028 regionFs.cleanupTempDir();
1030 // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
1031 // Only create HRI if we are the default replica
1032 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1033 regionFs.checkRegionInfoOnFilesystem();
1034 } else {
1035 if (LOG.isDebugEnabled()) {
1036 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
1041 return regionFs;
1045 * Remove the region from the table directory, archiving the region's hfiles.
1046 * @param conf the {@link Configuration} to use
1047 * @param fs {@link FileSystem} from which to remove the region
1048 * @param tableDir {@link Path} to where the table is being stored
1049 * @param regionInfo {@link RegionInfo} for region to be deleted
1050 * @throws IOException if the request cannot be completed
1052 public static void deleteRegionFromFileSystem(final Configuration conf,
1053 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
1054 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1055 Path regionDir = regionFs.getRegionDir();
1057 if (!fs.exists(regionDir)) {
1058 LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
1059 return;
1062 if (LOG.isDebugEnabled()) {
1063 LOG.debug("DELETING region " + regionDir);
1066 // Archive region
1067 Path rootDir = CommonFSUtils.getRootDir(conf);
1068 HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
1070 // Delete empty region dir
1071 if (!fs.delete(regionDir, true)) {
1072 LOG.warn("Failed delete of " + regionDir);
1077 * Creates a directory. Assumes the user has already checked for this directory existence.
1078 * @param dir
1079 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1080 * whether the directory exists or not, and returns true if it exists.
1081 * @throws IOException
1083 boolean createDir(Path dir) throws IOException {
1084 int i = 0;
1085 IOException lastIOE = null;
1086 do {
1087 try {
1088 return mkdirs(fs, conf, dir);
1089 } catch (IOException ioe) {
1090 lastIOE = ioe;
1091 if (fs.exists(dir)) return true; // directory is present
1092 try {
1093 sleepBeforeRetry("Create Directory", i+1);
1094 } catch (InterruptedException e) {
1095 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1098 } while (++i <= hdfsClientRetriesNumber);
1099 throw new IOException("Exception in createDir", lastIOE);
1103 * Renames a directory. Assumes the user has already checked for this directory existence.
1104 * @param srcpath
1105 * @param dstPath
1106 * @return true if rename is successful.
1107 * @throws IOException
1109 boolean rename(Path srcpath, Path dstPath) throws IOException {
1110 IOException lastIOE = null;
1111 int i = 0;
1112 do {
1113 try {
1114 return fs.rename(srcpath, dstPath);
1115 } catch (IOException ioe) {
1116 lastIOE = ioe;
1117 if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1118 // dir is not there, retry after some time.
1119 try {
1120 sleepBeforeRetry("Rename Directory", i+1);
1121 } catch (InterruptedException e) {
1122 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1125 } while (++i <= hdfsClientRetriesNumber);
1127 throw new IOException("Exception in rename", lastIOE);
1131 * Deletes a directory. Assumes the user has already checked for this directory existence.
1132 * @param dir
1133 * @return true if the directory is deleted.
1134 * @throws IOException
1136 boolean deleteDir(Path dir) throws IOException {
1137 IOException lastIOE = null;
1138 int i = 0;
1139 do {
1140 try {
1141 return fs.delete(dir, true);
1142 } catch (IOException ioe) {
1143 lastIOE = ioe;
1144 if (!fs.exists(dir)) return true;
1145 // dir is there, retry deleting after some time.
1146 try {
1147 sleepBeforeRetry("Delete Directory", i+1);
1148 } catch (InterruptedException e) {
1149 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1152 } while (++i <= hdfsClientRetriesNumber);
1154 throw new IOException("Exception in DeleteDir", lastIOE);
1158 * sleeping logic; handles the interrupt exception.
1160 private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1161 sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1165 * Creates a directory for a filesystem and configuration object. Assumes the user has already
1166 * checked for this directory existence.
1167 * @param fs
1168 * @param conf
1169 * @param dir
1170 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1171 * whether the directory exists or not, and returns true if it exists.
1172 * @throws IOException
1174 private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1175 throws IOException {
1176 int i = 0;
1177 IOException lastIOE = null;
1178 int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1179 DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1180 int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1181 DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1182 do {
1183 try {
1184 return fs.mkdirs(dir);
1185 } catch (IOException ioe) {
1186 lastIOE = ioe;
1187 if (fs.exists(dir)) return true; // directory is present
1188 try {
1189 sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1190 } catch (InterruptedException e) {
1191 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1194 } while (++i <= hdfsClientRetriesNumber);
1196 throw new IOException("Exception in createDir", lastIOE);
1200 * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1201 * for this to avoid re-looking for the integer values.
1203 private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1204 int hdfsClientRetriesNumber) throws InterruptedException {
1205 if (sleepMultiplier > hdfsClientRetriesNumber) {
1206 if (LOG.isDebugEnabled()) {
1207 LOG.debug(msg + ", retries exhausted");
1209 return;
1211 if (LOG.isDebugEnabled()) {
1212 LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1214 Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);