HBASE-26921 Rewrite the counting cells part in TestMultiVersions (#4316)
[hbase.git] / hbase-server / src / main / java / org / apache / hadoop / hbase / client / TableSnapshotScanner.java
blobdd03ab26675d4ad0bc03395ce6662dbd19cb81b8
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 package org.apache.hadoop.hbase.client;
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.List;
24 import java.util.UUID;
25 import org.apache.hadoop.conf.Configuration;
26 import org.apache.hadoop.fs.FileSystem;
27 import org.apache.hadoop.fs.Path;
28 import org.apache.hadoop.hbase.PrivateCellUtil;
29 import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
30 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
31 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
32 import org.apache.hadoop.hbase.util.CommonFSUtils;
33 import org.apache.yetus.audience.InterfaceAudience;
34 import org.slf4j.Logger;
35 import org.slf4j.LoggerFactory;
37 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
38 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
39 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
41 /**
42 * A Scanner which performs a scan over snapshot files. Using this class requires copying the
43 * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
44 * directory. Actual data files are not copied.
46 * <p>
47 * This also allows one to run the scan from an
48 * online or offline hbase cluster. The snapshot files can be exported by using the
49 * org.apache.hadoop.hbase.snapshot.ExportSnapshot tool,
50 * to a pure-hdfs cluster, and this scanner can be used to
51 * run the scan directly over the snapshot files. The snapshot should not be deleted while there
52 * are open scanners reading from snapshot files.
54 * <p>
55 * An internal RegionScanner is used to execute the {@link Scan} obtained
56 * from the user for each region in the snapshot.
57 * <p>
58 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
59 * snapshot files and data files. HBase also enforces security because all the requests are handled
60 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
61 * files directly from the file system, the user who is running the MR job must have sufficient
62 * permissions to access snapshot and reference files. This means that to run mapreduce over
63 * snapshot files, the job has to be run as the HBase user or the user must have group or other
64 * priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
65 * snapshot/data files will completely circumvent the access control enforced by HBase.
66 * See org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
68 @InterfaceAudience.Private
69 public class TableSnapshotScanner extends AbstractClientScanner {
71 private static final Logger LOG = LoggerFactory.getLogger(TableSnapshotScanner.class);
73 private Configuration conf;
74 private String snapshotName;
75 private FileSystem fs;
76 private Path rootDir;
77 private Path restoreDir;
78 private Scan scan;
79 private ArrayList<RegionInfo> regions;
80 private TableDescriptor htd;
81 private final boolean snapshotAlreadyRestored;
83 private ClientSideRegionScanner currentRegionScanner = null;
84 private int currentRegion = -1;
86 private int numOfCompleteRows = 0;
87 /**
88 * Creates a TableSnapshotScanner.
89 * @param conf the configuration
90 * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
91 * have write permissions to this directory, and this should not be a subdirectory of
92 * rootDir. The scanner deletes the contents of the directory once the scanner is closed.
93 * @param snapshotName the name of the snapshot to read from
94 * @param scan a Scan representing scan parameters
95 * @throws IOException in case of error
97 public TableSnapshotScanner(Configuration conf, Path restoreDir, String snapshotName, Scan scan)
98 throws IOException {
99 this(conf, CommonFSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
102 public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
103 String snapshotName, Scan scan) throws IOException {
104 this(conf, rootDir, restoreDir, snapshotName, scan, false);
108 * Creates a TableSnapshotScanner.
109 * @param conf the configuration
110 * @param rootDir root directory for HBase.
111 * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
112 * have write permissions to this directory, and this should not be a subdirectory of
113 * rootdir. The scanner deletes the contents of the directory once the scanner is closed.
114 * @param snapshotName the name of the snapshot to read from
115 * @param scan a Scan representing scan parameters
116 * @param snapshotAlreadyRestored true to indicate that snapshot has been restored.
117 * @throws IOException in case of error
119 public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
120 String snapshotName, Scan scan, boolean snapshotAlreadyRestored) throws IOException {
121 this.conf = conf;
122 this.snapshotName = snapshotName;
123 this.rootDir = rootDir;
124 this.scan = scan;
125 this.snapshotAlreadyRestored = snapshotAlreadyRestored;
126 this.fs = rootDir.getFileSystem(conf);
128 if (snapshotAlreadyRestored) {
129 this.restoreDir = restoreDir;
130 openWithoutRestoringSnapshot();
131 } else {
132 // restoreDir will be deleted in close(), use a unique sub directory
133 this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
134 openWithRestoringSnapshot();
137 initScanMetrics(scan);
140 private void openWithoutRestoringSnapshot() throws IOException {
141 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
142 SnapshotProtos.SnapshotDescription snapshotDesc =
143 SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
145 SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
146 List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
147 if (regionManifests == null) {
148 throw new IllegalArgumentException("Snapshot seems empty, snapshotName: " + snapshotName);
151 regions = new ArrayList<>(regionManifests.size());
152 regionManifests.stream().map(r -> ProtobufUtil.toRegionInfo(r.getRegionInfo()))
153 .filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
154 htd = manifest.getTableDescriptor();
157 private boolean isValidRegion(RegionInfo hri) {
158 // An offline split parent region should be excluded.
159 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
160 return false;
162 return PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
163 hri.getEndKey());
166 private void openWithRestoringSnapshot() throws IOException {
167 final RestoreSnapshotHelper.RestoreMetaChanges meta =
168 RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
169 final List<RegionInfo> restoredRegions = meta.getRegionsToAdd();
171 htd = meta.getTableDescriptor();
172 regions = new ArrayList<>(restoredRegions.size());
173 restoredRegions.stream().filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
176 @Override
177 public Result next() throws IOException {
178 Result result = null;
179 while (true) {
180 if (currentRegionScanner == null) {
181 currentRegion++;
182 if (currentRegion >= regions.size()) {
183 return null;
186 RegionInfo hri = regions.get(currentRegion);
187 currentRegionScanner = new ClientSideRegionScanner(conf, fs,
188 restoreDir, htd, hri, scan, scanMetrics);
189 if (this.scanMetrics != null) {
190 this.scanMetrics.countOfRegions.incrementAndGet();
194 try {
195 result = currentRegionScanner.next();
196 if (result != null) {
197 if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
198 result = null;
200 return result;
202 } finally {
203 if (result == null) {
204 currentRegionScanner.close();
205 currentRegionScanner = null;
211 private void cleanup() {
212 try {
213 if (fs.exists(this.restoreDir)) {
214 if (!fs.delete(this.restoreDir, true)) {
215 LOG.warn(
216 "Delete restore directory for the snapshot failed. restoreDir: " + this.restoreDir);
219 } catch (IOException ex) {
220 LOG.warn(
221 "Could not delete restore directory for the snapshot. restoreDir: " + this.restoreDir, ex);
225 @Override
226 public void close() {
227 if (currentRegionScanner != null) {
228 currentRegionScanner.close();
230 // if snapshotAlreadyRestored is true, then we should invoke cleanup() method by hand.
231 if (!this.snapshotAlreadyRestored) {
232 cleanup();
236 @Override
237 public boolean renewLease() {
238 throw new UnsupportedOperationException();