2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
19 package org
.apache
.hadoop
.hbase
.client
;
21 import java
.io
.IOException
;
22 import java
.util
.ArrayList
;
23 import java
.util
.List
;
24 import java
.util
.UUID
;
25 import org
.apache
.hadoop
.conf
.Configuration
;
26 import org
.apache
.hadoop
.fs
.FileSystem
;
27 import org
.apache
.hadoop
.fs
.Path
;
28 import org
.apache
.hadoop
.hbase
.PrivateCellUtil
;
29 import org
.apache
.hadoop
.hbase
.snapshot
.RestoreSnapshotHelper
;
30 import org
.apache
.hadoop
.hbase
.snapshot
.SnapshotDescriptionUtils
;
31 import org
.apache
.hadoop
.hbase
.snapshot
.SnapshotManifest
;
32 import org
.apache
.hadoop
.hbase
.util
.CommonFSUtils
;
33 import org
.apache
.yetus
.audience
.InterfaceAudience
;
34 import org
.slf4j
.Logger
;
35 import org
.slf4j
.LoggerFactory
;
37 import org
.apache
.hadoop
.hbase
.shaded
.protobuf
.ProtobufUtil
;
38 import org
.apache
.hadoop
.hbase
.shaded
.protobuf
.generated
.SnapshotProtos
;
39 import org
.apache
.hadoop
.hbase
.shaded
.protobuf
.generated
.SnapshotProtos
.SnapshotRegionManifest
;
42 * A Scanner which performs a scan over snapshot files. Using this class requires copying the
43 * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
44 * directory. Actual data files are not copied.
47 * This also allows one to run the scan from an
48 * online or offline hbase cluster. The snapshot files can be exported by using the
49 * org.apache.hadoop.hbase.snapshot.ExportSnapshot tool,
50 * to a pure-hdfs cluster, and this scanner can be used to
51 * run the scan directly over the snapshot files. The snapshot should not be deleted while there
52 * are open scanners reading from snapshot files.
55 * An internal RegionScanner is used to execute the {@link Scan} obtained
56 * from the user for each region in the snapshot.
58 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
59 * snapshot files and data files. HBase also enforces security because all the requests are handled
60 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
61 * files directly from the file system, the user who is running the MR job must have sufficient
62 * permissions to access snapshot and reference files. This means that to run mapreduce over
63 * snapshot files, the job has to be run as the HBase user or the user must have group or other
64 * privileges in the filesystem (See HBASE-8369). Note that, giving other users access to read from
65 * snapshot/data files will completely circumvent the access control enforced by HBase.
66 * See org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
68 @InterfaceAudience.Private
69 public class TableSnapshotScanner
extends AbstractClientScanner
{
// Class-wide SLF4J logger.
71 private static final Logger LOG
= LoggerFactory
.getLogger(TableSnapshotScanner
.class);
// Configuration handed to the snapshot helpers and to every ClientSideRegionScanner.
73 private Configuration conf
;
// Name of the snapshot being scanned; used to locate it under rootDir.
74 private String snapshotName
;
// Filesystem obtained from rootDir in the constructor; used for manifest reads, the restore
// copy, the region scanners, and the restoreDir deletion in cleanup().
75 private FileSystem fs
;
// Directory passed to the region scanners. It is either the caller-supplied directory (snapshot
// already restored) or a UUID-named subdirectory of it created by this scanner.
77 private Path restoreDir
;
// Regions to scan: filled by the open*Snapshot() methods after filtering with isValidRegion
// and sorting.
79 private ArrayList
<RegionInfo
> regions
;
// Table descriptor read from the snapshot manifest or from the restore metadata.
80 private TableDescriptor htd
;
// Chooses the open path in the constructor and is consulted again in close().
81 private final boolean snapshotAlreadyRestored
;
// Scanner over regions.get(currentRegion); null while no region is open.
83 private ClientSideRegionScanner currentRegionScanner
= null;
// Index into regions of the region being scanned; starts at -1.
84 private int currentRegion
= -1;
// Incremented in next() for each non-null result while scan.getLimit() > 0, and compared
// against that limit.
86 private int numOfCompleteRows
= 0;
88 * Creates a TableSnapshotScanner.
89 * @param conf the configuration
90 * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
91 * have write permissions to this directory, and this should not be a subdirectory of
92 * rootDir. The scanner deletes the contents of the directory once the scanner is closed.
93 * @param snapshotName the name of the snapshot to read from
94 * @param scan a Scan representing scan parameters
95 * @throws IOException in case of error
// Convenience overload: resolves the HBase root directory from conf with
// CommonFSUtils.getRootDir(conf) and forwards to the overload that takes an explicit rootDir.
97 public TableSnapshotScanner(Configuration conf
, Path restoreDir
, String snapshotName
, Scan scan
)
99 this(conf
, CommonFSUtils
.getRootDir(conf
), restoreDir
, snapshotName
, scan
);
// Overload with an explicit rootDir. Forwards to the six-argument constructor with
// snapshotAlreadyRestored = false, i.e. the path in which this scanner restores the snapshot
// itself (openWithRestoringSnapshot()).
102 public TableSnapshotScanner(Configuration conf
, Path rootDir
, Path restoreDir
,
103 String snapshotName
, Scan scan
) throws IOException
{
104 this(conf
, rootDir
, restoreDir
, snapshotName
, scan
, false);
108 * Creates a TableSnapshotScanner.
109 * @param conf the configuration
110 * @param rootDir root directory for HBase.
111 * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
112 * have write permissions to this directory, and this should not be a subdirectory of
113 * rootDir. The scanner deletes the contents of the directory once the scanner is closed.
114 * @param snapshotName the name of the snapshot to read from
115 * @param scan a Scan representing scan parameters
116 * @param snapshotAlreadyRestored true to indicate that snapshot has been restored.
117 * @throws IOException in case of error
// Primary constructor: stores the arguments, resolves the filesystem, loads the region list
// through one of the two open*Snapshot() paths, then initializes scan metrics.
119 public TableSnapshotScanner(Configuration conf
, Path rootDir
, Path restoreDir
,
120 String snapshotName
, Scan scan
, boolean snapshotAlreadyRestored
) throws IOException
{
122 this.snapshotName
= snapshotName
;
123 this.rootDir
= rootDir
;
125 this.snapshotAlreadyRestored
= snapshotAlreadyRestored
;
// The filesystem is derived from rootDir (not restoreDir) and is the one later used to
// check for and delete restoreDir in cleanup().
126 this.fs
= rootDir
.getFileSystem(conf
);
// Already restored: use the caller's directory as-is and only read the snapshot manifest.
128 if (snapshotAlreadyRestored
) {
129 this.restoreDir
= restoreDir
;
130 openWithoutRestoringSnapshot();
132 // restoreDir will be deleted in close(), use a unique sub directory
133 this.restoreDir
= new Path(restoreDir
, UUID
.randomUUID().toString());
134 openWithRestoringSnapshot();
137 initScanMetrics(scan
);
// Populates regions and htd straight from the completed snapshot's manifest under rootDir.
// This method does not reference restoreDir.
// Throws IllegalArgumentException when the manifest returns no region manifest list (null).
140 private void openWithoutRestoringSnapshot() throws IOException
{
141 Path snapshotDir
= SnapshotDescriptionUtils
.getCompletedSnapshotDir(snapshotName
, rootDir
);
142 SnapshotProtos
.SnapshotDescription snapshotDesc
=
143 SnapshotDescriptionUtils
.readSnapshotInfo(fs
, snapshotDir
);
145 SnapshotManifest manifest
= SnapshotManifest
.open(conf
, fs
, snapshotDir
, snapshotDesc
);
146 List
<SnapshotRegionManifest
> regionManifests
= manifest
.getRegionManifests();
147 if (regionManifests
== null) {
148 throw new IllegalArgumentException("Snapshot seems empty, snapshotName: " + snapshotName
);
151 regions
= new ArrayList
<>(regionManifests
.size());
// Convert each protobuf region manifest to a RegionInfo, keep only regions accepted by
// isValidRegion, and add them to regions in sorted order.
152 regionManifests
.stream().map(r
-> ProtobufUtil
.toRegionInfo(r
.getRegionInfo()))
153 .filter(this::isValidRegion
).sorted().forEach(r
-> regions
.add(r
));
154 htd
= manifest
.getTableDescriptor();
// Filter used when building the region list: decides whether hri should be scanned.
// Regions that pass the offline/split check are then tested with PrivateCellUtil.overlappingKeys
// against the scan's start and stop rows.
157 private boolean isValidRegion(RegionInfo hri
) {
158 // An offline split parent region should be excluded.
159 if (hri
.isOffline() && (hri
.isSplit() || hri
.isSplitParent())) {
// NOTE(review): the final argument of this call is not visible in this view; presumably the
// region's end key - confirm against the full file.
162 return PrivateCellUtil
.overlappingKeys(scan
.getStartRow(), scan
.getStopRow(), hri
.getStartKey(),
// Restores the snapshot for scanning by calling RestoreSnapshotHelper.copySnapshotForScanner
// with restoreDir as the target, then populates htd and regions from the returned metadata.
166 private void openWithRestoringSnapshot() throws IOException
{
167 final RestoreSnapshotHelper
.RestoreMetaChanges meta
=
168 RestoreSnapshotHelper
.copySnapshotForScanner(conf
, fs
, rootDir
, restoreDir
, snapshotName
);
169 final List
<RegionInfo
> restoredRegions
= meta
.getRegionsToAdd();
171 htd
= meta
.getTableDescriptor();
172 regions
= new ArrayList
<>(restoredRegions
.size());
// Same filtering and ordering as openWithoutRestoringSnapshot(): isValidRegion, then sorted.
173 restoredRegions
.stream().filter(this::isValidRegion
).sorted().forEach(r
-> regions
.add(r
));
// Produces the next Result by reading from a ClientSideRegionScanner over the region at index
// currentRegion. The region scanner is created on demand and discarded once it yields null.
// NOTE(review): the statements that advance currentRegion and that return from this method are
// not visible in this view - confirm loop/return structure against the full file.
177 public Result
next() throws IOException
{
178 Result result
= null;
// No scanner open: one has to be created for the current region.
180 if (currentRegionScanner
== null) {
// Index has run past the region list: nothing left to open.
182 if (currentRegion
>= regions
.size()) {
186 RegionInfo hri
= regions
.get(currentRegion
);
187 currentRegionScanner
= new ClientSideRegionScanner(conf
, fs
,
188 restoreDir
, htd
, hri
, scan
, scanMetrics
);
// Region count is only tracked when scan metrics are present.
189 if (this.scanMetrics
!= null) {
190 this.scanMetrics
.countOfRegions
.incrementAndGet();
195 result
= currentRegionScanner
.next();
196 if (result
!= null) {
// The limit is only checked when scan.getLimit() > 0; the counter is pre-incremented, so
// the comparison includes the row just read.
197 if (scan
.getLimit() > 0 && ++this.numOfCompleteRows
> scan
.getLimit()) {
// A null result closes and clears the region scanner, so the null check above will open a
// new one the next time it runs.
203 if (result
== null) {
204 currentRegionScanner
.close();
205 currentRegionScanner
= null;
// Removes restoreDir: if it exists on fs it is deleted recursively (second argument true).
// An IOException is caught here rather than propagated, so this method declares no checked
// exception.
211 private void cleanup() {
213 if (fs
.exists(this.restoreDir
)) {
// delete() returning false is handled separately from an IOException; each case has its own
// message.
214 if (!fs
.delete(this.restoreDir
, true)) {
216 "Delete restore directory for the snapshot failed. restoreDir: " + this.restoreDir
);
219 } catch (IOException ex
) {
221 "Could not delete restore directory for the snapshot. restoreDir: " + this.restoreDir
, ex
);
// Closes the currently open region scanner, if any, then performs work that applies only when
// this scanner restored the snapshot itself (snapshotAlreadyRestored == false).
// NOTE(review): the body of the final if is not visible in this view; per the comment below it
// is presumably the cleanup() call - confirm against the full file.
226 public void close() {
227 if (currentRegionScanner
!= null) {
228 currentRegionScanner
.close();
230 // if snapshotAlreadyRestored is true, then we should invoke cleanup() method by hand.
231 if (!this.snapshotAlreadyRestored
) {
// Lease renewal is not supported by this scanner: always throws UnsupportedOperationException.
237 public boolean renewLease() {
238 throw new UnsupportedOperationException();