// hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.security.access.Permission;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Used to perform Scan operations.
 * <p>
 * All operations are identical to {@link Get} with the exception of instantiation. Rather than
 * specifying a single row, an optional startRow and stopRow may be defined. If rows are not
 * specified, the Scanner will iterate over all rows.
 * <p>
 * To get all columns from all rows of a Table, create an instance with no constraints; use the
 * {@link #Scan()} constructor. To constrain the scan to specific column families, call
 * {@link #addFamily(byte[]) addFamily} for each family to retrieve on your Scan instance.
 * <p>
 * To get specific columns, call {@link #addColumn(byte[], byte[]) addColumn} for each column to
 * retrieve.
 * <p>
 * To only retrieve columns within a specific range of version timestamps, call
 * {@link #setTimeRange(long, long) setTimeRange}.
 * <p>
 * To only retrieve columns with a specific timestamp, call {@link #setTimestamp(long)
 * setTimestamp}.
 * <p>
 * To limit the number of versions of each column to be returned, call {@link #readVersions(int)}.
 * <p>
 * To limit the maximum number of values returned for each call to next(), call
 * {@link #setBatch(int) setBatch}.
 * <p>
 * To add a filter, call {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
 * <p>
 * The small scan API is deprecated since 2.0.0. Use the {@link #setLimit(int)} method on the Scan
 * object instead to tell the RegionServer how many rows are wanted. When the number of returned
 * rows reaches the limit, the RegionServer closes the RegionScanner automatically. The new
 * implementation also fetches data when opening the scanner, so a scan can complete in a single
 * RPC call. A {@link #setReadType(ReadType)} method has also been introduced; use it to
 * explicitly tell the RegionServer to use pread.
 * <p>
 * Expert: To explicitly disable server-side block caching for this scan, execute
 * {@link #setCacheBlocks(boolean)}.
 * <p>
 * <em>Note:</em> Usage alters Scan instances. Internally, attributes are updated as the Scan runs
 * and, if enabled, metrics accumulate in the Scan instance. Be aware of this when cloning or
 * reusing a Scan instance; it is safer to create a new Scan instance per usage.
 */
@InterfaceAudience.Public
public class Scan extends Query {
  private static final Logger LOG = LoggerFactory.getLogger(Scan.class);

  private static final String RAW_ATTR = "_raw_";

  private byte[] startRow = HConstants.EMPTY_START_ROW;
  private boolean includeStartRow = true;
  private byte[] stopRow = HConstants.EMPTY_END_ROW;
  private boolean includeStopRow = false;
  private int maxVersions = 1;
  private int batch = -1;

  /**
   * Partial {@link Result}s are {@link Result}s that must be combined to form a complete
   * {@link Result}. The {@link Result}s had to be returned in fragments (i.e. as partials) because
   * the size of the cells in the row exceeded max result size on the server. Typically partial
   * results will be combined client side into complete results before being delivered to the
   * caller. However, if this flag is set, the caller is indicating that they do not mind seeing
   * partial results (i.e. they understand that the results returned from the Scanner may only
   * represent part of a particular row). In such a case, any attempt to combine the partials into
   * a complete result on the client side will be skipped, and the caller will be able to see the
   * exact results returned from the server.
   */
  private boolean allowPartialResults = false;

  private int storeLimit = -1;
  private int storeOffset = 0;

  private static final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";

  // If an application wants to use multiple scans over different tables each scan must
  // define this attribute with the appropriate table name by calling
  // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
  static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";

  /**
   * -1 means no caching specified and the value of {@link HConstants#HBASE_CLIENT_SCANNER_CACHING}
   * (default to {@link HConstants#DEFAULT_HBASE_CLIENT_SCANNER_CACHING}) will be used
   */
  private int caching = -1;
  private long maxResultSize = -1;
  private boolean cacheBlocks = true;
  private boolean reversed = false;
  private TimeRange tr = TimeRange.allTime();
  private Map<byte[], NavigableSet<byte[]>> familyMap =
    new TreeMap<byte[], NavigableSet<byte[]>>(Bytes.BYTES_COMPARATOR);
  private Boolean asyncPrefetch = null;

  /**
   * Parameter name for client scanner sync/async prefetch toggle.
   * When using async scanner, prefetching data from the server is done at the background.
   * The parameter currently won't have any effect in the case that the user has set
   * Scan#setSmall or Scan#setReversed
   */
  public static final String HBASE_CLIENT_SCANNER_ASYNC_PREFETCH =
    "hbase.client.scanner.async.prefetch";

  /**
   * Default value of {@link #HBASE_CLIENT_SCANNER_ASYNC_PREFETCH}.
   */
  public static final boolean DEFAULT_HBASE_CLIENT_SCANNER_ASYNC_PREFETCH = false;

  /**
   * The mvcc read point to use when open a scanner. Remember to clear it after switching regions
   * as the mvcc is only valid within region scope.
   */
  private long mvccReadPoint = -1L;

  /**
   * The number of rows we want for this scan. We will terminate the scan if the number of return
   * rows reaches this value.
   */
  private int limit = -1;

  /**
   * Control whether to use pread at server side.
   */
  private ReadType readType = ReadType.DEFAULT;

  private boolean needCursorResult = false;

  /**
   * Create a Scan operation across all rows.
   */
  public Scan() {}
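
  /*
   * A minimal usage sketch (illustrative only, not part of the original class). It assumes an
   * already-obtained Table instance named "table" and a column family "cf"; adjust to your schema.
   *
   *   Scan scan = new Scan()
   *     .addFamily(Bytes.toBytes("cf"))   // restrict to one column family
   *     .setCaching(100)                  // rows fetched per scanner RPC
   *     .setLimit(1000);                  // stop after 1000 rows
   *   try (ResultScanner scanner = table.getScanner(scan)) {
   *     for (Result result : scanner) {
   *       // process each Result here
   *     }
   *   }
   */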

  /**
   * Creates a new instance of this class while copying all values.
   * @param scan The scan instance to copy from.
   * @throws IOException When copying the values fails.
   */
  public Scan(Scan scan) throws IOException {
    startRow = scan.getStartRow();
    includeStartRow = scan.includeStartRow();
    stopRow = scan.getStopRow();
    includeStopRow = scan.includeStopRow();
    maxVersions = scan.getMaxVersions();
    batch = scan.getBatch();
    storeLimit = scan.getMaxResultsPerColumnFamily();
    storeOffset = scan.getRowOffsetPerColumnFamily();
    caching = scan.getCaching();
    maxResultSize = scan.getMaxResultSize();
    cacheBlocks = scan.getCacheBlocks();
    filter = scan.getFilter(); // clone?
    loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
    consistency = scan.getConsistency();
    this.setIsolationLevel(scan.getIsolationLevel());
    reversed = scan.isReversed();
    asyncPrefetch = scan.isAsyncPrefetch();
    allowPartialResults = scan.getAllowPartialResults();
    tr = scan.getTimeRange(); // TimeRange is immutable
    Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
    for (Map.Entry<byte[], NavigableSet<byte[]>> entry : fams.entrySet()) {
      byte[] fam = entry.getKey();
      NavigableSet<byte[]> cols = entry.getValue();
      if (cols != null && cols.size() > 0) {
        for (byte[] col : cols) {
          addColumn(fam, col);
        }
      } else {
        addFamily(fam);
      }
    }
    for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
    for (Map.Entry<byte[], TimeRange> entry : scan.getColumnFamilyTimeRange().entrySet()) {
      TimeRange tr = entry.getValue();
      setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
    }
    this.mvccReadPoint = scan.getMvccReadPoint();
    this.limit = scan.getLimit();
    this.needCursorResult = scan.isNeedCursorResult();
    setPriority(scan.getPriority());
    readType = scan.getReadType();
    super.setReplicaId(scan.getReplicaId());
  }

  /**
   * Builds a scan object with the same specs as get.
   * @param get get to model scan after
   */
  public Scan(Get get) {
    this.startRow = get.getRow();
    this.includeStartRow = true;
    this.stopRow = get.getRow();
    this.includeStopRow = true;
    this.filter = get.getFilter();
    this.cacheBlocks = get.getCacheBlocks();
    this.maxVersions = get.getMaxVersions();
    this.storeLimit = get.getMaxResultsPerColumnFamily();
    this.storeOffset = get.getRowOffsetPerColumnFamily();
    this.tr = get.getTimeRange();
    this.familyMap = get.getFamilyMap();
    this.asyncPrefetch = false;
    this.consistency = get.getConsistency();
    this.setIsolationLevel(get.getIsolationLevel());
    this.loadColumnFamiliesOnDemand = get.getLoadColumnFamiliesOnDemandValue();
    for (Map.Entry<String, byte[]> attr : get.getAttributesMap().entrySet()) {
      setAttribute(attr.getKey(), attr.getValue());
    }
    for (Map.Entry<byte[], TimeRange> entry : get.getColumnFamilyTimeRange().entrySet()) {
      TimeRange tr = entry.getValue();
      setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
    }
    this.mvccReadPoint = -1L;
    setPriority(get.getPriority());
    super.setReplicaId(get.getReplicaId());
  }

  public boolean isGetScan() {
    return includeStartRow && includeStopRow
      && ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow);
  }

  /**
   * Get all columns from the specified family.
   * <p>
   * Overrides previous calls to addColumn for this family.
   * @param family family name
   * @return this
   */
  public Scan addFamily(byte[] family) {
    familyMap.remove(family);
    familyMap.put(family, null);
    return this;
  }

  /**
   * Get the column from the specified family with the specified qualifier.
   * <p>
   * Overrides previous calls to addFamily for this family.
   * @param family family name
   * @param qualifier column qualifier
   * @return this
   */
  public Scan addColumn(byte[] family, byte[] qualifier) {
    NavigableSet<byte[]> set = familyMap.get(family);
    if (set == null) {
      set = new TreeSet<>(Bytes.BYTES_COMPARATOR);
      familyMap.put(family, set);
    }
    if (qualifier == null) {
      qualifier = HConstants.EMPTY_BYTE_ARRAY;
    }
    set.add(qualifier);
    return this;
  }
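
  /*
   * Illustrative sketch (not part of the original class) of how addFamily and addColumn interact:
   * adding a whole family overrides earlier column selections for that family, and vice versa.
   * Family and qualifier names below are placeholders.
   *
   *   Scan scan = new Scan();
   *   scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1")); // only cf:q1
   *   scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q2")); // now cf:q1 and cf:q2
   *   scan.addFamily(Bytes.toBytes("cf"));                      // replaces both: all columns of cf
   */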

  /**
   * Get versions of columns only within the specified timestamp range,
   * [minStamp, maxStamp). Note, default maximum versions to return is 1. If
   * your time range spans more than one version and you want all versions
   * returned, up the number of versions beyond the default.
   * @param minStamp minimum timestamp value, inclusive
   * @param maxStamp maximum timestamp value, exclusive
   * @see #readAllVersions()
   * @see #readVersions(int)
   * @return this
   */
  public Scan setTimeRange(long minStamp, long maxStamp) throws IOException {
    tr = TimeRange.between(minStamp, maxStamp);
    return this;
  }

  /**
   * Get versions of columns with the specified timestamp. Note, default maximum
   * versions to return is 1. If your time range spans more than one version
   * and you want all versions returned, up the number of versions beyond the
   * default.
   * @param timestamp version timestamp
   * @see #readAllVersions()
   * @see #readVersions(int)
   * @return this
   */
  public Scan setTimestamp(long timestamp) {
    try {
      tr = TimeRange.at(timestamp);
    } catch (Exception e) {
      // This should never happen, unless integer overflow or something extremely wrong...
      LOG.error("TimeRange failed, likely caused by integer overflow. ", e);
      throw e;
    }
    return this;
  }
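
  /*
   * Illustrative sketch (not part of the original class) of restricting a scan by time.
   * Timestamps are in milliseconds and the range is [minStamp, maxStamp), i.e. the upper bound is
   * exclusive. The values shown are placeholders.
   *
   *   long now = System.currentTimeMillis();
   *   Scan scan = new Scan()
   *     .setTimeRange(now - 3600_000L, now)  // cells written in the last hour
   *     .readVersions(3);                    // up to 3 versions per column within that range
   */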

  @Override public Scan setColumnFamilyTimeRange(byte[] cf, long minStamp, long maxStamp) {
    return (Scan) super.setColumnFamilyTimeRange(cf, minStamp, maxStamp);
  }

  /**
   * Set the start row of the scan.
   * <p>
   * If the specified row does not exist, the Scanner will start from the next closest row after
   * the specified row.
   * @param startRow row to start scanner at or after
   * @return this
   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
   */
  public Scan withStartRow(byte[] startRow) {
    return withStartRow(startRow, true);
  }

  /**
   * Set the start row of the scan.
   * <p>
   * If the specified row does not exist, or {@code inclusive} is {@code false}, the Scanner
   * will start from the next closest row after the specified row.
   * <p>
   * <b>Note:</b> When used together with {@link #setRowPrefixFilter(byte[])}, the result might be
   * unexpected.
   * </p>
   * @param startRow row to start scanner at or after
   * @param inclusive whether we should include the start row when scan
   * @return this
   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
   */
  public Scan withStartRow(byte[] startRow, boolean inclusive) {
    if (Bytes.len(startRow) > HConstants.MAX_ROW_LENGTH) {
      throw new IllegalArgumentException("startRow's length must be less than or equal to "
        + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
    }
    this.startRow = startRow;
    this.includeStartRow = inclusive;
    return this;
  }

  /**
   * Set the stop row of the scan.
   * <p>
   * The scan will include rows that are lexicographically less than the provided stopRow.
   * <p>
   * <b>Note:</b> When doing a filter for a rowKey <u>Prefix</u> use
   * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result.
   * </p>
   * @param stopRow row to end at (exclusive)
   * @return this
   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
   */
  public Scan withStopRow(byte[] stopRow) {
    return withStopRow(stopRow, false);
  }

  /**
   * Set the stop row of the scan.
   * <p>
   * The scan will include rows that are lexicographically less than (or equal to if
   * {@code inclusive} is {@code true}) the provided stopRow.
   * @param stopRow row to end at
   * @param inclusive whether we should include the stop row when scan
   * @return this
   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
   *           exceeds {@link HConstants#MAX_ROW_LENGTH})
   */
  public Scan withStopRow(byte[] stopRow, boolean inclusive) {
    if (Bytes.len(stopRow) > HConstants.MAX_ROW_LENGTH) {
      throw new IllegalArgumentException("stopRow's length must be less than or equal to "
        + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
    }
    this.stopRow = stopRow;
    this.includeStopRow = inclusive;
    return this;
  }
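
  /*
   * Illustrative sketch (not part of the original class) of bounding a scan by row keys. The start
   * row is inclusive and the stop row exclusive by default; the two-argument overloads change
   * that. Row keys below are placeholders.
   *
   *   Scan scan = new Scan()
   *     .withStartRow(Bytes.toBytes("row-0100"))        // inclusive
   *     .withStopRow(Bytes.toBytes("row-0200"));        // exclusive
   *
   *   // Or include the stop row explicitly:
   *   Scan closed = new Scan()
   *     .withStartRow(Bytes.toBytes("row-0100"), true)
   *     .withStopRow(Bytes.toBytes("row-0200"), true);  // inclusive stop row
   */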

  /**
   * <p>Set a filter (using stopRow and startRow) so the result set only contains rows where the
   * rowKey starts with the specified prefix.</p>
   * <p>This is a utility method that converts the desired rowPrefix into the appropriate values
   * for the startRow and stopRow to achieve the desired result.</p>
   * <p>This can safely be used in combination with setFilter.</p>
   * <p><b>NOTE: Doing a {@link #withStartRow(byte[])} and/or {@link #withStopRow(byte[])}
   * after this method will yield undefined results.</b></p>
   * @param rowPrefix the prefix all rows must start with. (Set <i>null</i> to remove the filter.)
   * @return this
   * @deprecated since 3.0.0. The scan result might be unexpected in some cases. For example, with
   *             startRow "112" and a rowPrefixFilter of "11", the result of this scan might
   *             contain "111". This method implements the filter by setting startRow and stopRow,
   *             but does not take care of the scenario where startRow has already been set.
   */
  @Deprecated
  public Scan setRowPrefixFilter(byte[] rowPrefix) {
    if (rowPrefix == null) {
      withStartRow(HConstants.EMPTY_START_ROW);
      withStopRow(HConstants.EMPTY_END_ROW);
    } else {
      this.withStartRow(rowPrefix);
      this.withStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
    }
    return this;
  }
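
  /*
   * Illustrative sketch (not part of the original class) of a prefix scan expressed directly with
   * withStartRow/withStopRow, which avoids the ordering pitfall noted in the deprecation above.
   * ClientUtil.calculateTheClosestNextRowKeyForPrefix is the same helper used by
   * setRowPrefixFilter; the prefix value is a placeholder.
   *
   *   byte[] prefix = Bytes.toBytes("user-42|");
   *   Scan scan = new Scan()
   *     .withStartRow(prefix)
   *     .withStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(prefix));
   */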

  /**
   * Get all available versions.
   * @return this
   */
  public Scan readAllVersions() {
    this.maxVersions = Integer.MAX_VALUE;
    return this;
  }

  /**
   * Get up to the specified number of versions of each column.
   * @param versions specified number of versions for each column
   * @return this
   */
  public Scan readVersions(int versions) {
    this.maxVersions = versions;
    return this;
  }

  /**
   * Set the maximum number of cells to return for each call to next(). Callers should be aware
   * that this is not equivalent to calling {@link #setAllowPartialResults(boolean)}. If partial
   * results are not allowed, the number of cells in each Result must equal your batch setting
   * unless it is the last Result for the current row. This makes the method useful for paging
   * queries. If you just want to prevent OOM at the client, it is better to use
   * setAllowPartialResults(true).
   * @param batch the maximum number of values
   * @see Result#mayHaveMoreCellsInRow()
   */
  public Scan setBatch(int batch) {
    if (this.hasFilter() && this.filter.hasFilterRow()) {
      throw new IncompatibleFilterException(
        "Cannot set batch on a scan using a filter" +
          " that returns true for filter.hasFilterRow");
    }
    this.batch = batch;
    return this;
  }

  /**
   * Set the maximum number of values to return per row per Column Family
   * @param limit the maximum number of values returned / row / CF
   */
  public Scan setMaxResultsPerColumnFamily(int limit) {
    this.storeLimit = limit;
    return this;
  }

  /**
   * Set offset for the row per Column Family.
   * @param offset is the number of kvs that will be skipped.
   */
  public Scan setRowOffsetPerColumnFamily(int offset) {
    this.storeOffset = offset;
    return this;
  }

  /**
   * Set the number of rows for caching that will be passed to scanners.
   * If not set, the Configuration setting {@link HConstants#HBASE_CLIENT_SCANNER_CACHING} will
   * apply.
   * Higher caching values will enable faster scanners but will use more memory.
   * @param caching the number of rows for caching
   */
  public Scan setCaching(int caching) {
    this.caching = caching;
    return this;
  }

  /**
   * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
   */
  public long getMaxResultSize() {
    return maxResultSize;
  }

  /**
   * Set the maximum result size. The default is -1; this means that no specific
   * maximum result size will be set for this scan, and the global configured
   * value will be used instead. (Defaults to unlimited).
   *
   * @param maxResultSize The maximum result size in bytes.
   */
  public Scan setMaxResultSize(long maxResultSize) {
    this.maxResultSize = maxResultSize;
    return this;
  }
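
  /*
   * Illustrative sketch (not part of the original class) of tuning how much data each scanner RPC
   * returns. Caching bounds the number of rows per RPC, while maxResultSize bounds the bytes per
   * RPC; the server stops filling a response once either bound is reached. The numbers are
   * placeholders.
   *
   *   Scan scan = new Scan()
   *     .setCaching(500)                        // up to 500 rows per RPC
   *     .setMaxResultSize(2L * 1024 * 1024);    // but no more than ~2 MB per RPC
   */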

  @Override
  public Scan setFilter(Filter filter) {
    super.setFilter(filter);
    return this;
  }

  /**
   * Setting the familyMap
   * @param familyMap map of family to qualifier
   * @return this
   */
  public Scan setFamilyMap(Map<byte[], NavigableSet<byte[]>> familyMap) {
    this.familyMap = familyMap;
    return this;
  }

  /**
   * Getting the familyMap
   * @return familyMap
   */
  public Map<byte[], NavigableSet<byte[]>> getFamilyMap() {
    return this.familyMap;
  }

  /**
   * @return the number of families in familyMap
   */
  public int numFamilies() {
    if (hasFamilies()) {
      return this.familyMap.size();
    }
    return 0;
  }

  /**
   * @return true if familyMap is non empty, false otherwise
   */
  public boolean hasFamilies() {
    return !this.familyMap.isEmpty();
  }

  /**
   * @return the keys of the familyMap
   */
  public byte[][] getFamilies() {
    if (hasFamilies()) {
      return this.familyMap.keySet().toArray(new byte[0][0]);
    }
    return null;
  }

  /**
   * @return the startrow
   */
  public byte[] getStartRow() {
    return this.startRow;
  }

  /**
   * @return if we should include start row when scan
   */
  public boolean includeStartRow() {
    return includeStartRow;
  }

  /**
   * @return the stoprow
   */
  public byte[] getStopRow() {
    return this.stopRow;
  }

  /**
   * @return if we should include stop row when scan
   */
  public boolean includeStopRow() {
    return includeStopRow;
  }

  /**
   * @return the max number of versions to fetch
   */
  public int getMaxVersions() {
    return this.maxVersions;
  }

  /**
   * @return maximum number of values to return for a single call to next()
   */
  public int getBatch() {
    return this.batch;
  }

  /**
   * @return maximum number of values to return per row per CF
   */
  public int getMaxResultsPerColumnFamily() {
    return this.storeLimit;
  }

  /**
   * Method for retrieving the scan's offset per row per column
   * family (#kvs to be skipped)
   * @return row offset
   */
  public int getRowOffsetPerColumnFamily() {
    return this.storeOffset;
  }

  /**
   * @return caching the number of rows fetched when calling next on a scanner
   */
  public int getCaching() {
    return this.caching;
  }

  /**
   * @return TimeRange
   */
  public TimeRange getTimeRange() {
    return this.tr;
  }

  /**
   * @return RowFilter
   */
  @Override
  public Filter getFilter() {
    return filter;
  }

  /**
   * @return true if a filter has been specified, false if not
   */
  public boolean hasFilter() {
    return filter != null;
  }

  /**
   * Set whether blocks should be cached for this Scan.
   * <p>
   * This is true by default. When true, default settings of the table and
   * family are used (this will never override caching blocks if the block
   * cache is disabled for that family or entirely).
   *
   * @param cacheBlocks if false, default settings are overridden and blocks
   *          will not be cached
   */
  public Scan setCacheBlocks(boolean cacheBlocks) {
    this.cacheBlocks = cacheBlocks;
    return this;
  }

  /**
   * Get whether blocks should be cached for this Scan.
   * @return true if default caching should be used, false if blocks should not
   *         be cached
   */
  public boolean getCacheBlocks() {
    return cacheBlocks;
  }

  /**
   * Set whether this scan is a reversed one
   * <p>
   * This is false by default which means forward(normal) scan.
   *
   * @param reversed if true, scan will be backward order
   * @return this
   */
  public Scan setReversed(boolean reversed) {
    this.reversed = reversed;
    return this;
  }

  /**
   * Get whether this scan is a reversed one.
   * @return true if backward scan, false if forward(default) scan
   */
  public boolean isReversed() {
    return reversed;
  }

  /**
   * Setting whether the caller wants to see the partial results when server returns
   * less-than-expected cells. It is helpful while scanning a huge row to prevent OOM at client.
   * By default this value is false and the complete results will be assembled client side
   * before being delivered to the caller.
   * @param allowPartialResults
   * @return this
   * @see Result#mayHaveMoreCellsInRow()
   * @see #setBatch(int)
   */
  public Scan setAllowPartialResults(final boolean allowPartialResults) {
    this.allowPartialResults = allowPartialResults;
    return this;
  }
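
  /*
   * Illustrative sketch (not part of the original class) of scanning a very wide row without
   * buffering it whole on the client. With allowPartialResults enabled, a single row may arrive
   * split across several Results, so callers should check mayHaveMoreCellsInRow(). The table
   * variable and family name are placeholders.
   *
   *   Scan scan = new Scan()
   *     .addFamily(Bytes.toBytes("cf"))
   *     .setAllowPartialResults(true);
   *   try (ResultScanner scanner = table.getScanner(scan)) {
   *     for (Result partial : scanner) {
   *       // partial.mayHaveMoreCellsInRow() == true means the row continues in the next Result
   *     }
   *   }
   */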

  /**
   * @return true when the constructor of this scan understands that the results they will see may
   *         only represent a partial portion of a row. The entire row would be retrieved by
   *         subsequent calls to {@link ResultScanner#next()}
   */
  public boolean getAllowPartialResults() {
    return allowPartialResults;
  }

  @Override
  public Scan setLoadColumnFamiliesOnDemand(boolean value) {
    return (Scan) super.setLoadColumnFamiliesOnDemand(value);
  }

  /**
   * Compile the table and column family (i.e. schema) information
   * into a String. Useful for parsing and aggregation by debugging,
   * logging, and administration tools.
   * @return Map
   */
  @Override
  public Map<String, Object> getFingerprint() {
    Map<String, Object> map = new HashMap<>();
    List<String> families = new ArrayList<>();
    if (this.familyMap.isEmpty()) {
      map.put("families", "ALL");
      return map;
    } else {
      map.put("families", families);
    }
    for (Map.Entry<byte[], NavigableSet<byte[]>> entry : this.familyMap.entrySet()) {
      families.add(Bytes.toStringBinary(entry.getKey()));
    }
    return map;
  }

  /**
   * Compile the details beyond the scope of getFingerprint (row, columns,
   * timestamps, etc.) into a Map along with the fingerprinted information.
   * Useful for debugging, logging, and administration tools.
   * @param maxCols a limit on the number of columns output prior to truncation
   * @return Map
   */
  @Override
  public Map<String, Object> toMap(int maxCols) {
    // start with the fingerprint map and build on top of it
    Map<String, Object> map = getFingerprint();
    // map from families to column list replaces fingerprint's list of families
    Map<String, List<String>> familyColumns = new HashMap<>();
    map.put("families", familyColumns);
    // add scalar information first
    map.put("startRow", Bytes.toStringBinary(this.startRow));
    map.put("stopRow", Bytes.toStringBinary(this.stopRow));
    map.put("maxVersions", this.maxVersions);
    map.put("batch", this.batch);
    map.put("caching", this.caching);
    map.put("maxResultSize", this.maxResultSize);
    map.put("cacheBlocks", this.cacheBlocks);
    map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
    List<Long> timeRange = new ArrayList<>(2);
    timeRange.add(this.tr.getMin());
    timeRange.add(this.tr.getMax());
    map.put("timeRange", timeRange);
    int colCount = 0;
    // iterate through affected families and list out up to maxCols columns
    for (Map.Entry<byte[], NavigableSet<byte[]>> entry : this.familyMap.entrySet()) {
      List<String> columns = new ArrayList<>();
      familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
      if (entry.getValue() == null) {
        colCount++;
        --maxCols;
        columns.add("ALL");
      } else {
        colCount += entry.getValue().size();
        if (maxCols <= 0) {
          continue;
        }
        for (byte[] column : entry.getValue()) {
          if (--maxCols <= 0) {
            continue;
          }
          columns.add(Bytes.toStringBinary(column));
        }
      }
    }
    map.put("totalColumns", colCount);
    if (this.filter != null) {
      map.put("filter", this.filter.toString());
    }
    // add the id if set
    if (getId() != null) {
      map.put("id", getId());
    }
    return map;
  }

  /**
   * Enable/disable "raw" mode for this scan.
   * If "raw" is enabled the scan will return all
   * delete markers and deleted rows that have not
   * been collected, yet.
   * This is mostly useful for Scan on column families
   * that have KEEP_DELETED_CELLS enabled.
   * It is an error to specify any column when "raw" is set.
   * @param raw True/False to enable/disable "raw" mode.
   */
  public Scan setRaw(boolean raw) {
    setAttribute(RAW_ATTR, Bytes.toBytes(raw));
    return this;
  }

  /**
   * @return True if this Scan is in "raw" mode.
   */
  public boolean isRaw() {
    byte[] attr = getAttribute(RAW_ATTR);
    return attr == null ? false : Bytes.toBoolean(attr);
  }

  @Override
  public Scan setAttribute(String name, byte[] value) {
    return (Scan) super.setAttribute(name, value);
  }

  @Override
  public Scan setId(String id) {
    return (Scan) super.setId(id);
  }

  @Override
  public Scan setAuthorizations(Authorizations authorizations) {
    return (Scan) super.setAuthorizations(authorizations);
  }

  @Override
  public Scan setACL(Map<String, Permission> perms) {
    return (Scan) super.setACL(perms);
  }

  @Override
  public Scan setACL(String user, Permission perms) {
    return (Scan) super.setACL(user, perms);
  }

  @Override
  public Scan setConsistency(Consistency consistency) {
    return (Scan) super.setConsistency(consistency);
  }

  @Override
  public Scan setReplicaId(int Id) {
    return (Scan) super.setReplicaId(Id);
  }

  @Override
  public Scan setIsolationLevel(IsolationLevel level) {
    return (Scan) super.setIsolationLevel(level);
  }

  @Override
  public Scan setPriority(int priority) {
    return (Scan) super.setPriority(priority);
  }

  /**
   * Enable collection of {@link ScanMetrics}. For advanced users.
   * @param enabled Set to true to enable accumulating scan metrics
   */
  public Scan setScanMetricsEnabled(final boolean enabled) {
    setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.valueOf(enabled)));
    return this;
  }

  /**
   * @return True if collection of scan metrics is enabled. For advanced users.
   */
  public boolean isScanMetricsEnabled() {
    byte[] attr = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
    return attr == null ? false : Bytes.toBoolean(attr);
  }

  public Boolean isAsyncPrefetch() {
    return asyncPrefetch;
  }

  /**
   * @deprecated Since 3.0.0, will be removed in 4.0.0. After building sync client upon async
   *             client, the implementation is always 'async prefetch', so this flag is useless
   *             now.
   */
  @Deprecated
  public Scan setAsyncPrefetch(boolean asyncPrefetch) {
    this.asyncPrefetch = asyncPrefetch;
    return this;
  }

  /**
   * @return the limit of rows for this scan
   */
  public int getLimit() {
    return limit;
  }

  /**
   * Set the limit of rows for this scan. We will terminate the scan if the number of returned rows
   * reaches this value.
   * <p>
   * This condition will be tested at last, after all other conditions such as stopRow, filter,
   * etc.
   * @param limit the limit of rows for this scan
   * @return this
   */
  public Scan setLimit(int limit) {
    this.limit = limit;
    return this;
  }

  /**
   * Call this when you only want to get one row. It will set {@code limit} to {@code 1}, and also
   * set {@code readType} to {@link ReadType#PREAD}.
   * @return this
   */
  public Scan setOneRowLimit() {
    return setLimit(1).setReadType(ReadType.PREAD);
  }
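
  /*
   * Illustrative sketch (not part of the original class) of a bounded lookup: fetch at most one
   * row at or after a given key, using pread rather than a streaming read. The row key is a
   * placeholder.
   *
   *   Scan scan = new Scan()
   *     .withStartRow(Bytes.toBytes("row-0100"))
   *     .setOneRowLimit();   // same as setLimit(1).setReadType(ReadType.PREAD)
   */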

  @InterfaceAudience.Public
  public enum ReadType {
    DEFAULT, STREAM, PREAD
  }

  /**
   * @return the read type for this scan
   */
  public ReadType getReadType() {
    return readType;
  }

  /**
   * Set the read type for this scan.
   * <p>
   * Notice that we may choose to use pread even if you specify {@link ReadType#STREAM} here. For
   * example, we will always use pread if this is a get scan.
   * @return this
   */
  public Scan setReadType(ReadType readType) {
    this.readType = readType;
    return this;
  }

  /**
   * Get the mvcc read point used to open a scanner.
   */
  long getMvccReadPoint() {
    return mvccReadPoint;
  }

  /**
   * Set the mvcc read point used to open a scanner.
   */
  Scan setMvccReadPoint(long mvccReadPoint) {
    this.mvccReadPoint = mvccReadPoint;
    return this;
  }

  /**
   * Set the mvcc read point to -1 which means do not use it.
   */
  Scan resetMvccReadPoint() {
    return setMvccReadPoint(-1L);
  }

  /**
   * When the server is slow, or we scan a table with much deleted data, or we use a sparse filter,
   * the server will respond with heartbeat messages to prevent timeouts. However, the scanner only
   * returns a Result to the client when it actually has one to return, so if there are many
   * heartbeats the blocking time on ResultScanner#next() may be very long, which is not friendly
   * to online services.
   * <p>
   * Set this to true and you can get a special Result whose #isCursor() returns true and which
   * does not contain any real data. It only tells you where the server has scanned so far. You can
   * call next to continue scanning, or open a new scanner with this row key as the start row
   * whenever you want.
   * <p>
   * Users get a cursor when and only when there is a response from the server but we cannot return
   * a Result to users, for example, when the response is a heartbeat or there are partial cells
   * but the user does not allow partial results.
   * <p>
   * Currently the cursor is at row level, which means the special Result will only contain a row
   * key.
   * {@link Result#isCursor()}
   * {@link Result#getCursor()}
   * {@link Cursor}
   */
  public Scan setNeedCursorResult(boolean needCursorResult) {
    this.needCursorResult = needCursorResult;
    return this;
  }

  public boolean isNeedCursorResult() {
    return needCursorResult;
  }
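
  /*
   * Illustrative sketch (not part of the original class) of consuming cursor results so a slow
   * scan never blocks in next() for long without feedback. The table variable and family name are
   * placeholders.
   *
   *   Scan scan = new Scan()
   *     .addFamily(Bytes.toBytes("cf"))
   *     .setNeedCursorResult(true);
   *   try (ResultScanner scanner = table.getScanner(scan)) {
   *     for (Result result : scanner) {
   *       if (result.isCursor()) {
   *         // No data yet; the server has scanned up to this row key.
   *         byte[] lastScanned = result.getCursor().getRow();
   *         // Optionally persist lastScanned and resume later via Scan.createScanFromCursor().
   *       } else {
   *         // A normal Result with real cells.
   *       }
   *     }
   *   }
   */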

  /**
   * Create a new Scan with a cursor. It only sets the position information, such as the start row
   * key. The other settings (like column families, stop row, limit) should still be filled in by
   * the user.
   * {@link Result#isCursor()}
   * {@link Result#getCursor()}
   * {@link Cursor}
   */
  public static Scan createScanFromCursor(Cursor cursor) {
    return new Scan().withStartRow(cursor.getRow());
  }
}