tools/telemetry/third_party/gsutilz/gslib/ls_helper.py

   1 # -*- coding: utf-8 -*-
   2 # Copyright 2014 Google Inc. All Rights Reserved.
   3 #
   4 # Licensed under the Apache License, Version 2.0 (the "License");
   5 # you may not use this file except in compliance with the License.
   6 # You may obtain a copy of the License at
   7 #
   8 #     http://www.apache.org/licenses/LICENSE-2.0
   9 #
  10 # Unless required by applicable law or agreed to in writing, software
  11 # distributed under the License is distributed on an "AS IS" BASIS,
  12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 # See the License for the specific language governing permissions and
  14 # limitations under the License.
  15 """Utility functions and class for listing commands such as ls and du."""
  16
  17 from __future__ import absolute_import
  18
  19 import fnmatch
  20
  21 from gslib.exception import CommandException
  22 from gslib.plurality_checkable_iterator import PluralityCheckableIterator
  23 from gslib.util import UTF8
  24 from gslib.wildcard_iterator import StorageUrlFromString
  25
  26
def PrintNewLine():
  """Default function for printing new lines between directories.

  Emits a single empty line on standard output. LsHelper uses this as the
  default value of its print_newline_func callback.
  """
  # Bare Python 2 print statement: writes just a newline.
  print
  30
  31
  32 def PrintDirHeader(bucket_listing_ref):
  33   """Default function for printing headers for prefixes.
  34
  35   Header is printed prior to listing the contents of the prefix.
  36
  37   Args:
  38     bucket_listing_ref: BucketListingRef of type PREFIX.
  39   """
  40   print '%s:' % bucket_listing_ref.url_string.encode(UTF8)
  41
  42
  43 def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
  44   """Default function for printing headers for buckets.
  45
  46   Header is printed prior to listing the contents of the bucket.
  47
  48   Args:
  49     bucket_listing_ref: BucketListingRef of type BUCKET.
  50   """
  51   pass
  52
  53
  54 def PrintDir(bucket_listing_ref):
  55   """Default function for printing buckets or prefixes.
  56
  57   Args:
  58     bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  59   """
  60   print bucket_listing_ref.url_string.encode(UTF8)
  61
  62
  63 # pylint: disable=unused-argument
  64 def PrintDirSummary(num_bytes, bucket_listing_ref):
  65   """Off-by-default function for printing buckets or prefix size summaries.
  66
  67   Args:
  68     num_bytes: Number of bytes contained in the directory.
  69     bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.
  70   """
  71   pass
  72
  73
  74 def PrintObject(bucket_listing_ref):
  75   """Default printing function for objects.
  76
  77   Args:
  78     bucket_listing_ref: BucketListingRef of type OBJECT.
  79
  80   Returns:
  81     (num_objects, num_bytes).
  82   """
  83   print bucket_listing_ref.url_string.encode(UTF8)
  84   return (1, 0)
  85
  86
  87 class LsHelper(object):
  88   """Helper class for ls and du."""
  89
  90   def __init__(self, iterator_func, logger,
  91                print_object_func=PrintObject,
  92                print_dir_func=PrintDir,
  93                print_dir_header_func=PrintDirHeader,
  94                print_bucket_header_func=PrintBucketHeader,
  95                print_dir_summary_func=PrintDirSummary,
  96                print_newline_func=PrintNewLine,
  97                all_versions=False, should_recurse=False,
  98                exclude_patterns=None, fields=('name',)):
  99     """Initializes the helper class to prepare for listing.
 100
 101     Args:
 102       iterator_func: Function for instantiating iterator.
 103                      Inputs-
 104                        url_string- Url string to iterate on. May include
 105                                    wildcards.
 106                        all_versions=False- If true, iterate over all object
 107                                            versions.
 108       logger: Logger for outputting warnings / errors.
 109       print_object_func: Function for printing objects.
 110       print_dir_func:    Function for printing buckets/prefixes.
 111       print_dir_header_func: Function for printing header line for buckets
 112                              or prefixes.
 113       print_bucket_header_func: Function for printing header line for buckets
 114                                 or prefixes.
 115       print_dir_summary_func: Function for printing size summaries about
 116                               buckets/prefixes.
 117       print_newline_func: Function for printing new lines between dirs.
 118       all_versions:      If true, list all object versions.
 119       should_recurse:    If true, recursively listing buckets/prefixes.
 120       exclude_patterns:  Patterns to exclude when listing.
 121       fields:            Fields to request from bucket listings; this should
 122                          include all fields that need to be populated in
 123                          objects so they can be listed. Can be set to None
 124                          to retrieve all object fields. Defaults to short
 125                          listing fields.
 126     """
 127     self._iterator_func = iterator_func
 128     self.logger = logger
 129     self._print_object_func = print_object_func
 130     self._print_dir_func = print_dir_func
 131     self._print_dir_header_func = print_dir_header_func
 132     self._print_bucket_header_func = print_bucket_header_func
 133     self._print_dir_summary_func = print_dir_summary_func
 134     self._print_newline_func = print_newline_func
 135     self.all_versions = all_versions
 136     self.should_recurse = should_recurse
 137     self.exclude_patterns = exclude_patterns
 138     self.bucket_listing_fields = fields
 139
 140   def ExpandUrlAndPrint(self, url):
 141     """Iterates over the given URL and calls print functions.
 142
 143     Args:
 144       url: StorageUrl to iterate over.
 145
 146     Returns:
 147       (num_objects, num_bytes) total number of objects and bytes iterated.
 148     """
 149     num_objects = 0
 150     num_dirs = 0
 151     num_bytes = 0
 152     print_newline = False
 153
 154     if url.IsBucket() or self.should_recurse:
 155       # IsBucket() implies a top-level listing.
 156       if url.IsBucket():
 157         self._print_bucket_header_func(url)
 158       return self._RecurseExpandUrlAndPrint(url.url_string,
 159                                             print_initial_newline=False)
 160     else:
 161       # User provided a prefix or object URL, but it's impossible to tell
 162       # which until we do a listing and see what matches.
 163       top_level_iterator = PluralityCheckableIterator(self._iterator_func(
 164           url.CreatePrefixUrl(wildcard_suffix=None),
 165           all_versions=self.all_versions).IterAll(
 166               expand_top_level_buckets=True,
 167               bucket_listing_fields=self.bucket_listing_fields))
 168       plurality = top_level_iterator.HasPlurality()
 169
 170       for blr in top_level_iterator:
 171         if self._MatchesExcludedPattern(blr):
 172           continue
 173         if blr.IsObject():
 174           nd = 0
 175           no, nb = self._print_object_func(blr)
 176           print_newline = True
 177         elif blr.IsPrefix():
 178           if print_newline:
 179             self._print_newline_func()
 180           else:
 181             print_newline = True
 182           if plurality:
 183             self._print_dir_header_func(blr)
 184           expansion_url_str = StorageUrlFromString(
 185               blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
 186           nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
 187           self._print_dir_summary_func(nb, blr)
 188         else:
 189           # We handle all buckets at the top level, so this should never happen.
 190           raise CommandException(
 191               'Sub-level iterator returned a CsBucketListingRef of type Bucket')
 192         num_objects += no
 193         num_dirs += nd
 194         num_bytes += nb
 195       return num_dirs, num_objects, num_bytes
 196
 197   def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
 198     """Iterates over the given URL string and calls print functions.
 199
 200     Args:
 201       url_str: String describing StorageUrl to iterate over.
 202                Must be of depth one or higher.
 203       print_initial_newline: If true, print a newline before recursively
 204                              expanded prefixes.
 205
 206     Returns:
 207       (num_objects, num_bytes) total number of objects and bytes iterated.
 208     """
 209     num_objects = 0
 210     num_dirs = 0
 211     num_bytes = 0
 212     for blr in self._iterator_func(
 213         '%s' % url_str, all_versions=self.all_versions).IterAll(
 214             expand_top_level_buckets=True,
 215             bucket_listing_fields=self.bucket_listing_fields):
 216       if self._MatchesExcludedPattern(blr):
 217         continue
 218
 219       if blr.IsObject():
 220         nd = 0
 221         no, nb = self._print_object_func(blr)
 222       elif blr.IsPrefix():
 223         if self.should_recurse:
 224           if print_initial_newline:
 225             self._print_newline_func()
 226           else:
 227             print_initial_newline = True
 228           self._print_dir_header_func(blr)
 229           expansion_url_str = StorageUrlFromString(
 230               blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
 231
 232           nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
 233           self._print_dir_summary_func(nb, blr)
 234         else:
 235           nd, no, nb = 1, 0, 0
 236           self._print_dir_func(blr)
 237       else:
 238         # We handle all buckets at the top level, so this should never happen.
 239         raise CommandException(
 240             'Sub-level iterator returned a bucketListingRef of type Bucket')
 241       num_dirs += nd
 242       num_objects += no
 243       num_bytes += nb
 244
 245     return num_dirs, num_objects, num_bytes
 246
 247   def _MatchesExcludedPattern(self, blr):
 248     """Checks bucket listing reference against patterns to exclude.
 249
 250     Args:
 251       blr: BucketListingRef to check.
 252
 253     Returns:
 254       True if reference matches a pattern and should be excluded.
 255     """
 256     if self.exclude_patterns:
 257       tomatch = blr.url_string
 258       for pattern in self.exclude_patterns:
 259         if fnmatch.fnmatch(tomatch, pattern):
 260           return True
 261     return False