Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / telemetry / third_party / gsutilz / gslib / ls_helper.py
blobf424515d1d98e14d3905e4e670f966a44f9a4da6
1 # -*- coding: utf-8 -*-
2 # Copyright 2014 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Utility functions and class for listing commands such as ls and du."""
17 from __future__ import absolute_import
19 import fnmatch
21 from gslib.exception import CommandException
22 from gslib.plurality_checkable_iterator import PluralityCheckableIterator
23 from gslib.util import UTF8
24 from gslib.wildcard_iterator import StorageUrlFromString
def PrintNewLine():
    """Default function for printing new lines between directories.

    Writes exactly one empty line to standard output.
    """
    # A bare `print` only emits a newline when it is parsed as the Python 2
    # print statement; under Python 3 (or with print_function imported) it is
    # a no-op name reference. Printing an empty string yields a single newline
    # under either interpreter.
    print('')
def PrintDirHeader(bucket_listing_ref):
    """Default function for printing headers for prefixes.

    The header is printed prior to listing the contents of the prefix and
    consists of the reference's URL string followed by a colon.

    Args:
      bucket_listing_ref: BucketListingRef of type PREFIX; only its
          url_string attribute is read.
    """
    encoded_url = bucket_listing_ref.url_string.encode(UTF8)
    # Single-argument parenthesised form: same output as the print statement.
    print('%s:' % encoded_url)
def PrintBucketHeader(bucket_listing_ref):  # pylint: disable=unused-argument
    """Default function for printing headers for buckets.

    A bucket header would be printed prior to listing the contents of the
    bucket; this default implementation deliberately prints nothing.

    Args:
      bucket_listing_ref: BucketListingRef of type BUCKET. Ignored here.

    Returns:
      None.
    """
    return None
def PrintDir(bucket_listing_ref):
    """Default function for printing buckets or prefixes.

    Prints the reference's URL string, encoded with UTF8, on its own line.

    Args:
      bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX; only its
          url_string attribute is read.
    """
    encoded_url = bucket_listing_ref.url_string.encode(UTF8)
    # Single-argument parenthesised form: same output as the print statement.
    print(encoded_url)
# pylint: disable=unused-argument
def PrintDirSummary(num_bytes, bucket_listing_ref):
    """Off-by-default function for printing buckets or prefix size summaries.

    This default implementation is a no-op; both arguments are ignored.

    Args:
      num_bytes: Number of bytes contained in the directory.
      bucket_listing_ref: BucketListingRef of type BUCKET or PREFIX.

    Returns:
      None.
    """
    return None
def PrintObject(bucket_listing_ref):
    """Default printing function for objects.

    Prints the reference's URL string, encoded with UTF8, on its own line.

    Args:
      bucket_listing_ref: BucketListingRef of type OBJECT; only its
          url_string attribute is read.

    Returns:
      (num_objects, num_bytes). This default always reports one object and
      zero bytes.
    """
    encoded_url = bucket_listing_ref.url_string.encode(UTF8)
    # Single-argument parenthesised form: same output as the print statement.
    print(encoded_url)
    num_objects, num_bytes = 1, 0
    return (num_objects, num_bytes)
class LsHelper(object):
    """Helper class for ls and du.

    Walks the listing produced by an injected iterator factory, delegates all
    output to injected print callbacks, and accumulates directory, object and
    byte counts along the way.
    """

    def __init__(self, iterator_func, logger,
                 print_object_func=PrintObject,
                 print_dir_func=PrintDir,
                 print_dir_header_func=PrintDirHeader,
                 print_bucket_header_func=PrintBucketHeader,
                 print_dir_summary_func=PrintDirSummary,
                 print_newline_func=PrintNewLine,
                 all_versions=False, should_recurse=False,
                 exclude_patterns=None, fields=('name',)):
        """Initializes the helper class to prepare for listing.

        Args:
          iterator_func: Function for instantiating iterator. It is called as
              iterator_func(url_string, all_versions=...) where url_string
              may include wildcards, and its result must provide
              IterAll(expand_top_level_buckets=..., bucket_listing_fields=...).
          logger: Logger for outputting warnings / errors. Stored as
              self.logger; not otherwise used by this class.
          print_object_func: Function for printing objects. Called with the
              object's listing reference; must return
              (num_objects, num_bytes).
          print_dir_func: Function for printing buckets/prefixes that are
              listed but not expanded.
          print_dir_header_func: Function for printing the header line of a
              prefix before its contents are listed.
          print_bucket_header_func: Function for printing the header line of
              a bucket. Called with the url given to ExpandUrlAndPrint when
              that url is a bucket.
          print_dir_summary_func: Function for printing size summaries about
              buckets/prefixes. Called as (num_bytes, listing_ref) after a
              prefix has been expanded.
          print_newline_func: Function for printing new lines between dirs.
          all_versions: If true, list all object versions.
          should_recurse: If true, recursively list buckets/prefixes.
          exclude_patterns: fnmatch-style patterns; entries whose url_string
              matches any of them are skipped. May be None or empty.
          fields: Fields to request from bucket listings; this should
              include all fields that need to be populated in
              objects so they can be listed. Can be set to None
              to retrieve all object fields. Defaults to short
              listing fields.
        """
        self._iterator_func = iterator_func
        self.logger = logger
        self._print_object_func = print_object_func
        self._print_dir_func = print_dir_func
        self._print_dir_header_func = print_dir_header_func
        self._print_bucket_header_func = print_bucket_header_func
        self._print_dir_summary_func = print_dir_summary_func
        self._print_newline_func = print_newline_func
        self.all_versions = all_versions
        self.should_recurse = should_recurse
        self.exclude_patterns = exclude_patterns
        self.bucket_listing_fields = fields

    def ExpandUrlAndPrint(self, url):
        """Iterates over the given URL and calls print functions.

        Args:
          url: StorageUrl to iterate over.

        Returns:
          (num_dirs, num_objects, num_bytes) total number of directories,
          objects and bytes iterated.

        Raises:
          CommandException: If the listing yields an entry that is neither an
              object nor a prefix.
        """
        if url.IsBucket() or self.should_recurse:
            # IsBucket() implies a top-level listing.
            if url.IsBucket():
                self._print_bucket_header_func(url)
            return self._RecurseExpandUrlAndPrint(url.url_string,
                                                  print_initial_newline=False)

        # User provided a prefix or object URL, but it's impossible to tell
        # which until we do a listing and see what matches.
        top_level_iterator = PluralityCheckableIterator(
            self._iterator_func(
                url.CreatePrefixUrl(wildcard_suffix=None),
                all_versions=self.all_versions).IterAll(
                    expand_top_level_buckets=True,
                    bucket_listing_fields=self.bucket_listing_fields))
        # Prefix headers are only printed when more than one entry matched.
        plurality = top_level_iterator.HasPlurality()

        num_dirs = 0
        num_objects = 0
        num_bytes = 0
        # Becomes True once anything has been printed, so that every expanded
        # prefix after the first output is separated by a newline.
        print_newline = False

        for blr in top_level_iterator:
            if self._MatchesExcludedPattern(blr):
                continue
            if blr.IsObject():
                nd = 0
                no, nb = self._print_object_func(blr)
                print_newline = True
            elif blr.IsPrefix():
                if print_newline:
                    self._print_newline_func()
                else:
                    print_newline = True
                if plurality:
                    self._print_dir_header_func(blr)
                nd, no, nb = self._ExpandPrefixAndPrint(blr)
            else:
                # We handle all buckets at the top level, so this should never
                # happen.
                raise CommandException(
                    'Sub-level iterator returned a CsBucketListingRef of type '
                    'Bucket')
            num_objects += no
            num_dirs += nd
            num_bytes += nb
        return num_dirs, num_objects, num_bytes

    def _ExpandPrefixAndPrint(self, blr):
        """Lists the contents of one prefix and prints its size summary.

        Args:
          blr: Listing reference of the prefix to expand.

        Returns:
          (num_dirs, num_objects, num_bytes) counted beneath the prefix.
        """
        expansion_url_str = StorageUrlFromString(
            blr.url_string).CreatePrefixUrl(wildcard_suffix='*')
        nd, no, nb = self._RecurseExpandUrlAndPrint(expansion_url_str)
        self._print_dir_summary_func(nb, blr)
        return nd, no, nb

    def _RecurseExpandUrlAndPrint(self, url_str, print_initial_newline=True):
        """Iterates over the given URL string and calls print functions.

        Args:
          url_str: String describing StorageUrl to iterate over.
              Must be of depth one or higher.
          print_initial_newline: If true, print a newline before recursively
              expanded prefixes.

        Returns:
          (num_dirs, num_objects, num_bytes) total number of directories,
          objects and bytes iterated.

        Raises:
          CommandException: If the listing yields an entry that is neither an
              object nor a prefix.
        """
        num_dirs = 0
        num_objects = 0
        num_bytes = 0
        listing = self._iterator_func(
            '%s' % url_str, all_versions=self.all_versions).IterAll(
                expand_top_level_buckets=True,
                bucket_listing_fields=self.bucket_listing_fields)
        for blr in listing:
            if self._MatchesExcludedPattern(blr):
                continue

            if blr.IsObject():
                nd = 0
                no, nb = self._print_object_func(blr)
            elif blr.IsPrefix():
                if self.should_recurse:
                    # Only the first expanded prefix may skip its leading
                    # newline; every later one is preceded by a separator.
                    if print_initial_newline:
                        self._print_newline_func()
                    else:
                        print_initial_newline = True
                    self._print_dir_header_func(blr)
                    nd, no, nb = self._ExpandPrefixAndPrint(blr)
                else:
                    # Not recursing: the prefix counts as one directory and is
                    # printed as a single line.
                    nd, no, nb = 1, 0, 0
                    self._print_dir_func(blr)
            else:
                # We handle all buckets at the top level, so this should never
                # happen.
                raise CommandException(
                    'Sub-level iterator returned a bucketListingRef of type '
                    'Bucket')
            num_dirs += nd
            num_objects += no
            num_bytes += nb

        return num_dirs, num_objects, num_bytes

    def _MatchesExcludedPattern(self, blr):
        """Checks bucket listing reference against patterns to exclude.

        Args:
          blr: BucketListingRef to check.

        Returns:
          True if reference matches a pattern and should be excluded.
        """
        if not self.exclude_patterns:
            return False
        tomatch = blr.url_string
        return any(fnmatch.fnmatch(tomatch, pattern)
                   for pattern in self.exclude_patterns)