Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / telemetry / third_party / gsutilz / gslib / commands / ls.py
blobffa139f1fa330f1d961162fec3b99df46e93c36c
1 # -*- coding: utf-8 -*-
2 # Copyright 2011 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of Unix-like ls command for cloud storage providers."""
17 from __future__ import absolute_import
19 import re
21 from gslib.boto_translation import S3_DELETE_MARKER_GUID
22 from gslib.cloud_api import NotFoundException
23 from gslib.command import Command
24 from gslib.command_argument import CommandArgument
25 from gslib.cs_api_map import ApiSelector
26 from gslib.exception import CommandException
27 from gslib.ls_helper import LsHelper
28 from gslib.storage_url import ContainsWildcard
29 from gslib.storage_url import StorageUrlFromString
30 from gslib.translation_helper import AclTranslation
31 from gslib.util import ListingStyle
32 from gslib.util import MakeHumanReadable
33 from gslib.util import NO_MAX
34 from gslib.util import PrintFullInfoAboutObject
35 from gslib.util import UTF8
38 # Regex that assists with converting JSON timestamp to ls-style output.
39 # This excludes timestamp fractional seconds, for example:
40 # 2013-07-03 20:32:53.048000+00:00
# Group 1 captures everything before the first whitespace character (the date
# part in the example above); group 2 captures everything after that whitespace
# up to the first '.' or '+' (the time part without fractional seconds or UTC
# offset). The trailing '.*' consumes the rest so that a substitution drops it.
# _PrintLongListing substitutes r'\1T\2Z', which would turn the example above
# into 2013-07-03T20:32:53Z.
41 JSON_TIMESTAMP_RE = re.compile(r'([^\s]*)\s([^\.\+]*).*')
# Usage synopsis for "gsutil ls". It is passed as usage_synopsis to
# Command.CreateCommandSpec and is concatenated into _DETAILED_HELP_TEXT.
# NOTE(review): the synopsis does not mention -e, -h or -R, although
# supported_sub_args ('aeblLhp:rR') accepts them and the OPTIONS section of the
# detailed help documents them -- confirm whether the omission is intentional.
43 _SYNOPSIS = """
44 gsutil ls [-a] [-b] [-l] [-L] [-r] [-p proj_id] url...
45 """
# Long-form help for the command; passed as help_text to Command.HelpSpec.
# NOTE(review): parts of this text do not match what the code in this file
# prints; confirm which side is authoritative before editing either:
#   * The "ls -L -b" sample shows labels such as "StorageClass:",
#     "LocationConstraint:", "Logging:" and "WebsiteConfiguration:", while
#     _PrintBucketInfo prints "Storage class:", "Location constraint:",
#     "Logging configuration:" and "Website configuration:".
#   * "-l" is described as printing "(owner, length)", while
#     _PrintLongListing prints the size, the timestamp and the URL.
47 _DETAILED_HELP_TEXT = ("""
48 <B>SYNOPSIS</B>
49 """ + _SYNOPSIS + """
52 <B>LISTING PROVIDERS, BUCKETS, SUBDIRECTORIES, AND OBJECTS</B>
53 If you run gsutil ls without URLs, it lists all of the Google Cloud Storage
54 buckets under your default project ID:
56 gsutil ls
58 (For details about projects, see "gsutil help projects" and also the -p
59 option in the OPTIONS section below.)
61 If you specify one or more provider URLs, gsutil ls will list buckets at
62 each listed provider:
64 gsutil ls gs://
66 If you specify bucket URLs, gsutil ls will list objects at the top level of
67 each bucket, along with the names of each subdirectory. For example:
69 gsutil ls gs://bucket
71 might produce output like:
73 gs://bucket/obj1.htm
74 gs://bucket/obj2.htm
75 gs://bucket/images1/
76 gs://bucket/images2/
78 The "/" at the end of the last 2 URLs tells you they are subdirectories,
79 which you can list using:
81 gsutil ls gs://bucket/images*
83 If you specify object URLs, gsutil ls will list the specified objects. For
84 example:
86 gsutil ls gs://bucket/*.txt
88 will list all files whose name matches the above wildcard at the top level
89 of the bucket.
91 See "gsutil help wildcards" for more details on working with wildcards.
94 <B>DIRECTORY BY DIRECTORY, FLAT, and RECURSIVE LISTINGS</B>
95 Listing a bucket or subdirectory (as illustrated near the end of the previous
96 section) only shows the objects and names of subdirectories it contains. You
97 can list all objects in a bucket by using the -r option. For example:
99 gsutil ls -r gs://bucket
101 will list the top-level objects and buckets, then the objects and
102 buckets under gs://bucket/images1, then those under gs://bucket/images2, etc.
104 If you want to see all objects in the bucket in one "flat" listing use the
105 recursive ("**") wildcard, like:
107 gsutil ls -r gs://bucket/**
109 or, for a flat listing of a subdirectory:
111 gsutil ls -r gs://bucket/dir/**
114 <B>LISTING OBJECT DETAILS</B>
115 If you specify the -l option, gsutil will output additional information
116 about each matching provider, bucket, subdirectory, or object. For example:
118 gsutil ls -l gs://bucket/*.txt
120 will print the object size, creation time stamp, and name of each matching
121 object, along with the total count and sum of sizes of all matching objects:
123 2276224 2012-03-02T19:25:17Z gs://bucket/obj1
124 3914624 2012-03-02T19:30:27Z gs://bucket/obj2
125 TOTAL: 2 objects, 6190848 bytes (5.9 MiB)
127 Note that the total listed in parentheses above is in mebibytes (or gibibytes,
128 tebibytes, etc.), which corresponds to the unit of billing measurement for
129 Google Cloud Storage.
131 You can get a listing of all the objects in the top-level bucket directory
132 (along with the total count and sum of sizes) using a command like:
134 gsutil ls -l gs://bucket
136 To print additional detail about objects and buckets use the gsutil ls -L
137 option. For example:
139 gsutil ls -L gs://bucket/obj1
141 will print something like:
143 gs://bucket/obj1:
144 Creation Time: Fri, 02 Mar 2012 19:25:17 GMT
145 Size: 2276224
146 Cache-Control: private, max-age=0
147 Content-Type: application/x-executable
148 ETag: 5ca6796417570a586723b7344afffc81
149 Generation: 1378862725952000
150 Metageneration: 1
151 ACL:
154 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
155 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
156 "role": "OWNER"
159 TOTAL: 1 objects, 2276224 bytes (2.17 MiB)
161 See also "gsutil help acl" for getting a more readable version of the ACL.
164 <B>LISTING BUCKET DETAILS</B>
165 If you want to see information about the bucket itself, use the -b
166 option. For example:
168 gsutil ls -L -b gs://bucket
170 will print something like:
172 gs://bucket/ :
173 StorageClass: STANDARD
174 LocationConstraint: US
175 Versioning enabled: True
176 Logging: None
177 WebsiteConfiguration: None
178 CORS configuration: Present
179 Lifecycle configuration: None
182 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
183 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
184 "role": "OWNER"
187 Default ACL:
190 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
191 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
192 "role": "OWNER"
197 <B>OPTIONS</B>
198 -l Prints long listing (owner, length).
200 -L Prints even more detail than -l. Note: If you use this option
201 with the (non-default) XML API it will generate an additional
202 request per object being listed, which makes the -L option run
203 much more slowly (and cost more) using the XML API than the
204 default JSON API.
206 -b Prints info about the bucket when used with a bucket URL.
208 -h When used with -l, prints object sizes in human readable format
209 (e.g., 1 KiB, 234 MiB, 2 GiB, etc.)
211 -p proj_id Specifies the project ID to use for listing buckets.
213 -R, -r Requests a recursive listing.
215 -a Includes non-current object versions / generations in the listing
216 (only useful with a versioning-enabled bucket). If combined with
217 -l option also prints metageneration for each listed object.
219 -e Include ETag in long listing (-l) output.
220 """)
class LsCommand(Command):
  """Implementation of the gsutil ls command.

  Provider URLs list buckets, bucket URLs combined with -b list bucket
  metadata, and every other cloud URL is expanded through LsHelper to list
  the matching objects and prefixes.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'ls',
      command_name_aliases=['dir', 'list'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='aeblLhp:rR',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='ls',
      help_name_aliases=['dir', 'list'],
      help_type='command_help',
      help_one_line_summary='List providers, buckets, or objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _PrintBucketInfo(self, bucket_blr, listing_style):
    """Prints the listing entry for one bucket.

    SHORT and LONG listings print only the bucket listing reference. A
    LONG_LONG listing prints a multi-line summary assembled from the bucket's
    root object.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed.
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      None; all output goes to stdout.
    """
    if listing_style in (ListingStyle.SHORT, ListingStyle.LONG):
      print(bucket_blr)
      return

    # Only ListingStyle.LONG_LONG reaches this point, and the caller
    # guarantees that the root object is populated in that case.
    bucket = bucket_blr.root_object
    location_constraint = bucket.location
    storage_class = bucket.storageClass
    summary = {
        'bucket': bucket_blr.url_string,
        'storage_class': storage_class,
        'location_constraint': location_constraint,
        'acl': AclTranslation.JsonFromMessage(bucket.acl),
        'default_acl': AclTranslation.JsonFromMessage(
            bucket.defaultObjectAcl),
    }
    summary['versioning'] = bucket.versioning and bucket.versioning.enabled

    # The remaining configurations are reported only as present or absent.
    for summary_key, config in (('website_config', bucket.website),
                                ('logging_config', bucket.logging),
                                ('cors_config', bucket.cors),
                                ('lifecycle_config', bucket.lifecycle)):
      summary[summary_key] = 'Present' if config else 'None'

    # Indent multi-line string values (and start them on their own line) so
    # that they line up underneath their label.
    for summary_key in summary:
      value = summary[summary_key]
      if isinstance(value, basestring) and '\n' in value:
        indented = value.replace('\n', '\n\t ')
        if not indented.startswith('\n'):
          indented = '\n\t ' + indented
        summary[summary_key] = indented

    report_template = ('{bucket} :\n'
                       '\tStorage class:\t\t\t{storage_class}\n'
                       '\tLocation constraint:\t\t{location_constraint}\n'
                       '\tVersioning enabled:\t\t{versioning}\n'
                       '\tLogging configuration:\t\t{logging_config}\n'
                       '\tWebsite configuration:\t\t{website_config}\n'
                       '\tCORS configuration: \t\t{cors_config}\n'
                       '\tLifecycle configuration:\t{lifecycle_config}\n'
                       '\tACL:\t\t\t\t{acl}\n'
                       '\tDefault ACL:\t\t\t{default_acl}')
    print(report_template.format(**summary))
    if bucket_blr.storage_url.scheme == 's3':
      print('Note: this is an S3 bucket so configuration values may be '
            'blank. To retrieve bucket configuration values, use '
            'individual configuration commands such as gsutil acl get '
            '<bucket>.')

  def _PrintLongListing(self, bucket_listing_ref):
    """Prints one object in ListingStyle.LONG format.

    Args:
      bucket_listing_ref: Listing reference whose root_object is the object
          to print.

    Returns:
      Tuple (number of objects, number of bytes) that this entry contributes
      to the listing totals; an S3 delete marker contributes (0, 0).
    """
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    is_delete_marker = (
        obj.metadata and
        S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties)
    if is_delete_marker:
      size_string, num_bytes, num_objs = '0', 0, 0
      url_str += '<DeleteMarker>'
    else:
      if self.human_readable:
        size_string = MakeHumanReadable(obj.size)
      else:
        size_string = str(obj.size)
      num_bytes, num_objs = obj.size, 1

    # Reduce the JSON timestamp to <date>T<time>Z.
    raw_updated = str(obj.updated).decode(UTF8).encode('ascii')
    timestamp = JSON_TIMESTAMP_RE.sub(r'\1T\2Z', raw_updated)

    # Optional columns extend the template; their values stay None when the
    # corresponding option was not requested.
    line_template = '%(size)10s %(timestamp)s %(url)s'
    metageneration_text = None
    etag_text = None
    if self.all_versions:
      line_template += ' metageneration=%(metageneration)s'
      metageneration_text = str(obj.metageneration).encode(UTF8)
    if self.include_etag:
      line_template += ' etag=%(etag)s'
      etag_text = obj.etag.encode(UTF8)

    print(line_template % dict(size=size_string,
                               timestamp=timestamp,
                               url=url_str.encode(UTF8),
                               metageneration=metageneration_text,
                               etag=etag_text))
    return (num_objs, num_bytes)

  def RunCommand(self):
    """Command entry point for the ls command.

    Returns:
      0 on success.

    Raises:
      CommandException: if a file URL is given, if the listing style is not
          recognized, or if an object URL matched no objects or prefixes.
      NotFoundException: if a non-wildcard bucket URL listed with -b matched
          no buckets.
    """
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    for flag, flag_value in self.sub_opts or ():
      if flag in ('-r', '-R'):
        self.recursion_requested = True
      elif flag == '-l':
        listing_style = ListingStyle.LONG
      elif flag == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif flag == '-b':
        get_bucket_info = True
      elif flag == '-a':
        self.all_versions = True
      elif flag == '-e':
        self.include_etag = True
      elif flag == '-h':
        self.human_readable = True
      elif flag == '-p':
        self.project_id = flag_value

    if not self.args:
      # default to listing all gs buckets
      self.args = ['gs://']

    def _MaybePrintBucketHeader(blr):
      # A per-bucket header is printed only when several URLs were given.
      if len(self.args) > 1:
        print('%s:' % blr.url_string.encode(UTF8))

    def _PrintPrefixLong(blr):
      # Pads the prefix with 33 blanks in place of the object columns.
      print('%-33s%s' % ('', blr.url_string.encode(UTF8)))

    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    total_objs = 0
    total_bytes = 0

    for url_str in self.args:
      storage_url = StorageUrlFromString(url_str)
      if storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s'
                               % self.command_name)

      if listing_style in (ListingStyle.SHORT, ListingStyle.LONG):
        bucket_fields = ['id']
      elif listing_style == ListingStyle.LONG_LONG:
        bucket_fields = ['location', 'storageClass', 'versioning', 'acl',
                         'defaultObjectAcl', 'website', 'logging', 'cors',
                         'lifecycle']
      else:
        bucket_fields = None

      if storage_url.IsProvider():
        # Provider URL: use bucket wildcard to list buckets.
        all_buckets_url = '%s://*' % storage_url.scheme
        for blr in self.WildcardIterator(all_buckets_url).IterBuckets(
            bucket_fields=bucket_fields):
          self._PrintBucketInfo(blr, listing_style)
        continue

      if storage_url.IsBucket() and get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        matched_buckets = 0
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          if not ContainsWildcard(url_str) and not blr.root_object:
            # Iterator does not make an HTTP call for non-wildcarded
            # listings with fields=='id'. Ensure the bucket exists by calling
            # GetBucket.
            self.gsutil_api.GetBucket(
                blr.storage_url.bucket_name,
                fields=['id'], provider=storage_url.scheme)
          self._PrintBucketInfo(blr, listing_style)
          matched_buckets += 1
        if not ContainsWildcard(url_str) and not matched_buckets:
          got_bucket_nomatch_errors = True
        continue

      # URL names a bucket, object, or object subdir ->
      # list matching object(s) / subdirs.
      # These keyword arguments are common to every listing style; the ls
      # helper by default readies us for a short listing, so SHORT adds none.
      helper_kwargs = dict(
          all_versions=self.all_versions,
          print_bucket_header_func=_MaybePrintBucketHeader,
          should_recurse=self.recursion_requested)
      if listing_style == ListingStyle.SHORT:
        pass
      elif listing_style == ListingStyle.LONG:
        object_fields = ['name', 'updated', 'size']
        if self.all_versions:
          object_fields.extend(['generation', 'metageneration'])
        if self.include_etag:
          object_fields.append('etag')
        helper_kwargs.update(
            print_object_func=self._PrintLongListing,
            print_dir_func=_PrintPrefixLong,
            fields=object_fields)
      elif listing_style == ListingStyle.LONG_LONG:
        # fields=None requests all fields.
        helper_kwargs.update(
            print_object_func=PrintFullInfoAboutObject,
            print_dir_func=_PrintPrefixLong,
            fields=None)
      else:
        raise CommandException('Unknown listing style: %s' % listing_style)
      ls_helper = LsHelper(self.WildcardIterator, self.logger,
                           **helper_kwargs)

      exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
      if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
      print('TOTAL: %d objects, %d bytes (%s)' %
            (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
      raise NotFoundException('One or more bucket URLs matched no buckets.')

    return 0