# -*- coding: utf-8 -*-
# Copyright 2011 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15 """Implementation of Unix-like ls command for cloud storage providers."""
17 from __future__
import absolute_import
21 from gslib
.boto_translation
import S3_DELETE_MARKER_GUID
22 from gslib
.cloud_api
import NotFoundException
23 from gslib
.command
import Command
24 from gslib
.command_argument
import CommandArgument
25 from gslib
.cs_api_map
import ApiSelector
26 from gslib
.exception
import CommandException
27 from gslib
.ls_helper
import LsHelper
28 from gslib
.storage_url
import ContainsWildcard
29 from gslib
.storage_url
import StorageUrlFromString
30 from gslib
.translation_helper
import AclTranslation
31 from gslib
.util
import ListingStyle
32 from gslib
.util
import MakeHumanReadable
33 from gslib
.util
import NO_MAX
34 from gslib
.util
import PrintFullInfoAboutObject
35 from gslib
.util
import UTF8
# Regex that assists with converting a JSON timestamp to ls-style output.
# Group 1 is the text before the first whitespace character and group 2 is
# the text after it up to (not including) the first '.' or '+', so fractional
# seconds and a trailing offset are matched but left out of both groups.
# Example input:
#   2013-07-03 20:32:53.048000+00:00
# The adjacent raw-string pieces below are concatenated by the parser into a
# single pattern.
JSON_TIMESTAMP_RE = re.compile(
    r'([^\s]*)'     # group 1: leading run of non-whitespace characters
    r'\s'           # the single whitespace separator
    r'([^\.\+]*)'   # group 2: run of characters up to the first '.' or '+'
    r'.*')          # remainder of the line, discarded by the caller's sub()
44 gsutil ls [-a] [-b] [-l] [-L] [-r] [-p proj_id] url...
47 _DETAILED_HELP_TEXT
= ("""
52 <B>LISTING PROVIDERS, BUCKETS, SUBDIRECTORIES, AND OBJECTS</B>
53 If you run gsutil ls without URLs, it lists all of the Google Cloud Storage
54 buckets under your default project ID:
58 (For details about projects, see "gsutil help projects" and also the -p
59 option in the OPTIONS section below.)
61 If you specify one or more provider URLs, gsutil ls will list buckets at
66 If you specify bucket URLs, gsutil ls will list objects at the top level of
67 each bucket, along with the names of each subdirectory. For example:
71 might produce output like:
78 The "/" at the end of the last 2 URLs tells you they are subdirectories,
79 which you can list using:
81 gsutil ls gs://bucket/images*
83 If you specify object URLs, gsutil ls will list the specified objects. For
86 gsutil ls gs://bucket/*.txt
88 will list all files whose name matches the above wildcard at the top level
91 See "gsutil help wildcards" for more details on working with wildcards.
94 <B>DIRECTORY BY DIRECTORY, FLAT, and RECURSIVE LISTINGS</B>
95 Listing a bucket or subdirectory (as illustrated near the end of the previous
96 section) only shows the objects and names of subdirectories it contains. You
97 can list all objects in a bucket by using the -r option. For example:
99 gsutil ls -r gs://bucket
101 will list the top-level objects and buckets, then the objects and
102 buckets under gs://bucket/images1, then those under gs://bucket/images2, etc.
104 If you want to see all objects in the bucket in one "flat" listing use the
105 recursive ("**") wildcard, like:
107 gsutil ls -r gs://bucket/**
109 or, for a flat listing of a subdirectory:
111 gsutil ls -r gs://bucket/dir/**
114 <B>LISTING OBJECT DETAILS</B>
115 If you specify the -l option, gsutil will output additional information
116 about each matching provider, bucket, subdirectory, or object. For example:
118 gsutil ls -l gs://bucket/*.txt
120 will print the object size, creation time stamp, and name of each matching
121 object, along with the total count and sum of sizes of all matching objects:
123 2276224 2012-03-02T19:25:17Z gs://bucket/obj1
124 3914624 2012-03-02T19:30:27Z gs://bucket/obj2
125 TOTAL: 2 objects, 6190848 bytes (5.9 MiB)
127 Note that the total listed in parentheses above is in mebibytes (or gibibytes,
128 tebibytes, etc.), which corresponds to the unit of billing measurement for
129 Google Cloud Storage.
131 You can get a listing of all the objects in the top-level bucket directory
132 (along with the total count and sum of sizes) using a command like:
134 gsutil ls -l gs://bucket
136 To print additional detail about objects and buckets use the gsutil ls -L
139 gsutil ls -L gs://bucket/obj1
141 will print something like:
144 Creation Time: Fri, 02 Mar 2012 19:25:17 GMT
146 Cache-Control: private, max-age=0
147 Content-Type: application/x-executable
148 ETag: 5ca6796417570a586723b7344afffc81
149 Generation: 1378862725952000
154 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
155 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
159 TOTAL: 1 objects, 2276224 bytes (2.17 MiB)
161 See also "gsutil help acl" for getting a more readable version of the ACL.
164 <B>LISTING BUCKET DETAILS</B>
165 If you want to see information about the bucket itself, use the -b
168 gsutil ls -L -b gs://bucket
170 will print something like:
173 StorageClass: STANDARD
174 LocationConstraint: US
175 Versioning enabled: True
177 WebsiteConfiguration: None
178 CORS configuration: Present
179 Lifecycle configuration: None
182 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
183 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
190 "entity": "group-00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
191 "entityId": "00b4903a97163d99003117abe64d292561d2b4074fc90ce5c0e35ac45f66ad70",
198 -l Prints long listing (owner, length).
200 -L Prints even more detail than -l. Note: If you use this option
201 with the (non-default) XML API it will generate an additional
202 request per object being listed, which makes the -L option run
203 much more slowly (and cost more) using the XML API than the
206 -b Prints info about the bucket when used with a bucket URL.
208 -h When used with -l, prints object sizes in human readable format
209 (e.g., 1 KiB, 234 MiB, 2 GiB, etc.)
211 -p proj_id Specifies the project ID to use for listing buckets.
213 -R, -r Requests a recursive listing.
215 -a Includes non-current object versions / generations in the listing
216 (only useful with a versioning-enabled bucket). If combined with
217 -l option also prints metageneration for each listed object.
219 -e Include ETag in long listing (-l) output.
223 class LsCommand(Command
):
224 """Implementation of gsutil ls command."""
226 # Command specification. See base class for documentation.
227 command_spec
= Command
.CreateCommandSpec(
229 command_name_aliases
=['dir', 'list'],
230 usage_synopsis
=_SYNOPSIS
,
233 supported_sub_args
='aeblLhp:rR',
235 provider_url_ok
=True,
237 gs_api_support
=[ApiSelector
.XML
, ApiSelector
.JSON
],
238 gs_default_api
=ApiSelector
.JSON
,
240 CommandArgument
.MakeZeroOrMoreCloudURLsArgument()
243 # Help specification. See help_provider.py for documentation.
244 help_spec
= Command
.HelpSpec(
246 help_name_aliases
=['dir', 'list'],
247 help_type
='command_help',
248 help_one_line_summary
='List providers, buckets, or objects',
249 help_text
=_DETAILED_HELP_TEXT
,
250 subcommand_help_text
={},
253 def _PrintBucketInfo(self
, bucket_blr
, listing_style
):
254 """Print listing info for given bucket.
257 bucket_blr: BucketListingReference for the bucket being listed
258 listing_style: ListingStyle enum describing type of output desired.
261 Tuple (total objects, total bytes) in the bucket.
263 if (listing_style
== ListingStyle
.SHORT
or
264 listing_style
== ListingStyle
.LONG
):
267 # listing_style == ListingStyle.LONG_LONG:
268 # We're guaranteed by the caller that the root object is populated.
269 bucket
= bucket_blr
.root_object
270 location_constraint
= bucket
.location
271 storage_class
= bucket
.storageClass
272 fields
= {'bucket': bucket_blr
.url_string
,
273 'storage_class': storage_class
,
274 'location_constraint': location_constraint
,
275 'acl': AclTranslation
.JsonFromMessage(bucket
.acl
),
276 'default_acl': AclTranslation
.JsonFromMessage(
277 bucket
.defaultObjectAcl
)}
279 fields
['versioning'] = bucket
.versioning
and bucket
.versioning
.enabled
280 fields
['website_config'] = 'Present' if bucket
.website
else 'None'
281 fields
['logging_config'] = 'Present' if bucket
.logging
else 'None'
282 fields
['cors_config'] = 'Present' if bucket
.cors
else 'None'
283 fields
['lifecycle_config'] = 'Present' if bucket
.lifecycle
else 'None'
285 # For field values that are multiline, add indenting to make it look
288 previous_value
= fields
[key
]
289 if (not isinstance(previous_value
, basestring
) or
290 '\n' not in previous_value
):
292 new_value
= previous_value
.replace('\n', '\n\t ')
293 # Start multiline values on a new line if they aren't already.
294 if not new_value
.startswith('\n'):
295 new_value
= '\n\t ' + new_value
296 fields
[key
] = new_value
299 '\tStorage class:\t\t\t{storage_class}\n'
300 '\tLocation constraint:\t\t{location_constraint}\n'
301 '\tVersioning enabled:\t\t{versioning}\n'
302 '\tLogging configuration:\t\t{logging_config}\n'
303 '\tWebsite configuration:\t\t{website_config}\n'
304 '\tCORS configuration: \t\t{cors_config}\n'
305 '\tLifecycle configuration:\t{lifecycle_config}\n'
306 '\tACL:\t\t\t\t{acl}\n'
307 '\tDefault ACL:\t\t\t{default_acl}'.format(**fields
))
308 if bucket_blr
.storage_url
.scheme
== 's3':
309 print('Note: this is an S3 bucket so configuration values may be '
310 'blank. To retrieve bucket configuration values, use '
311 'individual configuration commands such as gsutil acl get '
314 def _PrintLongListing(self
, bucket_listing_ref
):
315 """Prints an object with ListingStyle.LONG."""
316 obj
= bucket_listing_ref
.root_object
317 url_str
= bucket_listing_ref
.url_string
318 if (obj
.metadata
and S3_DELETE_MARKER_GUID
in
319 obj
.metadata
.additionalProperties
):
323 url_str
+= '<DeleteMarker>'
325 size_string
= (MakeHumanReadable(obj
.size
)
326 if self
.human_readable
else str(obj
.size
))
330 timestamp
= JSON_TIMESTAMP_RE
.sub(
331 r
'\1T\2Z', str(obj
.updated
).decode(UTF8
).encode('ascii'))
332 printstr
= '%(size)10s %(timestamp)s %(url)s'
334 encoded_metagen
= None
335 if self
.all_versions
:
336 printstr
+= ' metageneration=%(metageneration)s'
337 encoded_metagen
= str(obj
.metageneration
).encode(UTF8
)
338 if self
.include_etag
:
339 printstr
+= ' etag=%(etag)s'
340 encoded_etag
= obj
.etag
.encode(UTF8
)
343 'timestamp': timestamp
,
344 'url': url_str
.encode(UTF8
),
345 'metageneration': encoded_metagen
,
348 print printstr
% format_args
349 return (num_objs
, num_bytes
)
351 def RunCommand(self
):
352 """Command entry point for the ls command."""
353 got_nomatch_errors
= False
354 got_bucket_nomatch_errors
= False
355 listing_style
= ListingStyle
.SHORT
356 get_bucket_info
= False
357 self
.recursion_requested
= False
358 self
.all_versions
= False
359 self
.include_etag
= False
360 self
.human_readable
= False
362 for o
, a
in self
.sub_opts
:
364 self
.all_versions
= True
366 self
.include_etag
= True
368 get_bucket_info
= True
370 self
.human_readable
= True
372 listing_style
= ListingStyle
.LONG
374 listing_style
= ListingStyle
.LONG_LONG
377 elif o
== '-r' or o
== '-R':
378 self
.recursion_requested
= True
381 # default to listing all gs buckets
382 self
.args
= ['gs://']
387 def MaybePrintBucketHeader(blr
):
388 if len(self
.args
) > 1:
389 print '%s:' % blr
.url_string
.encode(UTF8
)
390 print_bucket_header
= MaybePrintBucketHeader
392 for url_str
in self
.args
:
393 storage_url
= StorageUrlFromString(url_str
)
394 if storage_url
.IsFileUrl():
395 raise CommandException('Only cloud URLs are supported for %s'
398 if (listing_style
== ListingStyle
.SHORT
or
399 listing_style
== ListingStyle
.LONG
):
400 bucket_fields
= ['id']
401 elif listing_style
== ListingStyle
.LONG_LONG
:
402 bucket_fields
= ['location', 'storageClass', 'versioning', 'acl',
403 'defaultObjectAcl', 'website', 'logging', 'cors',
405 if storage_url
.IsProvider():
406 # Provider URL: use bucket wildcard to list buckets.
407 for blr
in self
.WildcardIterator(
408 '%s://*' % storage_url
.scheme
).IterBuckets(
409 bucket_fields
=bucket_fields
):
410 self
._PrintBucketInfo
(blr
, listing_style
)
411 elif storage_url
.IsBucket() and get_bucket_info
:
412 # ls -b bucket listing request: List info about bucket(s).
414 for blr
in self
.WildcardIterator(url_str
).IterBuckets(
415 bucket_fields
=bucket_fields
):
416 if not ContainsWildcard(url_str
) and not blr
.root_object
:
417 # Iterator does not make an HTTP call for non-wildcarded
418 # listings with fields=='id'. Ensure the bucket exists by calling
420 self
.gsutil_api
.GetBucket(
421 blr
.storage_url
.bucket_name
,
422 fields
=['id'], provider
=storage_url
.scheme
)
423 self
._PrintBucketInfo
(blr
, listing_style
)
425 if not ContainsWildcard(url_str
) and not total_buckets
:
426 got_bucket_nomatch_errors
= True
428 # URL names a bucket, object, or object subdir ->
429 # list matching object(s) / subdirs.
430 def _PrintPrefixLong(blr
):
431 print '%-33s%s' % ('', blr
.url_string
.encode(UTF8
))
433 if listing_style
== ListingStyle
.SHORT
:
434 # ls helper by default readies us for a short listing.
435 ls_helper
= LsHelper(self
.WildcardIterator
, self
.logger
,
436 all_versions
=self
.all_versions
,
437 print_bucket_header_func
=print_bucket_header
,
438 should_recurse
=self
.recursion_requested
)
439 elif listing_style
== ListingStyle
.LONG
:
440 bucket_listing_fields
= ['name', 'updated', 'size']
441 if self
.all_versions
:
442 bucket_listing_fields
.extend(['generation', 'metageneration'])
443 if self
.include_etag
:
444 bucket_listing_fields
.append('etag')
446 ls_helper
= LsHelper(self
.WildcardIterator
, self
.logger
,
447 print_object_func
=self
._PrintLongListing
,
448 print_dir_func
=_PrintPrefixLong
,
449 print_bucket_header_func
=print_bucket_header
,
450 all_versions
=self
.all_versions
,
451 should_recurse
=self
.recursion_requested
,
452 fields
=bucket_listing_fields
)
454 elif listing_style
== ListingStyle
.LONG_LONG
:
456 bucket_listing_fields
= None
457 ls_helper
= LsHelper(self
.WildcardIterator
, self
.logger
,
458 print_object_func
=PrintFullInfoAboutObject
,
459 print_dir_func
=_PrintPrefixLong
,
460 print_bucket_header_func
=print_bucket_header
,
461 all_versions
=self
.all_versions
,
462 should_recurse
=self
.recursion_requested
,
463 fields
=bucket_listing_fields
)
465 raise CommandException('Unknown listing style: %s' % listing_style
)
467 exp_dirs
, exp_objs
, exp_bytes
= ls_helper
.ExpandUrlAndPrint(storage_url
)
468 if storage_url
.IsObject() and exp_objs
== 0 and exp_dirs
== 0:
469 got_nomatch_errors
= True
470 total_bytes
+= exp_bytes
471 total_objs
+= exp_objs
473 if total_objs
and listing_style
!= ListingStyle
.SHORT
:
474 print ('TOTAL: %d objects, %d bytes (%s)' %
475 (total_objs
, total_bytes
, MakeHumanReadable(float(total_bytes
))))
476 if got_nomatch_errors
:
477 raise CommandException('One or more URLs matched no objects.')
478 if got_bucket_nomatch_errors
:
479 raise NotFoundException('One or more bucket URLs matched no buckets.')