1 # -*- coding: utf-8 -*-
2 # Copyright 2013 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of Unix-like du command for cloud storage providers."""
17 from __future__
import absolute_import
21 from gslib
.boto_translation
import S3_DELETE_MARKER_GUID
22 from gslib
.bucket_listing_ref
import BucketListingObject
23 from gslib
.command
import Command
24 from gslib
.command_argument
import CommandArgument
25 from gslib
.cs_api_map
import ApiSelector
26 from gslib
.exception
import CommandException
27 from gslib
.ls_helper
import LsHelper
28 from gslib
.storage_url
import ContainsWildcard
29 from gslib
.storage_url
import StorageUrlFromString
30 from gslib
.util
import MakeHumanReadable
31 from gslib
.util
import NO_MAX
32 from gslib
.util
import UTF8
38 _DETAILED_HELP_TEXT
= ("""
44 The du command displays the amount of space (in bytes) being used by the
45 objects in the file or object hierarchy under a given URL. The syntax emulates
46 the Linux du command (which stands for disk usage). For example, the command:
48 gsutil du -s gs://your-bucket/dir
50 will report the total space used by all objects under gs://your-bucket/dir and
55 -0 Ends each output line with a 0 byte rather than a newline. This
56 can be useful to make the output more easily machine-readable.
58 -a Includes non-current object versions / generations in the listing
59 (only useful with a versioning-enabled bucket). Also prints
60 generation and metageneration for each listed object.
62 -c Produce a grand total.
64 -e A pattern to exclude from reporting. Example: -e "*.o" would
65 exclude any object that ends in ".o". Can be specified multiple
68 -h Prints object sizes in human-readable format (e.g., 1 KiB,
71 -s Display only a summary total for each argument.
73 -X Similar to -e, but excludes patterns from the given file. The
74 patterns to exclude should be one per line.
78 To list the size of all objects in a bucket:
80 gsutil du gs://bucketname
82 To list the size of all objects underneath a prefix:
84 gsutil du gs://bucketname/prefix/*
86 To print the total number of bytes in a bucket, in human-readable form:
88 gsutil du -ch gs://bucketname
90 To see a summary of the total bytes in the two given buckets:
92 gsutil du -s gs://bucket1 gs://bucket2
94 To list the size of all objects in a versioned bucket, including objects that
97 gsutil du -a gs://bucketname
99 To list all objects in a bucket, except objects that end in ".bak",
100 with each object printed ending in a null byte:
102 gsutil du -e "*.bak" -0 gs://bucketname
104 To get a total of all buckets in a project with a grand total for an entire
107 gsutil -o GSUtil:default_project_id=project-name du -shc
111 class DuCommand(Command
):
112 """Implementation of gsutil du command."""
114 # Command specification. See base class for documentation.
115 command_spec
= Command
.CreateCommandSpec(
117 command_name_aliases
=[],
118 usage_synopsis
=_SYNOPSIS
,
121 supported_sub_args
='0ace:hsX:',
123 provider_url_ok
=True,
125 gs_api_support
=[ApiSelector
.XML
, ApiSelector
.JSON
],
126 gs_default_api
=ApiSelector
.JSON
,
128 CommandArgument
.MakeZeroOrMoreCloudURLsArgument()
131 # Help specification. See help_provider.py for documentation.
132 help_spec
= Command
.HelpSpec(
134 help_name_aliases
=[],
135 help_type
='command_help',
136 help_one_line_summary
='Display object size usage',
137 help_text
=_DETAILED_HELP_TEXT
,
138 subcommand_help_text
={},
def _PrintSummaryLine(self, num_bytes, name):
  """Writes a single size/name summary line to standard output.

  Args:
    num_bytes: Byte count to report. Rendered with MakeHumanReadable when
        self.human_readable is set, otherwise with str().
    name: Label written after the size column.
  """
  if self.human_readable:
    rendered_size = MakeHumanReadable(num_bytes)
  else:
    rendered_size = str(num_bytes)
  fields = {
      'size': rendered_size,
      'name': name,
      'ending': self.line_ending,
  }
  # The size column is left-justified in a 10-character field; the line is
  # terminated with whatever self.line_ending currently holds.
  sys.stdout.write('%(size)-10s %(name)s%(ending)s' % fields)
147 def _PrintInfoAboutBucketListingRef(self
, bucket_listing_ref
):
148 """Print listing info for given bucket_listing_ref.
151 bucket_listing_ref: BucketListing being listed.
154 Tuple (number of objects, object size)
157 Exception: if calling bug encountered.
159 obj
= bucket_listing_ref
.root_object
160 url_str
= bucket_listing_ref
.url_string
161 if (obj
.metadata
and S3_DELETE_MARKER_GUID
in
162 obj
.metadata
.additionalProperties
):
166 url_str
+= '<DeleteMarker>'
168 size_string
= (MakeHumanReadable(obj
.size
)
169 if self
.human_readable
else str(obj
.size
))
173 if not self
.summary_only
:
174 sys
.stdout
.write('%(size)-10s %(url)s%(ending)s' % {
176 'url': url_str
.encode(UTF8
),
177 'ending': self
.line_ending
})
179 return (num_objs
, num_bytes
)
181 def RunCommand(self
):
182 """Command entry point for the du command."""
183 self
.line_ending
= '\n'
184 self
.all_versions
= False
185 self
.produce_total
= False
186 self
.human_readable
= False
187 self
.summary_only
= False
188 self
.exclude_patterns
= []
190 for o
, a
in self
.sub_opts
:
192 self
.line_ending
= '\0'
194 self
.all_versions
= True
196 self
.produce_total
= True
198 self
.exclude_patterns
.append(a
)
200 self
.human_readable
= True
202 self
.summary_only
= True
212 self
.exclude_patterns
.append(line
)
217 # Default to listing all gs buckets.
218 self
.args
= ['gs://']
221 got_nomatch_errors
= False
223 def _PrintObjectLong(blr
):
224 return self
._PrintInfoAboutBucketListingRef
(blr
)
226 def _PrintNothing(unused_blr
=None):
229 def _PrintDirectory(num_bytes
, name
):
230 if not self
.summary_only
:
231 self
._PrintSummaryLine
(num_bytes
, name
)
233 for url_arg
in self
.args
:
234 top_level_storage_url
= StorageUrlFromString(url_arg
)
235 if top_level_storage_url
.IsFileUrl():
236 raise CommandException('Only cloud URLs are supported for %s'
238 bucket_listing_fields
= ['size']
240 ls_helper
= LsHelper(
241 self
.WildcardIterator
, self
.logger
,
242 print_object_func
=_PrintObjectLong
, print_dir_func
=_PrintNothing
,
243 print_dir_header_func
=_PrintNothing
,
244 print_dir_summary_func
=_PrintDirectory
,
245 print_newline_func
=_PrintNothing
, all_versions
=self
.all_versions
,
246 should_recurse
=True, exclude_patterns
=self
.exclude_patterns
,
247 fields
=bucket_listing_fields
)
249 # ls_helper expands to objects and prefixes, so perform a top-level
251 if top_level_storage_url
.IsProvider():
252 # Provider URL: use bucket wildcard to iterate over all buckets.
253 top_level_iter
= self
.WildcardIterator(
254 '%s://*' % top_level_storage_url
.scheme
).IterBuckets(
255 bucket_fields
=['id'])
256 elif top_level_storage_url
.IsBucket():
257 top_level_iter
= self
.WildcardIterator(
258 '%s://%s' % (top_level_storage_url
.scheme
,
259 top_level_storage_url
.bucket_name
)).IterBuckets(
260 bucket_fields
=['id'])
262 top_level_iter
= [BucketListingObject(top_level_storage_url
)]
264 for blr
in top_level_iter
:
265 storage_url
= blr
.storage_url
266 if storage_url
.IsBucket() and self
.summary_only
:
267 storage_url
= StorageUrlFromString(
268 storage_url
.CreatePrefixUrl(wildcard_suffix
='**'))
269 _
, exp_objs
, exp_bytes
= ls_helper
.ExpandUrlAndPrint(storage_url
)
270 if (storage_url
.IsObject() and exp_objs
== 0 and
271 ContainsWildcard(url_arg
) and not self
.exclude_patterns
):
272 got_nomatch_errors
= True
273 total_bytes
+= exp_bytes
275 if self
.summary_only
:
276 self
._PrintSummaryLine
(exp_bytes
, blr
.url_string
.rstrip('/'))
278 if self
.produce_total
:
279 self
._PrintSummaryLine
(total_bytes
, 'total')
281 if got_nomatch_errors
:
282 raise CommandException('One or more URLs matched no objects.')