1 # -*- coding: utf-8 -*-
2 # Copyright 2011 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of Unix-like rm command for cloud storage providers."""
17 from __future__
import absolute_import
19 from gslib
.cloud_api
import NotEmptyException
20 from gslib
.cloud_api
import ServiceException
21 from gslib
.command
import Command
22 from gslib
.command
import GetFailureCount
23 from gslib
.command
import ResetFailureCount
24 from gslib
.command_argument
import CommandArgument
25 from gslib
.cs_api_map
import ApiSelector
26 from gslib
.exception
import CommandException
27 from gslib
.name_expansion
import NameExpansionIterator
28 from gslib
.storage_url
import StorageUrlFromString
29 from gslib
.translation_helper
import PreconditionsFromHeaders
30 from gslib
.util
import GetCloudApiInstance
31 from gslib
.util
import NO_MAX
32 from gslib
.util
import Retry
33 from gslib
.util
import StdinIterator
37 gsutil rm [-f] [-r] url...
38 gsutil rm [-f] [-r] -I
41 _DETAILED_HELP_TEXT
= ("""
47 The gsutil rm command removes objects.
48 For example, the command:
50 gsutil rm gs://bucket/subdir/*
52 will remove all objects in gs://bucket/subdir, but not in any of its
53 sub-directories. In contrast:
55 gsutil rm gs://bucket/subdir/**
57 will remove all objects under gs://bucket/subdir or any of its
60 You can also use the -r option to specify recursive object deletion. Thus, for
61 example, either of the following two commands will remove gs://bucket/subdir
62 and all objects and subdirectories under it:
64 gsutil rm gs://bucket/subdir**
65 gsutil rm -r gs://bucket/subdir
67 The -r option will also delete all object versions in the subdirectory for
68 versioning-enabled buckets, whereas the ** command will only delete the live
69 version of each object in the subdirectory.
71 Running gsutil rm -r on a bucket will delete all versions of all objects in
72 the bucket, and then delete the bucket:
74 gsutil rm -r gs://bucket
76 If you want to delete all objects in the bucket, but not the bucket itself,
77 this command will work:
79 gsutil rm gs://bucket/**
81 If you have a large number of objects to remove you might want to use the
82 gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
85 gsutil -m rm -r gs://my_bucket/subdir
87 You can pass a list of URLs (one per line) to remove on stdin instead of as
88 command line arguments by using the -I option. This allows you to use gsutil
89 in a pipeline to remove objects identified by a program, such as:
91 some_program | gsutil -m rm -I
93 The contents of stdin can name cloud URLs and wildcards of cloud URLs.
95 Note that gsutil rm will refuse to remove files from the local
96 file system. For example this will fail:
100 WARNING: Object removal cannot be undone. Google Cloud Storage is designed
101 to give developers a high amount of flexibility and control over their data,
102 and Google maintains strict controls over the processing and purging of
103 deleted data. To protect yourself from mistakes, you can configure object
104 versioning on your bucket(s). See 'gsutil help versions' for details.
107 <B>DATA RESTORATION FROM ACCIDENTAL DELETION OR OVERWRITES</B>
108 Google Cloud Storage does not provide support for restoring data lost
109 or overwritten due to customer errors. If you have concerns that your
110 application software (or your users) may at some point erroneously delete or
111 overwrite data, you can protect yourself from that risk by enabling Object
112 Versioning (see "gsutil help versioning"). Doing so increases storage costs,
113 which can be partially mitigated by configuring Lifecycle Management to delete
114 older object versions (see "gsutil help lifecycle").
118 -f Continues silently (without printing error messages) despite
119 errors when removing multiple objects. If some of the objects
120 could not be removed, gsutil's exit status will be non-zero even
121 if this flag is set. This option is implicitly set when running
124 -I Causes gsutil to read the list of objects to remove from stdin.
125 This allows you to run a program that generates the list of
128 -R, -r Causes bucket or bucket subdirectory contents (all objects and
129 subdirectories that it contains) to be removed recursively. If
130 used with a bucket-only URL (like gs://bucket), after deleting
131 objects and subdirectories gsutil will delete the bucket. The -r
132 flag implies the -a flag and will delete all object versions.
134 -a Delete all versions of an object.
138 def _RemoveExceptionHandler(cls
, e
):
139 """Simple exception handler to allow post-completion status."""
140 if not cls
.continue_on_error
:
141 cls
.logger
.error(str(e
))
142 cls
.everything_removed_okay
= False
145 # pylint: disable=unused-argument
def _RemoveFoldersExceptionHandler(cls, e):
  """Exception handler used while removing folder placeholder objects.

  When removing folders, we don't mind if none exist: a CommandException
  whose reason mentions 'No URLs matched' is swallowed. Anything else is
  re-raised.

  Args:
    cls: Command instance (unused here).
    e: The exception raised while removing a folder placeholder object.

  Raises:
    Exception: e itself, unless it is a 'No URLs matched' CommandException.
  """
  # The earlier test was isinstance(e, CommandException.__class__), i.e.
  # isinstance(e, type), which is never true for an exception instance, so the
  # tolerated case could not match. Compare against the exception class
  # itself, and read the text from .reason, the attribute the rest of this
  # module uses on CommandException.
  # NOTE(review): the bodies of both branches were not visible in the reviewed
  # text; swallow-on-match / re-raise-otherwise follows the docstring's stated
  # intent -- confirm.
  if isinstance(e, CommandException) and 'No URLs matched' in e.reason:
    return
  raise e
155 def _RemoveFuncWrapper(cls
, name_expansion_result
, thread_state
=None):
156 cls
.RemoveFunc(name_expansion_result
, thread_state
=thread_state
)
class RmCommand(Command):
  """Implementation of gsutil rm command."""

  # Command specification. See base class for documentation.
  # NOTE(review): the positional name and the min_args/max_args/file_url_ok/
  # urls_start_arg/argparse_arguments entries were not visible in the reviewed
  # text and are restored here -- confirm against the base class.
  command_spec = Command.CreateCommandSpec(
      'rm',
      command_name_aliases=['del', 'delete', 'remove'],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='afIrR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rm',
      help_name_aliases=['del', 'delete', 'remove'],
      help_type='command_help',
      help_one_line_summary='Remove objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the rm command.

    Removes every object matched by the URL arguments (or by the URLs read
    from stdin with -I), then any simulated-folder placeholder objects when
    recursion was requested, and finally any buckets named by bucket-only
    URLs in a recursive removal.

    Returns:
      0 on completion.

    Raises:
      CommandException: on invalid argument combinations, when nothing
          matched and there is no bucket to delete, or when some objects
          could not be removed and -f was not given.
      ServiceException: propagated from the removal pass unless -f was given.
    """
    # self.recursion_requested is initialized in command.py (so it can be
    # checked in parent class for all commands).
    self.continue_on_error = False
    self.read_args_from_stdin = False
    self.all_versions = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-r' or o == '-R':
          # Recursive removal implies removing every object version.
          self.recursion_requested = True
          self.all_versions = True

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      # NOTE(review): url_strs is iterated again further down (bucket
      # discovery, name expansion, folder wildcards). If StdinIterator is
      # single-pass, the later passes would see no URLs -- confirm.
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rm command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    # Buckets named directly on the command line are deleted last, once their
    # contents are gone; remember both the resolved URLs and the raw strings.
    bucket_urls_to_delete = []
    bucket_strings_to_delete = []
    if self.recursion_requested:
      bucket_fields = ['id']
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsBucket() or url.IsProvider():
          for blr in self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields):
            bucket_urls_to_delete.append(blr.storage_url)
            bucket_strings_to_delete.append(url_str)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    # Used to track if any files failed to be removed.
    self.everything_removed_okay = True

    try:
      # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
      name_expansion_iterator = NameExpansionIterator(
          self.command_name, self.debug, self.logger, self.gsutil_api,
          url_strs, self.recursion_requested, project_id=self.project_id,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations)

      # Perform remove requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                 _RemoveExceptionHandler,
                 fail_on_error=(not self.continue_on_error))

    # Assuming the bucket has versioning enabled, URLs that don't map to
    # objects should throw an error even with all_versions, since the prior
    # round of deletes only sends objects to a history table.
    # This assumption that rm -a is only called for versioned buckets should be
    # corrected, but the fix is non-trivial.
    except CommandException as e:
      # Don't raise if there are buckets to delete -- it's valid to say:
      #   gsutil rm -r gs://some_bucket
      # if the bucket is empty.
      if not bucket_urls_to_delete and not self.continue_on_error:
        raise
      # Reset the failure count if we failed due to an empty bucket that we're
      # going to delete.
      msg = 'No URLs matched: '
      if msg in str(e):
        parts = str(e).split(msg)
        if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
          ResetFailureCount()
    # Was "except ServiceException, e:", which is Python-2-only syntax and
    # bound a name that was never used.
    except ServiceException:
      if not self.continue_on_error:
        raise

    if not self.everything_removed_okay and not self.continue_on_error:
      raise CommandException('Some files could not be removed.')

    # If this was a gsutil rm -r command covering any bucket subdirs,
    # remove any dir_$folder$ objects (which are created by various web UI
    # tools to simulate folders).
    if self.recursion_requested:
      had_previous_failures = GetFailureCount() > 0
      folder_object_wildcards = []
      for url_str in url_strs:
        url = StorageUrlFromString(url_str)
        if url.IsObject():
          folder_object_wildcards.append('%s**_$folder$' % url_str)
      if folder_object_wildcards:
        self.continue_on_error = True
        try:
          name_expansion_iterator = NameExpansionIterator(
              self.command_name, self.debug,
              self.logger, self.gsutil_api,
              folder_object_wildcards, self.recursion_requested,
              project_id=self.project_id,
              all_versions=self.all_versions)
          # When we're removing folder objects, always continue on error
          self.Apply(_RemoveFuncWrapper, name_expansion_iterator,
                     _RemoveFoldersExceptionHandler,
                     fail_on_error=False)
        except CommandException as e:
          # Ignore exception from name expansion due to an absent folder file.
          if not e.reason.startswith('No URLs matched:'):
            raise
        # Failures recorded only by the folder pass should not count against
        # the command when everything before it succeeded.
        if not had_previous_failures:
          ResetFailureCount()

    # Now that all data has been deleted, delete any bucket URLs.
    for url in bucket_urls_to_delete:
      self.logger.info('Removing %s...', url)

      # Retried on NotEmptyException (up to 3 tries, per the decorator
      # arguments). The closure reads `url` from the loop, which is safe only
      # because it is invoked immediately within the same iteration.
      @Retry(NotEmptyException, tries=3, timeout_secs=1)
      def BucketDeleteWithRetry():
        self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

      BucketDeleteWithRetry()

    return 0

  def RemoveFunc(self, name_expansion_result, thread_state=None):
    """Deletes the single object described by a name expansion result.

    Args:
      name_expansion_result: Result whose expanded_storage_url identifies the
          object (bucket, name, generation and scheme) to delete.
      thread_state: Passed to GetCloudApiInstance to obtain the API instance
          used for the delete call.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)