Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / telemetry / third_party / gsutilz / gslib / commands / setmeta.py
blob8208341e08cb6344d9a34d08afca6eadd9a062ff
1 # -*- coding: utf-8 -*-
2 # Copyright 2012 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of setmeta command for setting cloud object metadata."""
17 from __future__ import absolute_import
19 from gslib.cloud_api import AccessDeniedException
20 from gslib.cloud_api import PreconditionException
21 from gslib.cloud_api import Preconditions
22 from gslib.command import Command
23 from gslib.command_argument import CommandArgument
24 from gslib.cs_api_map import ApiSelector
25 from gslib.exception import CommandException
26 from gslib.name_expansion import NameExpansionIterator
27 from gslib.storage_url import StorageUrlFromString
28 from gslib.translation_helper import CopyObjectMetadata
29 from gslib.translation_helper import ObjectMetadataFromHeaders
30 from gslib.translation_helper import PreconditionsFromHeaders
31 from gslib.util import GetCloudApiInstance
32 from gslib.util import NO_MAX
33 from gslib.util import Retry
36 _SYNOPSIS = """
37 gsutil setmeta -h [header:value|header] ... url...
38 """
40 _DETAILED_HELP_TEXT = ("""
41 <B>SYNOPSIS</B>
42 """ + _SYNOPSIS + """
45 <B>DESCRIPTION</B>
46 The gsutil setmeta command allows you to set or remove the metadata on one
47 or more objects. It takes one or more header arguments followed by one or
48 more URLs, where each header argument is in one of two forms:
50 - if you specify header:value, it will set the given header on all
51 named objects.
53 - if you specify header (with no value), it will remove the given header
54 from all named objects.
56 For example, the following command would set the Content-Type and
57 Cache-Control and remove the Content-Disposition on the specified objects:
59 gsutil setmeta -h "Content-Type:text/html" \\
60 -h "Cache-Control:public, max-age=3600" \\
61 -h "Content-Disposition" gs://bucket/*.html
63 If you have a large number of objects to update you might want to use the
64 gsutil -m option, to perform a parallel (multi-threaded/multi-processing)
65 update:
67 gsutil -m setmeta -h "Content-Type:text/html" \\
68 -h "Cache-Control:public, max-age=3600" \\
69 -h "Content-Disposition" gs://bucket/*.html
71 You can also use the setmeta command to set custom metadata on an object:
73 gsutil setmeta -h "x-goog-meta-icecreamflavor:vanilla" gs://bucket/object
75 See "gsutil help metadata" for details about how you can set metadata
76 while uploading objects, what metadata fields can be set and the meaning of
77 these fields, use of custom metadata, and how to view currently set metadata.
79 NOTE: By default, publicly readable objects are served with a Cache-Control
80 header allowing such objects to be cached for 3600 seconds. For more details
81 about this default behavior see the CACHE-CONTROL section of
82 "gsutil help metadata". If you need to ensure that updates become visible
83 immediately, you should set a Cache-Control header of "Cache-Control:private,
84 max-age=0, no-transform" on such objects. You can do this with the command:
86 gsutil setmeta -h "Content-Type:text/html" \\
87 -h "Cache-Control:private, max-age=0, no-transform" gs://bucket/*.html
89 The setmeta command reads each object's current generation and metageneration
90 and uses those as preconditions unless they are otherwise specified by
91 top-level arguments. For example:
93 gsutil -h "x-goog-if-metageneration-match:2" setmeta
94 -h "x-goog-meta-icecreamflavor:vanilla"
96 will set the icecreamflavor:vanilla metadata if the current live object has a
97 metageneration of 2.
99 <B>OPTIONS</B>
100 -h Specifies a header:value to be added, or header to be removed,
101 from each named object.
102 """)
# Setmeta works with a header-style model, which does not map cleanly onto the
# way the JSON API represents object metadata. The fields below are the ones
# gsutil3 supported at the time gsutil4 was released.
SETTABLE_FIELDS = [
    'cache-control',
    'content-disposition',
    'content-encoding',
    'content-language',
    'content-md5',
    'content-type',
]
def _SetMetadataExceptionHandler(cls, e):
  """Logs a per-object failure and records that the run was not clean.

  Args:
    cls: Command instance whose logger and everything_set_okay flag are used.
    e: The exception raised while setting metadata on an object.
  """
  # Log first, then flip the flag that RunCommand inspects once Apply returns.
  cls.logger.error(e)
  cls.everything_set_okay = False
def _SetMetadataFuncWrapper(cls, name_expansion_result, thread_state=None):
  """Module-level trampoline that forwards to cls.SetMetadataFunc.

  Args:
    cls: Command instance that provides SetMetadataFunc.
    name_expansion_result: Passed through unchanged to SetMetadataFunc.
    thread_state: Passed through unchanged as the thread_state keyword.
  """
  # The result of SetMetadataFunc is intentionally not returned.
  cls.SetMetadataFunc(name_expansion_result, thread_state=thread_state)
class SetMetaCommand(Command):
  """Implementation of gsutil setmeta command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'setmeta',
      command_name_aliases=['setheader'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='h:rR',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='setmeta',
      help_name_aliases=['setheader'],
      help_type='command_help',
      help_one_line_summary='Set metadata on already uploaded objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for the setmeta command.

    Returns:
      0 when metadata was set on every matched object.

    Raises:
      CommandException: if a canned-ACL header is supplied, if the header
          arguments are invalid, if a single non-recursive URL does not name
          an object, or if metadata could not be set on some objects.
    """
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          # Inspect only the header name, case-insensitively. A substring
          # test over the whole argument would miss "X-Goog-Acl:..." and
          # would reject values that merely contain the text "x-goog-acl".
          header_name = a.partition(':')[0].strip().lower()
          if header_name in ('x-goog-acl', 'x-amz-acl'):
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    # A header mapped to the empty string marks that header for removal.
    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        self.args, self.recursion_requested, all_versions=self.all_versions,
        continue_on_error=self.parallel_operations)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                 _SetMetadataExceptionHandler, fail_on_error=True)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0

  @Retry(PreconditionException, tries=3, timeout_secs=1)
  def SetMetadataFunc(self, name_expansion_result, thread_state=None):
    """Sets metadata on an object.

    Reads the object's current generation and metageneration and uses them as
    preconditions for the patch unless the caller supplied explicit ones.

    Args:
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Setting metadata on %s...', exp_src_url)

    fields = ['generation', 'metadata', 'metageneration']
    cloud_obj_metadata = gsutil_api.GetObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name,
        generation=exp_src_url.generation, provider=exp_src_url.scheme,
        fields=fields)

    # Build a fresh Preconditions per object so that defaults filled in for
    # one object never leak into the shared self.preconditions.
    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    # Patch handles the patch semantics for most metadata, but we need to
    # merge the custom metadata field manually.
    patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

    api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
    # For XML we only want to patch through custom metadata that has
    # changed, so the patch object is sent as-is. For JSON we need to build
    # the complete set by merging the change into the existing metadata.
    if api == ApiSelector.JSON:
      CopyObjectMetadata(patch_obj_metadata, cloud_obj_metadata,
                         override=True)
      patch_obj_metadata = cloud_obj_metadata
      # Patch body does not need the object generation and metageneration.
      patch_obj_metadata.generation = None
      patch_obj_metadata.metageneration = None

    gsutil_api.PatchObjectMetadata(
        exp_src_url.bucket_name, exp_src_url.object_name, patch_obj_metadata,
        generation=exp_src_url.generation, preconditions=preconditions,
        provider=exp_src_url.scheme)

  def _ParseMetadataHeaders(self, headers):
    """Validates and parses metadata changes from the headers argument.

    Args:
      headers: Iterable of "header:value" (set) or "header" (remove) strings.

    Returns:
      (metadata_minus, metadata_plus): Set of lowercased header names to
      remove, and dict mapping lowercased header names to the values to set.

    Raises:
      CommandException: if a header name or a non-custom header value is not
          ASCII, if a header is specified more than once, or if a header is
          neither a settable field nor a custom metadata field.
    """
    metadata_minus = set()
    cust_metadata_minus = set()
    metadata_plus = {}
    cust_metadata_plus = {}
    # Every lowercased header name seen so far, used to detect a header that
    # is specified more than once in any combination of set and remove.
    seen_headers = set()
    duplicate_found = False

    for md_arg in headers:
      # Split on the first colon only: everything after it belongs to the
      # value, which may itself contain colons (URLs, timestamps, ...).
      (header, _, value) = md_arg.partition(':')
      _InsistAsciiHeader(header)
      # Translate headers to lowercase to match the casing assumed by our
      # sanity-checking operations.
      header = header.lower()
      if header in seen_headers:
        duplicate_found = True
      seen_headers.add(header)

      is_custom = _IsCustomMeta(header)
      if value:
        if is_custom:
          # Allow non-ASCII data for custom metadata fields.
          cust_metadata_plus[header] = value
        else:
          # Don't unicode encode other fields because that would perturb their
          # content (e.g., adding %2F's into the middle of a Cache-Control
          # value).
          _InsistAsciiHeaderValue(header, value)
          metadata_plus[header] = str(value)
      elif is_custom:
        cust_metadata_minus.add(header)
      else:
        metadata_minus.add(header)

    # Reported only after the loop so that ASCII validation errors on later
    # arguments still take precedence over the duplicate error.
    if duplicate_found:
      raise CommandException('Each header must appear at most once.')

    other_than_base_fields = set(metadata_plus).difference(SETTABLE_FIELDS)
    other_than_base_fields.update(
        metadata_minus.difference(SETTABLE_FIELDS))
    for f in other_than_base_fields:
      # This check is overly simple; it would be stronger to check, for each
      # URL argument, whether f.startswith the
      # provider metadata_prefix, but here we just parse the spec
      # once, before processing any of the URLs. This means we will not
      # detect if the user tries to set an x-goog-meta- field on an another
      # provider's object, for example.
      if not _IsCustomMeta(f):
        raise CommandException(
            'Invalid or disallowed header (%s).\nOnly these fields (plus '
            'x-goog-meta-* fields) can be set or unset:\n%s' % (
                f, sorted(SETTABLE_FIELDS)))

    metadata_plus.update(cust_metadata_plus)
    metadata_minus.update(cust_metadata_minus)
    return (metadata_minus, metadata_plus)
def _InsistAscii(string, message):
  """Raises CommandException(message) if string has a non-ASCII character.

  Args:
    string: Text to check, character by character.
    message: Error text for the raised CommandException.
  """
  if any(ord(char) >= 128 for char in string):
    raise CommandException(message)
def _InsistAsciiHeader(header):
  """Raises CommandException if the header name has a non-ASCII character.

  Args:
    header: Header name to validate.
  """
  error_text = 'Invalid non-ASCII header (%s).' % header
  _InsistAscii(header, error_text)
def _InsistAsciiHeaderValue(header, value):
  """Raises CommandException if the header value has a non-ASCII character.

  Args:
    header: Header name, used only in the error text.
    value: Header value to validate.
  """
  error_text = ('Invalid non-ASCII value (%s) was provided for header %s.'
                % (value, header))
  _InsistAscii(value, error_text)
def _IsCustomMeta(header):
  """Returns True if header starts with a custom-metadata prefix.

  Args:
    header: Header name; the comparison is case-sensitive, so callers are
        expected to pass it already lowercased.

  Returns:
    True for names beginning with 'x-goog-meta-' or 'x-amz-meta-'.
  """
  custom_prefixes = ('x-goog-meta-', 'x-amz-meta-')
  return header.startswith(custom_prefixes)