Only grant permissions to new extensions from sync if they have the expected version
[chromium-blink-merge.git] / tools / telemetry / third_party / gsutilz / gslib / tracker_file.py
blob4fddc8a13d5d81336fdc73d7e4077e3b68cb0690
1 # -*- coding: utf-8 -*-
2 # Copyright 2015 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Helper functions for tracker file functionality."""
17 import errno
18 import hashlib
19 import os
20 import re
22 from boto import config
23 from gslib.exception import CommandException
24 from gslib.util import CreateDirIfNeeded
25 from gslib.util import GetGsutilStateDir
26 from gslib.util import ResumableThreshold
27 from gslib.util import UTF8
# File name length limits differ widely between operating systems, so tracker
# file names are always kept under 100 characters to stay clear of them.
MAX_TRACKER_FILE_NAME_LENGTH = 100


# Message template for a failed tracker file write. It takes two values:
# the tracker file path and the error description.
TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT = (
    "Couldn't write tracker file (%s): %s. This can happen if gsutil is "
    "configured to save tracker files to an unwritable directory)")
class TrackerFileType(object):
    """String constants naming the kinds of tracker file this module handles.

    The value is lower-cased and used as the leading component of the tracker
    file name built by _HashAndReturnPath.
    """
    # Resumable upload to a bucket/object.
    UPLOAD = 'upload'
    # Resumable download to a local file.
    DOWNLOAD = 'download'
    # Parallel upload; the name also encodes the source.
    PARALLEL_UPLOAD = 'parallel_upload'
    # Rewrite; paths come from GetRewriteTrackerFilePath.
    REWRITE = 'rewrite'
def _HashFilename(filename):
    """Shortens a file name by replacing most of it with its SHA1 digest.

    The returned name has the form:

      TRACKER_<hash>.<trailing>

    where <hash> is the SHA1 hex digest of the UTF-8 encoded file name and
    <trailing> is the last 16 characters of that encoded name. Maximum file
    name lengths vary by operating system; this form is at most 65 characters
    long (8 + 40 + 1 + 16), comfortably below the 100-character limit this
    module enforces.

    Args:
      filename: file name to be hashed.

    Returns:
      Shorter, hashed version of the passed file name.
    """
    if not isinstance(filename, unicode):
        # Byte strings are decoded as UTF-8 first and then re-encoded below.
        filename = unicode(filename, UTF8)
    encoded_name = filename.encode(UTF8)
    digest = hashlib.sha1(encoded_name).hexdigest()
    return 'TRACKER_' + digest + '.' + encoded_name[-16:]
def CreateTrackerDirIfNeeded():
    """Returns the gsutil tracker file directory, creating it when absent.

    The directory is taken from the 'resumable_tracker_dir' option in the
    'GSUtil' section of the boto config. When the option is not set, the
    'tracker-files' directory under the gsutil state directory is used.

    Returns:
      The pathname to the tracker directory.
    """
    default_dir = os.path.join(GetGsutilStateDir(), 'tracker-files')
    tracker_dir = config.get('GSUtil', 'resumable_tracker_dir', default_dir)
    CreateDirIfNeeded(tracker_dir)
    return tracker_dir
def GetRewriteTrackerFilePath(src_bucket_name, src_obj_name, dst_bucket_name,
                              dst_obj_name, api_selector):
    """Builds the tracker file path for a rewrite operation.

    Args:
      src_bucket_name: Source bucket (string).
      src_obj_name: Source object (string).
      dst_bucket_name: Destination bucket (string).
      dst_obj_name: Destination object (string).
      api_selector: API to use for this operation.

    Returns:
      File path to tracker file.
    """
    # The source and destination bucket/object names and the API are all
    # encoded into the name.
    raw_name = 'rewrite__%s__%s__%s__%s__%s.token' % (
        src_bucket_name, src_obj_name, dst_bucket_name, dst_obj_name,
        api_selector)
    # Slashes and backslashes are replaced with underscores before hashing.
    sanitized_name = re.sub('[/\\\\]', '_', raw_name)
    return _HashAndReturnPath(sanitized_name, TrackerFileType.REWRITE)
def GetTrackerFilePath(dst_url, tracker_file_type, api_selector, src_url=None):
    """Gets the tracker file name described by the arguments.

    Args:
      dst_url: Destination URL for tracker file.
      tracker_file_type: TrackerFileType for this operation.
      api_selector: API to use for this operation.
      src_url: Source URL for the source file name for parallel uploads.

    Returns:
      File path to tracker file.

    Raises:
      NotImplementedError: if tracker_file_type is TrackerFileType.REWRITE;
          use GetRewriteTrackerFilePath for rewrites instead.
      ValueError: if tracker_file_type is not a known TrackerFileType value.
    """
    if tracker_file_type == TrackerFileType.UPLOAD:
        # Encode the dest bucket and object name into the tracker file name.
        raw_name = 'resumable_upload__%s__%s__%s.url' % (
            dst_url.bucket_name, dst_url.object_name, api_selector)
    elif tracker_file_type == TrackerFileType.DOWNLOAD:
        # Encode the fully-qualified dest file name into the tracker file
        # name.
        raw_name = 'resumable_download__%s__%s.etag' % (
            os.path.realpath(dst_url.object_name), api_selector)
    elif tracker_file_type == TrackerFileType.PARALLEL_UPLOAD:
        # Encode the dest bucket and object names as well as the source file
        # name into the tracker file name.
        raw_name = 'parallel_upload__%s__%s__%s__%s.url' % (
            dst_url.bucket_name, dst_url.object_name, src_url, api_selector)
    elif tracker_file_type == TrackerFileType.REWRITE:
        # Should use GetRewriteTrackerFilePath instead.
        raise NotImplementedError()
    else:
        # Without this branch an unrecognised type fell through to the return
        # below and failed with an UnboundLocalError on the unset name.
        raise ValueError(
            'Unknown tracker file type: %s' % (tracker_file_type,))

    # Slashes and backslashes are replaced with underscores before hashing.
    res_tracker_file_name = re.sub('[/\\\\]', '_', raw_name)
    return _HashAndReturnPath(res_tracker_file_name, tracker_file_type)
def _HashAndReturnPath(res_tracker_file_name, tracker_file_type):
    """Hashes a tracker file name and returns its path in the tracker dir.

    Args:
      res_tracker_file_name: Unhashed tracker file name.
      tracker_file_type: TrackerFileType; its lower-cased string form becomes
          the prefix of the final file name.

    Returns:
      Path of the tracker file inside the tracker directory (which is created
      if it does not exist yet).
    """
    tracker_dir = CreateTrackerDirIfNeeded()
    hashed_name = _HashFilename(res_tracker_file_name)
    type_prefix = str(tracker_file_type).lower()
    tracker_file_name = '%s_%s' % (type_prefix, hashed_name)
    # Sanity check that the generated name stays below the portable limit.
    assert len(tracker_file_name) < MAX_TRACKER_FILE_NAME_LENGTH
    return '%s%s%s' % (tracker_dir, os.sep, tracker_file_name)
def DeleteTrackerFile(tracker_file_name):
    """Deletes the given tracker file if it exists.

    Args:
      tracker_file_name: Tracker file path string. A false value (None or '')
          or a path that does not exist is a no-op.

    Raises:
      OSError: if the file exists but cannot be removed.
    """
    if not tracker_file_name or not os.path.exists(tracker_file_name):
        return
    try:
        os.unlink(tracker_file_name)
    except OSError as e:
        # The file can vanish between the existence check and the unlink
        # (e.g. removed by another process or thread); it is gone either way,
        # so that is not an error. Anything else is still reported.
        if e.errno != errno.ENOENT:
            raise
def HashRewriteParameters(
        src_obj_metadata, dst_obj_metadata, projection, src_generation=None,
        gen_match=None, meta_gen_match=None, canned_acl=None, fields=None,
        max_bytes_per_call=None):
    """Creates an MD5 hex digest of the parameters for a rewrite call.

    Resuming rewrites requires that the input parameters are identical. Thus,
    the rewrite tracker file needs to represent the input parameters. For
    easy comparison, hash the input values. If a user performs a
    same-source/same-destination rewrite via a different command (for example,
    with a changed ACL), the hashes will not match and we will restart the
    rewrite from the beginning.

    Args:
      src_obj_metadata: apitools Object describing source object. Must include
          bucket, name, and etag.
      dst_obj_metadata: apitools Object describing destination object. Must
          include bucket and object name.
      projection: Projection used for the API call.
      src_generation: Optional source generation.
      gen_match: Optional generation precondition.
      meta_gen_match: Optional metageneration precondition.
      canned_acl: Optional canned ACL string.
      fields: Optional fields to include in response.
      max_bytes_per_call: Optional maximum bytes rewritten per call.

    Returns:
      MD5 hex digest hash of the input parameters, or None if required
      parameters are missing.
    """
    if (not src_obj_metadata or
            not src_obj_metadata.bucket or
            not src_obj_metadata.name or
            not src_obj_metadata.etag or
            not dst_obj_metadata or
            not dst_obj_metadata.bucket or
            not dst_obj_metadata.name or
            not projection):
        return None

    md5_hash = hashlib.md5()
    for input_param in (
            src_obj_metadata, dst_obj_metadata, projection, src_generation,
            gen_match, meta_gen_match, canned_acl, fields,
            max_bytes_per_call):
        param_bytes = str(input_param)
        if not isinstance(param_bytes, bytes):
            # hashlib only accepts bytes; encode when str() produced text.
            param_bytes = param_bytes.encode('utf-8')
        # Prefix every value with its length. Feeding the values back to back
        # made different parameter sets hash identically whenever their
        # concatenation matched (e.g. generations 1 and 23 versus 12 and 3).
        # NOTE: digests therefore differ from the undelimited scheme; a
        # tracker file holding such a digest no longer matches, so the
        # rewrite starts over rather than resuming.
        md5_hash.update(str(len(param_bytes)).encode('ascii') + b':')
        md5_hash.update(param_bytes)
    return md5_hash.hexdigest()
def ReadRewriteTrackerFile(tracker_file_name, rewrite_params_hash):
    """Attempts to read a rewrite tracker file.

    The file is expected to hold the parameter hash on its first line and the
    rewrite token on its second.

    Args:
      tracker_file_name: Tracker file path string.
      rewrite_params_hash: MD5 hex digest of rewrite call parameters
          constructed by HashRewriteParameters.

    Returns:
      String rewrite_token for resuming rewrite requests if a matching tracker
      file exists, None otherwise (which will result in starting a new
      rewrite).
    """
    if not rewrite_params_hash:
        return None
    try:
        with open(tracker_file_name, 'r') as tracker_file:
            stored_hash = tracker_file.readline().rstrip('\n')
            if stored_hash != rewrite_params_hash:
                # Written for a different set of parameters; not resumable.
                return None
            # Next line is the rewrite token.
            return tracker_file.readline().rstrip('\n')
    except IOError as e:
        # A missing file is expected the first time a rewrite is attempted;
        # any other read problem is reported.
        if e.errno != errno.ENOENT:
            print('Couldn\'t read Copy tracker file (%s): %s. Restarting copy '
                  'from scratch.' %
                  (tracker_file_name, e.strerror))
    return None
def WriteRewriteTrackerFile(tracker_file_name, rewrite_params_hash,
                            rewrite_token):
    """Writes a rewrite tracker file.

    The file holds two lines: the parameter hash, then the rewrite token.

    Args:
      tracker_file_name: Tracker file path string.
      rewrite_params_hash: MD5 hex digest of rewrite call parameters
          constructed by HashRewriteParameters.
      rewrite_token: Rewrite token string returned by the service.
    """
    contents = '%s\n%s\n' % (rewrite_params_hash, rewrite_token)
    _WriteTrackerFile(tracker_file_name, contents)
def ReadOrCreateDownloadTrackerFile(src_obj_metadata, dst_url,
                                    api_selector):
    """Checks for a download tracker file and creates one if it does not exist.

    Args:
      src_obj_metadata: Metadata for the source object. Must include
          etag and size.
      dst_url: Destination file StorageUrl.
      api_selector: API mode to use (for tracker file naming).

    Returns:
      True if the tracker file already exists (resume existing download),
      False if we created a new tracker file (new download) or if the object
      is below the resumable threshold, in which case no tracker file is
      used at all.
    """
    if src_obj_metadata.size < ResumableThreshold():
        # Don't create a tracker file for a small download; cross-process
        # resumes won't work, but restarting a small download is inexpensive.
        return False

    assert src_obj_metadata.etag
    tracker_file_name = GetTrackerFilePath(
        dst_url, TrackerFileType.DOWNLOAD, api_selector)

    # Check to see if we already have a matching tracker file.
    try:
        with open(tracker_file_name, 'r') as tracker_file:
            etag_value = tracker_file.readline().rstrip('\n')
        if etag_value == src_obj_metadata.etag:
            return True
    except IOError as e:
        # Ignore non-existent file (happens first time a download
        # is attempted on an object), but warn user for other errors.
        if e.errno != errno.ENOENT:
            print('Couldn\'t read URL tracker file (%s): %s. Restarting '
                  'download from scratch.' %
                  (tracker_file_name, e.strerror))

    # Otherwise, create a new tracker file and start from scratch.
    _WriteTrackerFile(tracker_file_name, '%s\n' % src_obj_metadata.etag)
    # Return the documented False explicitly; this path used to fall off the
    # end of the function and return None.
    return False
def _WriteTrackerFile(tracker_file_name, data):
    """Creates (or overwrites) a tracker file, storing the input data.

    The file is created with mode 0600 when it does not already exist.

    Args:
      tracker_file_name: Tracker file path string.
      data: String contents to store in the file.

    Returns:
      False. The value carries no meaning; it is kept only so the return
      value stays what it has always been.

    Raises:
      CommandException: if the file cannot be opened or written.
    """
    try:
        # O_TRUNC discards previous contents. Without it, writing data shorter
        # than what the file already held left stale bytes after the new data.
        fd = os.open(tracker_file_name,
                     os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as tf:
            tf.write(data)
        return False
    except (IOError, OSError) as e:
        # This helper always raises CommandException.
        RaiseUnwritableTrackerFileException(tracker_file_name, e.strerror)
def RaiseUnwritableTrackerFileException(tracker_file_name, error_str):
    """Raises an exception when unable to write the tracker file.

    Args:
      tracker_file_name: Tracker file path string that could not be written.
      error_str: Description of the underlying error.

    Raises:
      CommandException: always, with a message naming the file and the error.
    """
    message = TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT % (
        tracker_file_name, error_str)
    raise CommandException(message)