1 # -*- coding: utf-8 -*-
2 # Copyright 2015 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Helper functions for tracker file functionality."""
22 from boto
import config
23 from gslib
.exception
import CommandException
24 from gslib
.util
import CreateDirIfNeeded
25 from gslib
.util
import GetGsutilStateDir
26 from gslib
.util
import ResumableThreshold
27 from gslib
.util
import UTF8
# File name length limits differ widely from one operating system to the
# next, so tracker file names are always kept below 100 characters to stay
# clear of any such limit.
MAX_TRACKER_FILE_NAME_LENGTH = 100


# Message template used when a tracker file cannot be written. The two %s
# slots take the tracker file path and the error string, in that order.
TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT = (
    "Couldn't write tracker file (%s): %s. This can happen if gsutil is "
    "configured to save tracker files to an unwritable directory)")
class TrackerFileType(object):
  """Names the kinds of tracker file this module can produce.

  _HashAndReturnPath lower-cases the string form of a member and uses it as
  the prefix of the tracker file name, so changing a value changes where
  existing tracker files are looked up.
  """
  # NOTE(review): only PARALLEL_UPLOAD was visible in the reviewed text. The
  # other three members are referenced by GetTrackerFilePath and
  # GetRewriteTrackerFilePath; their values here follow the same naming
  # pattern and should be confirmed against the canonical file.
  UPLOAD = 'upload'
  DOWNLOAD = 'download'
  PARALLEL_UPLOAD = 'parallel_upload'
  REWRITE = 'rewrite'
def _HashFilename(filename):
  """Shortens a file name by hashing it with SHA1.

  The hashed file name has the form:

      TRACKER_<hash>.<trailing>

  where <hash> is the SHA1 hex digest of the UTF-8 encoded file name and
  <trailing> is the last 16 bytes of that encoded name. Maximum file name
  lengths vary by operating system, so the goal is to guarantee that the
  hashed version takes fewer than 100 characters.

  Args:
    filename: file name to be hashed.

  Returns:
    shorter, hashed version of passed file name
  """
  # Bring byte strings to text first so that both kinds of input go through
  # the same single encode step (byte strings must be valid UTF-8).
  if not isinstance(filename, unicode):
    filename = unicode(filename, UTF8)
  encoded_name = filename.encode(UTF8)
  digest = hashlib.sha1(encoded_name).hexdigest()
  return 'TRACKER_' + digest + '.' + encoded_name[-16:]
def CreateTrackerDirIfNeeded():
  """Looks up or creates the gsutil tracker file directory.

  The directory is read from the 'resumable_tracker_dir' option in the
  'GSUtil' config section; when that option is unset, a 'tracker-files'
  directory under the gsutil state directory is used. The directory is
  created if it doesn't already exist.

  Returns:
    The pathname to the tracker directory.
  """
  default_tracker_dir = os.path.join(GetGsutilStateDir(), 'tracker-files')
  tracker_dir = config.get(
      'GSUtil', 'resumable_tracker_dir', default_tracker_dir)
  CreateDirIfNeeded(tracker_dir)
  # Callers (e.g. _HashAndReturnPath) build file paths from this value, so it
  # must be handed back rather than falling off the end of the function.
  return tracker_dir
def GetRewriteTrackerFilePath(src_bucket_name, src_obj_name, dst_bucket_name,
                              dst_obj_name, api_selector):
  """Gets the tracker file path for a rewrite between two objects.

  Args:
    src_bucket_name: Source bucket (string).
    src_obj_name: Source object (string).
    dst_bucket_name: Destination bucket (string).
    dst_obj_name: Destination object (string)
    api_selector: API to use for this operation.

  Returns:
    File path to tracker file.
  """
  # Fold the source and destination names into a single string, then replace
  # forward and backward slashes so the result is one path component.
  name_fields = (src_bucket_name, src_obj_name, dst_bucket_name,
                 dst_obj_name, api_selector)
  unsanitized_name = 'rewrite__%s__%s__%s__%s__%s.token' % name_fields
  res_tracker_file_name = re.sub('[/\\\\]', '_', unsanitized_name)

  return _HashAndReturnPath(res_tracker_file_name, TrackerFileType.REWRITE)
def GetTrackerFilePath(dst_url, tracker_file_type, api_selector, src_url=None):
  """Gets the tracker file name described by the arguments.

  Args:
    dst_url: Destination URL for tracker file.
    tracker_file_type: TrackerFileType for this operation.
    api_selector: API to use for this operation.
    src_url: Source URL for the source file name for parallel uploads.

  Returns:
    File path to tracker file.

  Raises:
    NotImplementedError: if tracker_file_type is TrackerFileType.REWRITE;
        use GetRewriteTrackerFilePath for rewrites.
    ValueError: if tracker_file_type is not a known TrackerFileType.
  """
  if tracker_file_type == TrackerFileType.UPLOAD:
    # Encode the dest bucket and object name into the tracker file name.
    unsanitized_name = 'resumable_upload__%s__%s__%s.url' % (
        dst_url.bucket_name, dst_url.object_name, api_selector)
  elif tracker_file_type == TrackerFileType.DOWNLOAD:
    # Encode the fully-qualified dest file name into the tracker file name.
    unsanitized_name = 'resumable_download__%s__%s.etag' % (
        os.path.realpath(dst_url.object_name), api_selector)
  elif tracker_file_type == TrackerFileType.PARALLEL_UPLOAD:
    # Encode the dest bucket and object names as well as the source file name
    # into the tracker file name.
    unsanitized_name = 'parallel_upload__%s__%s__%s__%s.url' % (
        dst_url.bucket_name, dst_url.object_name, src_url, api_selector)
  elif tracker_file_type == TrackerFileType.REWRITE:
    raise NotImplementedError(
        'Use GetRewriteTrackerFilePath for rewrite tracker files.')
  else:
    # Without this branch an unknown type surfaced later as an
    # UnboundLocalError that did not name the offending value.
    raise ValueError('Unknown tracker file type: %s' % (tracker_file_type,))

  # Replace forward and backward slashes so the name is one path component.
  res_tracker_file_name = re.sub('[/\\\\]', '_', unsanitized_name)
  return _HashAndReturnPath(res_tracker_file_name, tracker_file_type)
def _HashAndReturnPath(res_tracker_file_name, tracker_file_type):
  """Builds the full tracker file path for an unhashed tracker file name.

  Args:
    res_tracker_file_name: Unhashed tracker file name.
    tracker_file_type: TrackerFileType; its lower-cased string form prefixes
        the file name.

  Returns:
    Path of the tracker file inside the tracker directory.
  """
  # The directory is looked up (and created if needed) before hashing, so
  # the side effect happens even if hashing the name fails.
  tracker_dir = CreateTrackerDirIfNeeded()
  hashed_name = _HashFilename(res_tracker_file_name)
  type_prefix = str(tracker_file_type).lower()
  tracker_file_name = '%s_%s' % (type_prefix, hashed_name)
  tracker_file_path = '%s%s%s' % (tracker_dir, os.sep, tracker_file_name)
  # Only the file name, not the full path, is held to the length limit.
  assert len(tracker_file_name) < MAX_TRACKER_FILE_NAME_LENGTH
  return tracker_file_path
def DeleteTrackerFile(tracker_file_name):
  """Deletes the given tracker file if it exists.

  Args:
    tracker_file_name: Tracker file path string; None or an empty string is
        a no-op.

  Raises:
    OSError: if the file exists but could not be removed.
  """
  if not tracker_file_name or not os.path.exists(tracker_file_name):
    return
  try:
    os.unlink(tracker_file_name)
  except OSError:
    # Another process may have removed the file between the existence check
    # and the unlink. That is the outcome we wanted, so only propagate the
    # error when the file is still there.
    if os.path.exists(tracker_file_name):
      raise
def HashRewriteParameters(
    src_obj_metadata, dst_obj_metadata, projection, src_generation=None,
    gen_match=None, meta_gen_match=None, canned_acl=None, fields=None,
    max_bytes_per_call=None):
  """Creates an MD5 hex digest of the parameters for a rewrite call.

  A rewrite can only be resumed when its input parameters are identical to
  those of the interrupted call, so the rewrite tracker file stores a hash of
  them for easy comparison. If a user performs a same-source/same-destination
  rewrite via a different command (for example, with a changed ACL), the
  hashes will not match and the rewrite restarts from the beginning.

  Args:
    src_obj_metadata: apitools Object describing source object. Must include
        bucket, name, and etag.
    dst_obj_metadata: apitools Object describing destination object. Must
        include bucket and object name
    projection: Projection used for the API call.
    src_generation: Optional source generation.
    gen_match: Optional generation precondition.
    meta_gen_match: Optional metageneration precondition.
    canned_acl: Optional canned ACL string.
    fields: Optional fields to include in response.
    max_bytes_per_call: Optional maximum bytes rewritten per call.

  Returns:
    MD5 hex digest Hash of the input parameters, or None if required
    parameters are missing.
  """
  # The chain short-circuits left to right, so attributes are only read from
  # metadata objects that are themselves truthy.
  has_required_params = (
      src_obj_metadata and
      src_obj_metadata.bucket and
      src_obj_metadata.name and
      src_obj_metadata.etag and
      dst_obj_metadata and
      dst_obj_metadata.bucket and
      dst_obj_metadata.name and
      projection)
  if not has_required_params:
    return None

  hashed_params = (
      src_obj_metadata, dst_obj_metadata, projection, src_generation,
      gen_match, meta_gen_match, canned_acl, fields, max_bytes_per_call)
  # Hashing the concatenation is equivalent to feeding each piece in turn.
  return hashlib.md5(''.join(str(p) for p in hashed_params)).hexdigest()
def ReadRewriteTrackerFile(tracker_file_name, rewrite_params_hash):
  """Attempts to read a rewrite tracker file.

  The file is expected to hold the parameter hash on its first line and the
  rewrite token on its second line.

  Args:
    tracker_file_name: Tracker file path string.
    rewrite_params_hash: MD5 hex digest of rewrite call parameters constructed
        by HashRewriteParameters.

  Returns:
    String rewrite_token for resuming rewrite requests if a matching tracker
    file exists, None otherwise (which will result in starting a new rewrite).
  """
  if not rewrite_params_hash:
    return None

  # Check to see if we already have a matching tracker file. The with block
  # guarantees the handle is closed on every path, including the early return.
  try:
    with open(tracker_file_name, 'r') as tracker_file:
      existing_hash = tracker_file.readline().rstrip('\n')
      if existing_hash == rewrite_params_hash:
        # Next line is the rewrite token.
        return tracker_file.readline().rstrip('\n')
  except IOError as e:
    # Ignore non-existent file (happens first time a rewrite is attempted),
    # but warn the user about any other read error.
    if e.errno != errno.ENOENT:
      print('Couldn\'t read Copy tracker file (%s): %s. Restarting copy '
            'from scratch.' %
            (tracker_file_name, e.strerror))
  return None
def WriteRewriteTrackerFile(tracker_file_name, rewrite_params_hash,
                            rewrite_token):
  """Writes a rewrite tracker file.

  The file holds the parameter hash on the first line and the rewrite token
  on the second.

  Args:
    tracker_file_name: Tracker file path string.
    rewrite_params_hash: MD5 hex digest of rewrite call parameters constructed
        by HashRewriteParameters.
    rewrite_token: Rewrite token string returned by the service.
  """
  tracker_contents = '%s\n%s\n' % (rewrite_params_hash, rewrite_token)
  _WriteTrackerFile(tracker_file_name, tracker_contents)
def ReadOrCreateDownloadTrackerFile(src_obj_metadata, dst_url,
                                    api_selector):
  """Checks for a download tracker file and creates one if it does not exist.

  Args:
    src_obj_metadata: Metadata for the source object. Must include
        etag.
    dst_url: Destination file StorageUrl.
    api_selector: API mode to use (for tracker file naming).

  Returns:
    True if the tracker file already exists (resume existing download),
    False if we created a new tracker file (new download).
  """
  if src_obj_metadata.size < ResumableThreshold():
    # Don't create a tracker file for a small downloads; cross-process resumes
    # won't work, but restarting a small download is inexpensive.
    return False

  assert src_obj_metadata.etag
  tracker_file_name = GetTrackerFilePath(
      dst_url, TrackerFileType.DOWNLOAD, api_selector)

  # Check to see if we already have a matching tracker file. The with block
  # closes the handle on every path.
  try:
    with open(tracker_file_name, 'r') as tracker_file:
      etag_value = tracker_file.readline().rstrip('\n')
  except IOError as e:
    # Ignore non-existent file (happens first time a download
    # is attempted on an object), but warn user for other errors.
    if e.errno != errno.ENOENT:
      print('Couldn\'t read URL tracker file (%s): %s. Restarting '
            'download from scratch.' %
            (tracker_file_name, e.strerror))
  else:
    if etag_value == src_obj_metadata.etag:
      return True

  # Otherwise, create a new tracker file and start from scratch.
  _WriteTrackerFile(tracker_file_name, '%s\n' % src_obj_metadata.etag)
  # Return the documented False explicitly instead of an implicit None.
  return False
def _WriteTrackerFile(tracker_file_name, data):
  """Creates a tracker file, storing the input data.

  The file is created with owner-only read/write permissions (0o600) if it
  does not exist, and any previous contents are discarded.

  Args:
    tracker_file_name: Tracker file path string.
    data: String to store in the tracker file.

  Returns:
    False once the data has been written.
    NOTE(review): no caller visible here reads this value; it is kept so the
    function's result stays falsy for any caller elsewhere.

  Raises:
    CommandException: (via RaiseUnwritableTrackerFileException) if the file
        cannot be opened or written.
  """
  try:
    # O_TRUNC matters when an existing tracker file is replaced by shorter
    # data: without it the tail of the old contents would survive the write.
    # The mode is spelled 0o600 because that form parses on Python 2.6+ and 3.
    fd = os.open(tracker_file_name,
                 os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as tf:
      tf.write(data)
    return False
  except (IOError, OSError) as e:
    raise RaiseUnwritableTrackerFileException(tracker_file_name, e.strerror)
def RaiseUnwritableTrackerFileException(tracker_file_name, error_str):
  """Raises an exception when unable to write the tracker file.

  Args:
    tracker_file_name: Tracker file path string that could not be written.
    error_str: Description of the underlying error.

  Raises:
    CommandException: always, with a message built from
        TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT.
  """
  failure_details = (tracker_file_name, error_str)
  message = TRACKER_FILE_UNWRITABLE_EXCEPTION_TEXT % failure_details
  raise CommandException(message)