# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Integration tests for rsync command."""
import os

import crcmod

import gslib.tests.testcase as testcase
from gslib.tests.testcase.integration_testcase import SkipForS3
from gslib.tests.util import ObjectToURI as suri
from gslib.tests.util import PerformsFileToObjectUpload
from gslib.tests.util import SetBotoConfigForTest
from gslib.tests.util import unittest
from gslib.util import IS_WINDOWS
from gslib.util import Retry
from gslib.util import UsingCrcmodExtension
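
# Stderr produced by an rsync run that finds nothing to copy or delete; the
# "no more changes" checks throughout this suite compare against it.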
NO_CHANGES = 'Building synchronization state...\nStarting synchronization\n'


def _TailSet(start_point, listing):
  """Returns set of object name tails.

  Tails can be compared between source and dest, past the point at which rsync
  was done. For example if test ran rsync gs://bucket1/dir gs://bucket2/dir2,
  the tails for listings from bucket1 would start after "dir", while the tails
  for listings from bucket2 would start after "dir2".

  Args:
    start_point: The target of the rsync command, e.g., for the above command
        it would be gs://bucket1/dir for the bucket1 listing results and
        gs://bucket2/dir2 for the bucket2 listing results.
    listing: The listing over which to compute tail.

  Returns:
    Object name tails.
  """
  return set(l[len(start_point):] for l in listing.strip().split('\n'))
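
# For illustration (hypothetical values): given
#   start_point = 'gs://bucket1/dir'
#   listing = 'gs://bucket1/dir/obj1\ngs://bucket1/dir/subdir/obj3\n'
# _TailSet returns set(['/obj1', '/subdir/obj3']), so listings rooted at
# different URLs can be compared directly.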

# TODO: Add inspection to the retry wrappers in this test suite where the state
# at the end of a retry block is depended upon by subsequent tests (since
# listing content can vary depending on which backend server is reached until
# eventual consistency is reached).
# TODO: Remove retry wrappers and AssertNObjectsInBucket calls if GCS ever
# supports strong listing consistency.
class TestRsync(testcase.GsUtilIntegrationTestCase):
  """Integration tests for rsync command."""

  @staticmethod
  def _FlatListDir(directory):
    """Perform a flat listing over directory.

    Args:
      directory: The directory to list.

    Returns:
      Listings with path separators canonicalized to '/', to make assertions
      easier for Linux vs Windows.
    """
    result = []
    for dirpath, _, filenames in os.walk(directory):
      for f in filenames:
        result.append(os.path.join(dirpath, f))
    return '\n'.join(result).replace('\\', '/')
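
  # For illustration (hypothetical paths): for a directory /tmp/d containing
  # obj1 and subdir/obj2, _FlatListDir('/tmp/d') returns
  # '/tmp/d/obj1\n/tmp/d/subdir/obj2' (with '\\' canonicalized to '/' on
  # Windows).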

  def _FlatListBucket(self, bucket_url_string):
    """Perform a flat listing over bucket_url_string."""
    return self.RunGsUtil(['ls', suri(bucket_url_string, '**')],
                          return_stdout=True)
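
  # For illustration (hypothetical bucket): for gs://b containing obj1 and
  # subdir/obj2, self._FlatListBucket('gs://b') runs `gsutil ls gs://b/**`
  # and returns stdout such as 'gs://b/obj1\ngs://b/subdir/obj2\n'.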

  def test_invalid_args(self):
    """Tests various invalid argument cases."""
    bucket_uri = self.CreateBucket()
    obj1 = self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                             contents='obj1')
    tmpdir = self.CreateTempDir()
    # rsync object to bucket.
    self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
    # rsync bucket to object.
    self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
    # rsync bucket to non-existent bucket.
    self.RunGsUtil(['rsync', suri(bucket_uri), self.nonexistent_bucket_name],
                   expected_status=1)
    # rsync object to dir.
    self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
    # rsync dir to object.
    self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
    # rsync dir to non-existent bucket.
    self.RunGsUtil(['rsync', tmpdir, self.nonexistent_bucket_name],
                   expected_status=1)

  # Note: The tests below exercise the cases
  # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
  # all the cases but then have just one test without -d (test_bucket_to_bucket)
  # as representative of handling without the -d option. This provides
  # reasonable test coverage because the -d handling is src/dest URI-type
  # independent, and keeps the test case combinations more manageable.

  def test_bucket_to_bucket(self):
    """Tests that flat and recursive rsync between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we detect
    # and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have new objects added from source bucket (without
      # removing extraneous objects found in dest bucket), and without the
      # subdir objects synchronized.
      self.assertEquals(listing2,
                        set(['/obj1', '/obj2', '/obj4', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', suri(bucket1_uri), suri(bucket2_uri)], return_stderr=True))
    _Check2()

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should have objects that were newly added to first bucket
      # (without removing extraneous dest bucket objects), and with the
      # subdir objects synchronized (since this run used -r).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4', '/obj6',
                                       '/obj7', '/subdir/obj3',
                                       '/subdir/obj5']))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check4()

  def test_bucket_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync -d between 2 buckets works."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we detect
    # and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have content like first bucket but without the
      # subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

    # Now add and remove some objects in each bucket and test rsync -d -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-d', '-r',
                      suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should now mirror the first bucket, with the subdir
      # objects synchronized (since this run used -r).
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check4()

  # Test sequential upload as well as parallel composite upload case.
  @PerformsFileToObjectUpload
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync dir to bucket works correctly."""
    # Create dir and bucket with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Need to make sure the bucket listing is caught up, otherwise the
    # first rsync may not see obj2 in the bucket and will overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (dir to bucket sync doesn't use checksums
      # unless you specify -c).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('OBJ2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check2()

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (dir to bucket sync with -c uses checksums).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -d -r.
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj7',
                      contents='obj7')
    os.unlink(os.path.join(tmpdir, 'obj1'))
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Bucket should now mirror the dir, with the subdir objects synchronized
      # (since this run used -r).
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check6()

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_dir_minus_d(self):
    """Tests that flat and recursive rsync dir to dir works correctly."""
    # Create 2 dirs with 1 overlapping file, 1 extra file at root
    # level in each, and 1 extra file 1 level down in each. Make the
    # overlapping files named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    subdir1 = os.path.join(tmpdir1, 'subdir1')
    subdir2 = os.path.join(tmpdir2, 'subdir2')
    os.mkdir(subdir1)
    os.mkdir(subdir2)
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2')
    self.CreateTempFile(
        tmpdir=subdir1, file_name='obj3', contents='subdir1/obj3')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4')
    self.CreateTempFile(
        tmpdir=subdir2, file_name='obj5', contents='subdir2/obj5')

    self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir1 objects
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest objects that had same length but different
    # checksums were not synchronized (dir to dir sync doesn't use checksums
    # unless you specify -c).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('OBJ2', '\n'.join(f.readlines()))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
    _Check1()

    # Now rerun the sync with the -c option.
    self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir1 objects
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest objects that had same length but different
    # content were synchronized (dir to dir sync with -c uses checksums).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', tmpdir1, tmpdir2], return_stderr=True))
    _Check2()

    # Now add and remove some objects in both dirs and test rsync -d -r.
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7')
    os.unlink(os.path.join(tmpdir1, 'obj1'))
    os.unlink(os.path.join(tmpdir2, 'obj2'))

    self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3']))
    # dir2 should now mirror dir1, with the subdir1 objects synchronized
    # (since this run used -r).
    self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3']))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', tmpdir1, tmpdir2], return_stderr=True))
    _Check3()

  def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
    """Tests concurrently building listing from multiple tmp file ranges."""
    # Create 2 dirs, where each dir has 1000 objects and differing names.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    for i in range(0, 1000):
      self.CreateTempFile(tmpdir=tmpdir1, file_name='d1-%s' % i, contents='x')
      self.CreateTempFile(tmpdir=tmpdir2, file_name='d2-%s' % i, contents='y')

    # We open a new temp file each time we reach rsync_buffer_lines of
    # listing output. On Windows, this will result in a 'too many open file
    # handles' error, so choose a larger value so as not to open so many files.
    rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
                            '50' if IS_WINDOWS else '2')]
    # Run gsutil with config option to make buffer size << # files.
    with SetBotoConfigForTest(rsync_buffer_config):
      self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    self.assertEquals(listing1, listing2)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
    _Check()

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_bucket_to_dir_minus_d(self):
    """Tests that flat and recursive rsync bucket to dir works correctly."""
    # Create bucket and dir with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4')
    self.CreateTempFile(tmpdir=subdir, file_name='obj5', contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (bucket to dir sync doesn't use checksums
      # unless you specify -c).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('OBJ2', '\n'.join(f.readlines()))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check2()

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (bucket to dir sync with -c uses checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -d -r.
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj6',
                      contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7')
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
    os.unlink(os.path.join(tmpdir, 'obj2'))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Dir should now mirror the bucket, with the subdir objects synchronized
      # (since this run used -r).
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check6()

  def test_bucket_to_dir_minus_d_with_fname_case_change(self):
    """Tests that name case changes work correctly.

    Example:
      Windows filenames are case-preserving in what you wrote, but case-
      insensitive when compared. If you synchronize from FS to cloud and then
      change case-naming in local files, you could end up with this situation:

      Cloud copy is called .../TiVo/...
      FS copy is called .../Tivo/...

      Then, if you sync from cloud to FS, if rsync doesn't recognize that on
      Windows these names are identical, each rsync run will cause both a copy
      and a delete to be executed.
    """
    # Create bucket and dir with same objects, but dir copy has different name
    # case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      # Nothing should be copied or removed under Windows.
      if IS_WINDOWS:
        self.assertEquals(NO_CHANGES, output)
      else:
        self.assertNotEquals(NO_CHANGES, output)
    _Check1()

  def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
    """Tests that we correctly handle leftover dir placeholders.

    See comments in gslib.commands.rsync._FieldedListingIterator for details.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    # Create a placeholder like what can be left over by web GUI tools.
    key_uri = bucket_uri.clone_replace_name('/')
    key_uri.set_contents_from_string('')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '//']))
      # Dir should not have the placeholder object.
      self.assertEquals(listing2, set(['/obj1']))
      # Stderr should report what happened.
      self.assertRegexpMatches(output, r'.*Skipping cloud sub-directory.*')
    _Check1()

  @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
  def test_rsync_minus_d_minus_e(self):
    """Tests that rsync -e ignores symlinks."""
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    fpath1 = self.CreateTempFile(
        tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    good_symlink_path = os.path.join(tmpdir, 'symlink1')
    os.symlink(fpath1, good_symlink_path)
    # Make a symlink that points to a non-existent path to test that -e also
    # handles that case.
    bad_symlink_path = os.path.join(tmpdir, 'symlink2')
    os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Ensure listings match the commented expectations."""
      self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1,
          set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2']))
      # Bucket should have content like dir but without the symlinks, and
      # without subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
    _Check1()

    # Now remove the invalid symlink and run without -e, and see that the
    # remaining symlink gets copied (as the file to which it points). Use
    # @Retry as hedge against bucket listing eventual consistency.
    os.unlink(bad_symlink_path)

    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1']))
      # Bucket should have content like dir, including the symlink (copied as
      # the file it points to), and without subdir objects synchronized.
      self.assertEquals(
          listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1']))
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'symlink1')], return_stdout=True))
    _Check2()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check3()

  @SkipForS3('S3 does not support composite objects')
  def test_bucket_to_bucket_minus_d_with_composites(self):
    """Tests that rsync works with composite objects (which don't have MD5s)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.RunGsUtil(
        ['compose', suri(bucket1_uri, 'obj1'), suri(bucket1_uri, 'obj2'),
         suri(bucket1_uri, 'obj3')])
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Second bucket should mirror the first bucket's content.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_bucket_to_bucket_minus_d_empty_dest(self):
    """Tests empty dest bucket (dest iterator runs out before src iterator)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1', '/obj2']))
      self.assertEquals(listing2, set(['/obj1', '/obj2']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_bucket_to_bucket_minus_d_empty_src(self):
    """Tests empty src bucket (src iterator runs out before dest iterator)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
      stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_rsync_minus_d_minus_p(self):
    """Tests that rsync -p preserves ACLs."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    # Set public-read (non-default) ACL so we can verify that rsync -p works.
    self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync -p works as expected."""
      self.RunGsUtil(['rsync', '-d', '-p', suri(bucket1_uri),
                      suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1']))
      self.assertEquals(listing2, set(['/obj1']))
      acl1_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')],
                                 return_stdout=True)
      acl2_json = self.RunGsUtil(['acl', 'get', suri(bucket2_uri, 'obj1')],
                                 return_stdout=True)
      self.assertEquals(acl1_json, acl2_json)
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-p', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_rsync_to_nonexistent_bucket_subdir(self):
    """Tests that rsync to non-existent bucket subdir works."""
    # Create dir with some objects and empty bucket.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_url = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(
          suri(bucket_url, 'subdir'),
          self._FlatListBucket(bucket_url.clone_replace_name('subdir')))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket subdir should have content like dir.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj3']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')],
          return_stderr=True))
    _Check2()

  def test_rsync_from_nonexistent_bucket(self):
    """Tests that rsync from a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    bucket_url_str = '%s://%s' % (
        self.default_provider, self.nonexistent_bucket_name)
    stderr = self.RunGsUtil(['rsync', '-d', bucket_url_str, tmpdir],
                            expected_status=1, return_stderr=True)
    self.assertIn('Caught non-retryable exception', stderr)
    listing = _TailSet(tmpdir, self._FlatListDir(tmpdir))
    # Dir should have un-altered content.
    self.assertEquals(listing, set(['/obj1', '/obj2']))

  def test_rsync_to_nonexistent_bucket(self):
    """Tests that rsync to a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    bucket_url_str = '%s://%s' % (
        self.default_provider, self.nonexistent_bucket_name)
    stderr = self.RunGsUtil(['rsync', '-d', tmpdir, bucket_url_str],
                            expected_status=1, return_stderr=True)
    self.assertIn('Caught non-retryable exception', stderr)
    listing = _TailSet(tmpdir, self._FlatListDir(tmpdir))
    # Dir should have un-altered content.
    self.assertEquals(listing, set(['/obj1', '/obj2']))

  def test_bucket_to_bucket_minus_d_with_overwrite_and_punc_chars(self):
    """Tests that punctuation chars in object names don't confuse sort order."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    # Create 2 objects in each bucket, with one overwritten with a name that's
    # less than the next name in destination bucket when encoded, but not when
    # compared without encoding.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='e/obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='e-1/obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='e/obj1',
                      contents='OBJ1')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='e-1/obj2',
                      contents='obj2')
    # Need to make sure the bucket listings are caught up, otherwise the
    # rsync may not see all objects and fail to synchronize correctly.
    self.AssertNObjectsInBucket(bucket1_uri, 2)
    self.AssertNObjectsInBucket(bucket2_uri, 2)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-rd', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/e/obj1', '/e-1/obj2']))
      self.assertEquals(listing2, set(['/e/obj1', '/e-1/obj2']))
      # Assert correct contents.
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'e/obj1')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'e-1/obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-rd', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_dir_to_bucket_minus_x(self):
    """Tests that rsync -x option works correctly."""
    # Create dir and bucket with 1 overlapping and 2 extra objects in each.
    tmpdir = self.CreateTempDir()
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj3', contents='obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj5',
                      contents='obj5')

    # Need to make sure the bucket listing is caught up, otherwise the
    # first rsync may not see obj2 in the bucket and will overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', '-x', 'obj[34]', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Bucket should have content like dir but ignoring obj3 from dir and not
      # deleting obj4 from bucket (per exclude regex).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-x', 'obj[34]', tmpdir, suri(bucket_uri)],
          return_stderr=True))
    _Check2()