# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15 """Integration tests for rsync command."""

import os

import crcmod

import gslib.tests.testcase as testcase
from gslib.tests.testcase.integration_testcase import SkipForS3
from gslib.tests.util import ObjectToURI as suri
from gslib.tests.util import PerformsFileToObjectUpload
from gslib.tests.util import SetBotoConfigForTest
from gslib.tests.util import unittest
from gslib.util import IS_WINDOWS
from gslib.util import Retry
from gslib.util import UsingCrcmodExtension

NO_CHANGES = 'Building synchronization state...\nStarting synchronization\n'
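# The tests below compare NO_CHANGES against the stderr of a re-run rsync
# command: when source and destination are already synchronized, rsync prints
# only these two status lines and performs no copies or deletes.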


def _TailSet(start_point, listing):
  """Returns set of object name tails.

  Tails can be compared between source and dest, past the point at which rsync
  was done. For example, if the test ran rsync gs://bucket1/dir
  gs://bucket2/dir2, the tails for listings from bucket1 would start after
  "dir", while the tails for listings from bucket2 would start after "dir2".

  Args:
    start_point: The target of the rsync command, e.g., for the above command
        it would be gs://bucket1/dir for the bucket1 listing results and
        gs://bucket2/dir2 for the bucket2 listing results.
    listing: The listing over which to compute the tails.

  Returns:
    Set of object name tails.
  """
  return set(l[len(start_point):] for l in listing.strip().split('\n'))
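
# A minimal sketch of the tail computation (hypothetical listing values, not
# used by any test): for the command rsync gs://bucket1/dir gs://bucket2/dir2,
#   _TailSet('gs://bucket1/dir',
#            'gs://bucket1/dir/obj1\ngs://bucket1/dir/subdir/obj3')
# returns set(['/obj1', '/subdir/obj3']), which can be compared directly
# against the tail set computed for gs://bucket2/dir2.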


# TODO: Add inspection to the retry wrappers in this test suite where the
# state at the end of a retry block is depended upon by subsequent tests
# (since listing content can vary depending on which backend server is
# reached, until eventual consistency is reached).
# TODO: Remove retry wrappers and AssertNObjectsInBucket calls if GCS ever
# supports strong listing consistency.
class TestRsync(testcase.GsUtilIntegrationTestCase):
  """Integration tests for rsync command."""

  @staticmethod
  def _FlatListDir(directory):
    """Performs a flat listing over directory.

    Args:
      directory: The directory to list.

    Returns:
      Listing with path separators canonicalized to '/', to make assertions
      easier for Linux vs Windows.
    """
    result = []
    for dirpath, _, filenames in os.walk(directory):
      for f in filenames:
        result.append(os.path.join(dirpath, f))
    return '\n'.join(result).replace('\\', '/')
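
  # For example (hypothetical Windows path, not used by any test): os.walk can
  # yield a path like 'C:\\tmp\\dir\\subdir\\obj3', which the
  # replace('\\', '/') above canonicalizes to 'C:/tmp/dir/subdir/obj3', so the
  # same '/'-separated assertions work on both platforms.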

  def _FlatListBucket(self, bucket_url_string):
    """Performs a flat listing over bucket_url_string."""
    return self.RunGsUtil(['ls', suri(bucket_url_string, '**')],
                          return_stdout=True)

  def test_invalid_args(self):
    """Tests various invalid argument cases."""
    bucket_uri = self.CreateBucket()
    obj1 = self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                             contents='obj1')
    tmpdir = self.CreateTempDir()
    # rsync object to bucket.
    self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
    # rsync bucket to object.
    self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
    # rsync bucket to non-existent bucket.
    self.RunGsUtil(['rsync', suri(bucket_uri), self.nonexistent_bucket_name],
                   expected_status=1)
    # rsync object to dir.
    self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
    # rsync dir to object.
    self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
    # rsync dir to non-existent bucket.
    self.RunGsUtil(['rsync', tmpdir, self.nonexistent_bucket_name],
                   expected_status=1)

  # Note: The tests below exercise the cases
  # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
  # all the cases but then have just one test without -d
  # (test_bucket_to_bucket) as representative of handling without the -d
  # option. This provides reasonable test coverage because the -d handling is
  # src/dest URI-type independent, and keeps the test case combinations more
  # manageable.

  def test_bucket_to_bucket(self):
    """Tests that flat and recursive rsync between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we
    # detect and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have new objects added from the source bucket
      # (without removing the extraneous object found in the dest bucket), and
      # without the subdir objects synchronized.
      self.assertEquals(listing2,
                        set(['/obj1', '/obj2', '/obj4', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', suri(bucket1_uri), suri(bucket2_uri)], return_stderr=True))
    _Check2()

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should have the objects that were newly added to the
      # first bucket (without removing extraneous dest bucket objects), and
      # with the subdir objects synchronized now that -r is used.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4', '/obj6',
                                       '/obj7', '/subdir/obj3',
                                       '/subdir/obj5']))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check4()

  def test_bucket_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync -d between 2 buckets works."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we
    # detect and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have content like the first bucket but without
      # the subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

    # Now add and remove some objects in each bucket and test rsync -d -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-d', '-r',
                      suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should now mirror the first bucket, including the subdir
      # objects, since -r was used.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check4()

  # Test sequential upload as well as parallel composite upload case.
  @PerformsFileToObjectUpload
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync dir to bucket works correctly."""
    # Create dir and bucket with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Need to make sure the bucket listing is caught-up, otherwise the
    # first rsync may not see obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (dir to bucket sync doesn't use
      # checksums unless you specify -c).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('OBJ2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check2()

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (dir to bucket sync with -c uses checksums).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj7',
                      contents='obj7')
    os.unlink(os.path.join(tmpdir, 'obj1'))
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Bucket should now mirror the dir, including the subdir objects, since
      # -r was used.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check6()

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_dir_minus_d(self):
    """Tests that flat and recursive rsync dir to dir works correctly."""
    # Create 2 dirs with 1 overlapping file, 1 extra file at root
    # level in each, and 1 extra file 1 level down in each. Make the
    # overlapping files named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    subdir1 = os.path.join(tmpdir1, 'subdir1')
    subdir2 = os.path.join(tmpdir2, 'subdir2')
    os.mkdir(subdir1)
    os.mkdir(subdir2)
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2')
    self.CreateTempFile(
        tmpdir=subdir1, file_name='obj3', contents='subdir1/obj3')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4')
    self.CreateTempFile(
        tmpdir=subdir2, file_name='obj5', contents='subdir2/obj5')

    self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir1 objects
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest files that had same length but different
    # checksums were not synchronized (dir to dir sync doesn't use checksums
    # unless you specify -c).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('OBJ2', '\n'.join(f.readlines()))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
    _Check1()

    # Now rerun the sync with the -c option.
    self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir objects
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest files that had same length but different
    # content were synchronized (dir to dir sync with -c uses checksums).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', tmpdir1, tmpdir2], return_stderr=True))
    _Check2()

    # Now add and remove some files in both dirs and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7')
    os.unlink(os.path.join(tmpdir1, 'obj1'))
    os.unlink(os.path.join(tmpdir2, 'obj2'))

    self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3']))
    # dir2 should now mirror dir1, including the subdir objects, since -r was
    # used.
    self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3']))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', tmpdir1, tmpdir2], return_stderr=True))
    _Check3()

  def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
    """Tests concurrently building listing from multiple tmp file ranges."""
    # Create 2 dirs, where each dir has 1000 files with differing names.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    for i in range(0, 1000):
      self.CreateTempFile(tmpdir=tmpdir1, file_name='d1-%s' % i, contents='x')
      self.CreateTempFile(tmpdir=tmpdir2, file_name='d2-%s' % i, contents='y')

    # We open a new temp file each time we reach rsync_buffer_lines of
    # listing output. On Windows, this will result in a 'too many open file
    # handles' error, so choose a larger value so as not to open so many
    # files.
    rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
                            '50' if IS_WINDOWS else '2')]
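    # Each tuple above is (boto config section, option, value); the
    # SetBotoConfigForTest context manager below applies the override only for
    # the duration of the `with` block.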
    # Run gsutil with config option to make buffer size << # files.
    with SetBotoConfigForTest(rsync_buffer_config):
      self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
      listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
      listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
      self.assertEquals(listing1, listing2)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
    _Check1()

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_bucket_to_dir_minus_d(self):
    """Tests that flat and recursive rsync bucket to dir works correctly."""
    # Create bucket and dir with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4')
    self.CreateTempFile(tmpdir=subdir, file_name='obj5', contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (bucket to dir sync doesn't use
      # checksums unless you specify -c).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('OBJ2', '\n'.join(f.readlines()))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check2()

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (bucket to dir sync with -c uses checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-c', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj6',
                      contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7')
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
    os.unlink(os.path.join(tmpdir, 'obj2'))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Dir should now mirror the bucket, including the subdir objects, since
      # -r was used.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True))
    _Check6()

  def test_bucket_to_dir_minus_d_with_fname_case_change(self):
    """Tests that name case changes work correctly.

    Example:
      Windows filenames are case-preserving in what you wrote, but case-
      insensitive when compared. If you synchronize from FS to cloud and then
      change case-naming in local files, you could end up with this situation:

      Cloud copy is called .../TiVo/...
      FS copy is called .../Tivo/...

      Then, if you sync from cloud to FS, if rsync doesn't recognize that on
      Windows these names are identical, each rsync run will cause both a copy
      and a delete to be executed.
    """
    # Create bucket and dir with the same object, but the dir copy uses a
    # different name case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1')
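    # Note: the cloud and local copies above have identical contents; only the
    # name case differs ('obj1' vs 'Obj1').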

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      if IS_WINDOWS:
        # Nothing should be copied or removed under Windows.
        self.assertEquals(NO_CHANGES, output)
      else:
        self.assertNotEquals(NO_CHANGES, output)
    _Check1()

  def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
    """Tests that we correctly handle leftover dir placeholders.

    See comments in gslib.commands.rsync._FieldedListingIterator for details.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    # Create a placeholder like what can be left over by web GUI tools.
    key_uri = bucket_uri.clone_replace_name('/')
    key_uri.set_contents_from_string('')
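    # The placeholder's object name is just '/', so the bucket's tail-set
    # listing below includes '//' for it.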

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '//']))
      # Dir should not have the placeholder object.
      self.assertEquals(listing2, set(['/obj1']))
      # Stderr should report what happened.
      self.assertRegexpMatches(output, r'.*Skipping cloud sub-directory.*')
    _Check1()

  @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
  def test_rsync_minus_d_minus_e(self):
    """Tests that rsync -e ignores symlinks."""
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    fpath1 = self.CreateTempFile(
        tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')
    good_symlink_path = os.path.join(tmpdir, 'symlink1')
    os.symlink(fpath1, good_symlink_path)
    # Make a symlink that points to a non-existent path, to test that -e also
    # handles that case.
    bad_symlink_path = os.path.join(tmpdir, 'symlink2')
    os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
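    # Without -e, rsync would follow symlink2 and fail to read its
    # non-existent target; with -e, both symlinks are ignored entirely.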
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Ensure listings match the commented expectations."""
      self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1,
          set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2']))
      # Bucket should have content like dir but without the symlinks, and
      # without subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
    _Check1()

    # Now remove the invalid symlink and run without -e, and see that the
    # valid symlink gets copied (as the file to which it points). Use @Retry
    # as hedge against bucket listing eventual consistency.
    os.unlink(bad_symlink_path)

    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1']))
      # Bucket should have content like dir, with the symlink copied as the
      # file it points to, and without subdir objects synchronized.
      self.assertEquals(
          listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1']))
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'symlink1')], return_stdout=True))
    _Check2()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))
    _Check3()

  @SkipForS3('S3 does not support composite objects')
  def test_bucket_to_bucket_minus_d_with_composites(self):
    """Tests that rsync works with composite objects (which don't have MD5s)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.RunGsUtil(
        ['compose', suri(bucket1_uri, 'obj1'), suri(bucket1_uri, 'obj2'),
         suri(bucket1_uri, 'obj3')])
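    # The composed obj3 carries a crc32c checksum but no MD5 (per the
    # docstring above), so rsync's checksum comparison must handle the missing
    # MD5 for it.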
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Second bucket should have content matching the first bucket.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_bucket_to_bucket_minus_d_empty_dest(self):
    """Tests working with empty dest bucket (iter runs out before src iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1', '/obj2']))
      self.assertEquals(listing2, set(['/obj1', '/obj2']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_bucket_to_bucket_minus_d_empty_src(self):
    """Tests working with empty src bucket (iter runs out before dst iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
      stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_rsync_minus_d_minus_p(self):
    """Tests that rsync -p preserves ACLs."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    # Set public-read (non-default) ACL so we can verify that rsync -p works.
    self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync -p works as expected."""
      self.RunGsUtil(['rsync', '-d', '-p', suri(bucket1_uri),
                      suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1']))
      self.assertEquals(listing2, set(['/obj1']))
      acl1_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')],
                                 return_stdout=True)
      acl2_json = self.RunGsUtil(['acl', 'get', suri(bucket2_uri, 'obj1')],
                                 return_stdout=True)
      self.assertEquals(acl1_json, acl2_json)
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-p', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_rsync_to_nonexistent_bucket_subdir(self):
    """Tests that rsync to a non-existent bucket subdir works."""
    # Create dir with some objects and an empty bucket.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_url = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3', contents='subdir/obj3')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(
          suri(bucket_url, 'subdir'),
          self._FlatListBucket(bucket_url.clone_replace_name('subdir')))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket subdir should have content like dir.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj3']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')],
          return_stderr=True))
    _Check2()

  def test_rsync_from_nonexistent_bucket(self):
    """Tests that rsync from a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    bucket_url_str = '%s://%s' % (
        self.default_provider, self.nonexistent_bucket_name)
    stderr = self.RunGsUtil(['rsync', '-d', bucket_url_str, tmpdir],
                            expected_status=1, return_stderr=True)
    self.assertIn('Caught non-retryable exception', stderr)
    listing = _TailSet(tmpdir, self._FlatListDir(tmpdir))
    # Dir should have un-altered content.
    self.assertEquals(listing, set(['/obj1', '/obj2']))

  def test_rsync_to_nonexistent_bucket(self):
    """Tests that rsync to a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    bucket_url_str = '%s://%s' % (
        self.default_provider, self.nonexistent_bucket_name)
    stderr = self.RunGsUtil(['rsync', '-d', tmpdir, bucket_url_str],
                            expected_status=1, return_stderr=True)
    self.assertIn('Caught non-retryable exception', stderr)
    listing = _TailSet(tmpdir, self._FlatListDir(tmpdir))
    # Dir should have un-altered content.
    self.assertEquals(listing, set(['/obj1', '/obj2']))

  def test_bucket_to_bucket_minus_d_with_overwrite_and_punc_chars(self):
    """Tests that punctuation chars in names don't confuse sort order."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    # Create 2 objects in each bucket, with one overwritten with a name that's
    # less than the next name in the destination bucket when encoded, but not
    # when compared without encoding.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='e/obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='e-1/obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='e/obj1',
                      contents='OBJ1')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='e-1/obj2',
                      contents='obj2')
    # Need to make sure the bucket listings are caught-up, otherwise the
    # rsync may not see all objects and fail to synchronize correctly.
    self.AssertNObjectsInBucket(bucket1_uri, 2)
    self.AssertNObjectsInBucket(bucket2_uri, 2)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-rd', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/e/obj1', '/e-1/obj2']))
      self.assertEquals(listing2, set(['/e/obj1', '/e-1/obj2']))
      # Assert correct contents.
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'e/obj1')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'e-1/obj2')], return_stdout=True))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
          return_stderr=True))
    _Check2()

  def test_dir_to_bucket_minus_x(self):
    """Tests that the rsync -x option works correctly."""
    # Create dir and bucket with 1 overlapping and 2 extra objects in each.
    tmpdir = self.CreateTempDir()
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj3', contents='obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj5',
                      contents='obj5')

    # Need to make sure the bucket listing is caught-up, otherwise the
    # first rsync may not see obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', '-x', 'obj[34]', tmpdir,
                      suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Bucket should have content like dir but ignoring obj3 from dir and not
      # deleting obj4 from bucket (per the exclude regex).
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj4']))
    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEquals(NO_CHANGES, self.RunGsUtil(
          ['rsync', '-d', '-x', 'obj[34]', tmpdir, suri(bucket_uri)],
          return_stderr=True))
    _Check2()