# -*- coding: utf-8 -*-
# Copyright 2010 Google Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
22 """Unit tests for gsutil wildcard_iterator."""
from __future__ import absolute_import

import tempfile

from gslib import wildcard_iterator
from gslib.exception import InvalidUrlError
from gslib.storage_url import ContainsWildcard
import gslib.tests.testcase as testcase
from gslib.tests.util import ObjectToURI as suri


class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for CloudWildcardIterator.

  Every test runs against the two mock buckets built in setUp. Their names
  share a common prefix and end in '0' and '1' respectively, so bucket-level
  wildcards can match one or both of them.
  """

  def setUp(self):
    """Creates 2 mock buckets, each holding the same 6 objects (3 nested)."""
    super(CloudWildcardIteratorTests, self).setUp()
    self.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                          'nested1/nested2/xyz2', 'nested1/nfile_abc']

    self.base_bucket_uri = self.CreateBucket()
    # Keep at most 61 characters of the base name so that the '_' separator
    # plus the one-character suffix added per bucket totals at most 63
    # characters (presumably the bucket name length limit -- TODO confirm).
    self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61]
    # URL string of the base bucket with its name swapped for the shared
    # prefix; bucket-wildcard tests append '*', '*1', etc. to this.
    self.base_uri_str = suri(self.base_bucket_uri).replace(
        self.base_bucket_uri.bucket_name, self.prefix_bucket_name)

    self.test_bucket0_uri, self.test_bucket0_obj_uri_strs = (
        self._CreatePopulatedBucket('0'))
    self.test_bucket1_uri, self.test_bucket1_obj_uri_strs = (
        self._CreatePopulatedBucket('1'))

  def _CreatePopulatedBucket(self, suffix):
    """Creates a bucket named <prefix><suffix> holding all_obj_names.

    Args:
      suffix: String appended to self.prefix_bucket_name to form the name.

    Returns:
      (bucket_uri, obj_uri_strs) where obj_uri_strs is the set of suri()
      strings of the empty objects created in the bucket.
    """
    bucket_uri = self.CreateBucket(
        bucket_name='%s%s' % (self.prefix_bucket_name, suffix))
    obj_uri_strs = set()
    for obj_name in self.all_obj_names:
      obj_uri = self.CreateObject(bucket_uri=bucket_uri,
                                  object_name=obj_name, contents='')
      obj_uri_strs.add(suri(obj_uri))
    return bucket_uri, obj_uri_strs

  def _IterAllUrlStrs(self, url):
    """Returns the set of str() forms of everything IterAll yields for url.

    Args:
      url: Anything _test_wildcard_iterator accepts; the tests pass both URI
          objects and URL strings.
    """
    return set(
        str(blr) for blr in self._test_wildcard_iterator(url).IterAll(
            expand_top_level_buckets=True))

  def _IterBucketUrlStrs(self, url):
    """Returns the set of str() forms of what IterBuckets yields for url."""
    return set(
        str(blr) for blr in self._test_wildcard_iterator(url).IterBuckets(
            bucket_fields=['id']))

  def _SplitPrefixesAndObjects(self, url):
    """Iterates url directly, separating prefix results from the rest.

    Args:
      url: Anything _test_wildcard_iterator accepts.

    Returns:
      (prefixes, url_strs): the set of root_object values of results that
      report IsPrefix(), and the set of url_string values of all others.
    """
    prefixes = set()
    url_strs = set()
    for blr in self._test_wildcard_iterator(url):
      if blr.IsPrefix():
        prefixes.add(blr.root_object)
      else:
        url_strs.add(blr.url_string)
    return prefixes, url_strs

  def testNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI."""
    results = list(
        self._test_wildcard_iterator(self.test_bucket0_uri).IterBuckets(
            bucket_fields=['id']))
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def testMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard."""
    actual_obj_uri_strs = self._IterAllUrlStrs(
        self.test_bucket0_uri.clone_replace_name('**'))
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = self._IterAllUrlStrs(
        self.test_bucket0_uri.clone_replace_name('ab??'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named object."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._IterAllUrlStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result."""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = self._IterAllUrlStrs(
        self.test_bucket0_uri.clone_replace_name('abcd'))
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = self._IterAllUrlStrs(
        self.test_bucket0_uri.clone_replace_name('?bcd'))
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def testWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir."""
    prefixes, uri_strs = self._SplitPrefixesAndObjects(
        self.test_bucket0_uri.clone_replace_name('*'))
    exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def testWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching."""
    actual_prefixes, actual_uri_strs = self._SplitPrefixesAndObjects(
        self.test_bucket0_uri.clone_replace_name('*/nested1'))
    expected_uri_strs = set()
    expected_prefixes = set(['nested1/'])
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching."""
    actual_prefixes, actual_uri_strs = self._SplitPrefixesAndObjects(
        self.test_bucket0_uri.clone_replace_name('*/nested2/*'))
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    expected_prefixes = set()
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI."""
    res = list(self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI."""
    try:
      # Drain the iterator so the error is caught wherever it surfaces
      # (building the iterator or pulling results from it).
      for unused_ in self._test_wildcard_iterator(
          'badscheme://asdf').IterAll(expand_top_level_buckets=True):
        pass
    except InvalidUrlError as e:
      # Expected behavior.
      self.assertTrue('Unrecognized scheme' in str(e))
    else:
      # Reaching here means nothing was raised, including the case where
      # the iterator was simply empty.
      self.fail('Expected InvalidUrlError not raised.')

  def testSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = self._IterBucketUrlStrs('%s*1' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMultiMatchWildcardedBucketUri(self):
    """Tests matching a multiple buckets based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim,
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = self._IterBucketUrlStrs('%s*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = self._IterAllUrlStrs(
        '%s0*/abc*' % self.base_uri_str)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name."""
    exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
        'nested1/nested2/xyz1'))])
    actual_obj_uri_strs = self._IterAllUrlStrs(
        '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testPostRecursiveWildcard(self):
    """Tests wildcard containing ** followed by an additional wildcard."""
    exp_obj_uri_strs = set([str(self.test_bucket0_uri.clone_replace_name(
        'nested1/nested2/xyz2'))])
    actual_obj_uri_strs = self._IterAllUrlStrs(
        '%s**/*y*2' % self.test_bucket0_uri.uri)
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardFields(self):
    """Tests that wildcard w/fields specification returns correct fields."""
    # Asking for 'updated' must populate it on every listed object.
    blrs = set(
        self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['updated']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and blr.root_object.updated)

    # Asking only for 'generation' must leave 'updated' unset.
    blrs = set(
        self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['generation']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and not blr.root_object.updated)


class FileIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for FileWildcardIterator.

  Every test runs against the temporary directory tree built in setUp.
  """

  def setUp(self):
    """Creates a test dir with 3 files and one nested subdirectory + file."""
    super(FileIteratorTests, self).setUp()

    self.test_dir = self.CreateTempDir(test_files=[
        'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')])

    self.root_files_uri_strs = set([
        suri(self.test_dir, 'abcd'),
        suri(self.test_dir, 'abdd'),
        suri(self.test_dir, 'ade$')])

    self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')])

    self.nested_files_uri_strs = set([
        suri(self.test_dir, 'dir1', 'dir2', 'zzz')])

    # Direct children of test_dir: the root files plus the 'dir1' directory.
    self.immed_child_uri_strs = (
        self.root_files_uri_strs | self.subdirs_uri_strs)
    # Every file at any depth; directories themselves are not included.
    self.all_file_uri_strs = (
        self.root_files_uri_strs | self.nested_files_uri_strs)

  def _IterAllUrlStrs(self, *path_components):
    """Expands a wildcard under test_dir into a set of URL strings.

    Args:
      *path_components: Path pieces joined onto self.test_dir via suri(),
          e.g. ('**', '*').

    Returns:
      The set of str() forms of everything IterAll yields for that path.
    """
    uri = self._test_storage_uri(suri(self.test_dir, *path_components))
    return set(
        str(blr) for blr in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))

  def testContainsWildcard(self):
    """Tests ContainsWildcard call."""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def testNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI."""
    # gettempdir() returns tempfile.tempdir when it is already set and
    # initializes it otherwise, so this never hands None to suri().
    temp_root = tempfile.gettempdir()
    results = list(
        self._test_wildcard_iterator(suri(temp_root)).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(1, len(results))
    self.assertEqual(suri(temp_root), str(results[0]))

  def testMatchingAllFiles(self):
    """Tests matching all files, based on wildcard."""
    actual_uri_strs = self._IterAllUrlStrs('*')
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def testMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard."""
    exp_uri_strs = set(
        [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')])
    actual_uri_strs = self._IterAllUrlStrs('ab??')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named file."""
    exp_uri_strs = set([suri(self.test_dir, 'abcd')])
    actual_uri_strs = self._IterAllUrlStrs('abcd')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)."""
    exp_uri_strs = set([suri(self.test_dir, 'ade$')])
    actual_uri_strs = self._IterAllUrlStrs('ad*$')
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard."""
    actual_uri_strs = self._IterAllUrlStrs('**')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard."""
    actual_uri_strs = self._IterAllUrlStrs('**', '*')
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception."""
    try:
      uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd'))
      # Drain the iterator so the error is caught wherever it surfaces.
      for unused_ in self._test_wildcard_iterator(uri).IterAll(
          expand_top_level_buckets=True):
        pass
    except wildcard_iterator.WildcardException as e:
      # Expected behavior.
      self.assertTrue('more than 2 consecutive' in str(e))
    else:
      # Reaching here means nothing was raised, including the case where
      # the iterator was simply empty.
      self.fail('Expected WildcardException not raised.')

  def testMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist."""
    res = list(
        self._test_wildcard_iterator(suri('no_such_dir', '*')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match."""
    uri = self._test_storage_uri(
        suri(self.test_dir, 'non_existent*'))
    res = list(self._test_wildcard_iterator(uri).IterAll(
        expand_top_level_buckets=True))
    self.assertEqual(0, len(res))