Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / telemetry / third_party / gsutilz / gslib / tests / test_wildcard_iterator.py
blobcf60afc25a1ecdb7c29dad18b2e3ce53b0921b15
1 # -*- coding: utf-8 -*-
2 # Copyright 2010 Google Inc. All Rights Reserved.
4 # Permission is hereby granted, free of charge, to any person obtaining a
5 # copy of this software and associated documentation files (the
6 # "Software"), to deal in the Software without restriction, including
7 # without limitation the rights to use, copy, modify, merge, publish, dis-
8 # tribute, sublicense, and/or sell copies of the Software, and to permit
9 # persons to whom the Software is furnished to do so, subject to the fol-
10 # lowing conditions:
12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 # IN THE SOFTWARE.
22 """Unit tests for gsutil wildcard_iterator."""
24 from __future__ import absolute_import
26 import tempfile
28 from gslib import wildcard_iterator
29 from gslib.exception import InvalidUrlError
30 from gslib.storage_url import ContainsWildcard
31 import gslib.tests.testcase as testcase
32 from gslib.tests.util import ObjectToURI as suri
class CloudWildcardIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for CloudWildcardIterator."""

  def setUp(self):
    """Creates 2 mock buckets, each holding 6 objects (3 of them nested)."""
    super(CloudWildcardIteratorTests, self).setUp()
    self.immed_child_obj_names = ['abcd', 'abdd', 'ade$']
    self.all_obj_names = ['abcd', 'abdd', 'ade$', 'nested1/nested2/xyz1',
                          'nested1/nested2/xyz2', 'nested1/nfile_abc']

    self.base_bucket_uri = self.CreateBucket()
    # Keep at most 61 characters of the generated name so that the '_'
    # separator plus the one-character suffix added below yield a name of
    # at most 63 characters (presumably the bucket name length limit).
    self.prefix_bucket_name = '%s_' % self.base_bucket_uri.bucket_name[:61]
    # URI string shared by both test buckets, used to build bucket wildcards.
    self.base_uri_str = suri(self.base_bucket_uri)
    self.base_uri_str = self.base_uri_str.replace(
        self.base_bucket_uri.bucket_name, self.prefix_bucket_name)

    self.test_bucket0_uri = self.CreateBucket(
        bucket_name='%s0' % self.prefix_bucket_name)
    self.test_bucket0_obj_uri_strs = set()
    for obj_name in self.all_obj_names:
      obj_uri = self.CreateObject(bucket_uri=self.test_bucket0_uri,
                                  object_name=obj_name, contents='')
      self.test_bucket0_obj_uri_strs.add(suri(obj_uri))

    self.test_bucket1_uri = self.CreateBucket(
        bucket_name='%s1' % self.prefix_bucket_name)
    self.test_bucket1_obj_uri_strs = set()
    for obj_name in self.all_obj_names:
      obj_uri = self.CreateObject(bucket_uri=self.test_bucket1_uri,
                                  object_name=obj_name, contents='')
      self.test_bucket1_obj_uri_strs.add(suri(obj_uri))

  def testNoOpObjectIterator(self):
    """Tests that bucket-only URI iterates just that one URI."""
    results = list(
        self._test_wildcard_iterator(self.test_bucket0_uri).IterBuckets(
            bucket_fields=['id']))
    self.assertEqual(1, len(results))
    self.assertEqual(str(self.test_bucket0_uri), str(results[0]))

  def testMatchingAllObjects(self):
    """Tests matching all objects, based on wildcard."""
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                expand_top_level_buckets=True))
    self.assertEqual(self.test_bucket0_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingObjectSubset(self):
    """Tests matching a subset of objects, based on wildcard."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd')),
         str(self.test_bucket0_uri.clone_replace_name('abdd'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('ab??')).IterAll(
                expand_top_level_buckets=True))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named object."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('abcd')).IterAll(
                expand_top_level_buckets=True))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardedObjectUriWithVsWithoutPrefix(self):
    """Tests that wildcarding w/ and w/o server prefix get same result."""
    # (It's just more efficient to query w/o a prefix; wildcard
    # iterator will filter the matches either way.)
    with_prefix_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('abcd')).IterAll(
                expand_top_level_buckets=True))
    # By including a wildcard at the start of the string no prefix can be
    # used in server request.
    no_prefix_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('?bcd')).IterAll(
                expand_top_level_buckets=True))
    self.assertEqual(with_prefix_uri_strs, no_prefix_uri_strs)

  def testWildcardedObjectUriNestedSubdirMatch(self):
    """Tests wildcarding with a nested subdir."""
    uri_strs = set()
    prefixes = set()
    # Split the listing results into prefixes and object URL strings.
    for blr in self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*')):
      if blr.IsPrefix():
        prefixes.add(blr.root_object)
      else:
        uri_strs.add(blr.url_string)
    exp_obj_uri_strs = set([suri(self.test_bucket0_uri, x)
                            for x in self.immed_child_obj_names])
    self.assertEqual(exp_obj_uri_strs, uri_strs)
    self.assertEqual(1, len(prefixes))
    self.assertTrue('nested1/' in prefixes)

  def testWildcardPlusSubdirMatch(self):
    """Tests gs://bucket/*/subdir matching."""
    actual_uri_strs = set()
    actual_prefixes = set()
    for blr in self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*/nested1')):
      if blr.IsPrefix():
        actual_prefixes.add(blr.root_object)
      else:
        actual_uri_strs.add(blr.url_string)
    expected_uri_strs = set()
    expected_prefixes = set(['nested1/'])
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testWildcardPlusSubdirSubdirMatch(self):
    """Tests gs://bucket/*/subdir/* matching."""
    actual_uri_strs = set()
    actual_prefixes = set()
    for blr in self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*/nested2/*')):
      if blr.IsPrefix():
        actual_prefixes.add(blr.root_object)
      else:
        actual_uri_strs.add(blr.url_string)
    expected_uri_strs = set([
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz1').uri,
        self.test_bucket0_uri.clone_replace_name('nested1/nested2/xyz2').uri])
    expected_prefixes = set()
    self.assertEqual(expected_prefixes, actual_prefixes)
    self.assertEqual(expected_uri_strs, actual_uri_strs)

  def testNoMatchingWildcardedObjectUri(self):
    """Tests that get back an empty iterator for non-matching wildcarded URI."""
    res = list(self._test_wildcard_iterator(
        self.test_bucket0_uri.clone_replace_name('*x0')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testWildcardedInvalidObjectUri(self):
    """Tests that we raise an exception for wildcarded invalid URI."""
    try:
      # Drain the iterator so an error raised lazily during iteration still
      # surfaces inside this try block.
      list(self._test_wildcard_iterator(
          'badscheme://asdf').IterAll(expand_top_level_buckets=True))
    except InvalidUrlError as e:
      # Expected behavior.
      self.assertTrue(e.message.find('Unrecognized scheme') != -1)
    else:
      # Reached only when nothing was raised; without this branch the test
      # would pass vacuously on an empty, non-raising iterator.
      self.fail('Expected InvalidUrlError not raised.')

  def testSingleMatchWildcardedBucketUri(self):
    """Tests matching a single bucket based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*1' % self.base_uri_str).IterBuckets(bucket_fields=['id']))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testMultiMatchWildcardedBucketUri(self):
    """Tests matching multiple buckets based on a wildcarded bucket URI."""
    exp_obj_uri_strs = set([
        suri(self.test_bucket0_uri) + self.test_bucket0_uri.delim,
        suri(self.test_bucket1_uri) + self.test_bucket1_uri.delim])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s*' % self.base_uri_str).IterBuckets(bucket_fields=['id']))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardBucketAndObjectUri(self):
    """Tests matching with both bucket and object wildcards."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name('abcd'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s0*/abc*' % self.base_uri_str).IterAll(
                expand_top_level_buckets=True))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardUpToFinalCharSubdirPlusObjectName(self):
    """Tests wildcard subd*r/obj name."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name(
            'nested1/nested2/xyz1'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%snested1/nest*2/xyz1' % self.test_bucket0_uri.uri).IterAll(
                expand_top_level_buckets=True))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testPostRecursiveWildcard(self):
    """Tests wildcard containing ** followed by an additional wildcard."""
    exp_obj_uri_strs = set(
        [str(self.test_bucket0_uri.clone_replace_name(
            'nested1/nested2/xyz2'))])
    actual_obj_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(
            '%s**/*y*2' % self.test_bucket0_uri.uri).IterAll(
                expand_top_level_buckets=True))
    self.assertEqual(exp_obj_uri_strs, actual_obj_uri_strs)

  def testWildcardFields(self):
    """Tests that wildcard w/fields specification returns correct fields."""
    # When 'updated' is requested, every result must carry it.
    blrs = set(
        u for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['updated']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and blr.root_object.updated)
    # When only 'generation' is requested, 'updated' must not be populated.
    blrs = set(
        u for u in self._test_wildcard_iterator(
            self.test_bucket0_uri.clone_replace_name('**')).IterAll(
                bucket_listing_fields=['generation']))
    self.assertTrue(len(blrs))
    for blr in blrs:
      self.assertTrue(blr.root_object and not blr.root_object.updated)
class FileIteratorTests(testcase.GsUtilUnitTestCase):
  """Unit tests for FileWildcardIterator."""

  def setUp(self):
    """Creates a test dir with 3 files plus one file nested two dirs deep."""
    super(FileIteratorTests, self).setUp()

    self.test_dir = self.CreateTempDir(test_files=[
        'abcd', 'abdd', 'ade$', ('dir1', 'dir2', 'zzz')])

    self.root_files_uri_strs = set([
        suri(self.test_dir, 'abcd'),
        suri(self.test_dir, 'abdd'),
        suri(self.test_dir, 'ade$')])

    self.subdirs_uri_strs = set([suri(self.test_dir, 'dir1')])

    self.nested_files_uri_strs = set([
        suri(self.test_dir, 'dir1', 'dir2', 'zzz')])

    # Entries directly under test_dir: the root files plus the top subdir.
    self.immed_child_uri_strs = self.root_files_uri_strs | self.subdirs_uri_strs
    # Every regular file at any depth (directories excluded).
    self.all_file_uri_strs = (
        self.root_files_uri_strs | self.nested_files_uri_strs)

  def testContainsWildcard(self):
    """Tests ContainsWildcard call."""
    self.assertTrue(ContainsWildcard('a*.txt'))
    self.assertTrue(ContainsWildcard('a[0-9].txt'))
    self.assertFalse(ContainsWildcard('0-9.txt'))
    self.assertTrue(ContainsWildcard('?.txt'))

  def testNoOpDirectoryIterator(self):
    """Tests that directory-only URI iterates just that one URI."""
    # gettempdir() returns the same value as the tempfile.tempdir global once
    # that has been initialised, but never returns None.
    temp_dir = tempfile.gettempdir()
    results = list(
        self._test_wildcard_iterator(suri(temp_dir)).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(1, len(results))
    self.assertEqual(suri(temp_dir), str(results[0]))

  def testMatchingAllFiles(self):
    """Tests matching all files, based on wildcard."""
    uri = self._test_storage_uri(suri(self.test_dir, '*'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(self.immed_child_uri_strs, actual_uri_strs)

  def testMatchingFileSubset(self):
    """Tests matching a subset of files, based on wildcard."""
    exp_uri_strs = set(
        [suri(self.test_dir, 'abcd'), suri(self.test_dir, 'abdd')])
    uri = self._test_storage_uri(suri(self.test_dir, 'ab??'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingNonWildcardedUri(self):
    """Tests matching a single named file."""
    exp_uri_strs = set([suri(self.test_dir, 'abcd')])
    uri = self._test_storage_uri(suri(self.test_dir, 'abcd'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testMatchingFilesIgnoringOtherRegexChars(self):
    """Tests ignoring non-wildcard regex chars (e.g., ^ and $)."""

    exp_uri_strs = set([suri(self.test_dir, 'ade$')])
    uri = self._test_storage_uri(suri(self.test_dir, 'ad*$'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(exp_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryOnlyWildcarding(self):
    """Tests recursive expansion of directory-only '**' wildcard."""
    uri = self._test_storage_uri(suri(self.test_dir, '**'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testRecursiveDirectoryPlusFileWildcarding(self):
    """Tests recursive expansion of '**' directory plus '*' wildcard."""
    uri = self._test_storage_uri(suri(self.test_dir, '**', '*'))
    actual_uri_strs = set(
        str(u) for u in self._test_wildcard_iterator(uri).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(self.all_file_uri_strs, actual_uri_strs)

  def testInvalidRecursiveDirectoryWildcard(self):
    """Tests that wildcard containing '***' raises exception."""
    try:
      uri = self._test_storage_uri(suri(self.test_dir, '***', 'abcd'))
      # Drain the iterator so an error raised lazily during iteration still
      # surfaces inside this try block.
      list(self._test_wildcard_iterator(uri).IterAll(
          expand_top_level_buckets=True))
    except wildcard_iterator.WildcardException as e:
      # Expected behavior.
      self.assertTrue(str(e).find('more than 2 consecutive') != -1)
    else:
      # Reached only when nothing was raised; without this branch the test
      # would pass vacuously on an empty, non-raising iterator.
      self.fail('Expected WildcardException not raised.')

  def testMissingDir(self):
    """Tests that wildcard gets empty iterator when directory doesn't exist."""
    res = list(
        self._test_wildcard_iterator(suri('no_such_dir', '*')).IterAll(
            expand_top_level_buckets=True))
    self.assertEqual(0, len(res))

  def testExistingDirNoFileMatch(self):
    """Tests that wildcard returns empty iterator when there's no match."""
    uri = self._test_storage_uri(
        suri(self.test_dir, 'non_existent*'))
    res = list(self._test_wildcard_iterator(uri).IterAll(
        expand_top_level_buckets=True))
    self.assertEqual(0, len(res))