# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for hashing helper functions and classes."""

from __future__ import absolute_import

from hashlib import md5
import os
import pkgutil

from gslib.exception import CommandException
from gslib.hashing_helper import CalculateMd5FromContents
from gslib.hashing_helper import HashingFileUploadWrapper
import gslib.tests.testcase as testcase
from gslib.util import StorageUrlFromString
from gslib.util import TRANSFER_BUFFER_SIZE

_TEST_FILE = 'test.txt'
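

# A minimal sketch (not part of the original test suite) of the usage pattern
# the tests below exercise: HashingFileUploadWrapper wraps a readable stream so
# that every byte read also updates the supplied digesters, which lets a
# resumable upload verify the source hash even after seeking within the file.
# The helper name and its arguments are hypothetical and purely illustrative.
def _ExampleHashWhileReading(path, url, logger):
  """Illustrative only: read a file through the wrapper and return its MD5."""
  digesters = {'md5': md5()}
  with open(path, 'rb') as stream:
    wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                       url, logger)
    while wrapper.read(TRANSFER_BUFFER_SIZE):
      # In a real upload, each chunk read here would be sent to the server.
      pass
  return digesters['md5'].hexdigest()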


class TestHashingFileUploadWrapper(testcase.GsUtilUnitTestCase):
  """Unit tests for the HashingFileUploadWrapper class."""

  _temp_test_file = None
  _dummy_url = StorageUrlFromString('gs://bucket/object')

  def _GetTestFile(self):
    contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % _TEST_FILE)
    if not self._temp_test_file:
      self._temp_test_file = self.CreateTempFile(
          file_name=_TEST_FILE, contents=contents)
    return self._temp_test_file

  def testReadToEOF(self):
    digesters = {'md5': md5()}
    tmp_file = self.CreateTempFile(contents='a' * TRANSFER_BUFFER_SIZE * 4)
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read()
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def _testSeekBack(self, initial_position, seek_back_amount):
    """Tests reading then seeking backwards.

    This function simulates an upload that is resumed after a connection break.
    It reads one transfer buffer at a time until it reaches initial_position,
    then seeks backwards (as if the server did not receive some of the bytes)
    and reads to the end of the file, ensuring the hash matches the original
    file upon completion.

    Args:
      initial_position: Initial number of bytes to read before seek.
      seek_back_amount: Number of bytes to seek backward.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertGreaterEqual(
        initial_position, seek_back_amount,
        'seek_back_amount must be less than initial position %s '
        '(but was actually: %s)' % (initial_position, seek_back_amount))
    self.assertLess(
        initial_position, tmp_file_len,
        'initial_position must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_position))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      position = 0
      while position < initial_position - TRANSFER_BUFFER_SIZE:
        data = wrapper.read(TRANSFER_BUFFER_SIZE)
        position += len(data)
      wrapper.read(initial_position - position)
      wrapper.seek(initial_position - seek_back_amount)
      self.assertEqual(wrapper.tell(),
                       initial_position - seek_back_amount)
      data = wrapper.read()
      self.assertEqual(
          len(data), tmp_file_len - (initial_position - seek_back_amount))
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testSeekToBeginning(self):
    for num_bytes in (TRANSFER_BUFFER_SIZE - 1,
                      TRANSFER_BUFFER_SIZE,
                      TRANSFER_BUFFER_SIZE + 1,
                      TRANSFER_BUFFER_SIZE * 2 - 1,
                      TRANSFER_BUFFER_SIZE * 2,
                      TRANSFER_BUFFER_SIZE * 2 + 1,
                      TRANSFER_BUFFER_SIZE * 3 - 1,
                      TRANSFER_BUFFER_SIZE * 3,
                      TRANSFER_BUFFER_SIZE * 3 + 1):
      self._testSeekBack(num_bytes, num_bytes)

  def testSeekBackAroundOneBuffer(self):
    for initial_position in (TRANSFER_BUFFER_SIZE + 1,
                             TRANSFER_BUFFER_SIZE * 2 - 1,
                             TRANSFER_BUFFER_SIZE * 2,
                             TRANSFER_BUFFER_SIZE * 2 + 1,
                             TRANSFER_BUFFER_SIZE * 3 - 1,
                             TRANSFER_BUFFER_SIZE * 3,
                             TRANSFER_BUFFER_SIZE * 3 + 1):
      for seek_back_amount in (TRANSFER_BUFFER_SIZE - 1,
                               TRANSFER_BUFFER_SIZE,
                               TRANSFER_BUFFER_SIZE + 1):
        self._testSeekBack(initial_position, seek_back_amount)

  def testSeekBackMoreThanOneBuffer(self):
    for initial_position in (TRANSFER_BUFFER_SIZE * 2 + 1,
                             TRANSFER_BUFFER_SIZE * 3 - 1,
                             TRANSFER_BUFFER_SIZE * 3,
                             TRANSFER_BUFFER_SIZE * 3 + 1):
      for seek_back_amount in (TRANSFER_BUFFER_SIZE * 2 - 1,
                               TRANSFER_BUFFER_SIZE * 2,
                               TRANSFER_BUFFER_SIZE * 2 + 1):
        self._testSeekBack(initial_position, seek_back_amount)

  def _testSeekForward(self, initial_seek):
    """Tests seeking to an initial position and then reading.

    This function simulates an upload that is resumed after a process break.
    It seeks from zero to the initial position (as if the server already had
    those bytes). Then it reads to the end of the file, ensuring the hash
    matches the original file upon completion.

    Args:
      initial_seek: Number of bytes to initially seek.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertLess(
        initial_seek, tmp_file_len,
        'initial_seek must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_seek))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.seek(initial_seek)
      self.assertEqual(wrapper.tell(), initial_seek)
      data = wrapper.read()
      self.assertEqual(len(data), tmp_file_len - initial_seek)
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testSeekForward(self):
    for initial_seek in (0,
                         TRANSFER_BUFFER_SIZE - 1,
                         TRANSFER_BUFFER_SIZE,
                         TRANSFER_BUFFER_SIZE + 1,
                         TRANSFER_BUFFER_SIZE * 2 - 1,
                         TRANSFER_BUFFER_SIZE * 2,
                         TRANSFER_BUFFER_SIZE * 2 + 1):
      self._testSeekForward(initial_seek)

  def _testSeekAway(self, initial_read):
    """Tests reading to an initial position and then seeking to EOF and back.

    This function simulates a size check on the input file by seeking to the
    end of the file and then back to the current position. Then it reads to
    the end of the file, ensuring the hash matches the original file upon
    completion.

    Args:
      initial_read: Number of bytes to initially read.

    Raises:
      AssertionError on wrong amount of data remaining or hash mismatch.
    """
    tmp_file = self._GetTestFile()
    tmp_file_len = os.path.getsize(tmp_file)

    self.assertLess(
        initial_read, tmp_file_len,
        'initial_read must be less than test file size %s '
        '(but was actually: %s)' % (tmp_file_len, initial_read))

    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read(initial_read)
      self.assertEqual(wrapper.tell(), initial_read)
      wrapper.seek(0, os.SEEK_END)
      self.assertEqual(wrapper.tell(), tmp_file_len)
      wrapper.seek(initial_read, os.SEEK_SET)
      data = wrapper.read()
      self.assertEqual(len(data), tmp_file_len - initial_read)
    with open(tmp_file, 'rb') as stream:
      actual = CalculateMd5FromContents(stream)
    self.assertEqual(actual, digesters['md5'].hexdigest())

  def testValidSeekAway(self):
    for initial_read in (0,
                         TRANSFER_BUFFER_SIZE - 1,
                         TRANSFER_BUFFER_SIZE,
                         TRANSFER_BUFFER_SIZE + 1,
                         TRANSFER_BUFFER_SIZE * 2 - 1,
                         TRANSFER_BUFFER_SIZE * 2,
                         TRANSFER_BUFFER_SIZE * 2 + 1):
      self._testSeekAway(initial_read)

  def testInvalidSeekAway(self):
    """Tests seeking to EOF and then reading without first doing a SEEK_SET."""
    tmp_file = self._GetTestFile()
    digesters = {'md5': md5()}
    with open(tmp_file, 'rb') as stream:
      wrapper = HashingFileUploadWrapper(stream, digesters, {'md5': md5},
                                         self._dummy_url, self.logger)
      wrapper.read(TRANSFER_BUFFER_SIZE)
      wrapper.seek(0, os.SEEK_END)
      try:
        wrapper.read()
        self.fail('Expected CommandException for invalid seek.')
      except CommandException as e:
        self.assertIn(
            'Read called on hashing file pointer in an unknown position',
            str(e))