Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / telemetry / third_party / gsutilz / gslib / commands / hash.py
blobcfe47778465fae34cc9e3ece738ad43a2c23f592
1 # -*- coding: utf-8 -*-
2 # Copyright 2014 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of hash command for calculating hashes of local files."""
17 from hashlib import md5
18 import os
20 import crcmod
22 from gslib.command import Command
23 from gslib.command_argument import CommandArgument
24 from gslib.cs_api_map import ApiSelector
25 from gslib.exception import CommandException
26 from gslib.hashing_helper import Base64EncodeHash
27 from gslib.hashing_helper import CalculateHashesFromContents
28 from gslib.hashing_helper import SLOW_CRCMOD_WARNING
29 from gslib.progress_callback import ConstructAnnounceText
30 from gslib.progress_callback import FileProgressCallbackHandler
31 from gslib.progress_callback import ProgressCallbackWithBackoff
32 from gslib.storage_url import StorageUrlFromString
33 from gslib.util import NO_MAX
34 from gslib.util import UsingCrcmodExtension
# One-line usage string for the hash command. It is embedded in the detailed
# help text below and is also passed as usage_synopsis in the command spec.
_SYNOPSIS = """
gsutil [-c] [-h] [-m] hash filename...
"""

# Full help text for the hash command (synopsis, description and options).
# It is passed as help_text in the command's help spec.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """
<B>DESCRIPTION</B>
The hash command calculates hashes on a local file that can be used to compare
with gsutil ls -L output. If a specific hash option is not provided, this
command calculates all gsutil-supported hashes for the file.
Note that gsutil automatically performs hash validation when uploading or
downloading files, so this command is only needed if you want to write a
script that separately checks the hash for some reason.
If you calculate a CRC32c hash for the file without a precompiled crcmod
installation, hashing will be very slow. See "gsutil help crcmod" for details.
<B>OPTIONS</B>
-c Calculate a CRC32c hash for the file.
-h Output hashes in hex format. By default, gsutil uses base64.
-m Calculate a MD5 hash for the file.
""")
class HashCommand(Command):
  """Implementation of gsutil hash command.

  Calculates CRC32c and/or MD5 hashes of local files and prints them in
  base64 (the default) or hex format.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'hash',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='chm',
      file_url_ok=True,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='hash',
      help_name_aliases=['checksum'],
      help_type='command_help',
      help_one_line_summary='Calculate file hashes',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  @classmethod
  def _ParseOpts(cls, sub_opts, logger):
    """Returns behavior variables based on input options.

    Args:
      sub_opts: getopt sub-arguments for the command.
      logger: logging.Logger for the command.

    Returns:
      Tuple of
      calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
      calc_md5: Boolean, if True, command should calculate an MD5 hash.
      format_func: Function used for formatting the hash in the desired format.
      output_format: String describing the hash output format.
    """
    calc_crc32c = False
    calc_md5 = False
    # Default output is base64; -h switches both the formatter and the label.
    format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
    found_hash_option = False
    output_format = 'base64'

    if sub_opts:
      for o, unused_a in sub_opts:
        if o == '-c':
          calc_crc32c = True
          found_hash_option = True
        elif o == '-h':
          output_format = 'hex'
          format_func = lambda digest: digest.hexdigest()
        elif o == '-m':
          calc_md5 = True
          found_hash_option = True

    # With neither -c nor -m given, calculate every supported hash.
    if not found_hash_option:
      calc_crc32c = True
      calc_md5 = True

    # Warn up front when CRC32c was requested but the crcmod extension check
    # fails. Logger.warning is used because Logger.warn is a deprecated alias.
    if calc_crc32c and not UsingCrcmodExtension(crcmod):
      logger.warning(SLOW_CRCMOD_WARNING)

    return calc_crc32c, calc_md5, format_func, output_format

  def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
    """Constructs the dictionary of hashes to compute based on the arguments.

    Args:
      calc_crc32c: If True, CRC32c should be included.
      calc_md5: If True, MD5 should be included.

    Returns:
      Dictionary of {string: hash digester}, where string is the name of the
      digester algorithm.
    """
    hash_dict = {}
    if calc_crc32c:
      hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
    if calc_md5:
      hash_dict['md5'] = md5()
    return hash_dict

  def RunCommand(self):
    """Command entry point for the hash command.

    Returns:
      0 once every matched file has been hashed and printed.

    Raises:
      CommandException: if an argument is not a file URL, or if no files
          matched any of the arguments.
    """
    (calc_crc32c, calc_md5, format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      if not StorageUrlFromString(url_str).IsFileUrl():
        raise CommandException('"hash" command requires a file URL')

      for file_ref in self.WildcardIterator(url_str).IterObjects():
        matched_one = True
        file_name = file_ref.storage_url.object_name
        file_size = os.path.getsize(file_name)
        callback_processor = ProgressCallbackWithBackoff(
            file_size, FileProgressCallbackHandler(
                ConstructAnnounceText('Hashing', file_name), self.logger).call)
        # Fresh digesters per file so hashes never accumulate across files.
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        # print(<single expression>) produces identical output whether print
        # is a statement (Python 2) or a function (Python 3); likewise
        # items() is available on both, unlike iteritems().
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in hash_dict.items():
          print('\tHash (%s):\t\t%s' % (name, format_func(digest)))

    if not matched_one:
      raise CommandException('No files matched')

    return 0