1 # -*- coding: utf-8 -*-
2 # Copyright 2014 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Implementation of hash command for calculating hashes of local files."""
from hashlib import md5
import os

import crcmod

from gslib.command import Command
from gslib.command_argument import CommandArgument
from gslib.cs_api_map import ApiSelector
from gslib.exception import CommandException
from gslib.hashing_helper import Base64EncodeHash
from gslib.hashing_helper import CalculateHashesFromContents
from gslib.hashing_helper import SLOW_CRCMOD_WARNING
from gslib.progress_callback import ConstructAnnounceText
from gslib.progress_callback import FileProgressCallbackHandler
from gslib.progress_callback import ProgressCallbackWithBackoff
from gslib.storage_url import StorageUrlFromString
from gslib.util import NO_MAX
from gslib.util import UsingCrcmodExtension
# One-line usage synopsis. The -c/-h/-m flags are sub-options of the "hash"
# command, so they are listed after the command name.
_SYNOPSIS = """
  gsutil hash [-c] [-h] [-m] filename...
"""

# Full help text shown for the command; embeds the synopsis above.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The hash command calculates hashes on a local file that can be used to compare
  with gsutil ls -L output. If a specific hash option is not provided, this
  command calculates all gsutil-supported hashes for the file.

  Note that gsutil automatically performs hash validation when uploading or
  downloading files, so this command is only needed if you want to write a
  script that separately checks the hash for some reason.

  If you calculate a CRC32c hash for the file without a precompiled crcmod
  installation, hashing will be very slow. See "gsutil help crcmod" for details.


<B>OPTIONS</B>
  -c          Calculate a CRC32c hash for the file.

  -h          Output hashes in hex format. By default, gsutil uses base64.

  -m          Calculate an MD5 hash for the file.
""")
class HashCommand(Command):
  """Implementation of gsutil hash command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'hash',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='chm',
      file_url_ok=True,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='hash',
      help_name_aliases=['checksum'],
      help_type='command_help',
      help_one_line_summary='Calculate file hashes',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  @classmethod
  def _ParseOpts(cls, sub_opts, logger):
    """Returns behavior variables based on input options.

    Args:
      sub_opts: getopt sub-arguments for the command, as an iterable of
          (option, value) pairs. May be empty or None.
      logger: logging.Logger for the command.

    Returns:
      Tuple of
      calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
      calc_md5: Boolean, if True, command should calculate an MD5 hash.
      format_func: Function used for formatting the hash in the desired format.
      output_format: String describing the hash output format.
    """
    calc_crc32c = False
    calc_md5 = False
    # Default output is base64, produced from the digester's hex digest.
    format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
    found_hash_option = False
    output_format = 'base64'

    for o, unused_a in sub_opts or ():
      if o == '-c':
        calc_crc32c = True
        found_hash_option = True
      elif o == '-h':
        # -h only changes the output format; it does not select a hash.
        output_format = 'hex'
        format_func = lambda digest: digest.hexdigest()
      elif o == '-m':
        calc_md5 = True
        found_hash_option = True

    # With no explicit hash option, calculate every supported hash.
    if not found_hash_option:
      calc_crc32c = True
      calc_md5 = True

    if calc_crc32c and not UsingCrcmodExtension(crcmod):
      logger.warning(SLOW_CRCMOD_WARNING)

    return calc_crc32c, calc_md5, format_func, output_format

  def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
    """Constructs the dictionary of hashes to compute based on the arguments.

    Args:
      calc_crc32c: If True, CRC32c should be included.
      calc_md5: If True, MD5 should be included.

    Returns:
      Dictionary of {string: hash digester}, where string is the name of the
          digester algorithm ('crc32c' and/or 'md5').
    """
    hash_dict = {}
    if calc_crc32c:
      hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
    if calc_md5:
      hash_dict['md5'] = md5()
    return hash_dict

  def RunCommand(self):
    """Command entry point for the hash command.

    Hashes every local file matched by the command arguments and prints the
    requested hashes for each one.

    Returns:
      0 on success.

    Raises:
      CommandException: if an argument is not a file URL, or if no argument
          matched any file.
    """
    (calc_crc32c, calc_md5, format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      if not StorageUrlFromString(url_str).IsFileUrl():
        raise CommandException('"hash" command requires a file URL')

      for file_ref in self.WildcardIterator(url_str).IterObjects():
        matched_one = True
        file_name = file_ref.storage_url.object_name
        file_size = os.path.getsize(file_name)
        callback_processor = ProgressCallbackWithBackoff(
            file_size, FileProgressCallbackHandler(
                ConstructAnnounceText('Hashing', file_name), self.logger).call)
        # Fresh digesters per file so state never carries over between files.
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        # Single-argument print(...) behaves identically as a Python 2 print
        # statement and as a Python 3 function call.
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in hash_dict.items():
          print('\tHash (%s):\t\t%s' % (name, format_func(digest)))

    if not matched_one:
      raise CommandException('No files matched')

    return 0