Merge branch release-2019 into release-2020
[gromacs.git] / admin / createFileHash.py
blob5aedafc96dbaac0f3878dfae23673729b26a7af3
1 #! /usr/bin/env python
2 # This file is part of the GROMACS molecular simulation package.
4 # Copyright (c) 2019, by the GROMACS development team, led by
5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 # and including many others, as listed in the AUTHORS file in the
7 # top-level source directory and at http://www.gromacs.org.
9 # GROMACS is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU Lesser General Public License
11 # as published by the Free Software Foundation; either version 2.1
12 # of the License, or (at your option) any later version.
14 # GROMACS is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # Lesser General Public License for more details.
19 # You should have received a copy of the GNU Lesser General Public
20 # License along with GROMACS; if not, see
21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 # If you want to redistribute modifications to GROMACS, please
25 # consider that scientific software is very special. Version
26 # control is crucial - bugs must be traceable. We will be happy to
27 # consider code for inclusion in the official distribution, but
28 # derived work must not be called official GROMACS. Details are found
29 # in the README & COPYING files - if they are missing, get the
30 # official version at http://www.gromacs.org.
32 # To help us fund GROMACS development, we humbly ask that you cite
33 # the research papers on the package. Check out http://www.gromacs.org.
34 import hashlib, hmac, os, stat, sys, re
35 from re import search
37 """
38 Calculate hash of files in build tree to allow checking against
39 stored hashes in case of the tree not being in git (e.g. if the
program is built from a release tarball).
42 Based on example script found here:
43 https://unix.stackexchange.com/a/35847
44 """
def is_in_whitelist(name):
    """Return True if the file is white listed to be included in hash calculation.

    A file is white listed when its name ends in one of the known source,
    header, script or build-system suffixes, or when it is a
    ``CMakeLists.txt`` file.

    Args:
        name: Path or file name to test.

    Returns:
        True if ``name`` matches any white list pattern, False otherwise.
    """
    # Raw strings keep ``\.`` a pure regex escape; inside a plain string
    # literal it is an invalid Python escape sequence that newer interpreters
    # warn about.
    whitelist = [
        r"\.cpp$", r"\.h$", r"\.cuh$", r"\.cu$", r"\.clh$",
        # CMake list files are named ``CMakeLists.txt`` (plural). The former
        # pattern "CMakeList.txt$" could never match that name, so these files
        # were silently left out of the hash.
        # NOTE: including them changes the resulting tree hash, so any stored
        # reference hash has to be regenerated with this version of the script.
        r"CMakeLists\.txt$",
        r"\.cmake$", r"\.in$", r"\.cmakein$", r"\.py$",
    ]
    return any(search(item, name) for item in whitelist)
def is_blacklisted(name):
    """Tell whether a file has been explicitly excluded from hashing.

    Returns True as soon as one of the black list patterns is found anywhere
    in ``name``; returns False when none of them matches.
    """
    excluded_patterns = ["gmx-completion"]
    return any(search(pattern, name) for pattern in excluded_patterns)
def file_hash(name):
    """Return the hash of the contents of the specified file, as a hex string.

    Reads the file in chunks of 16384 bytes, so the whole file never has to
    be held in memory at once, and returns the digest of the complete file.
    The hashing algorithm used is sha256, to avoid accidental clashes when
    using a more simple algorithm such as md5.

    Args:
        name: Path of the file to hash.

    Returns:
        The SHA-256 digest of the file contents as a hexadecimal string.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    h = hashlib.sha256()
    # The context manager closes the file even when reading raises, which the
    # previous explicit open()/close() pair did not guarantee.
    with open(name, 'rb') as f:
        # iter() with a sentinel keeps calling f.read() until it returns b''.
        for buf in iter(lambda: f.read(16384), b''):
            h.update(buf)
    return h.hexdigest()
def traverse(h, path, original_path):
    """Recursively feed all hashable files below ``path`` into ``h``.

    Directories are descended into, visiting their entries in sorted order.
    Every regular file that is white listed and not black listed contributes
    one record to the hash object ``h``, built from the file's path relative
    to ``original_path``, its size and the hash of its contents. Only the
    relative path enters the hash; the absolute location of the tree does not.

    Files that are not white listed, files that are black listed, symlinks
    and all kinds of special files are ignored.
    """
    info = os.lstat(path)

    if stat.S_ISDIR(info.st_mode):
        for child in sorted(os.listdir(path)):
            traverse(h, os.path.join(path, child), original_path)
        return

    if not stat.S_ISREG(info.st_mode):
        # Symlinks and other special files are silently skipped.
        return

    # Only hash files that actually take part in building GROMACS.
    if not is_in_whitelist(path) or is_blacklisted(path):
        return

    relative_name = repr(os.path.relpath(path, original_path))
    record = ' '.join(['reg', relative_name, str(info.st_size), file_hash(path)]) + '\n'
    h.update(record.encode('utf-8'))
def main():
    """Run the hashing script.

    Parses the command line, feeds every white listed file found below each
    given source directory into one combined SHA-256 hash, and writes the
    resulting hex digest to the output file.

    Command line options:
        -s / --source-root: source tree directories to hash (required).
        -o / --output-file: file to write the hash to (default 'hashresult').
    """
    import argparse

    parser = argparse.ArgumentParser(description='Hash all white listed files in a single directory')
    parser.add_argument('-s',
                        '--source-root',
                        help='Source tree directory, can be specified multiple times to get several directories hashed',
                        nargs='*',
                        required=True)
    parser.add_argument('-o',
                        '--output-file',
                        help='File to write hash to.',
                        default='hashresult')

    args = parser.parse_args()

    h = hashlib.sha256()
    for input_sources in args.source_root:
        traverse(h, input_sources, input_sources)

    # Append a fixed terminating record after the last file entry.
    end = 'end\n'
    h.update(end.encode('utf-8'))

    # The context manager guarantees the digest is flushed and the file is
    # closed; previously the handle was opened and never closed.
    with open(args.output_file, 'w') as outputfile:
        outputfile.write(h.hexdigest())
# Run the hashing only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()