ga-infounused: Use `--no-textconv` in `git diff`
[sunny256-utils.git] / sums
blob4e779d09a6910486b2033ae6aa0859164a6dd249
1 #!/usr/bin/env perl
3 #=======================================================================
4 # sums
5 # File ID: 53bdb770-b77a-11de-90f3-00248cd5cf1e
6 # Generates smsum hashes
8 # Character set: UTF-8
9 # ©opyleft 2008– Øyvind A. Holm <sunny@sunbase.org>
10 # License: GNU General Public License version 2 or later, see end of
11 # file for legal stuff.
12 #=======================================================================
14 use strict;
15 use warnings;
16 use Getopt::Long;
17 use Digest::MD5 qw{ md5_hex };
18 use Digest::SHA qw{ sha1_hex };
# Turn on autoflush for the currently selected output handle.
$| = 1;

our $Debug = 0;

# Option defaults; GetOptions() below overwrites them from the command line.
our %Opt = (

    'debug' => 0,
    'help' => 0,
    'verbose' => 0,
    'version' => 0,
    'with-mtime' => 0,

);

# Program name without any leading directory part, used in messages.
our $progname = $0;
$progname =~ s/^.*\/(.*?)$/$1/;
our $VERSION = "0.00";

# "bundling" lets single-letter options be combined, e.g. -vv.
Getopt::Long::Configure("bundling");
GetOptions(

    "debug" => \$Opt{'debug'},
    "help|h" => \$Opt{'help'},
    "verbose|v+" => \$Opt{'verbose'},
    "version" => \$Opt{'version'},
    "with-mtime|m" => \$Opt{'with-mtime'},

) || die("$progname: Option error. Use -h for help.\n");

$Opt{'debug'} && ($Debug = 1);
$Opt{'help'} && usage(0);
if ($Opt{'version'}) {
    print_version();
    exit(0);
}

# Emit one SQL INSERT statement per input line (files named on the command
# line, or stdin). The hashes are computed over the line without its
# trailing newline; the string itself is escaped by safe_sql().
while (my $Line = <>) {
    chomp($Line);
    printf("INSERT INTO sums (string, md5, sha1) VALUES (E'%s', '%s', '%s');\n",
        safe_sql($Line), md5_hex($Line), sha1_hex($Line));
}
sub safe_sql {
    # Escape a string for the E'...' literal printed by the main loop {{{
    # Takes the raw string and returns an escaped copy. The order of the
    # substitutions matters: backslashes are expanded first because every
    # later step introduces backslashes of its own.
    my ($Text) = @_;
    for ($Text) {
        s/\\/\\\\\\\\/g; # One backslash becomes four
        s/'/''/g;
        s/\n/\\n/g;
        s/\r/\\r/g;
        s/\t/\\t/g;
        # Whatever is left in 0x00-0x1f, plus 0x7e-0xff, is written as an
        # octal escape by escape_char().
        s/([\x00-\x1f\x7e-\xff])/escape_char($1)/ge;
    }
    return($Text);
    # }}}
} # safe_sql()
sub escape_char {
    # Return two backslashes followed by the zero-padded, three-digit
    # octal code of the character passed in.
    my ($Char) = @_;
    my $Code = ord($Char);
    return(sprintf("\\\\%03o", $Code));
} # escape_char()
sub process_file {
    # Build the smsum line for one file, or for stdin if the name is "-" {{{
    #
    # Argument: the file name; "-" means read stdin.
    # Returns "SHA1-MD5-SIZE\tFILENAME\n", with "\tMTIME" inserted before
    # the newline when --with-mtime is in effect, or "" when the file is
    # skipped or cannot be read. For stdin, SIZE is the number of bytes
    # actually read and MTIME is the current time; for files both come
    # from stat().
    my $Filename = shift;
    my $use_stdin = ($Filename eq "-") ? 1 : 0;
    my ($Size, $Mtime);
    my $fh;

    D("process_file('$Filename')");
    if ($use_stdin) {
        $fh = \*STDIN;
        ($Size, $Mtime) = (0, time);
    } else {
        my @stat_array = stat($Filename);
        if (!@stat_array) {
            warn("$progname: $Filename: Cannot read file status\n");
            return("");
        }
        unless (-f $Filename) {
            msg(1, "$Filename: Ignoring non-file");
            return("");
        }
        # Lexical handle instead of a bareword glob; closed explicitly below.
        if (!open($fh, "<", $Filename)) {
            warn("$progname: $Filename: Cannot read file\n");
            return("");
        }
        # stat() fields 7 and 9 are the size in bytes and the mtime.
        ($Size, $Mtime) = @stat_array[7, 9];
    }

    # The hashes must cover the raw bytes, so rule out newline translation
    # and any default I/O layers.
    binmode($fh);
    msg(2, sprintf("Reading %s...", safe_tab($Filename)));

    my $sha1 = Digest::SHA->new;
    my $md5 = Digest::MD5->new;
    my $Buf;
    my $Got;
    # Read fixed-size chunks; reading by line would pull a whole binary
    # file without newlines into memory at once.
    while ($Got = read($fh, $Buf, 65536)) {
        $sha1->add($Buf);
        $md5->add($Buf);
        $use_stdin && ($Size += $Got);
    }
    # read() returns undef on error and 0 at end of file.
    my $read_failed = defined($Got) ? 0 : 1;
    $use_stdin || close($fh);
    if ($read_failed) {
        # Do not hand out a checksum computed from partial data.
        warn("$progname: $Filename: Cannot read file\n");
        return("");
    }

    return(
        $sha1->hexdigest . "-" .
        $md5->hexdigest . "-" .
        $Size . "\t" .
        safe_tab($Filename) . (
            $Opt{'with-mtime'}
                ? "\t" . sec_to_string($Mtime)
                : ""
        ) . "\n"
    );
    # }}}
} # process_file()
sub safe_tab {
    # Make a string safe for a tab-separated, one-record-per-line format {{{
    # Backslash, LF, CR and TAB are replaced by the two-character
    # sequences \\, \n, \r and \t. Backslashes are doubled first so the
    # ones added by the later steps are not doubled again.
    my ($Raw) = @_;
    for ($Raw) {
        s/\\/\\\\/gs;
        s/\n/\\n/gs;
        s/\r/\\r/gs;
        s/\t/\\t/gs;
    }
    return($Raw);
    # }}}
} # safe_tab()
sub sec_to_string {
    # Format seconds since the epoch as UTC "yyyy-mm-ddThh:mm:ssZ" {{{
    my ($Seconds) = shift;

    # gmtime() returns the month as 0-11 and the year as an offset from 1900.
    my ($Sec, $Min, $Hour, $Mday, $Mon, $Year) = gmtime($Seconds);
    return(sprintf("%04u-%02u-%02uT%02u:%02u:%02uZ",
        $Year + 1900, $Mon + 1, $Mday,
        $Hour, $Min, $Sec));
    # }}}
} # sec_to_string()
sub print_version {
    # Write "<program name> v<version>" and a newline to stdout {{{
    my $Banner = "$progname v$VERSION\n";
    print($Banner);
    # }}}
} # print_version()
sub usage {
    # Send the help message to stdout and exit {{{
    # Argument: the exit status handed to exit(). With -v/--verbose the
    # version line is printed first. This sub never returns.
    my $Retval = shift;

    if ($Opt{'verbose'}) {
        print("\n");
        print_version();
    }
    # Interpolating heredoc: "\\" in the source is printed as a single
    # backslash.
    print(<<END);

Usage: $progname [options] [file [files [...]]]

The program is based on the same principle as md5sum(1) and sha1sum(1),
but combines the two hashes and also includes the file size:

  [SHA1][-][MD5][-][SIZE][\\t][FILENAME][\\n]

or if the --with-mtime option is used:

  [SHA1][-][MD5][-][SIZE][\\t][FILENAME][\\t][MTIME][\\n]

The reason for this approach, is that both hashing algorithms are well
known and widely used. Both algorithms are good enough for everyday
content verification, but at least the MD5 algorithm is vulnerable to
intentional collisions. Instead of inventing new algorithms which have to
earn trust over the years, combining the two well examined algorithms
and adding the size of the file will make a smsum hash collision much
harder.

If no filenames are specified on the command line, stdin is used.

Special characters in filenames are escaped this way:

  Horizontal Tab (0x09): \\t
  Line feed (0x0a): \\n
  Carriage return (0x0d): \\r
  Backslash ('\\', 0x5c): \\\\

Options:

  -h, --help
    Show this help.
  -m, --with-mtime
    Also include file modification time at the end of every line. The
    date uses the UTC timezone and has the format
    "yyyy-mm-ddThh:mm:ssZ". If stdin is read, the current time is used.
  -v, --verbose
    Increase level of verbosity. Can be repeated.
  --version
    Print version information.
  --debug
    Print debugging messages.

END
    exit($Retval);
    # }}}
} # usage()
sub msg {
    # Write a status message to stderr if the verbosity is high enough {{{
    # Arguments: the minimum verbosity level at which the message is
    # shown, and the message text (without a trailing newline).
    my $verbose_level = shift;
    my $Txt = shift;

    ($Opt{'verbose'} >= $verbose_level)
        and print(STDERR "$progname: $Txt\n");
    # }}}
} # msg()
sub D {
    # Write a debugging message to stderr when $Debug is set {{{
    # Output format: "<caller's file>:<caller's line> <pid> <message>".
    # Returns "" after printing; returns nothing when debugging is off.
    return unless $Debug;
    my $Txt = shift;
    chomp($Txt);
    my (undef, $File, $LineNo) = caller;
    # Normalise backslashes to slashes, then keep only the last path part.
    $File =~ s#\\#/#g;
    $File =~ s#^.*/(.*?)$#$1#;
    print(STDERR "$File:$LineNo $$ $Txt\n");
    return("");
    # }}}
} # D()
237 __END__
239 # Plain Old Documentation (POD) {{{
241 =pod
243 =head1 NAME
247 =head1 SYNOPSIS
249 [options] [file [files [...]]]
251 =head1 DESCRIPTION
255 =head1 OPTIONS
257 =over 4
259 =item B<-h>, B<--help>
261 Print a brief help summary.
263 =item B<-v>, B<--verbose>
265 Increase level of verbosity. Can be repeated.
267 =item B<--version>
269 Print version information.
271 =item B<--debug>
273 Print debugging messages.
275 =back
277 =head1 BUGS
281 =head1 AUTHOR
283 Made by Øyvind A. Holm S<E<lt>sunny@sunbase.orgE<gt>>.
285 =head1 COPYRIGHT
287 Copyleft © Øyvind A. Holm E<lt>sunny@sunbase.orgE<gt>
288 This is free software; see the file F<COPYING> for legalese stuff.
290 =head1 LICENCE
292 This program is free software: you can redistribute it and/or modify it
293 under the terms of the GNU General Public License as published by the
294 Free Software Foundation, either version 2 of the License, or (at your
295 option) any later version.
297 This program is distributed in the hope that it will be useful, but
298 WITHOUT ANY WARRANTY; without even the implied warranty of
299 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
300 See the GNU General Public License for more details.
302 You should have received a copy of the GNU General Public License along
303 with this program.
304 If not, see L<http://www.gnu.org/licenses/>.
306 =head1 SEE ALSO
308 =cut
310 # }}}
312 # vim: set fenc=UTF-8 ft=perl fdm=marker ts=4 sw=4 sts=4 et fo+=w :