add kvpairs2td
[hband-tools.git] / user-tools / rdfindreport
blob fafd59cea71a503f7a06a0e1c17ec14fe635d632
#!/bin/bash
#
# rdfindreport - run rdfind over the given paths and print a report of the
# duplicate files it found.
#
# Usage:   rdfindreport PATH...
#
# stdout:  a "#"-prefixed summary header, followed by groups of identical
#          files separated by blank lines, one "Type Size FilePath" per line.
# stderr:  the progress output of rdfind and any diagnostics.
# exit:    1 if a file name containing a newline is found (such names cannot
#          be represented in the line-oriented rdfind results file), non-zero
#          if any later step fails.
#
# Requires: rdfind, perl with the Number::Bytes::Human module.

set -u

NL=$'\n'

tmpfile1=''
tmpfile2=''

# Remove the temporary files. Runs exactly once, from the EXIT trap.
cleanup() {
  rm -f -- "$tmpfile1" "$tmpfile2"
}
trap cleanup EXIT
# A trapped signal does not terminate the script by itself, so exit
# explicitly; the EXIT trap then performs the cleanup.
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 143' TERM

tmpfile1=$(mktemp) || exit 1
tmpfile2=$(mktemp) || exit 1

# Refuse to continue if any file name contains a newline. This runs before
# "set -e" so that a non-zero status from find does not abort the script.
newlinefile=$(find "$@" -xdev -name "*${NL}*" -print -quit)
if [ -n "$newlinefile" ]; then
  printf 'Invalid filename found:\n%s\n' "$newlinefile" >&2
  exit 1
fi

set -e
set -o pipefail

# stdout is reserved for the report, so rdfind output goes to stderr.
rdfind -removeidentinode true -xdev true -outputname "$tmpfile1" -checksum sha1 "$@" >&2

# Drop comment lines and strip the minus sign from the id field (field 2)
# BEFORE sorting, so that all members of a set share one sort key and stay
# adjacent. Order: size descending, then id, then depth.
sed -e '/^#/d' \
    -e 's/^\([^[:space:]]*[[:space:]]\{1,\}\)-/\1/' "$tmpfile1" \
  | sort -k4nr -k2n -k3n >"$tmpfile2"

export tmpfile1
export tmpfile2

# The Perl program reads the sorted records from tmpfile2, writes the grouped
# listing to tmpfile1, and finally overwrites tmpfile2 with the summary header.
# NOTE: the program is a single-quoted shell word; it must not contain
# an apostrophe.
perl -e '
use Number::Bytes::Human "format_bytes";

$reduce_size = $optimal_size = $sets = $duplicated_files = 0;

# Write the collected set to the listing, but only if it really holds
# more than one file.
sub flush_data
{
  if(scalar @set_data > 1)
  {
    print {$tmp1} "\n", join("", @set_data);
  }
}

open $tmp1, ">", $ENV{"tmpfile1"} or die "cannot write $ENV{tmpfile1}: $!\n";
open $tmp2, "<", $ENV{"tmpfile2"} or die "cannot read $ENV{tmpfile2}: $!\n";

while(<$tmp2>)
{
  # Ids are normally already unsigned here; this is kept as a safeguard.
  s/^(\S+\s+)-/$1/;
  s/[\r\n]*$//;
  # Limit of 8 fields keeps whitespace inside the file name intact.
  my ($type, $id, $depth, $size, $device, $inode, $prio, $filename) = split /\s+/, $_, 8;

  if($filename =~ /\/\.Recycler\//) { next; }

  if(!defined $oldid or $id != $oldid)
  {
    # First member of a new set: its size is space that has to stay.
    $optimal_size += $size;
    $sets++;
    flush_data;
    @set_data = ();
  }
  else
  {
    # Further member of the current set: its size is reclaimable.
    $reduce_size += $size;
    $duplicated_files++;
  }

  push @set_data, "$type $size $filename\n";
  $oldid = $id;
}

flush_data;
close $tmp1 or die "cannot write $ENV{tmpfile1}: $!\n";
close $tmp2;

open $tmp2, ">", $ENV{"tmpfile2"} or die "cannot write $ENV{tmpfile2}: $!\n";
$total_size = $reduce_size + $optimal_size;
# Nothing found means no redundancy; report 0 rather than an infinity
# that the %d conversion cannot render meaningfully.
$prcnt = $total_size == 0 ? 0 : $reduce_size * 100 / $total_size;

printf {$tmp2} "# duplicated files: %d
# in %d groups
# occupying %sB extra space
# that means %d%% redundancy among duplications
# Fields: Type Size FilePath
", $duplicated_files, $sets, format_bytes($reduce_size), $prcnt;
close $tmp2 or die "cannot write $ENV{tmpfile2}: $!\n";
'

# Summary header first, then the grouped listing.
cat "$tmpfile2" "$tmpfile1"