6 while getopts ":a:b:p:h" OPT
; do
19 This script finds duplicate mp3 files by audio content (it ignores tags).
21 usage: $0 [-a algorithm] [-b percent] [-p pattern] [path] [path...]
23 -a algorithm Chooses the hash algorithm to use. See audiosum -l.
24 -b percent Integer >=1 and <=99, chooses the amount of the file data to
25 read during the intermediate phase of processing.
26 -p pattern File pattern to match against.
28 This script works by processing the mp3 files according to audiosum -h. The
29 processing is done in three phases:
31 1. Discards files which have different sizes.
32 2. Discards files with same size but different hash for the first n% data.
33 3. Discards files with same size but different hash for the whole data.
35 The remaining files are duplicated files.
# Number of hex digits in the hash audiosum prints for $ALGO: the length
# column of the matching `audiosum -l` line divided by 4 (presumably that
# column is the hash size in bits -- confirm against `audiosum -l` output).
#
# HASH_LEN is left EMPTY when no line matches, when audiosum cannot be run,
# or when the length column is not a plain integer, so that an emptiness
# check on HASH_LEN can reject the algorithm. Without this guard a failed
# `read` leaves the length unset and the arithmetic expansion prints "0",
# which looks like a valid (zero-width) hash length.
HASH_LEN=$(
  audiosum -l | grep ": .$ALGO" | {
    # Only the first matching line is used; the first two words are ignored.
    if read -r _ _ bits && [[ $bits =~ ^[0-9]+$ ]]; then
      # 10# forces base 10 so a zero-padded length is not parsed as octal.
      echo $((10#$bits / 4))
    fi
  }
)
43 if [ -z "$HASH_LEN" ]; then
44 echo Algorithm not supported.
48 if [[ $HASH_LEN = *[^
0-9]* ]]; then
49 echo Algorithm not supported.
53 if [[ $PERCENT = *[^
0-9]* ]]; then
54 echo Percent value must be an integer between
1 and
99.
58 if [ $PERCENT -lt 1 ] ||
[ $PERCENT -gt 99 ]; then
59 echo Percent value must be an integer between
1 and
99.
# Duplicate search in three filtering stages. Each stage sorts audiosum's
# output and keeps only lines whose leading key is shared with at least one
# other line (uniq -D / --all-repeated), then strips the leading fields with
# cut so the remaining file names can be fed to the next stage.
#
#   1. audiosum -b           key = first 8 characters
#                            (presumably the size field -- confirm)
#   2. audiosum -b PERCENT   key = first HASH_LEN+9 characters
#                            (presumably size + separator + partial hash)
#   3. audiosum              key = first HASH_LEN+9 characters; groups of
#                            repeated lines are printed separated by a
#                            blank line (--all-repeated=separate)
#
# "$@" (instead of an unquoted $*) passes every path argument to find as a
# single word, so paths containing spaces or glob characters are searched
# as given instead of being split or expanded by the shell.
find -- "$@" -type f -iname "$PATTERN" |
  audiosum -b | sort | uniq -D -w 8 | cut -d ' ' -f 6- |
  audiosum -b "$PERCENT" | sort | uniq -D -w $((HASH_LEN + 9)) | cut -d ' ' -f 7- |
  audiosum | sort | uniq --all-repeated=separate -w $((HASH_LEN + 9))