2 # -*- coding: utf-8 -*-
4 #This file is part of <name prog> project
7 #Copyright (C) <year> <name|nick>
9 #This program is free software; you can redistribute it and/or
10 #modify it under the terms of the GNU General Public License
11 #as published by the Free Software Foundation; either version 2
12 #of the License, or (at your option) any later version.
14 #This program is distributed in the hope that it will be useful,
15 #but WITHOUT ANY WARRANTY; without even the implied warranty of
16 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 #GNU General Public License for more details.
19 #You should have received a copy of the GNU General Public License
20 #along with this program; if not, write to the Free Software
21 #Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#You can contact the author by email <my email>
# Base address of the gallery search page; lookups append a
# "?f_shash=<sha1>" query string to it.
URL = 'http://g.e-hentai.org/'
# Absolute path of the sha1sum executable.
SHA1SUM_BIN = "/usr/bin/sha1sum"
def get_sha1_from_file(filepath):
    """Return the SHA-1 hex digest of an image file, or None.

    Only regular files whose extension (compared case-insensitively) is
    .png, .jpg or .jpeg are hashed. Any other path, a missing file, or a
    file that cannot be read yields None.

    The digest is computed in-process with hashlib rather than by building
    a shell command line for an external sha1sum binary, so file names
    containing quotes or other shell metacharacters are handled safely and
    no external program is required.

    NOTE(review): the original return statement was not visible when this
    was reviewed; returning the 40-character digest (the value the old
    regex captured) or None is the reconstructed contract -- confirm
    against callers.

    Args:
        filepath: path of the candidate image file.

    Returns:
        The 40-character lowercase hexadecimal SHA-1 digest, or None.
    """
    import hashlib  # local import so the file's import block is untouched

    extension = os.path.splitext(filepath)[1].lower()
    if extension not in ('.png', '.jpg', '.jpeg'):
        return None
    if not os.path.isfile(filepath):
        return None

    digest = hashlib.sha1()
    try:
        with open(filepath, 'rb') as image:
            # Read in chunks so a large image is never held in memory whole.
            for chunk in iter(lambda: image.read(65536), b''):
                digest.update(chunk)
    except (IOError, OSError):
        # Unreadable file: report "no digest" instead of aborting the scan.
        return None
    return digest.hexdigest()
def find_by_sha1(sha1):
    """Query the search page for a SHA-1 and count the matching markers.

    Requests ``URL?f_shash=<sha1>`` and counts how often the literal
    markup ``class="gtr0"`` occurs in the returned page.

    NOTE(review): presumably each ``gtr0`` element is one result row, so
    the count approximates the number of duplicates -- verify against the
    site's markup. The original return statement was not visible when this
    was reviewed; returning the number of matches is reconstructed from
    how main() prints the result.

    Args:
        sha1: hexadecimal SHA-1 digest to search for.

    Returns:
        The number of ``class="gtr0"`` occurrences in the response body.

    Raises:
        IOError: propagated from urllib when the request fails.
    """
    query = urllib.urlencode({'f_shash': sha1})
    _url = "%s?%s" % (URL, query)
    u = urllib.urlopen(_url)
    try:
        # The page body must be read before it can be searched.
        src = u.read()
    finally:
        # Always release the connection, even when the read fails.
        u.close()
    test_res = re.findall("class=\"(gtr0)\"", src)
    return len(test_res)
def main(*args, **kwargs):
    """Print a duplicate count for each path given on the command line.

    For a file argument, the file is hashed and looked up directly. For a
    directory argument, the tree is walked and one randomly chosen image
    per directory is hashed and looked up as a sample for that directory.

    The parameters are accepted for call compatibility only: as in the
    original, the paths are always taken from the command line and any
    arguments passed in are ignored.

    Output lines have the form ``<count>  duplicate for  <path>``, which is
    byte-identical to what the former Python 2 print statement produced.
    """
    import sys  # local import; the original reached sys through os.sys

    image_exts = ('.png', '.jpg', '.jpeg')
    args = sys.argv[1:]
    for arg in args:
        if os.path.isfile(arg):
            sha1 = get_sha1_from_file(arg)
            if not sha1:
                # Not a hashable image: there is nothing to look up.
                continue
            print("%s  duplicate for  %s" % (find_by_sha1(sha1), arg))
        elif os.path.isdir(arg):
            for dirpath, _dirnames, filenames in os.walk(arg):
                # Sample among images only: random.choice() raises
                # IndexError on an empty list, and a pick that lands on a
                # non-image would silently skip a directory with images.
                images = [
                    name for name in filenames
                    if os.path.splitext(name)[1].lower() in image_exts
                ]
                if not images:
                    continue
                filename = random.choice(images)
                filepath = os.path.join(dirpath, filename)
                sha1 = get_sha1_from_file(filepath)
                if not sha1:
                    continue
                dup_count = find_by_sha1(sha1)
                print("%s  duplicate for  %s" % (dup_count, dirpath))
75 if __name__
== "__main__":