more scripts file
[archive.git] / Apkawa / ehentai_dup.py
blob0ba2dc4d6be24608bde4ae08fc882fad4cb43c38
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3 ###
4 #This file is part of <name prog> project
6 #<program description>
7 #Copyright (C) <year> <name|nick>
9 #This program is free software; you can redistribute it and/or
10 #modify it under the terms of the GNU General Public License
11 #as published by the Free Software Foundation; either version 2
12 #of the License, or (at your option) any later version.
14 #This program is distributed in the hope that it will be useful,
15 #but WITHOUT ANY WARRANTY; without even the implied warranty of
16 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 #GNU General Public License for more details.
19 #You should have received a copy of the GNU General Public License
20 #along with this program; if not, write to the Free Software
21 #Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 #You can contact author by email <my email>
24 ###
25 import os
26 import re
27 import time
28 import random
29 import urllib
# Base address of the gallery site; lookups append a "?<query>" string to it.
URL = "http://g.e-hentai.org/"#?f_shash="
# Absolute path of the external sha1sum executable.
SHA1SUM_BIN = '/usr/bin/sha1sum'
def get_sha1_from_file(filepath):
    """Return the SHA-1 hex digest of an image file, or None.

    Only regular files whose extension (compared case-insensitively) is
    .png, .jpg or .jpeg are hashed; any other path yields None.

    The digest is computed in-process with hashlib rather than by
    interpolating the path into a shell command line, so file names that
    contain quotes, ``$`` or backticks are handled safely and no external
    binary is needed.

    :param filepath: path of the candidate image file.
    :returns: 40-character hexadecimal SHA-1 string, or None when the path
        is not a supported image file or cannot be read.
    """
    # Function-scope import keeps the block self-contained; the module's
    # import list does not include hashlib.
    import hashlib

    extension = os.path.splitext(filepath)[1].lower()
    if not os.path.isfile(filepath):
        return None
    if extension not in ('.png', '.jpg', '.jpeg'):
        return None

    digest = hashlib.sha1()
    try:
        with open(filepath, 'rb') as image:
            # Hash in chunks so a large image is never held in memory whole.
            for chunk in iter(lambda: image.read(65536), b''):
                digest.update(chunk)
    except (IOError, OSError):
        # Keep the "no hash available -> None" contract for unreadable files.
        return None
    return digest.hexdigest()
def find_by_sha1(sha1):
    """Count search-result hits on the gallery site for an image hash.

    Sleeps four seconds, requests ``URL`` with the hash passed as the
    ``f_shash`` query parameter, and counts how many times the markup
    ``class="gtr0"`` occurs in the returned page.

    Uses the Python 2 ``urllib`` API (``urlencode`` / ``urlopen``), like
    the rest of this script.

    :param sha1: hexadecimal SHA-1 string of the image to look up.
    :returns: number of ``class="gtr0"`` occurrences in the response body
        (0 when there are none).
    """
    # Fixed pause before every request -- presumably to avoid hammering
    # the server when many files are checked in a row.
    time.sleep(4)
    query = urllib.urlencode({'f_shash': sha1})
    _url = "%s?%s" % (URL, query)
    response = urllib.urlopen(_url)
    try:
        page = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    return len(re.findall("class=\"(gtr0)\"", page))
def main(*args, **kwargs):
    """Report duplicate counts for the given image files and directories.

    Each target is handled according to its kind:

    * a regular file is hashed and looked up once;
    * a directory is walked recursively; in every sub-directory one image
      (.png/.jpg/.jpeg) is picked at random and looked up, retrying up to
      three times while the lookup reports no hit.  The result is printed
      against the directory path, not the sampled file.

    Targets that are neither files nor directories, and files for which no
    hash can be computed, are skipped.

    :param args: paths to check; when empty, the command-line arguments
        (``sys.argv[1:]``) are used instead.
    :param kwargs: accepted for interface compatibility; unused.
    """
    import sys

    image_exts = ('.png', '.jpg', '.jpeg')
    # Explicit arguments win; a bare main() keeps reading the command line.
    targets = args or sys.argv[1:]

    for target in targets:
        if os.path.isfile(target):
            sha1 = get_sha1_from_file(target)
            if sha1 is None:
                # Not a supported image: do not query the server with a
                # meaningless hash.
                continue
            # Single-argument print(...) emits the same text as the former
            # comma-separated print statement.
            print(" ".join([str(find_by_sha1(sha1)), " duplicate for ", target]))
        elif os.path.isdir(target):
            for dirpath, dirnames, filenames in os.walk(target):
                # Sample only among real image candidates so that a
                # directory containing an image is never missed by chance.
                images = [name for name in filenames
                          if os.path.splitext(name)[1].lower() in image_exts]
                if not images:
                    continue
                filepath = os.path.join(dirpath, random.choice(images))
                sha1 = get_sha1_from_file(filepath)
                if sha1 is None:
                    continue
                dup_count = 0
                # Up to three attempts; stop at the first one with a hit.
                for _attempt in range(3):
                    dup_count = find_by_sha1(sha1)
                    if dup_count:
                        break
                print(" ".join([str(dup_count), " duplicate for ", dirpath]))
# Run the duplicate lookup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()