Create README.md
[ViQa-Site-Tracker.git] / main.py
blob637c5d0b8b4f9b9d373e9d07632a8395d7ed2240
1 import vconfig as userconf
2 import os
3 import MySQLdb
4 import time
5 import json
6 import urllib.request
7 import traceback
# Placeholder hash: the same constant is stored in the `filehash` column of
# every inserted post and in the "hash" field of the generated file JSON.
# NOTE(review): the value is 32 hex digits (MD5 length) rather than the 40 of
# a SHA-1 digest, despite the variable name -- confirm what the consumer expects.
global_sha1_hash_value = "3e37f124251bdd8f8fdd93cf0824817a"; #empty string hash
def safe_retrive_from_URL(file_ext, target_site, target_name):
    """Download a thumbnail and reject it if the server answered with HTML.

    The thumbnail is fetched from ``<target_site>thumb/<target_name><file_ext>``
    and stored under ``userconf.thumb_storage_location_absolute``.

    Args:
        file_ext: Extension to try, including the leading dot (e.g. ".png").
        target_site: Base URL of the remote site, ending with a slash.
        target_name: Remote file identifier; converted with ``str()``.

    Returns:
        ``file_ext`` unchanged, so callers can record which extension worked.

    Raises:
        ValueError: If the response headers mention ``text/html``.
        Exception: Anything ``urllib.request.urlretrieve`` raises.
    """
    remote_name = str(target_name) + file_ext
    destination = userconf.thumb_storage_location_absolute + remote_name
    _, headers = urllib.request.urlretrieve(target_site + "thumb/" + remote_name, destination)
    if "text/html" in str(headers):
        # urlretrieve has already written the HTML page to disk under a media
        # extension; remove it so a bogus "thumbnail" is not left behind.
        try:
            os.remove(destination)
        except OSError:
            # Best-effort cleanup: the rejection below is what matters.
            pass
        raise ValueError("Not Media")
    print(headers)
    return file_ext
def global_sha1_from_bin():
    """Unimplemented stub: takes no arguments, does nothing, returns None."""
    return None
def write_py():
    """Rewrite ``vconfig.py`` from the loaded settings with a fresh ``last_check``.

    Every setting is read from ``userconf``; only ``last_check`` changes, and it
    is set to the current Unix time (whole seconds). The current time is also
    printed.

    String settings are emitted with ``repr()`` so that values containing
    quotes or backslashes still produce a valid Python file. The full text is
    assembled before the file is opened, so a missing attribute cannot leave a
    truncated config behind.
    """
    now = time.time()
    print(now)

    lines = [
        "",
        "sites = " + str(userconf.sites),
        "board = " + repr(userconf.board),
        "database = " + repr(userconf.database),
        "user = " + repr(userconf.user),
        "password = " + repr(userconf.password),
        "last_check = " + str(int(now)),
        "",
        "file_storage_location = " + repr(userconf.file_storage_location),
        "thumb_storage_location = " + repr(userconf.thumb_storage_location),
        "file_storage_location_absolute = " + repr(userconf.file_storage_location_absolute),
        "thumb_storage_location_absolute = " + repr(userconf.thumb_storage_location_absolute),
        "",
        "rebuild_bot_location = " + repr(userconf.rebuild_bot_location),
        "scraper_location = " + repr(userconf.scraper_location),
        "",
    ]
    content = "\n".join(lines)

    # The context manager closes the file; no explicit close() is needed.
    with open("vconfig.py", 'w') as py_config:
        py_config.write(content)
def sanitize_post_data(post_data):
    """Normalise punctuation in one post field before it is stored.

    ``None`` is passed through untouched; any other value has each replacement
    below applied in order via ``str.replace``.
    """
    if post_data is None:
        return post_data

    # (old, new) pairs, applied in order. Escaping ";" as "&#59;" is
    # deliberately left disabled, as in the previous version.
    # NOTE(review): the two quote pairs are identical on both sides as written
    # and therefore change nothing -- confirm the intended source characters.
    substitutions = (
        ("…", "..."),
        ("'", "'"),
        ("'", "'"),
    )
    for old, new in substitutions:
        post_data = post_data.replace(old, new)
    return post_data
def _first_working_thumb_ext(fetch, candidate_exts):
    """Call ``fetch(ext)`` for each candidate and return the first that succeeds.

    Failures of earlier candidates are ignored; the failure of the last
    candidate propagates to the caller.
    """
    last_index = len(candidate_exts) - 1
    for index, ext in enumerate(candidate_exts):
        try:
            fetch(ext)
        except Exception:
            if index == last_index:
                raise
        else:
            return ext


def retrieve_and_store_image(file_name, file_ext, file_size,
                             file_width, file_height, thumb_width, thumb_height, target_site, target_name):
    """Download a post's attachment and thumbnail and describe them as JSON.

    The thumbnail is taken from ``<target_site>thumb/`` and the full file from
    ``<target_site>src/``; both are saved under the storage locations
    configured in ``userconf``.

    * ``.webm`` / ``.mp4``: the thumbnail is always ``.jpg``.
    * ``.flac`` / ``.mp3``: the thumbnail is tried as ``file_ext``, then
      ``.png``, then ``.jpg``.
    * anything else: the thumbnail is tried through ``safe_retrive_from_URL``
      as ``file_ext``, ``.png``, ``.jpg``, then ``.jpeg``.

    The extension recorded in the JSON is always the one that was actually
    downloaded.

    Args:
        file_name: Original file name, or ``None`` when the post has no file.
        file_ext: Extension of the full file, including the leading dot.
        file_size, file_width, file_height, thumb_width, thumb_height:
            Passed through to ``returnFileJSON``.
        target_site: Base URL of the remote site, ending with a slash.
        target_name: Remote file identifier; converted with ``str()``.

    Returns:
        The string produced by ``returnFileJSON``, or ``None`` when
        ``file_name`` is ``None``.

    Raises:
        Exception: Whatever the last thumbnail attempt or the full-file
            download raises.
    """
    if file_name is None:
        return None

    remote_id = str(target_name)
    print(target_site + "src/" + remote_id + file_ext)

    def plain_thumb(ext):
        # Unchecked download, used for video and audio thumbnails.
        urllib.request.urlretrieve(target_site + "thumb/" + remote_id + ext,
                                   userconf.thumb_storage_location_absolute + remote_id + ext)

    def checked_thumb(ext):
        # Download that also rejects HTML responses.
        safe_retrive_from_URL(ext, target_site, target_name)

    if file_ext in (".webm", ".mp4"):
        media_kind = "video"
        thumb_file_ext = ".jpg"
        plain_thumb(thumb_file_ext)
    elif file_ext in (".flac", ".mp3"):
        media_kind = "audio"
        thumb_file_ext = _first_working_thumb_ext(plain_thumb, (file_ext, ".png", ".jpg"))
    else:
        media_kind = "image"
        thumb_file_ext = _first_working_thumb_ext(
            checked_thumb, (file_ext, ".png", ".jpg", ".jpeg"))

    urllib.request.urlretrieve(target_site + "src/" + remote_id + file_ext,
                               userconf.file_storage_location_absolute + remote_id + file_ext)
    return returnFileJSON(media_kind, file_name, thumb_file_ext, file_ext, file_size, target_name,
                          file_width, file_height, thumb_width, thumb_height)
def returnFileJSON(type, file_name, thumb_file_ext, full_file_ext, file_size, target_name,
                   file_width, file_height, thumb_width, thumb_height):
    """Return a JSON array holding one record that describes a stored file.

    The record is serialised with ``json.dumps`` so that quotes or backslashes
    in ``file_name`` are escaped and ``None`` sizes become ``null`` instead of
    producing an invalid document.

    Args:
        type: Media family placed before the slash in the "type" field
            (callers pass "video", "audio" or "image").
        file_name: Original file name without extension.
        thumb_file_ext: Thumbnail extension, including the leading dot.
        full_file_ext: Full-file extension, including the leading dot.
        file_size: Value for the "size" field.
        target_name: Stored file identifier; converted with ``str()``.
        file_width, file_height, thumb_width, thumb_height: Dimension fields.

    Returns:
        A JSON string of the form ``[{...}]``.
    """
    stored_id = str(target_name)
    extension = full_file_ext[1:]  # drop the leading dot
    stored_file = stored_id + full_file_ext
    stored_thumb = stored_id + thumb_file_ext

    record = {
        "name": file_name,
        "type": type + "/" + extension,
        "tmp_name": "None",
        "error": 0,
        "size": file_size,
        "filename": file_name + full_file_ext,
        "extension": extension,
        "file_id": stored_id,
        "file": stored_file,
        "thumb": stored_thumb,
        # NOTE(review): set to true for video and audio as well -- confirm
        # that the consumer relies on this.
        "is_an_image": True,
        "hash": global_sha1_hash_value,
        "width": file_width,
        "height": file_height,
        "thumbwidth": thumb_width,
        "thumbheight": thumb_height,
        "file_path": userconf.file_storage_location + stored_file,
        "thumb_path": userconf.thumb_storage_location + stored_thumb,
    }
    # ensure_ascii=False keeps non-ASCII file names as literal text.
    return json.dumps([record], ensure_ascii=False)
def _log_traceback():
    """Print the active exception's traceback and append it to err_log.txt."""
    trace = str(traceback.format_exc())
    print(trace)
    with open("err_log.txt", "a+") as log:
        log.write(trace + "\n\n")


# Script body: run the scraper over every configured site, insert each thread
# newer than userconf.last_check into the board's posts table, run the rebuild
# bot, then rewrite the config with a new last_check.
db = None  # stays None if connecting fails, so the final close can be skipped
try:
    db = MySQLdb.connect(host="localhost", user=userconf.user, passwd=userconf.password,
                         db=userconf.database, charset='utf8')
    db_obj = db.cursor()

    sitestring = ",".join(userconf.sites)
    # The loop pairs each line of scraper output with a site by position.
    site_data_arr = os.popen('sudo python3 ' + userconf.scraper_location
                             + " -u \"" + sitestring + "\" -r \\(.*\\) --raw")
    for site_no, site_data in enumerate(site_data_arr):
        try:
            site_data = site_data.strip()
            site_url = userconf.sites[site_no]
            if site_data == site_url + " " + "Not found":
                print(site_url + " " + "404")
                raise Exception(site_url + " " + "404")

            # Drop the last 12 characters of the configured URL to get the
            # site root (presumably a trailing "catalog.json" -- confirm).
            site_root = site_url[0:-12]

            for thread_container in reversed(json.loads(site_data)):
                for thread in reversed(thread_container["threads"]):
                    if int(thread["time"]) <= userconf.last_check:
                        continue
                    try:
                        thread_url = site_root + "res/" + str(thread.get("no")) + ".html"
                        link = "<a href=" + thread_url + ">" + thread_url + "</a>"
                        comment = thread.get("com")
                        body = comment + "<br/>" + link if comment is not None else link
                        now = str(int(time.time()))
                        pd = [
                            "0",  # id
                            None,  # thread
                            thread.get("sub"),  # subject
                            thread.get("email"),  # email
                            thread.get("name"),  # name
                            thread.get("trip"),  # trip
                            thread.get("capcode"),  # capcode
                            body,  # body
                            body,  # body_nomarkup
                            now,  # time
                            now,  # bump
                            retrieve_and_store_image(
                                thread.get("filename"),  # name
                                thread.get("ext"),  # ext
                                thread.get("fsize"),  # file_size
                                thread.get("w"),  # file_width
                                thread.get("h"),  # file_height
                                thread.get("tn_w"),  # thumb_width
                                thread.get("tn_h"),  # thumb_height
                                site_root,  # target site
                                thread.get("tim"),  # target_name
                            ),  # files
                            "1",  # num_files
                            global_sha1_hash_value,  # filehash
                            "cantdeletethis",  # password
                            "127.0.0.1",  # ip
                            "0",  # sticky
                            "1",  # locked
                            "0",  # cycle
                            "0",  # sage
                            thread.get("embed"),  # embed
                            thread.get("sub"),  # slug
                        ]
                        pd = [sanitize_post_data(value) for value in pd]
                        # Values are passed as parameters; only the table name
                        # (from the local config) is concatenated.
                        db_obj.execute("INSERT INTO posts_" + userconf.board + """(id,
                            thread,subject,email,name,trip,capcode,body,body_nomarkup,time,
                            bump,files,num_files,filehash,password,ip,sticky,locked,cycle,
                            sage,embed,slug)
                            VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);""",
                                       tuple(pd))
                    except Exception:
                        # One bad thread must not stop the remaining ones.
                        _log_traceback()
        except Exception:
            # One bad site (404, malformed JSON, ...) must not stop the others.
            _log_traceback()

    # MySQLdb does not autocommit; persist the inserts before rebuilding.
    db.commit()
    # NOTE(review): assumed to run once after all sites are processed --
    # confirm against the intended nesting.
    os.system("sudo python3 " + userconf.rebuild_bot_location)
except Exception:
    _log_traceback()

write_py()
if db is not None:
    db.close()