1 import vconfig
as userconf
# Fixed digest string used as the "hash" of every imported file and as the
# post's filehash column; it is never computed from the downloaded data.
# NOTE(review): despite the name this is 32 hex digits (MD5-sized; a SHA-1
# digest has 40), and it is not the MD5 of the empty string
# (d41d8cd98f00b204e9800998ecf8427e) -- confirm where the value came from.
global_sha1_hash_value = "3e37f124251bdd8f8fdd93cf0824817a"
def safe_retrive_from_URL(file_ext, target_site, target_name):
    """Download one thumbnail and reject it if the server answered with HTML.

    The thumbnail is requested from ``<target_site>thumb/<target_name><file_ext>``
    and written to ``userconf.thumb_storage_location_absolute`` under the same
    ``<target_name><file_ext>`` file name.

    Args:
        file_ext: Thumbnail extension to request (callers pass values such as
            ".png" or ".jpg", i.e. with the leading dot).
        target_site: Base URL of the remote site; "thumb/" is appended as-is.
        target_name: Remote file identifier; converted with ``str()``.

    Returns:
        The extension that was requested, so the caller can record which
        thumbnail variant ended up on disk.

    Raises:
        ValueError: If "text/html" appears anywhere in the response headers,
            which is treated as "not media".
        Any exception raised by ``urllib.request.urlretrieve`` propagates
        unchanged.
    """
    thumb_file_ext = file_ext
    thumb_name = str(target_name) + thumb_file_ext

    # Only the response headers are needed; the local path is already known.
    _, headers = urllib.request.urlretrieve(
        target_site + "thumb/" + thumb_name,
        userconf.thumb_storage_location_absolute + thumb_name,
    )

    # NOTE: urlretrieve has already written the response body to disk here;
    # a rejected (HTML) download is left in place, as before.
    if "text/html" in str(headers):
        raise ValueError("Not Media")

    # Callers assign the result to their own thumb_file_ext, so hand back the
    # extension that was fetched.
    return thumb_file_ext
20 def global_sha1_from_bin():
25 with
open("vconfig.py",'w') as py_config
:
27 sites = """ + str(userconf
.sites
) + """
28 board = '""" + userconf
.board
+ """'
29 database = '""" + userconf
.database
+ """'
30 user = '""" + userconf
.user
+ """'
31 password = '""" + userconf
.password
+ """'
32 last_check = """ + str(int(time
.time())) + """
34 file_storage_location = '""" + userconf
.file_storage_location
+ """'
35 thumb_storage_location = '""" + userconf
.thumb_storage_location
+"""'
36 file_storage_location_absolute = '""" + userconf
.file_storage_location_absolute
+ """'
37 thumb_storage_location_absolute = '""" + userconf
.thumb_storage_location_absolute
+ """'
39 rebuild_bot_location = '""" +userconf
.rebuild_bot_location
+"""'
40 scraper_location = '""" +userconf
.scraper_location
+ """'
44 def sanitize_post_data(post_data
):
47 post_data
= post_data
.replace("…","...")
48 #post_data = post_data.replace(";",";")
49 post_data
= post_data
.replace("'","'")
50 post_data
= post_data
.replace("'","'")
54 def retrieve_and_store_image(file_name
, file_ext
, file_size
,
55 file_width
, file_height
, thumb_width
, thumb_height
, target_site
, target_name
):
59 print(target_site
+ "src/" + str(target_name
) + file_ext
)
60 if file_ext
== ".webm" or file_ext
== ".mp4":
61 thumb_file_ext
= ".jpg"
62 full_file_ext
= file_ext
63 urllib
.request
.urlretrieve(target_site
+ "thumb/" + str(target_name
) + thumb_file_ext
,
64 userconf
.thumb_storage_location_absolute
+ str(target_name
) + thumb_file_ext
)
65 urllib
.request
.urlretrieve(target_site
+ "src/" + str(target_name
) + full_file_ext
,
66 userconf
.file_storage_location_absolute
+ str(target_name
) + full_file_ext
)
67 file_json
= returnFileJSON("video", file_name
, thumb_file_ext
, full_file_ext
, file_size
, target_name
,
68 file_width
, file_height
, thumb_width
, thumb_height
)
69 elif file_ext
== ".flac" or file_ext
== ".mp3":
71 thumb_file_ext
= file_ext
72 urllib
.request
.urlretrieve(target_site
+ "thumb/" + str(target_name
) + thumb_file_ext
,
73 userconf
.thumb_storage_location_absolute
+ str(target_name
) + thumb_file_ext
)
76 thumb_file_ext
= ".png"
77 urllib
.request
.urlretrieve(target_site
+ "thumb/" + str(target_name
) + thumb_file_ext
,
78 userconf
.thumb_storage_location_absolute
+ str(target_name
) + thumb_file_ext
)
80 urllib
.request
.urlretrieve(target_site
+ "thumb/" + str(target_name
) + ".jpg",
81 userconf
.thumb_storage_location_absolute
+ str(target_name
) + ".jpg")
82 full_file_ext
= file_ext
83 urllib
.request
.urlretrieve(target_site
+ "src/" + str(target_name
) + full_file_ext
,
84 userconf
.file_storage_location_absolute
+ str(target_name
) + full_file_ext
)
85 file_json
= returnFileJSON("audio", file_name
, thumb_file_ext
, full_file_ext
, file_size
, target_name
,
86 file_width
, file_height
, thumb_width
, thumb_height
)
89 thumb_file_ext
= safe_retrive_from_URL(file_ext
, target_site
, target_name
)
92 thumb_file_ext
= safe_retrive_from_URL(".png", target_site
, target_name
)
95 thumb_file_ext
= safe_retrive_from_URL(".jpg", target_site
, target_name
)
97 thumb_file_ext
= safe_retrive_from_URL(".jpeg", target_site
, target_name
)
98 full_file_ext
= file_ext
99 urllib
.request
.urlretrieve(target_site
+ "src/" + str(target_name
) + full_file_ext
,
100 userconf
.file_storage_location_absolute
+ str(target_name
) + full_file_ext
)
101 file_json
= returnFileJSON("image", file_name
, thumb_file_ext
, full_file_ext
, file_size
, target_name
,
102 file_width
, file_height
, thumb_width
, thumb_height
)
105 def returnFileJSON(type, file_name
, thumb_file_ext
, full_file_ext
, file_size
, target_name
,
106 file_width
, file_height
, thumb_width
, thumb_height
):
107 global global_sha1_hash_value
109 "name":\"""" + file_name +"""\",
110 "type":\"""
" + type + "\
/" + full_file_ext[1:] +"""\",
113 "size
":""" + str(file_size) + """,
114 "filename
":\"""" + file_name
+ full_file_ext
+ """\",
115 "extension":\"""" + full_file_ext[1:] + """\",
116 "file_id":\"""
" + str(target_name) +"""\",
117 "file":\"""" + str(target_name
) + full_file_ext
+"""\",
118 "thumb":\"""" + str(target_name) + thumb_file_ext +"""\",
120 "hash": \"""
" + global_sha1_hash_value + """\",
121 "width
": """ + str(file_width) + """,
122 "height
":""" + str(file_height) + """,
123 "thumbwidth
":""" + str(thumb_width) + """,
124 "thumbheight
":""" + str(thumb_height) + """,
125 "file_path
":\"""" + userconf
.file_storage_location
+ str(target_name
) + full_file_ext
+ """\",
126 "thumb_path":\"""" + userconf.thumb_storage_location + str(target_name) + thumb_file_ext + """\"}]"""
130 db = MySQLdb.connect(host="localhost",user=userconf.user, passwd=userconf.password,
131 db=userconf.database, charset='utf8')
134 sitestring = ",".join(userconf.sites)
135 site_data_arr = (os.popen('sudo python3 ' + userconf.scraper_location + " -u \"" + sitestring + "\" -r \(.*\) --raw"))
136 for site_no, site_data in enumerate(site_data_arr):
138 site_data = site_data.strip()
139 if site_data == userconf.sites[site_no] + " " + "Not found":
140 print(userconf.sites[site_no] + " " + "404")
141 raise Exception(userconf.sites[site_no] + " " + "404")
142 site_json = (json.loads(site_data))
143 for thread_container in reversed(site_json):
144 for thread in reversed(thread_container["threads"]):
145 if(int(thread["time"]) > userconf.last_check):
147 thread_url = userconf.sites[site_no][0:-12] + "res/" + str(thread.get("no")) + ".html"
151 thread.get("sub"), #sub
152 thread.get("email"), #email
153 thread.get("name"), #name
154 thread.get("trip"), #trip
155 thread.get("capcode"), #capcode
156 (thread.get("com") + "<br/><a href=" + thread_url + ">" + thread_url + "</a>"
157 if thread.get("com") != None else "<a href=" + thread_url + ">" + thread_url + "</a>") , #body
158 (thread.get("com") + "<br/><a href=" + thread_url + ">" + thread_url + "</a>"
159 if thread.get("com") != None else "<a href=" + thread_url + ">" + thread_url + "</a>"), #body_nomarkup
160 str(int(time.time())), #time
161 str(int(time.time())), #bump
162 retrieve_and_store_image(
163 thread.get("filename"), #name
164 thread.get("ext"), #ext
165 thread.get("fsize"), #file_size
166 thread.get("w"), #file_width
167 thread.get("h"), #file_height
168 thread.get("tn_w"), #thumb_width
169 thread.get("tn_h"), #thumb_height
170 userconf.sites[site_no][0:-12], #target site
171 thread.get("tim")#target_name
174 global_sha1_hash_value, # filehash
175 "cantdeletethis", #password
181 thread.get("embed"), #embed
182 thread.get("sub") #slug
184 for index, data in enumerate(pd):
185 pd[index] = sanitize_post_data(data)
186 db_obj.execute("INSERT INTO posts_" + userconf.board + """(id,
187 thread
,subject
,email
,name
,trip
,capcode
,body
,body_nomarkup
,time
,
188 bump
,files
,num_files
,filehash
,password
,ip
,sticky
,locked
,cycle
,
190 VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);""",
191 (pd[0],pd[1],pd[2],pd[3],pd[4],pd[5],pd[6],pd[7],pd[8],pd[9],pd[10],
192 pd[11],pd[12],pd[13],pd[14],pd[15],pd[16],pd[17],pd[18],pd[19],pd[20],
194 except Exception as err:
195 with open("err_log.txt", "a+") as log:
196 print(str(traceback.format_exc()))
197 log.write(str(traceback.format_exc()) + "\n\n")
200 with open("err_log.txt", "a+") as log:
201 print(str(traceback.format_exc()))
202 log.write(str(traceback.format_exc()) + "\n\n")
204 os.system("sudo python3 " + userconf.rebuild_bot_location);
205 except Exception as err:
206 with open("err_log.txt", "a+") as log:
207 print(str(traceback.format_exc()))
208 log.write(str(traceback.format_exc()) + "\n\n")