Small bug.
[Python-Scripts.git] / MiniScripts / motherless-dl.py
blob1a9128bebf433206facdb06de92ef87d2d593eb1
#!/usr/bin/env python

'''
This program is free software; you can redistribute it and/or modify
it under the terms of the Revised BSD License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Revised BSD License for more details.

Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski

$FileInfo: motherless-dl.py - Last Update: 11/10/2013 Ver. 1.6.7 RC 1 - Author: cooldude2k $
'''

from __future__ import division, absolute_import, print_function

import argparse
import cookielib
import datetime
import gzip
import os
import re
import StringIO
import sys
import time
import urllib
import urllib2
import urlparse
import zlib

if __name__ == "__main__":
    # When run as a script, suppress traceback frames so that failures
    # print only the final exception line.
    sys.tracebacklimit = 0

# Version tuple: (major, minor, patch, pre-release tag or None).
# NOTE(review): the $FileInfo$ header of this file says "1.6.7 RC 1" /
# 11/10/2013, which does not match the values below - confirm which is
# current before releasing.
__version_info__ = (1, 6, 5, "RC 3")
__version_date__ = "2013.10.23"

# Human-readable version: "major.minor.patch", followed by the pre-release
# tag (separated by a space) when one is set.
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    __version__ = __version__ + " " + str(__version_info__[3])

# Command-line interface. Every "--get-*" switch selects one field to print
# for each resolved item; they are plain booleans (store_true).
parser = argparse.ArgumentParser(
    description="get urls of images/videos from motherless.com",
    conflict_handler="resolve",
    add_help=True,
)
parser.add_argument("url", nargs='*', help='motherless url')
parser.add_argument('-v', '--version', action='version', version=__version__)
# nargs="*" makes these lists; consumers read element [0].
parser.add_argument("--pages-start", nargs="*", help="start at page number")
parser.add_argument("--pages-end", nargs="*", help="end at page number")
parser.add_argument("--update", action='store_true', help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)")
parser.add_argument("--dump-user-agent", action='store_true', help="display the current browser identification")
parser.add_argument("--user-agent", nargs="?", default="Mozilla/5.0 (Windows NT 7.0; rv:25.0) Gecko/20100101 Firefox/25.0", help="specify a custom user agent")
parser.add_argument("--referer", nargs="?", default="http://motherless.com/", help="specify a custom referer, use if the video access is restricted to one domain")
parser.add_argument("--proxy", nargs="?", default=None, help="Use the specified HTTP/HTTPS proxy")
parser.add_argument("--id", action='store_true', help="use only video ID in file name")
parser.add_argument("--get-url", action='store_true', help="simulate, quiet but print URL")
parser.add_argument("--get-pageurl", action='store_true', help="simulate, quiet but print page URL")
parser.add_argument("--get-title", action='store_true', help="simulate, quiet but print title")
parser.add_argument("--get-posts", action='store_true', help="simulate, quiet but print user posts")
parser.add_argument("--get-id", action='store_true', help="simulate, quiet but print id")
parser.add_argument("--get-thumbnail", action='store_true', help="simulate, quiet but print thumbnail URL")
parser.add_argument("--get-filename", action='store_true', help="simulate, quiet but print output filename")
parser.add_argument("--get-format", action='store_true', help="simulate, quiet but print file format")
parser.add_argument("--get-type", action='store_true', help="simulate, quiet but print file type")
parser.add_argument("--get-username", action='store_true', help="simulate, quiet but print uploaders username")
parser.add_argument("--get-bbcode", action='store_true', help="simulate, quiet but print bbcode")
parser.add_argument("--get-html", action='store_true', help="simulate, quiet but print html code")
parser.add_argument("--get-dimensions", action='store_true', help="simulate, quiet but print dimensions (width x height)")
parser.add_argument("--get-width", action='store_true', help="simulate, quiet but print width")
parser.add_argument("--get-height", action='store_true', help="simulate, quiet but print height")
parser.add_argument("--get-views", action='store_true', help="simulate, quiet but print number of views")
parser.add_argument("--get-favorites", action='store_true', help="simulate, quiet but print number of favorites")
parser.add_argument("--verbose", action='store_true', help="print various debugging information")
# Parsed at module level: the rest of the file reads `getargs` directly.
getargs = parser.parse_args()

# --update: download the latest copy of this script and, when it is newer
# than the running one, overwrite this file with it. Always exits afterwards.
if getargs.update:
    from distutils.version import LooseVersion as VerCheck
    fakeua = getargs.user_agent
    proxycfg = None
    if getargs.proxy is not None:
        proxycfg = urllib2.ProxyHandler({"http": getargs.proxy})
    geturls_cj = cookielib.CookieJar()
    if proxycfg is None:
        geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj))
    else:
        geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj), proxycfg)
    geturls_opener.addheaders = [
        ("Referer", "https://github.com/GameMaker2k/Python-Scripts/"),
        ("User-Agent", fakeua),
        ("Accept-Encoding", "gzip, deflate"),
        ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"),
        ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Connection", "close"),
    ]
    urllib2.install_opener(geturls_opener)
    geturls_text = geturls_opener.open("https://raw.github.com/GameMaker2k/Python-Scripts/master/MiniScripts/motherless-dl.py")
    # Undo the transfer compression we advertised in Accept-Encoding.
    # gzip and deflate are different containers and need different decoders.
    pyfile_text = geturls_text.read()
    content_encoding = geturls_text.info().get("Content-Encoding")
    if content_encoding == "gzip":
        pyfile_text = gzip.GzipFile(fileobj=StringIO.StringIO(pyfile_text)).read()
    elif content_encoding == "deflate":
        try:
            pyfile_text = zlib.decompress(pyfile_text)
        except zlib.error:
            # Fall back to a raw deflate stream (no zlib header).
            pyfile_text = zlib.decompress(pyfile_text, -zlib.MAX_WBITS)
    # Pull the version date and version tuple out of the downloaded source.
    regex_finddate_text = re.escape("__version_date__ = \"") + r"([0-9\.]+)" + re.escape("\"")
    finddate_text = re.findall(regex_finddate_text, pyfile_text)
    regex_findver_text = re.escape("__version_info__ = (") + "([0-9]+)" + re.escape(", ") + "([0-9]+)" + re.escape(", ") + "([0-9]+)" + re.escape(", \"") + "([A-Z0-9 ]+)" + re.escape("\");")
    findver_text = re.findall(regex_findver_text, pyfile_text)
    if not finddate_text or not findver_text:
        # The download did not look like this script; never overwrite with it.
        sys.exit("Could not read version information from the downloaded file; nothing was updated.")
    # Normalise "RC 3" -> "rc3" so LooseVersion can order pre-release tags.
    curvertag = __version_info__[3]
    if curvertag is None:
        curvertag = ""
    ProVerStr = str(__version_info__[0]) + "." + str(__version_info__[1]) + "." + str(__version_info__[2]) + curvertag.replace(" ", "").lower()
    ProVerCheck = VerCheck(ProVerStr)
    ProDateCheck = VerCheck(__version_date__)
    NewVerStr = findver_text[0][0] + "." + findver_text[0][1] + "." + findver_text[0][2] + findver_text[0][3].replace(" ", "").lower()
    NewVerCheck = VerCheck(NewVerStr)
    NewDateCheck = VerCheck(finddate_text[0])
    # Compare parsed versions on both sides (not the raw string).
    if ProVerCheck < NewVerCheck and ProDateCheck <= NewDateCheck:
        with open(__file__, "w+") as fileopen:
            fileopen.write(pyfile_text)
    print()
    sys.exit()

# --dump-user-agent: print the effective User-Agent string and stop.
if getargs.dump_user_agent:
    print(getargs.user_agent)
    sys.exit()
# Without any URL there is nothing to do: show usage and stop.
if not getargs.url:
    parser.print_help()
    sys.exit()

_MLESS_SITE = "http://motherless.com"
# Absolute site prefix (any scheme, optional "www.") as found inside pages.
_MLESS_HOST_RE = re.compile(r"https?://(?:www\.)?motherless\.com")
# Same prefix including the trailing slash; scheme optional so that
# "motherless.com/X" and "www.motherless.com/X" given by the user also work.
_MLESS_PREFIX_RE = re.compile(r"(?:https?://)?(?:www\.)?motherless\.com/")
# Captures the path (and simple query string) that follows the site prefix.
_MLESS_PATH_RE = re.compile(r"https?://(?:www\.)?motherless\.com/([\w\/\?\&\=]+)")
# Pager links; group 1 is the page number stored in the rel attribute.
_MLESS_PAGER_RE = re.compile(re.escape("class=\"pop\" rel=\"") + "([0-9]+)" + re.escape("\">") + "([0-9]+)" + re.escape("</a>"))
# Thumbnail links on listing pages, and the variant used for "V..." pages
# after their link attributes have been rewritten to a common marker.
_MLESS_LINK_RE = re.compile(r"([\w\/]+)" + re.escape("\" class=\"img-container\" target=\"_self\">"))
_MLESS_VLINK_RE = re.compile(re.escape("<a href=\"") + r"([\w\/]+)" + re.escape("\" title=\"motherless link\">"))
_MLESS_HEXID_RE = re.compile("([0-9A-F]+)")
# From Amber @ http://stackoverflow.com/a/9662362
_MLESS_TAG_RE = re.compile(r'<[^>]+>')


def _mless_read_body(response):
    """Return the response body with gzip/deflate Content-Encoding undone."""
    rawdata = response.read()
    encoding = response.info().get("Content-Encoding")
    if encoding == "gzip":
        return gzip.GzipFile(fileobj=StringIO.StringIO(rawdata)).read()
    if encoding == "deflate":
        try:
            return zlib.decompress(rawdata)
        except zlib.error:
            # Fall back to a raw deflate stream (no zlib header).
            return zlib.decompress(rawdata, -zlib.MAX_WBITS)
    return rawdata


def _mless_fetch(opener, url):
    """Fetch url and return its text with absolute site prefixes removed."""
    return _MLESS_HOST_RE.sub("", _mless_read_body(opener.open(url)))


def _mless_page_window(mtlessgetargs, lastpage):
    """Return (first, last) page numbers after applying pages_start/pages_end.

    Values outside 1..lastpage, non-numeric values, and an end page that is
    before the start page are ignored.
    """
    firstpage = 1
    startarg = mtlessgetargs["pages_start"]
    if startarg and startarg[0].isdigit():
        if 1 <= int(startarg[0]) <= lastpage:
            firstpage = int(startarg[0])
    endarg = mtlessgetargs["pages_end"]
    if endarg and endarg[0].isdigit():
        if firstpage <= int(endarg[0]) <= lastpage:
            lastpage = int(endarg[0])
    return firstpage, lastpage


def _mless_list_links(opener, pagepath, mtlessgetargs, extraquery="", videogroup=False):
    """Collect the thumbnail link paths from every selected page of a listing.

    pagepath is the site-relative path of the listing, extraquery is appended
    after the page parameter, and videogroup selects the markup used by
    "V..." pages.
    """
    pageurl = _MLESS_SITE + pagepath + "?page="
    # Page 1 is always fetched: it carries the pager that tells the page count.
    out_text = _mless_fetch(opener, pageurl + "1" + extraquery)
    pager_text = _MLESS_PAGER_RE.findall(out_text)
    numpages = int(pager_text[-1][0]) if pager_text else 1
    curpage, numpages = _mless_page_window(mtlessgetargs, numpages)
    link_re = _MLESS_VLINK_RE if videogroup else _MLESS_LINK_RE
    found_links = []
    while curpage <= numpages:
        if curpage > 1:
            out_text = _mless_fetch(opener, pageurl + str(curpage) + extraquery)
        page_text = out_text
        if videogroup:
            # Rewrite both link flavours to one marker so one regex finds them.
            page_text = page_text.replace("class=\"img-container\" target=\"_self\"", "title=\"motherless link\"")
            page_text = page_text.replace("class=\"pop plain\" target=\"_blank\"", "title=\"motherless link\"")
        for linkpath in link_re.findall(page_text):
            found_links.append(linkpath)
            if mtlessgetargs["verbose"]:
                print(linkpath)
        curpage = curpage + 1
    return found_links


def _mless_site_path(opener, rawurl, verbose):
    """Reduce a user-supplied URL to a site-relative path such as "/ABC123"."""
    mlessvid = rawurl
    # hostname is None for scheme-less input; treat that as "no host".
    hostname = urlparse.urlparse(mlessvid).hostname or ""
    if re.match(r"^s([0-9]+)\.motherlessmedia\.com", hostname):
        # Media-server URL: ask the site to redirect us to the item page.
        geturls_text = opener.open(_MLESS_SITE + "/mogile_api.php?path=" + urllib.quote_plus(mlessvid) + "&redirect=1")
        mlessvid = geturls_text.geturl()
        found = _MLESS_PATH_RE.findall(mlessvid)
        if found:
            mlessvid = _MLESS_SITE + "/" + found[0]
        if verbose:
            print(mlessvid)
    hostname = urlparse.urlparse(mlessvid).hostname or ""
    if re.match(r"^thumbs\.motherlessmedia\.com", hostname):
        # Thumbnail URL: the file name (minus suffixes) is the item id.
        mlessvid = mlessvid.replace("-zoom", "").replace("-strip", "")
        mlessvidtmp = urlparse.urlparse(mlessvid).path.split("/")
        mlessvid = _MLESS_SITE + "/" + mlessvidtmp[2]
        found = _MLESS_PATH_RE.findall(mlessvid)
        if found:
            mlessvid = _MLESS_SITE + "/" + found[0]
    mlessvid = _MLESS_PREFIX_RE.sub("", mlessvid)
    mlessvid = re.sub("^/", "", mlessvid)
    mlessvid = _MLESS_SITE + "/" + mlessvid
    found = _MLESS_PATH_RE.findall(mlessvid)
    if found:
        mlessvid = "/" + found[0]
    return mlessvid


def _mless_is_listing(parts):
    """Tell whether the split path names a page that lists several items."""
    first = parts[1]
    count = len(parts)
    if count == 2:
        return first.startswith(("G", "H", "V", "live"))
    if count == 3:
        if first.startswith(("g", "u")):
            return True
        if first.startswith("live"):
            return parts[2].startswith(("images", "videos"))
        if first.startswith(("images", "videos")):
            return parts[2].startswith(("favorited", "viewed", "commented", "popular"))
        return False
    if count == 4:
        if first.startswith("term"):
            return parts[2].startswith(("videos", "images"))
        if first.startswith("f"):
            return parts[3].startswith(("videos", "images"))
    return False


def _mless_is_single(parts):
    """Tell whether the split path names one single image/video page."""
    count = len(parts)
    if count == 2:
        return _MLESS_HEXID_RE.match(parts[1]) is not None
    if count == 3:
        return parts[1].startswith("G") and _MLESS_HEXID_RE.match(parts[2]) is not None
    if count == 4:
        return parts[1].startswith("g")
    return False


def _mless_counter(label, pagetext):
    """Return the number shown under the given <strong> label, commas removed."""
    regex_counter = re.escape("<strong>" + label + "</strong>") + "\n+\t+([^\t]+)\t+" + re.escape("</h2>")
    return re.findall(regex_counter, pagetext)[0].replace(",", "")


def _mless_scrape_item(opener, itempath, mtlessgetargs):
    """Scrape one item page and return its info dict.

    Returns None when the page answers with an HTTP error or exposes no file
    URL. Raises IndexError when an expected page element is missing.
    """
    try:
        subout_text = _mless_fetch(opener, _MLESS_SITE + itempath)
    except urllib2.HTTPError:
        return None
    title_text = re.findall(re.escape("<title>") + "(.*)" + re.escape("</title>"), subout_text)
    mlesstitle = title_text[0].replace(" - MOTHERLESS.COM", "")
    mlessthumb = re.findall(re.escape("src=&quot;") + "(.*)" + re.escape("&quot;"), subout_text)[0]
    post_text = re.findall(re.escape("__fileurl = '") + "(.*)" + re.escape("';"), subout_text)
    mlessimg = re.findall(re.escape("<meta property=\"og:image\" content=\"") + "(.*)" + re.escape("\">"), subout_text)[0]
    mediatype_text = re.findall(re.escape("__mediatype = '") + "(.*)" + re.escape("',"), subout_text)
    mlessusrname = re.findall(re.escape("<a href=\"/u/") + r"([\w]+)" + re.escape("\" class=\"pop plain thumb-member-link-uploads\">Uploads</a>"), subout_text)[0]
    mlessid = re.sub("^/", "", itempath)
    mlesspurl = _MLESS_SITE + itempath
    mlessnumviews = _mless_counter("Views", subout_text)
    mlessnumfavs = _mless_counter("Favorited", subout_text)
    # Comment blocks; groups: member id, display name, meta line, comment html.
    regex_postdata = re.escape("<div class=\"media-comment-contents\">") + "\n\t+" + re.escape("<h4>") + "\n\t+" + re.escape("<a href=\"/m/") + r"([\w]+)" + re.escape("\" class=\"pop plain\" target=\"_blank\">") + "\n\t+([^\t]+)\t+" + re.escape("</a>") + "\n\t+" + re.escape("</h4>") + "\n\t+" + re.escape("<div class=\"media-comment-meta\">") + "\n\t+([^\t]+)\t+" + re.escape("</div>") + "\n\t+" + re.escape("<div style=\"text-align: justify;\">") + "\n\t+([^\t]+)\t+" + re.escape("</div>")
    postdata_text = re.findall(regex_postdata, subout_text)
    servsecs_text = re.findall(re.escape("Served by web") + "([0-9]+)" + re.escape(" in ") + r"([0-9\.]+)" + re.escape(" seconds"), subout_text)
    servname = "web" + servsecs_text[0][0]
    servsecs = float(servsecs_text[0][1])
    mlesspostlist = []
    for postdata in postdata_text:
        # Turn line breaks into newlines, drop remaining tags, then make
        # site-relative links absolute.
        newpostext = postdata[3].replace("<br>", "\n").replace("<br/>", "\n").replace("<br />", "\n")
        newpostext = _MLESS_TAG_RE.sub('', newpostext)
        newpostext = re.sub(re.escape("/") + r"([\w\/]+)", r"http://motherless.com/\1", newpostext)
        mlesspostlist.append({"username": postdata[0], "avatar": "http://avatars.motherlessmedia.com/avatars/member/" + postdata[0] + ".jpg", "smallavatar": "http://avatars.motherlessmedia.com/avatars/member/" + postdata[0] + "-small.jpg", "post": newpostext})
    if not post_text:
        return None
    mlesslink = post_text[0]
    mlessext = os.path.splitext(urlparse.urlparse(mlesslink).path)[1].replace(".", "").lower()
    if mtlessgetargs["id"]:
        mlessfname = mlessid.replace("/", "_") + "." + mlessext
    else:
        mlessfname = urlparse.urlsplit(mlesslink).path.split("/")[-1]
    isvideo = mlessext == "mp4" or mlessext == "flv"
    if isvideo:
        mlesslink = mlesslink + "?start=0"
        post_vi_file = re.findall(re.escape("\"file\" : \"") + "(.*)" + re.escape("\","), subout_text)
        post_vi_image = re.findall(re.escape("\"image\" : \"") + "(.*)" + re.escape("\","), subout_text)
        post_vi_height = re.findall(re.escape("\"height\" : ") + "([0-9]+)" + re.escape(","), subout_text)
        post_vi_width = re.findall(re.escape("\"width\" : ") + "([0-9]+)" + re.escape(","), subout_text)
        post_vi_filethumb = re.findall(re.escape("\"file\": ") + "(.*)" + re.escape(","), subout_text)
        post_vi_kind = re.findall(re.escape("\"kind\": \"") + "(.*)" + re.escape("\""), subout_text)
        mediainfo = {"file": post_vi_file[0], "image": post_vi_image[0], "width": int(post_vi_width[0]), "height": int(post_vi_height[0]), "views": int(mlessnumviews), "favorites": int(mlessnumfavs), "filethumb": post_vi_filethumb[0], "thumbstrip": "http://thumbs.motherlessmedia.com/thumbs/" + mlessid + "-strip.jpg", "kind": post_vi_kind[0]}
        mlessvidpic = mlessimg
        mlesstype = "video"
    else:
        regex_ii_dimensions = re.escape("style=\"width: ") + "([0-9]+)" + re.escape("px; height: ") + "([0-9]+)" + re.escape("px; border: none;\"")
        post_ii_dimensions = re.findall(regex_ii_dimensions, subout_text)
        # Group 1 is the width, group 2 the height.
        mediainfo = {"width": int(post_ii_dimensions[0][0]), "height": int(post_ii_dimensions[0][1]), "views": int(mlessnumviews), "favorites": int(mlessnumfavs)}
        mlessvidpic = mlesslink
        mlesstype = "image"
    if mtlessgetargs["verbose"]:
        print(mlesslink)
    return {
        "id": mlessid,
        "title": mlesstitle,
        "format": mlessext,
        "filename": mlessfname,
        "thumbnail": mlessthumb,
        "servername": servname,
        "servingtime": servsecs,
        "mediatype": mediatype_text[0],
        "vidpic": mlessvidpic,
        "type": mlesstype,
        "info": mediainfo,
        "dimensions": str(mediainfo["width"]) + "x" + str(mediainfo["height"]),
        "width": mediainfo["width"],
        "height": mediainfo["height"],
        "views": mediainfo["views"],
        "favorites": mediainfo["favorites"],
        "username": mlessusrname,
        "avatar": "http://avatars.motherlessmedia.com/avatars/member/" + mlessusrname + ".jpg",
        "smallavatar": "http://avatars.motherlessmedia.com/avatars/member/" + mlessusrname + "-small.jpg",
        "posts": mlesspostlist,
        "pageurl": mlesspurl,
        "url": mlesslink,
    }


def motherless_dl(mtlessgetargs=vars(getargs)):
    """Resolve motherless.com URLs into a list of media info dicts.

    mtlessgetargs is a dict read for the keys "url" (list of URLs),
    "user_agent", "referer", "proxy", "verbose", "id", "pages_start" and
    "pages_end"; it defaults to the parsed command-line arguments. Each URL
    may name a single item, a listing of items, or a listing of galleries;
    listings are walked page by page.

    Returns one dict per item found, for all given URLs, with the keys
    id, title, format, filename, thumbnail, servername, servingtime,
    mediatype, vidpic, type, info, dimensions, width, height, views,
    favorites, username, avatar, smallavatar, posts, pageurl and url.
    Installs the opener it builds as the global urllib2 opener.
    """
    fakeua = mtlessgetargs["user_agent"]
    geturls_cj = cookielib.CookieJar()
    handlers = [urllib2.HTTPCookieProcessor(geturls_cj)]
    if mtlessgetargs["proxy"] is not None:
        handlers.append(urllib2.ProxyHandler({"http": mtlessgetargs["proxy"]}))
    geturls_opener = urllib2.build_opener(*handlers)
    geturls_opener.addheaders = [
        ("Referer", mtlessgetargs["referer"]),
        ("User-Agent", fakeua),
        ("Accept-Encoding", "gzip, deflate"),
        ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"),
        ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Connection", "close"),
    ]
    urllib2.install_opener(geturls_opener)
    # Pauses (seconds) between galleries and between item pages.
    per_gal_sleep = 0
    per_url_sleep = 0
    verbose = mtlessgetargs["verbose"]
    # Accumulates across every URL and gallery so nothing found is dropped.
    mlessoutlist = []
    for rawurl in mtlessgetargs["url"]:
        mlessvid = _mless_site_path(geturls_opener, rawurl, verbose)
        mlessvidid = urlparse.urlparse(mlessvid).path.split("/")
        mlessgallist = []
        # "/random", "/random/image", "/random/video": follow the redirect.
        if mlessvidid[1].startswith("random") and (len(mlessvidid) == 2 or (len(mlessvidid) == 3 and mlessvidid[2].startswith(("image", "video")))):
            geturls_text = geturls_opener.open(_MLESS_SITE + mlessvid)
            mlessvid = geturls_text.geturl()
            found = _MLESS_PATH_RE.findall(mlessvid)
            if found:
                mlessvid = found[0]
            if verbose:
                print(mlessvid)
        # Gallery indexes list galleries, each of which is expanded below.
        if len(mlessvidid) == 4 and (mlessvidid[1].startswith("galleries") or (mlessvidid[1].startswith(("f", "term")) and mlessvidid[2].startswith("galleries"))):
            mlessgallist.extend(_mless_list_links(geturls_opener, mlessvid, mtlessgetargs))
        if not mlessvidid[1].startswith("galleries") or len(mlessvidid) < 4 or len(mlessvidid) > 5:
            mlessgallist.append(mlessvid)
        numusrgal = len(mlessgallist)
        for curusrgal, galpath in enumerate(mlessgallist):
            if not galpath.startswith("/"):
                galpath = "/" + galpath
            mlessvidqstr = urlparse.parse_qs(urlparse.urlparse(galpath).query)
            mlessvidid = urlparse.urlparse(galpath).path.split("/")
            mlessurllist = []
            if _mless_is_listing(mlessvidid):
                tvaradd = ""
                if mlessvidid[1].startswith("u") and len(mlessvidid) == 3:
                    # Keep the user-page media filter (t=i or t=v) on every page.
                    tvalue = mlessvidqstr.get("t", [""])[0]
                    if tvalue == "i" or tvalue == "v":
                        tvaradd = "&t=" + tvalue
                mlessurllist.extend(_mless_list_links(geturls_opener, galpath, mtlessgetargs, extraquery=tvaradd, videogroup=mlessvidid[1].startswith("V")))
            if _mless_is_single(mlessvidid):
                mlessurllist.append(galpath)
            numlist = len(mlessurllist)
            for curlurl, itempath in enumerate(mlessurllist):
                mlesslistitms = _mless_scrape_item(geturls_opener, itempath, mtlessgetargs)
                if mlesslistitms is not None:
                    mlessoutlist.append(mlesslistitms)
                if curlurl < (numlist - 1):
                    time.sleep(per_url_sleep)
            if curusrgal < (numusrgal - 1):
                time.sleep(per_gal_sleep)
    return mlessoutlist

if __name__ == "__main__":
    # Any of these switches replaces the default output (the media URL).
    fieldflags = (
        "get_id", "get_title", "get_posts", "get_format", "get_filename",
        "get_thumbnail", "get_username", "get_pageurl", "get_bbcode",
        "get_html", "get_dimensions", "get_width", "get_height",
        "get_views", "get_favorites", "get_type",
    )
    anyfieldflag = any(getattr(getargs, flagname) for flagname in fieldflags)
    # Print the requested fields of every resolved item, one value per line.
    for mtlesslink in motherless_dl():
        if getargs.get_id:
            print(mtlesslink["id"])
        if getargs.get_title:
            print(mtlesslink["title"])
        if getargs.get_posts:
            for mtlesspost in mtlesslink["posts"]:
                print(mtlesspost["username"] + ": " + mtlesspost["post"])
        if getargs.get_format:
            print(mtlesslink["format"])
        if getargs.get_type:
            print(mtlesslink["type"])
        if getargs.get_filename:
            print(mtlesslink["filename"])
        if getargs.get_thumbnail:
            print(mtlesslink["thumbnail"])
            # Videos additionally have a preview picture.
            if mtlesslink["format"] == "mp4" or mtlesslink["format"] == "flv":
                print(mtlesslink["vidpic"])
        if getargs.get_username:
            print(mtlesslink["username"])
        if getargs.get_pageurl:
            print(mtlesslink["pageurl"])
        if getargs.get_bbcode:
            print("[URL=" + mtlesslink["pageurl"] + "][IMG]" + mtlesslink["thumbnail"] + "[/IMG][/URL]")
        if getargs.get_html:
            print("<a href=\"" + mtlesslink["pageurl"] + "\"><img src=\"" + mtlesslink["thumbnail"] + "\"></a>")
        if getargs.get_dimensions:
            print(mtlesslink["dimensions"])
        if getargs.get_width:
            print(str(mtlesslink["width"]))
        if getargs.get_height:
            print(str(mtlesslink["height"]))
        if getargs.get_views:
            print(mtlesslink["views"])
        if getargs.get_favorites:
            print(mtlesslink["favorites"])
        # The media URL is printed on request, or when nothing else was asked.
        if getargs.get_url or not anyfieldflag:
            print(mtlesslink["url"])