4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: motherless-dl.py - Last Update: 11/10/2013 Ver. 1.6.7 RC 1 - Author: cooldude2k $
19 from __future__
import division
, absolute_import
, print_function
;
20 import re
, os
, sys
, urllib
, urllib2
, cookielib
, StringIO
, gzip
, time
, datetime
, argparse
, urlparse
;
# When executed as a script, hide traceback frames so an uncaught error prints
# only its final "ExceptionType: message" line.
if __name__ == "__main__":
    sys.tracebacklimit = 0

# Version metadata: (major, minor, patch, release tag). The tag may be None
# for an untagged release.
#
# NOTE: the exact source text of the next two assignments -- spacing, quoting
# and the trailing ";" included -- is matched by the regular expressions in
# the --update code path, which downloads the upstream copy of this file and
# parses these lines out of it. Do not reformat them.
#
# NOTE(review): the $FileInfo header advertises "Ver. 1.6.7 RC 1"
# (11/10/2013) while these constants say 1.6.5 RC 3 (2013.10.23) -- confirm
# which one is current.
__version_info__ = (1, 6, 5, "RC 3");
__version_date__ = "2013.10.23";

# Human-readable version: "major.minor.patch", followed by " <tag>" only when
# a release tag is present.
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    __version__ = __version__ + " " + str(__version_info__[3])
# Command-line interface.
#
# Builds the module-level ``parser`` and immediately parses sys.argv into
# ``getargs``. Option names, destinations, defaults and registration order
# are unchanged; only two help strings and the bare-flag behaviour of
# --user-agent / --referer differ from the previous revision.
_DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 7.0; rv:25.0) Gecko/20100101 Firefox/25.0"
_DEFAULT_REFERER = "http://motherless.com/"

parser = argparse.ArgumentParser(
    description="get urls of images/videos from motherless.com",
    conflict_handler="resolve",
    add_help=True,
)
parser.add_argument("url", nargs="*", help="motherless url")
parser.add_argument("-v", "--version", action="version", version=__version__)

# nargs="*" stores these as lists of strings; motherless_dl() only ever reads
# element 0 of each list.
parser.add_argument("--pages-start", nargs="*", help="start at page number")
parser.add_argument("--pages-end", nargs="*", help="end at page number")

parser.add_argument(
    "--update",
    action="store_true",
    help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)",
)
parser.add_argument(
    "--dump-user-agent",
    action="store_true",
    help="display the current browser identification",
)

# With nargs="?" a flag given without a value stores ``const``, which defaults
# to None. Both values below are written straight into the request headers,
# so ``const`` is pinned to the default to keep a bare flag harmless.
parser.add_argument(
    "--user-agent",
    nargs="?",
    const=_DEFAULT_USER_AGENT,
    default=_DEFAULT_USER_AGENT,
    help="specify a custom user agent",
)
parser.add_argument(
    "--referer",
    nargs="?",
    const=_DEFAULT_REFERER,
    default=_DEFAULT_REFERER,
    # The previous help text stopped mid-sentence ("... use if the video access").
    help="specify a custom referer, use if the video access is restricted to one domain",
)
parser.add_argument(
    "--proxy",
    nargs="?",
    default=None,
    help="Use the specified HTTP/HTTPS proxy",
)

# Plain on/off switches, registered in the order they appear in --help.
for _flag, _help in (
    ("--id", "use only video ID in file name"),
    ("--get-url", "simulate, quiet but print URL"),
    # Prints the "pageurl" field, not the media URL, so say so.
    ("--get-pageurl", "simulate, quiet but print page URL"),
    ("--get-title", "simulate, quiet but print title"),
    ("--get-posts", "simulate, quiet but print user posts"),
    ("--get-id", "simulate, quiet but print id"),
    ("--get-thumbnail", "simulate, quiet but print thumbnail URL"),
    ("--get-filename", "simulate, quiet but print output filename"),
    ("--get-format", "simulate, quiet but print file format"),
    ("--get-type", "simulate, quiet but print file type"),
    ("--get-username", "simulate, quiet but print uploaders username"),
    ("--get-bbcode", "simulate, quiet but print bbcode"),
    ("--get-html", "simulate, quiet but print html code"),
    ("--get-dimensions", "simulate, quiet but print dimensions (width x height)"),
    ("--get-width", "simulate, quiet but print width"),
    ("--get-height", "simulate, quiet but print height"),
    ("--get-views", "simulate, quiet but print number of views"),
    ("--get-favorites", "simulate, quiet but print number of favorites"),
    ("--verbose", "print various debugging information"),
):
    parser.add_argument(_flag, action="store_true", help=_help)

# Parsed once at import time; the rest of the module reads ``getargs``.
getargs = parser.parse_args()
60 if(getargs
.update
==True):
61 from distutils
.version
import LooseVersion
as VerCheck
;
62 fakeua
= getargs
.user_agent
;
64 if(getargs
.proxy
!=None):
65 proxycfg
= urllib2
.ProxyHandler({"http": getargs
.proxy
});
66 geturls_cj
= cookielib
.CookieJar();
68 geturls_opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(geturls_cj
));
70 geturls_opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(geturls_cj
), proxycfg
);
71 geturls_opener
.addheaders
= [("Referer", "https://github.com/GameMaker2k/Python-Scripts/"), ("User-Agent", fakeua
), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
72 urllib2
.install_opener(geturls_opener
);
73 geturls_text
= geturls_opener
.open("https://raw.github.com/GameMaker2k/Python-Scripts/master/MiniScripts/motherless-dl.py");
74 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
75 strbuf
= StringIO
.StringIO(geturls_text
.read());
76 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
77 pyfile_text
= gzstrbuf
.read()[:];
78 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
79 pyfile_text
= geturls_text
.read()[:];
80 regex_finddate_text
= re
.escape("__version_date__ = \"")+"([0-9\.]+)"+re
.escape("\"");
81 finddate_text
= re
.findall(regex_finddate_text
, pyfile_text
);
82 regex_findver_text
= re
.escape("__version_info__ = (")+"([0-9]+)"+re
.escape(", ")+"([0-9]+)"+re
.escape(", ")+"([0-9]+)"+re
.escape(", \"")+"([A-Z0-9 ]+)"+re
.escape("\");");
83 findver_text
= re
.findall(regex_findver_text
, pyfile_text
);
84 ProVerStr
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+__version_info__
[3].replace(" ", "").lower();
85 ProVerCheck
= VerCheck(ProVerStr
);
86 ProDateCheck
= VerCheck(__version_date__
);
87 NewVerStr
= findver_text
[0][0]+"."+findver_text
[0][1]+"."+findver_text
[0][2]+findver_text
[0][3].replace(" ", "").lower();
88 NewVerCheck
= VerCheck(NewVerStr
);
89 NewDateCheck
= VerCheck(finddate_text
[0]);
90 if(ProVerStr
< NewVerCheck
and ProDateCheck
<= NewDateCheck
):
91 fileopen
= open(__file__
, "w+");
92 fileopen
.write(pyfile_text
);
97 if(getargs
.dump_user_agent
==True):
98 print(getargs
.user_agent
);
100 if(len(getargs
.url
)==0):
103 def motherless_dl(mtlessgetargs
=vars(getargs
)):
104 fakeua
= mtlessgetargs
["user_agent"];
106 if(mtlessgetargs
["proxy"]!=None):
107 proxycfg
= urllib2
.ProxyHandler({"http": mtlessgetargs
["proxy"]});
108 geturls_cj
= cookielib
.CookieJar();
110 geturls_opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(geturls_cj
));
112 geturls_opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(geturls_cj
), proxycfg
);
113 geturls_opener
.addheaders
= [("Referer", mtlessgetargs
["referer"]), ("User-Agent", fakeua
), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
114 urllib2
.install_opener(geturls_opener
);
117 numurlarg
= len(mtlessgetargs
["url"]);
119 while(cururlarg
<numurlarg
):
120 mlessvid
= mtlessgetargs
["url"][cururlarg
];
121 if(re
.match("^s([0-9]+)"+re
.escape(".motherlessmedia.com"), urlparse
.urlparse(mlessvid
).hostname
)):
122 geturls_text
= geturls_opener
.open("http://motherless.com/mogile_api.php?path="+urllib
.quote_plus(mlessvid
)+"&redirect=1");
123 mlessvid
= geturls_text
.geturl();
124 mregex_text
= re
.escape("http://motherless.com/")+"([\w\/\?\&\=]+)";
125 if(re
.findall(mregex_text
, mlessvid
)):
126 mlessvid
= re
.findall(mregex_text
, mlessvid
);
127 mlessvid
= "http://motherless.com/"+mlessvid
[0];
128 if(mtlessgetargs
["verbose"]==True):
130 if(re
.match("^"+re
.escape("thumbs.motherlessmedia.com"), urlparse
.urlparse(mlessvid
).hostname
)):
131 mlessvid
= re
.sub(re
.escape("-zoom"), "", mlessvid
);
132 mlessvid
= re
.sub(re
.escape("-strip"), "", mlessvid
);
133 mlessvidtmp
= urlparse
.urlparse(mlessvid
).path
.split("/");
134 mlessvid
= "http://motherless.com/"+mlessvidtmp
[2];
135 mregex_text
= re
.escape("http://motherless.com/")+"([\w\/\?\&\=]+)";
136 if(re
.findall(mregex_text
, mlessvid
)):
137 mlessvid
= re
.findall(mregex_text
, mlessvid
);
138 mlessvid
= "http://motherless.com/"+mlessvid
[0];
139 mlessvid
= re
.sub(re
.escape("http://motherless.com/"), "", mlessvid
);
140 mlessvid
= re
.sub(re
.escape("http://www.motherless.com/"), "", mlessvid
);
141 mlessvid
= re
.sub(re
.escape("https://motherless.com/"), "", mlessvid
);
142 mlessvid
= re
.sub(re
.escape("https://www.motherless.com/"), "", mlessvid
);
143 mlessvid
= re
.sub(re
.escape("motherless.com/"), "", mlessvid
);
144 mlessvid
= re
.sub(re
.escape("www.motherless.com/"), "", mlessvid
);
145 mlessvid
= re
.sub("^"+re
.escape("/"), "", mlessvid
);
146 mlessvid
= "http://motherless.com/"+mlessvid
;
147 mregex_text
= re
.escape("http://motherless.com/")+"([\w\/\?\&\=]+)";
148 if(re
.findall(mregex_text
, mlessvid
)):
149 mlessvid
= re
.findall(mregex_text
, mlessvid
);
150 mlessvid
= "/"+mlessvid
[0];
151 mlessvidqstr
= urlparse
.parse_qs(urlparse
.urlparse(mlessvid
).query
);
152 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split("/");
154 if((re
.match("^random", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^random", mlessvidid
[1]) and len(mlessvidid
)==3) and (re
.match("^image", mlessvidid
[2]) or re
.match("^video", mlessvidid
[2]))):
155 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
);
156 mlessvid
= geturls_text
.geturl();
157 if(re
.findall(mregex_text
, mlessvid
)):
158 mlessvid
= re
.findall(mregex_text
, mlessvid
);
159 mlessvid
= mlessvid
[0];
160 if(mtlessgetargs
["verbose"]==True):
162 if((re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)==4) or (re
.match("^f", mlessvidid
[1]) and re
.match("^galleries", mlessvidid
[2]) and len(mlessvidid
)==4) or (re
.match("^term", mlessvidid
[1]) and re
.match("^galleries", mlessvidid
[2]) and len(mlessvidid
)==4)):
163 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page=1");
164 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
165 strbuf
= StringIO
.StringIO(geturls_text
.read());
166 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
167 out_text
= gzstrbuf
.read()[:];
168 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
169 out_text
= geturls_text
.read()[:];
170 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
171 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
172 out_text
= re
.sub(re
.escape("https://motherless.com"), "", out_text
);
173 out_text
= re
.sub(re
.escape("https://www.motherless.com"), "", out_text
);
174 regex_ptext
= re
.escape("class=\"pop\" rel=\"")+"([0-9]+)"+re
.escape("\">")+"([0-9]+)"+re
.escape("</a>");
175 page_text
= re
.findall(regex_ptext
, out_text
);
177 numpages
= int(page_text
[-1][0]);
181 if(not mtlessgetargs
["pages_start"]==None and mtlessgetargs
["pages_start"][0].isdigit()):
182 if(int(mtlessgetargs
["pages_start"][0])<=numpages
):
183 curpage
= int(mtlessgetargs
["pages_start"][0]);
184 if(not mtlessgetargs
["pages_end"]==None and mtlessgetargs
["pages_end"][0].isdigit()):
185 if(int(mtlessgetargs
["pages_end"][0])>=curpage
):
186 numpages
= int(mtlessgetargs
["pages_end"][0]);
187 if(int(mtlessgetargs
["pages_end"][0])<=numpages
):
188 numpages
= int(mtlessgetargs
["pages_end"][0]);
189 while(curpage
<=numpages
):
191 geturls_text
= geturls_opener
.open("http://motherless.com/"+mlessvid
+"?page="+str(curpage
));
192 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
193 strbuf
= StringIO
.StringIO(geturls_text
.read());
194 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
195 out_text
= gzstrbuf
.read()[:];
196 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
197 out_text
= geturls_text
.read()[:];
198 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
199 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
200 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
201 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
202 regex_text
= re
.escape("")+"([\w\/]+)"+re
.escape("\" class=\"img-container\" target=\"_self\">");
203 post_text
= re
.findall(regex_text
, out_text
);
204 numgal
= len(post_text
);
206 while(curgal
<numgal
):
207 mlessgallist
.append(post_text
[curgal
]);
208 if(mtlessgetargs
["verbose"]==True):
209 print(post_text
[curgal
]);
211 curpage
= curpage
+ 1;
212 if(not re
.match("^galleries", mlessvidid
[1]) or (re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)<4) or (re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)>5)):
213 mlessgallist
.append(mlessvid
);
214 numusrgal
= len(mlessgallist
);
216 while(curusrgal
<numusrgal
):
217 mlessvid
= mlessgallist
[curusrgal
];
218 if(not re
.match("^\/", mlessvid
)):
219 mlessvid
= "/"+mlessvid
;
220 mlessvidqstr
= urlparse
.parse_qs(urlparse
.urlparse(mlessvid
).query
);
221 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split("/");
223 if((re
.match("^G", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^H", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^V", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^live", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^g", mlessvidid
[1]) and len(mlessvidid
)==3) or (re
.match("^u", mlessvidid
[1]) and len(mlessvidid
)==3) or (re
.match("^term", mlessvidid
[1]) and (re
.match("^videos", mlessvidid
[2]) or re
.match("^images", mlessvidid
[2])) and len(mlessvidid
)==4) or (re
.match("^f", mlessvidid
[1]) and len(mlessvidid
)==4 and (re
.match("^videos", mlessvidid
[3]) or re
.match("^images", mlessvidid
[3]))) or (re
.match("^live", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^images", mlessvidid
[2]) or re
.match("^videos", mlessvidid
[2]))) or (re
.match("^images", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^favorited", mlessvidid
[2]) or re
.match("^viewed", mlessvidid
[2]) or re
.match("^commented", mlessvidid
[2]) or re
.match("^popular", mlessvidid
[2]))) or (re
.match("^videos", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^favorited", mlessvidid
[2]) or re
.match("^viewed", mlessvidid
[2]) or re
.match("^commented", mlessvidid
[2]) or re
.match("^popular", mlessvidid
[2])))):
226 if(re
.match("^u", mlessvidid
[1]) and len(mlessvidid
)==3):
228 if(mlessvidqstr
["t"][0]=="i" or mlessvidqstr
["t"][0]=="v"):
229 tvaradd
= "&t="+mlessvidqstr
["t"][0];
235 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page=1"+tvaradd
);
236 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
237 strbuf
= StringIO
.StringIO(geturls_text
.read());
238 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
239 out_text
= gzstrbuf
.read()[:];
240 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
241 out_text
= geturls_text
.read()[:];
242 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
243 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
244 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
245 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
246 regex_ptext
= re
.escape("class=\"pop\" rel=\"")+"([0-9]+)"+re
.escape("\">")+"([0-9]+)"+re
.escape("</a>");
247 page_text
= re
.findall(regex_ptext
, out_text
);
249 numpages
= int(page_text
[-1][0]);
253 if(not mtlessgetargs
["pages_start"]==None and mtlessgetargs
["pages_start"][0].isdigit()):
254 if(int(mtlessgetargs
["pages_start"][0])<=numpages
):
255 curpage
= int(mtlessgetargs
["pages_start"][0]);
256 if(not mtlessgetargs
["pages_end"]==None and mtlessgetargs
["pages_end"][0].isdigit()):
257 if(int(mtlessgetargs
["pages_end"][0])>=curpage
):
258 numpages
= int(mtlessgetargs
["pages_end"][0]);
259 if(int(mtlessgetargs
["pages_end"][0])<=numpages
):
260 numpages
= int(mtlessgetargs
["pages_end"][0]);
261 while(curpage
<=numpages
):
263 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page="+str(curpage
)+tvaradd
);
264 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
265 strbuf
= StringIO
.StringIO(geturls_text
.read());
266 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
267 out_text
= gzstrbuf
.read()[:];
268 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
269 out_text
= geturls_text
.read()[:];
270 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
271 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
272 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
273 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
274 if(re
.match("^V", mlessvidid
[1])):
275 out_text
= re
.sub(re
.escape("class=\"img-container\" target=\"_self\""), "title=\"motherless link\"", out_text
);
276 out_text
= re
.sub(re
.escape("class=\"pop plain\" target=\"_blank\""), "title=\"motherless link\"", out_text
);
277 regex_text
= re
.escape("<a href=\"")+"([\w\/]+)"+re
.escape("\" title=\"motherless link\">");
278 if(not re
.match("^V", mlessvidid
[1])):
279 regex_text
= re
.escape("")+"([\w\/]+)"+re
.escape("\" class=\"img-container\" target=\"_self\">");
280 post_text
= re
.findall(regex_text
, out_text
);
281 numurls
= len(post_text
);
283 while(cururl
<numurls
):
284 mlessurllist
.append(post_text
[cururl
]);
285 if(mtlessgetargs
["verbose"]==True):
286 print(post_text
[cururl
]);
288 curpage
= curpage
+ 1;
289 if((re
.match("^G", mlessvidid
[1]) and len(mlessvidid
)==3 and re
.match("([0-9A-F]+)", mlessvidid
[2])) or (re
.match("^g", mlessvidid
[1]) and len(mlessvidid
)==4) or (len(mlessvidid
)==2 and re
.match("([0-9A-F]+)", mlessvidid
[1]))):
290 mlessurllist
.append(mlessvid
);
291 numlist
= len(mlessurllist
);
294 while(curlurl
<numlist
):
297 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessurllist
[curlurl
]);
298 except urllib2
.HTTPError
:
301 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
302 strbuf
= StringIO
.StringIO(geturls_text
.read());
303 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
304 subout_text
= gzstrbuf
.read()[:];
305 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
306 subout_text
= geturls_text
.read()[:];
307 subout_text
= re
.sub(re
.escape("http://motherless.com"), "", subout_text
);
308 subout_text
= re
.sub(re
.escape("http://www.motherless.com"), "", subout_text
);
309 subout_text
= re
.sub(re
.escape("http://motherless.com"), "", subout_text
);
310 subout_text
= re
.sub(re
.escape("http://www.motherless.com"), "", subout_text
);
311 regex_title
= re
.escape("<title>")+"(.*)"+re
.escape("</title>");
312 title_text
= re
.findall(regex_title
, subout_text
);
313 mlesstitle
= re
.sub(re
.escape(" - MOTHERLESS.COM"), "", title_text
[0]);
314 regex_thumb
= re
.escape("src="")+"(.*)"+re
.escape(""");
315 thumb_text
= re
.findall(regex_thumb
, subout_text
);
316 mlessthumb
= thumb_text
[0];
317 regex_text
= re
.escape("__fileurl = '")+"(.*)"+re
.escape("';");
318 post_text
= re
.findall(regex_text
, subout_text
);
319 regex_img
= re
.escape("<meta property=\"og:image\" content=\"")+"(.*)"+re
.escape("\">");
320 img_text
= re
.findall(regex_img
, subout_text
);
321 mlessimg
= img_text
[0];
322 regex_mediatype
= re
.escape("__mediatype = '")+"(.*)"+re
.escape("',");
323 mediatype_text
= re
.findall(regex_mediatype
, subout_text
);
324 regex_altimg
= re
.escape("<link rel=\"image_src\" type=\"image/")+"(.*)"+re
.escape("\" href=\"")+"(.*)"+re
.escape("\">");
325 altimg_text
= re
.findall(regex_altimg
, subout_text
);
326 mlessaltimg
= altimg_text
[0][1];
327 regex_usrname
= re
.escape("<a href=\"/u/")+"([\w]+)"+re
.escape("\" class=\"pop plain thumb-member-link-uploads\">Uploads</a>");
328 usrname_text
= re
.findall(regex_usrname
, subout_text
);
329 mlessusrname
= usrname_text
[0];
330 mlessid
= re
.sub("^"+re
.escape("/"), "", mlessurllist
[curlurl
]);
331 mlesspurl
= "http://motherless.com"+mlessurllist
[curlurl
];
332 regex_numviews
= re
.escape("<strong>Views</strong>")+"\n+\t+([^\t]+)\t+"+re
.escape("</h2>");
333 numviews_text
= re
.findall(regex_numviews
, subout_text
);
334 mlessnumviews
= numviews_text
[0];
335 mlessnumviews
= re
.sub(re
.escape(","), "", mlessnumviews
);
336 regex_numfavs
= re
.escape("<strong>Favorited</strong>")+"\n+\t+([^\t]+)\t+"+re
.escape("</h2>");
337 numfavs_text
= re
.findall(regex_numfavs
, subout_text
);
338 mlessnumfavs
= numfavs_text
[0];
339 mlessnumfavs
= re
.sub(re
.escape(","), "", mlessnumfavs
);
340 ''' some good regex "!-%'-?A-~ " "!-%'-?A-~ \<\>\"\'\@\#" '''
341 regex_postdata
= re
.escape("<div class=\"media-comment-contents\">")+"\n\t+"+re
.escape("<h4>")+"\n\t+"+re
.escape("<a href=\"/m/")+"([\w]+)"+re
.escape("\" class=\"pop plain\" target=\"_blank\">")+"\n\t+([^\t]+)\t+"+re
.escape("</a>")+"\n\t+"+re
.escape("</h4>")+"\n\t+"+re
.escape("<div class=\"media-comment-meta\">")+"\n\t+([^\t]+)\t+"+re
.escape("</div>")+"\n\t+"+re
.escape("<div style=\"text-align: justify;\">")+"\n\t+([^\t]+)\t+"+re
.escape("</div>");
342 postdata_text
= re
.findall(regex_postdata
, subout_text
);
343 numpost
= len(postdata_text
);
344 regex_servsecs
= re
.escape("Served by web")+"([0-9]+)"+re
.escape(" in ")+"([0-9\.]+)"+re
.escape(" seconds");
345 servsecs_text
= re
.findall(regex_servsecs
, subout_text
);
346 servname
= "web"+servsecs_text
[0][0];
347 servsecs
= float(servsecs_text
[0][1]);
350 ''' From Amber @ http://stackoverflow.com/a/9662362 '''
351 TAG_RE
= re
.compile(r
'<[^>]+>');
352 while(numpost
>0 and curpost
<numpost
):
353 newpostext
= re
.sub(re
.escape("<br>"), "\n", postdata_text
[curpost
][3]);
354 newpostext
= re
.sub(re
.escape("<br/>"), "\n", newpostext
);
355 newpostext
= re
.sub(re
.escape("<br />"), "\n", newpostext
);
356 newpostext
= TAG_RE
.sub('', newpostext
);
357 newpostext
= re
.sub(re
.escape("/")+"([\w\/]+)", r
"http://motherless.com/\1", newpostext
);
358 mlesspostlist
.append({"username": postdata_text
[curpost
][0], "avatar": "http://avatars.motherlessmedia.com/avatars/member/"+postdata_text
[curpost
][0]+".jpg", "smallavatar": "http://avatars.motherlessmedia.com/avatars/member/"+postdata_text
[curpost
][0]+"-small.jpg", "post": newpostext
});
359 curpost
= curpost
+ 1;
361 mlesslink
= post_text
[0];
362 mlessext
= os
.path
.splitext(urlparse
.urlparse(mlesslink
).path
)[1];
363 mlessext
= mlessext
.replace(".", "");
364 mlessext
= mlessext
.lower();
365 if(mtlessgetargs
["id"]==False):
366 mlessfname
= urlparse
.urlsplit(mlesslink
).path
.split("/")[-1];
367 if(mtlessgetargs
["id"]==True):
368 mlessfname
= re
.sub(re
.escape("/"), "_", mlessid
)+"."+mlessext
;
369 if(not mlessext
=="mp4" and not mlessext
=="flv"):
371 regex_ii_dimensions
= re
.escape("style=\"width: ")+"([0-9]+)"+re
.escape("px; height: ")+"([0-9]+)"+re
.escape("px; border: none;\"");
372 post_ii_dimensions
= re
.findall(regex_ii_dimensions
, subout_text
);
373 post_ii_width
= post_ii_dimensions
[0][0];
374 post_ii_height
= post_ii_dimensions
[0][1];
375 imginfo
= {"width": int(post_ii_height
), "height": int(post_ii_width
), "views": int(mlessnumviews
), "favorites": int(mlessnumfavs
)};
376 if(mlessext
=="mp4" or mlessext
=="flv"):
378 mlesslink
= mlesslink
+"?start=0";
379 regex_vi_file
= re
.escape("\"file\" : \"")+"(.*)"+re
.escape("\",");
380 post_vi_file
= re
.findall(regex_vi_file
, subout_text
);
381 regex_vi_image
= re
.escape("\"image\" : \"")+"(.*)"+re
.escape("\",");
382 post_vi_image
= re
.findall(regex_vi_image
, subout_text
);
383 regex_vi_height
= re
.escape("\"height\" : ")+"([0-9]+)"+re
.escape(",");
384 post_vi_height
= re
.findall(regex_vi_height
, subout_text
);
385 regex_vi_width
= re
.escape("\"width\" : ")+"([0-9]+)"+re
.escape(",");
386 post_vi_width
= re
.findall(regex_vi_width
, subout_text
);
387 regex_vi_filethumb
= re
.escape("\"file\": ")+"(.*)"+re
.escape(",");
388 post_vi_filethumb
= re
.findall(regex_vi_filethumb
, subout_text
);
389 regex_vi_kind
= re
.escape("\"kind\": \"")+"(.*)"+re
.escape("\"");
390 post_vi_kind
= re
.findall(regex_vi_kind
, subout_text
);
391 vidinfo
= {"file": post_vi_file
[0], "image": post_vi_image
[0], "width": int(post_vi_width
[0]), "height": int(post_vi_height
[0]), "views": int(mlessnumviews
), "favorites": int(mlessnumfavs
), "filethumb": post_vi_filethumb
[0], "thumbstrip": "http://thumbs.motherlessmedia.com/thumbs/"+mlessid
+"-strip.jpg", "kind": post_vi_kind
[0]};
392 if(mtlessgetargs
["verbose"]==True):
395 mlesslistitms
.update({"id": mlessid
});
396 mlesslistitms
.update({"title": mlesstitle
});
397 mlesslistitms
.update({"format": mlessext
});
398 mlesslistitms
.update({"filename": mlessfname
});
399 mlesslistitms
.update({"thumbnail": mlessthumb
});
400 mlesslistitms
.update({"servername": servname
});
401 mlesslistitms
.update({"servingtime": servsecs
});
402 mlesslistitms
.update({"mediatype": mediatype_text
[0]});
403 if(not mlessext
=="mp4" and not mlessext
=="flv"):
404 mlesslistitms
.update({"vidpic": mlesslink
});
405 mlesslistitms
.update({"type": "image"});
406 mlesslistitms
.update({"info": imginfo
});
407 mlesslistitms
.update({"dimensions": str(imginfo
["width"])+"x"+str(imginfo
["height"])});
408 mlesslistitms
.update({"width": imginfo
["width"]});
409 mlesslistitms
.update({"height": imginfo
["height"]});
410 mlesslistitms
.update({"views": imginfo
["views"]});
411 mlesslistitms
.update({"favorites": imginfo
["favorites"]});
412 if(mlessext
=="mp4" or mlessext
=="flv"):
413 mlesslistitms
.update({"vidpic": mlessimg
});
414 mlesslistitms
.update({"type": "video"});
415 mlesslistitms
.update({"info": vidinfo
});
416 mlesslistitms
.update({"dimensions": str(vidinfo
["width"])+"x"+str(vidinfo
["height"])});
417 mlesslistitms
.update({"width": vidinfo
["width"]});
418 mlesslistitms
.update({"height": vidinfo
["height"]});
419 mlesslistitms
.update({"views": vidinfo
["views"]});
420 mlesslistitms
.update({"favorites": vidinfo
["favorites"]});
421 mlesslistitms
.update({"username": mlessusrname
});
422 mlesslistitms
.update({"avatar": "http://avatars.motherlessmedia.com/avatars/member/"+mlessusrname
+".jpg"});
423 mlesslistitms
.update({"smallavatar": "http://avatars.motherlessmedia.com/avatars/member/"+mlessusrname
+"-small.jpg"});
424 mlesslistitms
.update({"posts": mlesspostlist
});
425 mlesslistitms
.update({"pageurl": mlesspurl
});
426 mlesslistitms
.update({"url": mlesslink
});
427 mlessoutlist
.append(mlesslistitms
);
428 if(curlurl
<(numlist
- 1)):
429 time
.sleep(per_url_sleep
);
430 curlurl
= curlurl
+ 1;
431 if(curusrgal
<(numusrgal
- 1)):
432 time
.sleep(per_gal_sleep
);
433 curusrgal
= curusrgal
+ 1;
434 cururlarg
= cururlarg
+ 1;
436 if(__name__
== "__main__"):
437 mtlesslinks
= motherless_dl();
438 mtlesslncount
= len(mtlesslinks
);
440 while(mtlesscurln
<mtlesslncount
):
441 if(getargs
.get_id
==True):
442 print(mtlesslinks
[mtlesscurln
]["id"]);
443 if(getargs
.get_title
==True):
444 print(mtlesslinks
[mtlesscurln
]["title"]);
445 if(getargs
.get_posts
==True):
446 numpost
= len(mtlesslinks
[mtlesscurln
]["posts"]);
449 while(numpost
>0 and curpost
<numpost
):
450 print(mtlesslinks
[mtlesscurln
]["posts"][curpost
]["username"]+": "+mtlesslinks
[mtlesscurln
]["posts"][curpost
]["post"]);
451 curpost
= curpost
+ 1;
452 if(getargs
.get_format
==True):
453 print(mtlesslinks
[mtlesscurln
]["format"]);
454 if(getargs
.get_type
==True):
455 print(mtlesslinks
[mtlesscurln
]["type"]);
456 if(getargs
.get_filename
==True):
457 print(mtlesslinks
[mtlesscurln
]["filename"]);
458 if(getargs
.get_thumbnail
==True):
459 print(mtlesslinks
[mtlesscurln
]["thumbnail"]);
460 if(mtlesslinks
[mtlesscurln
]["format"]=="mp4" or mtlesslinks
[mtlesscurln
]["format"]=="flv"):
461 print(mtlesslinks
[mtlesscurln
]["vidpic"]);
462 if(getargs
.get_username
==True):
463 print(mtlesslinks
[mtlesscurln
]["username"]);
464 if(getargs
.get_pageurl
==True):
465 print(mtlesslinks
[mtlesscurln
]["pageurl"]);
466 if(getargs
.get_bbcode
==True):
467 print("[URL="+mtlesslinks
[mtlesscurln
]["pageurl"]+"][IMG]"+mtlesslinks
[mtlesscurln
]["thumbnail"]+"[/IMG][/URL]");
468 if(getargs
.get_html
==True):
469 print("<a href=\""+mtlesslinks
[mtlesscurln
]["pageurl"]+"\"><img src=\""+mtlesslinks
[mtlesscurln
]["thumbnail"]+"\"></a>");
470 if(getargs
.get_dimensions
==True):
471 print(mtlesslinks
[mtlesscurln
]["dimensions"]);
472 if(getargs
.get_width
==True):
473 print(str(mtlesslinks
[mtlesscurln
]["width"]));
474 if(getargs
.get_height
==True):
475 print(str(mtlesslinks
[mtlesscurln
]["height"]));
476 if(getargs
.get_views
==True):
477 print(mtlesslinks
[mtlesscurln
]["views"]);
478 if(getargs
.get_favorites
==True):
479 print(mtlesslinks
[mtlesscurln
]["favorites"]);
480 if(getargs
.get_url
==True or (getargs
.get_id
==False and getargs
.get_title
==False and getargs
.get_posts
==False and getargs
.get_format
==False and getargs
.get_filename
==False and getargs
.get_thumbnail
==False and getargs
.get_username
==False and getargs
.get_pageurl
==False and getargs
.get_bbcode
==False and getargs
.get_html
==False and getargs
.get_dimensions
==False and getargs
.get_width
==False and getargs
.get_height
==False and getargs
.get_views
==False and getargs
.get_favorites
==False and getargs
.get_type
==False)):
481 print(mtlesslinks
[mtlesscurln
]["url"]);
482 mtlesscurln
= mtlesscurln
+ 1;