Another small update.
[Python-Scripts.git] / MiniScripts / motherless-dl.py
blob 30b962af355ebcc36fa84a86e666cf38c516f110
1 #!/usr/bin/env python
3 '''
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: motherless-dl.py - Last Update: 10/09/2013 Ver. 1.6.0 RC 1 - Author: cooldude2k $
17 '''
19 import re, os, sys, urllib, urllib2, cookielib, StringIO, gzip, time, datetime, argparse, urlparse;
20 '''sys.tracebacklimit = 0;'''
# Version as (major, minor, patch, tag); tag is None for a final release.
__version_info__ = (1, 6, 0, "RC 1")
# Human-readable version: "major.minor.patch", plus " tag" when a tag is set.
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    __version__ = __version__ + " " + str(__version_info__[3])
# Request-header defaults. They are also used as `const`, so that passing
# --user-agent / --referer without a value keeps the default instead of None.
_MLESS_DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0"
_MLESS_DEFAULT_REFERER = "http://motherless.com/"

# Command-line interface. Every --get-* switch selects one field to print for
# each resolved item; with none of them given the direct media URL is printed.
parser = argparse.ArgumentParser()
parser.add_argument("url", nargs="*", help="motherless url")
parser.add_argument("--version", action='store_true', help="print program version and exit")
# NOTE(review): no code in this file acts on --update or --verbose.
parser.add_argument("--update", action='store_true', help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)")
parser.add_argument("--dump-user-agent", action='store_true', help="display the current browser identification")
parser.add_argument("--user-agent", nargs="?", const=_MLESS_DEFAULT_USER_AGENT, default=_MLESS_DEFAULT_USER_AGENT, help="specify a custom user agent")
parser.add_argument("--referer", nargs="?", const=_MLESS_DEFAULT_REFERER, default=_MLESS_DEFAULT_REFERER, help="specify a custom referer, use if the video access is restricted to one domain")
parser.add_argument("--id", action='store_true', help="use only video ID in file name")
parser.add_argument("--get-url", action='store_true', help="simulate, quiet but print URL")
parser.add_argument("--get-pageurl", action='store_true', help="simulate, quiet but print page URL")
parser.add_argument("--get-title", action='store_true', help="simulate, quiet but print title")
parser.add_argument("--get-id", action='store_true', help="simulate, quiet but print id")
parser.add_argument("--get-thumbnail", action='store_true', help="simulate, quiet but print thumbnail URL")
parser.add_argument("--get-filename", action='store_true', help="simulate, quiet but print output filename")
parser.add_argument("--get-format", action='store_true', help="simulate, quiet but print output format")
parser.add_argument("--get-username", action='store_true', help="simulate, quiet but print uploaders username")
parser.add_argument("--verbose", action='store_true', help="print various debugging information")
getargs = parser.parse_args()

# Informational switches short-circuit before any network access.
if getargs.version:
    print(__version__)
    sys.exit()
if getargs.dump_user_agent:
    print(getargs.user_agent)
    sys.exit()
# Nothing to resolve: show usage instead of silently doing nothing.
if not getargs.url:
    parser.print_help()
    sys.exit()
_MLESS_SITE = "http://motherless.com"


def _mless_site_path(url):
    """Reduce a user-supplied or redirected URL to a site-relative "/path?query"."""
    # "www." variants are stripped before the bare host so that no "www."
    # prefix is left behind; https is accepted as well as http.
    for prefix in ("https://www.motherless.com/", "https://motherless.com/",
                   "http://www.motherless.com/", "http://motherless.com/",
                   "www.motherless.com/", "motherless.com/"):
        url = url.replace(prefix, "")
    url = re.sub("^" + re.escape("/"), "", url)
    url = _MLESS_SITE + "/" + url
    found = re.findall(re.escape(_MLESS_SITE + "/") + r"([\w\/\?\&\=]+)", url)
    if found:
        return "/" + found[0]
    return url


def _mless_read_body(response):
    """Read a urllib2 response and undo its gzip or deflate Content-Encoding."""
    import zlib
    try:
        raw = response.read()
        encoding = response.info().get("Content-Encoding")
    finally:
        response.close()
    if encoding == "gzip":
        return gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
    if encoding == "deflate":
        # Servers send either a zlib-wrapped or a raw deflate stream.
        try:
            return zlib.decompress(raw)
        except zlib.error:
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def _mless_fetch_page(opener, url):
    """Download a page and make every link to the site itself site-relative."""
    body = _mless_read_body(opener.open(url))
    body = body.replace("http://motherless.com", "")
    body = body.replace("http://www.motherless.com", "")
    return body


def _mless_count_pages(page_text):
    """Return the last page number shown in the pager, or 1 without a pager."""
    regex_pager = re.escape("class=\"pop\" rel=\"") + "([0-9]+)" + re.escape("\">") + "([0-9]+)" + re.escape("</a>")
    pager = re.findall(regex_pager, page_text)
    return int(pager[-1][0]) if pager else 1


def _mless_thumb_links(page_text):
    """Extract the hrefs of the thumbnail ("img-container") anchors."""
    regex_links = r"([\w\/]+)" + re.escape("\" class=\"img-container\" target=\"_self\">")
    return re.findall(regex_links, page_text)


def _mless_v_listing_links(page_text):
    """Extract item hrefs from a "V..." listing, which uses two anchor styles."""
    # Both anchor styles are rewritten to one marker so a single regex finds them.
    page_text = page_text.replace("class=\"img-container\" target=\"_self\"", "title=\"motherless link\"")
    page_text = page_text.replace("class=\"pop plain\" target=\"_blank\"", "title=\"motherless link\"")
    regex_links = re.escape("<a href=\"") + r"([\w\/]+)" + re.escape("\" title=\"motherless link\">")
    return re.findall(regex_links, page_text)


def _mless_collect_links(opener, base_url, query_suffix, find_links):
    """Walk all pages of a paginated listing and return every link found."""
    links = []
    page_text = _mless_fetch_page(opener, base_url + "?page=1" + query_suffix)
    # The page count is taken from the pager on page 1 only.
    numpages = _mless_count_pages(page_text)
    for curpage in range(1, numpages + 1):
        if curpage > 1:
            page_text = _mless_fetch_page(opener, base_url + "?page=" + str(curpage) + query_suffix)
        links.extend(find_links(page_text))
    return links


def _mless_is_random(parts):
    """True for /random, /random/image... and /random/video... paths."""
    if not parts[1].startswith("random"):
        return False
    if len(parts) == 2:
        return True
    return len(parts) == 3 and parts[2].startswith(("image", "video"))


def _mless_is_gallery_index(parts):
    """True for four-segment paths that list galleries rather than items."""
    if len(parts) != 4:
        return False
    if parts[1].startswith("galleries"):
        return True
    return parts[1].startswith(("f", "term")) and parts[2].startswith("galleries")


def _mless_is_listing(parts):
    """True for paths whose page lists items (gallery, user, group, search...)."""
    head = parts[1]
    if len(parts) == 2:
        return head.startswith(("G", "H", "V", "live"))
    if len(parts) == 3:
        return (head.startswith(("g", "u"))
                or (head.startswith("live") and parts[2].startswith(("images", "videos")))
                or (head.startswith(("images", "videos"))
                    and parts[2].startswith(("favorited", "viewed", "commented", "popular"))))
    if len(parts) == 4:
        return ((head.startswith("term") and parts[2].startswith(("videos", "images")))
                or (head.startswith("f") and parts[3].startswith(("videos", "images"))))
    return False


def _mless_is_single_item(parts):
    """True for /<ID> and /G<gallery>/<ID> paths, where ID starts with 0-9A-F."""
    if len(parts) == 2:
        return re.match("([0-9A-F]+)", parts[1]) is not None
    if len(parts) == 3:
        return parts[1].startswith("G") and re.match("([0-9A-F]+)", parts[2]) is not None
    return False


def _mless_first(pattern, text):
    """Return the first capture of pattern in text, or "" when it is absent."""
    found = re.findall(pattern, text)
    return found[0] if found else ""


def _mless_parse_item(page_text, item_path, id_filename):
    """Build the result dict for one item page, or None if it has no media URL."""
    file_urls = re.findall(re.escape("__fileurl = '") + "(.*)" + re.escape("';"), page_text)
    if not file_urls:
        return None
    mlesslink = file_urls[0]
    mlessext = os.path.splitext(urlparse.urlparse(mlesslink).path)[1].replace(".", "").lower()
    mlessid = re.sub("^" + re.escape("/"), "", item_path)
    if id_filename:
        mlessfname = mlessid.replace("/", "_") + "." + mlessext
    else:
        mlessfname = urlparse.urlsplit(mlesslink).path.split("/")[-1]
    is_video = mlessext in ("mp4", "flv")
    mlesstitle = _mless_first(re.escape("<title>") + "(.*)" + re.escape("</title>"), page_text)
    mlesstitle = mlesstitle.replace(" - MOTHERLESS.COM", "")
    mlessthumb = _mless_first(re.escape("src=&quot;") + "(.*)" + re.escape("&quot;"), page_text)
    mlessimg = _mless_first(re.escape("<meta property=\"og:image\" content=\"") + "(.*)" + re.escape("\">"), page_text)
    mlessusrname = _mless_first(re.escape("<a href=\"/u/") + r"([\w]+)" + re.escape("\" class=\"pop plain thumb-member-link-uploads\">Uploads</a>"), page_text)
    # For images the picture is the file itself; for videos it is the og:image.
    mlessvidpic = mlessimg if is_video else mlesslink
    if is_video:
        mlesslink = mlesslink + "?start=0"
    return {"id": mlessid,
            "title": mlesstitle,
            "format": mlessext,
            "filename": mlessfname,
            "thumbnail": mlessthumb,
            "vidpic": mlessvidpic,
            "username": mlessusrname,
            "pageurl": _MLESS_SITE + item_path,
            "url": mlesslink}


def motherless_dl(mtlessgetargs=vars(getargs)):
    """Resolve motherless URLs to a list of media descriptions.

    mtlessgetargs is a dict with the keys "url" (list of URLs or site paths),
    "user_agent", "referer" and "id" (true to name files after the item id).
    It defaults to the parsed command line.

    Each URL may name a single item, a random redirect, a listing of items, or
    an index of galleries; listings and indexes are followed page by page.
    Paging requests are built from the URL's path only; of the query string,
    only t=i / t=v on /u/<name> pages is carried over.

    Returns one dict per item, across all given URLs, with the keys "id",
    "title", "format", "filename", "thumbnail", "vidpic", "username",
    "pageurl" and "url". Items whose page exposes no file URL are skipped; a
    title, thumbnail, picture or username missing from a page is returned as
    "". Network errors from urllib2 propagate to the caller.
    """
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
    geturls_opener.addheaders = [("Referer", mtlessgetargs["referer"]), ("User-Agent", mtlessgetargs["user_agent"]), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]
    # Pauses (seconds) between galleries and between item pages.
    per_gal_sleep = 0
    per_url_sleep = 0
    # Created once so results from every URL and gallery accumulate.
    mlessoutlist = []
    for rawurl in mtlessgetargs["url"]:
        mlessvid = _mless_site_path(rawurl)
        mlessvidid = urlparse.urlparse(mlessvid).path.split("/")
        if _mless_is_random(mlessvidid):
            # The site answers with a redirect; the final URL is the real item.
            redirected = geturls_opener.open(_MLESS_SITE + mlessvid)
            try:
                mlessvid = _mless_site_path(redirected.geturl())
            finally:
                redirected.close()
        mlessgallist = []
        if _mless_is_gallery_index(mlessvidid):
            mlessgallist.extend(_mless_collect_links(geturls_opener, _MLESS_SITE + urlparse.urlparse(mlessvid).path, "", _mless_thumb_links))
        # Anything that is not a /galleries index of depth 4 or 5 is itself
        # treated as one "gallery" to inspect.
        if not mlessvidid[1].startswith("galleries") or len(mlessvidid) < 4 or len(mlessvidid) > 5:
            mlessgallist.append(mlessvid)
        for curusrgal, galpath in enumerate(mlessgallist):
            if not galpath.startswith("/"):
                galpath = "/" + galpath
            galparsed = urlparse.urlparse(galpath)
            galparts = galparsed.path.split("/")
            mlessurllist = []
            if _mless_is_listing(galparts):
                tvaradd = ""
                if galparts[1].startswith("u") and len(galparts) == 3:
                    # User pages take t=i / t=v to select images or videos.
                    tvalue = urlparse.parse_qs(galparsed.query).get("t", [""])[0]
                    if tvalue in ("i", "v"):
                        tvaradd = "&t=" + tvalue
                if galparts[1].startswith("V"):
                    find_links = _mless_v_listing_links
                else:
                    find_links = _mless_thumb_links
                mlessurllist.extend(_mless_collect_links(geturls_opener, _MLESS_SITE + galparsed.path, tvaradd, find_links))
            if _mless_is_single_item(galparts):
                mlessurllist.append(galpath)
            for curlurl, itempath in enumerate(mlessurllist):
                subout_text = _mless_fetch_page(geturls_opener, _MLESS_SITE + itempath)
                mlesslistitms = _mless_parse_item(subout_text, itempath, mtlessgetargs["id"])
                if mlesslistitms is not None:
                    mlessoutlist.append(mlesslistitms)
                if curlurl < len(mlessurllist) - 1:
                    time.sleep(per_url_sleep)
            if curusrgal < len(mlessgallist) - 1:
                time.sleep(per_gal_sleep)
    return mlessoutlist
# Resolve every URL from the command line, then print the requested fields
# for each item, one value per line.
mtlesslinks = motherless_dl()

# Switches printed before and after the thumbnail, paired with the item key.
mtless_fields_before = (("get_id", "id"), ("get_title", "title"), ("get_format", "format"), ("get_filename", "filename"))
mtless_fields_after = (("get_username", "username"), ("get_pageurl", "pageurl"))
# The media URL is printed on request, and also when no field switch is set.
mtless_any_field = (getargs.get_id or getargs.get_title or getargs.get_format or getargs.get_filename or getargs.get_thumbnail or getargs.get_username or getargs.get_pageurl)
mtless_show_url = getargs.get_url or not mtless_any_field

for mtlessitem in mtlesslinks:
    for mtlessflag, mtlesskey in mtless_fields_before:
        if getattr(getargs, mtlessflag):
            print(mtlessitem[mtlesskey])
    if getargs.get_thumbnail:
        print(mtlessitem["thumbnail"])
        # NOTE(review): the source lost its indentation; the vidpic line is
        # assumed to belong to --get-thumbnail - confirm against the original.
        if mtlessitem["format"] in ("mp4", "flv"):
            print(mtlessitem["vidpic"])
    for mtlessflag, mtlesskey in mtless_fields_after:
        if getattr(getargs, mtlessflag):
            print(mtlessitem[mtlesskey])
    if mtless_show_url:
        print(mtlessitem["url"])