#!/usr/bin/env python
# Source: [Python-Scripts.git] / MiniScripts / motherless-dl.py
# Blob: d11ddf0b1259a34967c99e202d75caa16f8d0704
# Commit message: Yet another small update.

'''
This program is free software; you can redistribute it and/or modify
it under the terms of the Revised BSD License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Revised BSD License for more details.

Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski

$FileInfo: motherless-dl.py - Last Update: 10/07/2013 Ver. 1.4.5 RC 2 - Author: cooldude2k $
'''
import argparse
import cookielib
import datetime
import gzip
import os
import re
import StringIO
import sys
import time
import urllib
import urllib2
import urlparse
import zlib
# Version as a tuple: (major, minor, patch, release tag or None).
__version_info__ = (1, 4, 5, "RC 2")

# Human-readable version: "major.minor.patch", followed by a space and the
# release tag when one is set.
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    __version__ = __version__ + " " + str(__version_info__[3])
# Command-line interface. Every positional argument is one URL to process.
parser = argparse.ArgumentParser()
parser.add_argument("url", nargs="*", help="motherless url")
parser.add_argument(
    "--user-agent",
    nargs="?",
    default="Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0",
    help="specify a custom user agent",
)
parser.add_argument(
    "--referer",
    nargs="?",
    default="http://motherless.com/",
    help="specify a custom referer, use if the video access is restricted to one domain",
)
parser.add_argument("--verbose", action="store_true", help="print various debugging information")
parser.add_argument("--dump-user-agent", action="store_true", help="display the current browser identification")
parser.add_argument("--version", action="store_true", help="print program version and exit")
parser.add_argument(
    "--update",
    action="store_true",
    help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)",
)
getargs = parser.parse_args()

# Informational switches print their answer and stop before any network use.
if getargs.version:
    print(__version__)
    sys.exit()
if getargs.dump_user_agent:
    print(getargs.user_agent)
    sys.exit()

# Without at least one URL there is nothing to do: show the usage text.
if not getargs.url:
    parser.print_help()
    sys.exit()
# User agent sent with every request made by this script.
fakeua = getargs.user_agent

# Cookie-aware opener used for all HTML page requests.
geturls_cj = cookielib.CookieJar()
geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj))
geturls_opener.addheaders = [
    ("Referer", getargs.referer),
    ("User-Agent", fakeua),
    ("Accept-Encoding", "gzip, deflate"),
    ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
    ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
    ("Connection", "close"),
]

# Seconds to pause between galleries and between item pages (0 = no pause).
per_gal_sleep = 0
per_url_sleep = 0
# Base of every page URL requested below.
_SITE_ROOT = "http://motherless.com"

# Host prefixes removed from user-supplied URLs. The "www." forms come before
# the bare ones so that "www.motherless.com/X" is not reduced to "www.X".
_HOST_PREFIXES = (
    "https://www.motherless.com/",
    "https://motherless.com/",
    "http://www.motherless.com/",
    "http://motherless.com/",
    "www.motherless.com/",
    "motherless.com/",
)

# Captures the path part of a canonical "http://motherless.com/<path>" URL.
_PATH_REGEX = re.escape("http://motherless.com/") + r"([\w\/]+)"
# Pager links; the first group of the last match is used as the page count.
_PAGER_REGEX = (re.escape("class=\"pop\" rel=\"") + "([0-9]+)" + re.escape("\">")
                + "([0-9]+)" + re.escape("</a>"))
# Path that immediately precedes the thumbnail-link attributes.
_THUMB_LINK_REGEX = (re.escape("") + r"([\w\/]+)"
                     + re.escape("\" class=\"img-container\" target=\"_self\">"))
# Link form produced after the attribute rewrite applied to "V..." listings.
_TAGGED_LINK_REGEX = (re.escape("<a href=\"") + r"([\w\/]+)"
                      + re.escape("\" title=\"motherless link\">"))
# JavaScript assignment on an item page that holds the media file URL.
_FILE_URL_REGEX = re.escape("__fileurl = '") + "(.*)" + re.escape("';")


def _decode_body(raw, encoding):
    """Return *raw* decoded according to the Content-Encoding value *encoding*."""
    if encoding == "gzip":
        return gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
    if encoding == "deflate":
        # "deflate" bodies are zlib streams, not gzip files. Try the
        # zlib-wrapped form first and fall back to a raw deflate stream.
        try:
            return zlib.decompress(raw)
        except zlib.error:
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def _fetch_page(url):
    """Download *url* with geturls_opener and return its decoded text.

    Absolute links to the site are reduced to paths by removing the
    "http://motherless.com" and "http://www.motherless.com" prefixes.
    """
    response = geturls_opener.open(url)
    text = _decode_body(response.read(), response.info().get("Content-Encoding"))
    text = text.replace("http://motherless.com", "")
    text = text.replace("http://www.motherless.com", "")
    return text


def _iter_pages(path):
    """Yield the text of every page of the paginated listing at *path*.

    Page 1 is fetched first; its pager links determine how many further
    pages are requested. A listing without pager links has one page.
    """
    first_page = _fetch_page(_SITE_ROOT + path + "?page=1")
    pager = re.findall(_PAGER_REGEX, first_page)
    try:
        numpages = int(pager[-1][0])
    except IndexError:
        numpages = 1
    yield first_page
    for pagenum in range(2, numpages + 1):
        yield _fetch_page(_SITE_ROOT + path + "?page=" + str(pagenum))


def _is_gallery_index(parts):
    """Return True when the split path *parts* names a list of galleries."""
    # The length is checked first so parts[2] is never read on a short path.
    if len(parts) != 4:
        return False
    if parts[1].startswith("galleries"):
        return True
    return parts[1].startswith("f") and parts[2].startswith("galleries")


def _is_listing_path(parts):
    """Return True when the split path *parts* names a listing of items."""
    head = parts[1]
    count = len(parts)
    if count == 2:
        return head.startswith(("G", "V"))
    if count == 3:
        if head.startswith("g"):
            return True
        if head.startswith("live"):
            return parts[2].startswith(("images", "videos"))
        if head.startswith(("images", "videos")):
            return parts[2].startswith(("favorited", "viewed", "commented", "popular"))
        return False
    if count == 4:
        return head.startswith("f") and parts[3].startswith(("videos", "images"))
    return False


def _is_item_path(parts):
    """Return True when the split path *parts* names a single item page."""
    if len(parts) == 3 and parts[1].startswith("G"):
        return bool(re.match("([0-9A-F]+)", parts[2]))
    if len(parts) == 2:
        return bool(re.match("([0-9A-F]+)", parts[1]))
    return False


# Process every URL given on the command line and print the media file URL
# of each item found. The loop runs at module level on purpose: mlessvid and
# mlesslink keep their last values and are read by the download code below.
for cururlarg, mlessvid in enumerate(getargs.url):
    # Normalise the argument to a site-relative path such as "/ABC123".
    for host_prefix in _HOST_PREFIXES:
        mlessvid = mlessvid.replace(host_prefix, "")
    mlessvid = re.sub("^" + re.escape("/"), "", mlessvid)
    mlessvid = "http://motherless.com/" + mlessvid
    path_match = re.findall(_PATH_REGEX, mlessvid)
    if path_match:
        mlessvid = "/" + path_match[0]
    mlessvidid = urlparse.urlparse(mlessvid).path.split("/")
    mlessgallist = []

    # "/random", "/random/image..." and "/random/video..." redirect to an
    # item; follow the redirect and continue with the address it leads to.
    if re.match("^random", mlessvidid[1]) and (
            len(mlessvidid) == 2
            or (len(mlessvidid) == 3
                and (re.match("^image", mlessvidid[2]) or re.match("^video", mlessvidid[2])))):
        redirected = geturls_opener.open(_SITE_ROOT + mlessvid)
        mlessvid = redirected.geturl()
        path_match = re.findall(_PATH_REGEX, mlessvid)
        if path_match:
            mlessvid = path_match[0]

    # A gallery index expands to every gallery linked from its pages.
    if _is_gallery_index(mlessvidid):
        for page_text in _iter_pages(mlessvid):
            mlessgallist.extend(re.findall(_THUMB_LINK_REGEX, page_text))

    # Anything except a "galleries" path of 4 or 5 parts is processed itself.
    if not (re.match("^galleries", mlessvidid[1]) and 4 <= len(mlessvidid) <= 5):
        mlessgallist.append(mlessvid)

    for curusrgal, mlessvid in enumerate(mlessgallist):
        if not mlessvid.startswith("/"):
            mlessvid = "/" + mlessvid
        mlessvidid = urlparse.urlparse(mlessvid).path.split("/")
        mlessurllist = []

        # A listing contributes every item linked from each of its pages.
        if _is_listing_path(mlessvidid):
            rewrite_links = mlessvidid[1].startswith("V")
            for page_text in _iter_pages(mlessvid):
                if rewrite_links:
                    # Give both link styles used on these pages one common
                    # attribute so a single pattern finds them all.
                    page_text = page_text.replace(
                        "class=\"img-container\" target=\"_self\"", "title=\"motherless link\"")
                    page_text = page_text.replace(
                        "class=\"pop plain\" target=\"_blank\"", "title=\"motherless link\"")
                    link_regex = _TAGGED_LINK_REGEX
                else:
                    link_regex = _THUMB_LINK_REGEX
                mlessurllist.extend(re.findall(link_regex, page_text))

        # A single item page contributes itself.
        if _is_item_path(mlessvidid):
            mlessurllist.append(mlessvid)

        for curlurl, item_path in enumerate(mlessurllist):
            item_text = _fetch_page(_SITE_ROOT + item_path)
            file_urls = re.findall(_FILE_URL_REGEX, item_text)
            # Pages without a file URL are skipped instead of raising
            # IndexError on the empty match list.
            if file_urls:
                mlesslink = file_urls[0]
                mlessext = os.path.splitext(urlparse.urlparse(mlesslink).path)[1]
                mlessext = mlessext.replace(".", "").lower()
                if mlessext in ("mp4", "flv"):
                    mlesslink = mlesslink + "?start=0"
                print(mlesslink)
            # Pause between item pages, but not after the last one.
            if curlurl < len(mlessurllist) - 1:
                time.sleep(per_url_sleep)

        # Pause between galleries, but not after the last one.
        if curusrgal < len(mlessgallist) - 1:
            time.sleep(per_gal_sleep)
# The download below fetches the last file URL printed by the scraping loop.
# When no page yielded a file URL the name mlesslink was never assigned, so
# stop with a message instead of failing with NameError.
if not globals().get("mlesslink"):
    sys.exit("motherless-dl: no downloadable file URL was found")

# Separate cookie-aware opener for the media request; its Referer is the
# configured referer followed by the path of the last page processed.
# NOTE(review): Accept-Encoding is advertised here although the body is saved
# to disk undecoded — confirm the media server never compresses these files.
getvidurls_cj = cookielib.CookieJar()
getvidurls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(getvidurls_cj))
getvidurls_opener.addheaders = [
    ("Referer", getargs.referer + mlessvid),
    ("User-Agent", fakeua),
    ("Accept-Encoding", "gzip, deflate"),
    ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
    ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
    ("Connection", "close"),
]
getvidurls_text = getvidurls_opener.open(mlesslink)
def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a one-line download progress report to standard output.

    bytes_so_far -- number of bytes received so far.
    chunk_size   -- size of the read requests; accepted for the report-hook
                    signature but not used in the report.
    total_size   -- expected total number of bytes; zero or less means the
                    total is unknown.

    The line ends with a carriage return so the next report overwrites it.
    A newline is written once bytes_so_far reaches total_size.
    """
    if total_size <= 0:
        # No usable total: report the byte count alone rather than dividing
        # by zero.
        sys.stdout.write("Downloaded %d bytes\r" % bytes_so_far)
        return
    percent = round(float(bytes_so_far) / total_size * 100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, total_size, percent))
    if bytes_so_far >= total_size:
        sys.stdout.write("\n")
def chunk_read(response, chunk_size=8192, report_hook=None):
    """Read *response* to the end in chunks and return the number of bytes read.

    response    -- object with info(), returning the response headers, and
                   read(size).
    chunk_size  -- number of bytes requested per read.
    report_hook -- optional callable invoked after every non-empty chunk as
                   report_hook(bytes_so_far, chunk_size, total_size).

    The chunks themselves are discarded and the response is left exhausted;
    a caller that needs the data must capture it while it is being read.
    When the Content-Length header is missing or not a number the total size
    is unknown, and report_hook is not called.
    """
    headers = response.info()
    # Python 2 header objects provide getheader(); fall back to get() for
    # header objects that only offer the mapping interface.
    header_lookup = getattr(headers, "getheader", None) or headers.get
    raw_length = header_lookup("Content-Length")
    total_size = None
    if raw_length is not None:
        try:
            total_size = int(raw_length.strip())
        except ValueError:
            total_size = None
    bytes_so_far = 0
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        bytes_so_far += len(chunk)
        if report_hook and total_size is not None:
            report_hook(bytes_so_far, chunk_size, total_size)
    return bytes_so_far
class _TeeResponse(object):
    """Proxy for a response that copies every chunk read from it into a file."""

    def __init__(self, response, sink):
        self._response = response
        self._sink = sink

    def info(self):
        # Headers come straight from the wrapped response.
        return self._response.info()

    def read(self, *args):
        data = self._response.read(*args)
        self._sink.write(data)
        return data


# Save the media file in the current directory under the last component of
# the URL path. chunk_read() consumes the response while it reports progress,
# so a read() afterwards would return nothing and leave the file empty; the
# proxy above writes each chunk to disk as chunk_read() pulls it.
vidfilename = os.path.join(os.getcwd(), os.path.basename(urlparse.urlsplit(mlesslink)[2]))
vidfile = open(vidfilename, "wb")
try:
    chunk_read(_TeeResponse(getvidurls_text, vidfile), report_hook=chunk_report)
finally:
    # Close the file even when the transfer fails part-way.
    vidfile.close()