More updates to command line arguments.
[Python-Scripts.git] / MiniScripts / motherless-dl.py
blob9d3f4f1dff2dd87b76175fa68fc75fbc6ac1232d
1 #!/usr/bin/env python
3 '''
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: motherless-dl.py - Last Update: 10/07/2013 Ver. 1.5.0 RC 1 - Author: cooldude2k $
17 '''
19 import re, os, sys, urllib, urllib2, cookielib, StringIO, gzip, time, datetime, argparse, urlparse;
# Version is kept as a tuple (major, minor, patch, pre-release tag or None);
# the printable string is derived from it so the two can never drift apart.
__version_info__ = (1, 5, 0, "RC 1")
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    # A pre-release tag is appended after a single space, e.g. "1.5.0 RC 1".
    __version__ = __version__ + " " + str(__version_info__[3])
# Command-line interface: declare the options, parse them, and handle the
# informational switches that exit before any network access happens.
_DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0"
_DEFAULT_REFERER = "http://motherless.com/"

parser = argparse.ArgumentParser()
parser.add_argument("url", nargs="*", help="motherless url")
parser.add_argument("--version", action="store_true", help="print program version and exit")
parser.add_argument("--update", action="store_true", help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)")
parser.add_argument("--dump-user-agent", action="store_true", help="display the current browser identification")
# nargs="?" lets the option appear without a value; const makes that case fall
# back to the default instead of handing None to the HTTP header list.
parser.add_argument("--user-agent", nargs="?", const=_DEFAULT_USER_AGENT, default=_DEFAULT_USER_AGENT, help="specify a custom user agent")
parser.add_argument("--referer", nargs="?", const=_DEFAULT_REFERER, default=_DEFAULT_REFERER, help="specify a custom referer, use if the video access is restricted to one domain")
parser.add_argument("--get-url", action="store_true", help="simulate, quiet but print URL")
parser.add_argument("--get-title", action="store_true", help="simulate, quiet but print title")
parser.add_argument("--get-id", action="store_true", help="simulate, quiet but print id")
parser.add_argument("--get-thumbnail", action="store_true", help="simulate, quiet but print thumbnail URL")
parser.add_argument("--get-filename", action="store_true", help="simulate, quiet but print output filename")
parser.add_argument("--get-format", action="store_true", help="simulate, quiet but print output format")
parser.add_argument("--verbose", action="store_true", help="print various debugging information")
getargs = parser.parse_args()

if getargs.version:
    print(__version__)
    sys.exit()
if getargs.dump_user_agent:
    print(getargs.user_agent)
    sys.exit()
if not getargs.url:
    # Nothing to work on: show usage rather than silently doing nothing.
    parser.print_help()
    sys.exit()
# Resolve each command-line URL into media links and print what was asked for.
#
# Flow: argument -> site-relative path -> list of galleries (when the path is a
# gallery index) -> list of item pages -> per-item fields scraped from the HTML
# with regular expressions.
#
# NOTE(review): this copy of the file has lost its indentation, so the nesting
# below is reconstructed from statement order. In particular the extra
# print(mlessimg) for videos is assumed to belong to --get-thumbnail. Confirm
# against the repository before merging.
import zlib  # only the "deflate" branch of _mless_read_body needs it

fakeua = getargs.user_agent
geturls_cj = cookielib.CookieJar()
geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj))
geturls_opener.addheaders = [
    ("Referer", getargs.referer),
    ("User-Agent", fakeua),
    ("Accept-Encoding", "gzip, deflate"),
    ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
    ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
    ("Connection", "close"),
]
# Seconds to pause between galleries / between item pages (0 = no pause).
per_gal_sleep = 0
per_url_sleep = 0

_MLESS_SITE = "http://motherless.com"
_MLESS_VIDEO_EXTS = ("mp4", "flv")
# Pager links; the "rel" value of the last one is used as the page count.
_MLESS_PAGER_REGEX = re.escape("class=\"pop\" rel=\"")+"([0-9]+)"+re.escape("\">")+"([0-9]+)"+re.escape("</a>")
# Path immediately preceding a thumbnail anchor's class attribute.
_MLESS_THUMB_LINK_REGEX = r"([\w\/]+)"+re.escape("\" class=\"img-container\" target=\"_self\">")
# Anchor form produced by the attribute rewrite done for "V" listings.
_MLESS_BOARD_LINK_REGEX = re.escape("<a href=\"")+r"([\w\/]+)"+re.escape("\" title=\"motherless link\">")


def _mless_site_path(url):
    """Return the site-relative path ("/ABC123") named by url, or None.

    Accepts a bare id/path or a URL with an optional http/https scheme and an
    optional "www." host prefix. Only the leading run of word characters and
    slashes is kept, so query strings and fragments are dropped.
    """
    tail = re.sub(r"^(?:https?://)?(?:www\.)?motherless\.com/", "", url)
    found = re.match(r"[\w\/]+", tail.lstrip("/"))
    if found is None:
        return None
    return "/" + found.group(0)


def _mless_read_body(response):
    """Read an opened response and undo its Content-Encoding, if any."""
    raw = response.read()
    encoding = response.info().get("Content-Encoding")
    if encoding == "gzip":
        return gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
    if encoding == "deflate":
        # "deflate" is normally zlib-wrapped; some servers send a raw stream.
        try:
            return zlib.decompress(raw)
        except zlib.error:
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def _mless_fetch(path):
    """Fetch a site-relative path and return its body with absolute site
    prefixes removed, so every link in the page is site-relative."""
    body = _mless_read_body(geturls_opener.open(_MLESS_SITE + path))
    body = body.replace("http://motherless.com", "")
    body = body.replace("http://www.motherless.com", "")
    return body


def _mless_page_count(listing_html):
    """Return the number of pages advertised by a listing's pager (1 if none)."""
    pager = re.findall(_MLESS_PAGER_REGEX, listing_html)
    if not pager:
        return 1
    return int(pager[-1][0])


def _mless_collect_links(path, board_style=False):
    """Walk every page of the listing at path and return the linked paths.

    With board_style the two anchor attribute forms are first rewritten to a
    common marker so that a single pattern picks up both kinds of link.
    """
    links = []
    page_html = _mless_fetch(path + "?page=1")
    numpages = _mless_page_count(page_html)
    for curpage in range(1, numpages + 1):
        if curpage > 1:
            page_html = _mless_fetch(path + "?page=" + str(curpage))
        if board_style:
            page_html = page_html.replace("class=\"img-container\" target=\"_self\"", "title=\"motherless link\"")
            page_html = page_html.replace("class=\"pop plain\" target=\"_blank\"", "title=\"motherless link\"")
            links.extend(re.findall(_MLESS_BOARD_LINK_REGEX, page_html))
        else:
            links.extend(re.findall(_MLESS_THUMB_LINK_REGEX, page_html))
    return links


def _mless_is_listing(parts):
    """Tell whether the split path names a multi-item listing page."""
    head = parts[1]
    count = len(parts)
    if count == 2:
        return head.startswith(("G", "V"))
    if count == 3:
        if head.startswith("g"):
            return True
        if head.startswith("live"):
            return parts[2].startswith(("images", "videos"))
        if head.startswith(("images", "videos")):
            return parts[2].startswith(("favorited", "viewed", "commented", "popular"))
        return False
    if count == 4:
        return head.startswith("f") and parts[3].startswith(("videos", "images"))
    return False


def _mless_is_item(parts):
    """Tell whether the split path names a single item page."""
    if len(parts) == 3 and parts[1].startswith("G"):
        return bool(re.match("[0-9A-F]+", parts[2]))
    if len(parts) == 2:
        return bool(re.match("[0-9A-F]+", parts[1]))
    return False


def _mless_first(pattern, text, default=""):
    """Return the first re.findall result for pattern in text, or default."""
    found = re.findall(pattern, text)
    if found:
        return found[0]
    return default


for mlessarg in getargs.url:
    mlessvid = _mless_site_path(mlessarg)
    if mlessvid is None:
        # Nothing path-like in the argument (e.g. the bare site root).
        continue
    mlessvidid = urlparse.urlparse(mlessvid).path.split('/')
    mlessgallist = []

    # "/random", "/random/image..." and "/random/video..." redirect to an
    # actual item; follow the redirect and continue with where it landed.
    if mlessvidid[1].startswith("random") and (
            len(mlessvidid) == 2
            or (len(mlessvidid) == 3 and mlessvidid[2].startswith(("image", "video")))):
        geturls_text = geturls_opener.open(_MLESS_SITE + mlessvid)
        redirected = _mless_site_path(geturls_text.geturl())
        if redirected is not None:
            mlessvid = redirected

    # A gallery index expands into the galleries it links to.
    in_galleries = mlessvidid[1].startswith("galleries")
    if len(mlessvidid) == 4 and (
            in_galleries
            or (mlessvidid[1].startswith("f") and mlessvidid[2].startswith("galleries"))):
        mlessgallist.extend(_mless_collect_links(mlessvid))
    if not in_galleries or len(mlessvidid) < 4 or len(mlessvidid) > 5:
        mlessgallist.append(mlessvid)

    numusrgal = len(mlessgallist)
    for curusrgal, mlessvid in enumerate(mlessgallist):
        if not mlessvid.startswith("/"):
            mlessvid = "/" + mlessvid
        mlessvidid = urlparse.urlparse(mlessvid).path.split('/')
        mlessurllist = []
        if _mless_is_listing(mlessvidid):
            mlessurllist.extend(_mless_collect_links(mlessvid, board_style=mlessvidid[1].startswith("V")))
        if _mless_is_item(mlessvidid):
            mlessurllist.append(mlessvid)

        numlist = len(mlessurllist)
        for curlurl, mlessitem in enumerate(mlessurllist):
            subout_text = _mless_fetch(mlessitem)
            mlesstitle = _mless_first(re.escape("<title>")+"(.*)"+re.escape("</title>"), subout_text)
            mlesstitle = mlesstitle.replace(" - MOTHERLESS.COM", "")
            mlessthumb = _mless_first(re.escape("src=&quot;")+"(.*)"+re.escape("&quot;"), subout_text)
            mlessimg = _mless_first(re.escape("<meta property=\"og:image\" content=\"")+"(.*)"+re.escape("\">"), subout_text)
            mlessaltimg = _mless_first(re.escape("<link rel=\"image_src\" type=\"image/")+"(.*)"+re.escape("\" href=\"")+"(.*)"+re.escape("\">"), subout_text, ("", ""))[1]
            mlessid = re.sub("^/", "", mlessitem)
            post_text = re.findall(re.escape("__fileurl = '")+"(.*)"+re.escape("';"), subout_text)

            if not post_text:
                # Without a file URL there is nothing to report for this item.
                sys.stderr.write("motherless-dl: no file URL found for " + mlessid + "\n")
            else:
                mlesslink = post_text[0]
                mlessext = os.path.splitext(urlparse.urlparse(mlesslink).path)[1]
                mlessext = mlessext.replace(".", "").lower()
                mlessfname = urlparse.urlsplit(mlesslink).path.split('/')[-1]
                if mlessext in _MLESS_VIDEO_EXTS:
                    mlesslink = mlesslink + "?start=0"
                    post_vi_file = re.findall(re.escape("\"file\" : \"")+"(.*)"+re.escape("\","), subout_text)
                    post_vi_image = re.findall(re.escape("\"image\" : \"")+"(.*)"+re.escape("\","), subout_text)
                    post_vi_height = re.findall(re.escape("\"height\" : ")+"([0-9]+)"+re.escape(","), subout_text)
                    post_vi_width = re.findall(re.escape("\"width\" : ")+"([0-9]+)"+re.escape(","), subout_text)
                    post_vi_filethumb = re.findall(re.escape("\"file\": ")+"(.*)"+re.escape(","), subout_text)
                    post_vi_kind = re.findall(re.escape("\"kind\": \"")+"(.*)"+re.escape("\""), subout_text)
                    # Player metadata is not printed below, so a page that
                    # lacks part of it must not abort the run.
                    try:
                        vidinfo = {"file": post_vi_file[0], "image": post_vi_image[0], "height": int(post_vi_height[0]), "width": int(post_vi_width[0]), "filethumb": post_vi_filethumb[0], "kind": post_vi_kind[0]}
                    except (IndexError, ValueError):
                        vidinfo = {}
                else:
                    post_ii_dimensions = re.findall(re.escape("style=\"width: ")+"([0-9]+)"+re.escape("px; height: ")+"([0-9]+)"+re.escape("px; border: none;\""), subout_text)
                    if post_ii_dimensions:
                        post_ii_width = post_ii_dimensions[0][0]
                        post_ii_height = post_ii_dimensions[0][1]
                        imginfo = {"height": int(post_ii_height), "width": int(post_ii_width)}
                    else:
                        imginfo = {}

                if getargs.get_id:
                    print(mlessid)
                if getargs.get_title:
                    print(mlesstitle)
                if getargs.get_format:
                    print(mlessext)
                if getargs.get_filename:
                    print(mlessfname)
                if getargs.get_thumbnail:
                    print(mlessthumb)
                    if mlessext in _MLESS_VIDEO_EXTS:
                        print(mlessimg)
                # The media URL is the default output when no --get-* switch
                # was given, and is also printed on explicit --get-url.
                if getargs.get_url or not (getargs.get_id or getargs.get_title or getargs.get_format or getargs.get_filename or getargs.get_thumbnail):
                    print(mlesslink)

            if curlurl < (numlist - 1):
                time.sleep(per_url_sleep)
        if curusrgal < (numusrgal - 1):
            time.sleep(per_gal_sleep)
# Open the media URL with its own cookie jar, sending the item page as Referer.
# NOTE(review): indentation is lost in this copy; this is assumed to run once
# after the URL loop, using the last values of mlessvid and mlesslink.
getvidurls_cj = cookielib.CookieJar()
getvidurls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(getvidurls_cj))
getvidurls_opener.addheaders = [
    # Join with exactly one slash: the default referer ends in "/" and the
    # item path starts with one, which previously produced "...com//ID".
    ("Referer", getargs.referer.rstrip("/") + "/" + mlessvid.lstrip("/")),
    ("User-Agent", fakeua),
    ("Accept-Encoding", "gzip, deflate"),
    ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
    ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
    ("Connection", "close"),
]
getvidurls_text = getvidurls_opener.open(mlesslink)
def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a single-line download progress indicator to stdout.

    bytes_so_far -- number of bytes received up to now.
    chunk_size   -- size of each read request; accepted so the function can be
                    used as a chunk_read report hook, not used in the output.
    total_size   -- expected total in bytes. When it is None, zero or negative
                    only the byte count is shown, because a percentage cannot
                    be computed.

    The line ends with a carriage return so successive calls overwrite each
    other; a newline is written once bytes_so_far reaches total_size.
    """
    if not total_size or total_size < 0:
        sys.stdout.write("Downloaded %d bytes\r" % bytes_so_far)
        sys.stdout.flush()
        return
    percent = round(float(bytes_so_far) / total_size * 100, 2)
    sys.stdout.write("Downloaded %d of %d bytes (%0.2f%%)\r" %
                     (bytes_so_far, total_size, percent))
    if bytes_so_far >= total_size:
        sys.stdout.write("\n")
    # "\r" does not trigger a line-buffered flush, so push the update out.
    sys.stdout.flush()
def chunk_read(response, chunk_size=8192, report_hook=None):
    """Drain a response in fixed-size reads and return the byte count.

    response    -- object providing read(size) and info(); info() must return
                   a mapping-like header object with a get() method.
    chunk_size  -- number of bytes requested per read.
    report_hook -- optional callable(bytes_so_far, chunk_size, total_size)
                   invoked after every non-empty chunk. It is only called when
                   the response carries a usable Content-Length, since the
                   hook needs a total to report against.

    The chunks themselves are discarded: after this returns, the response is
    exhausted and a further read() on it yields no data.
    """
    length_header = response.info().get("Content-Length")
    total_size = None
    if length_header is not None:
        try:
            total_size = int(length_header)
        except ValueError:
            # Malformed header: treat the length as unknown.
            total_size = None

    bytes_so_far = 0
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        bytes_so_far += len(chunk)
        if report_hook and total_size is not None:
            report_hook(bytes_so_far, chunk_size, total_size)
    return bytes_so_far
# Save the opened media response into the current directory, named after the
# last path component of the media URL, while chunk_read reports progress.
class _SavingResponse(object):
    """Response proxy that copies every chunk read through it into a file.

    chunk_read consumes the response and throws the data away, so reading the
    response again afterwards yields nothing. Routing the reads through this
    proxy captures the bytes as they stream past instead.
    """

    def __init__(self, response, sink):
        self._response = response
        self._sink = sink

    def info(self):
        return self._response.info()

    def read(self, *args):
        chunk = self._response.read(*args)
        self._sink.write(chunk)
        return chunk


vidfile = open(os.path.join(os.getcwd(), os.path.basename(urlparse.urlsplit(mlesslink)[2])), "wb")
try:
    chunk_read(_SavingResponse(getvidurls_text, vidfile), report_hook=chunk_report)
finally:
    # Close the file even when the transfer fails part-way through.
    vidfile.close()