# This program is free software; you can redistribute it and/or modify
# it under the terms of the Revised BSD License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Revised BSD License for more details.
#
# Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
# Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
# Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
#
# $FileInfo: motherless-dl.py - Last Update: 10/07/2013 Ver. 1.4.5 RC 2 - Author: cooldude2k $
19 import re
, os
, sys
, urllib
, urllib2
, cookielib
, StringIO
, gzip
, time
, datetime
, argparse
, urlparse
;
# Program version as (major, minor, patch, release tag). The release tag is
# None for a final release.
__version_info__ = (1, 4, 5, "RC 2")

# Human-readable version: "major.minor.patch", followed by a space and the
# release tag when one is set.
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    __version__ += " " + str(__version_info__[3])
# Default header values. They double as the fallback used when the matching
# option is given on the command line without a value.
_default_user_agent = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0"
_default_referer = "http://motherless.com/"

# Command-line interface: zero or more URLs plus optional flags. The parsed
# namespace is stored in `getargs` for the rest of the script.
parser = argparse.ArgumentParser()
parser.add_argument("url", nargs="*", help="motherless url")
# nargs="?" lets the option appear with no value; argparse then stores
# `const`, which is None unless set. Pointing `const` at the default keeps
# the resulting header value a string in that case.
parser.add_argument(
    "--user-agent",
    nargs="?",
    const=_default_user_agent,
    default=_default_user_agent,
    help="specify a custom user agent",
)
parser.add_argument(
    "--referer",
    nargs="?",
    const=_default_referer,
    default=_default_referer,
    help="specify a custom referer, use if the video access",
)
parser.add_argument("--verbose", action="store_true", help="print various debugging information")
parser.add_argument("--dump-user-agent", action="store_true", help="display the current browser identification")
parser.add_argument("--version", action="store_true", help="print program version and exit")
parser.add_argument(
    "--update",
    action="store_true",
    help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)",
)
# Parses sys.argv; argparse prints usage and exits on invalid arguments.
getargs = parser.parse_args()
36 if(getargs
.version
==True):
39 if(getargs
.dump_user_agent
==True):
40 print(getargs
.user_agent
);
42 if(len(getargs
.url
)==0):
# Browser identification sent with every request (value of --user-agent).
fakeua = getargs.user_agent

# Cookie-aware opener used for the page fetches in this script.
geturls_cj = cookielib.CookieJar()
geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj))

# Only gzip is advertised: the response handling in this file decompresses
# with gzip.GzipFile, which cannot read a raw "deflate" body, so the server
# must not be invited to send one.
geturls_opener.addheaders = [
    ("Referer", getargs.referer),
    ("User-Agent", fakeua),
    ("Accept-Encoding", "gzip"),
    ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
    ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
    ("Connection", "close"),
]
53 numurlarg
= len(getargs
.url
);
55 while(cururlarg
<numurlarg
):
56 mlessvid
= getargs
.url
[cururlarg
];
57 mlessvid
= re
.sub(re
.escape("http://motherless.com/"), "", mlessvid
);
58 mlessvid
= re
.sub(re
.escape("http://www.motherless.com/"), "", mlessvid
);
59 mlessvid
= re
.sub(re
.escape("motherless.com/"), "", mlessvid
);
60 mlessvid
= re
.sub(re
.escape("www.motherless.com/"), "", mlessvid
);
61 mlessvid
= re
.sub("^"+re
.escape("/"), "", mlessvid
);
62 mlessvid
= "http://motherless.com/"+mlessvid
;
63 mregex_text
= re
.escape("http://motherless.com/")+"([\w\/]+)";
64 if(re
.findall(mregex_text
, mlessvid
)):
65 mlessvid
= re
.findall(mregex_text
, mlessvid
);
66 mlessvid
= "/"+mlessvid
[0];
67 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split('/');
69 if((re
.match("^random", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^random", mlessvidid
[1]) and len(mlessvidid
)==3) and (re
.match("^image", mlessvidid
[2]) or re
.match("^video", mlessvidid
[2]))):
70 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
);
71 mlessvid
= geturls_text
.geturl();
72 if(re
.findall(mregex_text
, mlessvid
)):
73 mlessvid
= re
.findall(mregex_text
, mlessvid
);
74 mlessvid
= mlessvid
[0];
75 if((re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)==4) or (re
.match("^f", mlessvidid
[1]) and re
.match("^galleries", mlessvidid
[2]) and len(mlessvidid
)==4)):
76 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page=1");
77 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
78 strbuf
= StringIO
.StringIO(geturls_text
.read());
79 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
80 out_text
= gzstrbuf
.read()[:];
81 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
82 out_text
= geturls_text
.read()[:];
83 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
84 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
85 regex_ptext
= re
.escape("class=\"pop\" rel=\"")+"([0-9]+)"+re
.escape("\">")+"([0-9]+)"+re
.escape("</a>");
86 page_text
= re
.findall(regex_ptext
, out_text
);
88 numpages
= int(page_text
[-1][0]);
92 while(curpage
<=numpages
):
94 geturls_text
= geturls_opener
.open("http://motherless.com/"+mlessvid
+"?page="+str(curpage
));
95 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
96 strbuf
= StringIO
.StringIO(geturls_text
.read());
97 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
98 out_text
= gzstrbuf
.read()[:];
99 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
100 out_text
= geturls_text
.read()[:];
101 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
102 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
103 regex_text
= re
.escape("")+"([\w\/]+)"+re
.escape("\" class=\"img-container\" target=\"_self\">");
104 post_text
= re
.findall(regex_text
, out_text
);
105 numgal
= len(post_text
);
107 while(curgal
<numgal
):
108 mlessgallist
.append(post_text
[curgal
]);
110 curpage
= curpage
+ 1;
111 if(not re
.match("^galleries", mlessvidid
[1]) or (re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)<4) or (re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)>5)):
112 mlessgallist
.append(mlessvid
);
113 numusrgal
= len(mlessgallist
);
115 while(curusrgal
<numusrgal
):
116 mlessvid
= mlessgallist
[curusrgal
];
117 if(not re
.match("^\/", mlessvid
)):
118 mlessvid
= "/"+mlessvid
;
119 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split('/');
121 if((re
.match("^G", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^V", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^g", mlessvidid
[1]) and len(mlessvidid
)==3) or (re
.match("^f", mlessvidid
[1]) and len(mlessvidid
)==4 and (re
.match("^videos", mlessvidid
[3]) or re
.match("^images", mlessvidid
[3]))) or (re
.match("^live", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^images", mlessvidid
[2]) or re
.match("^videos", mlessvidid
[2]))) or (re
.match("^images", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^favorited", mlessvidid
[2]) or re
.match("^viewed", mlessvidid
[2]) or re
.match("^commented", mlessvidid
[2]) or re
.match("^popular", mlessvidid
[2]))) or (re
.match("^videos", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^favorited", mlessvidid
[2]) or re
.match("^viewed", mlessvidid
[2]) or re
.match("^commented", mlessvidid
[2]) or re
.match("^popular", mlessvidid
[2])))):
122 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page=1");
123 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
124 strbuf
= StringIO
.StringIO(geturls_text
.read());
125 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
126 out_text
= gzstrbuf
.read()[:];
127 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
128 out_text
= geturls_text
.read()[:];
129 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
130 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
131 regex_ptext
= re
.escape("class=\"pop\" rel=\"")+"([0-9]+)"+re
.escape("\">")+"([0-9]+)"+re
.escape("</a>");
132 page_text
= re
.findall(regex_ptext
, out_text
);
134 numpages
= int(page_text
[-1][0]);
138 while(curpage
<=numpages
):
140 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page="+str(curpage
));
141 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
142 strbuf
= StringIO
.StringIO(geturls_text
.read());
143 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
144 out_text
= gzstrbuf
.read()[:];
145 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
146 out_text
= geturls_text
.read()[:];
147 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
148 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
149 if(re
.match("^V", mlessvidid
[1])):
150 out_text
= re
.sub(re
.escape("class=\"img-container\" target=\"_self\""), "title=\"motherless link\"", out_text
);
151 out_text
= re
.sub(re
.escape("class=\"pop plain\" target=\"_blank\""), "title=\"motherless link\"", out_text
);
152 regex_text
= re
.escape("<a href=\"")+"([\w\/]+)"+re
.escape("\" title=\"motherless link\">");
153 if(not re
.match("^V", mlessvidid
[1])):
154 regex_text
= re
.escape("")+"([\w\/]+)"+re
.escape("\" class=\"img-container\" target=\"_self\">");
155 post_text
= re
.findall(regex_text
, out_text
);
156 numurls
= len(post_text
);
158 while(cururl
<numurls
):
159 mlessurllist
.append(post_text
[cururl
]);
161 curpage
= curpage
+ 1;
162 if((re
.match("^G", mlessvidid
[1]) and len(mlessvidid
)==3 and re
.match("([0-9A-F]+)", mlessvidid
[2])) or (len(mlessvidid
)==2 and re
.match("([0-9A-F]+)", mlessvidid
[1]))):
163 mlessurllist
.append(mlessvid
);
164 numlist
= len(mlessurllist
);
166 while(curlurl
<numlist
):
167 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessurllist
[curlurl
]);
168 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
169 strbuf
= StringIO
.StringIO(geturls_text
.read());
170 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
171 subout_text
= gzstrbuf
.read()[:];
172 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
173 subout_text
= geturls_text
.read()[:];
174 subout_text
= re
.sub(re
.escape("http://motherless.com"), "", subout_text
);
175 subout_text
= re
.sub(re
.escape("http://www.motherless.com"), "", subout_text
);
176 regex_text
= re
.escape("__fileurl = '")+"(.*)"+re
.escape("';");
177 post_text
= re
.findall(regex_text
, subout_text
);
179 mlesslink
= post_text
[0];
180 mlessext
= os
.path
.splitext(urlparse
.urlparse(mlesslink
).path
)[1];
181 mlessext
= mlessext
.replace(".", "");
182 mlessext
= mlessext
.lower();
183 if(mlessext
=="mp4" or mlessext
=="flv"):
184 mlesslink
= mlesslink
+"?start=0";
186 if(curlurl
<(numlist
- 1)):
187 time
.sleep(per_url_sleep
);
188 curlurl
= curlurl
+ 1;
189 if(curusrgal
<(numusrgal
- 1)):
190 time
.sleep(per_gal_sleep
);
191 curusrgal
= curusrgal
+ 1;
192 cururlarg
= cururlarg
+ 1;
# Separate cookie jar and opener for fetching the media file itself.
getvidurls_cj = cookielib.CookieJar()
getvidurls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(getvidurls_cj))

# The downloaded body is written to disk without any content decoding, so
# ask for an unencoded ("identity") transfer instead of gzip/deflate.
getvidurls_opener.addheaders = [
    ("Referer", getargs.referer+mlessvid),
    ("User-Agent", fakeua),
    ("Accept-Encoding", "identity"),
    ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
    ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
    ("Connection", "close"),
]
# Open the resolved media link; the response object is consumed further down.
getvidurls_text = getvidurls_opener.open(mlesslink)
def chunk_report(bytes_so_far, chunk_size, total_size):
    """Write a single-line download progress indicator to stdout.

    The line ends with a carriage return so successive calls overwrite each
    other; a newline is written once the byte count reaches the total.

    Args:
        bytes_so_far: Number of bytes received up to now.
        chunk_size: Size of each read request (accepted for hook
            compatibility; not used in the output).
        total_size: Expected total number of bytes.
    """
    if total_size > 0:
        percent = round(float(bytes_so_far) / total_size * 100, 2)
    else:
        # Nothing is expected, so avoid dividing by zero and report completion.
        percent = 100.0
    sys.stdout.write(
        "Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, total_size, percent)
    )
    if bytes_so_far >= total_size:
        sys.stdout.write("\n")
206 def chunk_read(response, chunk_size=8192, report_hook=None):
207 total_size = response.info().getheader("Content-Length").strip();
208 total_size = int(total_size);
211 chunk = response.read(chunk_size);
212 bytes_so_far += len(chunk);
216 report_hook(bytes_so_far, chunk_size, total_size);
# Read the response through chunk_read, which calls chunk_report as its
# progress hook.
chunk_read(getvidurls_text, report_hook=chunk_report)
# NOTE(review): chunk_read() has already read from getvidurls_text at this
# point; if it drains the response, the read() below has nothing left to
# write - confirm against chunk_read's loop.
# The file is saved in the current working directory under the last path
# component of the media URL.
vidfile_path = os.getcwd()+os.sep+os.path.basename(urllib2.urlparse.urlsplit(mlesslink)[2])
# The with-block guarantees the handle is flushed and closed even on error.
with open(vidfile_path, "wb") as vidfile:
    vidfile.write(getvidurls_text.read())