# This program is free software; you can redistribute it and/or modify
# it under the terms of the Revised BSD License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Revised BSD License for more details.
#
# Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
# Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
# Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
#
# $FileInfo: motherless-dl.py - Last Update: 10/09/2013 Ver. 1.6.0 RC 1 - Author: cooldude2k $
19 import re
, os
, sys
, urllib
, urllib2
, cookielib
, StringIO
, gzip
, time
, datetime
, argparse
, urlparse
;
20 '''sys.tracebacklimit = 0;'''
22 __version_info__
= (1, 6, 0, "RC 1");
23 if(__version_info__
[3]!=None):
24 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
25 if(__version_info__
[3]==None):
26 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
# Command-line interface. "url" collects zero or more positional arguments.
# --user-agent and --referer take an optional value: when the switch is
# absent the default below is used, and when it is given without a value the
# result is None (no const is set). Every other switch is a boolean flag.
parser = argparse.ArgumentParser()
parser.add_argument("url", nargs="*", help="motherless url")
parser.add_argument("--version", action="store_true", help="print program version and exit")
parser.add_argument("--update", action="store_true", help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)")
parser.add_argument("--dump-user-agent", action="store_true", help="display the current browser identification")
parser.add_argument("--user-agent", nargs="?", default="Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0", help="specify a custom user agent")
# The original help text stopped mid-sentence ("use if the video access").
parser.add_argument("--referer", nargs="?", default="http://motherless.com/", help="specify a custom referer, use if the video access is restricted to one domain")
parser.add_argument("--id", action="store_true", help="use only video ID in file name")
parser.add_argument("--get-url", action="store_true", help="simulate, quiet but print URL")
# The original help text was a verbatim copy of the --get-url help.
parser.add_argument("--get-pageurl", action="store_true", help="simulate, quiet but print page URL")
parser.add_argument("--get-title", action="store_true", help="simulate, quiet but print title")
parser.add_argument("--get-id", action="store_true", help="simulate, quiet but print id")
parser.add_argument("--get-thumbnail", action="store_true", help="simulate, quiet but print thumbnail URL")
parser.add_argument("--get-filename", action="store_true", help="simulate, quiet but print output filename")
parser.add_argument("--get-format", action="store_true", help="simulate, quiet but print output format")
parser.add_argument("--get-username", action="store_true", help="simulate, quiet but print uploaders username")
parser.add_argument("--verbose", action="store_true", help="print various debugging information")
# Parse the process's command-line arguments into a namespace. The names
# read below (version, dump_user_agent, user_agent, url) are the dests of
# the "--version", "--dump-user-agent", "--user-agent" and "url" arguments.
45 getargs
= parser
.parse_args();
# --version was given.
# NOTE(review): the body of this `if` (source lines 47-48) is not present in
# this view -- presumably it prints the version and exits; confirm against
# the full file.
46 if(getargs
.version
==True):
# --dump-user-agent was given: print the user-agent string in effect.
# NOTE(review): source line 51, directly after the print, is not present in
# this view.
49 if(getargs
.dump_user_agent
==True):
50 print(getargs
.user_agent
);
# No URL arguments were supplied.
# NOTE(review): the body of this `if` (source lines 53-54) is not present in
# this view.
52 if(len(getargs
.url
)==0):
# motherless_dl(mtlessgetargs=vars(getargs))
#
# Resolve every motherless.com URL in mtlessgetargs["url"] to item pages,
# scrape each item page, and append one dict per item to mlessoutlist with
# the keys "id", "title", "format", "filename", "thumbnail", "vidpic",
# "username", "pageurl" and "url".
#
# Parameter:
#   mtlessgetargs -- dict of options; the keys read here are "user_agent",
#                    "referer", "url" (list of URL strings) and "id" (bool).
#                    The default, vars(getargs), is evaluated once, when the
#                    def statement runs.
#
# Stages, in the order the visible statements appear:
#   1. Build a cookie-aware opener with fixed request headers.
#   2. Normalise each URL argument to a site-relative path.
#   3. /random... paths: fetch and continue with the final URL.
#   4. Gallery listings: page through them and queue every gallery link.
#   5. For each queued entry: page through listing pages and queue every
#      item link, or queue the entry itself when it is a single item.
#   6. For each queued item: fetch the page and scrape title, thumbnail,
#      "__fileurl", og:image, image_src link and uploader name; derive the id
#      (item path without the leading slash), the page URL, the extension
#      (lower-cased suffix of the file URL path) and the file name (last path
#      component of the file URL, or the id with "/" replaced by "_" plus
#      "." and the extension when the "id" option is set). For mp4/flv items
#      "?start=0" is appended to the file URL and "vidpic" is the og:image
#      value; for everything else "vidpic" is the file URL itself.
#   7. time.sleep(per_url_sleep) between items and
#      time.sleep(per_gal_sleep) between queued entries (not after the last).
#
# NOTE(review): this listing is a damaged extraction. Source line numbers are
# fused onto the start of statements, tokens are split across lines,
# indentation (and with it the nesting) is gone, and several numbered source
# lines are absent. In particular no visible line initialises cururlarg,
# curpage, curgal, cururl, curlurl, curusrgal, mlessgallist, mlessurllist,
# mlesslistitms, mlessoutlist, per_url_sleep or per_gal_sleep, curgal and
# cururl are never visibly incremented, tvaradd is only assigned
# conditionally, and no return statement is visible although the module-level
# caller takes len() of the result. Confirm all of these against the full
# file.
# NOTE(review): observations on the visible code for follow-up --
#   * the image branch builds imginfo with the scraped width stored under
#     "height" and the scraped height under "width"; imginfo and vidinfo are
#     not referenced again in the visible lines;
#   * responses labelled "deflate" are read through gzip.GzipFile, the same
#     as "gzip" ones;
#   * the thumbnail pattern (source line 202) has unbalanced quotes in this
#     view, and two link patterns pass an empty literal to re.escape() --
#     possibly text lost in extraction.
55 def motherless_dl(mtlessgetargs
=vars(getargs
)):
# User-Agent header value taken from the supplied options.
56 fakeua
= mtlessgetargs
["user_agent"];
# Opener with a cookie jar attached, used for every request below.
57 geturls_cj
= cookielib
.CookieJar();
58 geturls_opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(geturls_cj
));
# Headers sent with every request made through this opener.
59 geturls_opener
.addheaders
= [("Referer", mtlessgetargs
["referer"]), ("User-Agent", fakeua
), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
# Number of URL arguments to process.
62 numurlarg
= len(mtlessgetargs
["url"]);
# Outer loop: one pass per URL argument.
64 while(cururlarg
<numurlarg
):
# Strip any motherless.com scheme/host text and a leading slash, then rebuild
# the argument as "http://motherless.com/<rest>".
# NOTE(review): "motherless.com/" is removed before "www.motherless.com/",
# so a bare "www.motherless.com/x" argument is left as "www.x".
65 mlessvid
= mtlessgetargs
["url"][cururlarg
];
66 mlessvid
= re
.sub(re
.escape("http://motherless.com/"), "", mlessvid
);
67 mlessvid
= re
.sub(re
.escape("http://www.motherless.com/"), "", mlessvid
);
68 mlessvid
= re
.sub(re
.escape("motherless.com/"), "", mlessvid
);
69 mlessvid
= re
.sub(re
.escape("www.motherless.com/"), "", mlessvid
);
70 mlessvid
= re
.sub("^"+re
.escape("/"), "", mlessvid
);
71 mlessvid
= "http://motherless.com/"+mlessvid
;
# Keep only the part after the host that consists of word characters and
# / ? & =, re-rooted with a leading slash.
72 mregex_text
= re
.escape("http://motherless.com/")+"([\w\/\?\&\=]+)";
73 if(re
.findall(mregex_text
, mlessvid
)):
74 mlessvid
= re
.findall(mregex_text
, mlessvid
);
75 mlessvid
= "/"+mlessvid
[0];
# Query parameters (dict of lists) and the path split on "/".
76 mlessvidqstr
= urlparse
.parse_qs(urlparse
.urlparse(mlessvid
).query
);
77 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split("/");
# Paths whose first segment starts with "random" (alone, or followed by a
# segment starting with "image" or "video"): fetch the page and continue with
# the final URL reported by the response, reduced to its path without the
# leading slash. mlessvidid is not recomputed in the visible lines.
79 if((re
.match("^random", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^random", mlessvidid
[1]) and len(mlessvidid
)==3) and (re
.match("^image", mlessvidid
[2]) or re
.match("^video", mlessvidid
[2]))):
80 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
);
81 mlessvid
= geturls_text
.geturl();
82 if(re
.findall(mregex_text
, mlessvid
)):
83 mlessvid
= re
.findall(mregex_text
, mlessvid
);
84 mlessvid
= mlessvid
[0];
# Gallery listings (four path parts with the first segment starting with
# "galleries", or starting with "f" or "term" followed by a segment starting
# with "galleries"): request page 1 to learn the page count.
85 if((re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)==4) or (re
.match("^f", mlessvidid
[1]) and re
.match("^galleries", mlessvidid
[2]) and len(mlessvidid
)==4) or (re
.match("^term", mlessvidid
[1]) and re
.match("^galleries", mlessvidid
[2]) and len(mlessvidid
)==4)):
86 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page=1");
# Read the body, going through GzipFile when the response is labelled gzip or
# deflate and reading it directly otherwise.
87 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
88 strbuf
= StringIO
.StringIO(geturls_text
.read());
89 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
90 out_text
= gzstrbuf
.read()[:];
91 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
92 out_text
= geturls_text
.read()[:];
# Turn absolute motherless.com links in the page into site-relative ones.
93 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
94 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
# Pagination links look like class="pop" rel="<n>"><n></a>; the rel value of
# the last one found is used as the page count.
# NOTE(review): page_text[-1] raises IndexError when nothing matched unless a
# guard exists on source lines 97-101, which are not present in this view.
95 regex_ptext
= re
.escape("class=\"pop\" rel=\"")+"([0-9]+)"+re
.escape("\">")+"([0-9]+)"+re
.escape("</a>");
96 page_text
= re
.findall(regex_ptext
, out_text
);
98 numpages
= int(page_text
[-1][0]);
# Walk the listing pages while curpage <= numpages.
102 while(curpage
<=numpages
):
# NOTE(review): this request prefixes the path with "http://motherless.com/"
# (trailing slash), unlike the other requests in this function, which use
# "http://motherless.com" -- yields a double slash when the path starts with
# "/".
104 geturls_text
= geturls_opener
.open("http://motherless.com/"+mlessvid
+"?page="+str(curpage
));
105 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
106 strbuf
= StringIO
.StringIO(geturls_text
.read());
107 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
108 out_text
= gzstrbuf
.read()[:];
109 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
110 out_text
= geturls_text
.read()[:];
111 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
112 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
# Capture the path that precedes `" class="img-container" target="_self">`.
# NOTE(review): the literal prefix given to re.escape() is empty in this
# view -- possibly lost in extraction.
113 regex_text
= re
.escape("")+"([\w\/]+)"+re
.escape("\" class=\"img-container\" target=\"_self\">");
114 post_text
= re
.findall(regex_text
, out_text
);
115 numgal
= len(post_text
);
# Queue every gallery link found on this page.
117 while(curgal
<numgal
):
118 mlessgallist
.append(post_text
[curgal
]);
120 curpage
= curpage
+ 1;
# Paths whose first segment does not start with "galleries", or does but with
# fewer than 4 or more than 5 path parts, are queued unchanged.
121 if(not re
.match("^galleries", mlessvidid
[1]) or (re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)<4) or (re
.match("^galleries", mlessvidid
[1]) and len(mlessvidid
)>5)):
122 mlessgallist
.append(mlessvid
);
123 numusrgal
= len(mlessgallist
);
# Middle loop: one pass per queued entry.
125 while(curusrgal
<numusrgal
):
126 mlessvid
= mlessgallist
[curusrgal
];
# Ensure a leading slash, then re-derive the query dict and the path parts.
127 if(not re
.match("^\/", mlessvid
)):
128 mlessvid
= "/"+mlessvid
;
129 mlessvidqstr
= urlparse
.parse_qs(urlparse
.urlparse(mlessvid
).query
);
130 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split("/");
# Listing pages, recognised by how the first path segments start and by the
# number of path parts: G/H/V/live (2 parts); g or u (3 parts); term followed
# by videos|images (4 parts); f with videos|images as third segment (4 parts);
# live followed by images|videos (3 parts); images or videos followed by
# favorited|viewed|commented|popular (3 parts).
132 if((re
.match("^G", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^H", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^V", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^live", mlessvidid
[1]) and len(mlessvidid
)==2) or (re
.match("^g", mlessvidid
[1]) and len(mlessvidid
)==3) or (re
.match("^u", mlessvidid
[1]) and len(mlessvidid
)==3) or (re
.match("^term", mlessvidid
[1]) and (re
.match("^videos", mlessvidid
[2]) or re
.match("^images", mlessvidid
[2])) and len(mlessvidid
)==4) or (re
.match("^f", mlessvidid
[1]) and len(mlessvidid
)==4 and (re
.match("^videos", mlessvidid
[3]) or re
.match("^images", mlessvidid
[3]))) or (re
.match("^live", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^images", mlessvidid
[2]) or re
.match("^videos", mlessvidid
[2]))) or (re
.match("^images", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^favorited", mlessvidid
[2]) or re
.match("^viewed", mlessvidid
[2]) or re
.match("^commented", mlessvidid
[2]) or re
.match("^popular", mlessvidid
[2]))) or (re
.match("^videos", mlessvidid
[1]) and len(mlessvidid
)==3 and (re
.match("^favorited", mlessvidid
[2]) or re
.match("^viewed", mlessvidid
[2]) or re
.match("^commented", mlessvidid
[2]) or re
.match("^popular", mlessvidid
[2])))):
# "u" pages with three path parts: when the query carries t=i or t=v, carry
# that parameter over to the paginated requests through tvaradd.
# NOTE(review): mlessvidqstr["t"] raises KeyError when the query has no "t"
# unless source lines 136/139-143 (not present in this view) guard it.
135 if(re
.match("^u", mlessvidid
[1]) and len(mlessvidid
)==3):
137 if(mlessvidqstr
["t"][0]=="i" or mlessvidqstr
["t"][0]=="v"):
138 tvaradd
= "&t="+mlessvidqstr
["t"][0];
# Request page 1 of the listing to learn the page count.
144 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page=1"+tvaradd
);
145 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
146 strbuf
= StringIO
.StringIO(geturls_text
.read());
147 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
148 out_text
= gzstrbuf
.read()[:];
149 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
150 out_text
= geturls_text
.read()[:];
151 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
152 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
# Same pagination pattern as for gallery listings: the rel value of the last
# link found is used as the page count.
153 regex_ptext
= re
.escape("class=\"pop\" rel=\"")+"([0-9]+)"+re
.escape("\">")+"([0-9]+)"+re
.escape("</a>");
154 page_text
= re
.findall(regex_ptext
, out_text
);
156 numpages
= int(page_text
[-1][0]);
# Walk the listing pages while curpage <= numpages.
160 while(curpage
<=numpages
):
162 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessvid
+"?page="+str(curpage
)+tvaradd
);
163 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
164 strbuf
= StringIO
.StringIO(geturls_text
.read());
165 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
166 out_text
= gzstrbuf
.read()[:];
167 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
168 out_text
= geturls_text
.read()[:];
169 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
);
170 out_text
= re
.sub(re
.escape("http://www.motherless.com"), "", out_text
);
# First segment starting with "V": rewrite both anchor attribute variants to
# title="motherless link" so that a single pattern captures every item href.
171 if(re
.match("^V", mlessvidid
[1])):
172 out_text
= re
.sub(re
.escape("class=\"img-container\" target=\"_self\""), "title=\"motherless link\"", out_text
);
173 out_text
= re
.sub(re
.escape("class=\"pop plain\" target=\"_blank\""), "title=\"motherless link\"", out_text
);
174 regex_text
= re
.escape("<a href=\"")+"([\w\/]+)"+re
.escape("\" title=\"motherless link\">");
# Every other listing: capture the path that precedes the img-container
# attributes.
# NOTE(review): the literal prefix given to re.escape() is empty in this
# view -- possibly lost in extraction.
175 if(not re
.match("^V", mlessvidid
[1])):
176 regex_text
= re
.escape("")+"([\w\/]+)"+re
.escape("\" class=\"img-container\" target=\"_self\">");
177 post_text
= re
.findall(regex_text
, out_text
);
178 numurls
= len(post_text
);
# Queue every item link found on this page.
180 while(cururl
<numurls
):
181 mlessurllist
.append(post_text
[cururl
]);
183 curpage
= curpage
+ 1;
# Single item: either three path parts with the first segment starting with
# "G" and the second starting with a character in 0-9A-F, or two path parts
# whose only segment starts with such a character. The entry itself is
# queued.
184 if((re
.match("^G", mlessvidid
[1]) and len(mlessvidid
)==3 and re
.match("([0-9A-F]+)", mlessvidid
[2])) or (len(mlessvidid
)==2 and re
.match("([0-9A-F]+)", mlessvidid
[1]))):
185 mlessurllist
.append(mlessvid
);
186 numlist
= len(mlessurllist
);
# Inner loop: fetch every queued item page and scrape its metadata.
189 while(curlurl
<numlist
):
190 geturls_text
= geturls_opener
.open("http://motherless.com"+mlessurllist
[curlurl
]);
191 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
192 strbuf
= StringIO
.StringIO(geturls_text
.read());
193 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
194 subout_text
= gzstrbuf
.read()[:];
195 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
196 subout_text
= geturls_text
.read()[:];
197 subout_text
= re
.sub(re
.escape("http://motherless.com"), "", subout_text
);
198 subout_text
= re
.sub(re
.escape("http://www.motherless.com"), "", subout_text
);
# Page title with the " - MOTHERLESS.COM" text removed.
199 regex_title
= re
.escape("<title>")+"(.*)"+re
.escape("</title>");
200 title_text
= re
.findall(regex_title
, subout_text
);
201 mlesstitle
= re
.sub(re
.escape(" - MOTHERLESS.COM"), "", title_text
[0]);
202 regex_thumb
= re
.escape("src="")+"(.*)"+re
.escape(""");
203 thumb_text
= re
.findall(regex_thumb
, subout_text
);
204 mlessthumb
= thumb_text
[0];
205 regex_text
= re
.escape("__fileurl = '")+"(.*)"+re
.escape("';");
206 post_text
= re
.findall(regex_text
, subout_text
);
207 regex_img
= re
.escape("<meta property=\"og:image\" content=\"")+"(.*)"+re
.escape("\">");
208 img_text
= re
.findall(regex_img
, subout_text
);
209 mlessimg
= img_text
[0];
210 regex_altimg
= re
.escape("<link rel=\"image_src\" type=\"image/")+"(.*)"+re
.escape("\" href=\"")+"(.*)"+re
.escape("\">");
211 altimg_text
= re
.findall(regex_altimg
, subout_text
);
212 mlessaltimg
= altimg_text
[0][1];
213 regex_usrname
= re
.escape("<a href=\"/u/")+"([\w]+)"+re
.escape("\" class=\"pop plain thumb-member-link-uploads\">Uploads</a>");
214 usrname_text
= re
.findall(regex_usrname
, subout_text
);
215 mlessusrname
= usrname_text
[0];
216 mlessid
= re
.sub("^"+re
.escape("/"), "", mlessurllist
[curlurl
]);
217 mlesspurl
= "http://motherless.com"+mlessurllist
[curlurl
];
219 mlesslink
= post_text
[0];
220 mlessext
= os
.path
.splitext(urlparse
.urlparse(mlesslink
).path
)[1];
221 mlessext
= mlessext
.replace(".", "");
222 mlessext
= mlessext
.lower();
223 if(mtlessgetargs
["id"]==False):
224 mlessfname
= urlparse
.urlsplit(mlesslink
).path
.split("/")[-1];
225 if(mtlessgetargs
["id"]==True):
226 mlessfname
= re
.sub(re
.escape("/"), "_", mlessid
)+"."+mlessext
;
227 if(not mlessext
=="mp4" and not mlessext
=="flv"):
229 regex_ii_dimensions
= re
.escape("style=\"width: ")+"([0-9]+)"+re
.escape("px; height: ")+"([0-9]+)"+re
.escape("px; border: none;\"");
230 post_ii_dimensions
= re
.findall(regex_ii_dimensions
, subout_text
);
231 post_ii_width
= post_ii_dimensions
[0][0];
232 post_ii_height
= post_ii_dimensions
[0][1];
233 imginfo
= {"height": int(post_ii_width
), "width": int(post_ii_height
)};
234 if(mlessext
=="mp4" or mlessext
=="flv"):
236 mlesslink
= mlesslink
+"?start=0";
237 regex_vi_file
= re
.escape("\"file\" : \"")+"(.*)"+re
.escape("\",");
238 post_vi_file
= re
.findall(regex_vi_file
, subout_text
);
239 regex_vi_image
= re
.escape("\"image\" : \"")+"(.*)"+re
.escape("\",");
240 post_vi_image
= re
.findall(regex_vi_image
, subout_text
);
241 regex_vi_height
= re
.escape("\"height\" : ")+"([0-9]+)"+re
.escape(",");
242 post_vi_height
= re
.findall(regex_vi_height
, subout_text
);
243 regex_vi_width
= re
.escape("\"width\" : ")+"([0-9]+)"+re
.escape(",");
244 post_vi_width
= re
.findall(regex_vi_width
, subout_text
);
245 regex_vi_filethumb
= re
.escape("\"file\": ")+"(.*)"+re
.escape(",");
246 post_vi_filethumb
= re
.findall(regex_vi_filethumb
, subout_text
);
247 regex_vi_kind
= re
.escape("\"kind\": \"")+"(.*)"+re
.escape("\"");
248 post_vi_kind
= re
.findall(regex_vi_kind
, subout_text
);
249 vidinfo
= {"file": post_vi_file
[0], "image": post_vi_image
[0], "height": int(post_vi_height
[0]), "width": int(post_vi_width
[0]), "filethumb": post_vi_filethumb
[0], "kind": post_vi_kind
[0]};
251 mlesslistitms
.update({"id": mlessid
});
252 mlesslistitms
.update({"title": mlesstitle
});
253 mlesslistitms
.update({"format": mlessext
});
254 mlesslistitms
.update({"filename": mlessfname
});
255 mlesslistitms
.update({"thumbnail": mlessthumb
});
256 if(not mlessext
=="mp4" and not mlessext
=="flv"):
257 mlesslistitms
.update({"vidpic": mlesslink
});
258 if(mlessext
=="mp4" or mlessext
=="flv"):
259 mlesslistitms
.update({"vidpic": mlessimg
});
260 mlesslistitms
.update({"username": mlessusrname
});
261 mlesslistitms
.update({"pageurl": mlesspurl
});
262 mlesslistitms
.update({"url": mlesslink
});
263 mlessoutlist
.append(mlesslistitms
);
264 if(curlurl
<(numlist
- 1)):
265 time
.sleep(per_url_sleep
);
266 curlurl
= curlurl
+ 1;
267 if(curusrgal
<(numusrgal
- 1)):
268 time
.sleep(per_gal_sleep
);
269 curusrgal
= curusrgal
+ 1;
270 cururlarg
= cururlarg
+ 1;
# Resolve every URL given on the command line, then print the requested
# fields of each resulting item, one value per line. Each item is a dict
# with the keys "id", "title", "format", "filename", "thumbnail", "vidpic",
# "username", "pageurl" and "url".
mtlesslinks = motherless_dl()
mtlesslncount = len(mtlesslinks)

# The download URL is printed when --get-url is given, and also when no
# --get-* selector was given at all.
mtlessprinturl = getargs.get_url or not (
    getargs.get_id
    or getargs.get_title
    or getargs.get_format
    or getargs.get_filename
    or getargs.get_thumbnail
    or getargs.get_username
    or getargs.get_pageurl
)

for mtlessitem in mtlesslinks:
    if getargs.get_id:
        print(mtlessitem["id"])
    if getargs.get_title:
        print(mtlessitem["title"])
    if getargs.get_format:
        print(mtlessitem["format"])
    if getargs.get_filename:
        print(mtlessitem["filename"])
    if getargs.get_thumbnail:
        print(mtlessitem["thumbnail"])
        # NOTE(review): the original listing lost its indentation; this
        # check is assumed to be nested under --get-thumbnail, since it sits
        # between the thumbnail print and the next selector. Confirm against
        # the full file.
        if mtlessitem["format"] in ("mp4", "flv"):
            print(mtlessitem["vidpic"])
    if getargs.get_username:
        print(mtlessitem["username"])
    if getargs.get_pageurl:
        print(mtlessitem["pageurl"])
    if mtlessprinturl:
        print(mtlessitem["url"])