# This program is free software; you can redistribute it and/or modify
# it under the terms of the Revised BSD License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Revised BSD License for more details.
#
# Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
# Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
# Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
#
# $FileInfo: motherless-dl.py - Last Update: 11/10/2013 Ver. 1.6.7 RC 1 - Author: cooldude2k $
19 from __future__
import absolute_import
, division
, print_function
if __name__ == "__main__":
    # A limit of 0 suppresses traceback frames, so an uncaught error prints
    # only the exception type and message when this file runs as a script.
    sys.tracebacklimit = 0

# Version metadata: (major, minor, patch, release tag or None) plus the
# release date written as "YYYY.MM.DD".
# NOTE(review): the $FileInfo$ header line of this file says
# "Ver. 1.6.7 RC 1" / 11/10/2013 -- confirm which of the two is current.
__version_info__ = (1, 6, 5, "RC 3")
__version_date__ = "2013.10.23"

# Human-readable version: "major.minor.patch", followed by a space and the
# release tag when one is set.
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    __version__ = __version__ + " " + str(__version_info__[3])
46 parser
= argparse
.ArgumentParser(
47 description
="get urls of images/videos from motherless.com",
48 conflict_handler
="resolve",
50 parser
.add_argument("url", nargs
='*', help='motherless url')
51 parser
.add_argument('-v', '--version', action
='version', version
=__version__
)
52 parser
.add_argument("--pages-start", nargs
="*", help="start at page number")
53 parser
.add_argument("--pages-end", nargs
="*", help="end at page number")
57 help="update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)")
58 parser
.add_argument("--dump-user-agent", action
='store_true',
59 help="display the current browser identification")
63 default
="Mozilla/5.0 (Windows NT 7.0; rv:25.0) Gecko/20100101 Firefox/25.0",
64 help="specify a custom user agent")
65 parser
.add_argument("--referer", nargs
="?", default
="http://motherless.com/",
66 help="specify a custom referer, use if the video access")
67 parser
.add_argument("--proxy", nargs
="?", default
=None,
68 help="Use the specified HTTP/HTTPS proxy")
69 parser
.add_argument("--id", action
='store_true',
70 help="use only video ID in file name")
71 parser
.add_argument("--get-url", action
='store_true',
72 help="simulate, quiet but print URL")
73 parser
.add_argument("--get-pageurl", action
='store_true',
74 help="simulate, quiet but print URL")
75 parser
.add_argument("--get-title", action
='store_true',
76 help="simulate, quiet but print title")
77 parser
.add_argument("--get-posts", action
='store_true',
78 help="simulate, quiet but print user posts")
79 parser
.add_argument("--get-id", action
='store_true',
80 help="simulate, quiet but print id")
81 parser
.add_argument("--get-thumbnail", action
='store_true',
82 help="simulate, quiet but print thumbnail URL")
83 parser
.add_argument("--get-filename", action
='store_true',
84 help="simulate, quiet but print output filename")
85 parser
.add_argument("--get-format", action
='store_true',
86 help="simulate, quiet but print file format")
87 parser
.add_argument("--get-type", action
='store_true',
88 help="simulate, quiet but print file type")
89 parser
.add_argument("--get-username", action
='store_true',
90 help="simulate, quiet but print uploaders username")
91 parser
.add_argument("--get-bbcode", action
='store_true',
92 help="simulate, quiet but print bbcode")
93 parser
.add_argument("--get-html", action
='store_true',
94 help="simulate, quiet but print html code")
98 help="simulate, quiet but print dimensions (width x height)")
99 parser
.add_argument("--get-width", action
='store_true',
100 help="simulate, quiet but print width")
101 parser
.add_argument("--get-height", action
='store_true',
102 help="simulate, quiet but print height")
103 parser
.add_argument("--get-views", action
='store_true',
104 help="simulate, quiet but print number of views")
105 parser
.add_argument("--get-favorites", action
='store_true',
106 help="simulate, quiet but print number of favorites")
107 parser
.add_argument("--verbose", action
='store_true',
108 help="print various debugging information")
109 getargs
= parser
.parse_args()
111 from distutils
.version
import LooseVersion
as VerCheck
112 fakeua
= getargs
.user_agent
114 if (getargs
.proxy
is not None):
115 proxycfg
= urllib2
.ProxyHandler({"http": getargs
.proxy
})
116 geturls_cj
= cookielib
.CookieJar()
117 if (proxycfg
is None):
118 geturls_opener
= urllib2
.build_opener(
119 urllib2
.HTTPCookieProcessor(geturls_cj
))
120 if (proxycfg
is not None):
121 geturls_opener
= urllib2
.build_opener(
122 urllib2
.HTTPCookieProcessor(geturls_cj
), proxycfg
)
123 geturls_opener
.addheaders
= [
125 "https://github.com/GameMaker2k/Python-Scripts/"),
131 "en-US,en;q=0.8,en-CA,en-GB;q=0.6"),
133 "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
135 "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
138 urllib2
.install_opener(geturls_opener
)
139 geturls_text
= geturls_opener
.open(
140 "https://raw.github.com/GameMaker2k/Python-Scripts/master/MiniScripts/motherless-dl.py")
141 if (geturls_text
.info().get("Content-Encoding") ==
142 "gzip" or geturls_text
.info().get("Content-Encoding") == "deflate"):
143 strbuf
= StringIO
.StringIO(geturls_text
.read())
144 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
)
145 pyfile_text
= gzstrbuf
.read()[:]
146 if (geturls_text
.info().get("Content-Encoding") !=
147 "gzip" and geturls_text
.info().get("Content-Encoding") != "deflate"):
148 pyfile_text
= geturls_text
.read()[:]
149 regex_finddate_text
= re
.escape(
150 "__version_date__ = \"") + "([0-9\\.]+)" + re
.escape("\"")
151 finddate_text
= re
.findall(regex_finddate_text
, pyfile_text
)
152 regex_findver_text
= re
.escape("__version_info__ = (") + "([0-9]+)" + re
.escape(
153 ", ") + "([0-9]+)" + re
.escape(", ") + "([0-9]+)" + re
.escape(", \"") + "([A-Z0-9 ]+)" + re
.escape("\");")
154 findver_text
= re
.findall(regex_findver_text
, pyfile_text
)
155 ProVerStr
= str(__version_info__
[0]) + "." + str(__version_info__
[1]) + "." + str(
156 __version_info__
[2]) + __version_info__
[3].replace(" ", "").lower()
157 ProVerCheck
= VerCheck(ProVerStr
)
158 ProDateCheck
= VerCheck(__version_date__
)
159 NewVerStr
= findver_text
[0][0] + "." + findver_text
[0][1] + "." + \
160 findver_text
[0][2] + findver_text
[0][3].replace(" ", "").lower()
161 NewVerCheck
= VerCheck(NewVerStr
)
162 NewDateCheck
= VerCheck(finddate_text
[0])
163 if (ProVerStr
< NewVerCheck
and ProDateCheck
<= NewDateCheck
):
164 fileopen
= open(__file__
, "w+")
165 fileopen
.write(pyfile_text
)
170 if (getargs
.dump_user_agent
):
171 print(getargs
.user_agent
)
173 if (len(getargs
.url
) == 0):
178 def motherless_dl(mtlessgetargs
=vars(getargs
)):
179 fakeua
= mtlessgetargs
["user_agent"]
181 if (mtlessgetargs
["proxy"] is not None):
182 proxycfg
= urllib2
.ProxyHandler({"http": mtlessgetargs
["proxy"]})
183 geturls_cj
= cookielib
.CookieJar()
184 if (proxycfg
is None):
185 geturls_opener
= urllib2
.build_opener(
186 urllib2
.HTTPCookieProcessor(geturls_cj
))
187 if (proxycfg
is not None):
188 geturls_opener
= urllib2
.build_opener(
189 urllib2
.HTTPCookieProcessor(geturls_cj
), proxycfg
)
190 geturls_opener
.addheaders
= [
192 mtlessgetargs
["referer"]),
198 "en-US,en;q=0.8,en-CA,en-GB;q=0.6"),
200 "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
202 "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
205 urllib2
.install_opener(geturls_opener
)
208 numurlarg
= len(mtlessgetargs
["url"])
210 while (cururlarg
< numurlarg
):
211 mlessvid
= mtlessgetargs
["url"][cururlarg
]
212 if (re
.match("^s([0-9]+)" + re
.escape(".motherlessmedia.com"),
213 urlparse
.urlparse(mlessvid
).hostname
)):
214 geturls_text
= geturls_opener
.open(
215 "http://motherless.com/mogile_api.php?path=" +
216 urllib
.quote_plus(mlessvid
) +
218 mlessvid
= geturls_text
.geturl()
219 mregex_text
= re
.escape(
220 "http://motherless.com/") + "([\\w\\/\\?\\&\\=]+)"
221 if (re
.findall(mregex_text
, mlessvid
)):
222 mlessvid
= re
.findall(mregex_text
, mlessvid
)
223 mlessvid
= "http://motherless.com/" + mlessvid
[0]
224 if (mtlessgetargs
["verbose"]):
226 if (re
.match("^" + re
.escape("thumbs.motherlessmedia.com"),
227 urlparse
.urlparse(mlessvid
).hostname
)):
228 mlessvid
= re
.sub(re
.escape("-zoom"), "", mlessvid
)
229 mlessvid
= re
.sub(re
.escape("-strip"), "", mlessvid
)
230 mlessvidtmp
= urlparse
.urlparse(mlessvid
).path
.split("/")
231 mlessvid
= "http://motherless.com/" + mlessvidtmp
[2]
232 mregex_text
= re
.escape(
233 "http://motherless.com/") + "([\\w\\/\\?\\&\\=]+)"
234 if (re
.findall(mregex_text
, mlessvid
)):
235 mlessvid
= re
.findall(mregex_text
, mlessvid
)
236 mlessvid
= "http://motherless.com/" + mlessvid
[0]
237 mlessvid
= re
.sub(re
.escape("http://motherless.com/"), "", mlessvid
)
239 re
.escape("http://www.motherless.com/"), "", mlessvid
)
240 mlessvid
= re
.sub(re
.escape("https://motherless.com/"), "", mlessvid
)
242 re
.escape("https://www.motherless.com/"), "", mlessvid
)
243 mlessvid
= re
.sub(re
.escape("motherless.com/"), "", mlessvid
)
244 mlessvid
= re
.sub(re
.escape("www.motherless.com/"), "", mlessvid
)
245 mlessvid
= re
.sub("^" + re
.escape("/"), "", mlessvid
)
246 mlessvid
= "http://motherless.com/" + mlessvid
247 mregex_text
= re
.escape(
248 "http://motherless.com/") + "([\\w\\/\\?\\&\\=]+)"
249 if (re
.findall(mregex_text
, mlessvid
)):
250 mlessvid
= re
.findall(mregex_text
, mlessvid
)
251 mlessvid
= "/" + mlessvid
[0]
252 mlessvidqstr
= urlparse
.parse_qs(urlparse
.urlparse(mlessvid
).query
)
253 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split("/")
255 if ((re
.match("^random",
256 mlessvidid
[1]) and len(mlessvidid
) == 2) or (re
.match("^random",
257 mlessvidid
[1]) and len(mlessvidid
) == 3) and (re
.match("^image",
258 mlessvidid
[2]) or re
.match("^video",
260 geturls_text
= geturls_opener
.open(
261 "http://motherless.com" + mlessvid
)
262 mlessvid
= geturls_text
.geturl()
263 if (re
.findall(mregex_text
, mlessvid
)):
264 mlessvid
= re
.findall(mregex_text
, mlessvid
)
265 mlessvid
= mlessvid
[0]
266 if (mtlessgetargs
["verbose"]):
268 if ((re
.match("^galleries",
269 mlessvidid
[1]) and len(mlessvidid
) == 4) or (re
.match("^f",
270 mlessvidid
[1]) and re
.match("^galleries",
271 mlessvidid
[2]) and len(mlessvidid
) == 4) or (re
.match("^term",
272 mlessvidid
[1]) and re
.match("^galleries",
273 mlessvidid
[2]) and len(mlessvidid
) == 4)):
274 geturls_text
= geturls_opener
.open(
275 "http://motherless.com" + mlessvid
+ "?page=1")
276 if (geturls_text
.info().get("Content-Encoding") ==
277 "gzip" or geturls_text
.info().get("Content-Encoding") == "deflate"):
278 strbuf
= StringIO
.StringIO(geturls_text
.read())
279 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
)
280 out_text
= gzstrbuf
.read()[:]
281 if (geturls_text
.info().get("Content-Encoding") !=
282 "gzip" and geturls_text
.info().get("Content-Encoding") != "deflate"):
283 out_text
= geturls_text
.read()[:]
284 out_text
= re
.sub(re
.escape("http://motherless.com"), "", out_text
)
286 re
.escape("http://www.motherless.com"), "", out_text
)
288 re
.escape("https://motherless.com"), "", out_text
)
290 re
.escape("https://www.motherless.com"), "", out_text
)
291 regex_ptext
= re
.escape(
292 "class=\"pop\" rel=\"") + "([0-9]+)" + re
.escape("\">") + "([0-9]+)" + re
.escape("</a>")
293 page_text
= re
.findall(regex_ptext
, out_text
)
295 numpages
= int(page_text
[-1][0])
299 if (not mtlessgetargs
["pages_start"]
300 is None and mtlessgetargs
["pages_start"][0].isdigit()):
301 if (int(mtlessgetargs
["pages_start"][0]) <= numpages
):
302 curpage
= int(mtlessgetargs
["pages_start"][0])
303 if (not mtlessgetargs
["pages_end"]
304 is None and mtlessgetargs
["pages_end"][0].isdigit()):
305 if (int(mtlessgetargs
["pages_end"][0]) >= curpage
):
306 numpages
= int(mtlessgetargs
["pages_end"][0])
307 if (int(mtlessgetargs
["pages_end"][0]) <= numpages
):
308 numpages
= int(mtlessgetargs
["pages_end"][0])
309 while (curpage
<= numpages
):
311 geturls_text
= geturls_opener
.open(
312 "http://motherless.com/" + mlessvid
+ "?page=" + str(curpage
))
313 if (geturls_text
.info().get("Content-Encoding") ==
314 "gzip" or geturls_text
.info().get("Content-Encoding") == "deflate"):
315 strbuf
= StringIO
.StringIO(geturls_text
.read())
316 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
)
317 out_text
= gzstrbuf
.read()[:]
318 if (geturls_text
.info().get("Content-Encoding") !=
319 "gzip" and geturls_text
.info().get("Content-Encoding") != "deflate"):
320 out_text
= geturls_text
.read()[:]
322 re
.escape("http://motherless.com"), "", out_text
)
324 re
.escape("http://www.motherless.com"), "", out_text
)
326 re
.escape("http://motherless.com"), "", out_text
)
328 re
.escape("http://www.motherless.com"), "", out_text
)
329 regex_text
= re
.escape(
330 "") + "([\\w\\/]+)" + re
.escape("\" class=\"img-container\" target=\"_self\">")
331 post_text
= re
.findall(regex_text
, out_text
)
332 numgal
= len(post_text
)
334 while (curgal
< numgal
):
335 mlessgallist
.append(post_text
[curgal
])
336 if (mtlessgetargs
["verbose"]):
337 print(post_text
[curgal
])
339 curpage
= curpage
+ 1
340 if (not re
.match("^galleries",
341 mlessvidid
[1]) or (re
.match("^galleries",
342 mlessvidid
[1]) and len(mlessvidid
) < 4) or (re
.match("^galleries",
343 mlessvidid
[1]) and len(mlessvidid
) > 5)):
344 mlessgallist
.append(mlessvid
)
345 numusrgal
= len(mlessgallist
)
347 while (curusrgal
< numusrgal
):
348 mlessvid
= mlessgallist
[curusrgal
]
349 if (not re
.match("^\\/", mlessvid
)):
350 mlessvid
= "/" + mlessvid
351 mlessvidqstr
= urlparse
.parse_qs(urlparse
.urlparse(mlessvid
).query
)
352 mlessvidid
= urlparse
.urlparse(mlessvid
).path
.split("/")
358 mlessvidid
[1]) and len(mlessvidid
) == 2) or (
361 mlessvidid
[1]) and len(mlessvidid
) == 2) or (
364 mlessvidid
[1]) and len(mlessvidid
) == 2) or (
367 mlessvidid
[1]) and len(mlessvidid
) == 2) or (
370 mlessvidid
[1]) and len(mlessvidid
) == 3) or (
373 mlessvidid
[1]) and len(mlessvidid
) == 3) or (
379 mlessvidid
[2]) or re
.match(
381 mlessvidid
[2])) and len(mlessvidid
) == 4) or (
384 mlessvidid
[1]) and len(mlessvidid
) == 4 and (
387 mlessvidid
[3]) or re
.match(
389 mlessvidid
[3]))) or (
392 mlessvidid
[1]) and len(mlessvidid
) == 3 and (
395 mlessvidid
[2]) or re
.match(
397 mlessvidid
[2]))) or (
400 mlessvidid
[1]) and len(mlessvidid
) == 3 and (
403 mlessvidid
[2]) or re
.match(
405 mlessvidid
[2]) or re
.match(
407 mlessvidid
[2]) or re
.match(
409 mlessvidid
[2]))) or (
412 mlessvidid
[1]) and len(mlessvidid
) == 3 and (
415 mlessvidid
[2]) or re
.match(
417 mlessvidid
[2]) or re
.match(
419 mlessvidid
[2]) or re
.match(
424 if (re
.match("^u", mlessvidid
[1]) and len(mlessvidid
) == 3):
426 if (mlessvidqstr
["t"][0] ==
427 "i" or mlessvidqstr
["t"][0] == "v"):
428 tvaradd
= "&t=" + mlessvidqstr
["t"][0]
434 geturls_text
= geturls_opener
.open(
435 "http://motherless.com" + mlessvid
+ "?page=1" + tvaradd
)
436 if (geturls_text
.info().get("Content-Encoding") ==
437 "gzip" or geturls_text
.info().get("Content-Encoding") == "deflate"):
438 strbuf
= StringIO
.StringIO(geturls_text
.read())
439 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
)
440 out_text
= gzstrbuf
.read()[:]
441 if (geturls_text
.info().get("Content-Encoding") !=
442 "gzip" and geturls_text
.info().get("Content-Encoding") != "deflate"):
443 out_text
= geturls_text
.read()[:]
445 re
.escape("http://motherless.com"), "", out_text
)
447 re
.escape("http://www.motherless.com"), "", out_text
)
449 re
.escape("http://motherless.com"), "", out_text
)
451 re
.escape("http://www.motherless.com"), "", out_text
)
452 regex_ptext
= re
.escape(
453 "class=\"pop\" rel=\"") + "([0-9]+)" + re
.escape("\">") + "([0-9]+)" + re
.escape("</a>")
454 page_text
= re
.findall(regex_ptext
, out_text
)
456 numpages
= int(page_text
[-1][0])
460 if (not mtlessgetargs
["pages_start"]
461 is None and mtlessgetargs
["pages_start"][0].isdigit()):
462 if (int(mtlessgetargs
["pages_start"][0]) <= numpages
):
463 curpage
= int(mtlessgetargs
["pages_start"][0])
464 if (not mtlessgetargs
["pages_end"]
465 is None and mtlessgetargs
["pages_end"][0].isdigit()):
466 if (int(mtlessgetargs
["pages_end"][0]) >= curpage
):
467 numpages
= int(mtlessgetargs
["pages_end"][0])
468 if (int(mtlessgetargs
["pages_end"][0]) <= numpages
):
469 numpages
= int(mtlessgetargs
["pages_end"][0])
470 while (curpage
<= numpages
):
472 geturls_text
= geturls_opener
.open(
473 "http://motherless.com" + mlessvid
+ "?page=" + str(curpage
) + tvaradd
)
474 if (geturls_text
.info().get("Content-Encoding") ==
475 "gzip" or geturls_text
.info().get("Content-Encoding") == "deflate"):
476 strbuf
= StringIO
.StringIO(geturls_text
.read())
477 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
)
478 out_text
= gzstrbuf
.read()[:]
479 if (geturls_text
.info().get("Content-Encoding") !=
480 "gzip" and geturls_text
.info().get("Content-Encoding") != "deflate"):
481 out_text
= geturls_text
.read()[:]
483 re
.escape("http://motherless.com"), "", out_text
)
485 re
.escape("http://www.motherless.com"), "", out_text
)
487 re
.escape("http://motherless.com"), "", out_text
)
489 re
.escape("http://www.motherless.com"), "", out_text
)
490 if (re
.match("^V", mlessvidid
[1])):
492 re
.escape("class=\"img-container\" target=\"_self\""),
493 "title=\"motherless link\"",
496 re
.escape("class=\"pop plain\" target=\"_blank\""),
497 "title=\"motherless link\"",
499 regex_text
= re
.escape(
500 "<a href=\"") + "([\\w\\/]+)" + re
.escape("\" title=\"motherless link\">")
501 if (not re
.match("^V", mlessvidid
[1])):
502 regex_text
= re
.escape(
503 "") + "([\\w\\/]+)" + re
.escape("\" class=\"img-container\" target=\"_self\">")
504 post_text
= re
.findall(regex_text
, out_text
)
505 numurls
= len(post_text
)
507 while (cururl
< numurls
):
508 mlessurllist
.append(post_text
[cururl
])
509 if (mtlessgetargs
["verbose"]):
510 print(post_text
[cururl
])
512 curpage
= curpage
+ 1
517 mlessvidid
[1]) and len(mlessvidid
) == 3 and re
.match(
522 mlessvidid
[1]) and len(mlessvidid
) == 4) or (
523 len(mlessvidid
) == 2 and re
.match(
526 mlessurllist
.append(mlessvid
)
527 numlist
= len(mlessurllist
)
530 while (curlurl
< numlist
):
533 geturls_text
= geturls_opener
.open(
534 "http://motherless.com" + mlessurllist
[curlurl
])
535 except urllib2
.HTTPError
:
537 if (skiplnk
== False):
538 if (geturls_text
.info().get("Content-Encoding") ==
539 "gzip" or geturls_text
.info().get("Content-Encoding") == "deflate"):
540 strbuf
= StringIO
.StringIO(geturls_text
.read())
541 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
)
542 subout_text
= gzstrbuf
.read()[:]
543 if (geturls_text
.info().get("Content-Encoding") !=
544 "gzip" and geturls_text
.info().get("Content-Encoding") != "deflate"):
545 subout_text
= geturls_text
.read()[:]
546 subout_text
= re
.sub(
547 re
.escape("http://motherless.com"), "", subout_text
)
548 subout_text
= re
.sub(
549 re
.escape("http://www.motherless.com"), "", subout_text
)
550 subout_text
= re
.sub(
551 re
.escape("http://motherless.com"), "", subout_text
)
552 subout_text
= re
.sub(
553 re
.escape("http://www.motherless.com"), "", subout_text
)
554 regex_title
= re
.escape(
555 "<title>") + "(.*)" + re
.escape("</title>")
556 title_text
= re
.findall(regex_title
, subout_text
)
558 re
.escape(" - MOTHERLESS.COM"), "", title_text
[0])
559 regex_thumb
= re
.escape(
560 "src="") + "(.*)" + re
.escape(""")
561 thumb_text
= re
.findall(regex_thumb
, subout_text
)
562 mlessthumb
= thumb_text
[0]
563 regex_text
= re
.escape(
564 "__fileurl = '") + "(.*)" + re
.escape("';")
565 post_text
= re
.findall(regex_text
, subout_text
)
566 regex_img
= re
.escape(
567 "<meta property=\"og:image\" content=\"") + "(.*)" + re
.escape("\">")
568 img_text
= re
.findall(regex_img
, subout_text
)
569 mlessimg
= img_text
[0]
570 regex_mediatype
= re
.escape(
571 "__mediatype = '") + "(.*)" + re
.escape("',")
572 mediatype_text
= re
.findall(regex_mediatype
, subout_text
)
573 regex_altimg
= re
.escape("<link rel=\"image_src\" type=\"image/") + \
574 "(.*)" + re
.escape("\" href=\"") + "(.*)" + re
.escape("\">")
575 altimg_text
= re
.findall(regex_altimg
, subout_text
)
576 mlessaltimg
= altimg_text
[0][1]
577 regex_usrname
= re
.escape("<a href=\"/u/") + "([\\w]+)" + re
.escape(
578 "\" class=\"pop plain thumb-member-link-uploads\">Uploads</a>")
579 usrname_text
= re
.findall(regex_usrname
, subout_text
)
580 mlessusrname
= usrname_text
[0]
581 mlessid
= re
.sub("^" + re
.escape("/"), "",
582 mlessurllist
[curlurl
])
583 mlesspurl
= "http://motherless.com" + mlessurllist
[curlurl
]
584 regex_numviews
= re
.escape(
585 "<strong>Views</strong>") + "\n+\t+([^\t]+)\t+" + re
.escape("</h2>")
586 numviews_text
= re
.findall(regex_numviews
, subout_text
)
587 mlessnumviews
= numviews_text
[0]
588 mlessnumviews
= re
.sub(re
.escape(","), "", mlessnumviews
)
589 regex_numfavs
= re
.escape(
590 "<strong>Favorited</strong>") + "\n+\t+([^\t]+)\t+" + re
.escape("</h2>")
591 numfavs_text
= re
.findall(regex_numfavs
, subout_text
)
592 mlessnumfavs
= numfavs_text
[0]
593 mlessnumfavs
= re
.sub(re
.escape(","), "", mlessnumfavs
)
594 ''' some good regex "!-%'-?A-~ " "!-%'-?A-~ \\<\\>\"\'\\@\\#" '''
595 regex_postdata
= re
.escape("<div class=\"media-comment-contents\">") + "\n\t+" + re
.escape("<h4>") + "\n\t+" + re
.escape("<a href=\"/m/") + "([\\w]+)" + re
.escape("\" class=\"pop plain\" target=\"_blank\">") + "\n\t+([^\t]+)\t+" + re
.escape(
596 "</a>") + "\n\t+" + re
.escape("</h4>") + "\n\t+" + re
.escape("<div class=\"media-comment-meta\">") + "\n\t+([^\t]+)\t+" + re
.escape("</div>") + "\n\t+" + re
.escape("<div style=\"text-align: justify;\">") + "\n\t+([^\t]+)\t+" + re
.escape("</div>")
597 postdata_text
= re
.findall(regex_postdata
, subout_text
)
598 numpost
= len(postdata_text
)
599 regex_servsecs
= re
.escape(
600 "Served by web") + "([0-9]+)" + re
.escape(" in ") + "([0-9\\.]+)" + re
.escape(" seconds")
601 servsecs_text
= re
.findall(regex_servsecs
, subout_text
)
602 servname
= "web" + servsecs_text
[0][0]
603 servsecs
= float(servsecs_text
[0][1])
606 ''' From Amber @ http://stackoverflow.com/a/9662362 '''
607 TAG_RE
= re
.compile(r
'<[^>]+>')
608 while (numpost
> 0 and curpost
< numpost
):
610 re
.escape("<br>"), "\n", postdata_text
[curpost
][3])
612 re
.escape("<br/>"), "\n", newpostext
)
614 re
.escape("<br />"), "\n", newpostext
)
615 newpostext
= TAG_RE
.sub('', newpostext
)
617 re
.escape("/") + "([\\w\\/]+)",
618 r
"http://motherless.com/\1",
620 mlesspostlist
.append(
622 "username": postdata_text
[curpost
][0],
623 "avatar": "http://avatars.motherlessmedia.com/avatars/member/" +
624 postdata_text
[curpost
][0] +
626 "smallavatar": "http://avatars.motherlessmedia.com/avatars/member/" +
627 postdata_text
[curpost
][0] +
630 curpost
= curpost
+ 1
632 mlesslink
= post_text
[0]
633 mlessext
= os
.path
.splitext(
634 urlparse
.urlparse(mlesslink
).path
)[1]
635 mlessext
= mlessext
.replace(".", "")
636 mlessext
= mlessext
.lower()
637 if (mtlessgetargs
["id"] == False):
638 mlessfname
= urlparse
.urlsplit(
639 mlesslink
).path
.split("/")[-1]
640 if (mtlessgetargs
["id"]):
642 re
.escape("/"), "_", mlessid
) + "." + mlessext
643 if (not mlessext
== "mp4" and not mlessext
== "flv"):
645 regex_ii_dimensions
= re
.escape("style=\"width: ") + "([0-9]+)" + re
.escape(
646 "px; height: ") + "([0-9]+)" + re
.escape("px; border: none;\"")
647 post_ii_dimensions
= re
.findall(
648 regex_ii_dimensions
, subout_text
)
649 post_ii_width
= post_ii_dimensions
[0][0]
650 post_ii_height
= post_ii_dimensions
[0][1]
652 "width": int(post_ii_height
),
653 "height": int(post_ii_width
),
654 "views": int(mlessnumviews
),
655 "favorites": int(mlessnumfavs
)}
656 if (mlessext
== "mp4" or mlessext
== "flv"):
658 mlesslink
= mlesslink
+ "?start=0"
659 regex_vi_file
= re
.escape(
660 "\"file\" : \"") + "(.*)" + re
.escape("\",")
661 post_vi_file
= re
.findall(
662 regex_vi_file
, subout_text
)
663 regex_vi_image
= re
.escape(
664 "\"image\" : \"") + "(.*)" + re
.escape("\",")
665 post_vi_image
= re
.findall(
666 regex_vi_image
, subout_text
)
667 regex_vi_height
= re
.escape(
668 "\"height\" : ") + "([0-9]+)" + re
.escape(",")
669 post_vi_height
= re
.findall(
670 regex_vi_height
, subout_text
)
671 regex_vi_width
= re
.escape(
672 "\"width\" : ") + "([0-9]+)" + re
.escape(",")
673 post_vi_width
= re
.findall(
674 regex_vi_width
, subout_text
)
675 regex_vi_filethumb
= re
.escape(
676 "\"file\": ") + "(.*)" + re
.escape(",")
677 post_vi_filethumb
= re
.findall(
678 regex_vi_filethumb
, subout_text
)
679 regex_vi_kind
= re
.escape(
680 "\"kind\": \"") + "(.*)" + re
.escape("\"")
681 post_vi_kind
= re
.findall(
682 regex_vi_kind
, subout_text
)
684 "file": post_vi_file
[0],
685 "image": post_vi_image
[0],
690 "views": int(mlessnumviews
),
691 "favorites": int(mlessnumfavs
),
692 "filethumb": post_vi_filethumb
[0],
693 "thumbstrip": "http://thumbs.motherlessmedia.com/thumbs/" +
696 "kind": post_vi_kind
[0]}
697 if (mtlessgetargs
["verbose"]):
700 mlesslistitms
.update({"id": mlessid
})
701 mlesslistitms
.update({"title": mlesstitle
})
702 mlesslistitms
.update({"format": mlessext
})
703 mlesslistitms
.update({"filename": mlessfname
})
704 mlesslistitms
.update({"thumbnail": mlessthumb
})
705 mlesslistitms
.update({"servername": servname
})
706 mlesslistitms
.update({"servingtime": servsecs
})
707 mlesslistitms
.update({"mediatype": mediatype_text
[0]})
708 if (not mlessext
== "mp4" and not mlessext
== "flv"):
709 mlesslistitms
.update({"vidpic": mlesslink
})
710 mlesslistitms
.update({"type": "image"})
711 mlesslistitms
.update({"info": imginfo
})
712 mlesslistitms
.update(
713 {"dimensions": str(imginfo
["width"]) + "x" + str(imginfo
["height"])})
714 mlesslistitms
.update({"width": imginfo
["width"]})
715 mlesslistitms
.update({"height": imginfo
["height"]})
716 mlesslistitms
.update({"views": imginfo
["views"]})
717 mlesslistitms
.update(
718 {"favorites": imginfo
["favorites"]})
719 if (mlessext
== "mp4" or mlessext
== "flv"):
720 mlesslistitms
.update({"vidpic": mlessimg
})
721 mlesslistitms
.update({"type": "video"})
722 mlesslistitms
.update({"info": vidinfo
})
723 mlesslistitms
.update(
724 {"dimensions": str(vidinfo
["width"]) + "x" + str(vidinfo
["height"])})
725 mlesslistitms
.update({"width": vidinfo
["width"]})
726 mlesslistitms
.update({"height": vidinfo
["height"]})
727 mlesslistitms
.update({"views": vidinfo
["views"]})
728 mlesslistitms
.update(
729 {"favorites": vidinfo
["favorites"]})
730 mlesslistitms
.update({"username": mlessusrname
})
731 mlesslistitms
.update(
732 {"avatar": "http://avatars.motherlessmedia.com/avatars/member/" + mlessusrname
+ ".jpg"})
733 mlesslistitms
.update(
735 "smallavatar": "http://avatars.motherlessmedia.com/avatars/member/" +
738 mlesslistitms
.update({"posts": mlesspostlist
})
739 mlesslistitms
.update({"pageurl": mlesspurl
})
740 mlesslistitms
.update({"url": mlesslink
})
741 mlessoutlist
.append(mlesslistitms
)
742 if (curlurl
< (numlist
- 1)):
743 time
.sleep(per_url_sleep
)
744 curlurl
= curlurl
+ 1
745 if (curusrgal
< (numusrgal
- 1)):
746 time
.sleep(per_gal_sleep
)
747 curusrgal
= curusrgal
+ 1
748 cururlarg
= cururlarg
+ 1
752 if (__name__
== "__main__"):
753 mtlesslinks
= motherless_dl()
754 mtlesslncount
= len(mtlesslinks
)
756 while (mtlesscurln
< mtlesslncount
):
758 print(mtlesslinks
[mtlesscurln
]["id"])
759 if (getargs
.get_title
):
760 print(mtlesslinks
[mtlesscurln
]["title"])
761 if (getargs
.get_posts
):
762 numpost
= len(mtlesslinks
[mtlesscurln
]["posts"])
765 while (numpost
> 0 and curpost
< numpost
):
767 mtlesslinks
[mtlesscurln
]["posts"][curpost
]["username"] +
769 mtlesslinks
[mtlesscurln
]["posts"][curpost
]["post"])
770 curpost
= curpost
+ 1
771 if (getargs
.get_format
):
772 print(mtlesslinks
[mtlesscurln
]["format"])
773 if (getargs
.get_type
):
774 print(mtlesslinks
[mtlesscurln
]["type"])
775 if (getargs
.get_filename
):
776 print(mtlesslinks
[mtlesscurln
]["filename"])
777 if (getargs
.get_thumbnail
):
778 print(mtlesslinks
[mtlesscurln
]["thumbnail"])
779 if (mtlesslinks
[mtlesscurln
]["format"] ==
780 "mp4" or mtlesslinks
[mtlesscurln
]["format"] == "flv"):
781 print(mtlesslinks
[mtlesscurln
]["vidpic"])
782 if (getargs
.get_username
):
783 print(mtlesslinks
[mtlesscurln
]["username"])
784 if (getargs
.get_pageurl
):
785 print(mtlesslinks
[mtlesscurln
]["pageurl"])
786 if (getargs
.get_bbcode
):
787 print("[URL=" + mtlesslinks
[mtlesscurln
]["pageurl"] + "][IMG]" +
788 mtlesslinks
[mtlesscurln
]["thumbnail"] + "[/IMG][/URL]")
789 if (getargs
.get_html
):
792 mtlesslinks
[mtlesscurln
]["pageurl"] +
794 mtlesslinks
[mtlesscurln
]["thumbnail"] +
796 if (getargs
.get_dimensions
):
797 print(mtlesslinks
[mtlesscurln
]["dimensions"])
798 if (getargs
.get_width
):
799 print(str(mtlesslinks
[mtlesscurln
]["width"]))
800 if (getargs
.get_height
):
801 print(str(mtlesslinks
[mtlesscurln
]["height"]))
802 if (getargs
.get_views
):
803 print(mtlesslinks
[mtlesscurln
]["views"])
804 if (getargs
.get_favorites
):
805 print(mtlesslinks
[mtlesscurln
]["favorites"])
806 if (getargs
.get_url
or (getargs
.get_id
== False and getargs
.get_title
== False and getargs
.get_posts
== False and getargs
.get_format
== False and getargs
.get_filename
== False and getargs
.get_thumbnail
== False and getargs
.get_username
== False and getargs
.get_pageurl
==
807 False and getargs
.get_bbcode
== False and getargs
.get_html
== False and getargs
.get_dimensions
== False and getargs
.get_width
== False and getargs
.get_height
== False and getargs
.get_views
== False and getargs
.get_favorites
== False and getargs
.get_type
== False)):
808 print(mtlesslinks
[mtlesscurln
]["url"])
809 mtlesscurln
= mtlesscurln
+ 1