Yet another small update.
[Python-Scripts.git] / MiniScripts / get_ylp.py
bloba4dcb0e4853f563ce87bf0bb500863d5ddfbed23
#!/usr/bin/env python

'''
This program is free software; you can redistribute it and/or modify
it under the terms of the Revised BSD License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Revised BSD License for more details.

Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski

$FileInfo: get_ylp.py - Last Update: 10/07/2013 Ver. 1.0.5 RC 6 - Author: cooldude2k $
'''

# NOTE: this is a Python 2 script (urllib2 / cookielib / StringIO).
import re
import os
import sys
import urllib
import urllib2
import cookielib
import StringIO
import gzip
import zlib
import time
import datetime
import argparse
import urlparse

# User-Agent string sent with every request.
fakeua = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0"

# Seconds to pause after each image, and after each finished gallery.
wait_time1 = 4
wait_time2 = wait_time1 + 4

SITE_URL = "http://photos.younglegalporn.com/"
IMAGE_HOST = "http://content1.sexforsure.com/"
OUTPUT_DIR = "./younglegalporn/"


def _make_opener(cookie_jar, referer):
    """Return a urllib2 opener bound to cookie_jar that sends the script's
    fixed request headers with the given Referer."""
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [
        ("Referer", referer),
        ("User-Agent", fakeua),
        ("Accept-Encoding", "gzip, deflate"),
        ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"),
        ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
        ("Connection", "close"),
    ]
    return opener


def _read_body(response):
    """Read response fully, undo its Content-Encoding, and close it.

    gzip bodies go through gzip.GzipFile.  deflate bodies go through zlib
    (GzipFile cannot read them); zlib-wrapped data is tried first and raw
    deflate is the fallback.  Any other encoding is returned as received.
    """
    try:
        encoding = response.info().get("Content-Encoding")
        raw = response.read()
    finally:
        response.close()
    if encoding == "gzip":
        return gzip.GzipFile(fileobj=StringIO.StringIO(raw)).read()
    if encoding == "deflate":
        try:
            return zlib.decompress(raw)
        except zlib.error:
            # Headerless (raw) deflate stream.
            return zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw


def _ensure_dir(path, label):
    """Create path if it does not exist, logging with the given line prefix."""
    if not os.path.exists(path):
        print(label + " Making Directory: " + path)
        os.mkdir(path)


def _image_regex(pics_separator):
    """Build the pattern matching one full-size image link on a gallery page.

    Captures the four numeric path components and the image number;
    pics_separator is the literal text between them ("/pics/" or "//pics/").
    """
    return (re.escape("<a href=\"" + IMAGE_HOST)
            + re.escape("/").join(["([0-9]+)"] * 4)
            + re.escape(pics_separator)
            + "([0-9]+)"
            + re.escape(".jpg\"><img src=\""))


def _find_images(page_text):
    """Return the image-link matches found in a gallery page."""
    # Collapse doubled slashes everywhere except in the "://" of a URL
    # scheme, which is temporarily widened so the collapse restores it.
    page_text = page_text.replace("://", ":///")
    page_text = page_text.replace("//", "/")
    page_text = page_text.replace("//pics/", "/pics/")
    page_text = page_text.replace("/pics//", "/pics/")
    matches = re.findall(_image_regex("/pics/"), page_text)
    if not matches:
        matches = re.findall(_image_regex("//pics/"), page_text)
    return matches


def _download_gallery(cookie_jar, number, gallery_id, gallery_key, gallery_total):
    """Download every image of one gallery into OUTPUT_DIR/<gallery_id>/.

    number is the 1-based gallery position used as the log-line prefix.
    """
    tag = str(number)
    gallery_dir = OUTPUT_DIR + gallery_id + "/"
    gallery_url = SITE_URL + gallery_id + "/" + gallery_key + "/"

    _ensure_dir(gallery_dir, tag)
    print(tag + " Start Downloading Image Gallery: " + tag + " of " + str(gallery_total))
    print(tag + " Start Downloading Image Gallery: " + gallery_id)
    print(tag + " Reading Image Gallery: " + gallery_url)
    page_opener = _make_opener(cookie_jar, SITE_URL)
    images = _find_images(_read_body(page_opener.open(gallery_url)))

    image_total = len(images)
    print(tag + " Found " + str(image_total) + " JPEG Images.")

    # One opener per gallery: the Referer is the gallery page for all images.
    image_opener = _make_opener(cookie_jar, gallery_url)
    # Iterate the matches themselves rather than indexing the list by the
    # image number parsed from the URL, so an empty gallery or numbering
    # that does not start at 1 cannot raise IndexError.
    for position, parts in enumerate(images):
        image_tag = tag + "," + str(position + 1)
        image_url = IMAGE_HOST + "/".join(parts[:4]) + "/pics/" + parts[4] + ".jpg"
        image_path = gallery_dir + parts[4] + ".jpg"

        print(image_tag + " Start Downloading Image File: " + str(position + 1) + " of " + str(image_total))
        print(image_tag + " Downloading Image: " + image_url)
        # The request advertises gzip/deflate, so decode before saving.
        image_data = _read_body(image_opener.open(image_url))
        print(image_tag + " Finished Downloading Image: " + image_url)
        print(image_tag + " Saving File: " + image_path)
        with open(image_path, "wb") as jpegf:
            jpegf.write(image_data)
        print(image_tag + " Finished Saving File: " + image_path)
        time.sleep(wait_time1)

    print(tag + " Finished Downloading Image Gallery: " + gallery_id)


def main():
    """Fetch the site index, then download each listed gallery in turn."""
    # A single cookie jar is shared by every opener.
    cookie_jar = cookielib.CookieJar()
    index_opener = _make_opener(cookie_jar, "http://www.google.com/search?q=younglegalporn")
    print("0 Reading URL: " + SITE_URL)
    out_text = _read_body(index_opener.open(SITE_URL))

    # Each match is (8 hex chars, 14 alphanumeric chars): the two path
    # components of a gallery URL.
    regex_text = (re.escape("<li><a href=\"" + SITE_URL)
                  + "([a-fA-F0-9]{8})" + re.escape("/")
                  + "([a-zA-Z0-9]{14})" + re.escape("/\"><img title=\""))
    post_text = re.findall(regex_text, out_text)
    il = len(post_text)
    print("0 Found " + str(il) + " Image Galleries.")
    _ensure_dir(OUTPUT_DIR, "0")

    for i, (gallery_id, gallery_key) in enumerate(post_text):
        _download_gallery(cookie_jar, i + 1, gallery_id, gallery_key, il)
        time.sleep(wait_time2)
        if i + 1 < il:
            print(str(i + 2) + " Next Image Gallery Download: " + post_text[i + 1][0])
    print("Full Download Completed Successfully")


if __name__ == "__main__":
    main()