MiniScripts/get_ylp.py

   1 #!/usr/bin/env python
   2
   3 '''
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the Revised BSD License.
   6
   7     This program is distributed in the hope that it will be useful,
   8     but WITHOUT ANY WARRANTY; without even the implied warranty of
   9     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10     Revised BSD License for more details.
  11
  12     Copyright 2013 Cool Dude 2k - http://idb.berlios.de/
  13     Copyright 2013 Game Maker 2k - http://intdb.sourceforge.net/
  14     Copyright 2013 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
  15
  16     $FileInfo: get_ylp.py - Last Update: 10/07/2013 Ver. 1.0.5 RC 6 - Author: cooldude2k $
  17 '''
  18
import argparse
import cookielib
import datetime
import gzip
import os
import re
import StringIO
import sys
import time
import urllib
import urllib2
import urlparse
import zlib
  20
  21 fakeua = "Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0"";
  22 geturls_cj = cookielib.CookieJar();
  23 geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(geturls_cj));
  24 geturls_opener.addheaders = [("Referer", "http://www.google.com/search?q=younglegalporn"), ("User-Agent", fakeua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
  25 print("0 Reading URL: http://photos.younglegalporn.com/");
  26 geturls_text = geturls_opener.open("http://photos.younglegalporn.com/");
  27 if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
  28  strbuf = StringIO.StringIO(geturls_text.read());
  29  gzstrbuf = gzip.GzipFile(fileobj=strbuf);
  30  out_text = gzstrbuf.read()[:];
  31 if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
  32  out_text = geturls_text.read()[:];
  33 regex_text = re.escape("<li><a href=\"http://photos.younglegalporn.com/")+"([a-fA-F0-9]{8})"+re.escape("/")+"([a-zA-Z0-9]{14})"+re.escape("/\"><img title=\"");
  34 post_text = re.findall(regex_text, out_text);
  35 wait_time1 = 4;
  36 wait_time2 = wait_time1 + 4;
  37 i = 0;
  38 il = len(post_text);
  39 print("0 Found "+str(il)+" Image Galleries.");
  40 if(not os.path.exists("./younglegalporn/")):
  41  print("0 Making Directory: ./younglegalporn/");
  42  os.mkdir("./younglegalporn/");
  43 while(i < il):
  44  if(not os.path.exists("./younglegalporn/"+post_text[i][0]+"/")):
  45   print(str(i+1)+" Making Directory: ./younglegalporn/"+post_text[i][0]+"/");
  46   os.mkdir("./younglegalporn/"+post_text[i][0]+"/");
  47  getsuburls_cj = geturls_cj;
  48  getsuburls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(getsuburls_cj));
  49  getsuburls_opener.addheaders = [("Referer", "http://photos.younglegalporn.com/"), ("User-Agent", fakeua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
  50  print(str(i+1)+" Start Downloading Image Gallery: "+str(i+1)+" of "+str(il));
  51  print(str(i+1)+" Start Downloading Image Gallery: "+post_text[i][0]);
  52  print(str(i+1)+" Reading Image Gallery: http://photos.younglegalporn.com/"+post_text[i][0]+"/"+post_text[i][1]+"/");
  53  getsuburls_text = getsuburls_opener.open("http://photos.younglegalporn.com/"+post_text[i][0]+"/"+post_text[i][1]+"/");
  54  if(getsuburls_text.info().get("Content-Encoding")=="gzip" or getsuburls_text.info().get("Content-Encoding")=="deflate"):
  55   substrbuf = StringIO.StringIO(getsuburls_text.read());
  56   gzsubstrbuf = gzip.GzipFile(fileobj=substrbuf);
  57   subout_text = gzsubstrbuf.read()[:];
  58  if(getsuburls_text.info().get("Content-Encoding")!="gzip" and getsuburls_text.info().get("Content-Encoding")!="deflate"):
  59   subout_text = getsuburls_text.read()[:];
  60  subout_text = subout_text.replace("://", ":///");
  61  subout_text = subout_text.replace("//", "/");
  62  subout_text = subout_text.replace("//pics/", "/pics/");
  63  subout_text = subout_text.replace("/pics//", "/pics/");
  64  subregex_text = re.escape("<a href=\"http://content1.sexforsure.com/")+"([0-9]+)"+re.escape("/")+"([0-9]+)"+re.escape("/")+"([0-9]+)"+re.escape("/")+"([0-9]+)"+re.escape("/pics/")+"([0-9]+)"+re.escape(".jpg\"><img src=\"");
  65  subpost_text = re.findall(subregex_text, subout_text);
  66  if(len(subpost_text)==0):
  67   subregex_text = re.escape("<a href=\"http://content1.sexforsure.com/")+"([0-9]+)"+re.escape("/")+"([0-9]+)"+re.escape("/")+"([0-9]+)"+re.escape("/")+"([0-9]+)"+re.escape("//pics/")+"([0-9]+)"+re.escape(".jpg\"><img src=\"");
  68   subpost_text = re.findall(subregex_text, subout_text);
  69  subi = int(subpost_text[0][4]) - 1;
  70  subil = int(subpost_text[-1][4]);
  71  print(str(i+1)+" Found "+subpost_text[-1][4]+" JPEG Images.");
  72  while(subi < subil):
  73   getsub2xurls_cj = geturls_cj;
  74   getsub2xurls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(getsub2xurls_cj));
  75   getsub2xurls_opener.addheaders = [("Referer", "http://photos.younglegalporn.com/"+post_text[i][0]+"/"+post_text[i][1]+"/"), ("User-Agent", fakeua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en-CA,en-GB,en-UK,en-AU,en-NZ,en-ZA,en;q=0.5"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
  76   print(str(i+1)+","+str(subi+1)+" Start Downloading Image File: "+str(subi+1)+" of "+str(subil));
  77   print(str(i+1)+","+str(subi+1)+" Downloading Image: http://content1.sexforsure.com/"+subpost_text[subi][0]+"/"+subpost_text[subi][1]+"/"+subpost_text[subi][2]+"/"+subpost_text[subi][3]+"/pics/"+subpost_text[subi][4]+".jpg");
  78   getsub2xurls_text = getsub2xurls_opener.open("http://content1.sexforsure.com/"+subpost_text[subi][0]+"/"+subpost_text[subi][1]+"/"+subpost_text[subi][2]+"/"+subpost_text[subi][3]+"/pics/"+subpost_text[subi][4]+".jpg");
  79   print(str(i+1)+","+str(subi+1)+" Finished Downloading Image: http://content1.sexforsure.com/"+subpost_text[subi][0]+"/"+subpost_text[subi][1]+"/"+subpost_text[subi][2]+"/"+subpost_text[subi][3]+"/pics/"+subpost_text[subi][4]+".jpg");
  80   print(str(i+1)+","+str(subi+1)+" Saving File: ./younglegalporn/"+post_text[i][0]+"/"+subpost_text[subi][4]+".jpg");
  81   jpegf = open("./younglegalporn/"+post_text[i][0]+"/"+subpost_text[subi][4]+".jpg", "wb");
  82   jpegf.write(getsub2xurls_text.read());
  83   jpegf.close();
  84   print(str(i+1)+","+str(subi+1)+" Finished Saving File: ./younglegalporn/"+post_text[i][0]+"/"+subpost_text[subi][4]+".jpg");
  85   time.sleep(wait_time1);
  86   subi = subi + 1;
  87  print(str(i+1)+" Finished Downloading Image Gallery: "+post_text[i][0]);
  88  time.sleep(wait_time2);
  89  i = i + 1;
  90  if(i < il):
  91   print(str(i+1)+" Next Image Gallery Download: "+post_text[i][0]);
  92 print("Full Download Completed Successfully");