Automated update from: http://smariot.no-ip.org/translate
[QuestHelper.git] / Development / gmail.py
blobd295d588572d04ec97549aad5b825090ac6f6a3a
1 #!/usr/bin/python
3 #Note: We have a custom version of libgmail to fix a bug involving binary attachments (which obviously we have.)
5 import warnings
6 warnings.simplefilter("ignore",DeprecationWarning)
8 import libgmail
9 import md5
10 import sys
11 import passwords
12 import os
13 import commands
14 import re
15 import time
16 import random
# Remove any rawdata_* files from the current directory before starting
# (presumably compressed staging files left behind by an earlier run).
os.system("rm rawdata_*")

# md5 hex digest of an attachment -> filename suffix (from the first "."
# onwards) under which that attachment was stored on S3.
filehashdict = dict()

# Disabled: pre-populate filehashdict from the current S3 bucket listing.
#ct = 0
#tregex = re.compile("rawdata_([0-9a-f]{32,32})(.*)\.bz2")
#outp = commands.getoutput("s3cmd ls s3://questhelper_data/rawdata_")
#print "S3 listing snagged"
#for line in outp.split('\n'):
#  if line == "Bucket 's3://questhelper_data':":
#    continue
#  serch = tregex.search(line)
#  if not serch:
#    print line
#  toki = serch.group(1)
#  ext = serch.group(2)
#  #print toki
#  filehashdict[toki] = ext
#print "Filenames isolated: %d" % len(filehashdict)

# Log in with the credentials kept in the local "passwords" module.
ga = libgmail.GmailAccount(passwords.gmail_username, passwords.gmail_password)
ga.login()

# Directory prefix under which downloaded attachments are written.
destination = "./LocalInput/"
# Label applied to threads once they have been handled.
label = passwords.gmail_label

# The label-excluding query is built first and then immediately replaced by
# the plain attachment query; only the second value is ever sent to Gmail.
argument = "!label:" + label + " has:attachment"
argument = "has:attachment"

# First batch of matching threads, and the running count of handled messages.
inbox = ga.getMessagesByQuery(argument)
i = 0
# NOTE(review): this section was recovered from a copy of the file whose
# indentation had been stripped, so the nesting below is reconstructed from
# statement order.  Two placements could not be determined with certainty and
# are flagged inline -- confirm them against the repository before merging.

# Seconds to wait before retrying a failed "s3cmd get".
_S3_RETRY_SECONDS = 30


def _shell_quote(text):
    """Return text single-quoted so /bin/sh treats it as one literal word.

    Attachment filenames come from email and may contain quotes, "$" or
    backticks; embedding them between double quotes is not safe.
    """
    return "'" + text.replace("'", "'\\''") + "'"


def _run(command):
    """Run command through the shell; raise RuntimeError on a non-zero status.

    Replaces assert(os.system(...) == 0): under "python -O" asserts are
    stripped, which would silently skip the command itself.
    """
    status = os.system(command)
    if status != 0:
        raise RuntimeError("command failed (status %d): %s" % (status, command))


def _store_attachment(attachment):
    """Save one attachment locally, then upload it to S3 or check it against S3.

    The file is written to destination as <md5 hex digest><suffix>, where
    suffix is the sanitized filename from its first "." onwards.  If the digest
    is not yet in filehashdict the file is bzip2-compressed and uploaded with
    s3cmd; otherwise the previously uploaded copy is fetched, decompressed and
    diffed against the fresh download.  Attachments without content are only
    reported.

    Raises RuntimeError if a required shell command fails.
    """
    # "*" would be expanded by the shell globs used for cleanup and "/" would
    # let a crafted name escape the destination directory.
    attachment.filename = (attachment.filename.encode('ascii', 'ignore')
                           .replace('*', '_').replace('/', '_'))
    print('\t\t filename: ' + attachment.filename)

    cont = attachment.content
    if cont is None:
        print("foobared attachment")
        return

    dig = md5.new()
    dig.update(cont)
    pre = dig.hexdigest()
    tup = attachment.filename.partition(".")
    suffix = tup[1] + tup[2]
    name = pre + suffix
    local_path = destination + name

    # Attachments are binary, so write them in binary mode.
    f = open(local_path, "wb")
    try:
        f.write(cont)
    finally:
        f.close()
    print("\t\t saved")

    s3name = "rawdata_" + name + ".bz2"
    if pre not in filehashdict:
        # okay, that's cool. Now we S3 it.
        _run("bzip2 -k --best -c %s > %s"
             % (_shell_quote(local_path), _shell_quote(s3name)))
        _run("s3cmd put %s s3://questhelper_data/" % _shell_quote(s3name))
        _run("rm rawdata_*")
        print("\t\t S3 saved")
        # we only look at the first page of emails, over and over. this way,
        # on the second pass through that page, we'll get and delete instead
        # of just re-storing over and over.
        filehashdict[pre] = suffix
    else:
        s3oldname = "rawdata_" + pre + filehashdict[pre] + ".bz2"
        if s3oldname != s3name:
            print("\t\t WARNING: Name mismatch! %s vs %s" % (s3name, s3oldname))
        s3cg = "s3cmd --force get %s %s" % (
            _shell_quote("s3://questhelper_data/" + s3oldname),
            _shell_quote(s3oldname))
        # Keep retrying until the download succeeds.
        while os.system(s3cg) != 0:
            print("\t\t s3cmd failed, sleeping for %d seconds . . ."
                  % _S3_RETRY_SECONDS)
            time.sleep(_S3_RETRY_SECONDS)
        _run("cat %s | bunzip2 > rawdata_temptest" % _shell_quote(s3oldname))
        # diff exits non-zero when the stored copy differs from the download.
        _run("diff -q rawdata_temptest %s" % _shell_quote(local_path))
        _run("rm rawdata_temptest %s" % _shell_quote(s3oldname))

    # NOTE(review): assumed to run after BOTH branches (the local copy is only
    # staging); it may originally have belonged to the verify branch alone.
    _run("rm %s" % _shell_quote(local_path))


print(repr(len(inbox)) + " messages")
# The query only returns one page of threads, so re-run it until it is empty.
while len(inbox) > 0:
    for thread in inbox:
        # Label the thread afterwards only if it does not carry the label yet.
        mark = thread.getLabels().count(label) == 0
        for message in thread:
            clear = True  # every handled message is currently trashed
            os.system("date")
            print("message " + repr(i) + " id: " + message.id)

            # we used to make sure it had the right label, or more
            # accurately, didn't
            print('\t' + repr(len(message.attachments)) + " attachments")
            # Sampling hook: randint(1, 1) always yields 1, so every message
            # is processed and the "Skipping" branch is currently unreachable.
            sample = random.randint(1, 1)
            if sample == 1:
                for a in message.attachments:
                    _store_attachment(a)
            else:
                print("\t Skipping")

            if clear:
                print("\t Trashing")
                ga.trashMessage(message)
            i = i + 1

        # NOTE(review): assumed to run once per thread; it may originally have
        # run once per message inside the loop above.
        if mark:
            print("\t Marking")
            thread.addLabel(label)

    inbox = ga.getMessagesByQuery(argument)

print(repr(i) + " messages examined and saved")

# Final sweep of compressed staging files in the current directory.
os.system("rm rawdata_*")