3 #Note: We have a custom version of libgmail to fix a bug involving binary attachments (which obviously we have.)
# DeprecationWarning output is suppressed for the remainder of the run.
warnings.simplefilter("ignore", category=DeprecationWarning)

# Remove any rawdata_* files in the current working directory before starting.
# The exit status is deliberately not checked: nothing matching is fine.
os.system("rm rawdata_*")
23 #tregex = re.compile("rawdata_([0-9a-f]{32,32})(.*)\.bz2")
24 #outp = commands.getoutput("s3cmd ls s3://questhelper_data/rawdata_")
25 #print "S3 listing snagged"
26 #for line in outp.split('\n'):
27 # if line == "Bucket 's3://questhelper_data':":
29 # serch = tregex.search(line)
32 # toki = serch.group(1)
33 # ext = serch.group(2)
35 # filehashdict[toki] = ext
36 #print "Filenames isolated: %d" % len(filehashdict)
# Build the Gmail account object from the credentials kept in the local
# `passwords` module.
ga = libgmail.GmailAccount(passwords.gmail_username, passwords.gmail_password)

# Directory that downloaded attachments are written into.
destination = "./LocalInput/"
# Gmail label this script checks for and attaches to threads further down.
label = passwords.gmail_label

# Search query.  It used to be "!label:<label> has:attachment" (skip threads
# that already carry the label), but that value was overwritten on the very
# next statement, so only the effective query is kept here.
argument = "has:attachment"
inbox = ga.getMessagesByQuery(argument)

# Same output as the old backtick-repr form; len() always yields a plain int.
print("%d messages" % len(inbox))
# Per-message / per-attachment processing.
# NOTE(review): this region is missing statements that the code below relies on
# (`thread`, `i`, `pre` and `filehashdict` are all bound elsewhere) and the
# original indentation is not recoverable, so the nesting of the statements
# below must be confirmed against the real file before changing anything.
#
# Iterate over the messages of the current `thread`.
53 for message
in thread
:
# Tests whether the thread already carries `label`; the body of this branch is
# not visible here.
55 if thread
.getLabels().count(label
) != 0:
# Progress line: running counter `i` plus the Gmail message id.
60 print "message "+`i`
+" id: "+message
.id
61 #print thread.getLabels()
62 #print thread.getLabels().count("downloaded")
# Always-true guard left over from the removed label check.
64 if True: # we used to make sure it had the right label, or more accurately, didn't
# Report how many attachments this message has.
66 print '\t'+`
len(message
.attachments
)`
+" attachments"
# randint(1, 1) can only return 1.  The name shadows the builtin `id`.
# NOTE(review): no use of this value is visible here -- confirm before removing.
67 id = random
.randint(1, 1)
# Handle each attachment of the message in turn.
69 for a
in message
.attachments
:
# Sanitise the filename: drop every non-ASCII character and turn '*' into '_'.
70 a
.filename
= a
.filename
.encode('ascii', 'ignore').replace('*', '_')
71 print '\t\t filename:', a
.filename
77 #dex=filename.find(".")
# Split at the first '.' into (stem, '.', rest); only the separator and the
# rest are used below.
78 tup
=a
.filename
.partition(".")
# Local file name = `pre` + '.' + everything after the first dot.
# `pre` is assigned outside the visible lines.
79 name
=pre
+tup
[1]+tup
[2]
# Open the local destination file for writing.  The write/close calls are not
# visible here.
# NOTE(review): mode "w" is text mode; for binary attachments "wb" is the
# portable choice -- confirm the target platform.
80 f
=open(destination
+name
,"w")
83 #message.addLabel("downloaded")
# Object name used in the S3 bucket for this attachment.
87 s3name
= "rawdata_" + name
+ ".bz2"
# First time this `pre` key is seen: compress, upload, and clean up.
88 if not pre
in filehashdict
:
89 # okay, that's cool. Now we S3 it.
# NOTE(review): the os.system() calls below live inside assert statements.
# Running Python with -O strips asserts, so the commands themselves would not
# run at all.
# NOTE(review): `name` derives from the attachment filename and is interpolated
# into shell command strings; double quotes alone do not neutralise characters
# such as '"', '$' or '`'.  Treat as a command-injection risk.
#
# bzip2 the local file into ./<s3name> (-k keeps the input, -c writes to stdout).
90 assert(os
.system("bzip2 -k --best -c \"%s\" > \"%s\"" % (destination
+ name
, s3name
)) == 0)
# Upload the compressed file to the questhelper_data bucket.
91 assert(os
.system("s3cmd put \"%s\" s3://questhelper_data/" % (s3name
)) == 0)
# Remove the local compressed copy (and any other rawdata_* file).
92 assert(os
.system("rm rawdata_*") == 0)
# Remember '.' + extension for this key so a later pass can rebuild the S3 name.
94 filehashdict
[pre
] = name
.partition(".")[1] + name
.partition(".")[2] # we only look at the first page of emails, over and over. this way, on the second pass through that page, we'll get and delete instead of just re-storing over and over.
# S3 name rebuilt from the recorded extension.
# NOTE(review): whether the verification below runs on every pass or only when
# `pre` was already recorded cannot be determined from this view.
96 s3oldname
= "rawdata_" + pre
+ filehashdict
[pre
] + ".bz2"
# Warn (but carry on) when the recorded name differs from the freshly built one.
97 if s3oldname
!= s3name
:
98 print "\t\t WARNING: Name mismatch! %s vs %s" % (s3name
, s3oldname
)
# Command that downloads the stored object back into the working directory.
99 s3cg
= "s3cmd --force get \"s3://questhelper_data/%s\" \"%s\"" % (s3oldname
, s3oldname
)
# Retry the download until s3cmd exits with status 0.  The message mentions a
# 15 second sleep; the sleep call itself is not visible here.
100 while os
.system(s3cg
) != 0:
101 print "\t\t s3cmd failed, sleeping for 15 seconds . . ."
# Round-trip check: decompress the downloaded object into rawdata_temptest ...
103 assert(os
.system("cat \"%s\" | bunzip2 > rawdata_temptest" % (s3oldname
)) == 0)
# ... require it to be identical to the local attachment file ...
104 assert(os
.system("diff -q rawdata_temptest \"%s\"" % (destination
+ name
)) == 0)
# ... then delete the temporary file and the downloaded archive ...
105 assert(os
.system("rm rawdata_temptest \"%s\"" % (s3oldname
)) == 0)
# ... and finally delete the local attachment file itself.
106 assert(os
.system("rm \"%s\"" % (destination
+ name
)) == 0)
108 print "foobared attachment"
# Move the message to the Gmail trash.
113 ga
.trashMessage(message
)
# Attach `label` to the thread.
117 thread
.addLabel(label
)
# Run the search again to refresh `inbox`.
121 inbox
=ga
.getMessagesByQuery(argument
)
# Final tally; `i` is maintained elsewhere in the script.  %r reproduces the
# repr() that the old backtick syntax produced.
print("%r messages examined and saved" % (i,))

# Last sweep of rawdata_* files in the working directory; exit status ignored.
os.system("rm rawdata_*")