1 #!/usr/local/bin/python3.0
# Make sure the local mirror root exists before anything is listed or
# downloaded.  Attempt the creation and tolerate "already there" instead of
# checking first: a check-then-create sequence can fail if another process
# creates the directory in between.  (os.makedirs(exist_ok=True) is avoided
# because the shebang targets Python 3.0, which predates that argument.)
try:
    os.makedirs("data/01")
except OSError:
    # Only swallow the error when the directory really is present; anything
    # else (permissions, a plain file in the way, ...) is a genuine failure.
    if not os.path.isdir("data/01"):
        raise
def getAll(conn, bucket, prefix):
    """Return every listing entry in *bucket* whose key starts with *prefix*.

    A single listing request may come back truncated, so the request is
    repeated, each time resuming after the last key already received, until
    the reply is no longer flagged as truncated.

    Args:
        conn: connection object exposing ``list_bucket(bucket, options=...)``;
            its reply must provide ``entries`` and ``is_truncated``.
        bucket: name of the bucket to list.
        prefix: only keys beginning with this string are requested.

    Returns:
        A list with the entries of all pages, in the order they were received.

    Raises:
        RuntimeError: if a truncated reply, or the page requested after one,
            contains no entries.  Without entries there is no key to resume
            from, so continuing would repeat the same request forever.
    """
    reply = conn.list_bucket(bucket, options={"prefix": prefix})
    rv = list(reply.entries)

    while reply.is_truncated:
        print("Retrieving filelist, %d files . . ." % len(rv))

        if not reply.entries:
            raise RuntimeError("truncated listing of %r returned no entries" % bucket)
        # Resume the listing right after the last key of the previous page.
        marker = reply.entries[-1].key
        reply = conn.list_bucket(bucket, options={"prefix": prefix, "marker": marker})
        if not reply.entries:
            raise RuntimeError("continuation listing of %r returned no entries" % bucket)
        # extend() appends in place instead of rebuilding the list per page.
        rv.extend(reply.entries)

    print("Filelist retrieved, %d files" % len(rv))
    return rv
# NOTE(review): these credentials are stored in plain text in the source, so
# anyone who can read this file can use them.  The values are left untouched
# so the script keeps working, but they should be rotated and loaded from the
# environment or an untracked config file instead.
AWS_ACCESS_KEY_ID = "1JC38J0QE1MGRE08TT82"
AWS_SECRET_ACCESS_KEY = "+I7mvTFCcEpI1WZ14lDjVSaeog+BKBCVUudwIWKs"

BUCKET = "questhelper_data"

conn = S3.AWSAuthConnection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
generator = S3.QueryStringAuthGenerator(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

# Server side: map the 32-hex-digit tag embedded in each key to the full key.
# The pattern is a raw string so that "\." reaches the regex engine verbatim
# rather than relying on Python passing an unknown string escape through.
tregex = re.compile(r"rawdata_([0-9a-f]{32,32}).*\.bz2")
# A listed key that does not match the pattern makes match() return None, so
# .group(1) raises AttributeError here rather than the key being skipped.
serverfiles = {tregex.match(x.key).group(1): x.key for x in getAll(conn, BUCKET, "rawdata_")}
print("Serverfiles isolated: %d" % len(serverfiles))

# Local side: map every file name found under data/01 to its path on disk.
# The dict has to exist before the walk starts updating it.
currentfiles = {}
for path, dirs, files in os.walk("data/01"):
    currentfiles.update({file: os.path.join(path, file) for file in files})
print("Currentfiles isolated: %d" % len(currentfiles))

# Every tag known locally or on the server, in a stable (sorted) order.
# Iterating a dict yields its keys, so set(d) equals set(d.keys()).
touchfiles = sorted(set(currentfiles) | set(serverfiles))
49 for tag in touchfiles:
50 assert(tag in serverfiles)
51 if not tag in currentfiles:
52 print("Downloading %s" % tag)
53 dat = conn.get(BUCKET, serverfiles[tag]).object.data
54 #print("Downloaded, %d" % len(dat))
55 dat = bz2.decompress(dat)
56 assert(hashlib.md5(dat).hexdigest() == tag)
57 #print("Decompressed to %d" % len(dat))
58 if not os.path.exists("data/01/%s" % tag[0:2]):
59 os.makedirs("data/01/%s" % tag[0:2])
60 with open("data/01/%s/%s" % (tag[0:2], tag), "wb") as f: