4 # gcget -- screen scrape Geocaching.com's annoying web interface
5 # aka SHOW ME THE CACHE!!!
7 # Copyright 2007, Evan Battaglia
8 # Distributed under the terms of the GPL v2.
11 # requires module mechanize
14 # DEFAULT USERNAME AND PASSWORD: THESE ARE OVERWRITTEN BY COMMAND-LINE OPTIONS
20 # this has some extra args in:
21 # gcget lat,lon maxnumgcs [maxdist] [threshold]
22 # threshold -- if we find more than this number of geocaches, don't get ANY,
23 # instead give warning and quit
30 This program is free software, distributed under the terms of the GNU GPL v2.
32 Usage: gcget [-u username] [-p password] lat,lon maxnumberofgcs [maxdistance] [threshold]
33 Downloads up to maxnumberofgcs at a distance of up to maxdistance from lat,lon.
34 If the number of geocaches within maxdistance is above threshold, don't download any
35 geocaches, just give a warning and quit.
37 If username and password are not given, will use default values hard-coded in script.
43 # PARSE OPTIONS: USERNAME AND PASSWORD
48 opts
, args
= getopt
.gnu_getopt(sys
.argv
[1:], "u:p:d", ["help"])
49 except getopt
.GetoptError
:
50 # print help information and exit:
61 if o
== "--help" or o
== "-h":
71 #########################
73 ll
= args
[0].split(",")
83 threshold
= int(args
[3])
87 # n = number of result pages to fetch: maxnumgcs rounded up to the next multiple of 20, divided by 20
88 n
= int((int(args
[1])+19)/20)
91 from mechanize
import Browser
94 # get magic number for "Next" button.
95 # this is normally 16 (the hidden link control is $ctl16), unless there are fewer than 10 pages of results,
96 # in which case it will be less (e.g. 09 for 3 pages of results)
97 def getmagicnumber(b
):
98 for i
in range(16,0,-1):
99 if re
.compile("pgrBottom.ctl%02d" % i
).search(b
.response().get_data()):
104 b
.open("http://geocaching.com/seek/")
105 b
.follow_link(text
="Log in")
107 b
["myUsername"] = USER
108 b
["myPassword"] = PASS
111 magicnumber
= 0 # the ctl number of Next. get only once
113 try: b
.select_form("form4")
116 b
.select_form("form4")
118 b
.select_form("form4")
119 print >> sys
.stderr
, "Invalid username/password"
121 f
=open("gcget.badlogin.html","w")
122 f
.write(b
.response().get_data())
124 print >> sys
.stderr
, "Dumping last HTML page recieved into gcget.badlogin.html"
127 b
["origin_lat"] = lat
128 b
["origin_long"] = lon
132 thresholdre
= re
.compile("Total Records: <b>([0-9]*)</b>")
133 m
= thresholdre
.search(b
.response().get_data())
135 if int(m
.group(1)) > threshold
:
136 sys
.stderr
.write("THRESHOLD %d > %d\n" % (int(m
.group(1)), threshold
))
139 records
= int(m
.group(1))
140 sys
.stderr
.write("ok found %d, getting min(%d,%d) gcs\n" % (int(m
.group(1)), int(records
), int(args
[1])))
142 print "can't find total records"
146 # (records+19)/20 is the max pages
147 for ii
in range(min(n
,(records
+19)/20)):
150 b
['CID'] = [i
.name
for i
in b
.find_control('CID').items
]
155 # only print one header, start of xml file
156 lines
= b
.response().get_data().split("\n")
158 print "\n".join(lines
[0:2])
161 print "\n".join(lines
[2:-1])
166 sys
.stderr
.write("i")
172 magicnumber
= getmagicnumber(b
)
174 # print "couldn't find magic number!" # why does this happen?
178 [f
for f
in b
.forms()][0].new_control("hidden", "ctl00$ContentBody$pgrBottom$ctl%02d" % magicnumber
, {})
181 sys
.stderr
.write("\n")
186 # f=open("delmeNOW","w")
187 # f.write(b.response().get_data())