3 """A variant on webchecker that creates a mirror copy of a remote site."""
5 __version__
= "$Revision$"
15 # Extract real version number if necessary
16 if __version__
[0] == '$':
17 _v
= string
.split(__version__
)
22 verbose
= webchecker
.VERBOSE
24 opts
, args
= getopt
.getopt(sys
.argv
[1:], "qv")
25 except getopt
.error
, msg
:
27 print "usage:", sys
.argv
[0], "[-qv] ... [rooturl] ..."
35 c
.setflags(verbose
=verbose
)
36 c
.urlopener
.addheaders
= [
37 ('User-agent', 'websucker/%s' % __version__
),
40 print "Adding root", arg
45 class Sucker(webchecker
.Checker
):
49 def readhtml(self
, url
):
51 path
= self
.savefilename(url
)
55 f
= self
.openpage(url
)
61 path
= self
.savefilename(url
)
64 self
.savefile(text
, path
)
65 if not self
.checkforhtml(info
, url
):
68 if self
.checkforhtml({}, url
):
73 def savefile(self
, text
, path
):
74 dir, base
= os
.path
.split(path
)
79 self
.message("saved %s", path
)
81 def savefilename(self
, url
):
82 type, rest
= urllib
.splittype(url
)
83 host
, path
= urllib
.splithost(rest
)
84 while path
[:1] == "/": path
= path
[1:]
85 user
, host
= urllib
.splituser(host
)
86 host
, port
= urllib
.splitnport(host
)
87 host
= string
.lower(host
)
88 if not path
or path
[-1] == "/":
89 path
= path
+ "index.html"
91 path
= string
.join(string
.split(path
, "/"), os
.sep
)
92 path
= os
.path
.join(host
, path
)
96 if not dir or os
.path
.exists(dir):
98 head
, tail
= os
.path
.split(dir)
100 print "Huh? Don't know how to make dir", dir
105 if __name__
== '__main__':
106 sys
.exit(main() or 0)