4 # spider_ftp.py - spider the ftp site and generate an output file with all
5 # the metadata we require, that can be transferred over to
6 # the master web server.
import codecs
import os
import sys
from datetime import datetime
import pickle as pickle

import requests
# Top-level directories (paths relative to the ftp root) whose entire
# subtrees are skipped and never written into the pickle.
exclude_roots = ['/repos']
def read_file(fn):
    """Return the contents of the file *fn* as a unicode string.

    The file is decoded as UTF-8; undecodable bytes are replaced with
    U+FFFD rather than raising, since ftp-tree files are not guaranteed
    to be clean UTF-8.

    Fix: the file object is now managed with a ``with`` block so the
    handle is always closed (the original opened it without closing).
    """
    with codecs.open(fn, 'r', encoding='utf-8', errors='replace') as f:
        return f.read()
def parse_directory(dirname, rootlen):
    """Recursively scan *dirname* and record metadata for every entry.

    Results are stored in the module-global ``allnodes`` dict, keyed by
    the path relative to the ftp root (*rootlen* is the length of the
    root path prefix to strip). Each entry maps a file name to a dict
    with 't' (type: 'l' link / 'd' dir / 'f' file) plus type-specific
    metadata.
    """
    mynode = {}
    for f in os.listdir(dirname):
        # Skip hidden entries (except .message, which carries per-directory
        # notes) and the mirror-sync timestamp marker.
        if f.startswith(".") and f != ".message":
            continue
        if f == "sync_timestamp":
            continue

        fn = os.path.join(dirname, f)
        # Can be a directory itself, or a symbolic link to a directory
        if os.path.isdir(fn):
            if os.path.islink(fn):
                # This is a symbolic link: record its target relative to
                # the tree root so the web server can resolve it.
                mynode[f] = {
                    't': 'l',
                    'd': os.readlink(fn).strip("/"),
                }
            else:
                # This is a subdirectory, recurse into it, unless it happens
                # to be on our exclude list.
                if fn[rootlen:] not in exclude_roots:
                    parse_directory(fn, rootlen)
                    mynode[f] = {
                        't': 'd',
                    }
        else:
            # Plain file: record size and modification time.
            stat = os.stat(fn)
            mynode[f] = {
                't': 'f',
                's': stat.st_size,
                'd': datetime.fromtimestamp(stat.st_mtime),
            }
            # Inline the contents of well-known documentation files so the
            # web server can render them without a second fetch.
            if f in ("README", "CURRENT_MAINTAINER", ".message"):
                mynode[f]['c'] = read_file(fn)

    allnodes[dirname[rootlen:].strip("/")] = mynode
def Usage():
    """Print command-line usage to stdout and terminate with exit code 1."""
    print("Usage: spider_ftp.py <ftp_root> <pickle_file>")
    print("")
    print("If <pickle_file> starts with http[s]://, the file will be uploaded")
    print("to that URL instead of written to the filesystem.")
    # NOTE(review): the exit was lost in the garbled source; without it the
    # script would fall through and index with bad argv — so exit here.
    sys.exit(1)
if __name__ == "__main__":
    if len(sys.argv) != 3:
        Usage()

    # Spider the whole tree rooted at argv[1]; rootlen strips the root
    # prefix so allnodes keys are relative paths.
    parse_directory(sys.argv[1], len(sys.argv[1]))

    if sys.argv[2].startswith("http://") or sys.argv[2].startswith("https://"):
        # Upload the pickled tree to the master web server instead of
        # writing it locally.
        r = requests.put(
            sys.argv[2],
            data=pickle.dumps(allnodes),
            headers={
                'Content-type': 'application/octet-stream',
                'Host': 'www.postgresql.org',
            },
        )
        if r.status_code != 200:
            print("Failed to upload, code: %s" % r.status_code)
            sys.exit(1)
        elif r.text != "NOT CHANGED" and r.text != "OK":
            # BUG FIX: original printed "% x" with an undefined name `x`
            # (NameError); report the server's response body instead.
            print("Failed to upload: %s" % r.text)
            sys.exit(1)
    else:
        # Write atomically: dump to a temp file, then rename into place so
        # readers never see a half-written pickle. `with` guarantees the
        # handle is flushed and closed before the rename.
        with open(sys.argv[2] + ".tmp", "wb") as f:
            pickle.dump(allnodes, f)
        os.rename(sys.argv[2] + ".tmp", sys.argv[2])