Fix unicode bug from python3 conversion
[pgweb/local.git] / tools / ftp / spider_ftp.py
blob159c419ddc55f749d290d3f2e9efb60be3443a8e
1 #!/usr/bin/env python3
4 # spider_ftp.py - spider the ftp site and generate an output file with all
5 # the metadata we require, that can be transferred over to
6 # the master web server.
9 import sys
10 import os
11 from datetime import datetime
12 import pickle as pickle
13 import codecs
14 import requests
# Paths, given relative to the root of the ftp tree (with a leading slash),
# whose entire subtree is left out of the pickle.
exclude_roots = ['/repos']

# Collected metadata: one entry per scanned directory, filled in by
# parse_directory().
allnodes = dict()
def read_file(fn):
    """Return the full contents of the file *fn* as text.

    The file is decoded as UTF-8. Byte sequences that are not valid UTF-8
    are replaced with U+FFFD rather than raising UnicodeDecodeError.
    """
    # The context manager closes the handle even if read() raises.
    with codecs.open(fn, 'r', encoding='utf-8', errors='replace') as f:
        return f.read()
def parse_directory(dirname, rootlen):
    """Scan *dirname* recursively and record its entries in ``allnodes``.

    dirname -- path of the directory to scan.
    rootlen -- length of the ftp root path string; ``path[rootlen:]`` is
               used to turn a full path into one relative to the root.

    Nothing is returned. For each scanned directory, ``allnodes`` gets one
    entry keyed by the root-relative path with surrounding slashes
    stripped. Its value is a dict mapping entry names to:

      {'t': 'l', 'd': <link target, slashes stripped>}    symlink to a dir
      {'t': 'd'}                                          subdirectory
      {'t': 'f', 's': <size>, 'd': <mtime as datetime>}   regular file

    README, CURRENT_MAINTAINER and .message files additionally carry
    their text content under the key 'c'.
    """
    mynode = {}
    for f in os.listdir(dirname):
        # Hidden entries are skipped, with .message as the one exception.
        if f.startswith(".") and f != ".message":
            continue
        if f == "sync_timestamp":
            continue

        fn = os.path.join(dirname, f)
        if os.path.isdir(fn):
            # Can be a directory itself, or a symbolic link to a directory
            if os.path.islink(fn):
                # Symbolic link: store the target, do not follow it.
                mynode[f] = {
                    't': 'l',
                    'd': os.readlink(fn).strip("/"),
                }
            else:
                # Normalize to exactly one leading slash so the exclude
                # check also works when the root was given with a trailing
                # slash (fn[rootlen:] then has no leading "/").
                relpath = "/" + fn[rootlen:].lstrip("/")
                # Recurse into the subdirectory unless it is excluded;
                # excluded directories are not listed in the parent at all.
                if relpath not in exclude_roots:
                    parse_directory(fn, rootlen)
                    mynode[f] = {
                        't': 'd',
                    }
        else:
            # This is a file
            st = os.stat(fn)
            mynode[f] = {
                't': 'f',
                's': st.st_size,
                'd': datetime.fromtimestamp(st.st_mtime),
            }
            # These files also have their contents embedded in the node.
            if f in ("README", "CURRENT_MAINTAINER", ".message"):
                mynode[f]['c'] = read_file(fn)

    allnodes[dirname[rootlen:].strip("/")] = mynode
def Usage():
    """Print the command-line synopsis to stdout and exit with status 1."""
    help_lines = (
        "Usage: spider_ftp.py <ftp_root> <pickle_file>",
        "",
        "If <pickle_file> starts with http[s]://, the file will be uploaded",
        "to that URL instead of written to the filesystem.",
    )
    for line in help_lines:
        print(line)
    sys.exit(1)
# Script body: expects exactly two arguments, <ftp_root> and <pickle_file>.
if len(sys.argv) != 3:
    Usage()

# Walk the tree; rootlen is the length of the root argument so that stored
# paths are relative to it.
parse_directory(sys.argv[1], len(sys.argv[1]))

if sys.argv[2].startswith(("http://", "https://")):
    # Destination is a URL: upload the pickled data with an HTTP PUT.
    r = requests.put(
        sys.argv[2],
        data=pickle.dumps(allnodes),
        headers={
            'Content-type': 'application/octet-stream',
            'Host': 'www.postgresql.org',
        },
    )
    if r.status_code != 200:
        print("Failed to upload, code: %s" % r.status_code)
        sys.exit(1)
    elif r.text != "NOT CHANGED" and r.text != "OK":
        # Report the unexpected response body (previously referenced an
        # undefined name, which raised NameError instead of this message).
        print("Failed to upload: %s" % r.text)
        sys.exit(1)
else:
    # Destination is a local file: write to a temporary name first and
    # rename over the target, so the target is only replaced by a
    # completely written file.
    tmpname = sys.argv[2] + ".tmp"
    with open(tmpname, "wb") as f:
        pickle.dump(allnodes, f)
    os.rename(tmpname, sys.argv[2])