[rendering] This simple trick didn't work...
[wikipediardware.git] / host-tools / scripts / fetch_data.py
blob0fc047e0854fa6dd5b6ceecea9b40df668e6b08f
1 #!/usr/bin/env python
3 import glob, os, sys, subprocess, signal, time
# PID of the child most recently launched by run_command; alarm_handler
# sends SIGKILL to this PID when SIGALRM is delivered.
current_pid = None
class TimeOutException(Exception):
    """Raised by run_command to report that a command timed out."""
def alarm_handler(signum, frame):
    """
    Kill the process with current_pid... this should help to get
    run_command unstuck...

    signum, frame -- the standard signal handler arguments; both unused.

    Does nothing when no process has been launched yet (current_pid is
    None) or when the process is already gone.
    """
    import errno

    if current_pid is None:
        # The alarm fired before any child was started: nothing to kill.
        return
    try:
        os.kill(current_pid, signal.SIGKILL)
    except OSError as err:
        # The child may have exited just before the alarm went off; an
        # exception escaping a signal handler would surface in whatever
        # code happened to be running, so only real failures propagate.
        if err.errno != errno.ESRCH:
            raise
def run_command(cmd, timeout=120):
    """
    Execute a command and wait for it to finish.

    An alarm is armed for ``timeout`` seconds before the command is
    launched. The SIGALRM handler is expected to SIGKILL the process
    whose pid is stored in the global current_pid.

    cmd     -- argument list handed to subprocess.Popen
    timeout -- whole seconds before the alarm fires (default 120)

    Raises TimeOutException if the command was terminated by a signal
    (negative return code) and subprocess.CalledProcessError, carrying the
    real exit code, if the command exited with a non-zero status.
    """
    global current_pid
    import errno

    # Launch and set an alarm... alarm_handler will kill current_pid
    signal.alarm(timeout)
    try:
        proc = subprocess.Popen(cmd)
        current_pid = proc.pid
        while True:
            try:
                # Popen.wait decodes the wait status: negative means
                # "killed by signal", positive is the exit code.
                returncode = proc.wait()
                break
            except OSError as err:
                # On interpreters that do not retry interrupted system
                # calls, the alarm interrupts the wait; retry so the killed
                # child gets reaped. Any other error would never go away,
                # so do not spin on it.
                if err.errno != errno.EINTR:
                    raise
    finally:
        # cancel timer, even when launching the command itself failed
        signal.alarm(0)

    # error checking
    if returncode < 0:
        raise TimeOutException()
    if returncode > 0:
        raise subprocess.CalledProcessError(returncode, cmd)
def execute(hash, url):
    """
    Fetch one article and file the results under the articles tree.

    Runs GtkLauncher on ``url``, then moves render_text.blib and
    render_text.links from the current directory to
    articles/<hash[0]>/<hash[1:3]>/<hash>.blib and <hash>.link
    (presumably GtkLauncher is what writes those two files -- confirm).

    Every step goes through run_command, so its exceptions propagate and
    the remaining steps are skipped.
    """
    print("Getting %s" % url)

    target_dir = os.path.join("articles", hash[0], hash[1:3])
    target_stem = os.path.join(target_dir, hash)

    steps = (
        ["GtkLauncher", url],
        ["mkdir", "-p", target_dir],
        ["mv", "-f", "render_text.blib", "%s.blib" % target_stem],
        ["mv", "-f", "render_text.links", "%s.link" % target_stem],
    )
    for argv in steps:
        run_command(argv)
# main execution
#
# Usage: fetch_data.py JOB_DIR
# JOB_DIR holds the *.work files; its last path component must be an
# integer, which is used to pick a private X display number.
job_dir = sys.argv[1]
os.chdir(job_dir)
try:
    os.mkdir("articles")
except OSError:
    # Usually left over from an earlier run. Anything else shows up later,
    # when "mkdir -p" is run for the individual articles.
    pass

# normpath/basename instead of splitting on '/' so that "jobs/3/" and a
# bare "3" work as well as "jobs/3".
job_number = int(os.path.basename(os.path.normpath(job_dir)))
display = 99 - job_number
os.system("Xvfb :%d -noreset -ac &" % display)
os.environ['DISPLAY'] = ":%d" % display

# wait for the x server to start
time.sleep(6)

failed_urls = open("failed.urls", "a")

signal.signal(signal.SIGALRM, alarm_handler)

try:
    for work in glob.glob("*.work"):
        if os.path.exists("%s.complete" % work):
            print("Skipping %s as it is completed." % work)
            continue

        print("Opening %s" % work)
        with open(work) as work_file:
            for line in work_file:
                # Each line is "<hash> <url>". Strip only the newline, so
                # a final line without one keeps its last character.
                data = line.rstrip("\n").split(" ", 1)
                if len(data) != 2:
                    if line.strip():
                        print("Skipping malformed line in %s: %r" % (work, line))
                    continue
                try:
                    execute(data[0], data[1])
                except subprocess.CalledProcessError:
                    failed_urls.write("ProcessError: %s %s\n" % (data[0], data[1]))
                    failed_urls.flush()
                except TimeOutException:
                    failed_urls.write("Timeout: %s %s\n" % (data[0], data[1]))
                    failed_urls.flush()

        # mark it as done
        subprocess.check_call(["touch", "%s.complete" % work])
finally:
    failed_urls.close()