[rendering] All hail pyflakes... fix the name
[wikipediardware.git] / host-tools / rendering / generate_code.py
blob3da139cf70f8cf1a285a71a51292f1b1141a23f5
1 #!/usr/bin/env python
2 """
3 A tool that converts a .blib file into something that is easily compressible
4 and is not as redundant as the .blib format.
6 Copyright (C) 2008, 2009 Holger Hans Peter Freyther <zecke@openmoko.org>
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 """
22 import sys
23 import fontmap
24 import glyphmap
25 import textrun
26 import optparse
27 import os
28 import glob
29 import struct
30 import pickle
# LZO is optional. When it cannot be imported, write_to_file() stores the
# generated text uncompressed. Only ImportError is treated as "not
# available" so that Ctrl-C or unrelated failures are not silently hidden.
try:
    import lzo
    imported_lzo = True
except ImportError:
    print("WARNING: LZO not imported performance will be degraded")
    imported_lzo = False
def prepare_run(text_runs):
    """
    Sort the given list of text runs in place and return that same list.

    The ordering is delegated to textrun.TextRun.cmp (by y position,
    according to the comment that accompanied the original code).
    """
    compare_runs = textrun.TextRun.cmp
    text_runs.sort(compare_runs)
    return text_runs
def map_glyph(glyphmap, font_id, glyph_id):
    """
    gen_font_file.py has replaced some glyphs to create a more
    dense data structure. We have to apply the glyph remapping
    for the given font to identify the right glyph. If the glyph
    is not remapped we will use the original glyph-id.

    glyphmap is indexed as glyphmap[font_id][glyph_id]. The remapped
    id is returned when both keys exist; otherwise glyph_id is returned
    unchanged (this also covers a font without any remapping).
    """
    try:
        # The lookup result must be returned; dropping it made every
        # remapped glyph come back as None.
        return glyphmap[font_id][glyph_id]
    except KeyError:
        return glyph_id
def write_to_file(text_runs, fonts, glyphmap, auto_kern_bit):
    """
    Serialise the text runs and write them to auto_kern_bit.

    The runs are turned into one string: an "f<font>" marker whenever the
    font changes, followed for every run by its start position and its
    glyph ids joined with "-". The string is LZO compressed when the lzo
    module was imported. It is written as a little-endian unsigned 32 bit
    length followed by the (possibly compressed) data.

    fonts maps a run's font to the value used both in the marker and as
    the font id for the glyph remapping.
    """

    def write_pending_bit(output, font_id, run):
        """
        Append the start position and the glyph list of one run.

        The text run is sorted by paragraph and all glyphs of
        one paragraph are on the same line and have roughly the
        same height. The y coordinate is left out when it is 0.
        """
        if run.first_y == 0:
            position = ",%d," % (run.first_x)
        else:
            position = ",%d_%d," % (run.first_x, run.first_y)
        output.append(position)

        mapped = [map_glyph(glyphmap, font_id, entry['glyph'])
                  for entry in run.glyphs]
        output.append("-".join(mapped))

    pieces = []
    current_font = None
    for text_run in text_runs:
        # we might have a new font now
        run_font = text_run.font
        if current_font != run_font:
            pieces.append("f%s" % fonts[run_font])
            current_font = run_font

        write_pending_bit(pieces, fonts[current_font], text_run)

    payload = "".join(pieces)
    if imported_lzo:
        payload = lzo.compress(payload)

    auto_kern_bit.write(struct.pack("<I", len(payload)))
    auto_kern_bit.write(payload)
def parse():
    """
    Parse the command line and return the (options, args) pair.

    sys.argv is handed to optparse unchanged, so args[0] is the script
    name and the positional arguments start at args[1]. The job number
    is converted to an int before returning.
    """
    usage_text = """%prog [options] input_file
Two modes are supported. Single conversion or batch conversion"""
    parser = optparse.OptionParser(version="Generate Huffman code utility 0.1",
                                   usage=usage_text)

    # (short flag, long flag, help text, action, dest, default)
    option_table = (
        ("-f", "--fontmap", "specify the fontmap.map to use",
         "store", "fontmap", "fontmap.map"),
        ("-g", "--glyphmap", "specify the glyphmap.map to use",
         "store", "glyphmap", "glyphmap.map"),
        ("-o", "--output", "Output file",
         "store", "output_file", "huffmaned.cde"),
        ("-b", "--batch", "start a batch job",
         "store_true", "batch", False),
        ("-a", "--batch-output", "Output file for the batch",
         "store", "output_batch_file", "wikipedia.set"),
        ("-c", "--batch-offset", "File with offsets of articles",
         "store", "output_marker", "wikipedia.offset"),
        ("-e", "--error-file", "File where to put errors",
         "store", "error_file", "failed_blib.files"),
        ("-j", "--job", "specify the job number",
         "store", "jobnumber", "1"),
        ("-d", "--dict", "No duplicates... generate a pickled dict...",
         "store", "dictionary", "dictionary.dict"),
    )
    for short_flag, long_flag, help_text, action, dest, default in option_table:
        parser.add_option(short_flag, long_flag, help=help_text,
                          action=action, dest=dest, default=default)

    parsed_opts, positional = parser.parse_args(sys.argv)
    parsed_opts.jobnumber = int(parsed_opts.jobnumber)
    return parsed_opts, positional
options, args = parse()

# Import Psyco if available
try:
    import psyco
    psyco.full()
except ImportError:
    pass


if not options.batch:
    # Single conversion. parse() hands sys.argv to optparse unchanged, so
    # args[0] is the script name and the input file is args[1].
    # NOTE(review): the batch path opens .blib files with 'rb' while this
    # path uses the default text mode - confirm which one is intended.
    blib_file = open(args[1])
    try:
        glyphs = textrun.load(blib_file)
        text_runs = textrun.generate_text_runs(glyphs, 240)
        prepare_run(text_runs)
        # Keep the loaded maps in their own names instead of rebinding the
        # imported fontmap/glyphmap modules.
        font_table = fontmap.load(options.fontmap)
        glyph_table = glyphmap.load(options.glyphmap)
        # The output is a struct-packed length followed by (possibly LZO
        # compressed) data, so it has to be written in binary mode.
        auto_kern_bit = open(options.output_file, "wb")
        try:
            write_to_file(text_runs, font_table, glyph_table, auto_kern_bit)
        finally:
            auto_kern_bit.close()
    finally:
        blib_file.close()
else:
    # We got pointed to a list of directories and will collect the
    # 'work' files from there and will pick up the objects and then
    # do some work on it.
    offset_marker = open(options.output_marker, "w")
    # Binary mode: see the note on the single conversion output above. It
    # also keeps the offsets reported by tell() equal to byte positions.
    batch_output = open(options.batch_output_file
                        if hasattr(options, "batch_output_file")
                        else options.output_batch_file, "wb")
    failed = None
    try:
        font_table = fontmap.load(options.fontmap)
        glyph_table = glyphmap.load(options.glyphmap)
        failed = open(options.error_file, "a")

        # Hashes that were already converted (possibly by an earlier run).
        # A missing or unreadable dictionary simply means we start empty.
        # NOTE: pickle must only be fed files written by this tool;
        # unpickling untrusted data can execute arbitrary code.
        try:
            dictionary_file = open(options.dictionary)
            try:
                seen_hashes = pickle.load(dictionary_file)
            finally:
                dictionary_file.close()
        except Exception:
            seen_hashes = {}

        def convert(base_name, article_hash):
            """
            Convert a single file

            The article is looked up below base_name as
            articles/<h[0]>/<h[1:3]>/<h>.blib. Hashes that are already
            in the dictionary are skipped. For a new hash an SQL INSERT
            with the current offset in the batch output is written to
            the offset file and the converted article is appended to
            the batch output.
            """
            if article_hash in seen_hashes:
                return
            # Recorded before the conversion is attempted, so a failing
            # article is not retried either.
            seen_hashes[article_hash] = 1

            file_name = os.path.join(base_name, "articles", article_hash[0],
                                     article_hash[1:3], article_hash)
            file_name = "%s.blib" % file_name
            article_file = open(file_name, 'rb')
            try:
                glyphs = textrun.load(article_file)
                text_runs = textrun.generate_text_runs(glyphs, 240)
                prepare_run(text_runs)

                # write the offset to another file...
                offset_marker.write(
                    "INSERT INTO Offsets (offset, file, hash) VALUES (%d, %d, '%s');\n"
                    % (batch_output.tell(), options.jobnumber, article_hash))
                write_to_file(text_runs, font_table, glyph_table, batch_output)
            finally:
                article_file.close()

        offset_marker.write("BEGIN TRANSACTION;\n")
        # args[0] is the script name, the directories start at args[1].
        for base_dir in args[1:]:
            for work in glob.glob(os.path.join(base_dir, "*.work")):
                print("Working on %s" % work)
                work_file = open(work)
                try:
                    for line in work_file:
                        # Strip only the line terminator; a last line
                        # without one must not lose its final character.
                        data = line.rstrip("\n").split(" ", 1)
                        try:
                            convert(base_dir, data[0])
                        except Exception:
                            # Best effort: log the article and carry on.
                            # KeyboardInterrupt/SystemExit still abort.
                            failed.write("Error: %s from %s\n" % (data[0], work))
                finally:
                    work_file.close()
        offset_marker.write("COMMIT;\n")

        # store the state
        dictionary_file = open(options.dictionary, "w")
        try:
            pickle.dump(seen_hashes, dictionary_file)
        finally:
            dictionary_file.close()
    finally:
        for handle in (offset_marker, batch_output, failed):
            if handle is not None:
                handle.close()
199 print "Done. Have fun!"