missing project/build files
[client-tools.git] / src / external / 3rd / library / libxml / genUnicode.py
blobc5668fdcf3f711c0e0cc109cb14eb83f3bb2a2c2
1 #!/usr/bin/python -u
2 import sys
3 import string
4 import time
# Names of the input files this generator reads; the string is embedded
# verbatim in the banner comment of both generated files.
sources = "Blocks-4.txt UnicodeData-3.1.0.txt"

# Parse the block definitions.  Each useful line is split on ';' into a
# "START..END" range and a block name.
try:
    blocks = open("Blocks-4.txt", "r")
except IOError:
    print("Missing Blocks-4.txt, aborting ...")
    sys.exit(1)

# Maps a block name (spaces removed) to its ("0xSTART", "0xEND") bounds.
# The bounds stay strings because they are pasted as-is into generated C.
BlockNames = {}
try:
    for line in blocks:
        # The comment test runs before stripping, so only a '#' in the
        # very first column marks a comment line.
        if line.startswith('#'):
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            fields = line.split(';')
            # Unpacking raises ValueError unless there is exactly one "..".
            (start, end) = fields[0].strip().split("..")
            # IndexError here means the line had no ';' separator.
            name = fields[1].strip().replace(' ', '')
        except (ValueError, IndexError):
            print("Failed to process line: %s" % (line))
            continue
        BlockNames[name] = ("0x" + start, "0x" + end)
finally:
    # Release the handle even if reading the file fails half-way.
    blocks.close()
print("Parsed %d blocks descriptions" % (len(BlockNames)))
# Parse the per-character database.  Each useful line is split on ';':
# field 0 is the code point in hexadecimal, field 2 is the category name.
try:
    data = open("UnicodeData-3.1.0.txt", "r")
except IOError:
    print("Missing UnicodeData-3.1.0.txt, aborting ...")
    sys.exit(1)

# Number of character lines successfully parsed.
nbchar = 0
# Maps a category name, and also its first letter on its own, to the list
# of code points carrying it, in file order.
Categories = {}
try:
    for line in data:
        # As for the blocks file, only a '#' in column 0 marks a comment.
        if line.startswith('#'):
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            fields = line.split(';')
            # int() rejects a malformed code point, so such a line is
            # reported below rather than silently yielding a wrong value.
            value = int(fields[0].strip(), 16)
            name = fields[2]
        except (ValueError, IndexError):
            print("Failed to process line: %s" % (line))
            continue

        nbchar = nbchar + 1
        Categories.setdefault(name, []).append(value)
        if name:
            # Also file the character under the one-letter prefix of its
            # category name.
            Categories.setdefault(name[0], []).append(value)
        else:
            # An empty category field has no first letter to file under.
            print("Failed to process line: %s" % (line))
finally:
    # Close the file that was actually opened in this section.
    data.close()
print("Parsed %d char generating %d categories" % (nbchar, len(Categories)))
# Reduce each category's list of code points into inclusive ranges.
def _collapse_ranges(values):
    """Collapse a sequence of integers into (first, last) inclusive ranges.

    A value extends the current range only when it is exactly one more
    than the previous value; anything else closes the range and opens a
    new one.  A lone value becomes (v, v).  An empty sequence yields an
    empty list.
    """
    ranges = []
    start = prev = None
    for val in values:
        if start is None:
            start = prev = val
        elif val == prev + 1:
            prev = val
        else:
            ranges.append((start, prev))
            start = prev = val
    if start is not None:
        # Flush the range still open when the input ran out.
        ranges.append((start, prev))
    return ranges


# Snapshot the keys first; the values are replaced while looping.
for cat in list(Categories.keys()):
    Categories[cat] = _collapse_ranges(Categories[cat])
# Generate the resulting files: open both outputs up front so that nothing
# is written unless both can be created.
try:
    header = open("xmlunicode.h", "w")
except IOError:
    print("Failed to open xmlunicode.h")
    sys.exit(1)

try:
    output = open("xmlunicode.c", "w")
except IOError:
    # Do not leave the already-opened header handle dangling on the way out.
    header.close()
    print("Failed to open xmlunicode.c")
    sys.exit(1)

# Local-time timestamp embedded in the banner of both generated files.
date = time.asctime(time.localtime(time.time()))
# Banner comment, include guard and C++ linkage opener of xmlunicode.h.
# The banner must be closed with "*/" before the preprocessor lines,
# otherwise the #ifndef guard would sit inside the comment.
header.write(
"""/*
 * xmlunicode.h: this header exports interfaces for the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#ifdef __cplusplus
extern "C" {
#endif

""" % (date, sources))
# Banner comment and include section of xmlunicode.c.  The banner must be
# closed with "*/" so the #define/#include lines after it are seen by the
# compiler.  The LIBXML_UNICODE_ENABLED guard opened here is closed by the
# footer written at the end of the script.
output.write(
"""/*
 * xmlunicode.c: this module implements the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>

""" % (date, sources))
# Emit one xmlUCSIs<Block>() predicate per parsed block, in sorted name
# order: a prototype into the header and a documented definition into the
# C file.
keys = sorted(BlockNames)
for block in keys:
    low, high = BlockNames[block]
    # Hyphens are removed to form the function name; the doc comment keeps
    # the block name as stored.
    ident = block.replace('-', '')
    header.write("int\txmlUCSIs%s\t(int code);\n" % ident)
    output.write("".join([
        "/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (ident),
        " *\n * Check whether the character is part of %s UCS Block\n" % (block),
        " *\n * Returns 1 if true 0 otherwise\n */\n",
        "int\nxmlUCSIs%s(int code) {\n" % ident,
        " return((code >= %s) && (code <= %s));\n" % (low, high),
        "}\n\n",
    ]))
# Emit xmlUCSIsBlock(): a dispatcher that compares the requested block name
# against every known block and forwards to the matching predicate.  The
# compared name keeps its hyphens; the called function's name drops them.
header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
output.write(" * @block: UCS block name\n")
output.write(" *\n * Check whether the character is part of the UCS Block\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n")
output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
keys = sorted(BlockNames)
for block in keys:
    name = block.replace('-', '')
    output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
                 (block, name))
# No name matched: report an unknown block.
output.write(" return(-1);\n}\n\n")
# Emit one xmlUCSIsCat<Name>() predicate per category, in sorted name
# order.  Each body is a single return whose expression ORs together one
# test per (begin, end) range stored for that category.
keys = sorted(Categories)
for name in keys:
    header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
    output.write(" *\n * Check whether the character is part of %s UCS Category\n" %
                 (name))
    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
    output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
    tests = []
    for (begin, end) in Categories[name]:
        if begin == end:
            # A single code point needs only an equality test.
            tests.append("(code == %s)" % (hex(begin)))
        else:
            tests.append("((code >= %s) && (code <= %s))" % (
                hex(begin), hex(end)))
    if tests:
        # The opening "return(" is written only when there is a test to emit.
        output.write(" return(" + " ||\n ".join(tests))
    output.write(");\n}\n\n")
# Emit xmlUCSIsCat(): a dispatcher that compares the requested category
# name against every known category and forwards to the matching predicate.
header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
output.write(" * @cat: UCS Category name\n")
output.write(" *\n * Check whether the character is part of the UCS Category\n")
output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n")
output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
keys = sorted(Categories)
for name in keys:
    output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
                 (name, name))
# No name matched: report an unknown category.
output.write(" return(-1);\n}\n\n")
# Close what the preambles opened.  In the header that is the
# `extern "C" {` linkage block (hence the "}" inside the __cplusplus guard)
# and the include guard; in the C file it is the LIBXML_UNICODE_ENABLED
# conditional.
header.write("""
#ifdef __cplusplus
}
#endif
#endif /* __XML_UNICODE_H__ */
""")
output.write("""
#endif /* LIBXML_UNICODE_ENABLED */
""")
# Flush and release both generated files.
header.close()
output.close()