6 sources
= "Blocks-4.txt UnicodeData-3.1.0.txt"
9 blocks
= open("Blocks-4.txt", "r")
11 print "Missing Blocks-4.txt, aborting ..."
15 for line
in blocks
.readlines():
18 line
= string
.strip(line
)
22 fields
= string
.split(line
, ';')
23 range = string
.strip(fields
[0])
24 (start
, end
) = string
.split(range, "..")
25 name
= string
.strip(fields
[1])
26 name
= string
.replace(name
, ' ', '')
28 print "Failed to process line: %s" % (line
)
30 BlockNames
[name
] = ("0x"+start
, "0x"+end
)
32 print "Parsed %d blocks descriptions" % (len(BlockNames
.keys()))
35 data
= open("UnicodeData-3.1.0.txt", "r")
37 print "Missing UnicodeData-3.1.0.txt, aborting ..."
42 for line
in data
.readlines():
45 line
= string
.strip(line
)
49 fields
= string
.split(line
, ';')
50 point
= string
.strip(fields
[0])
54 if point
[0] >= '0' and point
[0] <= '9':
55 value
= value
+ ord(point
[0]) - ord('0')
56 elif point
[0] >= 'A' and point
[0] <= 'F':
57 value
= value
+ 10 + ord(point
[0]) - ord('A')
58 elif point
[0] >= 'a' and point
[0] <= 'f':
59 value
= value
+ 10 + ord(point
[0]) - ord('a')
63 print "Failed to process line: %s" % (line
)
68 Categories
[name
].append(value
)
71 Categories
[name
] = [value
]
73 print "Failed to process line: %s" % (line
)
75 Categories
[name
[0]].append(value
)
78 Categories
[name
[0]] = [value
]
80 print "Failed to process line: %s" % (line
)
83 print "Parsed %d char generating %d categories" % (nbchar
, len(Categories
.keys()))
84 #reduce the number list into ranges
85 for cat
in Categories
.keys():
86 list = Categories
[cat
]
100 ranges
.append((prev
, prev
))
105 ranges
.append((start
, prev
))
110 ranges
.append((prev
, prev
))
112 ranges
.append((start
, prev
))
113 Categories
[cat
] = ranges
116 # Generate the resulting files
119 header
= open("xmlunicode.h", "w")
121 print "Failed to open xmlunicode.h"
125 output
= open("xmlunicode.c", "w")
127 print "Failed to open xmlunicode.c"
130 date
= time
.asctime(time
.localtime(time
.time()))
134 * xmlunicode.h: this header exports interfaces for the Unicode character APIs
136 * This file is automatically generated from the
137 * UCS description files of the Unicode Character Database
138 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
139 * using the genUnicode.py Python script.
141 * Generation date: %s
143 * Daniel Veillard <veillard@redhat.com>
146 #ifndef __XML_UNICODE_H__
147 #define __XML_UNICODE_H__
153 """ % (date
, sources
));
156 * xmlunicode.c: this module implements the Unicode character APIs
158 * This file is automatically generated from the
159 * UCS description files of the Unicode Character Database
160 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
161 * using the genUnicode.py Python script.
163 * Generation date: %s
165 * Daniel Veillard <veillard@redhat.com>
171 #ifdef LIBXML_UNICODE_ENABLED
174 #include <libxml/xmlversion.h>
175 #include <libxml/xmlunicode.h>
177 """ % (date
, sources
));
179 keys
= BlockNames
.keys()
182 (start
, end
) = BlockNames
[block
]
183 name
= string
.replace(block
, '-', '')
184 header
.write("int\txmlUCSIs%s\t(int code);\n" % name
)
185 output
.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name
))
186 output
.write(" *\n * Check whether the character is part of %s UCS Block\n"%
188 output
.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
189 output
.write("int\nxmlUCSIs%s(int code) {\n" % name
)
190 output
.write(" return((code >= %s) && (code <= %s));\n" % (start
, end
))
191 output
.write("}\n\n")
193 header
.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
194 output
.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
195 output
.write(" * @block: UCS block name\n")
196 output
.write(" *\n * Check whether the caracter is part of the UCS Block\n")
197 output
.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
198 output
.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
199 keys
= BlockNames
.keys()
202 name
= string
.replace(block
, '-', '')
203 output
.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
205 output
.write(" return(-1);\n}\n\n")
208 keys
= Categories
.keys()
211 ranges
= Categories
[name
]
212 header
.write("int\txmlUCSIsCat%s\t(int code);\n" % name
)
213 output
.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name
))
214 output
.write(" *\n * Check whether the character is part of %s UCS Category\n"%
216 output
.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
217 output
.write("int\nxmlUCSIsCat%s(int code) {\n" % name
)
220 (begin
, end
) = range;
222 output
.write(" return(");
225 output
.write(" ||\n ");
227 output
.write("(code == %s)" % (hex(begin
)))
229 output
.write("((code >= %s) && (code <= %s))" % (
230 hex(begin
), hex(end
)))
231 output
.write(");\n}\n\n")
233 header
.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
234 output
.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
235 output
.write(" * @cat: UCS Category name\n")
236 output
.write(" *\n * Check whether the caracter is part of the UCS Category\n")
237 output
.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
238 output
.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
239 keys
= Categories
.keys()
242 output
.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
244 output
.write(" return(-1);\n}\n\n")
250 #endif /* __XML_UNICODE_H__ */
253 #endif /* LIBXML_UNICODE_ENABLED */