src/external/3rd/library/libxml/genUnicode.py

   1 #!/usr/bin/python -u
   2 import sys
   3 import string
   4 import time
   5
   6 sources = "Blocks-4.txt UnicodeData-3.1.0.txt"
   7
   8 try:
   9     blocks = open("Blocks-4.txt", "r")
  10 except:
  11     print "Missing Blocks-4.txt, aborting ..."
  12     sys.exit(1)
  13
  14 BlockNames = {}
  15 for line in blocks.readlines():
  16     if line[0] == '#':
  17         continue
  18     line = string.strip(line)
  19     if line == '':
  20         continue
  21     try:
  22         fields = string.split(line, ';')
  23         range = string.strip(fields[0])
  24         (start, end) = string.split(range, "..")
  25         name = string.strip(fields[1])
  26         name = string.replace(name, ' ', '')
  27     except:
  28         print "Failed to process line: %s" % (line)
  29         continue
  30     BlockNames[name] = ("0x"+start, "0x"+end)
  31 blocks.close()
  32 print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
  33
  34 try:
  35     data = open("UnicodeData-3.1.0.txt", "r")
  36 except:
  37     print "Missing UnicodeData-3.1.0.txt, aborting ..."
  38     sys.exit(1)
  39
  40 nbchar = 0;
  41 Categories = {}
  42 for line in data.readlines():
  43     if line[0] == '#':
  44         continue
  45     line = string.strip(line)
  46     if line == '':
  47         continue
  48     try:
  49         fields = string.split(line, ';')
  50         point = string.strip(fields[0])
  51         value = 0
  52         while point != '':
  53             value = value * 16
  54             if point[0] >= '0' and point[0] <= '9':
  55                 value = value + ord(point[0]) - ord('0')
  56             elif point[0] >= 'A' and point[0] <= 'F':
  57                 value = value + 10 + ord(point[0]) - ord('A')
  58             elif point[0] >= 'a' and point[0] <= 'f':
  59                 value = value + 10 + ord(point[0]) - ord('a')
  60             point = point[1:]
  61         name = fields[2]
  62     except:
  63         print "Failed to process line: %s" % (line)
  64         continue
  65
  66     nbchar = nbchar + 1
  67     try:
  68         Categories[name].append(value)
  69     except:
  70         try:
  71             Categories[name] = [value]
  72         except:
  73             print "Failed to process line: %s" % (line)
  74     try:
  75         Categories[name[0]].append(value)
  76     except:
  77         try:
  78             Categories[name[0]] = [value]
  79         except:
  80             print "Failed to process line: %s" % (line)
  81
  82 blocks.close()
  83 print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys()))
  84 #reduce the number list into ranges
  85 for cat in Categories.keys():
  86     list = Categories[cat]
  87     start = -1
  88     prev = -1
  89     end = -1
  90     ranges = []
  91     for val in list:
  92         if start == -1:
  93             start = val
  94             prev = val
  95             continue
  96         elif val == prev + 1:
  97             prev = val
  98             continue
  99         elif prev == start:
 100             ranges.append((prev, prev))
 101             start = val
 102             prev = val
 103             continue
 104         else:
 105             ranges.append((start, prev))
 106             start = val
 107             prev = val
 108             continue
 109     if prev == start:
 110         ranges.append((prev, prev))
 111     else:
 112         ranges.append((start, prev))
 113     Categories[cat] = ranges
 114
 115 #
 116 # Generate the resulting files
 117 #
 118 try:
 119     header = open("xmlunicode.h", "w")
 120 except:
 121     print "Failed to open xmlunicode.h"
 122     sys.exit(1)
 123
 124 try:
 125     output = open("xmlunicode.c", "w")
 126 except:
 127     print "Failed to open xmlunicode.c"
 128     sys.exit(1)
 129
 130 date = time.asctime(time.localtime(time.time()))
 131
 132 header.write(
 133 """/*
 134  * xmlunicode.h: this header exports interfaces for the Unicode character APIs
 135  *
 136  * This file is automatically generated from the
 137  * UCS description files of the Unicode Character Database
 138  * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 139  * using the genUnicode.py Python script.
 140  *
 141  * Generation date: %s
 142  * Sources: %s
 143  * Daniel Veillard <veillard@redhat.com>
 144  */
 145
 146 #ifndef __XML_UNICODE_H__
 147 #define __XML_UNICODE_H__
 148
 149 #ifdef __cplusplus
 150 extern "C" {
 151 #endif
 152
 153 """ % (date, sources));
 154 output.write(
 155 """/*
 156  * xmlunicode.c: this module implements the Unicode character APIs
 157  *
 158  * This file is automatically generated from the
 159  * UCS description files of the Unicode Character Database
 160  * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 161  * using the genUnicode.py Python script.
 162  *
 163  * Generation date: %s
 164  * Sources: %s
 165  * Daniel Veillard <veillard@redhat.com>
 166  */
 167
 168 #define IN_LIBXML
 169 #include "libxml.h"
 170
 171 #ifdef LIBXML_UNICODE_ENABLED
 172
 173 #include <string.h>
 174 #include <libxml/xmlversion.h>
 175 #include <libxml/xmlunicode.h>
 176
 177 """ % (date, sources));
 178
 179 keys = BlockNames.keys()
 180 keys.sort()
 181 for block in keys:
 182     (start, end) = BlockNames[block]
 183     name = string.replace(block, '-', '')
 184     header.write("int\txmlUCSIs%s\t(int code);\n" % name)
 185     output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
 186     output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
 187                  (block))
 188     output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
 189     output.write("int\nxmlUCSIs%s(int code) {\n" % name)
 190     output.write("    return((code >= %s) && (code <= %s));\n" % (start, end))
 191     output.write("}\n\n")
 192
 193 header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
 194 output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
 195 output.write(" * @block: UCS block name\n")
 196 output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
 197 output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
 198 output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
 199 keys = BlockNames.keys()
 200 keys.sort()
 201 for block in keys:
 202     name = string.replace(block, '-', '')
 203     output.write("    if (!strcmp(block, \"%s\"))\n        return(xmlUCSIs%s(code));\n" %
 204                  (block, name));
 205 output.write("    return(-1);\n}\n\n")
 206
 207
 208 keys = Categories.keys()
 209 keys.sort()
 210 for name in keys:
 211     ranges = Categories[name]
 212     header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
 213     output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
 214     output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
 215                  (name))
 216     output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
 217     output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
 218     start = 1
 219     for range in ranges:
 220         (begin, end) = range;
 221         if start:
 222             output.write("    return(");
 223             start = 0
 224         else:
 225             output.write(" ||\n           ");
 226         if (begin == end):
 227             output.write("(code == %s)" % (hex(begin)))
 228         else:
 229             output.write("((code >= %s) && (code <= %s))" % (
 230                          hex(begin), hex(end)))
 231     output.write(");\n}\n\n")
 232
 233 header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
 234 output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
 235 output.write(" * @cat: UCS Category name\n")
 236 output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
 237 output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
 238 output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
 239 keys = Categories.keys()
 240 keys.sort()
 241 for name in keys:
 242     output.write("    if (!strcmp(cat, \"%s\"))\n        return(xmlUCSIsCat%s(code));\n" %
 243                  (name, name));
 244 output.write("    return(-1);\n}\n\n")
 245
 246 header.write("""
 247 #ifdef __cplusplus
 248 }
 249 #endif
 250 #endif /* __XML_UNICODE_H__ */
 251 """);
 252 output.write("""
 253 #endif /* LIBXML_UNICODE_ENABLED */
 254 """);
 255 header.close()
 256 output.close()