2 from unicode_parse_common
import *
4 # http://www.unicode.org/Public/5.1.0/ucd/extracted/DerivedGeneralCategory.txt
# Maps each two-letter Unicode General_Category abbreviation (as listed in
# DerivedGeneralCategory.txt) to the name of the HarfBuzz category constant
# that is written into the generated C table.
category_to_harfbuzz = {
    # Marks
    'Mn': 'HB_Mark_NonSpacing',
    'Mc': 'HB_Mark_SpacingCombining',
    'Me': 'HB_Mark_Enclosing',

    # Numbers
    'Nd': 'HB_Number_DecimalDigit',
    'Nl': 'HB_Number_Letter',
    'No': 'HB_Number_Other',

    # Separators
    'Zs': 'HB_Separator_Space',
    'Zl': 'HB_Separator_Line',
    'Zp': 'HB_Separator_Paragraph',

    # Other (control, format, surrogate, private use, unassigned)
    'Cc': 'HB_Other_Control',
    'Cf': 'HB_Other_Format',
    'Cs': 'HB_Other_Surrogate',
    'Co': 'HB_Other_PrivateUse',
    'Cn': 'HB_Other_NotAssigned',

    # Letters
    'Lu': 'HB_Letter_Uppercase',
    'Ll': 'HB_Letter_Lowercase',
    'Lt': 'HB_Letter_Titlecase',
    'Lm': 'HB_Letter_Modifier',
    'Lo': 'HB_Letter_Other',

    # Punctuation
    'Pc': 'HB_Punctuation_Connector',
    'Pd': 'HB_Punctuation_Dash',
    'Ps': 'HB_Punctuation_Open',
    'Pe': 'HB_Punctuation_Close',
    'Pi': 'HB_Punctuation_InitialQuote',
    'Pf': 'HB_Punctuation_FinalQuote',
    'Po': 'HB_Punctuation_Other',

    # Symbols
    'Sm': 'HB_Symbol_Math',
    'Sc': 'HB_Symbol_Currency',
    'Sk': 'HB_Symbol_Modifier',
    'So': 'HB_Symbol_Other',
}

def main(infile, outfile):
    """Write a C header describing Unicode category ranges to *outfile*.

    *infile* is handed to ``unicode_file_parse`` together with
    ``category_to_harfbuzz``, and the result is passed through
    ``sort_and_merge``. Both helpers are defined outside this block.
    Each resulting ``(start, end, value)`` triple becomes one initializer
    of the generated ``category_properties`` array; the number of triples
    is emitted as ``category_properties_count``.

    Args:
        infile: Input handed unchanged to ``unicode_file_parse`` (the
            command-line entry point passes an open text file).
        outfile: Object with a ``write`` method that receives the header
            text.
    """
    ranges = unicode_file_parse(infile, category_to_harfbuzz)
    ranges = sort_and_merge(ranges)

    # Every list element is one output line. Elements that end in '\n'
    # produce a trailing blank line, exactly as the former
    # `print >>outfile, ...` statements did.
    # NOTE(review): the banner says "script tables" although this generator
    # emits category tables; kept verbatim so regenerated headers do not
    # change -- confirm whether it should read "category tables".
    lines = [
        '// Generated from Unicode script tables\n',
        '#ifndef CATEGORY_PROPERTIES_H_',
        '#define CATEGORY_PROPERTIES_H_\n',
        '#include <stdint.h>',
        '#include "harfbuzz-external.h"\n',
        'struct category_property {',
        ' uint32_t range_start;',
        ' uint32_t range_end;',
        ' HB_CharCategory category;',
        '};\n',
        'static const struct category_property category_properties[] = {',
    ]
    for (start, end, value) in ranges:
        lines.append(' {0x%x, 0x%x, %s},' % (start, end, value))
    lines.extend([
        '};\n',
        'static const unsigned category_properties_count = %d;\n'
        % len(ranges),
        '#endif // CATEGORY_PROPERTIES_H_',
    ])

    # A single write() works on both Python 2 and Python 3, unlike the
    # Python 2-only `print >>file` statement.
    outfile.write('\n'.join(lines) + '\n')

if __name__ == '__main__':
    # Exactly two arguments are required: the input .txt file and the
    # output .h file.
    if len(sys.argv) != 3:
        # Passing a string to sys.exit prints it to stderr and exits with
        # status 1, so execution never reaches the sys.argv lookups below.
        sys.exit('Usage: %s <input .txt> <output .h>' % sys.argv[0])

    # Context managers make sure both files are closed (and the output
    # flushed) even if main() raises.
    with open(sys.argv[1], 'r') as infile:
        with open(sys.argv[2], 'w+') as outfile:
            main(infile, outfile)