2 # Copyright (C) 1998, 1999 Tom Tromey
3 # Copyright (C) 2001 Red Hat Software
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2, or (at your option)
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, see <http://www.gnu.org/licenses/>.
19 gen-casefold-txt.py - Generate test cases for casefolding from Unicode data.
20 See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
22 I consider the output of this program to be unrestricted.
31 parser
= argparse
.ArgumentParser(
32 description
="Generate test cases for casefolding from Unicode data")
33 parser
.add_argument("UNICODE-VERSION")
34 parser
.add_argument("CaseFolding.txt")
35 args
= parser
.parse_args(argv
[1:])
36 version
= getattr(args
, "UNICODE-VERSION")
37 filename
= getattr(args
, "CaseFolding.txt")
40 # Test cases generated from Unicode {} data
41 # by gen-casefold-txt.py. Do not edit.
43 # Some special hand crafted tests
47 # Now the automatic tests
50 # Names of fields in the CaseFolding table
51 CODE
, STATUS
, MAPPING
= range(3)
53 with
open(filename
, encoding
="utf-8") as fileobj
:
55 # strip comments and skip empty lines
56 line
= line
.split("#", 1)[0].strip()
60 fields
= [f
.strip() for f
in line
.split(";", 3)[:3]]
63 "Entry for %s has wrong number of fields (%d)" % (
64 fields
[CODE
], len(fields
)))
66 status
= fields
[STATUS
]
67 # skip simple and Turkic mappings
71 code
= chr(int(fields
[CODE
], 16))
73 [chr(int(v
, 16)) for v
in fields
[MAPPING
].split()])
74 print("{}\t{}".format(code
, values
))
# Script entry point: when executed directly (not imported), run main() with
# the full argument vector and hand whatever it returns to sys.exit() as the
# process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)