Automatic date update in version.in
[binutils-gdb.git] / gdb / ada-unicode.py
blob4c4986b84cb9886984bb18b943a313a8b92e9f2c
1 #!/usr/bin/env python3
3 # Generate Unicode case-folding table for Ada.
5 # Copyright (C) 2022 Free Software Foundation, Inc.
7 # This file is part of GDB.
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 3 of the License, or
12 # (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # This generates the ada-casefold.h header.
23 # Usage:
24 # python ada-unicode.py
26 import gdbcopyright
# State of the run of case-conversions currently being accumulated.
#
# RANGE_START is the first code point of the run, or None when no run
# is open.  RANGE_END is the last code point added to the run.
range_start = range_end = None

# Offsets from a code point in the run to its upper-case and
# lower-case variants, respectively.
upper_delta = lower_delta = None

# Every run completed so far, in the order it was completed.  Each
# entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
all_ranges = []
def finish_range():
    """Record the run currently being accumulated, then clear it.

    When a run is open (RANGE_START is not None), the tuple
    (RANGE_START, RANGE_END, UPPER_DELTA, LOWER_DELTA) is appended to
    ALL_RANGES and the four state variables are reset to None.  When
    no run is open, nothing happens.
    """
    global range_start, range_end, upper_delta, lower_delta

    if range_start is None:
        return

    all_ranges.append((range_start, range_end, upper_delta, lower_delta))
    range_start = range_end = upper_delta = lower_delta = None
def process_codepoint(val):
    """Fold the code point VAL into the run being accumulated.

    VAL is an integer code point.  If the character has no case
    variants, or either variant is not a single character, any open
    run is finished and VAL is skipped.  Otherwise VAL extends the
    open run when its upper/lower deltas match that run's deltas, and
    starts a new run when they do not (or when no run is open).
    """
    global range_start, range_end, upper_delta, lower_delta

    char = chr(val)
    lowered = char.lower()
    uppered = char.upper()

    # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017).  A table of
    # per-character deltas cannot express a multi-character result,
    # so such characters are skipped.  Because a run only describes
    # consecutive characters with identical deltas, a skipped
    # character must also end the run in progress.
    has_no_case = char == lowered and char == uppered
    is_multi_char = len(lowered) != 1 or len(uppered) != 1
    if has_no_case or is_multi_char:
        finish_range()
        return

    deltas = (ord(uppered) - val, ord(lowered) - val)

    # A change in either delta means VAL cannot join the open run.
    if range_start is not None and deltas != (upper_delta, lower_delta):
        finish_range()

    # No run open (possibly because it was just finished above):
    # VAL becomes the first member of a new one.
    if range_start is None:
        range_start = val
        upper_delta, lower_delta = deltas

    range_end = val
# Feed every Unicode code point, in ascending order, to
# process_codepoint.  The bound of range() is exclusive, so it has to
# be one past U+10FFFF (the last valid code point) for that code
# point to be examined as well.
for c in range(0, 0x110000):
    process_codepoint(c)

# process_codepoint only closes a run when a later code point breaks
# it, so a run that is still open after the final code point must be
# flushed explicitly or it would never reach ALL_RANGES.
finish_range()
# Write the generated table to ada-casefold.h: first the text returned
# by gdbcopyright.copyright, then one brace-enclosed initializer per
# entry of ALL_RANGES, in the order (START, END, UPPER_DELTA,
# LOWER_DELTA).
with open("ada-casefold.h", "w") as f:
    banner = gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB")
    print(banner, file=f)
    for r in all_ranges:
        # Doubled braces in the f-string emit literal "{" and "}".
        print(f" {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)