Fix tg_termpos1 for 64-bit termpos
[xapian.git] / xapian-applications / omega / gen-namedents
blob78d6154cbd36180e6a112fd8ad25648ec6d37dc8
#!/usr/bin/perl
# Generate namedents.h, which maps HTML entity names to Unicode codepoints.
#
# The script feeds a fixed table of (entity name, codepoint) pairs to a
# Tokeniseise object and, in parallel, appends the codepoints to a C array
# named named_ent_codepoint.  Each name is registered under "ENT_<name>".
use strict;
use warnings;

# Licence text handed to Tokeniseise->new() below.  It is a complete C
# comment, so it has to end with the closing "*/" line.
my $copyright = <<'EOF';
/* Copyright (C) 2006,2007,2012,2013,2015,2016 Olly Betts
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
EOF

use Tokeniseise;

# First command-line argument.  It is consumed here but not referenced again
# in this script (presumably part of the calling convention of the build
# system; confirm before removing).
my $srcdir = shift @ARGV;

# Arguments: output file name, description, licence comment, include guard
# macro, identifier prefix, and a numeric setting whose meaning is defined by
# Tokeniseise (not visible from this script).
my $hdr = Tokeniseise->new('namedents.h', 'Map HTML entity name to Unicode codepoint', $copyright, 'OMEGA_INCLUDED_NAMEDENTS_H', 'named_ent', 2);

# Names and values from: "Character entity references in HTML 4"
# https://www.w3.org/TR/html4/sgml/entities.html
$hdr->append('static const unsigned named_ent_codepoint[] = {');

# The order of this table matters: the Nth name added to $hdr corresponds to
# the Nth codepoint appended to named_ent_codepoint[].
for my $ref (
    [ "quot", 34 ],
    [ "amp", 38 ],
    [ "apos", 39 ], # Not in HTML 4 list but used in OpenOffice XML.
    [ "lt", 60 ],
    [ "gt", 62 ],
    [ "nbsp", 160 ],
    [ "iexcl", 161 ],
    [ "cent", 162 ],
    [ "pound", 163 ],
    [ "curren", 164 ],
    [ "yen", 165 ],
    [ "brvbar", 166 ],
    [ "sect", 167 ],
    [ "uml", 168 ],
    [ "copy", 169 ],
    [ "ordf", 170 ],
    [ "laquo", 171 ],
    [ "not", 172 ],
    [ "shy", 173 ],
    [ "reg", 174 ],
    [ "macr", 175 ],
    [ "deg", 176 ],
    [ "plusmn", 177 ],
    [ "sup2", 178 ],
    [ "sup3", 179 ],
    [ "acute", 180 ],
    [ "micro", 181 ],
    [ "para", 182 ],
    [ "middot", 183 ],
    [ "cedil", 184 ],
    [ "sup1", 185 ],
    [ "ordm", 186 ],
    [ "raquo", 187 ],
    [ "frac14", 188 ],
    [ "frac12", 189 ],
    [ "frac34", 190 ],
    [ "iquest", 191 ],
    [ "Agrave", 192 ],
    [ "Aacute", 193 ],
    [ "Acirc", 194 ],
    [ "Atilde", 195 ],
    [ "Auml", 196 ],
    [ "Aring", 197 ],
    [ "AElig", 198 ],
    [ "Ccedil", 199 ],
    [ "Egrave", 200 ],
    [ "Eacute", 201 ],
    [ "Ecirc", 202 ],
    [ "Euml", 203 ],
    [ "Igrave", 204 ],
    [ "Iacute", 205 ],
    [ "Icirc", 206 ],
    [ "Iuml", 207 ],
    [ "ETH", 208 ],
    [ "Ntilde", 209 ],
    [ "Ograve", 210 ],
    [ "Oacute", 211 ],
    [ "Ocirc", 212 ],
    [ "Otilde", 213 ],
    [ "Ouml", 214 ],
    [ "times", 215 ],
    [ "Oslash", 216 ],
    [ "Ugrave", 217 ],
    [ "Uacute", 218 ],
    [ "Ucirc", 219 ],
    [ "Uuml", 220 ],
    [ "Yacute", 221 ],
    [ "THORN", 222 ],
    [ "szlig", 223 ],
    [ "agrave", 224 ],
    [ "aacute", 225 ],
    [ "acirc", 226 ],
    [ "atilde", 227 ],
    [ "auml", 228 ],
    [ "aring", 229 ],
    [ "aelig", 230 ],
    [ "ccedil", 231 ],
    [ "egrave", 232 ],
    [ "eacute", 233 ],
    [ "ecirc", 234 ],
    [ "euml", 235 ],
    [ "igrave", 236 ],
    [ "iacute", 237 ],
    [ "icirc", 238 ],
    [ "iuml", 239 ],
    [ "eth", 240 ],
    [ "ntilde", 241 ],
    [ "ograve", 242 ],
    [ "oacute", 243 ],
    [ "ocirc", 244 ],
    [ "otilde", 245 ],
    [ "ouml", 246 ],
    [ "divide", 247 ],
    [ "oslash", 248 ],
    [ "ugrave", 249 ],
    [ "uacute", 250 ],
    [ "ucirc", 251 ],
    [ "uuml", 252 ],
    [ "yacute", 253 ],
    [ "thorn", 254 ],
    [ "yuml", 255 ],
    [ "OElig", 338 ],
    [ "oelig", 339 ],
    [ "Scaron", 352 ],
    [ "scaron", 353 ],
    [ "Yuml", 376 ],
    [ "fnof", 402 ],
    [ "circ", 710 ],
    [ "tilde", 732 ],
    [ "Alpha", 913 ],
    [ "Beta", 914 ],
    [ "Gamma", 915 ],
    [ "Delta", 916 ],
    [ "Epsilon", 917 ],
    [ "Zeta", 918 ],
    [ "Eta", 919 ],
    [ "Theta", 920 ],
    [ "Iota", 921 ],
    [ "Kappa", 922 ],
    [ "Lambda", 923 ],
    [ "Mu", 924 ],
    [ "Nu", 925 ],
    [ "Xi", 926 ],
    [ "Omicron", 927 ],
    [ "Pi", 928 ],
    [ "Rho", 929 ],
    [ "Sigma", 931 ],
    [ "Tau", 932 ],
    [ "Upsilon", 933 ],
    [ "Phi", 934 ],
    [ "Chi", 935 ],
    [ "Psi", 936 ],
    [ "Omega", 937 ],
    [ "alpha", 945 ],
    [ "beta", 946 ],
    [ "gamma", 947 ],
    [ "delta", 948 ],
    [ "epsilon", 949 ],
    [ "zeta", 950 ],
    [ "eta", 951 ],
    [ "theta", 952 ],
    [ "iota", 953 ],
    [ "kappa", 954 ],
    [ "lambda", 955 ],
    [ "mu", 956 ],
    [ "nu", 957 ],
    [ "xi", 958 ],
    [ "omicron", 959 ],
    [ "pi", 960 ],
    [ "rho", 961 ],
    [ "sigmaf", 962 ],
    [ "sigma", 963 ],
    [ "tau", 964 ],
    [ "upsilon", 965 ],
    [ "phi", 966 ],
    [ "chi", 967 ],
    [ "psi", 968 ],
    [ "omega", 969 ],
    [ "thetasym", 977 ],
    [ "upsih", 978 ],
    [ "piv", 982 ],
    [ "ensp", 8194 ],
    [ "emsp", 8195 ],
    [ "thinsp", 8201 ],
    [ "zwnj", 8204 ],
    [ "zwj", 8205 ],
    [ "lrm", 8206 ],
    [ "rlm", 8207 ],
    [ "ndash", 8211 ],
    [ "mdash", 8212 ],
    [ "lsquo", 8216 ],
    [ "rsquo", 8217 ],
    [ "sbquo", 8218 ],
    [ "ldquo", 8220 ],
    [ "rdquo", 8221 ],
    [ "bdquo", 8222 ],
    [ "dagger", 8224 ],
    [ "Dagger", 8225 ],
    [ "bull", 8226 ],
    [ "hellip", 8230 ],
    [ "permil", 8240 ],
    [ "prime", 8242 ],
    [ "Prime", 8243 ],
    [ "lsaquo", 8249 ],
    [ "rsaquo", 8250 ],
    [ "oline", 8254 ],
    [ "frasl", 8260 ],
    [ "euro", 8364 ],
    [ "image", 8465 ],
    [ "weierp", 8472 ],
    [ "real", 8476 ],
    [ "trade", 8482 ],
    [ "alefsym", 8501 ],
    [ "larr", 8592 ],
    [ "uarr", 8593 ],
    [ "rarr", 8594 ],
    [ "darr", 8595 ],
    [ "harr", 8596 ],
    [ "crarr", 8629 ],
    [ "lArr", 8656 ],
    [ "uArr", 8657 ],
    [ "rArr", 8658 ],
    [ "dArr", 8659 ],
    [ "hArr", 8660 ],
    [ "forall", 8704 ],
    [ "part", 8706 ],
    [ "exist", 8707 ],
    [ "empty", 8709 ],
    [ "nabla", 8711 ],
    [ "isin", 8712 ],
    [ "notin", 8713 ],
    [ "ni", 8715 ],
    [ "prod", 8719 ],
    [ "sum", 8721 ],
    [ "minus", 8722 ],
    [ "lowast", 8727 ],
    [ "radic", 8730 ],
    [ "prop", 8733 ],
    [ "infin", 8734 ],
    [ "ang", 8736 ],
    [ "and", 8743 ],
    [ "or", 8744 ],
    [ "cap", 8745 ],
    [ "cup", 8746 ],
    [ "int", 8747 ],
    [ "there4", 8756 ],
    [ "sim", 8764 ],
    [ "cong", 8773 ],
    [ "asymp", 8776 ],
    [ "ne", 8800 ],
    [ "equiv", 8801 ],
    [ "le", 8804 ],
    [ "ge", 8805 ],
    [ "sub", 8834 ],
    [ "sup", 8835 ],
    [ "nsub", 8836 ],
    [ "sube", 8838 ],
    [ "supe", 8839 ],
    [ "oplus", 8853 ],
    [ "otimes", 8855 ],
    [ "perp", 8869 ],
    [ "sdot", 8901 ],
    [ "lceil", 8968 ],
    [ "rceil", 8969 ],
    [ "lfloor", 8970 ],
    [ "rfloor", 8971 ],
    [ "lang", 9001 ],
    [ "rang", 9002 ],
    [ "loz", 9674 ],
    [ "spades", 9824 ],
    [ "clubs", 9827 ],
    [ "hearts", 9829 ],
    [ "diams", 9830 ]) {
    my ($name, $codepoint) = @$ref;
    # Register the entity name, then emit its codepoint as the next array
    # element so the two stay in step.
    $hdr->add($name, "ENT_$name");
    $hdr->append(" $codepoint,");
}

$hdr->append('};');

$hdr->write();