Fix typo in 9b54bd30006c008b4a951331b273613d5bac3abf
[pm.git] / intl / unicharutil / tests / genNormalizationData.pl
blob816ab94e7add1ab2dd33ad47cc98b47026f987f4
#!/usr/bin/perl
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Reads NormalizationTest.txt from the current directory and writes
# NormalizationData.h, a C++ header holding:
#   * versionText[]            - the version taken from the file's title line
#   * Part<N>TestData[] arrays - one testcaseLine entry per data row, made of
#                                five wide-string literals (columns 1-5) and a
#                                narrow-string description.
# Every array ends with an all-empty entry that serves as its terminator.

use strict;
use warnings;

# Write the all-empty testcaseLine entry that terminates an array, followed
# by the closing brace of the C array initializer.
#   $out - output filehandle
sub print_array_terminator {
    my ($out) = @_;
    print {$out} " {\n";
    print {$out} " L\"\",\n" for 1 .. 5;
    print {$out} " \"\",\n";
    print {$out} " },\n";
    print {$out} "};\n";
    return;
}

# Convert one column of space-separated hexadecimal code points into the
# body of a wide-string literal made of \xNNNN escapes (UTF-16 code units).
#   $column - e.g. "0044 0307"
# Returns the escape sequence text, or '' for an empty column.
sub utf16_escapes {
    my ($column) = @_;
    my $escaped = '';
    for my $hex_code (split / /, $column) {
        my $cp = hex $hex_code;
        if ($cp < 0x10000) {
            # BMP code point: a single 16-bit unit.
            $escaped .= sprintf '\\x%04X', $cp;
        }
        else {
            # Non-BMP code point: emit a surrogate pair.
            # 0xD7C0 == 0xD800 - (0x10000 >> 10), so the subtraction of
            # 0x10000 is folded into the high-surrogate base.
            $escaped .= sprintf '\\x%04X\\x%04X',
                ($cp >> 10) + 0xD7C0,
                ($cp & 0x03FF) | 0xDC00;
        }
    }
    return $escaped;
}

open(my $in, '<', 'NormalizationTest.txt')
    or die "Cannot find NormalizationTest.txt. The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";

open(my $out, '>', 'NormalizationData.h')
    or die "Cannot create output file NormalizationData.h\n";

# Banner written at the top of the generated header. The final "*/" closes
# the "DO NOT EDIT" comment; without it the rest of the header would be
# swallowed by an unterminated C comment.
# NOTE(review): the banner names .../tools/genNormalizationData.pl; confirm
# that this still matches where the script lives.
my $mpl = <<'END_OF_MPL';
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/genNormalizationData.pl
*/
END_OF_MPL

print {$out} $mpl;

# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
# true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
# To make it work where that assumption doesn't hold, one could generate
# one huge array containing all the strings as 16-bit units (including
# the 0 terminator) and initialize the array of testcaseLine with pointers
# into the huge array.

while (my $line = <$in>) {
    # chomp (not chop) so that a final line lacking a newline keeps its last
    # character; also drop the CR left behind by CRLF line endings.
    chomp $line;
    $line =~ s/\r\z//;

    if ($line =~ /^# NormalizationTest-(.+)\.txt/) {
        # Title line: record the data file version.
        print {$out} "static char versionText[] = \"$1\";\n";
    }
    elsif ($line =~ /^\@Part(.)/) {
        my $part = $1;

        # Every part except the first must close the array opened by the
        # previous part. The part id is text, so compare it as a string.
        if ($part ne "0") {
            print_array_terminator($out);
        }
        print {$out} "\n";
        print {$out} "static testcaseLine Part${part}TestData[] = \n";
        print {$out} "{\n";
    }
    elsif ($line !~ /^\#/ && $line =~ /\S/) {
        # Data row. Blank lines are skipped: they would otherwise produce an
        # all-empty entry, which is indistinguishable from the terminator.
        #
        # Limit the split to six fields so the trailing comment stays in one
        # piece; the sample glyphs inside it may themselves contain ';',
        # which would shift any fixed field index.
        my @fields = split /;/, $line, 6;

        print {$out} " {\n";
        for my $index (0 .. 4) {
            my $column = defined $fields[$index] ? $fields[$index] : '';
            print {$out} " L\"", utf16_escapes($column), "\",\n";
        }

        # The description is whatever follows the last ") " of the comment,
        # i.e. the text after the parenthesised list of sample glyphs.
        my $comment = defined $fields[5] ? $fields[5] : '';
        my ($description) = $comment =~ /^.*\) (.*)$/;
        $description = '' unless defined $description;

        print {$out} " \"$description\"\n";
        print {$out} " },\n";
    }
}

# Close the array belonging to the last part.
print_array_terminator($out);

# Buffered write errors only surface at close time, so check it.
close($out) or die "Cannot finish writing NormalizationData.h: $!\n";
close($in);