gconvert: add note to avoid transliteration
[glib.git] / tests / gen-casefold-txt.pl
blob2a6a0d4b12f95183ad25c192d068c012e8412625
#! /usr/bin/perl -w

# Copyright (C) 1998, 1999 Tom Tromey
# Copyright (C) 2001 Red Hat Software

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

# gen-casefold-txt.pl - Generate test cases for casefolding from Unicode data.
# See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html
# Usage: gen-casefold-txt.pl UNICODE-VERSION CaseFolding.txt
#
# The test cases are written to standard output, one per line: the source
# character, a tab, and the characters of its case folding.  Diagnostics
# about malformed input lines go to standard error.
#
# I consider the output of this program to be unrestricted. Use it as
# you will.

require 5.006;
use strict;
use warnings;

# Names of fields in the CaseFolding table
my $FOLDING_CODE    = 0;
my $FOLDING_STATUS  = 1;
my $FOLDING_MAPPING = 2;

if (@ARGV != 2) {
    # Report only the script's base name in the usage message.
    $0 =~ s@.*/@@;
    die "Usage: $0 UNICODE-VERSION CaseFolding.txt\n";
}

my ($unicode_version, $casefolding_file) = @ARGV;

# The header text (including the script name it mentions) is emitted
# unchanged so that regenerated output does not differ.
# NOTE(review): the bare "#" separator lines in this header were not visible
# in the reviewed copy of the script and have been reconstructed; confirm
# them against the checked-in generated file before committing.
print <<EOT;
# Test cases generated from Unicode $unicode_version data
# by gen-casefold-test.pl. Do not edit.
#
# Some special hand crafted tests
#
AaBbCc@@\taabbcc@@
#
# Now the automatic tests
#
EOT

# From here on we print characters built with pack("U"), so STDOUT needs
# the UTF-8 layer.
binmode STDOUT, ":utf8";

# Three-argument open keeps the file name from being interpreted as a mode.
# The exit status on failure stays 1, but the reason is now reported.
open(my $input, '<', $casefolding_file) or do {
    print STDERR "$0: cannot open $casefolding_file: $!\n";
    exit 1;
};

while (my $line = <$input>) {
    # chomp, not chop: the last line may lack a trailing newline.
    chomp $line;

    # Drop comments (whole-line or trailing) and leading whitespace, then
    # skip anything that is left empty.
    $line =~ s/\s*#.*//;
    $line =~ s/^\s+//;
    next if $line =~ /^\s*$/;

    # Limit -1 keeps trailing empty fields, which the count check relies on.
    my @fields = split /\s*;\s*/, $line, -1;
    my $raw_code = $fields[$FOLDING_CODE];

    # Every field in CaseFolding.txt is terminated by ';', so a well-formed
    # entry splits into the three named fields plus one empty trailing
    # element; $#fields is then 3.
    if ($#fields != 3) {
        printf STDERR ("Entry for %s has wrong number of fields (%d)\n",
                       $raw_code, $#fields);
        next;
    }

    # skip simple and Turkic mappings
    next if $fields[$FOLDING_STATUS] =~ /^[ST]$/;

    # Convert only validated entries, so malformed lines do not trigger
    # "illegal hexadecimal digit" warnings on top of the diagnostic above.
    my $code   = hex($raw_code);
    my @values = map { hex($_) } split ' ', $fields[$FOLDING_MAPPING];

    printf("%s\t%s\n", pack("U", $code), pack("U*", @values));
}

# A failed close on the input means a read error, i.e. truncated output.
close($input) or die "$0: error reading $casefolding_file: $!\n";