Avoid signed/unsigned char pitfalls when calling viet_is* functions
[vspell.git] / scripts / h2t.pl
blobd82544a442e5a9dfc30a0df8eeca08fc6bedfe11
1 #!/usr/bin/perl
3 %entity = (
4 'à' => 'à',
5 'ả' => 'ä',
6 'ã' => 'ã',
7 'á' => 'á',
8 'ạ' => 'Õ',
9 'ă' => 'å',
10 'ằ' => '¢',
11 'ẳ' => 'Æ',
12 'ẵ' => 'Ç',
13 'ắ' => '¡',
14 'ặ' => '£',
15 'â' => 'â',
16 'ầ' => '¥',
17 'ẩ' => '¦',
18 'ẫ' => 'ç',
19 'ấ' => '¤',
20 'ậ' => '§',
21 'è' => 'è',
22 'ẻ' => 'ë',
23 'ẽ' => '¨',
24 'é' => 'é',
25 'ẹ' => '©',
26 'ê' => 'ê',
27 'ề' => '«',
28 'ể' => '¬',
29 'ễ' => '­',
30 'ế' => 'ª',
31 'ệ' => '®',
32 'ì' => 'ì',
33 'ỉ' => 'ï',
34 'ĩ' => 'î',
35 'í' => 'í',
36 'ị' => '¸',
37 'ò' => 'ò',
38 'ỏ' => 'ö',
39 'õ' => 'õ',
40 'ó' => 'ó',
41 'ọ' => '÷',
42 'ô' => 'ô',
43 'ồ' => '°',
44 'ổ' => '±',
45 'ỗ' => '²',
46 'ố' => '¯',
47 'ộ' => 'µ',
48 'ơ' => '½',
49 'ờ' => '¶',
50 'ở' => '·',
51 'ỡ' => 'Þ',
52 'ớ' => '¾',
53 'ợ' => 'þ',
54 'ù' => 'ù',
55 'ủ' => 'ü',
56 'ũ' => 'û',
57 'ú' => 'ú',
58 'ụ' => 'ø',
59 'ư' => 'ß',
60 'ừ' => '×',
61 'ử' => 'Ø',
62 'ữ' => 'æ',
63 'ứ' => 'Ñ',
64 'ự' => 'ñ',
65 'ỳ' => 'Ï',
66 'ỷ' => 'Ö',
67 'ỹ' => 'Û',
68 'ý' => 'ý',
69 'ỵ' => 'Ü',
70 'đ' => 'ð',
71 'À' => 'À',
72 'Ả' => 'Ä',
73 'Ã' => 'Ã',
74 'Á' => 'Á',
75 'Ạ' => '€',
76 'Ă' => 'Å',
77 'Ằ' => '‚',
78 'Ẳ' => '\x02',
79 'Ẵ' => '\x05',
80 'Ắ' => '�',
81 'Ặ' => 'ƒ',
82 'Â' => 'Â',
83 'Ầ' => '…',
84 'Ẩ' => '†',
85 'Ẫ' => '\x06',
86 'Ấ' => '$',
87 'Ậ' => '‡',
88 'È' => 'È',
89 'Ẻ' => 'Ë',
90 'Ẽ' => 'ˆ',
91 'É' => 'É',
92 'Ẹ' => '‰',
93 'Ê' => 'Ê',
94 'Ề' => '‹',
95 'Ể' => 'Œ',
96 'Ễ' => '�',
97 'Ế' => 'Š',
98 'Ệ' => 'Ž',
99 'Í' => 'Í',
100 'Ỉ' => '›',
101 'Ĩ' => 'Î',
102 'Ì' => 'Ì',
103 'Ị' => '˜',
104 'Ò' => 'Ò',
105 'Ỏ' => '™',
106 'Õ' => ' ',
107 'Ó' => 'Ó',
108 'Ọ' => 'š',
109 'Ô' => 'Ô',
110 'Ồ' => '�',
111 'Ổ' => '‘',
112 'Ỗ' => '’',
113 'Ố' => '�',
114 'Ộ' => '“',
115 'Ơ' => '´',
116 'Ờ' => '–',
117 'Ở' => '—',
118 'Ỡ' => '³',
119 'Ớ' => '•',
120 'Ợ' => '”',
121 'Ù' => 'Ù',
122 'Ủ' => 'œ',
123 'Ũ' => '�',
124 'Ú' => 'Ú',
125 'Ụ' => 'ž',
126 'Ư' => '¿',
127 'Ừ' => '»',
128 'Ử' => '¼',
129 'Ữ' => 'ÿ',
130 'Ứ' => 'º',
131 'Ự' => 'q',
132 'Ỳ' => 'Ÿ',
133 'Ỷ' => '\x14',
134 'Ỹ' => '\x19',
135 'Ý' => 'Ý',
136 'Ỵ' => '\x1e',
137 'Đ' => 'Ð',
# Convert HTML to plain text.
#
# Input : the files named on the command line, or STDIN when none are given.
# Output: the text found between <body ...> and </body>, written to STDOUT with
#         markup removed, paragraph and table-cell boundaries turned into
#         newlines, every key of %entity (defined earlier in this file)
#         replaced by its mapped value, and any remaining character reference
#         replaced by the marker " ENT ".
#
# NOTE(review): the file footer tags this source as `coding: viscii`, so the
# %entity values are presumably raw single bytes.  No I/O layers are set here;
# confirm before adding `use utf8` or an encoding layer.

# Join the whole input into one line (each newline becomes a space) so the
# patterns below can match markup that spans several input lines.
my $text = '';
while ( my $line = <> ) {
    chomp $line;
    $text .= "$line ";
}

# Keep only the document body.  Tag names are matched case-insensitively,
# consistent with the <P>/<TD> handling further down.
$text =~ s/^.*<body[^>]*>//is;
$text =~ s,</body.*$,,is;

# Drop scripts together with their content.  The non-greedy `.*?` also copes
# with script bodies that contain '<' (comparisons, embedded markup), which a
# `[^<]*` body pattern would fail to match, leaking the code into the output.
$text =~ s,<script[^>]*>.*?</script>,,gis;

# Paragraph and table-cell boundaries become line breaks.
$text =~ s,</P>,\n,gi;
$text =~ s,<P [^>]*>,\n,gi;
$text =~ s,</TD>,\n,gi;
$text =~ s,<TD [^>]*>,\n,gi;

# Every remaining tag is simply removed.
$text =~ s,<[^>]*>,,g;

# Apply the %entity table in ONE pass over the text.  Substituting key by key
# would rescan text that was already replaced, so whenever a replacement value
# equals another key the result would depend on Perl's unspecified hash
# iteration order.  Keys are quoted so they always match literally, and tried
# longest-first so a key that is a prefix of another cannot shadow it.
if (%entity) {
    my $any_key = join '|',
        map  { quotemeta }
        sort { length($b) <=> length($a) or $a cmp $b }
        keys %entity;
    $text =~ s/($any_key)/$entity{$1}/g;
}

# Non-breaking spaces become ordinary spaces.
$text =~ s,&nbsp;, ,g;

# Any character reference still left is unknown to the table; mark it.  The
# pattern is restricted to well-formed references (&name; &#123; &#x1F;) so a
# stray '&' in running text does not swallow everything up to the next ';'.
$text =~ s/&#?[A-Za-z0-9]+;/ ENT /g;

print $text;
159 # Local Variables:
160 # coding: viscii
161 # End: