Reimplemented LogPtoProb (which is just an exp10 call)
[vspell.git] / scripts / standardize-personal-name.c
blob5bd678603d0157c075470105f384b02a1b90f070
1 #!/usr/bin/perl
# Vowel table. Entries of two characters are listed before the
# single-character ones. Syllable::parse below scans a table named
# `vowels` front to back and stops at the first entry that matches the
# end of the remaining text, so the order of entries matters there.
# Non-ASCII characters are VISCII bytes (see the "coding: viscii" trailer).
3 @vowels = ("iê","yê","ia",
4 "ß½","ßa","uô","ua",
5 "a","å","â",
6 "e","ê",
7 "o","ô","½",
8 "u","ß","i","y");
# Initial-consonant table. The three-letter entry comes first, then the
# two-letter entries, then single letters. Syllable::parse below scans a
# table named `first_consonants` in order and takes the first entry that
# is a prefix of the text, so longer entries must precede their own
# prefixes (e.g. "ngh" before "ng" before "n").
10 @first_consonants = ("ngh", "nh", "ng", "ch", "gh",
11 "ph", "th", "tr", "gi", "kh",
12 "c", "m", "n", "t", "b", "k",
13 "q", "d", "ð", "g", "h", "l",
14 "r", "s", "v", "x");
# Final-consonant table: two-letter endings first, then single letters.
# Note that the letters "i", "y", "o", "u" are listed here as possible
# endings as well. Syllable::parse below scans a table named
# `last_consonants` in order and takes the first entry that is a proper
# suffix of the text.
16 @last_consonants = ("nh","ng","ch",
17 "c","m","n","p","t",
18 "i","y","o","u");
# Letters accepted in the optional [padding_vowel] slot that sits between
# the first consonant and the main vowel (see the rule quoted in
# Syllable::parse: [first_consonant] [padding_vowel] vowel [last_consonant]).
20 @padding_vowels = ( "o","u" );
# Diacritic table: six parallel strings.
# Row 0 holds the base letters; rows 1..5 hold other forms of those
# letters, column for column. Syllable::parse below searches rows 1..5 of
# a table with this name for each character, replaces a hit with the
# row-0 character at the same column, and records the row number as the
# syllable's diacritic; Syllable::to_id performs the reverse mapping.
# NOTE(review): the bytes are VISCII (see the "coding: viscii" trailer)
# and display incorrectly when read as another encoding - do not
# re-encode this block.
22 @diacritic_table = ("aâåeêioô½ußy",
23 "ᤡéªíó¯¾úÑý",
24 "ॢè«ìò°¶ù×Ï",
25 "ä¦Æë¬ïö±·üØÖ",
26 "ãçǨ­îõ²ÞûæÛ",
27 "Õ§£©®¸÷µþøñÜ");
# Case table: two parallel strings.
# Presumably the first string lists the lower-case accented letters and
# the second their upper-case counterparts at the same position - TODO
# confirm against the VISCII code chart.
# NOTE(review): several bytes of the second string are control codes in
# VISCII; edit this block only with an editor that preserves raw bytes.
29 @case_table =
30 ("áàäãÕ⤥¦ç§å¡¢ÆÇ£éè먩ꪫ¬­®íìïî¸óòöõ÷ô¯°±²µ½¾¶·ÞþúùüûøßÑ×ØæñýÏÖÛÜð",
31 "ÁÀÄÀ„…†\x06‡Å�‚\x02\x05ƒÉÈˈ‰ÊŠ‹Œ�ŽÍ̛ΘÓÒ™ šÔ��‘’“´•–—³”ÚÙœ�ž¿º»¼ÿ¹ÝŸ\x14\x19\x1eÐ");
33 // we assumes str is a valid syllable ;)
34 bool Syllable::parse(const char *str)
36 // Rule: there is always vowel. Others can be omitted.
37 // [first_consonant] [padding_vowel] vowel [last_consonant]
39 int i,j,k;
40 char **pattern;
41 int len;
42 string syllable(str);
44 // fisrt of all, extract diacritic.
45 components[Diacritic] = None;
46 len = syllable.size();
47 for (k = 0;k < len;k ++) {
48 // look up into diacritic_table
49 for (j = 1;j < 6;j ++) {
50 char *pos = strchr(diacritic_table[j],syllable[k]);
51 if (pos != NULL) {
52 int ipos = pos - diacritic_table[j];
53 syllable[k] = diacritic_table[0][ipos]; // remove diacritic
54 components[Diacritic] = j;
55 break;
60 // first, get the last_consonant
61 components[Last_Consonant] = -1;
62 len = syllable.size();
63 for (i = 0;last_consonants[i] != 0; i++) {
64 char *pattern = last_consonants[i];
65 int pattern_len = strlen(pattern);
67 if (len > pattern_len &&
68 syllable.substr(len-pattern_len) == pattern) {
69 components[Last_Consonant] = i;
70 syllable.erase(len-pattern_len);
71 break;
75 // then get the first_consonant
76 components[First_Consonant] = -1;
77 len = syllable.size();
78 for (i = 0;first_consonants[i] != 0; i++) {
79 char *pattern = first_consonants[i];
80 int pattern_len = strlen(pattern);
82 if (len >= pattern_len && // equal is possible
83 syllable.substr(0,pattern_len) == pattern) {
84 components[First_Consonant] = i;
85 syllable.erase(0,pattern_len);
86 break;
90 // check if syllable is empty.
91 // if it is, we can't use this last_consonant.
92 // find the next last_consonant
93 if (syllable.empty()) {
94 if (components[Last_Consonant] == -1)
95 return false; // bad syllable
97 syllable = last_consonants[components[Last_Consonant]];
98 len = syllable.size();
99 int start = components[Last_Consonant]+1;
100 components[Last_Consonant] = -1;
101 for (i = start;last_consonants[i] != 0; i++) {
102 char *pattern = last_consonants[i];
103 int pattern_len = strlen(pattern);
105 if (len > pattern_len &&
106 syllable.substr(len-pattern_len) == pattern) {
107 components[Last_Consonant] = i;
108 syllable.erase(len-pattern_len);
109 break;
114 // get vowel
115 components[Vowel] = -1;
116 len = syllable.size();
117 if (len == 0)
118 return false; // bad syllable
119 for (i = 0;vowels[i] != 0; i++) {
120 char *pattern = vowels[i];
121 int pattern_len = strlen(pattern);
123 if (len >= pattern_len && // equal is possible
124 syllable.substr(len-pattern_len) == pattern) {
125 components[Vowel] = i;
126 syllable.erase(len-pattern_len);
127 break;
131 // the last is vowel
132 components[Padding_Vowel] = -1;
133 len = syllable.size();
134 if (len == 0)
135 return true;
136 for (i = 0;padding_vowels[i] != 0; i++) {
137 char *pattern = padding_vowels[i];
138 int pattern_len = strlen(pattern);
140 if (syllable == pattern) {
141 components[Padding_Vowel] = i;
142 syllable = "";
143 break;
147 return components[Vowel] != -1 && syllable.empty();
150 void Syllable::print()
152 char **p;
153 for (int i = 0;i < 4;i ++) {
154 if (components[i] < 0)
155 cerr << "_";
156 else {
157 switch (i) {
158 case First_Consonant: p = first_consonants; break;
159 case Last_Consonant: p = last_consonants; break;
160 case Padding_Vowel: p = padding_vowels; break;
161 case Vowel: p = vowels; break;
163 cerr << p[components[i]];
165 cerr << " ";
169 strid Syllable::to_id()
171 string s;
172 char **p;
173 for (int i = 0;i < 4;i ++) {
174 if (components[i] >= 0) {
175 switch (i) {
176 case First_Consonant: p = first_consonants; break;
177 case Last_Consonant: p = last_consonants; break;
178 case Padding_Vowel: p = padding_vowels; break;
179 case Vowel: p = vowels; break;
181 s += p[components[i]];
182 if (i == Vowel && components[Diacritic] != None) {
183 int last = s.size()-1;
184 int j = strchr(diacritic_table[0],s[last]) - diacritic_table[0];
185 s[last] = diacritic_table[components[Diacritic]][j];
189 return sarch[s];
193 void Syllable::standardize(std::string syllable)
195 // We just need to eliminate "òa", "òe", "ùa"
197 // first, cut the first consonant off
198 int start = 0;
199 int len = syllable.size();
200 for (i = 0;first_consonants[i] != 0; i++) {
201 char *pattern = *first_consonants[i];
202 int pattern_len = strlen(pattern);
204 if (len > pattern_len &&
205 syllable.compare(0,pattern_len,pattern) == 0) {
206 start = i;
207 break;
214 int viet_toupper(int ch) // must be sure ch is a character
216 return full_case_table[1][(unsigned char)(char)ch];
218 int viet_tolower(int ch) // must be sure ch is a character
220 return full_case_table[0][(unsigned char)(char)ch];
222 bool viet_isupper(int ch)
224 return full_case_table[1][ch] == ch;
227 bool viet_islower(int ch)
229 return full_case_table[0][ch] == ch;
233 # Local Variables:
234 # coding: viscii
235 # End: