3 @vowels = ("iê","yê","ia",
10 @first_consonants = ("ngh", "nh", "ng", "ch", "gh",
11 "ph", "th", "tr", "gi", "kh",
12 "c", "m", "n", "t", "b", "k",
13 "q", "d", "ð", "g", "h", "l",
16 @last_consonants = ("nh","ng","ch",
20 @padding_vowels = ( "o","u" );
22 @diacritic_table = ("aâåeêioô½ußy",
30 ("áàäãÕ⤥¦ç§å¡¢ÆÇ£éè먩ꪫ¬®íìïî¸óòöõ÷ô¯°±²µ½¾¶·ÞþúùüûøßÑ×ØæñýÏÖÛÜð",
31 "ÁÀÄÀ„…†\x06‡Å�‚\x02\x05ƒÉÈˈ‰ÊŠ‹Œ�ŽÍ̛ΘÓÒ™ šÔ��‘’“´•–—³”ÚÙœ�ž¿º»¼ÿ¹ÝŸ\x14\x19\x1eÐ");
33 // we assumes str is a valid syllable
;)
34 bool Syllable
::parse
(const char
*str
)
36 // Rule
: there is always vowel
. Others can be omitted
.
37 // [first_consonant
] [padding_vowel
] vowel
[last_consonant
]
44 // fisrt of all
, extract diacritic
.
45 components
[Diacritic
] = None
;
46 len
= syllable
.size
();
47 for (k
= 0;k
< len
;k
++) {
48 // look up into diacritic_table
49 for (j
= 1;j
< 6;j
++) {
50 char
*pos = strchr
(diacritic_table
[j
],syllable
[k
]);
52 int ipos
= pos - diacritic_table
[j
];
53 syllable
[k
] = diacritic_table
[0][ipos
]; // remove diacritic
54 components
[Diacritic
] = j
;
60 // first
, get the last_consonant
61 components
[Last_Consonant
] = -1;
62 len
= syllable
.size
();
63 for (i
= 0;last_consonants
[i
] != 0; i
++) {
64 char
*pattern
= last_consonants
[i
];
65 int pattern_len
= strlen
(pattern
);
67 if (len
> pattern_len
&&
68 syllable
.substr(len
-pattern_len
) == pattern
) {
69 components
[Last_Consonant
] = i
;
70 syllable
.erase
(len
-pattern_len
);
75 // then get the first_consonant
76 components
[First_Consonant
] = -1;
77 len
= syllable
.size
();
78 for (i
= 0;first_consonants
[i
] != 0; i
++) {
79 char
*pattern
= first_consonants
[i
];
80 int pattern_len
= strlen
(pattern
);
82 if (len
>= pattern_len
&& // equal is possible
83 syllable
.substr(0,pattern_len
) == pattern
) {
84 components
[First_Consonant
] = i
;
85 syllable
.erase
(0,pattern_len
);
90 // check
if syllable is empty
.
91 // if it is
, we can
't use this last_consonant.
92 // find the next last_consonant
93 if (syllable.empty()) {
94 if (components[Last_Consonant] == -1)
95 return false; // bad syllable
97 syllable = last_consonants[components[Last_Consonant]];
98 len = syllable.size();
99 int start = components[Last_Consonant]+1;
100 components[Last_Consonant] = -1;
101 for (i = start;last_consonants[i] != 0; i++) {
102 char *pattern = last_consonants[i];
103 int pattern_len = strlen(pattern);
105 if (len > pattern_len &&
106 syllable.substr(len-pattern_len) == pattern) {
107 components[Last_Consonant] = i;
108 syllable.erase(len-pattern_len);
115 components[Vowel] = -1;
116 len = syllable.size();
118 return false; // bad syllable
119 for (i = 0;vowels[i] != 0; i++) {
120 char *pattern = vowels[i];
121 int pattern_len = strlen(pattern);
123 if (len >= pattern_len && // equal is possible
124 syllable.substr(len-pattern_len) == pattern) {
125 components[Vowel] = i;
126 syllable.erase(len-pattern_len);
132 components[Padding_Vowel] = -1;
133 len = syllable.size();
136 for (i = 0;padding_vowels[i] != 0; i++) {
137 char *pattern = padding_vowels[i];
138 int pattern_len = strlen(pattern);
140 if (syllable == pattern) {
141 components[Padding_Vowel] = i;
147 return components[Vowel] != -1 && syllable.empty();
150 void Syllable::print()
153 for (int i = 0;i < 4;i ++) {
154 if (components[i] < 0)
158 case First_Consonant: p = first_consonants; break;
159 case Last_Consonant: p = last_consonants; break;
160 case Padding_Vowel: p = padding_vowels; break;
161 case Vowel: p = vowels; break;
163 cerr << p[components[i]];
169 strid Syllable::to_id()
173 for (int i = 0;i < 4;i ++) {
174 if (components[i] >= 0) {
176 case First_Consonant: p = first_consonants; break;
177 case Last_Consonant: p = last_consonants; break;
178 case Padding_Vowel: p = padding_vowels; break;
179 case Vowel: p = vowels; break;
181 s += p[components[i]];
182 if (i == Vowel && components[Diacritic] != None) {
183 int last = s.size()-1;
184 int j = strchr(diacritic_table[0],s[last]) - diacritic_table[0];
185 s[last] = diacritic_table[components[Diacritic]][j];
193 void Syllable::standardize(std::string syllable)
195 // We just need to eliminate "òa", "òe", "ùa"
197 // first, cut the first consonant off
199 int len = syllable.size();
200 for (i = 0;first_consonants[i] != 0; i++) {
201 char *pattern = *first_consonants[i];
202 int pattern_len = strlen(pattern);
204 if (len > pattern_len &&
205 syllable.compare(0,pattern_len,pattern) == 0) {
214 int viet_toupper(int ch) // must be sure ch is a character
216 return full_case_table[1][(unsigned char)(char)ch];
218 int viet_tolower(int ch) // must be sure ch is a character
220 return full_case_table[0][(unsigned char)(char)ch];
222 bool viet_isupper(int ch)
224 return full_case_table[1][ch] == ch;
227 bool viet_islower(int ch)
229 return full_case_table[0][ch] == ch;