1 // -*- coding: viscii mode: c++ -*-
5 #include "dictionary.h"
// Syllable-initial consonant patterns. Syllable::parse walks this list in
// array order until it reaches a null entry and compares each pattern with
// the head of the syllable, which is why the entries are kept longest first.
21 char *first_consonants
[] = { // longest first
// Syllable-final consonant patterns. Syllable::parse walks this list in
// array order until it reaches a null entry and compares each pattern with
// the tail of the syllable, which is why the entries are kept longest first.
// The commented-out entries are deliberately not accepted as finals.
51 char *last_consonants
[] = { // longest first
52 "nh","ng","ch",/*"gh","ph","th","tr","gi","kh",*/
53 "c","m","n","p","t",/*"b","k","q","d","ð","g","h","l","r","s","v","x",*/
54 // these are semivowels, not consonants.
// Padding-vowel patterns (the optional part between the first consonant and
// the main vowel). Syllable::parse walks this list until a null entry and
// compares each pattern with the whole remaining string.
59 char *padding_vowels
[] = {
// Six position-aligned rows of letters. Syllable::parse searches rows 1..5
// for each character of the syllable; on a hit the character is replaced by
// the letter at the same index in row 0 and the row number is stored as the
// syllable's diacritic.
64 char *diacritic_table
[6] = {
73 char *case_table
[2] = {
74 "áàäãÕ⤥¦ç§å¡¢ÆÇ£éè먩ꪫ¬®íìïî¸óòöõ÷ô¯°±²µ½¾¶·ÞþúùüûøßÑ×ØæñýÏÖÛÜð",
75 "ÁÀÄÀ„…†\x06‡Å�‚\x02\x05ƒÉÈˈ‰ÊŠ‹Œ�ŽÍ̛ΘÓÒ™ šÔ��‘’“´•–—³”ÚÙœ�ž¿º»¼ÿ¹ÝŸ\x14\x19\x1eÐ",
// Byte-indexed case conversion tables: row 0 maps a byte to its lowercase
// form (read by viet_tolower / viet_islower), row 1 maps a byte to its
// uppercase form (read by viet_toupper / viet_isupper).
78 char full_case_table
[2][256];
// Populate full_case_table.
// NOTE(review): len is measured on case_table[0] only; this assumes
// case_table[1] has at least as many characters - confirm.
82 int i
,len
= strlen(case_table
[0]);
// Pass 1: every byte maps to itself, except ASCII letters, which are shifted
// by 32 to the other case (row 0 -> lowercase, row 1 -> uppercase).
83 for (i
= 0;i
< 256;i
++) {
84 full_case_table
[0][i
] = i
>= 'A' && i
<= 'Z' ? i
+32 : i
;
85 full_case_table
[1][i
] = i
>= 'a' && i
<= 'z' ? i
-32 : i
;
// Pass 2: overlay the accented letters. case_table[0][i] and
// case_table[1][i] are treated as the lowercase / uppercase forms of the
// same letter; the cast to unsigned char keeps the index non-negative.
87 for (i
= 0;i
< len
; i
++) {
88 full_case_table
[0][(unsigned char)case_table
[1][i
]] = case_table
[0][i
];
89 full_case_table
[1][(unsigned char)case_table
[0][i
]] = case_table
[1][i
];
// Syllable::parse - split one syllable into its components.
// Order of work: strip and record the diacritic, match a final consonant at
// the tail, match an initial consonant at the head, match the vowel at the
// tail of what is left, then match the padding vowel against the remainder.
// Returns true only if a vowel was recognised and nothing is left over;
// returns false for a string that cannot be decomposed.
94 // We assume str is a valid syllable ;)
95 bool Syllable::parse(const char *str
)
97 // Rule: there is always a vowel. The other parts can be omitted.
98 // [first_consonant] [padding_vowel] vowel [last_consonant]
// Work on a private copy; the pieces are erased from it as they are matched.
103 string
syllable(str
);
105 // First of all, extract the diacritic.
106 len
= syllable
.size();
107 for (k
= 0;k
< len
;k
++) {
108 // Look the character up in rows 1..5 of diacritic_table.
109 for (j
= 1;j
< 6;j
++) {
110 char *pos
= strchr(diacritic_table
[j
],syllable
[k
]);
// ipos is the column of the hit; the same column of row 0 holds the
// replacement letter, and the row number j becomes the diacritic.
112 int ipos
= pos
- diacritic_table
[j
];
113 syllable
[k
] = diacritic_table
[0][ipos
]; // remove diacritic
114 diacritic
= (diacritics
)j
;
120 // First, get the last_consonant.
122 len
= syllable
.size();
// Suffix match. The pattern must be strictly shorter than the string
// (len > pattern_len), so this step never consumes the whole syllable.
123 for (i
= 0;last_consonants
[i
] != 0; i
++) {
124 char *pattern
= last_consonants
[i
];
125 int pattern_len
= strlen(pattern
);
127 if (len
> pattern_len
&&
128 syllable
.substr(len
-pattern_len
) == pattern
) {
130 syllable
.erase(len
-pattern_len
);
135 // Then get the first_consonant.
136 first_consonant
= -1;
137 len
= syllable
.size();
// Prefix match. Here the pattern may be as long as the whole string.
138 for (i
= 0;first_consonants
[i
] != 0; i
++) {
139 char *pattern
= first_consonants
[i
];
140 int pattern_len
= strlen(pattern
);
142 if (len
>= pattern_len
&& // equal is possible
143 syllable
.substr(0,pattern_len
) == pattern
) {
145 syllable
.erase(0,pattern_len
);
150 // Check if syllable is empty.
151 // If it is, we can't use this last_consonant:
152 // find the next last_consonant.
153 if (syllable
.empty()) {
// NOTE(review): this guard returns when last_consonant != -1, yet the
// statements that follow index last_consonants[last_consonant]. As written
// they appear to run only with last_consonant == -1, which is out of range.
// The condition looks inverted (== -1 expected) - confirm.
154 if (last_consonant
!= -1)
155 return false; // bad syllable
// Put the rejected final consonant back as the working string and retry
// with the patterns that come after it in the table.
157 syllable
= last_consonants
[last_consonant
];
158 len
= syllable
.size();
159 int start
= last_consonant
+1;
161 for (i
= start
;last_consonants
[i
] != 0; i
++) {
162 char *pattern
= last_consonants
[i
];
163 int pattern_len
= strlen(pattern
);
165 if (len
> pattern_len
&&
166 syllable
.substr(len
-pattern_len
) == pattern
) {
168 syllable
.erase(len
-pattern_len
);
176 len
= syllable
.size();
178 return false; // bad syllable
// Vowel: suffix match against the vowels table; the vowel may be the whole
// remaining string.
179 for (i
= 0;vowels
[i
] != 0; i
++) {
180 char *pattern
= vowels
[i
];
181 int pattern_len
= strlen(pattern
);
183 if (len
>= pattern_len
&& // equal is possible
184 syllable
.substr(len
-pattern_len
) == pattern
) {
186 syllable
.erase(len
-pattern_len
);
193 len
= syllable
.size();
// Padding vowel: must be exactly what is left of the string.
196 for (i
= 0;padding_vowels
[i
] != 0; i
++) {
197 char *pattern
= padding_vowels
[i
];
198 int pattern_len
= strlen(pattern
);
200 if (syllable
== pattern
) {
// Success needs a recognised vowel and a fully consumed string.
207 return vowel
!= -1 && syllable
.empty();
// Syllable::standardize - per the note below, meant to eliminate the
// spellings "òa", "òe" and "ùa".
// NOTE(review): syllable is taken by value and the function returns void, so
// edits to it are not visible to the caller unless they are stored elsewhere
// - confirm this is intended.
211 void Syllable::standardize(std::string syllable)
213 // We just need to eliminate "òa", "òe", "ùa"
215 // First, cut the first consonant off.
217 int len = syllable.size();
218 for (i = 0;first_consonants[i] != 0; i++) {
// NOTE(review): *first_consonants[i] is a single char, not the char * being
// initialised; the leading '*' looks like a typo - confirm.
219 char *pattern = *first_consonants[i];
220 int pattern_len = strlen(pattern);
// Prefix match; the consonant must be strictly shorter than the syllable.
222 if (len > pattern_len &&
223 syllable.compare(0,pattern_len,pattern) == 0) {
232 int viet_toupper(int ch
) // must be sure ch is a character
234 return full_case_table
[1][(unsigned char)(char)ch
];
236 int viet_tolower(int ch
) // must be sure ch is a character
238 return full_case_table
[0][(unsigned char)(char)ch
];
240 bool viet_isupper(int ch
)
242 return full_case_table
[1][ch
] == ch
;
245 bool viet_islower(int ch
)
247 return full_case_table
[0][ch
] == ch
;