A lot of changes happened.
[vspell.git] / syllable.cpp
blob31c9dfe74196b559d6040e2a986d38c2d88062f9
1 // -*- coding: viscii mode: c++ -*-
2 #include <string>
3 #include <iostream>
4 #include <string.h>
5 #include "dictionary.h"
7 using namespace std;
9 namespace Dictionary {
// NULL-terminated list of vowel nuclei, written in the 8-bit encoding named
// in the file header (VISCII).
// Syllable::parse() walks this list from the top and keeps the first entry
// that is a suffix of the text still to be parsed, which is why the
// two-letter entries come before the single letters.  The index of the
// matching entry is stored in Syllable::vowel.
11 char *vowels[] = {
12 "iê","yê","ia",
13 "ß½","ßa","uô","ua",
14 "a","å","â",
15 "e","ê",
16 "o","ô","½",
17 "u","ß","i","y",
18 NULL
// NULL-terminated list of leading consonants (clusters included).
// Syllable::parse() and Syllable::standardize() test each entry as a prefix
// of the syllable and stop at the first hit, so an entry must appear before
// any shorter entry it starts with ("ngh" before "ng" before "n").
// The number in each trailing comment is the entry's index, which is the
// value stored in Syllable::first_consonant.
21 char *first_consonants[] = { // longest first
22 "ngh", // 0
23 "nh", // 1
24 "ng", // 2
25 "ch", // 3
26 "gh", // 4
27 "ph", // 5
28 "th", // 6
29 "tr", // 7
30 "gi", // 8
31 "kh", // 9
32 "c", // 10
33 "m", // 11
34 "n"/*,"p"*/, // 12
35 "t", // 13
36 "b", // 14
37 "k", // 15
38 "q", // 16
39 "d", // 17
40 "ð", // 18
41 "g", // 19
42 "h", // 20
43 "l", // 21
44 "r", // 22
45 "s", // 23
46 "v", // 24
47 "x", // 25
48 NULL
// NULL-terminated list of syllable endings.
// Syllable::parse() tests each entry as a suffix of the syllable, requires
// at least one character to remain in front of it, and stops at the first
// hit; the index of that entry is stored in Syllable::last_consonant.
// The entries inside /* ... */ are disabled and do not take up an index.
51 char *last_consonants[] = { // longest first
52 "nh","ng","ch",/*"gh","ph","th","tr","gi","kh",*/
53 "c","m","n","p","t",/*"b","k","q","d","ð","g","h","l","r","s","v","x",*/
54 // these are semivowels, not consonants.
55 "i","y","o","u",
56 NULL
// NULL-terminated list of padding vowels (the optional glide between the
// first consonant and the main vowel).
// Syllable::parse() compares what is left after removing the consonants and
// the main vowel against each entry as a whole string; the index of the
// matching entry is stored in Syllable::padding_vowel.
59 char *padding_vowels[] = {
60 "o","u",
61 NULL
// Tone-mark lookup table made of six parallel strings.
// Row 0 holds the letters without a tone mark; rows 1..5 hold, at the same
// position, the same letter carrying tone number 1..5.
// Syllable::parse() searches rows 1..5 for each character of the syllable;
// on a hit it replaces the character with the row-0 character at the same
// position and records the row number in Syllable::diacritic.
// NOTE(review): the rows only line up when every letter occupies exactly one
// byte, i.e. when this file is stored in its original 8-bit encoding --
// confirm the encoding before editing any of these strings.
64 char *diacritic_table[6] = {
65 "aâåeêioô½ußy",
66 "ᤡéªíó¯¾úÑý",
67 "ॢè«ìò°¶ù×Ï",
68 "ä¦Æë¬ïö±·üØÖ",
69 "ãçǨ­îõ²ÞûæÛ",
70 "Õ§£©®¸÷µþøñÜ",
// Case pairs for the letters outside plain ASCII, as two parallel strings.
// syllable_init() treats position i of row 0 as the lower-case form and
// position i of row 1 as the upper-case form of the same letter, and copies
// each pair into full_case_table.  Only strlen(case_table[0]) positions are
// read, so both rows must have the same length.
// NOTE(review): the rows only line up when every letter occupies exactly one
// byte, i.e. when this file is stored in its original 8-bit encoding --
// confirm the encoding before editing either string.
73 char *case_table[2] = {
74 "áàäãÕ⤥¦ç§å¡¢ÆÇ£éè먩ꪫ¬­®íìïî¸óòöõ÷ô¯°±²µ½¾¶·ÞþúùüûøßÑ×ØæñýÏÖÛÜð",
75 "ÁÀÄÀ„…†\x06‡Å�‚\x02\x05ƒÉÈˈ‰ÊŠ‹Œ�ŽÍ̛ΘÓÒ™ šÔ��‘’“´•–—³”ÚÙœ�ž¿º»¼ÿ¹ÝŸ\x14\x19\x1eÐ",
// Byte-indexed case conversion tables, filled in by syllable_init().
// Row 0 maps a byte to its lower-case form (read by viet_tolower and
// viet_islower); row 1 maps a byte to its upper-case form (read by
// viet_toupper and viet_isupper).  Bytes without a case pair map to
// themselves.
78 char full_case_table[2][256];
80 bool syllable_init()
82 int i,len = strlen(case_table[0]);
83 for (i = 0;i < 256;i ++) {
84 full_case_table[0][i] = i >= 'A' && i <= 'Z' ? i+32 : i;
85 full_case_table[1][i] = i >= 'a' && i <= 'z' ? i-32 : i;
87 for (i = 0;i < len; i ++) {
88 full_case_table[0][(unsigned char)case_table[1][i]] = case_table[0][i];
89 full_case_table[1][(unsigned char)case_table[0][i]] = case_table[1][i];
91 return true;
94 // we assumes str is a valid syllable ;)
95 bool Syllable::parse(const char *str)
97 // Rule: there is always vowel. Others can be omitted.
98 // [first_consonant] [padding_vowel] vowel [last_consonant]
100 int i,j,k;
101 char **pattern;
102 int len;
103 string syllable(str);
105 // fisrt of all, extract diacritic.
106 len = syllable.size();
107 for (k = 0;k < len;k ++) {
108 // look up into diacritic_table
109 for (j = 1;j < 6;j ++) {
110 char *pos = strchr(diacritic_table[j],syllable[k]);
111 if (pos != NULL) {
112 int ipos = pos - diacritic_table[j];
113 syllable[k] = diacritic_table[0][ipos]; // remove diacritic
114 diacritic = (diacritics)j;
115 break;
120 // first, get the last_consonant
121 last_consonant = -1;
122 len = syllable.size();
123 for (i = 0;last_consonants[i] != 0; i++) {
124 char *pattern = last_consonants[i];
125 int pattern_len = strlen(pattern);
127 if (len > pattern_len &&
128 syllable.substr(len-pattern_len) == pattern) {
129 last_consonant = i;
130 syllable.erase(len-pattern_len);
131 break;
135 // then get the first_consonant
136 first_consonant = -1;
137 len = syllable.size();
138 for (i = 0;first_consonants[i] != 0; i++) {
139 char *pattern = first_consonants[i];
140 int pattern_len = strlen(pattern);
142 if (len >= pattern_len && // equal is possible
143 syllable.substr(0,pattern_len) == pattern) {
144 first_consonant = i;
145 syllable.erase(0,pattern_len);
146 break;
150 // check if syllable is empty.
151 // if it is, we can't use this last_consonant.
152 // find the next last_consonant
153 if (syllable.empty()) {
154 if (last_consonant != -1)
155 return false; // bad syllable
157 syllable = last_consonants[last_consonant];
158 len = syllable.size();
159 int start = last_consonant+1;
160 last_consonant = -1;
161 for (i = start;last_consonants[i] != 0; i++) {
162 char *pattern = last_consonants[i];
163 int pattern_len = strlen(pattern);
165 if (len > pattern_len &&
166 syllable.substr(len-pattern_len) == pattern) {
167 last_consonant = i;
168 syllable.erase(len-pattern_len);
169 break;
174 // get vowel
175 vowel = -1;
176 len = syllable.size();
177 if (len == 0)
178 return false; // bad syllable
179 for (i = 0;vowels[i] != 0; i++) {
180 char *pattern = vowels[i];
181 int pattern_len = strlen(pattern);
183 if (len >= pattern_len && // equal is possible
184 syllable.substr(len-pattern_len) == pattern) {
185 vowel = i;
186 syllable.erase(len-pattern_len);
187 break;
191 // the last is vowel
192 padding_vowel = -1;
193 len = syllable.size();
194 if (len == 0)
195 return false;
196 for (i = 0;padding_vowels[i] != 0; i++) {
197 char *pattern = padding_vowels[i];
198 int pattern_len = strlen(pattern);
200 if (syllable == pattern) {
201 padding_vowel = i;
202 syllable = "";
203 break;
207 return vowel != -1 && syllable.empty();
211 void Syllable::standardize(std::string syllable)
213 // We just need to eliminate "òa", "òe", "ùa"
215 // first, cut the first consonant off
216 int start = 0;
217 int len = syllable.size();
218 for (i = 0;first_consonants[i] != 0; i++) {
219 char *pattern = *first_consonants[i];
220 int pattern_len = strlen(pattern);
222 if (len > pattern_len &&
223 syllable.compare(0,pattern_len,pattern) == 0) {
224 start = i;
225 break;
232 int viet_toupper(int ch) // must be sure ch is a character
234 return full_case_table[1][(unsigned char)(char)ch];
236 int viet_tolower(int ch) // must be sure ch is a character
238 return full_case_table[0][(unsigned char)(char)ch];
240 bool viet_isupper(int ch)
242 return full_case_table[1][ch] == ch;
245 bool viet_islower(int ch)
247 return full_case_table[0][ch] == ch;