fixed bugs in Text::penalty2_construct(), Penalty2DAG::set_syllable_weights()
[vspell.git] / libvspell / keyboard.cpp
blobf54d8cf24db1ce5b9a8535cb5c0e0622fdd8999e
1 // -*- coding: viscii tab-width: 2 mode: c++ -*-
3 #include "keyboard.h"
4 #include "syllable.h"
// Every key this module knows how to "un-typo", number row first and then
// the three letter rows. The position of a character in this string is the
// index of its entry in keymap[] below, so the two tables must stay the same
// length and in the same order.
static char *keys = "`~1!2@3#4$5%6^7&8*9(0)-_=+\\|" "qwertyuiop[{]}" "asdfghjkl;:'\"" "zxcvbnm,<.>/?";

// keymap[i] lists the replacement candidates for keys[i].
//
// NOTE: never end one of the trailing comments with a backslash. A backslash
// at the end of a line splices the following line into the comment, which
// silently removes that entry and shifts every later one.
static char *keymap[] = {
  // number row
  "~1!",             // `
  "`1!",             // ~
  "!`~q2@",          // 1
  "1`~q2@",          // !
  "@1!qw3#",         // 2
  "21!qw3#",         // @
  "#2@we4$",         // 3
  "32@we4$",         // #
  "$3#er5%",         // 4
  "43#er5%",         // $
  "%4$rt6^",         // 5
  "54$rt6^",         // %
  "^5%ty7&",         // 6
  "65%ty7&",         // ^
  "&6^yu8*",         // 7
  "76^yu8*",         // &
  "*7&ui9(",         // 8
  "87&ui9(",         // *
  "(8*io0)",         // 9
  "98*io0)",         // (
  ")9(op-_",         // 0
  "09(op-_",         // )
  "_0)p[=+",         // -
  "-0)p[=+",         // _
  "+-_[{]}\\|",      // =
  "=-_[{]}\\|",      // +
  "|=+]}",           // backslash
  "\\=+]}",          // |

  // top letter row
  "aw2@1!",          // q
  "qase3#2@",        // w
  "wsdr4$3#",        // e
  "edft5%4$",        // r
  "rfgy6^5%",        // t
  "tghu7&6^",        // y
  "yhji8*7&",        // u
  "ujko9(8*",        // i
  "iklp0)9(",        // o
  "ol;:[{-_0)",      // p
  "{p;:'\"]}=+-_",   // [
  "[p;:'\"]}=+-_",   // {
  "}'\"[{=+\\|",     // ]
  "]'\"[{=+\\|",     // }

  // home row
  "qwsz",            // a
  "awedxz",          // s
  "erfcxs",          // d
  "rtgvcd",          // f
  "tyhbvf",          // g
  "yujnbg",          // h
  "uikmnh",          // j
  "iol,<mj",         // k
  "op;:.>,<k",       // l
  ":p[{'\"/?.>l",    // ;
  ";p[{'\"/?.>l",    // :
  "\"[{]}/?;:",      // '
  "'[{]}/?;:",       // "

  // bottom row
  "asx",             // z
  "zsdc",            // x
  "xdfv",            // c
  "cfgb",            // v
  "vghn",            // b
  "bhjm",            // n
  "njk,<",           // m
  "<mkl.>",          // ,
  ",mkl.>",          // <
  ">,<l;:/?",        // .
  ".,<l;:/?",        // >
  "?.>;:'\"",        // /
  "/.>;:'\"",        // ?
};
81 using namespace std;
83 void KeyRecover::init(const char *input_,int N)
85 uint n = strlen(input_);
86 uint i;
88 input = input_;
89 vvv.resize(n);
90 vmap.resize(n);
91 for (i = 0;i < n;i ++) {
92 char c = tolower(input[i]);
93 char *s = strchr(keys,c);
94 if (s) {
95 vmap[i] = keymap[s-keys];
96 vvv[i] = strlen(vmap[i]);
97 } else {
98 vvv[i] = 1;
99 vmap[i] = &input[i];
103 inner_loop = false;
105 posgen.init(n,N);
108 bool KeyRecover::inner_step(string &output)
110 vector<uint> vv;
112 while (cgen.step(vv)) {
113 output = input;
114 for (uint i = 0;i < len;i ++) {
115 output[v[i]] = vmap[v[i]][vv[i]];
117 return true;
119 cgen.done();
120 inner_loop = false;
121 return false;
124 bool KeyRecover::step(string &output)
126 if (inner_loop) {
127 if (inner_step(output))
128 return true;
129 // inner_step returns false, it means that we can continue the main loop
132 while (posgen.step(v,len)) {
133 vector<uint> vv;
134 vv.resize(len);
135 for (uint i = 0;i < len;i ++)
136 vv[i] = vvv[v[i]];
137 cgen.init(vv);
138 inner_loop = true;
139 if (inner_step(output))
140 return true;
142 return false;
145 void KeyRecover::done()
147 posgen.done();
150 void keyboard_recover(const char *input,set<string> &output)
152 KeyRecover keyr;
153 string s;
154 keyr.init(input);
155 while (keyr.step(s)) {
156 output.insert(s);
157 cerr << s << endl;
159 keyr.done();
161 uint n = strlen(input);
162 uint i,N = 2;
164 vector<uint> vvv;
165 vector<const char*> vmap;
166 vvv.resize(n);
167 vmap.resize(n);
168 for (i = 0;i < n;i ++) {
169 char c = tolower(input[i]);
170 char *s = strchr(keys,c);
171 if (s) {
172 vmap[i] = keymap[s-keys];
173 vvv[i] = strlen(vmap[i]);
174 } else {
175 vvv[i] = 1;
176 vmap[i] = &input[i];
180 PosGen posgen;
181 CGen cgen;
182 vector<uint> v;
183 uint len;
184 posgen.init(n,N);
185 while (posgen.step(v,len)) {
186 if (!len) continue;
187 vector<uint> vv;
188 vv.resize(len);
189 for (i = 0;i < len;i ++)
190 vv[i] = vvv[v[i]];
191 cgen.init(vv);
192 while (cgen.step(vv)) {
193 string s = input;
194 for (i = 0;i < len;i ++) {
195 s[v[i]] = vmap[v[i]][vv[i]];
197 cerr << s << endl;
199 cgen.done();
201 posgen.done();
205 string vni_recover(const char *input)
207 string s;
208 uint p,i,n = strlen(input);
209 char diacritic = '0';
210 bool ok;
212 s = "0"; // diacritic placeholder
214 for (i = 0;i < n;i ++) {
215 switch (input[i]) {
216 case '1': diacritic = '1'; break;
217 case '2': diacritic = '2'; break;
218 case '3': diacritic = '3'; break;
219 case '4': diacritic = '4'; break;
220 case '5': diacritic = '5'; break;
222 case '6':
223 ok = false;
225 p = s.rfind('a');
226 if (p != string::npos) {
227 s[p] = 'â';
228 ok = true;
229 } else {
230 p = s.rfind('A');
231 if (p != string::npos) {
232 s[p] = 'Â';
233 ok = true;
237 p = s.rfind('e');
238 if (p != string::npos) {
239 s[p] = 'ê';
240 ok = true;
241 } else {
242 p = s.rfind('E');
243 if (p != string::npos) {
244 s[p] = 'Ê';
245 ok = true;
249 p = s.rfind('o');
250 if (p != string::npos) {
251 s[p] = 'ô';
252 ok = true;
253 } else {
254 p = s.rfind('O');
255 if (p != string::npos) {
256 s[p] = 'Ô';
257 ok = true;
261 if (!ok)
262 s += input[i];
263 break;
265 case '7':
266 ok = false;
268 p = s.rfind('u');
269 if (p != string::npos) {
270 s[p] = 'ß';
271 ok = true;
272 } else {
273 p = s.rfind('U');
274 if (p != string::npos) {
275 s[p] = '¿';
276 ok = true;
280 p = s.rfind('o');
281 if (p != string::npos) {
282 s[p] = '½';
283 ok = true;
284 } else {
285 p = s.rfind('O');
286 if (p != string::npos) {
287 s[p] = '´';
288 ok = true;
292 if (!ok)
293 s += input[i];
294 break;
296 case '8':
297 p = s.rfind('a');
298 if (p != string::npos)
299 s[p] = 'å';
300 else {
301 p = s.rfind('A');
302 if (p != string::npos)
303 s[p] = 'Å';
304 else
305 s += input[i];
307 break;
309 case '9':
310 p = s.rfind('d');
311 if (p != string::npos)
312 s[p] = 'ð';
313 else {
314 p = s.rfind('D');
315 if (p != string::npos)
316 s[p] = 'Ð';
317 else
318 s += input[i];
320 break;
322 default:
323 s += input[i];
327 s[0] = diacritic;
328 Syllable syll;
329 if (syll.parse(s.c_str()))
330 return syll.to_str();
331 else
332 return input;
335 string telex_recover(const char *input)
337 string s;
338 uint p,i,n = strlen(input);
339 char diacritic = '0';
340 bool ok;
342 s = "0"; // diacritic placeholder
344 for (i = 0;i < n;i ++) {
345 switch (tolower(input[i])) {
346 case 's': diacritic = '1'; break;
347 case 'f': diacritic = '2'; break;
348 case 'r': diacritic = '3'; break;
349 case 'x': diacritic = '4'; break;
350 case 'j': diacritic = '5'; break;
352 case 'w':
353 ok = false;
355 p = s.rfind('u');
356 if (p != string::npos) {
357 s[p] = 'ß';
358 ok = true;
359 } else {
360 p = s.rfind('U');
361 if (p != string::npos) {
362 s[p] = '¿';
363 ok = true;
367 p = s.rfind('o');
368 if (p != string::npos) {
369 s[p] = '½';
370 ok = true;
371 } else {
372 p = s.rfind('O');
373 if (p != string::npos) {
374 s[p] = '´';
375 ok = true;
379 p = s.rfind('a');
380 if (p != string::npos) {
381 s[p] = 'å';
382 ok = true;
383 } else {
384 p = s.rfind('A');
385 if (p != string::npos) {
386 s[p] = 'Å';
387 ok = true;
391 if (!ok)
392 s += 'ß';
393 break;
395 case '[': s += '½'; break;
396 case ']': s += 'ß'; break;
398 case 'a':
399 if (!s.empty() && tolower(s[s.size()-1]) == 'a')
400 s[s.size()-1] = s[s.size()-1] == 'a' ? 'â' : 'Â';
401 else
402 s += input[i];
403 break;
405 case 'e':
406 if (!s.empty() && tolower(s[s.size()-1]) == 'e')
407 s[s.size()-1] = s[s.size()-1] == 'e' ? 'ê' : 'Ê';
408 else
409 s += input[i];
410 break;
412 case 'o':
413 if (!s.empty() && tolower(s[s.size()-1]) == 'o')
414 s[s.size()-1] = s[s.size()-1] == 'o' ? 'ô' : 'Ô';
415 else
416 s += input[i];
417 break;
419 case 'd':
420 if (!s.empty() && tolower(s[s.size()-1]) == 'd')
421 s[s.size()-1] = s[s.size()-1] == 'd' ? 'ð' : 'Ð';
422 else
423 s += input[i];
424 break;
426 default:
427 s += input[i];
432 s[0] = diacritic;
433 Syllable syll;
434 if (syll.parse(s.c_str()))
435 return syll.to_str();
436 else
437 return input;
440 void im_recover(const char *input,set<string> &output)
442 string s;
443 s = vni_recover(input);
444 if (s != input)
445 output.insert(s);
446 s = telex_recover(input);
447 if (s != input)
448 output.insert(s);