1 #include "config.h" // -*- tab-width: 2 -*-
15 1. Sentence segmentation. (sentences_split)
16 2. Separate "words" by spaces. (tokenize)
17 3. Punctuation separation. (tokenize/tokenize_punctuation)
18 4. Foreign/Abbreviation detection.
19 5. Proper name detection.
20 6. Generalization (into a class, e.g. number_class, foreign_class, ...). Try to
21 generalize all capitalized words.
22 6* Syllable checking. (check1)
23 7. Find all possible (misspelled) words. (**) (get_all_words)
24 8. "Pre-separate" the sentence into phrases.
25 9. Word segmentation. (**)
26 10. Find the best segmentation. (segment_best)
27 10* Word checking. (check2)
// spell_check1: per-syllable pass over the sentence `st`.
// NOTE(review): presumably the "Syllable checking (check1)" step of the
// pipeline listed at the top of the file -- confirm.
// NOTE(review): this excerpt omits several original lines (the opening brace,
// the body of the in_dict() branch, and the end of the function), so only the
// statements visible here are described. How `sugg` is used is not visible.
33 void spell_check1(Sentence
&st
,Suggestions
&sugg
)
// n is the syllable count reported by the sentence; i indexes the syllables.
35 int i
,n
= st
.get_syllable_count();
// Visit every syllable of the sentence in order.
36 for (i
= 0;i
< n
;i
++) {
// id is the value get_cid() returns for syllable i.
37 strid id
= st
[i
].get_cid();
// Test whether id is present in sarch. The statement guarded by this test is
// not visible in this excerpt.
38 if (sarch
.in_dict(id
))
// Fetch the string that sarch stores for id.
43 VocabString s
= sarch
[id
];
// A one-character string whose character fails viet_isalpha() ...
44 if (strlen(s
) == 1 && !viet_isalpha(s
[0])) {
// ... gets its sid overwritten with the value sarch yields for "<PUNCT>".
45 st
[i
].sid
= sarch
["<PUNCT>"];
55 void spell_check2(Sentence
&st
,Segmentation
&seg
,Suggestions
&sugg
)
60 for (i
= 0;i
< n
;i
++) {
62 int len
= seg
[i
].node
->get_syllable_count();
69 WordNodePtr
node(get_root());
70 for (start
= 0;start
< len
&& node
!= NULL
; start
++)
71 node
= node
->get_next(st
[start
+cc
].cid
);