terrible bug in PenaltyDAG and Penalty2DAG.
[vspell.git] / libvspell / vspell.h
blob7c34076d2301697a1738d7351383d8d7e7b3dbd7
1 #ifndef __VSPELL_H__ // -*- tab-width: 2 mode: c++ -*-
2 #define __VSPELL_H__
4 #ifndef __SPELL_H__
5 #include "spell.h"
6 #endif
7 #ifndef __SET__
8 #include <set>
9 #endif
10 #ifndef __VECTOR__
11 #include <vector>
12 #endif
13 #ifndef __STRING__
14 #include <string>
15 #endif
16 #ifndef __MYSTRING_H__
17 #include "mystring.h"
18 #endif
20 class VSpell;
22 class Text
24 protected:
25 VSpell *vspell;
27 virtual bool ui_syllable_check() = 0;
28 virtual bool ui_word_check() = 0;
29 bool syllable_check(int sentence_element_id);
30 void apply_separators(std::set<WordEntry> &wes);
31 void get_separators(std::vector<unsigned> &seps);
32 void penalty2_construct(Segmentation &s);
34 public:
35 int offset,length;
36 Sentence st;
37 Lattice w;
38 Segmentation seg;
39 Suggestions suggestions;
41 Text(VSpell *v):vspell(v) {}
42 virtual ~Text() {}
44 virtual bool sentence_check(const char *pp);
45 virtual bool syllable_check();
46 virtual bool word_check();
48 int utf8_pos(unsigned pos);
49 void replace(unsigned from,unsigned size,const char *utf8_text);
50 std::string substr(unsigned from,unsigned size);
52 unsigned pos_from_syllable(const Suggestion &s);
53 unsigned pos_from_word(const Suggestion &s);
56 class TextFactory
58 public:
59 virtual Text* create(VSpell *) const = 0;
63 class VSpell
65 protected:
66 const TextFactory &text_factory;
67 std::set<strid> syllables;
68 std::set<strid_string> words;
69 std::vector<unsigned> separators;
70 std::string utf8_text;
71 std::string text;
72 float penalty_weight,penalty2_weight;
73 bool do_normalization,do_trigram,do_strict_word_checking;
75 public:
76 VSpell(const TextFactory &tf):
77 text_factory(tf),
78 penalty_weight(0),
79 do_normalization(true),
80 do_trigram(false),
81 do_strict_word_checking(false)
83 virtual ~VSpell() { cleanup(); }
85 bool init();
86 virtual void cleanup() {}
87 bool check(const char *pp); // in utf-8 encoding
89 void set_penalty(float weight) {
90 penalty_weight = weight;
92 float get_penalty() const {
93 return penalty_weight;
96 void set_penalty2(float weight) {
97 penalty2_weight = weight;
99 float get_penalty2() const {
100 return penalty2_weight;
103 void set_normalization(bool t) {
104 do_normalization = t;
106 bool get_normalization() const {
107 return do_normalization;
110 void set_strict_word_checking(bool t) {
111 do_strict_word_checking = t;
113 bool get_strict_word_checking() const {
114 return do_strict_word_checking;
117 void set_trigram(bool t) {
118 do_trigram = t;
120 bool get_trigram() const {
121 return do_trigram;
124 const std::string &get_utf8_text() const { return utf8_text; }
125 const std::string &get_text() const { return text; }
126 void replace(unsigned from,unsigned size,const char *utf8_text);
128 bool in_dict(strid id) const {
129 return syllables.find(id) != syllables.end();
131 bool in_dict(const strid_string &id) const {
132 return words.find(id) != words.end();
134 void add(strid id) {
135 syllables.insert(id);
137 void add_word(const char *s);
138 void add_separators(const std::vector<unsigned> &seps);
139 const std::vector<unsigned> &get_separators() const {
140 return separators;
/**
 * Collection of candidate strings, each carrying a float priority.
 *
 * Entries live in a std::set keyed on the candidate text alone, so the
 * set holds at most one entry per distinct string. insert() and
 * get_list() are defined elsewhere.
 */
class Candidates
{
private:
  struct Candidate
  {
    std::string candidate;
    float priority;

    // Strict weak ordering on the text only; priority is not compared.
    friend bool operator < (const Candidate &c1,const Candidate &c2)
    {
      return c1.candidate.compare(c2.candidate) < 0;
    }
  };

  std::set<Candidate> candidates;

  // Binary predicate over candidate strings that keeps a reference to
  // the owning Candidates object.
  // NOTE(review): presumably orders strings by their stored priority --
  // confirm against the out-of-line operator() definition.
  class CandidateComparator
  {
  public:
    const Candidates &c;

    CandidateComparator(const Candidates &cc):c(cc) {}
    bool operator()(const std::string &s1,const std::string &s2);
  };
  friend class CandidateComparator;

public:
  void insert(const std::string &,float f = 0);
  void get_list(std::vector<std::string> &);
};
170 void get_syllable_candidates(const char *input,Candidates &output,float val = 0);
171 void get_phonetic_syllable_candidates(const char *input,Candidates &output,float val = 0);
172 void get_left_syllable_candidates(const char *input,Candidates &output);
173 void get_right_syllable_candidates(const char *input,Candidates &output);
175 bool viet_utf8_to_viscii(const char *in,char *out); // pre-allocated
176 bool viet_utf8_to_viscii_force(const char *in,char *out); // pre-allocated
177 void viet_viscii_to_utf8(const char *in,char *out); // pre-allocated
178 char* viet_to_viscii(const char *in);
179 char* viet_to_viscii_force(const char *in);
180 char* viet_to_utf8(const char *in);
183 #endif