softcount: tolerate zero ngrams
[vspell.git] / libvspell / vspell.h
blobcf31539395a0cf5a5e4212a88c38cf163013fd62
1 #ifndef __VSPELL_H__ // -*- tab-width: 2 mode: c++ -*-
2 #define __VSPELL_H__
4 #ifndef __SPELL_H__
5 #include "spell.h"
6 #endif
7 #ifndef __SET__
8 #include <set>
9 #endif
10 #ifndef __VECTOR__
11 #include <vector>
12 #endif
13 #ifndef __STRING__
14 #include <string>
15 #endif
16 #ifndef __MYSTRING_H__
17 #include "mystring.h"
18 #endif
20 class VSpell;
22 class Text
24 protected:
25 VSpell *vspell;
27 virtual bool ui_syllable_check() = 0;
28 virtual bool ui_word_check() = 0;
29 bool syllable_check(int sentence_element_id);
30 void apply_separators(std::set<WordEntry> &wes);
31 void get_separators(std::vector<unsigned> &seps);
32 void penalty2_construct(Segmentation &s);
34 public:
35 int offset,length;
36 boost::shared_ptr<Sentence> st;
37 Lattice w;
38 Segmentation seg;
39 Suggestions suggestions;
41 Text(VSpell *v):vspell(v),st(new Sentence) {}
42 virtual ~Text() {}
44 virtual bool sentence_check(const char *pp);
45 virtual bool syllable_check();
46 virtual bool word_check();
48 int utf8_pos(unsigned pos);
49 void replace(unsigned from,unsigned size,const char *utf8_text);
50 std::string substr(unsigned from,unsigned size);
52 unsigned pos_from_syllable(const Suggestion &s);
53 unsigned pos_from_word(const Suggestion &s);
56 class TextFactory
58 public:
59 virtual Text* create(VSpell *) const = 0;
63 class VSpell
65 protected:
66 const TextFactory &text_factory;
67 std::set<strid> syllables;
68 std::set<strid_string> words;
69 std::vector<unsigned> separators;
70 std::string utf8_text;
71 std::string text;
72 float penalty_weight,penalty2_weight;
73 bool do_trigram,do_strict_word_checking;
75 public:
76 VSpell(const TextFactory &tf):
77 text_factory(tf),
78 penalty_weight(0),
79 do_trigram(false),
80 do_strict_word_checking(false)
82 virtual ~VSpell() { cleanup(); }
84 bool init();
85 virtual void cleanup() {}
86 bool check(const char *pp); // in utf-8 encoding
88 void set_penalty(float weight) {
89 penalty_weight = weight;
91 float get_penalty() const {
92 return penalty_weight;
95 void set_penalty2(float weight) {
96 penalty2_weight = weight;
98 float get_penalty2() const {
99 return penalty2_weight;
102 void set_strict_word_checking(bool t) {
103 do_strict_word_checking = t;
105 bool get_strict_word_checking() const {
106 return do_strict_word_checking;
109 void set_trigram(bool t) {
110 do_trigram = t;
112 bool get_trigram() const {
113 return do_trigram;
116 const std::string &get_utf8_text() const { return utf8_text; }
117 const std::string &get_text() const { return text; }
118 void replace(unsigned from,unsigned size,const char *utf8_text);
120 bool in_dict(strid id) const {
121 return syllables.find(id) != syllables.end();
123 bool in_dict(const strid_string &id) const {
124 return words.find(id) != words.end();
126 void add(strid id) {
127 syllables.insert(id);
129 void add_word(const char *s);
130 void add_separators(const std::vector<unsigned> &seps);
131 const std::vector<unsigned> &get_separators() const {
132 return separators;
/**
 * A collection of candidate strings, each carrying a float priority.
 *
 * Entries are stored in a std::set ordered by the candidate text only, so
 * the set holds at most one entry per distinct string.
 */
class Candidates
{
private:
  struct Candidate
  {
    std::string candidate;
    float priority;

    // Ordering used by the std::set below: by text only, priority ignored.
    friend bool operator < (const Candidate &lhs,const Candidate &rhs) {
      return lhs.candidate.compare(rhs.candidate) < 0;
    }
  };

  std::set<Candidate> candidates;

  // Binary predicate over candidate strings that keeps a reference to the
  // Candidates object it was built from. operator() is defined in the
  // implementation file.
  class CandidateComparator {
  public:
    const Candidates &c;

    CandidateComparator(const Candidates &owner):
      c(owner)
    {}

    bool operator()(const std::string &s1,const std::string &s2);
  };
  friend class CandidateComparator;

public:
  // Adds a candidate string; f is its priority and defaults to 0.
  void insert(const std::string &,float f = 0);

  // Writes the candidate strings into the given vector.
  // NOTE(review): the resulting order presumably depends on priority via
  // CandidateComparator -- confirm in the implementation.
  void get_list(std::vector<std::string> &);
};
162 void get_syllable_candidates(const char *input,Candidates &output,float val = 0);
163 void get_phonetic_syllable_candidates(const char *input,Candidates &output,float val = 0);
164 void get_left_syllable_candidates(const char *input,Candidates &output);
165 void get_right_syllable_candidates(const char *input,Candidates &output);
// Encoding conversions between UTF-8 and VISCII.

// Buffer-to-buffer variants: `out` must be pre-allocated by the caller.
// NOTE(review): the required size of `out` and the meaning of the bool
// result are not visible from this header -- confirm in the implementation.
bool viet_utf8_to_viscii(const char *in, char *out);        // pre-allocated
bool viet_utf8_to_viscii_force(const char *in, char *out);  // pre-allocated
void viet_viscii_to_utf8(const char *in, char *out);        // pre-allocated

// Pointer-returning variants.
// NOTE(review): ownership and lifetime of the returned buffer are not
// visible from this header -- confirm before freeing or storing the result.
char* viet_to_viscii(const char *in);
char* viet_to_viscii_force(const char *in);
char* viet_to_utf8(const char *in);
175 #endif