Fixed leaks in LM::operator[](const char *) and LM::clear_oov()
[vspell.git] / libvspell / warch.cpp
blob1864568eb2d530c5f4981ac40d228cad6a5bc6b4
1 #include "wordnode.h" // -*- tab-width: 2 coding: viscii mode: c++ -*-
2 #include "syllable.h"
3 #include <utility>
4 #include <fstream>
5 #include <iostream>
6 #include <boost/format.hpp>
8 using namespace std;
9 static strid mainleaf_id,caseleaf_id;
10 std::map<strid,LeafNNode*> LeafNNode::leaf_index;
12 LeafNNode* BranchNNode::get_leaf(strid leaf) const
14 node_map::const_iterator iter;
15 iter = nodes.find(leaf);
16 if (iter != nodes.end())
17 return((LeafNNode*)iter->second.get());
18 else
19 return NULL;
22 void BranchNNode::get_leaves(std::vector<LeafNNode*> &_nodes) const
24 const vector<strid> leaf_id = warch.get_leaf_id();
25 node_map::const_iterator iter;
26 uint i,n = leaf_id.size();
27 for (i = 0;i < n;i ++) {
28 iter = nodes.find(leaf_id[i]);
29 if (iter != nodes.end())
30 _nodes.push_back((LeafNNode*)iter->second.get());
34 void BranchNNode::get_branches(strid _id,std::vector<BranchNNode*> &_nodes) const
36 const_np_range range;
37 range = nodes.equal_range(_id);
38 node_map::const_iterator iter;
39 for (iter = range.first;iter != range.second; ++iter)
40 if (!iter->second->is_leaf())
41 _nodes.push_back((BranchNNode*)iter->second.get());
44 BranchNNode* BranchNNode::get_branch(strid _id) const
46 const_np_range range;
47 range = nodes.equal_range(_id);
48 node_map::const_iterator iter;
49 for (iter = range.first;iter != range.second; ++iter)
50 if (!iter->second->is_leaf())
51 return (BranchNNode*)iter->second.get();
52 return NULL;
55 void BranchNNode::add(strid _id,NNodeRef _branch)
57 nodes.insert(make_pair(_id,_branch));
60 BranchNNode* BranchNNode::add_path(const std::vector<strid> &toks)
62 uint i,n = toks.size();
63 BranchNNode *me = this;
64 for (i = 0;i < n;i ++) {
65 BranchNNode *next = me->get_branch(toks[i]);
66 if (next == NULL) {
67 NNodeRef branch(new BranchNNode());
68 me->add(toks[i],branch);
69 next = (BranchNNode*)branch.get();
71 me = next;
73 return me;
76 void WordArchive::init()
78 mainleaf_id = sarch["<mainleaf>"];
79 caseleaf_id = sarch["<caseleaf>"];
80 register_leaf(mainleaf_id);
81 register_leaf(caseleaf_id);
84 bool WordArchive::load(const char* filename)
86 if (filename != NULL) {
87 ifstream ifs(filename);
89 if (!ifs.is_open())
90 return false;
92 string word;
93 while (ifs >> word) {
94 add_entry(word.c_str());
95 add_case_entry(word.c_str());
98 else {
99 const lm_t * lm = get_ngram().get_lm();
100 for (int i = 0;i < lm->ucount;i ++) {
101 add_entry(lm->word_str[i]);
102 add_case_entry(lm->word_str[i]);
105 return true;
108 LeafNNode* WordArchive::add_special_entry(strid tok)
110 LeafNNode *leaf = new LeafNNode;
111 NNodeRef noderef(leaf);
112 vector<strid> toks;
113 toks.push_back(tok);
114 leaf->set_id(toks);
115 //leaf->set_mask(MAIN_LEAF);
116 get_root()->add(tok,noderef);
117 return leaf;
120 void WordArchive::add_entry(const char *w)
122 unsigned len,wlen;
123 const char *pos,*start;
124 char *buf;
125 len = strlen(w);
126 start = pos = w;
127 buf = (char *)malloc(len+1);
128 vector<VocabIndex> syllables;
129 while (pos) {
130 pos = strchr(start,'_');
131 wlen = pos ? pos - start : len - (start - w);
132 memcpy(buf,start,wlen);
133 buf[wlen] = '\0';
134 VocabIndex id = sarch[buf];
135 syllables.push_back(id);
136 start = pos+1;
138 free(buf);
140 vector<strid> path = syllables;
141 BranchNNode* branch = get_root()->add_path(path);
142 NNodeRef noderef(new LeafNNode);
143 LeafNNode *leaf = (LeafNNode*)noderef.get();
144 //leaf->set_mask(MAIN_LEAF);
145 branch->add(mainleaf_id,noderef);
146 leaf->set_id(syllables);
149 void WordArchive::add_case_entry(const char *w2)
151 unsigned i,same,len,wlen;
152 const char *pos,*start;
153 char *buf;
154 char *w;
155 len = strlen(w2);
156 w = (char *)malloc(len+1);
157 same = 1;
158 for (i = 0;i < len;i ++) {
159 w[i] = (char)viet_tolower(w2[i]);
160 if (same && w[i] != w2[i])
161 same = 0;
163 if (same) {
164 free(w);
165 return;
167 w[len] = '\0';
168 buf = (char *)malloc(len+1);
169 vector<VocabIndex> syllables,real_syllables;
170 start = pos = w;
171 while (pos) {
172 pos = strchr(start,'_');
173 wlen = pos ? pos - start : len - (start - w);
174 memcpy(buf,start,wlen);
175 buf[wlen] = '\0';
176 VocabIndex id = sarch[buf];
177 syllables.push_back(id);
178 start = pos+1;
180 free(w);
182 start = pos = w2;
183 while (pos) {
184 pos = strchr(start,'_');
185 wlen = pos ? pos - start : len - (start - w2);
186 memcpy(buf,start,wlen);
187 buf[wlen] = '\0';
188 VocabIndex id = sarch[buf];
189 real_syllables.push_back(id);
190 start = pos+1;
192 free(buf);
194 vector<strid> path = syllables;
195 BranchNNode* branch = get_root()->add_path(path);
196 NNodeRef noderef(new LeafNNode);
197 LeafNNode *leaf = (LeafNNode*)noderef.get();
198 //leaf->set_mask(CASE_LEAF);
199 branch->add(caseleaf_id,noderef);
200 leaf->set_id(real_syllables);
203 void WordArchive::register_leaf(strid id)
205 if (find(leaf_id.begin(),leaf_id.end(),id) == leaf_id.end())
206 leaf_id.push_back(id);
209 void LeafNNode::set_mask(uint maskval,bool mask)
211 if (mask)
212 bitmask |= maskval;
213 else
214 bitmask &= ~maskval;
217 void LeafNNode::set_id(const vector<strid> &_syllables)
219 syllables = _syllables;
220 string word;
221 int i,nr_syllables = syllables.size();
222 for (i = 0;i < nr_syllables;i ++) {
223 if (i)
224 word += "_";
225 word += sarch[syllables[i]];
227 id = sarch[word];
228 leaf_index[id] = this;
231 LeafNNode* LeafNNode::find_leaf(const vector<strid> &syllables)
233 string word;
234 int i,nr_syllables = syllables.size();
235 for (i = 0;i < nr_syllables;i ++) {
236 if (i)
237 word += "_";
238 word += sarch[syllables[i]];
240 strid id = sarch[word];
241 map<strid,LeafNNode*>::iterator iter = leaf_index.find(id);
242 return iter != leaf_index.end() ? iter->second : NULL;
245 std::ostream& operator << (std::ostream &os,const LeafNNode &node)
247 std::vector<strid> syll;
248 node.get_syllables(syll);
249 os << boost::format("%04x %d") % node.bitmask % syll.size();
250 for (std::vector<strid>::size_type i = 0;i < syll.size();i ++) {
251 os << " ";
252 os << sarch[syll[i]];
254 return os;
257 std::istream& operator >> (std::istream &is,LeafNNode* &node)
259 std::vector<strid> syll;
260 int n;
261 uint bitmask;
262 is >> hex >> bitmask >> dec >> n;
263 syll.resize(n);
264 for (std::vector<strid>::size_type i = 0;i < syll.size();i ++) {
265 string s;
266 is >> s;
267 syll[i] = get_ngram()[s];
269 node = LeafNNode::find_leaf(syll);
270 return is;