terrible bug in PenaltyDAG and Penalty2DAG.
[vspell.git] / libvspell / warch.cpp
blob3266772bc4b8d3ae78e258299f2a4f0d414c761d
1 #include "wordnode.h" // -*- tab-width: 2 coding: viscii mode: c++ -*-
2 #include "syllable.h"
3 #include <utility>
5 using namespace std;
// File-local leaf keys. WordArchive's constructor fills them in from
// sarch["<mainleaf>"] and sarch["<caseleaf>"]; add_entry() attaches its leaf
// under mainleaf_id and add_case_entry() attaches its leaf under caseleaf_id.
6 static strid mainleaf_id,caseleaf_id;
8 LeafNode* BranchNode::get_leaf(strid leaf) const
10 node_map::const_iterator iter;
11 iter = nodes.find(leaf);
12 if (iter != nodes.end())
13 return((LeafNode*)iter->second.get());
14 else
15 return NULL;
18 void BranchNode::get_leaves(std::vector<LeafNode*> &_nodes) const
20 const vector<strid> leaf_id = warch.get_leaf_id();
21 node_map::const_iterator iter;
22 uint i,n = leaf_id.size();
23 for (i = 0;i < n;i ++) {
24 iter = nodes.find(leaf_id[i]);
25 if (iter != nodes.end())
26 _nodes.push_back((LeafNode*)iter->second.get());
30 void BranchNode::get_branches(strid _id,std::vector<BranchNode*> &_nodes) const
32 const_np_range range;
33 range = nodes.equal_range(_id);
34 node_map::const_iterator iter;
35 for (iter = range.first;iter != range.second; ++iter)
36 if (!iter->second->is_leaf())
37 _nodes.push_back((BranchNode*)iter->second.get());
40 BranchNode* BranchNode::get_branch(strid _id) const
42 const_np_range range;
43 range = nodes.equal_range(_id);
44 node_map::const_iterator iter;
45 for (iter = range.first;iter != range.second; ++iter)
46 if (!iter->second->is_leaf())
47 return (BranchNode*)iter->second.get();
48 return NULL;
51 void BranchNode::add(strid _id,NodeRef _branch)
53 nodes.insert(make_pair(_id,_branch));
56 BranchNode* BranchNode::add_path(std::vector<strid> toks)
58 uint i,n = toks.size();
59 BranchNode *me = this;
60 for (i = 0;i < n;i ++) {
61 BranchNode *next = me->get_branch(toks[i]);
62 if (next == NULL) {
63 NodeRef branch(new BranchNode());
64 me->add(toks[i],branch);
65 next = (BranchNode*)branch.get();
67 me = next;
69 return me;
72 WordArchive::WordArchive():root(new BranchNode)
74 mainleaf_id = sarch["<mainleaf>"];
75 caseleaf_id = sarch["<caseleaf>"];
76 register_leaf(mainleaf_id);
77 register_leaf(caseleaf_id);
80 bool WordArchive::load(const char* filename)
82 File ifs(filename,"rt");
84 if (ifs.error())
85 return false;
87 int nr_lines = 0;
88 char *line;
89 int start,len,tmp_char;
90 string::size_type pos;
91 char *str_pos;
92 vector<string> toks;
93 while ((line = ifs.getline())) {
94 start = 0;
95 len = strlen(line);
96 toks.clear();
97 while (start < len) {
98 str_pos = strchr(line+start,'#');
99 if (str_pos == NULL)
100 pos = len;
101 else
102 pos = str_pos - line;
103 tmp_char = line[pos];
104 line[pos] = 0;
105 toks.push_back(line+start);
106 line[pos] = tmp_char;
107 start = pos+1;
110 if (!toks.size())
111 continue; // unrecoverable error
112 if (toks.size() < 2)
113 toks.push_back("N"); // assume N
114 if (toks.size() < 3)
115 toks.push_back("0"); // assume 0
117 add_entry(toks);
118 add_case_entry(toks);
120 nr_lines ++;
123 return true;
126 LeafNode* WordArchive::add_special_entry(strid tok)
128 LeafNode *leaf = new LeafNode;
129 NodeRef noderef(leaf);
130 vector<strid> toks;
131 toks.push_back(tok);
132 leaf->set_id(toks);
133 //leaf->set_mask(MAIN_LEAF);
134 get_root()->add(tok,noderef);
135 return leaf;
138 void WordArchive::add_entry(vector<string> toks)
140 unsigned start,len,pos;
141 start = 0;
142 len = toks[0].size();
143 vector<VocabIndex> syllables;
144 while (start < len) {
145 pos = toks[0].find(' ',start);
146 if (pos == string::npos)
147 pos = len;
148 string s = toks[0].substr(start,pos-start);
149 VocabIndex id = sarch[get_std_syllable(s)];
150 syllables.push_back(id);
151 start = pos+1;
154 vector<strid> path = syllables;
155 BranchNode* branch = get_root()->add_path(path);
156 NodeRef noderef(new LeafNode);
157 LeafNode *leaf = (LeafNode*)noderef.get();
158 //leaf->set_mask(MAIN_LEAF);
159 branch->add(mainleaf_id,noderef);
160 leaf->set_id(syllables);
164 void WordArchive::add_case_entry(vector<string> toks2)
166 vector<string> toks = toks2;
167 unsigned start,len,pos;
168 start = 0;
169 len = toks[0].size();
171 transform(toks[0].begin(),toks[0].end(),toks[0].begin(),viet_tolower);
173 if (toks[0] == toks2[0])
174 return;
176 start = 0; // path
177 vector<VocabIndex> syllables;
178 while (start < len) {
179 pos = toks[0].find(' ',start);
180 if (pos == string::npos)
181 pos = len;
182 string s = toks[0].substr(start,pos-start);
183 VocabIndex id = sarch[get_std_syllable(s)];
184 syllables.push_back(id);
185 start = pos+1;
188 start = 0; // real syllable
189 vector<VocabIndex> real_syllables;
190 while (start < len) {
191 pos = toks2[0].find(' ',start);
192 if (pos == string::npos)
193 pos = len;
194 string s = toks2[0].substr(start,pos-start);
195 VocabIndex id = sarch[get_std_syllable(s)];
196 real_syllables.push_back(id);
197 start = pos+1;
200 vector<strid> path = syllables;
201 BranchNode* branch = get_root()->add_path(path);
202 NodeRef noderef(new LeafNode);
203 LeafNode *leaf = (LeafNode*)noderef.get();
204 //leaf->set_mask(CASE_LEAF);
205 branch->add(caseleaf_id,noderef);
206 leaf->set_id(real_syllables);
209 void WordArchive::register_leaf(strid id)
211 if (find(leaf_id.begin(),leaf_id.end(),id) == leaf_id.end())
212 leaf_id.push_back(id);
215 void LeafNode::set_mask(uint maskval,bool mask)
217 if (mask)
218 bitmask |= maskval;
219 else
220 bitmask &= ~maskval;
223 void LeafNode::set_id(const vector<strid> &_syllables)
225 syllables = _syllables;
226 string word;
227 int i,nr_syllables = syllables.size();
228 for (i = 0;i < nr_syllables;i ++) {
229 if (i)
230 word += "_";
231 word += sarch[syllables[i]];
233 id = sarch[word];
236 std::ostream& operator << (std::ostream &os,const LeafNode &node)
238 std::vector<strid> syll;
239 node.get_syllables(syll);
240 for (std::vector<strid>::size_type i = 0;i < syll.size();i ++) {
241 if (i)
242 os << " ";
243 os << sarch[syll[i]] << "(" << syll[i] << ") ";
245 return os;