softcount: tolerate zero ngrams
[vspell.git] / libvspell / stridtree.cpp
bloba3c0c236a8b023758caa9d96c2adbb2397be22e8
1 #include "stridtree.h"
3 using namespace std;
5 void BranchNode::get_leaves(std::vector<LeafNode*> &_nodes) const
7 const_np_range range;
8 range = nodes.equal_range(sarch["<leaf>"]);
9 node_map::const_iterator iter;
10 for (iter = range.first;iter != range.second; ++iter)
11 if (iter->second->is_leaf())
12 _nodes.push_back((LeafNode*)iter->second.get());
15 void BranchNode::get_branches(strid _id,std::vector<BranchNode*> &_nodes) const
17 const_np_range range;
18 range = nodes.equal_range(_id);
19 node_map::const_iterator iter;
20 for (iter = range.first;iter != range.second; ++iter)
21 if (!iter->second->is_leaf())
22 _nodes.push_back((BranchNode*)iter->second.get());
25 BranchNode* BranchNode::get_branch(strid _id) const
27 const_np_range range;
28 range = nodes.equal_range(_id);
29 node_map::const_iterator iter;
30 for (iter = range.first;iter != range.second; ++iter)
31 if (!iter->second->is_leaf())
32 return (BranchNode*)iter->second.get();
33 return NULL;
36 void BranchNode::add(strid _id,NodeRef _branch)
38 nodes.insert(make_pair(_id,_branch));
41 BranchNode* BranchNode::add_path(std::vector<strid> toks)
43 uint i,n = toks.size();
44 BranchNode *me = this;
45 for (i = 0;i < n;i ++) {
46 BranchNode *next = me->get_branch(toks[i]);
47 if (next == NULL) {
48 NodeRef branch(new BranchNode());
49 me->add(toks[i],branch);
50 next = (BranchNode*)branch.get();
52 me = next;
54 return me;
57 bool WordArchive::load(const char* filename)
59 File ifs(filename,"rt");
61 if (ifs.error())
62 return false;
64 int nr_lines = 0;
65 char *line;
66 int start,len,tmp_char;
67 string::size_type pos;
68 char *str_pos;
69 vector<string> toks;
70 while ((line = ifs.getline())) {
71 start = 0;
72 len = strlen(line);
73 toks.clear();
74 while (start < len) {
75 str_pos = strchr(line+start,'#');
76 if (str_pos == NULL)
77 pos = len;
78 else
79 pos = str_pos - line;
80 tmp_char = line[pos];
81 line[pos] = 0;
82 toks.push_back(line+start);
83 line[pos] = tmp_char;
84 start = pos+1;
87 if (!toks.size())
88 continue; // unrecoverable error
89 if (toks.size() < 2)
90 toks.push_back("N"); // assume N
91 if (toks.size() < 3)
92 toks.push_back("0"); // assume 0
94 add_entry(toks);
96 nr_lines ++;
99 return true;
102 void WordArchive::add_special_entry(strid tok)
104 NodeRef noderef(new LeafNode);
105 LeafNode *leaf = (LeafNode*)noderef.get();
106 leaf->set_id(tok);
107 get_root()->add(sarch["<leaf>"],noderef);
110 void WordArchive::add_entry(vector<string> toks)
112 unsigned start,len,pos;
113 start = 0;
114 len = toks[0].size();
115 vector<VocabIndex> syllables;
116 while (start < len) {
117 pos = toks[0].find(' ',start);
118 if (pos == string::npos)
119 pos = len;
120 string s = toks[0].substr(start,pos-start);
121 VocabIndex id = sarch[get_dic_syllable(s)];
122 syllables.push_back(id);
123 start = pos+1;
126 vector<strid> path = syllables;
127 BranchNode* branch = get_root()->add_path(path);
128 NodeRef noderef(new LeafNode);
129 LeafNode *leaf = (LeafNode*)noderef.get();
130 branch->add("<leaf>",noderef);
132 // reconstruct word id
133 string word;
134 int i,nr_syllables = syllables.size();
135 for (i = 0;i < nr_syllables;i ++) {
136 if (i)
137 word += "_";
138 word += sarch[syllables[i]];
140 leaf->set_id(sarch[word]);