terrible bug in PenaltyDAG and Penalty2DAG.
[vspell.git] / libvspell / pfs.cpp
blobdf4f556050991b2418ca53242cedb569a306f9d8
1 #include "pfs.h" // -*- tab-width: 2 -*-
3 using namespace std;
5 class HeapValue : public vector<float> {
6 public:
7 bool operator() (uint v1,uint v2) {
8 return (*this)[v1] > (*this)[v2];
14 void PFS::segment_best(const Lattice &w,Segmentation &seps)
16 vector<uint> last;
17 vector<bool> seen; // true if a node is seen
18 vector<uint> candidates; // examining nodes
19 HeapValue val;
21 int n = w.get_word_count();
23 seps.we = w.we;
25 val.resize(w.we->size()+1);
26 last.resize(w.we->size()+1);
27 seen.resize(w.we->size()+1);
28 candidates.reserve(w.we->size()+1);
30 const WordEntryRefs &wers_start = w.get_we(0);
31 int ii,nn = wers_start.size();
32 for (ii = 0; ii < nn;ii ++) {
33 int v = wers_start[ii]->id;
34 candidates.push_back(v);
35 push_heap(candidates.begin(),candidates.end(),val);
36 seen[v] = true;
37 val[v] = 0;
38 last[v] = v;
41 // while there is a node to examine
42 while (!candidates.empty()) {
44 // get a node
45 pop_heap(candidates.begin(),candidates.end(),val);
46 int v = candidates.back();
47 candidates.pop_back();
49 //cerr << "got " << (*w.we)[v].node << " " << val[v] << endl;
51 int next = (*w.we)[v].pos+(*w.we)[v].len;
53 if (next >= n) {
54 int vv = w.we->size();
55 if (!seen[vv]) {
56 seen[vv] = true;
57 val[vv] = val[v];
58 last[vv] = v;
59 //cerr << " end " << val[vv] << "=" << v << endl;
60 } else {
61 if (val[vv] > val[v]) {
62 val[vv] = val[v];
63 last[vv] = v;
64 //cerr << " new end " << val[vv] << "=" << v << endl;
67 continue;
70 const WordEntryRefs &wers = w.get_we(next);
71 uint vv;
72 nn = wers.size();
73 float value,add;
74 VocabIndex vi[2];
75 vi[1] = Vocab_None;
76 vi[0] = (*w.we)[v].node.node->get_id();
77 for (ii = 0;ii < nn;ii ++) {
78 vv = wers[ii]->id;
79 //value = val[v]+wers[ii]->node.node->get_prob();
80 //vi[0] = (*w.we)[last[v]].node.node->get_id();
81 add = (-get_ngram().wordProb((*w.we)[vv].node.node->get_id(),vi));
82 value = val[v] + add;
83 //cerr << "examine " << vv << "(" << wers[ii]->node << ")";
85 if (!seen[vv]) {
86 candidates.push_back(vv);
87 seen[vv] = true;
88 val[vv] = value;
89 push_heap(candidates.begin(),candidates.end(),val);
90 last[vv] = v;
91 //cerr << " add " << val[vv] << "=" << v << "+"<< add;
92 } else {
93 if (val[vv] > value) {
94 val[vv] = value;
95 last[vv] = v;
97 // re-heap if necessary
98 vector<uint>::iterator iter = find(candidates.begin(),candidates.end(),vv);
99 while (true) {
100 push_heap(candidates.begin(),iter,val);
101 if (iter != candidates.end())
102 ++iter;
103 else
104 break;
106 //cerr << " val " << val[vv] << "=" << v << "+"<< add;
109 //cerr << endl;
113 const WordEntries &we = *w.we;
114 uint v = we.size();
115 while (true) {
116 //cerr << v << "->" << last[v] << endl;
117 if (last[v] != v) {
118 v = last[v];
119 seps.push_back(v);
120 } else
121 break;
123 reverse(seps.begin(),seps.end());
125 //cerr << "done" << endl;
129 void PFS::search(const DAG &dag,Path &seps)
131 vector<uint> last;
132 vector<bool> seen; // true if a node is seen
133 vector<uint> candidates; // examining nodes
134 uint v;
135 HeapValue val;
136 uint n = dag.node_count();
138 val.resize(n+1);
139 last.resize(n+1);
140 seen.resize(n+1);
141 candidates.reserve(n+1);
143 vector<uint> next_nodes;
144 v = dag.node_begin();
145 last[v] = v;
146 val[v] = 0;
147 seen[v] = true;
148 candidates.push_back(v);
149 int ii,nn = next_nodes.size();
151 dag.get_next(dag.node_begin(),next_nodes);
152 int ii,nn = next_nodes.size();
153 for (ii = 0; ii < nn;ii ++) {
154 v = next_nodes[ii];
155 candidates.push_back(v);
156 push_heap(candidates.begin(),candidates.end(),val);
157 seen[v] = true;
158 val[v] = 0;
159 last[v] = dag.node_begin();
163 // while there is a node to examine
164 while (!candidates.empty()) {
166 // get a node
167 pop_heap(candidates.begin(),candidates.end(),val);
168 v = candidates.back();
169 candidates.pop_back();
171 //cerr << "check " << v << " " << val[v] << endl;
173 next_nodes.clear();
174 dag.get_next(v,next_nodes);
176 uint vv;
177 nn = next_nodes.size();
178 float value,add;
179 for (ii = 0;ii < nn;ii ++) {
180 vv = next_nodes[ii];
181 add = dag.edge_value(v,vv);
182 value = val[v] + add;
183 //cerr << "examine " << vv << "(" << vv << ")";
185 if (!seen[vv]) {
186 candidates.push_back(vv);
187 seen[vv] = true;
188 val[vv] = value;
189 push_heap(candidates.begin(),candidates.end(),val);
190 last[vv] = v;
191 //cerr << " add " << val[vv] << "=" << v << "+"<< add;
192 } else {
193 if (val[vv] > value) {
194 val[vv] = value;
195 last[vv] = v;
197 // re-heap if necessary
198 vector<uint>::iterator iter = find(candidates.begin(),candidates.end(),vv);
199 while (true) {
200 push_heap(candidates.begin(),iter,val);
201 if (iter != candidates.end())
202 ++iter;
203 else
204 break;
206 //cerr << " val " << val[vv] << "=" << v << "+"<< add;
209 //cerr << endl;
213 v = dag.node_end();
214 if (!seen[v])
215 return; // ARGH! NO WAY TO THE END!!!
217 do {
218 seps.push_back(v);
219 v = last[v];
220 } while (v != last[v]);
221 seps.push_back(v);
222 reverse(seps.begin(),seps.end());
224 //cerr << "done" << endl;