Prevent crashes when get node_begin from an empty DAG
[vspell.git] / utils / wfst-test.cpp
blob2feac5a8399f5496d50ea72a85e73aa0c170bb70
1 // -*- coding: viscii -*-
2 #include "pfs.h"
3 #include "bellman.h"
4 #include "distance.h"
5 #include "sentence.h"
6 #include <string>
7 #include <fstream>
8 #include <cmath>
9 #include <cstdio>
10 #include <boost/format.hpp>
11 #include "propername.h"
13 using namespace std;
15 void dag_to_dot(ostream &os,Lattice &w2,DAG &dag,bool edge_value);
16 void dag2_to_dot(ostream &os,Lattice &w2,WordDAG2 &dag,bool edge_value);
17 void print_all_words(const Lattice &words);
19 int main(int argc,char **argv)
21 bool fuzzy = true;
22 bool bellman = false;
23 bool trigram = false;
24 bool dot = false;
25 bool edge_value = false;
27 for (int i = 1;i < argc;i ++) {
28 if (!strcmp(argv[i],"nofuzzy")) fuzzy = false;
29 else if (!strcmp(argv[i],"bellman")) bellman = true;
30 else if (!strcmp(argv[i],"trigram")) trigram = true;
31 else if (!strcmp(argv[i],"dot")) dot = true;
32 else if (!strcmp(argv[i],"edge")) edge_value = true;
34 dic_init();
35 ed_init();
37 cerr << "Loading... ";
38 warch.load("wordlist");
39 File f("ngram","rt",0);
40 if (!f.error())
41 get_ngram().read(f);
42 cerr << "done" << endl;
44 get_sarch().set_blocked(true);
46 get_sarch().dump();
49 cerr << "Saving...";
50 get_root()->save("wordlist2.wl");
51 cerr << "done" << endl;
54 //wfst.set_wordlist(get_root());
55 vector<Sentence> sentences;
57 string s;
58 while (getline(cin,s)) {
59 if (!s.empty()) {
60 vector<string> ss;
61 sentences_split(s,ss);
62 //sentences.push_back(Sentence(s));
63 //Sentence &st = sentences.back();
64 for (int i = 0;i < ss.size();i ++) {
65 //cout << ss[i] << endl;
66 Sentence st(ss[i]);
67 st.standardize();
68 st.tokenize();
69 if (!st.get_syllable_count())
70 continue;
71 Lattice words;
72 set<WordEntry> wes;
73 WordStateFactories factories;
74 ExactWordStateFactory exact;
75 LowerWordStateFactory lower;
76 UpperWordStateFactory upper;
77 FuzzyWordStateFactory ffuzzy;
78 factories.push_back(&exact);
79 factories.push_back(&lower);
80 if (fuzzy) {
81 factories.push_back(&upper);
82 factories.push_back(&ffuzzy);
84 words.pre_construct(st,wes,factories);
85 mark_proper_name(st,wes);
86 words.post_construct(wes);
87 Segmentation seg(words.we);
89 if (argc > 2)
90 wfst.segment_best_no_fuzzy(words,seg);
91 else
92 wfst.segment_best(words,seg);
94 Path path;
95 WordDAG dagw(&words);
96 DAG *dag = &dagw;
97 WordDAG2 *dagw2;
98 if (trigram) {
99 dagw2 = new WordDAG2(&dagw);
100 dag = dagw2;
102 if (dot) {
103 if (trigram)
104 dag2_to_dot(cout,words,*dagw2,edge_value);
105 else
106 dag_to_dot(cout,words,*dag,edge_value);
107 } else {
108 if (bellman) {
109 Bellman wfst;
110 wfst.search(*dag,path);
111 } else {
112 PFS wfst;
113 wfst.search(*dag,path);
115 if (trigram) {
116 ((WordDAG2*)dag)->demangle(path);
117 delete (WordDAG2*)dag;
119 seg.resize(path.size()-2);
120 copy(path.begin()+1,path.end()-1,seg.begin());
121 seg.pretty_print(cout,st) << endl;
123 // sarch.clear_rest();
128 return 0;
131 void dag_to_dot(ostream &os,Lattice &w2,DAG &dag,bool edge_value)
133 uint i,n;
134 const Sentence &st = *w2.st;
135 WordEntries &wes = *w2.we;
136 n = dag.node_count();
137 os << "digraph wordlattice {" << endl;
138 os << "\trankdir=LR;" << endl;
139 os << "\tstyle=invis;" << endl;
141 for (i = 0;i < n;i ++) {
142 string label;
143 label = get_sarch()[dag.node_id(i)];
145 wes[i].node.node->get_syllables(syll);
146 for (std::vector<strid>::size_type ii = 0;ii < syll.size();ii ++) {
147 if (i)
148 os << " ";
149 Syllable sy;
150 if (sy.parse(get_sarch()[syll[ii]]))
151 os << sy.to_str();
152 else
153 os << get_sarch()[syll[ii]];
156 os << "\tn" << i << "[label=\"" << label << ";" << i << "\"];" << endl;
160 vector<uint> nexts;
161 vector<bool> done;
162 uint v,vv,l,ii,nn;
163 i = 0;
164 done.resize(dag.node_count());
165 nexts.clear();
166 nexts.push_back(dag.node_begin());
167 while (i < (l = nexts.size())) {
168 v = nexts[i++];
169 if (done[v])
170 continue;
171 else
172 done[v] = true;
173 dag.get_next(v,nexts);
174 nn = nexts.size();
175 for (ii = l;ii < nn;ii ++) {
176 if (edge_value)
177 os << "\tn" << v << " -> n" << nexts[ii] << "[label=\"" << dag.edge_value(v,nexts[ii]) << "\"];" << endl;
178 else
179 os << "\tn" << v << " -> n" << nexts[ii] << ";" << endl;
182 done.clear();
184 os << "}" << endl;
187 void dag2_to_dot(ostream &os,Lattice &w2,WordDAG2 &dag,bool edge_value)
189 uint i,n;
190 const Sentence &st = *w2.st;
191 WordEntries &wes = *w2.we;
192 n = dag.node_count();
193 WordDAG *wdag = dag.get_dag();
194 os << "digraph wordlattice {" << endl;
195 os << "\trankdir=LR;" << endl;
196 os << "\tstyle=invis;" << endl;
198 for (i = 0;i < n;i ++) {
199 string label;
200 uint n1,n2;
201 if (i == dag.node_begin() || i == dag.node_end())
202 label = get_sarch()[dag.node_id(i)];
203 else {
204 dag.node_dag_edge(i,n1,n2);
205 label = get_sarch()[wdag->node_id(n1)];
206 label += string(" ");
207 label += get_sarch()[wdag->node_id(n2)];
209 os << "\tn" << i << "[label=\"" << label << ";" << i << "\"];" << endl;
212 vector<uint> nexts;
213 vector<bool> done;
214 uint v,vv,l,ii,nn;
215 i = 0;
216 done.resize(dag.node_count());
217 nexts.clear();
218 nexts.push_back(dag.node_begin());
219 while (i < (l = nexts.size())) {
220 v = nexts[i++];
221 if (done[v])
222 continue;
223 else
224 done[v] = true;
225 dag.get_next(v,nexts);
226 nn = nexts.size();
227 for (ii = l;ii < nn;ii ++) {
228 if (edge_value)
229 os << "\tn" << v << " -> n" << nexts[ii] << "[label=\"" << dag.edge_value(v,nexts[ii]) << "\"];" << endl;
230 else
231 os << "\tn" << v << " -> n" << nexts[ii] << ";" << endl;
234 done.clear();
236 os << "}" << endl;