sc-train: Only load wordlist on first count, specified by --wordlist
[vspell.git] / utils / lattice2dot.cpp
blob3bda6ac1a40ab07522730cf776b683a7a11513ff
1 #include <fstream>
2 #include <iostream>
3 #include <algorithm>
4 #include <iterator>
5 #include <set>
6 #include "vspell.h"
7 #include "syllable.h"
8 #include "propername.h"
10 using namespace std;
12 void lattice_to_dot(ostream &os,const Lattice &w2,bool spare,bool edge_value)
14 uint i,n;
15 const Sentence &st = *w2.st;
16 const WordEntries &wes = *w2.we;
17 n = wes.size();
18 if (n == 0) return;
19 os << "digraph wordlattice {" << endl;
20 os << "\trankdir=LR;" << endl;
21 os << "\tstyle=invis;" << endl;
22 os << "\thead;" << endl;
23 os << "\ttail;" << endl;
24 //set<strid> nodes;
25 int old_pos = -1;
26 int cc;
27 int anchor[st.get_syllable_count()];
28 for (i = 0;i < n;i ++) {
29 //if (nodes.find(wes[i].node.node->get_id()) == nodes.end()) {
30 //nodes.insert(wes[i].node.node->get_id());
31 if (wes[i].pos != old_pos) {
32 if (wes[i].pos) {
33 os << "\t}" << endl;
35 os << "\tsubgraph cluster_" << wes[i].pos << " {" << endl;
36 old_pos = wes[i].pos;
37 cc = 0;
40 if (spare && cc++ == w2.get_we(wes[i].pos).size()/2)
41 //os << "\tanchor_" << wes[i].pos << " [shape=\"point\"];" << endl;
42 anchor[wes[i].pos] = i;
44 os << "\tn" << i << " [label=\"";
45 std::vector<strid> syll;
46 if (wes[i].node.node) {
47 wes[i].node.node->get_syllables(syll);
48 for (std::vector<strid>::size_type ii = 0;ii < syll.size();ii ++) {
49 if (i)
50 os << " ";
51 Syllable sy;
52 if (sy.parse(get_ngram()[syll[ii]]))
53 os << sy.to_str();
54 else
55 os << get_ngram()[syll[ii]];
57 } else
58 os << "UNK";
59 os << "\"];" << endl;
61 //}
63 os << "\t}" << endl; // end of the last cluster
65 if (spare)
66 for (i = 0;i < st.get_syllable_count()-1;i ++) {
67 //os << "anchor_" << i << " -> anchor_" << (i+1) << " [style=invis, weight=10000];" << endl;
68 os << "n" << anchor[i] << " -> n" << anchor[i+1] << " [style=invis, weight=10000];" << endl;
71 VocabIndex vi[2];
72 vi[1] = 0;
73 float val;
74 int ii,nn;
75 for (i = 0;i < n;i ++) {
76 const WordEntry &we = wes[i];
78 if (we.pos == 0) {
79 if (edge_value) {
80 vi[0] = get_id(START_ID);
81 val = -get_ngram().wordProb(we.node.node->get_id(),vi);
82 os << "\thead -> n" << we.id << " [ label=\"" << val << "\"];" << endl;
83 } else
84 os << "\thead -> n" << we.id << ";" << endl;
86 if (we.pos+we.len >= w2.get_word_count()) {
87 if (edge_value) {
88 vi[0] = we.node.node->get_id();
89 val = -get_ngram().wordProb(get_id(STOP_ID),vi);
90 os << "\tn" << we.id << " -> tail [ label=\"" << val << "\"];" << endl;
91 } else
92 os << "\tn" << we.id << " -> tail;" << endl;
93 } else {
94 if (spare)
95 os << "\tn" << we.id << " -> n" << anchor[(we.pos+we.len)] << ";" << endl;
96 else {
97 const WordEntryRefs &wers = w2.get_we(we.pos+we.len);
98 nn = wers.size();
99 for (ii = 0;ii < nn; ii ++) {
100 if (edge_value) {
101 vi[0] = we.node.node->get_id();
102 val = -get_ngram().wordProb(wers[ii]->node.node->get_id(),vi);
103 os << "\tn" << we.id << " -> n" << wers[ii]->id << " [label=\"" << val << "\"];" << endl;
104 } else
105 os << "\tn" << we.id << " -> n" << wers[ii]->id << ";" << endl;
111 os << "}" << endl;
114 int main(int argc,char **argv)
116 bool spare = false;
117 bool edge_value = false;
119 for (int i = 1;i < argc;i ++) {
120 if (!strcmp(argv[i],"--spare")) {
121 spare = true;
122 continue;
124 if (!strcmp(argv[i],"--edge")) {
125 if (i+1 == argc) {
126 cerr << "ngram is needed for --edge" << endl;
127 return -1;
129 if (!get_ngram().read(argv[i+1])) {
130 cerr << "ngram " << argv[i+1] << " failed to load" << endl;
131 return -2;
133 edge_value = true;
134 i++;
135 continue;
137 cerr << "Unknown parameter " << argv[i] << endl;
140 dic_init();
141 warch.load("wordlist");
143 int count = 0;
144 while (!cin.eof()) {
145 if (++count % 200 == 0) cerr << count << endl;
146 Lattice l;
147 cin >> l;
148 lattice_to_dot(cout,l,spare,edge_value);
150 return 0;