8 #include "propername.h"
// apply_separators: for each separator position in `seps`, looks for the
// adjacent syllable pair (i, i+1) of sentence `st` whose gap contains that
// position and calls apply_separator(wes, i) for it.
//
//   st   - sentence whose syllables are scanned (read-only).
//   wes  - word-entry set handed through to apply_separator().
//   seps - separator positions; sorted IN PLACE by this function.
//
// NOTE(review): this listing omits several original lines (the opening
// brace, the declarations of `sep` and `offset`, whatever follows the
// apply_separator() call, and all closing braces). Comments below describe
// only the statements that are visible.
12 void apply_separators(const Sentence
&st
,set
<WordEntry
> &wes
,vector
<unsigned> &seps
)
// Sort ascending so separators can be consumed in one left-to-right pass.
// `seps` is a non-const reference, so the caller's vector is reordered too.
14 sort(seps
.begin(),seps
.end());
15 //copy(seps1.begin(),seps1.end(),inserter(seps,seps.begin()));
17 int i
,n
= st
.get_syllable_count();
// Visit every syllable that has a successor (i < n-1) while there are still
// separators left (sep < seps.size()).
// NOTE(review): `sep` is declared and presumably advanced in lines not shown.
19 for (i
= 0;i
< n
-1 && sep
< seps
.size();i
++) {
// p = offset + start of syllable i + strlen of the string get_sarch() maps
// its id to; presumably the position just past syllable i - confirm.
20 int p
= offset
+st
[i
].start
+strlen(get_sarch()[st
[i
].get_id()]);
// The current separator matches when it lies in the closed interval
// [p, offset + start of syllable i+1].
21 if (p
<= seps
[sep
] && seps
[sep
] <= offset
+st
[i
+1].start
) {
22 apply_separator(wes
,i
);
// lattice_to_dot: writes lattice `w2` to `os` as a Graphviz graph named
// "wordlattice".
//
//   os         - output stream receiving the dot text.
//   w2         - lattice; its `st` (sentence) and `we` (word entries)
//                pointers are dereferenced immediately.
//   spare      - when true, one entry per position is recorded in anchor[].
//   has_seps   - not referenced in any visible line of this listing.
//   edge_value - presumably selects the labelled edge variants (the guarding
//                conditions are in lines not shown) - confirm.
//
// NOTE(review): this listing omits many original lines (braces, the
// declarations of i, n, old_pos, cc, sy, vi, val, ii, nn, and several
// if/else conditions). Comments describe only the visible statements.
28 void lattice_to_dot(ostream
&os
,Lattice
&w2
,bool spare
,bool has_seps
,bool edge_value
)
31 const Sentence
&st
= *w2
.st
;
32 WordEntries
&wes
= *w2
.we
;
// Graph preamble: left-to-right layout plus the synthetic "head" and "tail"
// nodes used as common source and sink.
34 os
<< "digraph wordlattice {" << endl
;
35 os
<< "\trankdir=LR;" << endl
;
36 os
<< "\tstyle=invis;" << endl
;
37 os
<< "\thead;" << endl
;
38 os
<< "\ttail;" << endl
;
// anchor[pos] holds the index of one word entry chosen for position `pos`.
// NOTE(review): the array size is a runtime call, i.e. a variable-length
// array, which is a compiler extension rather than standard C++.
42 int anchor
[st
.get_syllable_count()];
// Node pass: one dot node per word entry, grouped into one cluster per
// starting position.
43 for (i
= 0;i
< n
;i
++) {
44 //if (nodes.find(wes[i].node.node->get_id()) == nodes.end()) {
45 //nodes.insert(wes[i].node.node->get_id());
// A change of `pos` starts a new "cluster_<pos>" subgraph.
46 if (wes
[i
].pos
!= old_pos
) {
50 os
<< "\tsubgraph cluster_" << wes
[i
].pos
<< " {" << endl
;
// With `spare` set, the entry whose running counter `cc` equals half the
// number of entries at this position becomes that position's anchor.
// NOTE(review): no other write to anchor[] is visible, so slots may be left
// unset when `spare` is false or the counter never matches - confirm.
55 if (spare
&& cc
++ == w2
.get_we(wes
[i
].pos
).size()/2)
56 //os << "\tanchor_" << wes[i].pos << " [shape=\"point\"];" << endl;
57 anchor
[wes
[i
].pos
] = i
;
// Node declaration "n<i>"; the label text is built from the entry's syllables.
59 os
<< "\tn" << i
<< " [label=\"";
60 std::vector
<strid
> syll
;
// Syllables are only fetched when the entry has a non-null node pointer.
61 if (wes
[i
].node
.node
) {
62 wes
[i
].node
.node
->get_syllables(syll
);
63 for (std::vector
<strid
>::size_type ii
= 0;ii
< syll
.size();ii
++) {
// NOTE(review): what is printed when sy.parse() succeeds is not shown; the
// visible write below emits the string get_sarch() maps the syllable id to.
67 if (sy
.parse(get_sarch()[syll
[ii
]]))
70 os
<< get_sarch()[syll
[ii
]];
78 os
<< "\t}" << endl
; // end of the last cluster
// Invisible, heavily weighted edges chaining the anchors of consecutive
// positions.
// NOTE(review): the condition guarding this loop (if any) is not visible.
81 for (i
= 0;i
< st
.get_syllable_count()-1;i
++) {
82 //os << "anchor_" << i << " -> anchor_" << (i+1) << " [style=invis, weight=10000];" << endl;
83 os
<< "n" << anchor
[i
] << " -> n" << anchor
[i
+1] << " [style=invis, weight=10000];" << endl
;
// Edge pass: emit the edges of every word entry.
// NOTE(review): nodes were declared above as n<i> while edges use n<we.id>;
// the two agree only if wes[i].id == i - confirm.
90 for (i
= 0;i
< n
;i
++) {
91 WordEntry
&we
= wes
[i
];
// head -> entry, in a labelled and an unlabelled variant. The label is the
// negated wordProb() of the entry's node id given START_ID as context.
// NOTE(review): the conditions choosing when and which variant is written
// are not visible.
95 vi
[0] = get_id(START_ID
);
96 val
= -get_ngram().wordProb(we
.node
.node
->get_id(),vi
);
97 os
<< "\thead -> n" << we
.id
<< " [ label=\"" << val
<< "\"];" << endl
;
99 os
<< "\thead -> n" << we
.id
<< ";" << endl
;
// An entry that reaches or passes the last word position connects to "tail".
101 if (we
.pos
+we
.len
>= w2
.get_word_count()) {
// Label: negated wordProb() of STOP_ID given this entry's node id.
103 vi
[0] = we
.node
.node
->get_id();
104 val
= -get_ngram().wordProb(get_id(STOP_ID
),vi
);
105 os
<< "\tn" << we
.id
<< " -> tail [ label=\"" << val
<< "\"];" << endl
;
107 os
<< "\tn" << we
.id
<< " -> tail;" << endl
;
// Single edge to the anchor entry of the position right after this entry.
// NOTE(review): presumably the `spare` branch; its guard is not visible.
110 os
<< "\tn" << we
.id
<< " -> n" << anchor
[(we
.pos
+we
.len
)] << ";" << endl
;
// Otherwise one edge to every entry starting at position pos+len.
112 const WordEntryRefs
&wers
= w2
.get_we(we
.pos
+we
.len
);
114 for (ii
= 0;ii
< nn
; ii
++) {
// Label: negated wordProb() of the successor's node id given this entry's
// node id.
116 vi
[0] = we
.node
.node
->get_id();
117 val
= -get_ngram().wordProb(wers
[ii
]->node
.node
->get_id(),vi
);
118 os
<< "\tn" << we
.id
<< " -> n" << wers
[ii
]->id
<< " [label=\"" << val
<< "\"];" << endl
;
120 os
<< "\tn" << we
.id
<< " -> n" << wers
[ii
]->id
<< ";" << endl
;
// total_combinations: accumulates a per-entry count over lattice `w` by
// summing the counts of each entry's predecessors, and prints a trace of
// every accumulation step plus a final total.
//
//   os - stream receiving the per-step trace and the "Final: " line.
//   w  - lattice whose word entries are traversed position by position.
//
// NOTE(review): this listing omits several original lines (braces, the
// sizing of `prev`, the seeding of `val`, the assignments to `v` and `vv`,
// and the update of `final_val`). Comments describe only the visible
// statements.
129 void total_combinations(ostream
&os
,Lattice
&w
)
131 WordEntries
&wes
= *w
.we
;
132 unsigned long long nn
= wes
.size();
// val holds one counter per word entry (nn elements).
133 vector
<unsigned long long> val(nn
);
// prev[x] lists the predecessor indices of entry x.
// NOTE(review): prev is default-constructed here; it must be resized in a
// line not shown before prev[vv] is indexed below - confirm.
135 vector
<vector
<uint
> > prev
;
136 int i
,n
= w
.get_word_count(),v
,vv
;
// Pass 1: record the predecessor relation between adjacent entries.
141 for (i
= 0;i
< n
;i
++) {
142 const WordEntryRefs
&wers
= w
.get_we(i
);
// NOTE(review): this int `nn` shadows the outer unsigned long long `nn`.
143 int ii
,nn
= wers
.size();
144 for (ii
= 0;ii
< nn
;ii
++) {
145 // wers[ii] is the first node (W).
// `next` is the position immediately after the first node.
149 int next
= wers
[ii
]->pos
+wers
[ii
]->len
;
151 const WordEntryRefs
&wers2
= w
.get_we(next
);
152 int iii
,nnn
= wers2
.size();
153 for (iii
= 0;iii
< nnn
;iii
++) {
154 //wers2[iii] is the second node (W).
// Register v as a predecessor of vv.
// NOTE(review): presumably v/vv identify the first/second node; their
// assignments are in lines not shown.
156 prev
[vv
].push_back(v
);
// Pass 2: visit entries in position order and add up predecessor counts.
162 unsigned long long final_val
= 0;
163 for (i
= 0;i
< n
;i
++) {
164 const WordEntryRefs
&wers
= w
.get_we(i
);
165 int ii
,nn
= wers
.size();
166 for (ii
= 0;ii
< nn
;ii
++) {
167 // wers[ii] is the first node (W).
169 int iii
,nnn
= prev
[v
].size();
170 for (iii
= 0;iii
< nnn
;iii
++) {
// Trace line "<v>(<val>) <- <pred>(<val>)", written before the addition.
171 os
<< v
<< "(" << val
[v
] << ") <- " << prev
[v
][iii
] << "(" << val
[prev
[v
][iii
]] << ")" << endl
;
172 val
[v
] += val
[prev
[v
][iii
]];
// Entries that end exactly at the last word position are final entries.
// NOTE(review): the update of final_val inside this branch is not shown.
174 if (wers
[ii
]->pos
+wers
[ii
]->len
== w
.get_word_count()) {
176 os
<< "Final: " << final_val
<< endl
;
// NOTE(review): this write goes to cout, not to the `os` parameter.
180 cout
<< final_val
<< endl
;
// main: command-line driver. Reads option words from argv, loads the
// "wordlist" and "ngram" resources, then builds a lattice for each non-empty
// line of standard input and passes it to total_combinations() and/or
// lattice_to_dot(), both writing to cout.
//
// NOTE(review): this listing omits several original lines (braces, the
// declarations of i, fuzzy, dot, spare, s, p, st, wes and w2, the n-gram
// loading code, the body of the '|' loop, and the conditions guarding the
// output calls). Comments describe only the visible statements.
183 int main(int argc
,char **argv
)
189 bool has_seps
= false;
190 bool edge_value
= false;
191 bool total_comb
= false;
194 vector
<unsigned> seps
;
// Each argument is compared against every known option word; arguments that
// match none of them are ignored by the visible code.
196 for (i
= 1;i
< argc
;i
++) {
197 if (!strcmp(argv
[i
],"nofuzzy")) fuzzy
= false;
198 if (!strcmp(argv
[i
],"dot")) dot
= true;
199 if (!strcmp(argv
[i
],"spare")) spare
= true;
200 if (!strcmp(argv
[i
],"seps")) has_seps
= true;
201 if (!strcmp(argv
[i
],"edgeval")) edge_value
= true;
202 if (!strcmp(argv
[i
],"total_comb")) total_comb
= true;
// Load resources; progress messages go to cerr.
207 cerr
<< "Loading... ";
208 warch
.load("wordlist");
// Opens the file named "ngram" in mode "rt".
// NOTE(review): the code that reads from `f` is in lines not shown.
209 File
f("ngram","rt",0);
212 cerr
<< "done" << endl
;
214 get_sarch().set_blocked(true);
216 //wfst.set_wordlist(get_root());
// One input line per iteration; empty lines are skipped.
219 while (getline(cin
,s
)) {
220 if (s
.empty()) continue;
// Repeats while the line still contains a '|' character.
// NOTE(review): the loop body is not shown; presumably it records the
// position in `seps` and removes the character - confirm.
224 while ((p
= s
.find('|')) != string::npos
) {
// Word-state factories, registered in the order exact, lower, fuzzy.
// NOTE(review): original line 241, just before the ffuzzy push_back, is not
// shown; it may make that registration conditional on `fuzzy` - confirm.
235 WordStateFactories factories
;
236 ExactWordStateFactory exact
;
237 LowerWordStateFactory lower
;
238 FuzzyWordStateFactory ffuzzy
;
239 factories
.push_back(&exact
);
240 factories
.push_back(&lower
);
242 factories
.push_back(&ffuzzy
);
// Build sequence: pre_construct fills `wes`, proper names are marked,
// separators are applied, then post_construct finishes the lattice.
243 w2
.pre_construct(st
,wes
,factories
);
244 mark_proper_name(st
,wes
);
246 apply_separators(st
,wes
,seps
);
247 w2
.post_construct(wes
);
248 //w2.based_on(words);
// Output stage.
// NOTE(review): presumably guarded by `total_comb` and `dot` respectively;
// the conditions are in lines not shown.
250 total_combinations(cout
,w2
);
255 lattice_to_dot(cout
,w2
,spare
,has_seps
,edge_value
);
257 get_sarch().clear_rest();