1 // -*- tab-width: 2 -*-
11 #include <boost/format.hpp>
16 //NgramFractionalStats stats(sarch.get_dict(),2);
18 int main(int argc
,char **argv
)
21 fprintf(stderr
,"Need at least 2 argument.\n");
25 char *oldres
= argv
[1];
26 char *newres
= argv
[2];
33 cerr
<< "Loading... ";
34 //str = (boost::format("wordlist.%s") % oldres).str().c_str();
37 str
= (boost::format("ngram.%s") % oldres
).str().c_str();
42 cerr
<< "Ngram loading error..." << endl
;
43 cerr
<< "done" << endl
;
45 get_sarch().set_blocked(true);
47 //wfst.set_wordlist(get_root());
50 int i
,ii
,iii
,n
,nn
,nnn
,z
;
52 NgramStats
stats(get_sarch().get_dict(),3);
53 NgramStats
syllable_stats(get_sarch().get_dict(),2);
54 while (getline(cin
,s
)) {
57 cerr
<< count
<< endl
;
61 sentences_split(s
,ss
);
62 for (z
= 0;z
< ss
.size();z
++) {
66 if (!st
.get_syllable_count())
71 WordStateFactories factories
;
72 ExactWordStateFactory exact
;
73 LowerWordStateFactory lower
;
74 //FuzzyWordStateFactory fuzzy;
75 factories
.push_back(&exact
);
76 factories
.push_back(&lower
);
77 //factories.push_back(&fuzzy);
78 words
.pre_construct(sent
,wes
,factories
);
79 mark_proper_name(sent
,wes
);
80 words
.post_construct(wes
);
81 //cerr << words << endl;
82 Segmentation
seg(words
.we
);
84 /* // pfs don't distinguish
86 wfst.segment_best_no_fuzzy(words,seg);
88 wfst.segment_best(words,seg);
92 wfst
.search(dag
,path
);
93 seg
.resize(path
.size()-2);
94 copy(path
.begin()+1,path
.end()-1,seg
.begin());
96 //seg.pretty_print(cout,st) << endl;
99 n
= st
.get_syllable_count();
100 vi
= new VocabIndex
[n
+1];
102 for (i
= 0;i
< n
;i
++)
103 vi
[i
] = st
[i
].get_cid();
104 syllable_stats
.countSentence(vi
);
108 vi
= new VocabIndex
[n
+1];
110 for (i
= 0;i
< n
;i
++) {
111 if (path
[i
] == dag
.node_begin())
112 vi
[i
] = get_id(START_ID
);
113 else if (path
[i
] == dag
.node_end())
114 vi
[i
] = get_id(STOP_ID
);
116 vi
[i
] = ((WordEntry
*)dag
.node_info(path
[i
]))->node
.node
->get_id();
117 //cerr << "<" << sarch[vi[i]] << "> ";
121 stats
.countSentence(vi
);
122 //cerr << "done" << endl;
127 const WordEntries &we = *words.we;
129 for (i = 0;i < n;i ++) {
130 we[i].node.node->inc_b();
134 for (i = 0;i < n;i ++)
135 seg[i].node.node->inc_a();
140 cerr
<< "Calculating... ";
141 //get_root()->get_next(unk_id)->get_b() = 0;
142 //get_root()->recalculate();
143 get_ngram().estimate(stats
);
144 get_syngram().estimate(syllable_stats
);
145 //wfst.enable_ngram(true);
147 cerr
<< "Saving... ";
148 //str = (boost::format("wordlist.wl.%s") % newres).str().c_str();
149 //get_root()->save(str);
151 str
= (boost::format("ngram.%s") % newres
).str().c_str();
153 get_ngram().write(ff
);
154 str
= (boost::format("syngram.%s") % newres
).str().c_str();
156 get_syngram().write(fff
);
159 for (int i = 0;i < 50;i ++) {
162 ofstream ofs(oss.str().c_str());
163 cerr << "Iteration " << i << "... ";
165 cerr << "done" << endl;