1 // -*- tab-width: 2 -*-
10 #include "softcount.h"
11 #include "propername.h"
12 #include <boost/format.hpp>
// Forward declaration of estimate(); the definition appears further down in
// this file. It takes the model and the fractional n-gram statistics by
// reference.
16 void estimate(Ngram
&ngram
,NgramFractionalStats
&stats
);
// Program entry point. From the statements visible in this listing it:
//   - takes two names from argv[1] and argv[2] (oldres and newres),
//   - formats the names ngram.<oldres> and ngram.<newres>,
//   - reads standard input line by line and splits each line with
//     sentences_split(),
//   - runs pre_construct, mark_proper_name and post_construct on words for
//     each piece,
//   - calls get_ngram().estimate(stats) and get_ngram().write(ff),
//   - reports progress and errors on stderr.
// NOTE(review): many lines of this function (braces, conditions, several
// declarations) are not visible in this listing; the summary covers only
// what is shown.
18 int main(int argc
,char **argv
)
// Usage message written to stderr; the condition guarding it is not visible.
// NOTE(review): the message uses the singular form of the word argument
// after the number 2 -- cosmetic only.
21 fprintf(stderr
,"Need at least 2 argument.\n");
// oldres is used below to build the name printed next to the Loading message,
// newres to build the name next to the Saving message.
25 char *oldres
= argv
[1];
26 char *newres
= argv
[2];
34 cerr
<< "Loading... ";
// NOTE(review): c_str() is called on the temporary std::string returned by
// boost::format(...).str(); that temporary is destroyed at the end of this
// statement. If str is a raw character pointer, the stored pointer dangles;
// if str is a std::string, the characters are copied and this is safe. The
// declaration of str is not visible here -- confirm.
37 str
= (boost::format("ngram.%s") % oldres
).str().c_str();
42 cerr
<< "Ngram loading error..." << endl
;
43 cerr
<< "done" << endl
;
45 get_sarch().set_blocked(true);
// NOTE(review): of these counters only z is used in the lines shown here; a
// later for loop declares its own int i rather than using this one.
48 int i
,ii
,iii
,n
,nn
,nnn
,z
;
// Statistics object built over the dictionary returned by
// get_sarch().get_dict(); the second argument is 3 (presumably the n-gram
// order -- confirm against the NgramStats constructor).
50 NgramStats
stats(get_sarch().get_dict(),3);
// Main input loop: one line of standard input is read into s per iteration.
51 while (getline(cin
,s
)) {
54 cerr
<< count
<< endl
;
// Split the line s into pieces stored in ss, then visit each piece by index.
// NOTE(review): z is an int compared against ss.size(); if size() returns an
// unsigned type this is a signed/unsigned comparison -- confirm the type of ss.
58 sentences_split(s
,ss
);
59 for (z
= 0;z
< ss
.size();z
++) {
// Guard on a zero syllable count; the statement it guards is not visible in
// this listing.
63 if (!st
.get_syllable_count())
65 //cout << ">>" << count << endl;
// Word-state factories passed to pre_construct below: the exact and lower
// factories are registered, the fuzzy one is commented out.
68 WordStateFactories factories
;
69 ExactWordStateFactory exact
;
70 LowerWordStateFactory lower
;
71 //FuzzyWordStateFactory fuzzy;
72 factories
.push_back(&exact
);
73 factories
.push_back(&lower
);
74 //factories.push_back(&fuzzy);
// Three-step construction of words: pre_construct with the factories,
// proper-name marking on the intermediate wes, then post_construct.
75 words
.pre_construct(st
,wes
,factories
);
76 mark_proper_name(st
,wes
);
77 words
.post_construct(wes
);
78 //cerr << words << endl;
// NOTE(review): raw new here and a delete through a C-style cast further
// down. The variable names differ (dagw2 versus dag) and the lines in between
// are not visible, so the pairing of allocation and release cannot be checked
// from this listing.
83 dagw2
= new WordDAG2(&dagw
);
87 //sc.count(words,stats);
90 delete (WordDAG2
*)dag
;
95 File
fff("dump","wt");
99 cerr
<< "Calculating... ";
// The member call on get_ngram() is used for estimation; the call to the free
// function estimate() is commented out.
100 //estimate(get_ngram(),stats);
101 get_ngram().estimate(stats
);
102 //wfst.enable_ngram(true);
104 cerr
<< "Saving... ";
// NOTE(review): same c_str()-on-a-temporary pattern as for the load name
// earlier in this function; the same lifetime concern applies.
105 str
= (boost::format("ngram.%s") % newres
).str().c_str();
107 get_ngram().write(ff
);
// Fixed count of 50 iterations; only part of the loop body is visible here
// (an output stream opened from oss and progress messages on stderr).
110 for (int i = 0;i < 50;i ++) {
113 ofstream ofs(oss.str().c_str());
114 cerr << "Iteration " << i << "... ";
116 cerr << "done" << endl;
// Estimates the language model from fractional n-gram counts, using one
// discount object per n-gram order.
//
// What the visible lines show:
//   - order is taken from get_ngram().setorder(0);
//   - for each i from 1 to order a WittenBell object is allocated into
//     discounts[i-1], given the debug level of stats, and asked to estimate
//     itself from stats for order i;
//   - get_ngram().estimate() is then called with stats (cast to a reference
//     to NgramCounts<FloatCount>) and the discounts array, and error is set
//     to the negation of its result;
//   - a final loop deletes discounts[i-1] for each i from 1 to order.
//
// NOTE(review), to be confirmed against the complete file:
//   - The ngram parameter is not referenced in the visible lines; the code
//     goes through get_ngram() instead.
//   - Discount *discounts[order] has a bound known only at run time. That is
//     a variable-length array, a compiler extension rather than standard C++.
//   - The first loop condition combines !error and i <= order with bitwise &
//     instead of &&. Since <= binds tighter than &, it parses as
//     (!error) & (i <= order); it does not short-circuit, and && is the
//     conventional operator here.
//   - The failure message mentions a GT discount and prints i + 1, while the
//     objects created are WittenBell and the order passed to estimate is i.
//     The orphaned comment lines in the body also describe Good Turing
//     defaults, which does not match the WittenBell allocation.
//   - If the first loop ends early, slots of discounts that were never
//     assigned are still handed to delete by the final loop. Whether the
//     array is zero-initialized is not visible in this listing.
//   - The lines starting with an asterisk inside the body are the interiors
//     of block comments whose opening and closing markers are missing from
//     this listing.
122 void estimate(Ngram
&ngram
,NgramFractionalStats
&stats
)
125 * If no discount method was specified we do the default, standard
126 * thing. Good Turing discounting with the specified min and max counts
129 unsigned order
= get_ngram().setorder(0);
130 Discount
*discounts
[order
];
132 Boolean error
= false;
134 for (i
= 1; !error
& i
<= order
; i
++) {
135 discounts
[i
-1] = new WittenBell();
137 * Transfer the LMStats's debug level to the newly
138 * created discount objects
140 discounts
[i
-1]->debugme(stats
.debuglevel());
142 if (!discounts
[i
-1]->estimate(stats
, i
)) {
143 std::cerr
<< "failed to estimate GT discount for order " << i
+ 1
150 error
= !get_ngram().estimate((NgramCounts
<FloatCount
>&)stats
, discounts
);
153 for (i
= 1; i
<= order
; i
++) {
154 delete discounts
[i
-1];