1 // -*- tab-width: 2 -*-
10 #include "softcount.h"
11 #include <boost/format.hpp>
// Forward declaration of estimate(); the definition appears further down in
// this file. Both arguments are taken by non-const reference.
// The only call visible in main() is commented out.
15 void estimate(Ngram
&ngram
,NgramFractionalStats
&stats
);
// Program entry point. From the visible lines: takes two command-line
// arguments (argv[1] -> oldres, argv[2] -> newres), builds a file name from
// "ngram.<oldres>", reads stdin line by line, splits each line into
// sentences, accumulates counts into `stats`, calls
// get_ngram().estimate(stats, NULL), writes the model, and finally runs a
// 50-iteration loop.
// NOTE(review): this listing has gaps -- the leading numbers appear to be the
// original line numbers and they skip (17 -> 20 -> 24 ...). Braces, guarding
// conditions and the declarations of str, s, ss, st, words, sc, ff, oss and
// count are not visible here.
17 int main(int argc
,char **argv
)
// Usage error written to stderr; the condition guarding it is not visible.
20 fprintf(stderr
,"Need at least 2 argument.\n");
// The first two command-line arguments; each is later substituted into a
// file-name format string.
24 char *oldres
= argv
[1];
25 char *newres
= argv
[2];
32 cerr
<< "Loading... ";
// Build the input name "ngram.<oldres>".
// NOTE(review): boost::format::str() returns a std::string by value. If `str`
// is a raw `const char *` (its declaration is not visible here), it points
// into a temporary that is destroyed at the end of this statement -- confirm
// the type of `str`.
35 str
= (boost::format("ngram.%s") % oldres
).str().c_str();
// Message for a failed n-gram load; the check itself is not visible.
40 cerr
<< "Ngram loading error..." << endl
;
41 cerr
<< "done" << endl
;
// Semantics of set_blocked() are not visible in this file.
43 get_sarch().set_blocked(true);
// Scratch integers. In the visible lines only `z` is used (sentence index).
// NOTE(review): this `i` is shadowed by the `int i` of the 50-iteration loop
// near the end.
46 int i
,ii
,iii
,n
,nn
,nnn
,z
;
// Count accumulator built over the dictionary returned by
// get_sarch().get_dict(). The second argument is 2 -- presumably the n-gram
// order; TODO confirm.
48 NgramFractionalStats
stats(get_sarch().get_dict(),2);
// Consume stdin one line at a time.
49 while (getline(cin
,s
)) {
// Progress output: print `count` to stderr (how and when `count` is updated
// is not visible here).
52 cerr
<< count
<< endl
;
// Split the line `s` into sentences `ss`, then handle each sentence.
56 sentences_split(s
,ss
);
// NOTE(review): `z` is an int; if ss.size() is unsigned (standard container)
// this is a signed/unsigned comparison -- confirm the type of `ss`.
57 for (z
= 0;z
< ss
.size();z
++) {
// Branch taken when get_syllable_count() is zero; its body is not visible
// (presumably the sentence is skipped -- TODO confirm).
61 if (!st
.get_syllable_count())
// Construct a Segmentation from `words.we`.
65 Segmentation
seg(words
.we
);
// Accumulate the counts for `words` into `stats`.
67 sc
.count(words
,stats
);
// After the input loop: estimate the model from the collected statistics,
// passing NULL as the second argument.
71 cerr
<< "Calculating... ";
72 //estimate(ngram,stats);
73 get_ngram().estimate(stats
,NULL
);
74 //wfst.enable_ngram(true);
// NOTE(review): the output name is built from "get_ngram().%s" while the
// input name above is built from "ngram.%s". Together with the commented-out
// `estimate(ngram,stats)` call, this looks like a leftover of a global
// `ngram.` -> `get_ngram().` replacement that also hit this string literal;
// the intended name is probably "ngram.<newres>". Confirm before changing,
// since it alters the name of the file that gets written.
// The same c_str()-on-a-temporary caveat as for the input name applies.
77 str
= (boost::format("get_ngram().%s") % newres
).str().c_str();
// Write the model to `ff` (opened on a line not visible here).
79 get_ngram().write(ff
);
// Fixed 50 iterations; each one opens an output stream named by `oss` and
// logs progress. The work done between "Iteration" and "done" is not visible.
82 for (int i = 0;i < 50;i ++) {
85 ofstream ofs(oss.str().c_str());
86 cerr << "Iteration " << i << "... ";
88 cerr << "done" << endl;
// Estimates the model from `stats`: creates one GoodTuring discount per
// order (default min/max counts), estimates each discount from `stats`, runs
// get_ngram().estimate() with the discounts, then deletes the discounts.
// NOTE(review): the `ngram` parameter is not used in the visible lines; all
// model access goes through get_ngram() instead -- confirm this is intended.
// NOTE(review): this listing has gaps (the leading numbers appear to be
// original line numbers and they skip); the declaration of `i`, the error
// branch body and the closing braces are not visible here.
94 void estimate(Ngram
&ngram
,NgramFractionalStats
&stats
)
97 * If no discount method was specified we do the default, standard
98 * thing. Good Turing discounting with the specified min and max counts
// The order is taken from the return value of setorder(0) -- presumably this
// returns the current order without changing it; TODO confirm against the
// Ngram class.
101 unsigned order
= get_ngram().setorder(0);
// One Discount* slot per order.
// NOTE(review): `order` is a run-time value, so this is a variable-length
// array, which is a compiler extension rather than standard C++. The slots
// are not initialised on this line.
102 Discount
*discounts
[order
];
104 Boolean error
= false;
// Orders are 1-based; slot i-1 holds the discount for order i.
// NOTE(review): the condition uses bitwise `&`. It parses as
// `(!error) & (i <= order)`, which yields the same truth value as `&&` but
// without short-circuiting; `&&` was probably intended.
106 for (i
= 1; !error
& i
<= order
; i
++) {
// Allocate the Good-Turing discount for this order with the default bounds.
107 discounts
[i
-1] = new GoodTuring(GT_defaultMinCount
, GT_defaultMaxCount
);
109 * Transfer the LMStats's debug level to the newly
110 * created discount objects
112 discounts
[i
-1]->debugme(stats
.debuglevel());
// Estimate the discount for order i from the statistics; a false return is
// reported on std::cerr below.
114 if (!discounts
[i
-1]->estimate(stats
, i
)) {
// NOTE(review): the message prints `i + 1`, but the order passed to
// estimate() just above is `i`, so the reported order looks off by one.
115 std::cerr
<< "failed to estimate GT discount for order " << i
+ 1
// Run the model estimation with the prepared discounts; `stats` is passed
// through a C-style cast to NgramCounts<FloatCount>&. `error` becomes true
// when estimate() returns false.
122 error
= !get_ngram().estimate((NgramCounts
<FloatCount
>&)stats
, discounts
);
// Release the discount objects.
// NOTE(review): this deletes all `order` slots. If the loop above stopped
// early on an error, the remaining slots were never assigned in the visible
// code -- confirm they are initialised (e.g. zeroed) on a line not shown,
// otherwise this deletes indeterminate pointers.
125 for (i
= 1; i
<= order
; i
++) {
126 delete discounts
[i
-1];