Added .gitignore
[vspell.git] / tests / sytrain.cpp
blob 22cbf75de2f53db964f2ad61cf295e99d7d767fc
1 // -*- tab-width: 2 -*-
2 #include "pfs.h"
3 #include "distance.h"
4 #include <string>
5 #include <fstream>
6 #include <cmath>
7 #include <cstdio>
8 #include <sstream>
9 #include <iostream>
10 #include "sentence.h"
11 #include <boost/format.hpp>
13 using namespace std;
15 int main(int argc,char **argv)
17 if (argc < 3) {
18 fprintf(stderr,"Need at least 2 argument.\n");
19 return 0;
22 char *oldres = argv[1];
23 char *newres = argv[2];
24 bool nofuz = true;
25 bool nofuz2 = true;
26 const char *str;
28 dic_init();
30 cerr << "Loading... ";
31 str = "wordlist";
32 warch.load(str);
33 cerr << "done" << endl;
35 get_sarch().set_blocked(true);
37 string s;
38 int i,ii,iii,n,nn,nnn,z;
39 int count = 0;
40 NgramStats syllable_stats(get_sarch().get_dict(),2);
41 while (getline(cin,s)) {
42 count ++;
43 if (count % 200 == 0)
44 cerr << count << endl;
45 if (s.empty())
46 continue;
47 vector<string> ss;
48 sentences_split(s,ss);
49 for (z = 0;z < ss.size();z ++) {
50 Sentence st(ss[z]);
51 st.standardize();
52 st.tokenize();
53 if (!st.get_syllable_count())
54 continue;
56 VocabIndex *vi;
57 n = st.get_syllable_count();
58 vi = new VocabIndex[n+1];
59 vi[n] = Vocab_None;
60 for (i = 0;i < n;i ++)
61 vi[i] = get_sarch().in_dict(st[i].get_cid()) ? st[i].get_cid() : get_id(UNK_ID);
62 syllable_stats.countSentence(vi);
63 delete[] vi;
67 cerr << "Calculating... ";
68 get_syngram().estimate(syllable_stats);
69 cerr << "Saving... ";
70 str = (boost::format("syngram.%s") % newres).str().c_str();
71 File fff(str,"wt");
72 get_syngram().write(fff);
73 cerr << endl;
74 return 0;