softcount: tolerate zero ngrams
[vspell.git] / utils / gen-tests.cpp
blob6e0881d7f2090c7150f9fb1ccd01a0439c836b95
1 #include <iostream>
2 #include <vector>
3 #include <string>
4 #include <cgen.h>
6 using namespace std;
// One "{...}" alternative group located inside a corpus line.
struct Item
{
	// Index of the opening '{' within the line.
	int pos;
	// Length of the whole group, both braces included.
	int len;
	// The comma-separated alternatives found between the braces.
	std::vector<std::string> candidates;
};
14 void load_corpus(istream &corpus,ostream &os)
16 string s;
17 while (getline(corpus,s)) {
18 if (s.empty() ||s[0] == '%')
19 continue;
21 vector<Item> items;
22 string::size_type p = 0;
23 while ((p = s.find('{',p)) != string::npos) {
24 string::size_type p2 = s.find('}',p);
25 if (p2 == string::npos)
26 continue;
27 Item item;
28 item.pos = p;
29 item.len = p2-p+1;
30 string s2 = s.substr(item.pos+1,item.len-2);
31 while (!s2.empty()) {
32 p = s2.find(',');
33 if (p == string::npos)
34 p = s2.size();
35 item.candidates.push_back(s2.substr(0,p));
36 s2.erase(0,p);
37 while (!s2.empty() && s2[0] == ',')
38 s2.erase(0,1);
40 items.push_back(item);
41 p = p2;
44 CGen cg;
45 vector<uint> limits,pos;
46 int i,n = items.size();
47 limits.resize(n);
48 for (i = 0;i < n;i ++)
49 limits[i] = items[i].candidates.size();
51 cg.init(limits);
52 while (cg.step(pos)) {
53 string sentence;
54 p = 0;
55 for (i = 0;i < n;i ++) {
56 sentence += s.substr(p,items[i].pos-p);
57 p = items[i].pos+items[i].len;
58 sentence += items[i].candidates[pos[i]];
60 sentence += s.substr(p);
61 os << sentence << endl;
63 cg.done();
67 int main(int argc,char **argv)
69 load_corpus(cin,cout);
70 return 0;