test
[ws10smt.git] / vest / mr_vest_reduce.cc
blob5efcc19a3091e644acf7be2e294377def9f721a0
1 #include <sstream>
2 #include <iostream>
3 #include <fstream>
4 #include <vector>
6 #include <boost/program_options.hpp>
7 #include <boost/program_options/variables_map.hpp>
9 #include "sparse_vector.h"
10 #include "error_surface.h"
11 #include "line_optimizer.h"
12 #include "hg_io.h"
14 using namespace std;
15 namespace po = boost::program_options;
17 void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
18 po::options_description opts("Configuration options");
19 opts.add_options()
20 ("loss_function,l",po::value<string>(), "Loss function being optimized")
21 ("help,h", "Help");
22 po::options_description dcmdline_options;
23 dcmdline_options.add(opts);
24 po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
25 bool flag = conf->count("loss_function") == 0;
26 if (flag || conf->count("help")) {
27 cerr << dcmdline_options << endl;
28 exit(1);
32 int main(int argc, char** argv) {
33 po::variables_map conf;
34 InitCommandLine(argc, argv, &conf);
35 const string loss_function = conf["loss_function"].as<string>();
36 ScoreType type = ScoreTypeFromString(loss_function);
37 LineOptimizer::ScoreType opt_type = LineOptimizer::MAXIMIZE_SCORE;
38 if (type == TER || type == AER) {
39 opt_type = LineOptimizer::MINIMIZE_SCORE;
41 string last_key;
42 vector<ErrorSurface> esv;
43 while(cin) {
44 string line;
45 getline(cin, line);
46 if (line.empty()) continue;
47 size_t ks = line.find("\t");
48 assert(string::npos != ks);
49 assert(ks > 2);
50 string key = line.substr(2, ks - 2);
51 string val = line.substr(ks + 1);
52 if (key != last_key) {
53 if (!last_key.empty()) {
54 float score;
55 double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
56 cout << last_key << "|" << x << "|" << score << endl;
58 last_key = key;
59 esv.clear();
61 if (val.size() % 4 != 0) {
62 cerr << "B64 encoding error 1! Skipping.\n";
63 continue;
65 string encoded(val.size() / 4 * 3, '\0');
66 if (!B64::b64decode(reinterpret_cast<const unsigned char*>(&val[0]), val.size(), &encoded[0], encoded.size())) {
67 cerr << "B64 encoding error 2! Skipping.\n";
68 continue;
70 esv.push_back(ErrorSurface());
71 esv.back().Deserialize(type, encoded);
73 if (!esv.empty()) {
74 // cerr << "ESV=" << esv.size() << endl;
75 // for (int i = 0; i < esv.size(); ++i) { cerr << esv[i].size() << endl; }
76 float score;
77 double x = LineOptimizer::LineOptimize(esv, opt_type, &score);
78 cout << last_key << "|" << x << "|" << score << endl;
80 return 0;