Updated source code from upstream SVN
[svmtool++.git] / src / bin / SVMTagger.cc
blob05683c6c8c78639a4769e7f58bd9306587694477
1 /*
2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "hash.h"
23 #include "list.h"
24 #include "dict.h"
25 #include "weight.h"
26 #include "swindow.h"
27 #include "tagger.h"
28 #include "er.h"
30 #define ERROR1 "\nERROR: Value incorrect in -T option.\n"
31 #define ERROR2 "\nERROR: Value incorrect in -S option.\n"
32 #define ERROR3 "\nERROR: Value incorrect in -I option.\n"
33 #define ERROR4 "\nERROR: Value incorrect in -L option.\n"
34 #define ERROR5 "\nERROR: Value incorrect in -I or -L option. Window length have to be greater than Interest Point.\n"
35 #define ERROR6 "\nERROR: You can not change Interes Point (-I) if you don't change the length of the window (-L).\n"
36 #define ERROR8 "\nERROR: Incorrect Options\n"
37 #define ERRORK "\nERROR: Value incorrect in -K option.\n"
38 #define ERRORU "\nERROR: Value incorrect in -U option.\n"
40 int verbose = 0;
/**
 * Print the SVMTagger banner, the usage line, the option summary and an
 * example invocation to stderr.
 *
 * @param progname  Program name inserted into the usage and example lines.
 */
void printHelp(char *progname)
{
  // Fixed option summary, written verbatim after the usage line.
  static const char *const optionLines[] = {
    "options:\n",
    "\t-L or -l <Window Lenght>\n",
    "\t\t<Window Lenght> have to be greater than 2\n",
    "\t\t7 (default)\n",
    "\t-I or -i <Interest Point>\n",
    "\t\t<Interest Point> have to be greater than 0\n",
    "\t\t4 (default)\n",
    "\t-S or -s <direction>\n",
    "\t\tLR\tleft-to-right\t(default)\n",
    "\t\tRL\tright-to-left\n",
    "\t\tLRL\tboth left-to-right and right-to-left\n",
    "\t-T or -t <strategy>\n",
    "\t\t0\tone-pass [default - requires model 0]\n",
    "\t\t1\ttwo-passes [revisiting results and relabeling - requires model 2 and model 1]\n",
    "\t\t2\tone-pass [robust against unknown words - requires model 0 and model 2]\n",
    "\t\t3\tone-pass [unsupervised learning models - requires model 3]\n",
    "\t\t4\tone-pass [very robust against unknown words - requires model 4]\n",
    "\t\t5\tone-pass [sentence-level likelihood - requires model 0] Not implemented!!\n",
    "\t\t6\tone-pass [robust sentence-level likelihood - requires model 4] Not implemented!!\n",
    "\t-B or -b <backup_lexicon>\n",
    "\t-K <n> weight filtering threshold for known words (default is 0)\n",
    "\t-U <n> weight filtering threshold for unknown words (default is 0)\n",
    "\t-V or -v verbose\n",
    "\t-A or -a show scores\n"
  };
  const size_t lineCount = sizeof(optionLines) / sizeof(optionLines[0]);

  fputs("\nSVMTool++ v 1.1.6 -- SVMTagger\n\n", stderr);
  fprintf(stderr, "Usage : %s [options] <model> < stdin > stdout\n\n", progname);

  for (size_t n = 0; n < lineCount; ++n)
    fputs(optionLines[n], stderr);

  fputs("\nmodel: model location (path/name)\n", stderr);
  fprintf(stderr, "\nUsage : %s -V -S LRL -T 0 /home/usuaris/smoya/SVMT/eng/WSJTP < WSJTP.TEST > WSJTP.TEST.OUT\n\n", progname);
}
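/*
 * Return codes of options():
 *   -1   -t error
 *   -2   -s error
 *   -3   -i error
 *   -4   -l error
 *   -5   length<=ip
 *   -6   -i given without -l
 *   -8   unrecognized option
 *   'k'  -k error
 *   'u'  -u error
 *    0   no error found
 */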
/* True when arg equals either spelling (lower or upper case) of an option. */
static bool optIs(const char *arg, const char *lower, const char *upper)
{
  return strcmp(arg, lower) == 0 || strcmp(arg, upper) == 0;
}

/**
 * Validate the command-line options that precede the model path.
 *
 * Only argv[1] .. argv[argc-2] are inspected as option names; argv[argc-1]
 * is left for the caller (main uses it as the model location).
 *
 * @param argc  Number of entries in argv.
 * @param argv  Command-line arguments.
 * @return 0 when no problem is found; any other value is an error:
 *         -2  value of -s is not LR, RL or LRL
 *         -3  value of -i is smaller than 1
 *         -4  value of -l is negative
 *         -5  both -i and -l given but length <= interest point
 *         -6  -i given without -l
 *         -8  unrecognized option
 *         'k' value of -k is not a number
 *         'u' value of -u is not a number
 *         Note that 'k' and 'u' are positive, so callers must compare the
 *         result against 0 rather than test for a negative value.
 */
int options(int argc,char *argv[])
{
  int isIP=0, ip=4, isLength=0, length=7;

  for (int i=1;i<argc-1;i++)
  {
    const char *opt = argv[i];

    if (optIs(opt,"-t","-T"))
    {
      // The strategy value is skipped without validation.
      i++;
      //if (atoi(argv[i])==0) return -1;
    }
    else if (optIs(opt,"-s","-S"))
    {
      i++;
      if (strcmp(argv[i],"LR")!=0 &&
          strcmp(argv[i],"RL")!=0 &&
          strcmp(argv[i],"LRL")!=0) return -2;
    }
    else if (optIs(opt,"-i","-I"))
    {
      i++;
      const int value = atoi(argv[i]);
      if (value<1) return -3;
      ip = value;
      isIP = 1;
    }
    else if (optIs(opt,"-l","-L"))
    {
      i++;
      const int value = atoi(argv[i]);
      if (value<0) return -4;
      length = value;
      isLength = 1;
    }
    else if (optIs(opt,"-b","-B"))
    {
      // Consume the following argument as the lexicon path unless it is
      // itself one of these option names.
      const char *next = argv[i+1];
      if (!optIs(next,"-v","-V") && !optIs(next,"-b","-B")
          && !optIs(next,"-l","-L") && !optIs(next,"-s","-S")
          && !optIs(next,"-t","-T") && !optIs(next,"-i","-I")) i++;
    }
    else if (optIs(opt,"-k","-K"))
    {
      // Validate the VALUE (argv[i+1]), not the option name, and skip it so
      // the next iteration does not mistake it for an unknown option.
      float k;
      i++;
      if (sscanf(argv[i],"%f",&k) != 1) return 'k';
    }
    else if (optIs(opt,"-u","-U"))
    {
      float u;
      i++;
      if (sscanf(argv[i],"%f",&u) != 1) return 'u';
    }
    else if (!optIs(opt,"-a","-A") && !optIs(opt,"-v","-V")) return -8;
  }

  if (isIP && !isLength) return -6;
  if (isIP && isLength && length<=ip) return -5;

  return 0;
}
141 int main(int argc, char *argv[])
143 int ret=0;
145 erCompRegExp();
147 if ((argc<=1) || ((ret=options(argc,argv))<0))
149 switch(ret)
151 case -1: fprintf(stderr,ERROR1); break;
152 case -2: fprintf(stderr,ERROR2); break;
153 case -3: fprintf(stderr,ERROR3); break;
154 case -4: fprintf(stderr,ERROR4); break;
155 case -5: fprintf(stderr,ERROR5); break;
156 case -6: fprintf(stderr,ERROR6); break;
157 case -8: fprintf(stderr,ERROR8); break;
158 case 'k': fprintf(stderr,ERRORK); break;
159 case 'u': fprintf(stderr,ERRORU); break;
162 printHelp(argv[0]);
163 exit(0);
166 tagger t(argv[argc-1]);
168 for (int i=1;i<argc-1;i++)
170 if (strcmp(argv[i],"-k")==0 || strcmp(argv[i],"-K")==0)
171 { t.taggerPutKWeightFilter(atof(argv[i+1])); i++;}
172 else if (strcmp(argv[i],"-u")==0 || strcmp(argv[i],"-U")==0)
173 { t.taggerPutUWeightFilter(atof(argv[i+1])); i++;}
174 else if (strcmp(argv[i],"-t")==0 || strcmp(argv[i],"-T")==0)
176 //modstat t.taggerPutStrategy(atoi(argv[i+1])-1);
177 t.taggerPutStrategy(atoi(argv[i+1]));
178 i++;
180 else if (strcmp(argv[i],"-s")==0 || strcmp(argv[i],"-S")==0)
181 { t.taggerPutFlow(argv[i+1]); i++;}
182 else if (strcmp(argv[i],"-v")==0 || strcmp(argv[i],"-V")==0) verbose = 1;
183 else if (strcmp(argv[i],"-a")==0 || strcmp(argv[i],"-A")==0)
184 { t.taggerActiveShowScoresFlag(); }
185 else if (strcmp(argv[i],"-l")==0 || strcmp(argv[i],"-L")==0)
186 { t.taggerPutWinLength(atoi(argv[i+1])); i++;}
187 else if (strcmp(argv[i],"-i")==0 || strcmp(argv[i],"-I")==0)
188 { t.taggerPutWinIndex(atoi(argv[i+1])); i++;}
189 else if (strcmp(argv[i],"-b")==0 || strcmp(argv[i],"-B")==0)
190 { t.taggerPutBackupDictionary(argv[i+1]); i++;}
193 if (verbose) fprintf(stderr,"\nSVMTool++ v 1.1.6 -- SVMTagger\n\n");
195 t.taggerLoadModelsForTagging();
197 t.taggerInit(std::cin, std::cerr);
198 t.taggerRun();
200 erFreeRegExp();
201 return 0;