Reordered files
[svmtool++.git] / src / bin / SVMTagger.cc
blob0386120a56ef41f221d85ae9964aab9e5908217c
1 /*
2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "hash.h"
23 #include "list.h"
24 #include "dict.h"
25 #include "weight.h"
26 #include "swindow.h"
27 #include "tagger.h"
28 #include "er.h"
/*
 * Diagnostics printed by main() when options() rejects the command line.
 * The numeric suffix matches the magnitude of the negative code returned by
 * options(); ERRORK and ERRORU belong to the 'k' and 'u' codes.
 * There is no ERROR7.
 */

/* Invalid value for a single option. */
#define ERROR1  "\nERROR: Value incorrect in -T option.\n"
#define ERROR2  "\nERROR: Value incorrect in -S option.\n"
#define ERROR3  "\nERROR: Value incorrect in -I option.\n"
#define ERROR4  "\nERROR: Value incorrect in -L option.\n"
#define ERRORK  "\nERROR: Value incorrect in -K option.\n"
#define ERRORU  "\nERROR: Value incorrect in -U option.\n"

/* Inconsistent combination of -I and -L. */
#define ERROR5  "\nERROR: Value incorrect in -I or -L option. Window length have to be greater than Interest Point.\n"
#define ERROR6  "\nERROR: You can not change Interes Point (-I) if you don't change the length of the window (-L).\n"

/* Unrecognised argument. */
#define ERROR8  "\nERROR: Incorrect Options\n"

/* Verbosity flag, defined in another translation unit. */
extern int verbose_svmtool;
/*
 * printHelp: write the SVMTagger banner, the usage line, the description of
 * every command-line option and an example invocation to stderr.
 */
void printHelp()
{
  /* Each entry is emitted verbatim, in order. */
  static const char *const helpLines[] = {
    "\nSVMTool++ v 1.1.2 -- SVMTagger\n\n",
    "Usage : svmt [options] <model> < stdin > stdout\n\n",
    "options:\n",
    "\t-L or -l <Window Lenght>\n",
    "\t\t<Window Lenght> have to be greater than 2\n",
    "\t\t7 (default)\n",
    "\t-I or -i <Interest Point>\n",
    "\t\t<Interest Point> have to be greater than 0\n",
    "\t\t4 (default)\n",
    "\t-S or -s <direction>\n",
    "\t\tLR\tleft-to-right\t(default)\n",
    "\t\tRL\tright-to-left\n",
    "\t\tLRL\tboth left-to-right and right-to-left\n",
    "\t-T or -t <strategy>\n",
    "\t\t0\tone-pass [default - requires model 0]\n",
    "\t\t1\ttwo-passes [revisiting results and relabeling - requires model 2 and model 1]\n",
    "\t\t2\tone-pass [robust against unknown words - requires model 0 and model 2]\n",
    "\t\t3\tone-pass [unsupervised learning models - requires model 3]\n",
    "\t\t4\tone-pass [very robust against unknown words - requires model 4]\n",
    "\t\t5\tone-pass [sentence-level likelihood - requires model 0] Not implemented!!\n",
    "\t\t6\tone-pass [robust sentence-level likelihood - requires model 4] Not implemented!!\n",
    "\t-B or -b <backup_lexicon>\n",
    "\t-K <n> weight filtering threshold for known words (default is 0)\n",
    "\t-U <n> weight filtering threshold for unknown words (default is 0)\n",
    "\t-V or -v verbose\n",
    "\nmodel: model location (path/name)\n",
    "\nUsage : SVMTagger -V -S LRL -T 0 /home/users/me/SVMTool/models/eng/WSJTP < WSJTP.TEST > WSJTP.TEST.OUT\n\n"
  };
  const int lineCount = (int)(sizeof(helpLines) / sizeof(helpLines[0]));

  for (int n = 0; n < lineCount; n++)
  {
    fputs(helpLines[n], stderr);
  }
}
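/*
 * Return codes of options():
 *    0  command line accepted
 *   -1  -t error
 *   -2  -s error
 *   -3  -i error
 *   -4  -l error
 *   -5  length<=ip
 *   -6  -i given without -l
 *   -8  unrecognised option
 *  'k'  -k error
 *  'u'  -u error
 */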
/* optIs: 1 when arg equals either spelling (lower/upper case) of an option flag. */
static int optIs(const char *arg, const char *lower, const char *upper)
{
  return strcmp(arg, lower) == 0 || strcmp(arg, upper) == 0;
}

/* optIsFloat: 1 when the whole of text converts as a floating-point number. */
static int optIsFloat(const char *text)
{
  char *end = NULL;

  (void)strtod(text, &end);
  return end != text && *end == '\0';
}

/* optIsStrategy: 1 when the whole of text is a non-negative decimal integer. */
static int optIsStrategy(const char *text)
{
  char *end = NULL;
  long value = strtol(text, &end, 10);

  return end != text && *end == '\0' && value >= 0;
}

/*
 * options: validate the command line before the tagger is built.
 *
 * Only argv[1] .. argv[argc-2] are scanned; main() passes argv[argc-1] to
 * the tagger constructor as the model location.
 *
 * Returns 0 when every option is acceptable, otherwise:
 *   -1  -t/-T value is not a non-negative integer
 *   -2  -s/-S value is not LR, RL or LRL
 *   -3  -i/-I value is lower than 1
 *   -4  -l/-L value is negative
 *   -5  window length is not greater than the interest point
 *   -6  -i/-I given without -l/-L
 *   -8  unrecognised option
 *  'k'  -k/-K value is not a number
 *  'u'  -u/-U value is not a number
 *
 * NOTE: 'k' and 'u' are positive, so callers must test the result against 0
 * rather than for a negative value.
 */
int options(int argc,char *argv[])
{
  /* Flags that stop -b/-B from swallowing the next argument as its value. */
  static const char *const flagsAfterB[] = {
    "-v", "-V", "-b", "-B", "-l", "-L",
    "-s", "-S", "-t", "-T", "-i", "-I"
  };
  const int flagCount = (int)(sizeof(flagsAfterB) / sizeof(flagsAfterB[0]));

  int isIP = 0, ip = 4, isLength = 0, length = 7;

  for (int i = 1; i < argc - 1; i++)
  {
    const char *opt = argv[i];

    if (optIs(opt, "-t", "-T"))
    {
      i++;
      /* "0" is a valid strategy, so atoi()==0 cannot be used to detect bad input. */
      if (!optIsStrategy(argv[i])) return -1;
    }
    else if (optIs(opt, "-s", "-S"))
    {
      i++;
      if (strcmp(argv[i], "LR") != 0 &&
          strcmp(argv[i], "RL") != 0 &&
          strcmp(argv[i], "LRL") != 0) return -2;
    }
    else if (optIs(opt, "-i", "-I"))
    {
      i++;
      ip = atoi(argv[i]);
      if (ip < 1) return -3;
      isIP = 1;
    }
    else if (optIs(opt, "-l", "-L"))
    {
      i++;
      /* NOTE(review): the help text asks for a length greater than 2, but
         only negative values are rejected here - confirm the intended bound. */
      length = atoi(argv[i]);
      if (length < 0) return -4;
      isLength = 1;
    }
    else if (optIs(opt, "-b", "-B"))
    {
      /* The lexicon path is consumed unless the next argument is itself a flag. */
      int nextIsFlag = 0;
      for (int f = 0; f < flagCount; f++)
      {
        if (strcmp(argv[i + 1], flagsAfterB[f]) == 0)
        {
          nextIsFlag = 1;
          break;
        }
      }
      if (!nextIsFlag) i++;
    }
    else if (optIs(opt, "-k", "-K"))
    {
      /* Validate and consume the threshold that follows the flag. */
      i++;
      if (!optIsFloat(argv[i])) return 'k';
    }
    else if (optIs(opt, "-u", "-U"))
    {
      i++;
      if (!optIsFloat(argv[i])) return 'u';
    }
    else if (!optIs(opt, "-v", "-V"))
    {
      return -8;
    }
  }

  if (isIP && !isLength) return -6;
  /* From here on isIP implies isLength. */
  if (isIP && length <= ip) return -5;

  return 0;
}
144 int main(int argc, char *argv[])
146 int ret=0;
148 verbose_svmtool = FALSE;
150 erCompRegExp();
152 if ((argc<=1) || ((ret=options(argc,argv))<0))
154 switch(ret)
156 case -1: fprintf(stderr,ERROR1); break;
157 case -2: fprintf(stderr,ERROR2); break;
158 case -3: fprintf(stderr,ERROR3); break;
159 case -4: fprintf(stderr,ERROR4); break;
160 case -5: fprintf(stderr,ERROR5); break;
161 case -6: fprintf(stderr,ERROR6); break;
162 case -8: fprintf(stderr,ERROR8); break;
163 case 'k': fprintf(stderr,ERRORK); break;
164 case 'u': fprintf(stderr,ERRORU); break;
167 printHelp();
168 exit(0);
171 tagger t(argv[argc-1]);
173 for (int i=1;i<argc-1;i++)
175 if (strcmp(argv[i],"-k")==0 || strcmp(argv[i],"-K")==0)
176 { t.taggerPutKWeightFilter(atof(argv[i+1])); i++;}
177 else if (strcmp(argv[i],"-u")==0 || strcmp(argv[i],"-U")==0)
178 { t.taggerPutUWeightFilter(atof(argv[i+1])); i++;}
179 else if (strcmp(argv[i],"-t")==0 || strcmp(argv[i],"-T")==0)
181 //modstat t.taggerPutStrategy(atoi(argv[i+1])-1);
182 t.taggerPutStrategy(atoi(argv[i+1]));
183 i++;
185 else if (strcmp(argv[i],"-s")==0 || strcmp(argv[i],"-S")==0)
186 { t.taggerPutFlow(argv[i+1]); i++;}
187 else if (strcmp(argv[i],"-v")==0 || strcmp(argv[i],"-V")==0) verbose_svmtool = TRUE;
188 else if (strcmp(argv[i],"-l")==0 || strcmp(argv[i],"-L")==0)
189 { t.taggerPutWinLength(atoi(argv[i+1])); i++;}
190 else if (strcmp(argv[i],"-i")==0 || strcmp(argv[i],"-I")==0)
191 { t.taggerPutWinIndex(atoi(argv[i+1])); i++;}
192 else if (strcmp(argv[i],"-b")==0 || strcmp(argv[i],"-B")==0)
193 { t.taggerPutBackupDictionary(argv[i+1]); i++;}
196 if (verbose_svmtool) fprintf(stderr,"\nSVMTool++ v 1.1.2 -- SVMTagger\n\n");
198 t.taggerLoadModelsForTagging();
200 t.taggerInit();
201 t.taggerRun();
203 erFreeRegExp();
204 return 0;