Added models and dicts
[svmtool++.git] / lib / tagger.cc
blob02b6347f8c2aba03cad0c478c4ed038c5ed04b69
/*
 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
19 #include <unistd.h>
20 #include <stdio.h>
21 #include <time.h>
22 #include <sys/times.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "marks.h"
26 #include "strategies.h"
27 #include "hash.h"
28 #include "list.h"
29 #include "weight.h"
30 #include "dict.h"
31 #include "stack.h"
32 #include "swindow.h"
33 #include "tagger.h"
34 #include "common.h"
36 /***************************************************************/
// Process-time snapshots filled by times() at the beginning and at the end
// of tagger::taggerLoadModelsForTagging().
struct tms tbuffStartUp;
struct tms tbuffEndStartUp;

// Values returned by times() at those same two points.
clock_t startUpTime;
clock_t endStartUpTime;

// Time accumulated by tagger::taggerGenerateScore() while extracting features.
double sysFexTime = 0;
double usrFexTime = 0;
double realFexTime = 0;

// Time accumulated by tagger::taggerGenerateScore() around taggerSumWeight().
double sysSVMTime = 0;
double usrSVMTime = 0;
double realSVMTime = 0;

/***************************************************************/

// Verbosity flag defined in another translation unit.
extern int verbose;

// Number of entries in the unknown-word POS array; set by
// tagger::taggerCreateWeightUnkArray().
int NUM_UNK_POS = 0;
49 /***************************************************************/
51 hash_t *tagger::taggerCreateBiasHash(char *name)
53 hash_t *bias = new hash_t;
54 int i=0;
55 char c=' ',weight[20];
56 weight_node_t *w;
57 FILE *f;
59 hash_init(bias,40);
61 if ((f = fopen(name, "rt"))== NULL)
63 fprintf(stderr, "Error opening file: %s",name);
64 exit(0);
67 while (!feof(f))
69 c = fgetc(f);
70 if (c!='#')
72 w = new weight_node_t;
73 strcpy(weight,"");
74 i=0;
75 strcpy(w->pos,"");
76 while ((c!='\n') && (!feof(f)))
78 if (c!=' ' && c!='\n' && c!='\t' && i==1) sprintf(weight,"%s%c",weight,c);
79 else if (c!=' ' && c!='\n' && c!='\t' && i==0)
81 if (c!=':') sprintf(w->pos,"%s%c",w->pos,c);
82 else i=1;
84 c = fgetc(f);
86 w->data = (long double)0;
87 w->data = atof (weight);
88 hash_insert(bias,w->pos,(int)w);
89 } //end if
90 else while(c=fgetc(f)!='\n');
92 fclose(f);
93 return bias;
96 /***************************************************************/
98 tagger::tagger(char *model)
100 char name[150]="";
101 strcpy(flow,"LR");
102 taggerNumLaps = 1;
103 taggerKFilter = 0;
104 taggerUFilter = 0;
105 taggerStrategy = STRA_1P_DEFAULT; //modstrat //0;
106 taggerWinIndex = -1;
107 taggerWinLength = -1;
108 strcpy(taggerModelName,model);
109 strcpy (taggerBackupDict,"");
110 stk = new stack_t;
111 init_stack(stk);
114 /***************************************************************/
116 void tagger::taggerLoadModels(models_t *model, int taggerNumModel)
118 char name[150],flow2[5],flow1[5];
120 //Cargamos la lista de "features" para palabras conocidas
121 sprintf(name,"%s.A%d",taggerModelName,taggerNumModel);
122 if (verbose) fprintf(stderr,"\nLoading FEATURES FOR KNOWN WORDS from < %s >\n",name);
123 createFeatureList(name,&model->featureList);
124 //Cargamos la lista de "features" para palabras desconocidas
125 sprintf(name,"%s.A%d.UNK",taggerModelName,taggerNumModel);
126 if (verbose) fprintf(stderr,"\nLoading FEATURES FOR UNKNOWN WORDS from < %s >\n",name);
127 createFeatureList(name,&model->featureListUnk);
129 if (strcmp(flow,"LRL")==0)
131 strcpy(flow1,"LR"); strcpy(flow2,"RL");
133 sprintf(name,"%s (Right-to-Left)",flow1);
134 if (verbose) fprintf(stderr,"\nREADING MODELS < direction = %s >\n",name);
136 sprintf(name,"%s.M%d.%s.MRG",taggerModelName,taggerNumModel,flow2);
137 if (verbose) fprintf(stderr,"-. Loading MERGED MODEL FOR KNOWN WORDS from < %s >\n",name);
138 model->wr2 = new weightRepository(name,taggerKFilter);
140 sprintf(name,"%s.UNK.M%d.%s.MRG",taggerModelName,taggerNumModel,flow2);
141 if (verbose) fprintf(stderr,"-. Loading MERGED MODEL FOR UNKKNOWN WORDS from < %s >\n\n",name);
142 model->wrUnk2 = new weightRepository(name,taggerUFilter);
144 else strcpy(flow1,flow);
146 if (strcmp(flow1,"RL")==0) sprintf(name,"%s (Right-to-Left)",flow1);
147 else sprintf(name,"%s (Left-to-Right)",flow1);
149 if (verbose) fprintf(stderr,"\nREADING MODELS < direction = %s >\n",name);
151 sprintf(name,"%s.M%d.%s.MRG",taggerModelName,taggerNumModel,flow1);
152 if (verbose) fprintf(stderr,"-. Loading MERGED MODEL FOR KNOWN WORDS from < %s >\n",name);
153 model->wr = new weightRepository(name,taggerKFilter);
155 sprintf(name,"%s.UNK.M%d.%s.MRG",taggerModelName,taggerNumModel,flow1);
156 if (verbose) fprintf(stderr,"-. Loading MERGED MODEL FOR UNKNOWN WORDS from < %s >\n",name);
157 model->wrUnk = new weightRepository(name,taggerUFilter);
161 /***************************************************************/
163 void tagger::taggerLoadModelsForTagging()
165 startUpTime = times(&tbuffStartUp);
167 int modelsNeeded=1;
168 char name[150];
170 sprintf(name,"%s.DICT",taggerModelName);
171 if (strcmp(taggerBackupDict,"")!=0)
173 if (verbose) fprintf(stderr,"Loading DICTIONARY from < %s > with BACKUP DICTIONARY from < %s >\n",name,taggerBackupDict);
174 d = new dictionary(name,taggerBackupDict);
176 else
178 if (verbose) fprintf(stderr,"Loading DICTIONARY from < %s >\n",name);
179 d = new dictionary(name);
182 sprintf(name,"%s.UNKP",taggerModelName);
183 if (verbose) fprintf(stderr,"Loading UNKNOWN WORDS POS from < %s >\n",name);
184 weightUnk = taggerCreateWeightUnkArray(name);
186 if ( taggerStrategy == STRA_2P_RELABELING //modstrat 1
187 || taggerStrategy == STRA_1P_ROBUST_UNK /*modstrat 4*/ ) modelsNeeded = 2;
189 taggerModelList = new models_t[modelsNeeded];
190 taggerModelRunning = &taggerModelList[0];
192 if (taggerStrategy == STRA_1P_DEFAULT ) //modstrat 0)
193 taggerLoadModels(taggerModelRunning,0);
194 else if (taggerStrategy == STRA_1P_UNSUPERVISED ) //modstrat 2)
195 taggerLoadModels(taggerModelRunning,3);
196 else if (taggerStrategy == STRA_1P_ROBUST_UNK ) //modstrat 4)
198 taggerLoadModels(taggerModelRunning,0);
199 taggerLoadModels(&taggerModelList[1],2);
201 else if (taggerStrategy == STRA_1P_VERY_ROBUST_UNK ) //modstrat 5)
202 taggerLoadModels(taggerModelRunning,4);
203 else if (taggerStrategy == STRA_2P_RELABELING )//modstrat 1)
205 taggerLoadModels(taggerModelRunning,2);
206 taggerLoadModels(&taggerModelList[1],1);
207 taggerNumLaps = 2;
209 else
211 fprintf(stderr,"Execution error: Strategy %d doesn't exist!!\n\n",taggerStrategy);
212 exit(0);
215 endStartUpTime = times(&tbuffEndStartUp);
218 void tagger::taggerInit()
221 // int modelsNeeded=1;
222 char name[150];
224 //Mirar si existe fichero .WIN
225 if (taggerWinIndex==-1 && taggerWinLength==-1)
227 sprintf(name,"%s.WIN",taggerModelName);
228 FILE *f = fopen (name,"r");
229 if ( f == NULL ) sw = new swindow(stdin);
230 else
232 fscanf(f,"%d %d",&taggerWinLength,&taggerWinIndex);
233 fclose(f);
234 sw = new swindow (stdin,taggerWinLength,taggerWinIndex);
237 else if (taggerWinIndex==-1) sw = new swindow (stdin,taggerWinLength);
238 else sw = new swindow (stdin,taggerWinLength,taggerWinIndex);
242 /***************************************************************/
244 tagger::~tagger()
246 int modelsNeeded=1;
248 if (taggerStrategy == STRA_2P_RELABELING ) //modstrat 1)
249 modelsNeeded = 2;
251 delete stk;
252 delete d;
253 delete sw;
254 delete[] weightUnk; //Mod
256 for (int i=0;i<modelsNeeded;i++)
259 delete taggerModelList[i].wr;
260 delete taggerModelList[i].wrUnk;
262 if (strcmp(flow,"LRL")==0)
264 delete taggerModelList[i].wr2;
265 delete taggerModelList[i].wrUnk2;
270 /***************************************************************/
271 /***************************************************************/
273 void tagger::taggerPutFlow(char *inFlow)
275 strcpy(flow,inFlow);
278 /***************************************************************/
280 void tagger::taggerPutStrategy(int num)
282 taggerStrategy = num;
285 /***************************************************************/
287 void tagger::taggerPutWinLength(int l)
289 taggerWinLength = l;
292 /***************************************************************/
294 void tagger::taggerPutWinIndex(int i)
296 taggerWinIndex = i;
299 /***************************************************************/
301 void tagger::taggerPutBackupDictionary(char *dictName)
303 strcpy (taggerBackupDict,dictName);
306 /***************************************************************/
308 void tagger::taggerPutKWeightFilter(float kfilter)
310 taggerKFilter = kfilter;
313 /***************************************************************/
315 void tagger::taggerPutUWeightFilter(float ufilter)
317 taggerUFilter = ufilter;
320 /***************************************************************/
321 /***************************************************************/
322 /***************************************************************/
323 /***************************************************************/
325 int tagger::taggerRightSenseSpecialForUnknown()
327 int cont=1;
329 while(sw->previous()==0);
330 nodo *elem = sw->getIndex();
332 if (sw->winExistUnkWord(1,d)==-1)
333 taggerModelRunning=&taggerModelList[1];
334 else taggerModelRunning=&taggerModelList[0];
336 taggerGenerateScore(elem,1);
338 while(sw->next()==0)
340 elem = sw->getIndex();
342 if (sw->winExistUnkWord(1,d)==-1)
343 taggerModelRunning=&taggerModelList[1];
344 else taggerModelRunning=&taggerModelList[0];
346 taggerGenerateScore(elem,1);
347 cont++;
350 if (strcmp(flow,"LRL")==0) sw->winMaterializePOSValues(1);
352 return cont;
355 /***************************************************************/
357 int tagger::taggerLeftSenseSpecialForUnknown()
359 int cont=1;
360 while(sw->next()==0);
361 nodo *elem = sw->getIndex();
362 if (sw->winExistUnkWord(2,d)==-1)
363 taggerModelRunning=&taggerModelList[1];
364 else taggerModelRunning=&taggerModelList[0];
366 taggerGenerateScore(elem,2);
368 while(sw->previous()==0)
370 elem = sw->getIndex();
372 if (sw->winExistUnkWord(2,d)==-1)
373 taggerModelRunning=&taggerModelList[1];
374 else taggerModelRunning=&taggerModelList[0];
376 taggerGenerateScore(elem,2);
377 cont++;
380 if (strcmp(flow,"LRL")==0) sw->winMaterializePOSValues(0);
381 return cont;
384 /***************************************************************/
386 int tagger::taggerRightSense()
388 int cont=1;
390 while(sw->previous()==0);
391 nodo *elem = sw->getIndex();
392 taggerGenerateScore(elem,1);
394 while(sw->next()==0)
396 elem = sw->getIndex();
397 taggerGenerateScore(elem,1);
398 cont++;
401 if (strcmp(flow,"LRL")==0) sw->winMaterializePOSValues(1);
403 return cont;
406 /***************************************************************/
408 int tagger::taggerLeftSense()
410 int cont=1;
411 while(sw->next()==0);
412 nodo *elem = sw->getIndex();
413 taggerGenerateScore(elem,2);
415 while(sw->previous()==0)
417 elem = sw->getIndex();
418 taggerGenerateScore(elem,2);
419 cont++;
422 if (strcmp(flow,"LRL")==0) sw->winMaterializePOSValues(0);
423 return cont;
426 /***************************************************************/
428 void tagger::taggerRun()
430 int contWords=0,contSentences=0;
432 struct tms tbuff1,tbuff2;
433 clock_t start,end;
434 start = times(&tbuff1);
436 switch(taggerStrategy)
438 case STRA_1P_DEFAULT/*modstrat 0*/: taggerDoNormal(&contWords,&contSentences); break;
439 case STRA_2P_RELABELING/*modstrat 1*/: taggerDoNTimes(&contWords,&contSentences,taggerNumLaps); break;
440 case STRA_1P_UNSUPERVISED/*modstrat 2*/: taggerDoNormal(&contWords,&contSentences); break;
441 case STRA_1P_SENTENCE_LEVEL/*modstrat 3*/: /*taggerDoNTimes(&contWords,&contSentences,taggerNumLaps);*/ break;
442 case STRA_1P_ROBUST_UNK/*modstrat 4*/: taggerDoSpecialForUnknown(&contWords,&contSentences); break;
443 case STRA_1P_VERY_ROBUST_UNK/*modstrat 5*/: taggerDoNormal(&contWords,&contSentences); break;
444 case STRA_1P_ROBUST_SENTENCE_LEVEL: break;
446 end = times(&tbuff2);
449 if (verbose)
450 { taggerShowVerbose(contSentences,1);
452 fprintf(stderr,"* -------------------------------------------------------------------\n");
453 showTime("Start Up Time",
454 ((double)(endStartUpTime-startUpTime))/CLOCKS_PER_SECOND,
455 ((double)tbuffEndStartUp.tms_utime-(double)tbuffStartUp.tms_utime)/CLOCKS_PER_SECOND,
456 ((double)tbuffEndStartUp.tms_stime-(double)tbuffStartUp.tms_stime)/CLOCKS_PER_SECOND);
457 fprintf(stderr,"* -------------------------------------------------------------------\n");
458 showTime("Features Extraction Time",realFexTime,usrFexTime,sysFexTime);
459 showTime("SVM Time",realSVMTime,usrSVMTime,sysSVMTime);
460 showTime("Process Time",((double)(end-start))/CLOCKS_PER_SECOND - realFexTime - realSVMTime,
461 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime)/CLOCKS_PER_SECOND - usrFexTime -usrSVMTime,
462 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime)/CLOCKS_PER_SECOND - sysFexTime -sysSVMTime);
463 fprintf(stderr,"* -------------------------------------------------------------------\n");
464 fprintf(stderr,"[ Tagging Time = Feature Extraction Time + SVM Time + Process Time ]\n");
465 showTime("Tagging Time",((double)(end-start))/CLOCKS_PER_SECOND,
466 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime)/CLOCKS_PER_SECOND,
467 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime)/CLOCKS_PER_SECOND);
468 fprintf(stderr,"* -------------------------------------------------------------------\n");
469 fprintf(stderr,"[ Overall Time = Start up Time + Tagging Time ]\n");
470 showTime("Overall Time",((double)(end-start+endStartUpTime-startUpTime))/CLOCKS_PER_SECOND,
471 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime+
472 (double)tbuffEndStartUp.tms_utime-(double)tbuffStartUp.tms_utime)/CLOCKS_PER_SECOND,
473 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime+
474 (double)tbuffEndStartUp.tms_stime-(double)tbuffStartUp.tms_stime)/CLOCKS_PER_SECOND);
475 fprintf(stderr,"* -------------------------------------------------------------------\n");
476 taggerStadistics(contWords,contSentences,
477 ((double)(end-start))/CLOCKS_PER_SECOND,
478 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime)/CLOCKS_PER_SECOND,
479 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime)/CLOCKS_PER_SECOND);
483 /***************************************************************/
485 void tagger::taggerDoNormal(int *numWords, int *numSentences)
487 int contWordsLR=0,contWordsRL=0,contSentences=0,ret = 1;
489 while ((ret>=0))
491 if (verbose) taggerShowVerbose(contSentences,0);
493 if ((strcmp(flow,"LRL")==0) || (strcmp(flow,"LR")==0))
494 contWordsLR = contWordsLR+taggerRightSense();
495 if ((strcmp(flow,"LRL")==0) || (strcmp(flow,"RL")==0))
496 contWordsRL = contWordsRL+taggerLeftSense();
497 contSentences++;
498 sw->show();
499 sw->deleteList();
500 ret = sw->iniGeneric();
502 if (contWordsRL==0) *numWords=contWordsLR/taggerNumLaps;
503 else *numWords=contWordsRL/taggerNumLaps;
504 *numSentences = contSentences;
507 /***************************************************************/
509 void tagger::taggerDoSpecialForUnknown(int *numWords, int *numSentences)
511 int contWordsLR=0,contWordsRL=0,contSentences=0,ret = 1;
513 while ((ret>=0))
515 if (verbose) taggerShowVerbose(contSentences,0);
517 if ((strcmp(flow,"LRL")==0) || (strcmp(flow,"LR")==0))
518 contWordsLR = contWordsLR+taggerRightSenseSpecialForUnknown();
519 if ((strcmp(flow,"LRL")==0) || (strcmp(flow,"RL")==0))
520 contWordsRL = contWordsRL+taggerLeftSenseSpecialForUnknown();
522 contSentences++;
523 sw->show();
524 sw->deleteList();
525 ret = sw->iniGeneric();
527 if (contWordsRL==0) *numWords=contWordsLR/taggerNumLaps;
528 else *numWords=contWordsRL/taggerNumLaps;
529 *numSentences = contSentences;
532 /***************************************************************/
534 void tagger::taggerDoNTimes(int *numWords, int *numSentences,int laps)
536 int contWordsLR=0,contWordsRL=0,contSentences=0,ret = 1;
538 while ((ret>=0))
541 if (verbose) taggerShowVerbose(contSentences,0);
543 for (int pasadas=0;pasadas<laps;pasadas++)
546 taggerModelRunning = &taggerModelList[pasadas];
547 if ((strcmp(flow,"LRL")==0) || (strcmp(flow,"LR")==0))
548 contWordsLR = contWordsLR+taggerRightSense();
550 if (strcmp(flow,"LRL")==0 && pasadas>0)
551 sw->winMaterializePOSValues(2);
552 if ((strcmp(flow,"LRL")==0) || (strcmp(flow,"RL")==0))
553 contWordsRL = contWordsRL+taggerLeftSense();
557 contSentences++;
558 sw->show();
559 sw->deleteList();
560 ret = sw->iniGeneric();
562 if (contWordsRL==0) *numWords=contWordsLR/taggerNumLaps;
563 else *numWords=contWordsRL/taggerNumLaps;
564 *numSentences = contSentences;
567 /***************************************************************/
568 /***************************************************************/
570 void tagger::taggerGenerateScore(nodo *elem,int direction)
573 struct tms tbuffStartFex,tbuffEndFex;
574 clock_t startFexTime,endFexTime;
575 struct tms tbuffStartSVM,tbuffEndSVM;
576 clock_t startSVMTime,endSVMTime;
578 weight_node_t *weight;
579 nodo_feature_list *aux;
580 weightRepository *weightRep;
581 hash_t *bias;
582 int i,numMaybe,ret=1,max=0;
583 int is_unk=FALSE;
584 simpleList *featureList;
586 startFexTime = times(&tbuffStartFex);
588 i = d->getElement(elem->wrd);
589 if (i!=HASH_FAIL)
591 featureList = &taggerModelRunning->featureList;
592 numMaybe = d->getElementNumMaybe(i);
593 weight = taggerCreateWeightNodeArray(numMaybe,i);
594 if ((strcmp(flow,"LRL")==0) && (direction==2))
596 weightRep = taggerModelRunning->wr2; //wr2;
597 //bias = taggerModelRunning->bias2; //taggerBias2;
599 else
601 weightRep = taggerModelRunning->wr; //wr;
602 //bias = taggerModelRunning->bias; //taggerBias;
605 else
607 numMaybe = NUM_UNK_POS;
608 weight = taggerInitializeWeightNodeArray(numMaybe,weightUnk);
609 featureList = &taggerModelRunning->featureListUnk;
610 is_unk = TRUE;
612 if ((strcmp(flow,"LRL")==0) && (direction==2))
614 weightRep = taggerModelRunning->wrUnk2; //wrUnk2;
615 //bias = taggerModelRunning->biasUnk2; //taggerBiasUnk2;
617 else
618 { weightRep = taggerModelRunning->wrUnk; //wrUnk;
619 //bias =taggerModelRunning->biasUnk; //taggerBiasUnk;
624 if (numMaybe>1)
626 while (ret>=0)
628 aux = (nodo_feature_list *) featureList->getIndex();
629 if (strcmp(aux->mark,SLASTW)==0) sw->winPushSwnFeature(stk);
630 else if (strcmp(aux->mark,WMARK)==0) sw->winPushWordFeature((void *)aux,d,stk,direction);
631 else if (strcmp(aux->mark,KMARK)==0) sw->winPushAmbiguityFeature((void *)aux,d,stk,direction);
632 else if (strcmp(aux->mark,MMARK)==0) sw->winPushMaybeFeature((void *)aux,d,stk,direction);
633 else if (strcmp(aux->mark,PMARK)==0) sw->winPushPosFeature((void *)aux,d,stk,direction);
634 else if (strcmp(aux->mark,MFTMARK)==0) sw->winPushMFTFeature((void *)aux,d,stk,direction);
635 else if (is_unk==TRUE)
637 int *param;
638 if (aux->n>0)
640 param = (int *) aux->l.getIndex();
642 if (strcmp(aux->mark,PREFIX_MARK)==0) sw->winPushPrefixFeature(elem->wrd, stk, *param);
643 else if (strcmp(aux->mark,SUFFIX_MARK)==0) sw->winPushSuffixFeature(elem->wrd, stk, *param);
644 else if (strcmp(aux->mark,CHAR_A_MARK)==0) sw->winPushLetterFeature(elem->wrd, stk, *param, COUNTING_FROM_BEGIN);
645 else if (strcmp(aux->mark,CHAR_Z_MARK)==0) sw->winPushLetterFeature(elem->wrd, stk, *param, COUNTING_FROM_END);
646 else if (strcmp(aux->mark,LENGTH_MARK)==0) sw->winPushLenghtFeature(elem->wrd,stk);
647 else if (strcmp(aux->mark,START_CAPITAL_MARK)==0) sw->winPushStartWithCapFeature(elem->wrd,stk);
648 else if (strcmp(aux->mark,START_LOWER_MARK)==0) sw->winPushStartWithLowerFeature(elem->wrd,stk);
649 else if (strcmp(aux->mark,START_NUMBER_MARK)==0) sw->winPushStartWithNumberFeature(elem->wrd,stk);
650 else if (strcmp(aux->mark,ALL_UPPER_MARK)==0) sw->winPushAllUpFeature(elem->wrd,stk);
651 else if (strcmp(aux->mark,ALL_LOWER_MARK)==0) sw->winPushAllLowFeature(elem->wrd,stk);
652 else if (strcmp(aux->mark,CONTAIN_CAP_MARK)==0) sw->winPushContainCapFeature(elem->wrd, stk);
653 else if (strcmp(aux->mark,CONTAIN_CAPS_MARK)==0) sw->winPushContainCapsFeature(elem->wrd, stk);
654 else if (strcmp(aux->mark,CONTAIN_COMMA_MARK)==0) sw->winPushContainCommaFeature(elem->wrd, stk);
655 else if (strcmp(aux->mark,CONTAIN_NUMBER_MARK)==0) sw->winPushContainNumFeature(elem->wrd, stk);
656 else if (strcmp(aux->mark,CONTAIN_PERIOD_MARK)==0) sw->winPushContainPeriodFeature(elem->wrd, stk);
657 else if (strcmp(aux->mark,MULTIWORD_MARK)==0) sw->winPushMultiwordFeature(elem->wrd, stk);
659 ret = featureList->next();
661 featureList->setFirst();
663 endFexTime = times(&tbuffEndFex);
664 realFexTime = realFexTime + ((double)(endFexTime-startFexTime))/CLOCKS_PER_SECOND;
665 usrFexTime = usrFexTime + (((double)tbuffEndFex.tms_utime-(double)tbuffStartFex.tms_utime)/CLOCKS_PER_SECOND);
666 sysFexTime = sysFexTime + (((double)tbuffEndFex.tms_stime-(double)tbuffStartFex.tms_stime)/CLOCKS_PER_SECOND);
668 startSVMTime = times(&tbuffStartSVM);
670 taggerSumWeight(weightRep,bias,weight,numMaybe,&max);
672 endSVMTime = times(&tbuffEndSVM);
673 realSVMTime = realSVMTime + ((double)(endSVMTime-startSVMTime))/CLOCKS_PER_SECOND;
674 usrSVMTime = usrSVMTime + (((double)tbuffEndSVM.tms_utime-(double)tbuffStartSVM.tms_utime)/CLOCKS_PER_SECOND);
675 sysSVMTime = sysSVMTime + (((double)tbuffEndSVM.tms_stime-(double)tbuffStartSVM.tms_stime)/CLOCKS_PER_SECOND);
678 strcpy(elem->pos,weight[max].pos);
679 elem->weight = weight[max].data;
681 if (strcmp(flow,"LRL")==0)
683 weight_node_t *score = new weight_node_t;
684 score->data = weight[max].data;
685 strcpy(score->pos,weight[max].pos);
686 push(elem->stackScores,score);
689 if (i!=HASH_FAIL) delete[] weight; //mod delete[] instead of delete
692 /***************************************************************/
694 weight_node_t *tagger::taggerCreateWeightNodeArray(int numMaybe,int index)
696 int ret=1,j = numMaybe;
697 weight_node_t *weight = new weight_node_t[numMaybe];
698 simpleList *list = (simpleList *) d->getElementMaybe(index);
700 while (ret>=0 && numMaybe > 0)
702 infoDict *pInfoDict = (infoDict *) list->getIndex();
703 j--;
704 sprintf(weight[j].pos,"%s",pInfoDict->txt);
705 weight[j].data = 0;
706 ret=list->next();
709 list->setFirst();
710 return weight;
713 /***************************************************************/
715 weight_node_t *tagger::taggerInitializeWeightNodeArray(int numMaybe,weight_node_t *w)
717 for (int i=0;i<numMaybe;i++) w[i].data=0;
718 return w;
721 /***************************************************************/
723 void tagger::taggerSumWeight(weightRepository *wRep,hash_t *bias,weight_node_t *weight, int numMaybe, int *max)
725 weight_node_t *aux;
726 long double w,b = 0;
727 char *feature;
728 int putBias=1;
730 while (!empty(stk))
732 *max=0;
733 feature = (char *) pop(stk);
734 for (int j=0; j<numMaybe;j++)
736 if (putBias)
738 b = wRep->wrGetWeight("BIASES",weight[j].pos);
739 weight[j].data = weight[j].data - b;
741 w = wRep->wrGetWeight(feature,weight[j].pos);
742 weight[j].data=weight[j].data+w;
743 if (((float)weight[*max].data)<((float)weight[j].data)) *max=j;
745 delete[] feature; //mod delete[] instead of delete
746 putBias=0;
750 /***************************************************************/
751 /***************************************************************/
753 weight_node_t *tagger::taggerCreateWeightUnkArray(char *name)
755 NUM_UNK_POS=0;
756 int i=0;
757 char c=' ';
758 FILE *f;
760 if ((f = fopen(name, "rt"))== NULL)
762 fprintf(stderr, "Error opening file: %s",name);
763 exit(0);
766 while (!feof(f))
767 { if (fgetc(f)=='\n') NUM_UNK_POS++;
770 fseek(f,0,SEEK_SET);
772 weight_node_t *weight = new weight_node_t[NUM_UNK_POS];
773 while (!feof(f) && (i<NUM_UNK_POS))
774 { strcpy(weight[i].pos,"");
775 weight[i].data=0;
776 c = fgetc(f);
777 while ((c!='\n') && (!feof(f)))
779 if (c!=' ' && c!='\n' && c!='\t') sprintf(weight[i].pos,"%s%c",weight[i].pos,c);
780 c = fgetc(f);
782 i++;
784 fclose(f);
785 return weight;
788 /***************************************************************/
789 /***************************************************************/
791 void tagger::taggerStadistics(int numWords, int numSentences, double realTime,double usrTime, double sysTime)
793 char message[200]="";
794 float media=0;
795 if (time!=0) media = (float) (((double) numWords)/(sysTime+usrTime));
797 sprintf(message,"%s\n%d sentences were tagged.",message,numSentences);
798 sprintf(message,"%s\n%d words were tagged.",message,numWords);
799 sprintf(message,"%s\n%f words/second were tagged.\n",message,media);
800 fwrite(message,strlen(message),1,stderr);
803 /***************************************************************/
805 void tagger::taggerShowVerbose(int num,int isEnd)
807 if (isEnd) { fprintf(stderr,".%d sentences [DONE]\n\n",num); return; }
808 else if (num%100==0) fprintf(stderr,"%d",num);
809 else if (num%10==0) fprintf(stderr,".");