Updated source code from upstream SVN
[svmtool++.git] / src / tagger.cc
blob74d16da7ac16637c896f456c88d38305db0a6f73
1 /*
2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include "tagger.h"
20 #include "strategies.h"
21 #include "hash.h"
22 #include "list.h"
23 #include "weight.h"
24 #include "dict.h"
25 #include "swindow.h"
26 #include "marks.h"
27 #include "common.h"
28 #include "nodo.h"
30 #include <sstream>
31 #include <string.h>
32 #include <stdio.h>
35 /***************************************************************/
// Process-time snapshots and tick counts taken before and after model
// loading (written by taggerLoadModelsForTagging, reported by taggerRun).
struct tms tbuffStartUp;
struct tms tbuffEndStartUp;
clock_t startUpTime;
clock_t endStartUpTime;

// Time accumulated in feature extraction (added to by taggerGenerateScore).
double sysFexTime = 0;
double usrFexTime = 0;
double realFexTime = 0;

// Time accumulated in weight summation (added to by taggerGenerateScore).
double sysSVMTime = 0;
double usrSVMTime = 0;
double realSVMTime = 0;

/***************************************************************/

// Verbosity switch; defined outside this file.
extern int verbose;
47 /***************************************************************/
49 hash_t<weight_node_t*> *tagger::taggerCreateBiasHash(const std::string& name)
51 hash_t<weight_node_t*> *bias = new hash_t<weight_node_t*>;
52 int i=0;
53 char c=' ';
54 weight_node_t *w;
55 FILE *f;
57 bias->hash_init(40);
59 if ((f = fopen(name.c_str(), "rt"))== NULL)
61 std::cerr << "Error opening file: "<<name<<std::endl;
62 exit(0);
65 while (!feof(f))
67 c = fgetc(f);
68 if (c!='#')
70 w = new weight_node_t;
71 std::string weight;
72 i=0;
73 w->pos = "";
74 while ((c!='\n') && (!feof(f)))
76 if (c!=' ' && c!='\n' && c!='\t' && i==1) weight.push_back(c);
77 else if (c!=' ' && c!='\n' && c!='\t' && i==0)
79 if (c!=':') w->pos += c;
80 else i=1;
82 c = fgetc(f);
84 w->data = (long double)0;
85 std::istringstream iss(weight);
86 iss >> w->data;
87 bias->hash_insert(w->pos,w);
88 } //end if
89 else while((c=fgetc(f))!='\n');
91 fclose(f);
92 return bias;
95 /***************************************************************/
97 tagger::tagger(const std::string& model) : stk(), sw(0)
99 std::string name;
100 flow = "LR";
101 taggerShowScoresFlag = false;
102 taggerShowCommentsFlag = true;
103 taggerNumLaps = 1;
104 taggerKFilter = 0;
105 taggerUFilter = 0;
106 taggerStrategy = STRA_1P_DEFAULT; //modstrat //0;
107 taggerWinIndex = -1;
108 taggerWinLength = -1;
109 taggerModelName = model;
110 taggerBackupDict ="";
113 /***************************************************************/
115 void tagger::taggerLoadModels(models_t *model, int taggerNumModel)
117 std::string flow2,flow1;
118 std::ostringstream name;
120 //Cargamos la lista de "features" para palabras conocidas
121 name << taggerModelName << ".A" << taggerNumModel;
122 if (verbose) std::cerr << std::endl << "Loading FEATURES FOR KNOWN WORDS from < "<<name.str()<<" >"<< std::endl;
123 createFeatureList(name.str(),&model->featureList);
124 //Cargamos la lista de "features" para palabras desconocidas
125 name.str("");
126 name << taggerModelName << ".A" << taggerNumModel << ".UNK";
127 if (verbose) std::cerr << std::endl << "Loading FEATURES FOR UNKNOWN WORDS from < "<<name.str()<<" >"<< std::endl;
128 createFeatureList(name.str(),&model->featureListUnk);
130 if (flow == "LRL")
132 flow1 = "LR"; flow2 = "RL";
134 name.str("");
135 name << flow1 << " (Right-to-Left)";
136 if (verbose) std::cerr << std::endl <<"READING MODELS < direction = "<<name.str()<<" >"<<std::endl;
138 name.str("");
139 name << taggerModelName << ".M"<<taggerNumModel<<"."<<flow2<<".MRG";
140 if (verbose) std::cerr<<"-. Loading MERGED MODEL FOR KNOWN WORDS from < "<<name.str()<<" >"<< std::endl;
141 model->wr2 = new weightRepository(name.str(),taggerKFilter);
143 name.str("");
144 name << taggerModelName << ".UNK.M"<<taggerNumModel<<"."<<flow2<<".MRG";
145 if (verbose) std::cerr <<"-. Loading MERGED MODEL FOR UNKKNOWN WORDS from < "<<name.str()<<" >"<<std::endl<<std::endl;
146 model->wrUnk2 = new weightRepository(name.str(),taggerUFilter);
148 else flow1 =flow;
150 name.str("");
151 if (flow1 =="RL") name <<flow1<<" (Right-to-Left)";
152 else name << flow1 << " (Left-to-Right)";
154 if (verbose) std::cerr<<std::endl<<"READING MODELS < direction = "<<name.str()<<" >"<<std::endl;
156 name.str("");
157 name << taggerModelName << ".M"<<taggerNumModel<<"."<<flow1<<".MRG";
158 if (verbose) std::cerr<<"-. Loading MERGED MODEL FOR KNOWN WORDS from < "<<name.str()<<" >"<<std::endl;
159 model->wr = new weightRepository(name.str(),taggerKFilter);
161 name.str("");
162 name << taggerModelName << ".UNK.M"<<taggerNumModel<<"."<<flow1<<".MRG";
163 if (verbose) std::cerr<<"-. Loading MERGED MODEL FOR UNKNOWN WORDS from < "<<name.str()<<" >"<<std::endl;
164 model->wrUnk = new weightRepository(name.str(),taggerUFilter);
168 /***************************************************************/
170 void tagger::taggerLoadModelsForTagging()
172 startUpTime = times(&tbuffStartUp);
174 int modelsNeeded=1;
176 std::string name = taggerModelName + ".DICT";
177 if (!taggerBackupDict.empty())
179 if (verbose) std::cerr << "Loading DICTIONARY from < "<<name<<" > with BACKUP DICTIONARY from < "<<taggerBackupDict<<" >"<< std::endl;
180 d = new dictionary(name,taggerBackupDict);
182 else
184 if (verbose) std::cerr<<"Loading DICTIONARY from < "<<name<<" >"<<std::endl;
185 d = new dictionary(name);
188 name = taggerModelName + ".UNKP";
189 if (verbose) std::cerr << "Loading UNKNOWN WORDS POS from < "<<name<<" >"<<std::endl;
191 if ( taggerStrategy == STRA_2P_RELABELING //modstrat 1
192 || taggerStrategy == STRA_1P_ROBUST_UNK /*modstrat 4*/ ) modelsNeeded = 2;
194 taggerModelList = new models_t[modelsNeeded];
195 taggerModelRunning = &taggerModelList[0];
197 if (taggerStrategy == STRA_1P_DEFAULT ) //modstrat 0)
198 taggerLoadModels(taggerModelRunning,0);
199 else if (taggerStrategy == STRA_1P_UNSUPERVISED ) //modstrat 2)
200 taggerLoadModels(taggerModelRunning,3);
201 else if (taggerStrategy == STRA_1P_ROBUST_UNK ) //modstrat 4)
203 taggerLoadModels(taggerModelRunning,0);
204 taggerLoadModels(&taggerModelList[1],2);
206 else if (taggerStrategy == STRA_1P_VERY_ROBUST_UNK ) //modstrat 5)
207 taggerLoadModels(taggerModelRunning,4);
208 else if (taggerStrategy == STRA_2P_RELABELING )//modstrat 1)
210 taggerLoadModels(taggerModelRunning,2);
211 taggerLoadModels(&taggerModelList[1],1);
212 taggerNumLaps = 2;
214 else
216 std::cerr<<"Execution error: Strategy "<<taggerStrategy<<" doesn't exist!!"<<std::endl<<std::endl;
217 exit(0);
220 endStartUpTime = times(&tbuffEndStartUp);
223 void tagger::taggerInit(std::istream& input, std::ostream& output)
225 // int modelsNeeded=1;
226 std::string name;
228 if (sw != 0) delete sw;
229 //Mirar si existe fichero .WIN
230 if (taggerWinIndex==-1 && taggerWinLength==-1)
232 name = taggerModelName + ".WIN";
233 FILE *f = fopen (name.c_str(),"r");
234 if ( f == NULL ) sw = new swindow(input, &output, d);
235 else
237 fscanf(f,"%d %d",&taggerWinLength,&taggerWinIndex);
238 fclose(f);
239 sw = new swindow(input,taggerWinLength,taggerWinIndex, &output, d);
242 else if (taggerWinIndex==-1) sw = new swindow (input,taggerWinLength, &output, d);
243 else sw = new swindow (input,taggerWinLength,taggerWinIndex, &output, d);
246 void tagger::taggerInit()
248 if (sw != NULL) delete sw;
249 sw = new swindow(5, d);
252 /***************************************************************/
254 tagger::~tagger()
256 int modelsNeeded=1;
258 if (taggerStrategy == STRA_2P_RELABELING ) //modstrat 1)
259 modelsNeeded = 2;
261 delete d;
262 delete sw;
264 for (int i=0;i<modelsNeeded;i++)
267 delete taggerModelList[i].wr;
268 delete taggerModelList[i].wrUnk;
269 destroyFeatureList(&taggerModelList[i].featureList);
270 destroyFeatureList(&taggerModelList[i].featureListUnk);
272 if (flow =="LRL")
274 delete taggerModelList[i].wr2;
275 delete taggerModelList[i].wrUnk2;
278 delete[] taggerModelList;
281 /***************************************************************/
282 /***************************************************************/
285 void tagger::taggerShowComments()
287 taggerShowCommentsFlag = true;
290 /***************************************************************/
292 void tagger::taggerShowNoComments()
294 taggerShowCommentsFlag = false;
298 /***************************************************************/
301 void tagger::taggerActiveShowScoresFlag()
303 taggerShowScoresFlag = true;
306 /***************************************************************/
308 void tagger::taggerDesactiveShowScoresFlag()
310 this->taggerShowScoresFlag = false;
313 /***************************************************************/
315 void tagger::taggerPutFlow(const std::string& inFlow)
317 flow = inFlow;
320 /***************************************************************/
322 void tagger::taggerPutStrategy(int num)
324 taggerStrategy = num;
327 /***************************************************************/
329 void tagger::taggerPutWinLength(int l)
331 taggerWinLength = l;
334 /***************************************************************/
336 void tagger::taggerPutWinIndex(int i)
338 taggerWinIndex = i;
341 /***************************************************************/
343 void tagger::taggerPutBackupDictionary(const std::string& dictName)
345 taggerBackupDict = dictName;
348 /***************************************************************/
350 void tagger::taggerPutKWeightFilter(float kfilter)
352 taggerKFilter = kfilter;
355 /***************************************************************/
357 void tagger::taggerPutUWeightFilter(float ufilter)
359 taggerUFilter = ufilter;
362 /***************************************************************/
363 /***************************************************************/
364 /***************************************************************/
365 /***************************************************************/
367 int tagger::taggerRightSenseSpecialForUnknown()
369 int cont=1;
371 while(sw->previous());
372 nodo *elem = sw->getIndex();
374 if (sw->winExistUnkWord(1,d)==-1)
375 taggerModelRunning=&taggerModelList[1];
376 else taggerModelRunning=&taggerModelList[0];
378 taggerGenerateScore(elem,1);
380 while(sw->next())
382 elem = sw->getIndex();
384 if (sw->winExistUnkWord(1,d)==-1)
385 taggerModelRunning=&taggerModelList[1];
386 else taggerModelRunning=&taggerModelList[0];
388 taggerGenerateScore(elem,1);
389 cont++;
392 if (flow == "LRL") sw->winMaterializePOSValues(1);
394 return cont;
397 /***************************************************************/
399 int tagger::taggerLeftSenseSpecialForUnknown()
401 int cont=1;
402 while(sw->next());
403 nodo *elem = sw->getIndex();
404 if (sw->winExistUnkWord(2,d)==-1)
405 taggerModelRunning=&taggerModelList[1];
406 else taggerModelRunning=&taggerModelList[0];
408 taggerGenerateScore(elem,2);
410 while(sw->previous())
412 elem = sw->getIndex();
414 if (sw->winExistUnkWord(2,d)==-1)
415 taggerModelRunning=&taggerModelList[1];
416 else taggerModelRunning=&taggerModelList[0];
418 taggerGenerateScore(elem,2);
419 cont++;
422 if (flow=="LRL") sw->winMaterializePOSValues(0);
423 return cont;
426 /***************************************************************/
428 int tagger::taggerRightSense()
430 int cont=1;
432 while(sw->previous());
433 nodo *elem = sw->getIndex();
434 if (elem == 0)
436 std::cerr << "tagger::taggerRightSense: ERROR index null at beginning" << std::endl;
437 return -1;
439 taggerGenerateScore(elem,1);
441 while(sw->next())
443 elem = sw->getIndex();
444 taggerGenerateScore(elem,1);
445 cont++;
448 if (flow =="LRL") sw->winMaterializePOSValues(1);
450 return cont;
453 /***************************************************************/
455 int tagger::taggerLeftSense()
457 int cont=1;
458 while(sw->next());
459 nodo *elem = sw->getIndex();
460 taggerGenerateScore(elem,2);
462 while(sw->previous())
464 elem = sw->getIndex();
465 taggerGenerateScore(elem,2);
466 cont++;
469 if (flow =="LRL") sw->winMaterializePOSValues(0);
470 return cont;
473 /***************************************************************/
475 void tagger::taggerRun()
477 int contWords=0,contSentences=0;
479 struct tms tbuff1,tbuff2;
480 clock_t start,end;
481 start = times(&tbuff1);
483 switch(taggerStrategy)
485 case STRA_1P_DEFAULT/*modstrat 0*/: taggerDoNormal(&contWords,&contSentences); break;
486 case STRA_2P_RELABELING/*modstrat 1*/: taggerDoNTimes(&contWords,&contSentences,taggerNumLaps); break;
487 case STRA_1P_UNSUPERVISED/*modstrat 2*/: taggerDoNormal(&contWords,&contSentences); break;
488 case STRA_1P_SENTENCE_LEVEL/*modstrat 3*/: /*taggerDoNTimes(&contWords,&contSentences,taggerNumLaps);*/ break;
489 case STRA_1P_ROBUST_UNK/*modstrat 4*/: taggerDoSpecialForUnknown(&contWords,&contSentences); break;
490 case STRA_1P_VERY_ROBUST_UNK/*modstrat 5*/: taggerDoNormal(&contWords,&contSentences); break;
491 case STRA_1P_ROBUST_SENTENCE_LEVEL: break;
493 end = times(&tbuff2);
496 if (verbose)
497 { taggerShowVerbose(contSentences,1);
499 std::cerr<<"* -------------------------------------------------------------------"<<std::endl;
500 showTime("Start Up Time",
501 ((double)(endStartUpTime-startUpTime))/CLOCKS_PER_SECOND,
502 ((double)tbuffEndStartUp.tms_utime-(double)tbuffStartUp.tms_utime)/CLOCKS_PER_SECOND,
503 ((double)tbuffEndStartUp.tms_stime-(double)tbuffStartUp.tms_stime)/CLOCKS_PER_SECOND);
504 std::cerr<<"* -------------------------------------------------------------------"<<std::endl;
505 showTime("Features Extraction Time",realFexTime,usrFexTime,sysFexTime);
506 showTime("SVM Time",realSVMTime,usrSVMTime,sysSVMTime);
507 showTime("Process Time",((double)(end-start))/CLOCKS_PER_SECOND - realFexTime - realSVMTime,
508 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime)/CLOCKS_PER_SECOND - usrFexTime -usrSVMTime,
509 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime)/CLOCKS_PER_SECOND - sysFexTime -sysSVMTime);
510 std::cerr<<"* -------------------------------------------------------------------"<<std::endl;
511 std::cerr<<"[ Tagging Time = Feature Extraction Time + SVM Time + Process Time ]"<<std::endl;
512 showTime("Tagging Time",((double)(end-start))/CLOCKS_PER_SECOND,
513 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime)/CLOCKS_PER_SECOND,
514 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime)/CLOCKS_PER_SECOND);
515 std::cerr<<"* -------------------------------------------------------------------"<<std::endl;
516 std::cerr<<"[ Overall Time = Start up Time + Tagging Time ]"<<std::endl;
517 showTime("Overall Time",((double)(end-start+endStartUpTime-startUpTime))/CLOCKS_PER_SECOND,
518 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime+
519 (double)tbuffEndStartUp.tms_utime-(double)tbuffStartUp.tms_utime)/CLOCKS_PER_SECOND,
520 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime+
521 (double)tbuffEndStartUp.tms_stime-(double)tbuffStartUp.tms_stime)/CLOCKS_PER_SECOND);
522 std::cerr<<"* -------------------------------------------------------------------"<<std::endl;
523 taggerStadistics(contWords,contSentences,
524 ((double)(end-start))/CLOCKS_PER_SECOND,
525 ((double)tbuff2.tms_utime-(double)tbuff1.tms_utime)/CLOCKS_PER_SECOND,
526 ((double)tbuff2.tms_stime-(double)tbuff1.tms_stime)/CLOCKS_PER_SECOND);
530 /***************************************************************/
532 void tagger::taggerDoNormal(int *numWords, int *numSentences)
534 int contWordsLR=0,contWordsRL=0,contSentences=0,ret = 1;
536 while ((ret>=0))
538 if (verbose) taggerShowVerbose(contSentences,0);
540 if ((flow=="LRL") || (flow =="LR"))
541 contWordsLR = contWordsLR+taggerRightSense();
542 if ((flow =="LRL") || (flow == "RL"))
543 contWordsRL = contWordsRL+taggerLeftSense();
544 contSentences++;
545 sw->show(taggerShowScoresFlag, taggerShowCommentsFlag);
546 sw->deleteList();
547 ret = sw->iniGeneric(d);
549 if (contWordsRL==0) *numWords=contWordsLR/taggerNumLaps;
550 else *numWords=contWordsRL/taggerNumLaps;
551 *numSentences = contSentences;
554 /***************************************************************/
556 void tagger::taggerDoSpecialForUnknown(int *numWords, int *numSentences)
558 int contWordsLR=0,contWordsRL=0,contSentences=0,ret = 1;
560 while ((ret>=0))
562 if (verbose) taggerShowVerbose(contSentences,0);
564 if ((flow == "LRL") || (flow =="LR"))
565 contWordsLR = contWordsLR+taggerRightSenseSpecialForUnknown();
566 if ((flow == "LRL") || (flow =="RL"))
567 contWordsRL = contWordsRL+taggerLeftSenseSpecialForUnknown();
569 contSentences++;
570 sw->show(taggerShowScoresFlag, taggerShowCommentsFlag);
571 sw->deleteList();
572 ret = sw->iniGeneric(d);
574 if (contWordsRL==0) *numWords=contWordsLR/taggerNumLaps;
575 else *numWords=contWordsRL/taggerNumLaps;
576 *numSentences = contSentences;
579 /***************************************************************/
581 void tagger::taggerDoNTimes(int *numWords, int *numSentences,int laps)
583 int contWordsLR=0,contWordsRL=0,contSentences=0,ret = 1;
585 while ((ret>=0))
587 if (verbose) taggerShowVerbose(contSentences,0);
589 for (int pasadas=0;pasadas<laps;pasadas++)
591 taggerModelRunning = &taggerModelList[pasadas];
592 if ((flow == "LRL") || (flow == "LR"))
593 contWordsLR = contWordsLR+taggerRightSense();
594 if (flow == "LRL" && pasadas>0)
595 sw->winMaterializePOSValues(2);
596 if ((flow == "LRL") || (flow =="RL"))
597 contWordsRL = contWordsRL+taggerLeftSense();
600 contSentences++;
601 sw->show(taggerShowScoresFlag, taggerShowCommentsFlag);
602 sw->deleteList();
603 ret = sw->iniGeneric(d);
605 if (contWordsRL==0) *numWords=contWordsLR/taggerNumLaps;
606 else *numWords=contWordsRL/taggerNumLaps;
607 *numSentences = contSentences;
610 /***************************************************************/
612 void tagger::taggerGenerateScore(nodo *elem,int direction)
615 struct tms tbuffStartFex,tbuffEndFex;
616 clock_t startFexTime,endFexTime;
617 struct tms tbuffStartSVM,tbuffEndSVM;
618 clock_t startSVMTime,endSVMTime;
620 weight_node_t *weight;
621 weightRepository *weightRep;
622 int numMaybe,max=0;
623 int is_unk=FALSE;
624 simpleList<nodo_feature_list*> *featureList;
626 startFexTime = times(&tbuffStartFex);
628 dataDict* i = d->getElement(elem->wrd);
629 if ((long)i!=HASH_FAIL)
631 featureList = &taggerModelRunning->featureList;
632 numMaybe = d->getElementNumMaybe(i);
633 weight = taggerCreateWeightNodeArray(numMaybe,i);
634 if ((flow =="LRL") && (direction==2))
636 weightRep = taggerModelRunning->wr2; //wr2;
637 //bias = taggerModelRunning->bias2; //taggerBias2;
639 else
641 weightRep = taggerModelRunning->wr; //wr;
642 //bias = taggerModelRunning->bias; //taggerBias;
645 else
647 featureList = &taggerModelRunning->featureListUnk;
648 weight = taggerCreateWeightUnkArray(&numMaybe);
649 is_unk = TRUE;
651 if (flow =="LRL" && (direction==2))
653 weightRep = taggerModelRunning->wrUnk2; //wrUnk2;
654 //bias = taggerModelRunning->biasUnk2; //taggerBiasUnk2;
656 else
657 { weightRep = taggerModelRunning->wrUnk; //wrUnk;
658 //bias =taggerModelRunning->biasUnk; //taggerBiasUnk;
663 if (numMaybe>1)
665 bool ret = true;
666 while (ret)
668 nodo_feature_list* aux= *featureList->getIndex();
669 if (aux->mark == SLASTW) sw->winPushSwnFeature(stk);
670 else if (aux->mark == WMARK) sw->winPushWordFeature((void *)aux,d,stk,direction);
671 else if (aux->mark == KMARK) sw->winPushAmbiguityFeature((void *)aux,d,stk,direction);
672 else if (aux->mark == MMARK) sw->winPushMaybeFeature((void *)aux,d,stk,direction);
673 else if (aux->mark == PMARK) sw->winPushPosFeature((void *)aux,d,stk,direction);
674 else if (aux->mark == MFTMARK) sw->winPushMFTFeature((void *)aux,d,stk,direction);
675 else if (is_unk==TRUE)
677 int *param;
678 if (!aux->l.isEmpty())
680 param = *aux->l.getIndex();
682 if (aux->mark == PREFIX_MARK) sw->winPushPrefixFeature(elem->wrd, stk, *param);
683 else if (aux->mark == SUFFIX_MARK) sw->winPushSuffixFeature(elem->wrd, stk, *param);
684 else if (aux->mark == CHAR_A_MARK) sw->winPushLetterFeature(elem->wrd, stk, COUNTING_FROM_BEGIN, *param);
685 else if (aux->mark == CHAR_Z_MARK) sw->winPushLetterFeature(elem->wrd, stk, COUNTING_FROM_END, *param);
686 else if (aux->mark == LENGTH_MARK) sw->winPushLenghtFeature(elem->wrd,stk);
687 else if (aux->mark == START_CAPITAL_MARK) sw->winPushStartWithCapFeature(elem->wrd,stk);
688 else if (aux->mark == START_LOWER_MARK) sw->winPushStartWithLowerFeature(elem->wrd,stk);
689 else if (aux->mark == START_NUMBER_MARK) sw->winPushStartWithNumberFeature(elem->wrd,stk);
690 else if (aux->mark == ALL_UPPER_MARK) sw->winPushAllUpFeature(elem->wrd,stk);
691 else if (aux->mark == ALL_LOWER_MARK) sw->winPushAllLowFeature(elem->wrd,stk);
692 else if (aux->mark == CONTAIN_CAP_MARK) sw->winPushContainCapFeature(elem->wrd, stk);
693 else if (aux->mark == CONTAIN_CAPS_MARK) sw->winPushContainCapsFeature(elem->wrd, stk);
694 else if (aux->mark == CONTAIN_COMMA_MARK) sw->winPushContainCommaFeature(elem->wrd, stk);
695 else if (aux->mark == CONTAIN_NUMBER_MARK) sw->winPushContainNumFeature(elem->wrd, stk);
696 else if (aux->mark == CONTAIN_PERIOD_MARK) sw->winPushContainPeriodFeature(elem->wrd, stk);
697 else if (aux->mark == MULTIWORD_MARK) sw->winPushMultiwordFeature(elem->wrd, stk);
699 ret = featureList->next();
701 featureList->setFirst();
703 endFexTime = times(&tbuffEndFex);
704 realFexTime = realFexTime + ((double)(endFexTime-startFexTime))/CLOCKS_PER_SECOND;
705 usrFexTime = usrFexTime + (((double)tbuffEndFex.tms_utime-(double)tbuffStartFex.tms_utime)/CLOCKS_PER_SECOND);
706 sysFexTime = sysFexTime + (((double)tbuffEndFex.tms_stime-(double)tbuffStartFex.tms_stime)/CLOCKS_PER_SECOND);
708 startSVMTime = times(&tbuffStartSVM);
710 elem->strScores = taggerSumWeight(weightRep,weight,numMaybe,&max);
711 //std::cerr << "tagger::taggerGenerateScore got elem strScores: '" << elem->strScores << "'" << std::endl;
713 endSVMTime = times(&tbuffEndSVM);
714 realSVMTime = realSVMTime + ((double)(endSVMTime-startSVMTime))/CLOCKS_PER_SECOND;
715 usrSVMTime = usrSVMTime + (((double)tbuffEndSVM.tms_utime-(double)tbuffStartSVM.tms_utime)/CLOCKS_PER_SECOND);
716 sysSVMTime = sysSVMTime + (((double)tbuffEndSVM.tms_stime-(double)tbuffStartSVM.tms_stime)/CLOCKS_PER_SECOND);
719 elem->pos = weight[max].pos;
720 elem->weight = weight[max].data;
722 if (flow =="LRL")
724 weight_node_t* score = new weight_node_t();
725 score->data = weight[max].data;
726 score->pos = weight[max].pos;
727 elem->stackScores.push(score);
730 delete[] weight;
735 /***************************************************************/
737 /* Returns an array ready to be filled with maybe informations */
738 weight_node_t *tagger::taggerCreateWeightNodeArray(int numMaybe,dataDict* index)
740 int j = numMaybe;
741 weight_node_t *weight = new weight_node_t[numMaybe];
742 simpleList<infoDict*> *list = &d->getElementMaybe(index);
744 bool ret=true;
745 while (ret && numMaybe > 0)
747 infoDict *pInfoDict = *list->getIndex();
748 j--;
749 weight[j].pos = pInfoDict->pos;
750 weight[j].data = 0;
751 ret=list->next();
754 list->setFirst();
755 return weight;
758 /***************************************************************/
760 std::string tagger::taggerSumWeight(weightRepository* wRep, weight_node_t* weight, int numMaybe, int* max)
762 // weight_node_t *aux;
763 long double w,b = 0;
764 std::string feature;
765 int putBias=1;
767 while (!stk.empty())
769 *max=0;
770 feature = stk.top();
771 stk.pop();
772 for (int j=0; j<numMaybe;j++)
774 if (putBias)
776 b = wRep->wrGetWeight("BIASES",weight[j].pos);
777 weight[j].data = weight[j].data - b;
779 w = wRep->wrGetWeight(feature,weight[j].pos);
780 weight[j].data=weight[j].data+w;
781 if (((float)weight[*max].data)<((float)weight[j].data)) *max=j;
783 putBias=0;
785 std::ostringstream tmp;
786 if ( true )
788 for (int i=0; i<numMaybe; i++)
790 if ( i == 0 )
792 tmp << weight[i].pos << "_" << (float)weight[i].data;
794 else
796 tmp << " " << weight[i].pos << "_" << (float)weight[i].data;
800 //std::cerr << "tagger::taggerSumWeight generated string '" << tmp.str() << "'" << std::endl;
801 return tmp.str();
804 /***************************************************************/
805 /***************************************************************/
807 weight_node_t *tagger::taggerCreateWeightUnkArray(int *numMaybe)
809 int i=0;
810 char c=' ';
811 FILE *f;
812 std::string name = taggerModelName + ".UNKP";
814 *numMaybe = 0;
816 if ((f = fopen(name.c_str(), "rt"))== NULL)
818 std::cerr << "Error opening file: "<<name<<std::endl;
819 exit(0);
822 // first read: count the number of POS
823 while (!feof(f))
824 { if (fgetc(f)=='\n') (*numMaybe)++;
827 // second read, fill in the weight nodes
828 fseek(f,0,SEEK_SET);
830 weight_node_t *weight = new weight_node_t[*numMaybe];
831 while (!feof(f) && (i<*numMaybe))
832 { weight[i].pos = "";
833 weight[i].data=0;
834 c = fgetc(f);
835 while ((c!='\n') && (!feof(f)))
837 if (c!=' ' && c!='\n' && c!='\t') weight[i].pos += c;
838 c = fgetc(f);
840 i++;
842 fclose(f);
843 return weight;
846 /***************************************************************/
847 /***************************************************************/
849 void tagger::taggerStadistics(int numWords, int numSentences, double /*realTime*/, double usrTime, double sysTime)
851 std::ostringstream message;
852 float media=0;
853 if ((sysTime+usrTime)!=0) media = (float) (((double) numWords)/(sysTime+usrTime));
855 message << std::endl << numSentences <<" sentences were tagged." << std::endl;
856 message << numWords << " words were tagged." << std::endl;
857 message << media << " words/second were tagged." << std::endl;
858 std::cerr << message.str();
861 /***************************************************************/
863 void tagger::taggerShowVerbose(int num,int isEnd)
865 if (isEnd) { std::cerr<<"."<<num<<" sentences [DONE]"<<std::endl<<std::endl; return; }
866 else if (num%100==0) std::cerr <<num;
867 else if (num%10==0) std::cerr << ".";