2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 #include "strategies.h"
35 /***************************************************************/
// File-scope timing state.
//  - tbuffStartUp / tbuffEndStartUp and startUpTime / endStartUpTime receive
//    the times() samples taken in tagger::taggerLoadModelsForTagging().
//  - sys/usr/real FexTime and SVMTime are running totals that
//    tagger::taggerGenerateScore() adds to and tagger::taggerRun() reports.
// NOTE(review): these are plain globals, so they are shared by every tagger
// instance in the process -- confirm that is intended.
37 struct tms tbuffStartUp
,tbuffEndStartUp
;
38 clock_t startUpTime
,endStartUpTime
;
39 double sysFexTime
=0, usrFexTime
=0,realFexTime
=0;
40 double sysSVMTime
=0, usrSVMTime
=0,realSVMTime
=0;
42 /***************************************************************/
47 /***************************************************************/
// Builds a heap-allocated hash_t<weight_node_t*> from the text file `name`.
//
// Visible behaviour: the file is opened in "rt" mode and an error message is
// printed to std::cerr if it cannot be opened. For each entry a new
// weight_node_t is allocated; characters are consumed up to '\n' or EOF.
// Non-blank characters are appended to `weight` while i==1, or to w->pos
// (skipping ':') while i==0. w->data is zeroed, an istringstream is built over
// `weight`, and the node is inserted into the hash under the key w->pos.
//
// NOTE(review): local declarations, the outer loop, the extraction from `iss`
// and the return statement are not present in this listing -- confirm against
// the full source before relying on this description.
49 hash_t
<weight_node_t
*> *tagger::taggerCreateBiasHash(const std::string
& name
)
51 hash_t
<weight_node_t
*> *bias
= new hash_t
<weight_node_t
*>;
59 if ((f
= fopen(name
.c_str(), "rt"))== NULL
)
61 std::cerr
<< "Error opening file: "<<name
<<std::endl
;
70 w
= new weight_node_t
;
74 while ((c
!='\n') && (!feof(f
)))
76 if (c
!=' ' && c
!='\n' && c
!='\t' && i
==1) weight
.push_back(c
);
77 else if (c
!=' ' && c
!='\n' && c
!='\t' && i
==0)
79 if (c
!=':') w
->pos
+= c
;
84 w
->data
= (long double)0;
85 std::istringstream
iss(weight
);
87 bias
->hash_insert(w
->pos
,w
);
// NOTE(review): this skip-to-end-of-line loop only stops on '\n'; it has no
// EOF test, so a final line without a trailing newline looks like it would
// spin forever -- confirm.
89 else while((c
=fgetc(f
))!='\n');
95 /***************************************************************/
// Constructs a tagger bound to the model whose base file name is `model`.
// Visible initial state: stk default-constructed, sw null, score display off,
// comment display on, strategy STRA_1P_DEFAULT, window length -1, no backup
// dictionary.
// NOTE(review): other member initialisations (original lines 98-100, 103-105,
// 107, 111) are not present in this listing.
97 tagger::tagger(const std::string
& model
) : stk(), sw(0)
101 taggerShowScoresFlag
= false;
102 taggerShowCommentsFlag
= true;
106 taggerStrategy
= STRA_1P_DEFAULT
; //modstrat //0;
108 taggerWinLength
= -1;
109 taggerModelName
= model
;
110 taggerBackupDict
="";
113 /***************************************************************/
// Loads the feature lists and merged weight repositories for one model slot.
//
//  model          - models_t entry to fill (featureList, featureListUnk, wr,
//                   wrUnk, wr2, wrUnk2 are assigned below).
//  taggerNumModel - numeric model id embedded in the file names
//                   (<model>.A<n>, <model>.M<n>.<flow>.MRG, ...).
//
// File names are built from taggerModelName; progress goes to std::cerr when
// `verbose` is set. Known-word repositories use taggerKFilter, unknown-word
// repositories use taggerUFilter.
//
// NOTE(review): `name` is only ever appended to in the visible lines; the
// resets between uses and the conditions selecting the flow1/flow2 sections
// are not present in this listing.
115 void tagger::taggerLoadModels(models_t
*model
, int taggerNumModel
)
117 std::string flow2
,flow1
;
118 std::ostringstream name
;
120 //Load the list of "features" for known words
121 name
<< taggerModelName
<< ".A" << taggerNumModel
;
122 if (verbose
) std::cerr
<< std::endl
<< "Loading FEATURES FOR KNOWN WORDS from < "<<name
.str()<<" >"<< std::endl
;
123 createFeatureList(name
.str(),&model
->featureList
);
124 //Load the list of "features" for unknown words
126 name
<< taggerModelName
<< ".A" << taggerNumModel
<< ".UNK";
127 if (verbose
) std::cerr
<< std::endl
<< "Loading FEATURES FOR UNKNOWN WORDS from < "<<name
.str()<<" >"<< std::endl
;
128 createFeatureList(name
.str(),&model
->featureListUnk
);
132 flow1
= "LR"; flow2
= "RL";
// NOTE(review): the banner below prints flow1 ("LR") with the label
// "(Right-to-Left)", while the files loaded next use flow2 ("RL") -- the
// message looks inconsistent; confirm.
135 name
<< flow1
<< " (Right-to-Left)";
136 if (verbose
) std::cerr
<< std::endl
<<"READING MODELS < direction = "<<name
.str()<<" >"<<std::endl
;
139 name
<< taggerModelName
<< ".M"<<taggerNumModel
<<"."<<flow2
<<".MRG";
140 if (verbose
) std::cerr
<<"-. Loading MERGED MODEL FOR KNOWN WORDS from < "<<name
.str()<<" >"<< std::endl
;
141 model
->wr2
= new weightRepository(name
.str(),taggerKFilter
);
144 name
<< taggerModelName
<< ".UNK.M"<<taggerNumModel
<<"."<<flow2
<<".MRG";
// NOTE(review): "UNKKNOWN" in the message below looks like a typo for "UNKNOWN".
145 if (verbose
) std::cerr
<<"-. Loading MERGED MODEL FOR UNKKNOWN WORDS from < "<<name
.str()<<" >"<<std::endl
<<std::endl
;
146 model
->wrUnk2
= new weightRepository(name
.str(),taggerUFilter
);
151 if (flow1
=="RL") name
<<flow1
<<" (Right-to-Left)";
152 else name
<< flow1
<< " (Left-to-Right)";
154 if (verbose
) std::cerr
<<std::endl
<<"READING MODELS < direction = "<<name
.str()<<" >"<<std::endl
;
157 name
<< taggerModelName
<< ".M"<<taggerNumModel
<<"."<<flow1
<<".MRG";
158 if (verbose
) std::cerr
<<"-. Loading MERGED MODEL FOR KNOWN WORDS from < "<<name
.str()<<" >"<<std::endl
;
159 model
->wr
= new weightRepository(name
.str(),taggerKFilter
);
162 name
<< taggerModelName
<< ".UNK.M"<<taggerNumModel
<<"."<<flow1
<<".MRG";
163 if (verbose
) std::cerr
<<"-. Loading MERGED MODEL FOR UNKNOWN WORDS from < "<<name
.str()<<" >"<<std::endl
;
164 model
->wrUnk
= new weightRepository(name
.str(),taggerUFilter
);
168 /***************************************************************/
// Loads everything needed before tagging: the dictionary and the model set
// required by the current taggerStrategy.
//
// Steps visible here:
//  1. Sample times() into the file-scope start-up timers.
//  2. Create the dictionary from <model>.DICT, passing taggerBackupDict as a
//     second argument when it is non-empty.
//  3. Set modelsNeeded to 2 for STRA_2P_RELABELING and STRA_1P_ROBUST_UNK,
//     allocate taggerModelList and point taggerModelRunning at slot 0.
//  4. Call taggerLoadModels() with the model numbers for the strategy
//     (0; 3; 0 and 2; 4; 2 and 1), or print an error for an unknown strategy.
//  5. Sample times() again for the end of start-up.
//
// NOTE(review): the default value of modelsNeeded and what follows the
// unknown-strategy message are not present in this listing.
170 void tagger::taggerLoadModelsForTagging()
172 startUpTime
= times(&tbuffStartUp
);
176 std::string name
= taggerModelName
+ ".DICT";
177 if (!taggerBackupDict
.empty())
179 if (verbose
) std::cerr
<< "Loading DICTIONARY from < "<<name
<<" > with BACKUP DICTIONARY from < "<<taggerBackupDict
<<" >"<< std::endl
;
180 d
= new dictionary(name
,taggerBackupDict
);
184 if (verbose
) std::cerr
<<"Loading DICTIONARY from < "<<name
<<" >"<<std::endl
;
185 d
= new dictionary(name
);
188 name
= taggerModelName
+ ".UNKP";
189 if (verbose
) std::cerr
<< "Loading UNKNOWN WORDS POS from < "<<name
<<" >"<<std::endl
;
191 if ( taggerStrategy
== STRA_2P_RELABELING
//modstrat 1
192 || taggerStrategy
== STRA_1P_ROBUST_UNK
/*modstrat 4*/ ) modelsNeeded
= 2;
194 taggerModelList
= new models_t
[modelsNeeded
];
195 taggerModelRunning
= &taggerModelList
[0];
197 if (taggerStrategy
== STRA_1P_DEFAULT
) //modstrat 0)
198 taggerLoadModels(taggerModelRunning
,0);
199 else if (taggerStrategy
== STRA_1P_UNSUPERVISED
) //modstrat 2)
200 taggerLoadModels(taggerModelRunning
,3);
201 else if (taggerStrategy
== STRA_1P_ROBUST_UNK
) //modstrat 4)
203 taggerLoadModels(taggerModelRunning
,0);
204 taggerLoadModels(&taggerModelList
[1],2);
206 else if (taggerStrategy
== STRA_1P_VERY_ROBUST_UNK
) //modstrat 5)
207 taggerLoadModels(taggerModelRunning
,4);
208 else if (taggerStrategy
== STRA_2P_RELABELING
)//modstrat 1)
210 taggerLoadModels(taggerModelRunning
,2);
211 taggerLoadModels(&taggerModelList
[1],1);
216 std::cerr
<<"Execution error: Strategy "<<taggerStrategy
<<" doesn't exist!!"<<std::endl
<<std::endl
;
220 endStartUpTime
= times(&tbuffEndStartUp
);
// (Re)creates the sliding window `sw` over the given input/output streams.
//
// Any existing window is deleted first. If neither a window length nor a
// window index has been set (both -1), <model>.WIN is opened: when it does
// not exist the swindow is built with default geometry; otherwise two
// integers (length, index) are read from it and used. If only the index is
// unset the window is built from the length alone; otherwise from both.
//
// NOTE(review): the result of fscanf is not checked in the visible code, and
// the matching fclose is not present in this listing -- confirm.
223 void tagger::taggerInit(std::istream
& input
, std::ostream
& output
)
225 // int modelsNeeded=1;
228 if (sw
!= 0) delete sw
;
229 //Check whether a .WIN file exists
230 if (taggerWinIndex
==-1 && taggerWinLength
==-1)
232 name
= taggerModelName
+ ".WIN";
233 FILE *f
= fopen (name
.c_str(),"r");
234 if ( f
== NULL
) sw
= new swindow(input
, &output
, d
);
237 fscanf(f
,"%d %d",&taggerWinLength
,&taggerWinIndex
);
239 sw
= new swindow(input
,taggerWinLength
,taggerWinIndex
, &output
, d
);
242 else if (taggerWinIndex
==-1) sw
= new swindow (input
,taggerWinLength
, &output
, d
);
243 else sw
= new swindow (input
,taggerWinLength
,taggerWinIndex
, &output
, d
);
// Stream-less overload: discards any existing sliding window and creates a
// new swindow with the arguments (5, d).
// NOTE(review): the 5 is presumably the window length -- confirm against the
// swindow constructor.
246 void tagger::taggerInit()
248 if (sw
!= NULL
) delete sw
;
249 sw
= new swindow(5, d
);
252 /***************************************************************/
// Cleanup fragment. The enclosing function header is not present in this
// listing; presumably this is the body of tagger::~tagger() -- confirm.
//
// For each of the modelsNeeded entries of taggerModelList it deletes the wr
// and wrUnk repositories and destroys both feature lists; wr2 and wrUnk2 are
// deleted as well (the condition guarding them is not visible). Finally the
// model array itself is released with delete[].
// NOTE(review): the statements under the STRA_2P_RELABELING test are not visible.
258 if (taggerStrategy
== STRA_2P_RELABELING
) //modstrat 1)
264 for (int i
=0;i
<modelsNeeded
;i
++)
267 delete taggerModelList
[i
].wr
;
268 delete taggerModelList
[i
].wrUnk
;
269 destroyFeatureList(&taggerModelList
[i
].featureList
);
270 destroyFeatureList(&taggerModelList
[i
].featureListUnk
);
274 delete taggerModelList
[i
].wr2
;
275 delete taggerModelList
[i
].wrUnk2
;
278 delete[] taggerModelList
;
281 /***************************************************************/
282 /***************************************************************/
// Sets taggerShowCommentsFlag to true; the flag is passed to sw->show() by
// the taggerDo* methods.
285 void tagger::taggerShowComments()
287 taggerShowCommentsFlag
= true;
290 /***************************************************************/
// Sets taggerShowCommentsFlag to false; the flag is passed to sw->show() by
// the taggerDo* methods.
292 void tagger::taggerShowNoComments()
294 taggerShowCommentsFlag
= false;
298 /***************************************************************/
// Sets taggerShowScoresFlag to true; the flag is passed to sw->show() by the
// taggerDo* methods.
301 void tagger::taggerActiveShowScoresFlag()
303 taggerShowScoresFlag
= true;
306 /***************************************************************/
// Sets taggerShowScoresFlag to false; the flag is passed to sw->show() by the
// taggerDo* methods.
308 void tagger::taggerDesactiveShowScoresFlag()
310 this->taggerShowScoresFlag
= false;
313 /***************************************************************/
// Setter taking the tagging direction as a string.
// NOTE(review): the body is not present in this listing; presumably it stores
// inFlow in the `flow` member that other methods compare against "LR", "RL"
// and "LRL" -- confirm.
315 void tagger::taggerPutFlow(const std::string
& inFlow
)
320 /***************************************************************/
// Stores `num` as the tagging strategy. taggerLoadModelsForTagging() and
// taggerRun() compare taggerStrategy against the STRA_* constants; no range
// check is done here.
322 void tagger::taggerPutStrategy(int num
)
324 taggerStrategy
= num
;
327 /***************************************************************/
// Setter taking a window length.
// NOTE(review): the body is not present in this listing; presumably it
// assigns `l` to taggerWinLength -- confirm.
329 void tagger::taggerPutWinLength(int l
)
334 /***************************************************************/
// Setter taking a window index.
// NOTE(review): the body is not present in this listing; presumably it
// assigns `i` to taggerWinIndex -- confirm.
336 void tagger::taggerPutWinIndex(int i
)
341 /***************************************************************/
// Stores the backup dictionary file name. taggerLoadModelsForTagging() passes
// it to the dictionary constructor when it is non-empty.
343 void tagger::taggerPutBackupDictionary(const std::string
& dictName
)
345 taggerBackupDict
= dictName
;
348 /***************************************************************/
// Stores the filter value handed to weightRepository when the known-word
// models are loaded in taggerLoadModels().
350 void tagger::taggerPutKWeightFilter(float kfilter
)
352 taggerKFilter
= kfilter
;
355 /***************************************************************/
// Stores the filter value handed to weightRepository when the unknown-word
// models are loaded in taggerLoadModels().
357 void tagger::taggerPutUWeightFilter(float ufilter
)
359 taggerUFilter
= ufilter
;
362 /***************************************************************/
363 /***************************************************************/
364 /***************************************************************/
365 /***************************************************************/
// Scoring pass with direction code 1 that picks the model per element.
//
// The window is rewound with sw->previous() until it returns false, then each
// element is scored with taggerGenerateScore(elem, 1). Before each score,
// sw->winExistUnkWord(1, d) selects the model: taggerModelList[1] when it
// returns -1, taggerModelList[0] otherwise. When flow is "LRL" the POS values
// are materialised with winMaterializePOSValues(1).
//
// NOTE(review): the forward loop header, the word counter and the return
// statement are not present in this listing; callers add the return value to
// a word count.
367 int tagger::taggerRightSenseSpecialForUnknown()
371 while(sw
->previous());
372 nodo
*elem
= sw
->getIndex();
374 if (sw
->winExistUnkWord(1,d
)==-1)
375 taggerModelRunning
=&taggerModelList
[1];
376 else taggerModelRunning
=&taggerModelList
[0];
378 taggerGenerateScore(elem
,1);
382 elem
= sw
->getIndex();
384 if (sw
->winExistUnkWord(1,d
)==-1)
385 taggerModelRunning
=&taggerModelList
[1];
386 else taggerModelRunning
=&taggerModelList
[0];
388 taggerGenerateScore(elem
,1);
392 if (flow
== "LRL") sw
->winMaterializePOSValues(1);
397 /***************************************************************/
// Scoring pass with direction code 2 that picks the model per element.
//
// Starting from the current window index, each element is scored with
// taggerGenerateScore(elem, 2) while sw->previous() succeeds. Before each
// score, sw->winExistUnkWord(2, d) selects the model: taggerModelList[1] when
// it returns -1, taggerModelList[0] otherwise. When flow is "LRL" the POS
// values are materialised with winMaterializePOSValues(0).
//
// NOTE(review): the initial positioning of the window, the word counter and
// the return statement are not present in this listing.
399 int tagger::taggerLeftSenseSpecialForUnknown()
403 nodo
*elem
= sw
->getIndex();
404 if (sw
->winExistUnkWord(2,d
)==-1)
405 taggerModelRunning
=&taggerModelList
[1];
406 else taggerModelRunning
=&taggerModelList
[0];
408 taggerGenerateScore(elem
,2);
410 while(sw
->previous())
412 elem
= sw
->getIndex();
414 if (sw
->winExistUnkWord(2,d
)==-1)
415 taggerModelRunning
=&taggerModelList
[1];
416 else taggerModelRunning
=&taggerModelList
[0];
418 taggerGenerateScore(elem
,2);
422 if (flow
=="LRL") sw
->winMaterializePOSValues(0);
426 /***************************************************************/
// Scoring pass with direction code 1 using the current taggerModelRunning.
//
// The window is rewound with sw->previous() until it returns false, then
// elements are scored with taggerGenerateScore(elem, 1). An error is printed
// to std::cerr for a null index at the start (the test itself is not
// visible). When flow is "LRL" the POS values are materialised with
// winMaterializePOSValues(1).
//
// NOTE(review): the forward loop header, the word counter and the return
// statement are not present in this listing.
428 int tagger::taggerRightSense()
432 while(sw
->previous());
433 nodo
*elem
= sw
->getIndex();
436 std::cerr
<< "tagger::taggerRightSense: ERROR index null at beginning" << std::endl
;
439 taggerGenerateScore(elem
,1);
443 elem
= sw
->getIndex();
444 taggerGenerateScore(elem
,1);
448 if (flow
=="LRL") sw
->winMaterializePOSValues(1);
453 /***************************************************************/
// Scoring pass with direction code 2 using the current taggerModelRunning.
//
// The element at the current window index is scored with
// taggerGenerateScore(elem, 2), then the same is done for every element
// reached while sw->previous() succeeds. When flow is "LRL" the POS values
// are materialised with winMaterializePOSValues(0).
//
// NOTE(review): the initial positioning of the window, the word counter and
// the return statement are not present in this listing.
455 int tagger::taggerLeftSense()
459 nodo
*elem
= sw
->getIndex();
460 taggerGenerateScore(elem
,2);
462 while(sw
->previous())
464 elem
= sw
->getIndex();
465 taggerGenerateScore(elem
,2);
469 if (flow
=="LRL") sw
->winMaterializePOSValues(0);
473 /***************************************************************/
// Runs the tagging strategy selected by taggerStrategy and reports timings.
//
// times() is sampled before and after the strategy dispatch:
//   STRA_1P_DEFAULT, STRA_1P_UNSUPERVISED, STRA_1P_VERY_ROBUST_UNK -> taggerDoNormal
//   STRA_2P_RELABELING                                            -> taggerDoNTimes
//   STRA_1P_ROBUST_UNK                                            -> taggerDoSpecialForUnknown
//   STRA_1P_SENTENCE_LEVEL, STRA_1P_ROBUST_SENTENCE_LEVEL         -> no work
// The report block prints start-up, feature-extraction, SVM, process, tagging
// and overall times via showTime(), then calls taggerStadistics().
// "Process time" is the tagging time minus the feature-extraction and SVM
// totals accumulated in the file-scope counters.
//
// NOTE(review): the declarations of start/end and the condition guarding the
// report block are not present in this listing (presumably `verbose`).
// NOTE(review): tick counts are divided by CLOCKS_PER_SECOND, which is defined
// outside this file; times() counts in sysconf(_SC_CLK_TCK) ticks -- confirm
// the macro matches.
475 void tagger::taggerRun()
477 int contWords
=0,contSentences
=0;
479 struct tms tbuff1
,tbuff2
;
481 start
= times(&tbuff1
);
483 switch(taggerStrategy
)
485 case STRA_1P_DEFAULT
/*modstrat 0*/: taggerDoNormal(&contWords
,&contSentences
); break;
486 case STRA_2P_RELABELING
/*modstrat 1*/: taggerDoNTimes(&contWords
,&contSentences
,taggerNumLaps
); break;
487 case STRA_1P_UNSUPERVISED
/*modstrat 2*/: taggerDoNormal(&contWords
,&contSentences
); break;
488 case STRA_1P_SENTENCE_LEVEL
/*modstrat 3*/: /*taggerDoNTimes(&contWords,&contSentences,taggerNumLaps);*/ break;
489 case STRA_1P_ROBUST_UNK
/*modstrat 4*/: taggerDoSpecialForUnknown(&contWords
,&contSentences
); break;
490 case STRA_1P_VERY_ROBUST_UNK
/*modstrat 5*/: taggerDoNormal(&contWords
,&contSentences
); break;
491 case STRA_1P_ROBUST_SENTENCE_LEVEL
: break;
493 end
= times(&tbuff2
);
497 { taggerShowVerbose(contSentences
,1);
499 std::cerr
<<"* -------------------------------------------------------------------"<<std::endl
;
500 showTime("Start Up Time",
501 ((double)(endStartUpTime
-startUpTime
))/CLOCKS_PER_SECOND
,
502 ((double)tbuffEndStartUp
.tms_utime
-(double)tbuffStartUp
.tms_utime
)/CLOCKS_PER_SECOND
,
503 ((double)tbuffEndStartUp
.tms_stime
-(double)tbuffStartUp
.tms_stime
)/CLOCKS_PER_SECOND
);
504 std::cerr
<<"* -------------------------------------------------------------------"<<std::endl
;
505 showTime("Features Extraction Time",realFexTime
,usrFexTime
,sysFexTime
);
506 showTime("SVM Time",realSVMTime
,usrSVMTime
,sysSVMTime
);
507 showTime("Process Time",((double)(end
-start
))/CLOCKS_PER_SECOND
- realFexTime
- realSVMTime
,
508 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
)/CLOCKS_PER_SECOND
- usrFexTime
-usrSVMTime
,
509 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
)/CLOCKS_PER_SECOND
- sysFexTime
-sysSVMTime
);
510 std::cerr
<<"* -------------------------------------------------------------------"<<std::endl
;
511 std::cerr
<<"[ Tagging Time = Feature Extraction Time + SVM Time + Process Time ]"<<std::endl
;
512 showTime("Tagging Time",((double)(end
-start
))/CLOCKS_PER_SECOND
,
513 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
)/CLOCKS_PER_SECOND
,
514 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
)/CLOCKS_PER_SECOND
);
515 std::cerr
<<"* -------------------------------------------------------------------"<<std::endl
;
516 std::cerr
<<"[ Overall Time = Start up Time + Tagging Time ]"<<std::endl
;
517 showTime("Overall Time",((double)(end
-start
+endStartUpTime
-startUpTime
))/CLOCKS_PER_SECOND
,
518 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
+
519 (double)tbuffEndStartUp
.tms_utime
-(double)tbuffStartUp
.tms_utime
)/CLOCKS_PER_SECOND
,
520 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
+
521 (double)tbuffEndStartUp
.tms_stime
-(double)tbuffStartUp
.tms_stime
)/CLOCKS_PER_SECOND
);
522 std::cerr
<<"* -------------------------------------------------------------------"<<std::endl
;
523 taggerStadistics(contWords
,contSentences
,
524 ((double)(end
-start
))/CLOCKS_PER_SECOND
,
525 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
)/CLOCKS_PER_SECOND
,
526 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
)/CLOCKS_PER_SECOND
);
530 /***************************************************************/
// Single-model tagging loop.
//
//  numWords     - out: number of words tagged.
//  numSentences - out: number of sentences processed.
//
// Per window: runs taggerRightSense() when flow is "LRL" or "LR" and
// taggerLeftSense() when flow is "LRL" or "RL", prints the window with
// sw->show(), then reloads it through sw->iniGeneric(d). The word total is
// the left-pass count when that is non-zero, otherwise the right-pass count,
// divided by taggerNumLaps.
//
// NOTE(review): the loop header, its exit condition on `ret`, and the
// increment of contSentences are not present in this listing.
// NOTE(review): taggerNumLaps is used as a divisor; confirm it cannot be 0.
532 void tagger::taggerDoNormal(int *numWords
, int *numSentences
)
534 int contWordsLR
=0,contWordsRL
=0,contSentences
=0,ret
= 1;
538 if (verbose
) taggerShowVerbose(contSentences
,0);
540 if ((flow
=="LRL") || (flow
=="LR"))
541 contWordsLR
= contWordsLR
+taggerRightSense();
542 if ((flow
=="LRL") || (flow
== "RL"))
543 contWordsRL
= contWordsRL
+taggerLeftSense();
545 sw
->show(taggerShowScoresFlag
, taggerShowCommentsFlag
);
547 ret
= sw
->iniGeneric(d
);
549 if (contWordsRL
==0) *numWords
=contWordsLR
/taggerNumLaps
;
550 else *numWords
=contWordsRL
/taggerNumLaps
;
551 *numSentences
= contSentences
;
554 /***************************************************************/
// Tagging loop that switches model per element via the *SpecialForUnknown
// passes.
//
//  numWords     - out: number of words tagged.
//  numSentences - out: number of sentences processed.
//
// Same shape as taggerDoNormal(), but calls
// taggerRightSenseSpecialForUnknown() / taggerLeftSenseSpecialForUnknown().
// The word total is the left-pass count when that is non-zero, otherwise the
// right-pass count, divided by taggerNumLaps.
//
// NOTE(review): the loop header, its exit condition on `ret`, and the
// increment of contSentences are not present in this listing.
// NOTE(review): taggerNumLaps is used as a divisor; confirm it cannot be 0.
556 void tagger::taggerDoSpecialForUnknown(int *numWords
, int *numSentences
)
558 int contWordsLR
=0,contWordsRL
=0,contSentences
=0,ret
= 1;
562 if (verbose
) taggerShowVerbose(contSentences
,0);
564 if ((flow
== "LRL") || (flow
=="LR"))
565 contWordsLR
= contWordsLR
+taggerRightSenseSpecialForUnknown();
566 if ((flow
== "LRL") || (flow
=="RL"))
567 contWordsRL
= contWordsRL
+taggerLeftSenseSpecialForUnknown();
570 sw
->show(taggerShowScoresFlag
, taggerShowCommentsFlag
);
572 ret
= sw
->iniGeneric(d
);
574 if (contWordsRL
==0) *numWords
=contWordsLR
/taggerNumLaps
;
575 else *numWords
=contWordsRL
/taggerNumLaps
;
576 *numSentences
= contSentences
;
579 /***************************************************************/
// Multi-pass tagging loop.
//
//  numWords     - out: number of words tagged.
//  numSentences - out: number of sentences processed.
//  laps         - number of passes over each window.
//
// For pass `pasadas` the running model is taggerModelList[pasadas]. Each pass
// runs taggerRightSense() when flow is "LRL" or "LR" and taggerLeftSense()
// when flow is "LRL" or "RL"; on passes after the first with flow "LRL" the
// POS values are materialised with winMaterializePOSValues(2) in between.
// The window is then shown and reloaded through sw->iniGeneric(d). Word
// counts accumulate over every pass, so the total is divided by taggerNumLaps.
//
// NOTE(review): taggerModelList is indexed by the pass number, so `laps` must
// not exceed the number of models allocated in taggerLoadModelsForTagging()
// -- confirm callers guarantee this.
// NOTE(review): the outer loop header and the increment of contSentences are
// not present in this listing.
581 void tagger::taggerDoNTimes(int *numWords
, int *numSentences
,int laps
)
583 int contWordsLR
=0,contWordsRL
=0,contSentences
=0,ret
= 1;
587 if (verbose
) taggerShowVerbose(contSentences
,0);
589 for (int pasadas
=0;pasadas
<laps
;pasadas
++)
591 taggerModelRunning
= &taggerModelList
[pasadas
];
592 if ((flow
== "LRL") || (flow
== "LR"))
593 contWordsLR
= contWordsLR
+taggerRightSense();
594 if (flow
== "LRL" && pasadas
>0)
595 sw
->winMaterializePOSValues(2);
596 if ((flow
== "LRL") || (flow
=="RL"))
597 contWordsRL
= contWordsRL
+taggerLeftSense();
601 sw
->show(taggerShowScoresFlag
, taggerShowCommentsFlag
);
603 ret
= sw
->iniGeneric(d
);
605 if (contWordsRL
==0) *numWords
=contWordsLR
/taggerNumLaps
;
606 else *numWords
=contWordsRL
/taggerNumLaps
;
607 *numSentences
= contSentences
;
610 /***************************************************************/
// Scores one window element and records its best part-of-speech.
//
//  elem      - window node to score; its strScores, pos, weight and
//              stackScores fields are written.
//  direction - pass code forwarded to the feature extractors; together with
//              flow=="LRL", the value 2 selects the wr2 / wrUnk2 repositories
//              instead of wr / wrUnk.
//
// Outline of the visible code:
//  1. Look elem->wrd up in the dictionary. If the result is not HASH_FAIL the
//     known-word feature list is used and the candidate array comes from
//     taggerCreateWeightNodeArray(); otherwise the unknown-word feature list
//     and taggerCreateWeightUnkArray() are used.
//  2. Walk the chosen feature list, dispatching on each entry's `mark` to the
//     matching sw->winPush*Feature() call, which fills `stk`. The
//     prefix/suffix/letter/shape features sit behind the is_unk==TRUE test.
//     The list is rewound with setFirst() afterwards.
//  3. Call taggerSumWeight() to get the score string and the index `max` of
//     the best candidate.
//  4. Copy weight[max] into elem->pos / elem->weight and push a heap-allocated
//     copy onto elem->stackScores.
// Steps 1-2 and step 3 are timed separately with times() and added to the
// file-scope *FexTime / *SVMTime totals.
//
// NOTE(review): several declarations (numMaybe, max, is_unk, param, ret), the
// branch/loop headers and the release of `weight` are not present in this
// listing. `score` is allocated with new and handed to elem->stackScores;
// who frees it is not visible here.
612 void tagger::taggerGenerateScore(nodo
*elem
,int direction
)
615 struct tms tbuffStartFex
,tbuffEndFex
;
616 clock_t startFexTime
,endFexTime
;
617 struct tms tbuffStartSVM
,tbuffEndSVM
;
618 clock_t startSVMTime
,endSVMTime
;
620 weight_node_t
*weight
;
621 weightRepository
*weightRep
;
624 simpleList
<nodo_feature_list
*> *featureList
;
626 startFexTime
= times(&tbuffStartFex
);
628 dataDict
* i
= d
->getElement(elem
->wrd
);
629 if ((long)i
!=HASH_FAIL
)
631 featureList
= &taggerModelRunning
->featureList
;
632 numMaybe
= d
->getElementNumMaybe(i
);
633 weight
= taggerCreateWeightNodeArray(numMaybe
,i
);
634 if ((flow
=="LRL") && (direction
==2))
636 weightRep
= taggerModelRunning
->wr2
; //wr2;
637 //bias = taggerModelRunning->bias2; //taggerBias2;
641 weightRep
= taggerModelRunning
->wr
; //wr;
642 //bias = taggerModelRunning->bias; //taggerBias;
647 featureList
= &taggerModelRunning
->featureListUnk
;
648 weight
= taggerCreateWeightUnkArray(&numMaybe
);
651 if (flow
=="LRL" && (direction
==2))
653 weightRep
= taggerModelRunning
->wrUnk2
; //wrUnk2;
654 //bias = taggerModelRunning->biasUnk2; //taggerBiasUnk2;
657 { weightRep
= taggerModelRunning
->wrUnk
; //wrUnk;
658 //bias =taggerModelRunning->biasUnk; //taggerBiasUnk;
668 nodo_feature_list
* aux
= *featureList
->getIndex();
669 if (aux
->mark
== SLASTW
) sw
->winPushSwnFeature(stk
);
670 else if (aux
->mark
== WMARK
) sw
->winPushWordFeature((void *)aux
,d
,stk
,direction
);
671 else if (aux
->mark
== KMARK
) sw
->winPushAmbiguityFeature((void *)aux
,d
,stk
,direction
);
672 else if (aux
->mark
== MMARK
) sw
->winPushMaybeFeature((void *)aux
,d
,stk
,direction
);
673 else if (aux
->mark
== PMARK
) sw
->winPushPosFeature((void *)aux
,d
,stk
,direction
);
674 else if (aux
->mark
== MFTMARK
) sw
->winPushMFTFeature((void *)aux
,d
,stk
,direction
);
675 else if (is_unk
==TRUE
)
678 if (!aux
->l
.isEmpty())
680 param
= *aux
->l
.getIndex();
682 if (aux
->mark
== PREFIX_MARK
) sw
->winPushPrefixFeature(elem
->wrd
, stk
, *param
);
683 else if (aux
->mark
== SUFFIX_MARK
) sw
->winPushSuffixFeature(elem
->wrd
, stk
, *param
);
684 else if (aux
->mark
== CHAR_A_MARK
) sw
->winPushLetterFeature(elem
->wrd
, stk
, COUNTING_FROM_BEGIN
, *param
);
685 else if (aux
->mark
== CHAR_Z_MARK
) sw
->winPushLetterFeature(elem
->wrd
, stk
, COUNTING_FROM_END
, *param
);
686 else if (aux
->mark
== LENGTH_MARK
) sw
->winPushLenghtFeature(elem
->wrd
,stk
);
687 else if (aux
->mark
== START_CAPITAL_MARK
) sw
->winPushStartWithCapFeature(elem
->wrd
,stk
);
688 else if (aux
->mark
== START_LOWER_MARK
) sw
->winPushStartWithLowerFeature(elem
->wrd
,stk
);
689 else if (aux
->mark
== START_NUMBER_MARK
) sw
->winPushStartWithNumberFeature(elem
->wrd
,stk
);
690 else if (aux
->mark
== ALL_UPPER_MARK
) sw
->winPushAllUpFeature(elem
->wrd
,stk
);
691 else if (aux
->mark
== ALL_LOWER_MARK
) sw
->winPushAllLowFeature(elem
->wrd
,stk
);
692 else if (aux
->mark
== CONTAIN_CAP_MARK
) sw
->winPushContainCapFeature(elem
->wrd
, stk
);
693 else if (aux
->mark
== CONTAIN_CAPS_MARK
) sw
->winPushContainCapsFeature(elem
->wrd
, stk
);
694 else if (aux
->mark
== CONTAIN_COMMA_MARK
) sw
->winPushContainCommaFeature(elem
->wrd
, stk
);
695 else if (aux
->mark
== CONTAIN_NUMBER_MARK
) sw
->winPushContainNumFeature(elem
->wrd
, stk
);
696 else if (aux
->mark
== CONTAIN_PERIOD_MARK
) sw
->winPushContainPeriodFeature(elem
->wrd
, stk
);
697 else if (aux
->mark
== MULTIWORD_MARK
) sw
->winPushMultiwordFeature(elem
->wrd
, stk
);
699 ret
= featureList
->next();
701 featureList
->setFirst();
703 endFexTime
= times(&tbuffEndFex
);
704 realFexTime
= realFexTime
+ ((double)(endFexTime
-startFexTime
))/CLOCKS_PER_SECOND
;
705 usrFexTime
= usrFexTime
+ (((double)tbuffEndFex
.tms_utime
-(double)tbuffStartFex
.tms_utime
)/CLOCKS_PER_SECOND
);
706 sysFexTime
= sysFexTime
+ (((double)tbuffEndFex
.tms_stime
-(double)tbuffStartFex
.tms_stime
)/CLOCKS_PER_SECOND
);
708 startSVMTime
= times(&tbuffStartSVM
);
710 elem
->strScores
= taggerSumWeight(weightRep
,weight
,numMaybe
,&max
);
711 //std::cerr << "tagger::taggerGenerateScore got elem strScores: '" << elem->strScores << "'" << std::endl;
713 endSVMTime
= times(&tbuffEndSVM
);
714 realSVMTime
= realSVMTime
+ ((double)(endSVMTime
-startSVMTime
))/CLOCKS_PER_SECOND
;
715 usrSVMTime
= usrSVMTime
+ (((double)tbuffEndSVM
.tms_utime
-(double)tbuffStartSVM
.tms_utime
)/CLOCKS_PER_SECOND
);
716 sysSVMTime
= sysSVMTime
+ (((double)tbuffEndSVM
.tms_stime
-(double)tbuffStartSVM
.tms_stime
)/CLOCKS_PER_SECOND
);
719 elem
->pos
= weight
[max
].pos
;
720 elem
->weight
= weight
[max
].data
;
724 weight_node_t
* score
= new weight_node_t();
725 score
->data
= weight
[max
].data
;
726 score
->pos
= weight
[max
].pos
;
727 elem
->stackScores
.push(score
);
735 /***************************************************************/
// Allocates the candidate array for a known word.
//
//  numMaybe - number of candidate tags; also the size of the new[] array.
//  index    - dictionary entry whose candidate list is read through
//             d->getElementMaybe().
//
// The candidate list is walked for at most numMaybe entries, copying each
// infoDict's pos into weight[j].pos.
// NOTE(review): the initialisation of weight[j].data, the loop bookkeeping
// (j, ret, list stepping) and the return statement are not present in this
// listing. The array is allocated with new[]; the caller presumably owns it.
737 /* Returns an array ready to be filled with the candidate ("maybe") information */
738 weight_node_t
*tagger::taggerCreateWeightNodeArray(int numMaybe
,dataDict
* index
)
741 weight_node_t
*weight
= new weight_node_t
[numMaybe
];
742 simpleList
<infoDict
*> *list
= &d
->getElementMaybe(index
);
745 while (ret
&& numMaybe
> 0)
747 infoDict
*pInfoDict
= *list
->getIndex();
749 weight
[j
].pos
= pInfoDict
->pos
;
758 /***************************************************************/
// Accumulates model weights into each candidate and picks the best one.
//
//  wRep     - weight repository queried through wrGetWeight(feature, pos).
//  weight   - candidate array; each entry's data field is updated in place.
//  numMaybe - number of entries in `weight`.
//  max      - out: index of the highest-scoring candidate (scores are
//             compared after a cast to float).
//
// For each candidate the weight stored under "BIASES" is subtracted and the
// weight of the current `feature` is added. The returned string is built from
// "<pos>_<score>" items, with a leading space on items written by the second
// form below.
//
// NOTE(review): the loop over features (presumably popping `stk`), the
// initial value of *max, the condition choosing between the two output forms
// and the return statement are not present in this listing.
760 std::string
tagger::taggerSumWeight(weightRepository
* wRep
, weight_node_t
* weight
, int numMaybe
, int* max
)
762 // weight_node_t *aux;
772 for (int j
=0; j
<numMaybe
;j
++)
776 b
= wRep
->wrGetWeight("BIASES",weight
[j
].pos
);
777 weight
[j
].data
= weight
[j
].data
- b
;
779 w
= wRep
->wrGetWeight(feature
,weight
[j
].pos
);
780 weight
[j
].data
=weight
[j
].data
+w
;
781 if (((float)weight
[*max
].data
)<((float)weight
[j
].data
)) *max
=j
;
785 std::ostringstream tmp
;
788 for (int i
=0; i
<numMaybe
; i
++)
792 tmp
<< weight
[i
].pos
<< "_" << (float)weight
[i
].data
;
796 tmp
<< " " << weight
[i
].pos
<< "_" << (float)weight
[i
].data
;
800 //std::cerr << "tagger::taggerSumWeight generated string '" << tmp.str() << "'" << std::endl;
804 /***************************************************************/
805 /***************************************************************/
// Allocates the candidate array for an unknown word from <model>.UNKP.
//
//  numMaybe - out: number of candidate tags, counted as the number of '\n'
//             characters in the file.
//
// The file is read twice: once to count lines, then again to fill
// weight[i].pos with the non-blank characters of each line. An error message
// is printed to std::cerr if the file cannot be opened.
//
// NOTE(review): taggerGenerateScore() calls this for every word it treats as
// unknown, so the file is reopened and parsed each time -- caching the parsed
// tags looks worthwhile; confirm.
// NOTE(review): the rewind between the two reads, the reset of *numMaybe, the
// fclose and the return statement are not present in this listing.
807 weight_node_t
*tagger::taggerCreateWeightUnkArray(int *numMaybe
)
812 std::string name
= taggerModelName
+ ".UNKP";
816 if ((f
= fopen(name
.c_str(), "rt"))== NULL
)
818 std::cerr
<< "Error opening file: "<<name
<<std::endl
;
822 // first read: count the number of POS
824 { if (fgetc(f
)=='\n') (*numMaybe
)++;
827 // second read, fill in the weight nodes
830 weight_node_t
*weight
= new weight_node_t
[*numMaybe
];
831 while (!feof(f
) && (i
<*numMaybe
))
832 { weight
[i
].pos
= "";
835 while ((c
!='\n') && (!feof(f
)))
837 if (c
!=' ' && c
!='\n' && c
!='\t') weight
[i
].pos
+= c
;
846 /***************************************************************/
847 /***************************************************************/
// Prints the tagging summary to std::cerr.
//
//  numWords     - number of words tagged.
//  numSentences - number of sentences tagged.
//  (realTime)   - unnamed, unused third parameter.
//  usrTime      - user CPU seconds.
//  sysTime      - system CPU seconds.
//
// Throughput is numWords / (sysTime + usrTime), computed only when that sum
// is non-zero.
// NOTE(review): the declaration of `media` is not present in this listing;
// confirm it is initialised for the zero-time case.
849 void tagger::taggerStadistics(int numWords
, int numSentences
, double /*realTime*/, double usrTime
, double sysTime
)
851 std::ostringstream message
;
853 if ((sysTime
+usrTime
)!=0) media
= (float) (((double) numWords
)/(sysTime
+usrTime
));
855 message
<< std::endl
<< numSentences
<<" sentences were tagged." << std::endl
;
856 message
<< numWords
<< " words were tagged." << std::endl
;
857 message
<< media
<< " words/second were tagged." << std::endl
;
858 std::cerr
<< message
.str();
861 /***************************************************************/
// Progress indicator written to std::cerr.
//
//  num   - number of sentences processed so far.
//  isEnd - non-zero prints the final ".<num> sentences [DONE]" line and
//          returns; zero prints `num` on every multiple of 100, a "." on
//          other multiples of 10, and nothing otherwise.
863 void tagger::taggerShowVerbose(int num
,int isEnd
)
865 if (isEnd
) { std::cerr
<<"."<<num
<<" sentences [DONE]"<<std::endl
<<std::endl
; return; }
866 else if (num
%100==0) std::cerr
<<num
;
867 else if (num
%10==0) std::cerr
<< ".";