2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <sys/times.h>
26 #include "strategies.h"
36 /***************************************************************/
// Process-time snapshots filled by times() right before and right after the
// models are loaded (see taggerLoadModelsForTagging).
struct tms tbuffStartUp;
struct tms tbuffEndStartUp;

// Values returned by the two times() calls that bracket model loading.
clock_t startUpTime;
clock_t endStartUpTime;

// Running totals of the time spent extracting features (system, user, real),
// accumulated on every taggerGenerateScore() call.
double sysFexTime = 0;
double usrFexTime = 0;
double realFexTime = 0;

// Running totals of the time spent summing SVM weights (system, user, real),
// accumulated on every taggerGenerateScore() call.
double sysSVMTime = 0;
double usrSVMTime = 0;
double realSVMTime = 0;
43 /***************************************************************/
49 /***************************************************************/
// Builds a bias table from the text file `name`: allocates a hash_t, opens the
// file in text-read mode and inserts weight_node_t entries keyed by their
// `pos` string, with `data` parsed by atof() from the characters collected in
// `weight`. An error message is written to stderr when the file cannot be opened.
// NOTE(review): the declarations of `f`, `i` and `w`, the reading loop and the
// return statement are not visible in this excerpt - confirm against the full file.
51 hash_t
*tagger::taggerCreateBiasHash(char *name
)
53 hash_t
*bias
= new hash_t
;
55 char c
=' ',weight
[20];
61 if ((f
= fopen(name
, "rt"))== NULL
)
63 fprintf(stderr
, "Error opening file: %s",name
);
72 w
= new weight_node_t
;
76 while ((c
!='\n') && (!feof(f
)))
// NOTE(review): the sprintf() calls below pass the destination buffer
// (`weight`, `w->pos`) as a "%s" source as well; overlapping source and
// destination is undefined behavior for sprintf.
78 if (c
!=' ' && c
!='\n' && c
!='\t' && i
==1) sprintf(weight
,"%s%c",weight
,c
);
79 else if (c
!=' ' && c
!='\n' && c
!='\t' && i
==0)
81 if (c
!=':') sprintf(w
->pos
,"%s%c",w
->pos
,c
);
86 w
->data
= (long double)0;
87 w
->data
= atof (weight
);
// NOTE(review): the node pointer is stored as an int; this loses bits on
// platforms where pointers are wider than int.
88 hash_insert(bias
,w
->pos
,(int)w
);
// NOTE(review): `!=` binds tighter than `=`, so `c` receives the result of
// the comparison (0 or 1) rather than the character that was read.
90 else while(c
=fgetc(f
)!='\n');
96 /***************************************************************/
// Constructor: selects STRA_1P_DEFAULT as the strategy, sets the window length
// to -1 (taggerInit treats -1 as "not configured"), copies `model` into
// taggerModelName and clears the backup dictionary name.
// NOTE(review): both strcpy() calls are unbounded and the destination sizes are
// declared outside this excerpt; further initialisations may sit on lines that
// are not visible here.
98 tagger::tagger(char *model
)
105 taggerStrategy
= STRA_1P_DEFAULT
; //modstrat //0;
107 taggerWinLength
= -1;
108 strcpy(taggerModelName
,model
);
109 strcpy (taggerBackupDict
,"");
114 /***************************************************************/
116 void tagger::taggerLoadModels(models_t
*model
, int taggerNumModel
)
118 char name
[150],flow2
[5],flow1
[5];
120 //Cargamos la lista de "features" para palabras conocidas
121 sprintf(name
,"%s.A%d",taggerModelName
,taggerNumModel
);
122 if (verbose
) fprintf(stderr
,"\nLoading FEATURES FOR KNOWN WORDS from < %s >\n",name
);
123 createFeatureList(name
,&model
->featureList
);
124 //Cargamos la lista de "features" para palabras desconocidas
125 sprintf(name
,"%s.A%d.UNK",taggerModelName
,taggerNumModel
);
126 if (verbose
) fprintf(stderr
,"\nLoading FEATURES FOR UNKNOWN WORDS from < %s >\n",name
);
127 createFeatureList(name
,&model
->featureListUnk
);
129 if (strcmp(flow
,"LRL")==0)
131 strcpy(flow1
,"LR"); strcpy(flow2
,"RL");
133 sprintf(name
,"%s (Right-to-Left)",flow1
);
134 if (verbose
) fprintf(stderr
,"\nREADING MODELS < direction = %s >\n",name
);
136 sprintf(name
,"%s.M%d.%s.MRG",taggerModelName
,taggerNumModel
,flow2
);
137 if (verbose
) fprintf(stderr
,"-. Loading MERGED MODEL FOR KNOWN WORDS from < %s >\n",name
);
138 model
->wr2
= new weightRepository(name
,taggerKFilter
);
140 sprintf(name
,"%s.UNK.M%d.%s.MRG",taggerModelName
,taggerNumModel
,flow2
);
141 if (verbose
) fprintf(stderr
,"-. Loading MERGED MODEL FOR UNKKNOWN WORDS from < %s >\n\n",name
);
142 model
->wrUnk2
= new weightRepository(name
,taggerUFilter
);
144 else strcpy(flow1
,flow
);
146 if (strcmp(flow1
,"RL")==0) sprintf(name
,"%s (Right-to-Left)",flow1
);
147 else sprintf(name
,"%s (Left-to-Right)",flow1
);
149 if (verbose
) fprintf(stderr
,"\nREADING MODELS < direction = %s >\n",name
);
151 sprintf(name
,"%s.M%d.%s.MRG",taggerModelName
,taggerNumModel
,flow1
);
152 if (verbose
) fprintf(stderr
,"-. Loading MERGED MODEL FOR KNOWN WORDS from < %s >\n",name
);
153 model
->wr
= new weightRepository(name
,taggerKFilter
);
155 sprintf(name
,"%s.UNK.M%d.%s.MRG",taggerModelName
,taggerNumModel
,flow1
);
156 if (verbose
) fprintf(stderr
,"-. Loading MERGED MODEL FOR UNKNOWN WORDS from < %s >\n",name
);
157 model
->wrUnk
= new weightRepository(name
,taggerUFilter
);
161 /***************************************************************/
// Loads everything needed before tagging: the dictionary <model>.DICT (with
// the backup dictionary when taggerBackupDict is non-empty), the unknown-word
// POS list <model>.UNKP, and the model(s) required by taggerStrategy.
// The whole load is bracketed by two times() calls whose results are kept in
// startUpTime / endStartUpTime and tbuffStartUp / tbuffEndStartUp.
// NOTE(review): the declarations of `name` and `modelsNeeded`, and the
// brace/else lines, are not visible in this excerpt.
163 void tagger::taggerLoadModelsForTagging()
165 startUpTime
= times(&tbuffStartUp
);
170 sprintf(name
,"%s.DICT",taggerModelName
);
171 if (strcmp(taggerBackupDict
,"")!=0)
173 if (verbose
) fprintf(stderr
,"Loading DICTIONARY from < %s > with BACKUP DICTIONARY from < %s >\n",name
,taggerBackupDict
);
174 d
= new dictionary(name
,taggerBackupDict
);
178 if (verbose
) fprintf(stderr
,"Loading DICTIONARY from < %s >\n",name
);
179 d
= new dictionary(name
);
182 sprintf(name
,"%s.UNKP",taggerModelName
);
183 if (verbose
) fprintf(stderr
,"Loading UNKNOWN WORDS POS from < %s >\n",name
);
184 weightUnk
= taggerCreateWeightUnkArray(name
);
// The relabeling and robust-unknown strategies load two models each (see the
// dispatch below), so they need two slots in taggerModelList.
186 if ( taggerStrategy
== STRA_2P_RELABELING
//modstrat 1
187 || taggerStrategy
== STRA_1P_ROBUST_UNK
/*modstrat 4*/ ) modelsNeeded
= 2;
189 taggerModelList
= new models_t
[modelsNeeded
];
190 taggerModelRunning
= &taggerModelList
[0];
// Strategy -> model numbers: default 0; unsupervised 3; robust-unknown 0 and 2;
// very-robust-unknown 4; relabeling 2 and 1.
192 if (taggerStrategy
== STRA_1P_DEFAULT
) //modstrat 0)
193 taggerLoadModels(taggerModelRunning
,0);
194 else if (taggerStrategy
== STRA_1P_UNSUPERVISED
) //modstrat 2)
195 taggerLoadModels(taggerModelRunning
,3);
196 else if (taggerStrategy
== STRA_1P_ROBUST_UNK
) //modstrat 4)
198 taggerLoadModels(taggerModelRunning
,0);
199 taggerLoadModels(&taggerModelList
[1],2);
201 else if (taggerStrategy
== STRA_1P_VERY_ROBUST_UNK
) //modstrat 5)
202 taggerLoadModels(taggerModelRunning
,4);
203 else if (taggerStrategy
== STRA_2P_RELABELING
)//modstrat 1)
205 taggerLoadModels(taggerModelRunning
,2);
206 taggerLoadModels(&taggerModelList
[1],1);
// Presumably the final else branch for an unrecognised strategy (the `else`
// line itself is not visible here).
211 fprintf(stderr
,"Execution error: Strategy %d doesn't exist!!\n\n",taggerStrategy
);
215 endStartUpTime
= times(&tbuffEndStartUp
);
// Creates the sliding window `sw` that reads from stdin.
//  - Window index and length both -1: <model>.WIN is tried. If it cannot be
//    opened a default swindow(stdin) is built; otherwise length and index are
//    read from the file with fscanf() and passed to the swindow constructor.
//  - Only the index is -1: swindow(stdin, length) is used.
//  - Otherwise: swindow(stdin, length, index).
// NOTE(review): the fscanf() result is not checked in the visible code, and no
// fclose(f) is visible in this excerpt - confirm against the full file.
218 void tagger::taggerInit()
221 // int modelsNeeded=1;
224 // Check whether a .WIN file exists
225 if (taggerWinIndex
==-1 && taggerWinLength
==-1)
227 sprintf(name
,"%s.WIN",taggerModelName
);
228 FILE *f
= fopen (name
,"r");
229 if ( f
== NULL
) sw
= new swindow(stdin
);
232 fscanf(f
,"%d %d",&taggerWinLength
,&taggerWinIndex
);
234 sw
= new swindow (stdin
,taggerWinLength
,taggerWinIndex
);
237 else if (taggerWinIndex
==-1) sw
= new swindow (stdin
,taggerWinLength
);
238 else sw
= new swindow (stdin
,taggerWinLength
,taggerWinIndex
);
242 /***************************************************************/
248 if (taggerStrategy
== STRA_2P_RELABELING
) //modstrat 1)
254 delete[] weightUnk
; //Mod
256 for (int i
=0;i
<modelsNeeded
;i
++)
259 delete taggerModelList
[i
].wr
;
260 delete taggerModelList
[i
].wrUnk
;
262 if (strcmp(flow
,"LRL")==0)
264 delete taggerModelList
[i
].wr2
;
265 delete taggerModelList
[i
].wrUnk2
;
270 /***************************************************************/
271 /***************************************************************/
273 void tagger::taggerPutFlow(char *inFlow
)
278 /***************************************************************/
280 void tagger::taggerPutStrategy(int num
)
282 taggerStrategy
= num
;
285 /***************************************************************/
287 void tagger::taggerPutWinLength(int l
)
292 /***************************************************************/
294 void tagger::taggerPutWinIndex(int i
)
299 /***************************************************************/
301 void tagger::taggerPutBackupDictionary(char *dictName
)
303 strcpy (taggerBackupDict
,dictName
);
306 /***************************************************************/
308 void tagger::taggerPutKWeightFilter(float kfilter
)
310 taggerKFilter
= kfilter
;
313 /***************************************************************/
315 void tagger::taggerPutUWeightFilter(float ufilter
)
317 taggerUFilter
= ufilter
;
320 /***************************************************************/
321 /***************************************************************/
322 /***************************************************************/
323 /***************************************************************/
// Direction-1 tagging pass with per-element model selection.
// Rewinds the window by calling sw->previous() until it stops returning 0, then
// for each visited element chooses taggerModelList[1] when
// sw->winExistUnkWord(1,d) returns -1 and taggerModelList[0] otherwise, and
// scores the element with taggerGenerateScore(elem,1). When flow is "LRL" the
// window's POS values are materialized with argument 1.
// NOTE(review): the loop that advances through the window and the return
// statement are not visible in this excerpt.
325 int tagger::taggerRightSenseSpecialForUnknown()
329 while(sw
->previous()==0);
330 nodo
*elem
= sw
->getIndex();
332 if (sw
->winExistUnkWord(1,d
)==-1)
333 taggerModelRunning
=&taggerModelList
[1];
334 else taggerModelRunning
=&taggerModelList
[0];
336 taggerGenerateScore(elem
,1);
340 elem
= sw
->getIndex();
342 if (sw
->winExistUnkWord(1,d
)==-1)
343 taggerModelRunning
=&taggerModelList
[1];
344 else taggerModelRunning
=&taggerModelList
[0];
346 taggerGenerateScore(elem
,1);
350 if (strcmp(flow
,"LRL")==0) sw
->winMaterializePOSValues(1);
355 /***************************************************************/
// Direction-2 tagging pass with per-element model selection.
// Moves to the far end of the window by calling sw->next() until it stops
// returning 0, scores that element, then walks back with sw->previous(),
// scoring each element with taggerGenerateScore(elem,2). Before every scoring
// call taggerModelList[1] is selected when sw->winExistUnkWord(2,d) returns -1
// and taggerModelList[0] otherwise. When flow is "LRL" the window's POS values
// are materialized with argument 0.
// NOTE(review): the braces of the backward loop and the return statement are
// not visible in this excerpt.
357 int tagger::taggerLeftSenseSpecialForUnknown()
360 while(sw
->next()==0);
361 nodo
*elem
= sw
->getIndex();
362 if (sw
->winExistUnkWord(2,d
)==-1)
363 taggerModelRunning
=&taggerModelList
[1];
364 else taggerModelRunning
=&taggerModelList
[0];
366 taggerGenerateScore(elem
,2);
368 while(sw
->previous()==0)
370 elem
= sw
->getIndex();
372 if (sw
->winExistUnkWord(2,d
)==-1)
373 taggerModelRunning
=&taggerModelList
[1];
374 else taggerModelRunning
=&taggerModelList
[0];
376 taggerGenerateScore(elem
,2);
380 if (strcmp(flow
,"LRL")==0) sw
->winMaterializePOSValues(0);
384 /***************************************************************/
// Direction-1 tagging pass with the currently selected model.
// Rewinds the window with sw->previous() until it stops returning 0, then
// scores the visited elements with taggerGenerateScore(elem,1). When flow is
// "LRL" the window's POS values are materialized with argument 1.
// NOTE(review): the loop that advances through the window and the return
// statement are not visible in this excerpt.
386 int tagger::taggerRightSense()
390 while(sw
->previous()==0);
391 nodo
*elem
= sw
->getIndex();
392 taggerGenerateScore(elem
,1);
396 elem
= sw
->getIndex();
397 taggerGenerateScore(elem
,1);
401 if (strcmp(flow
,"LRL")==0) sw
->winMaterializePOSValues(1);
406 /***************************************************************/
// Direction-2 tagging pass with the currently selected model.
// Moves to the far end of the window with sw->next() until it stops returning
// 0, scores that element, then walks back with sw->previous() scoring each
// element with taggerGenerateScore(elem,2). When flow is "LRL" the window's
// POS values are materialized with argument 0.
// NOTE(review): the braces of the backward loop and the return statement are
// not visible in this excerpt.
408 int tagger::taggerLeftSense()
411 while(sw
->next()==0);
412 nodo
*elem
= sw
->getIndex();
413 taggerGenerateScore(elem
,2);
415 while(sw
->previous()==0)
417 elem
= sw
->getIndex();
418 taggerGenerateScore(elem
,2);
422 if (strcmp(flow
,"LRL")==0) sw
->winMaterializePOSValues(0);
426 /***************************************************************/
// Runs the tagging routine selected by taggerStrategy, bracketed by two
// times() calls, then writes a report to stderr: the end-of-run verbose line,
// start-up time, feature-extraction and SVM times, "process" time (tagging
// minus the two previous ones), tagging time, overall time, and finally the
// throughput summary printed by taggerStadistics().
// STRA_1P_SENTENCE_LEVEL and STRA_1P_ROBUST_SENTENCE_LEVEL have empty cases,
// so nothing is tagged for them here.
// NOTE(review): times() counts in clock ticks; CLOCKS_PER_SECOND is defined
// outside this excerpt - confirm it matches the tick rate of times().
// NOTE(review): the declarations of `start`/`end` and the condition guarding
// the report block are not visible in this excerpt.
428 void tagger::taggerRun()
430 int contWords
=0,contSentences
=0;
432 struct tms tbuff1
,tbuff2
;
434 start
= times(&tbuff1
);
436 switch(taggerStrategy
)
438 case STRA_1P_DEFAULT
/*modstrat 0*/: taggerDoNormal(&contWords
,&contSentences
); break;
439 case STRA_2P_RELABELING
/*modstrat 1*/: taggerDoNTimes(&contWords
,&contSentences
,taggerNumLaps
); break;
440 case STRA_1P_UNSUPERVISED
/*modstrat 2*/: taggerDoNormal(&contWords
,&contSentences
); break;
441 case STRA_1P_SENTENCE_LEVEL
/*modstrat 3*/: /*taggerDoNTimes(&contWords,&contSentences,taggerNumLaps);*/ break;
442 case STRA_1P_ROBUST_UNK
/*modstrat 4*/: taggerDoSpecialForUnknown(&contWords
,&contSentences
); break;
443 case STRA_1P_VERY_ROBUST_UNK
/*modstrat 5*/: taggerDoNormal(&contWords
,&contSentences
); break;
444 case STRA_1P_ROBUST_SENTENCE_LEVEL
: break;
446 end
= times(&tbuff2
);
450 { taggerShowVerbose(contSentences
,1);
452 fprintf(stderr
,"* -------------------------------------------------------------------\n");
453 showTime("Start Up Time",
454 ((double)(endStartUpTime
-startUpTime
))/CLOCKS_PER_SECOND
,
455 ((double)tbuffEndStartUp
.tms_utime
-(double)tbuffStartUp
.tms_utime
)/CLOCKS_PER_SECOND
,
456 ((double)tbuffEndStartUp
.tms_stime
-(double)tbuffStartUp
.tms_stime
)/CLOCKS_PER_SECOND
);
457 fprintf(stderr
,"* -------------------------------------------------------------------\n");
458 showTime("Features Extraction Time",realFexTime
,usrFexTime
,sysFexTime
);
459 showTime("SVM Time",realSVMTime
,usrSVMTime
,sysSVMTime
);
460 showTime("Process Time",((double)(end
-start
))/CLOCKS_PER_SECOND
- realFexTime
- realSVMTime
,
461 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
)/CLOCKS_PER_SECOND
- usrFexTime
-usrSVMTime
,
462 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
)/CLOCKS_PER_SECOND
- sysFexTime
-sysSVMTime
);
463 fprintf(stderr
,"* -------------------------------------------------------------------\n");
464 fprintf(stderr
,"[ Tagging Time = Feature Extraction Time + SVM Time + Process Time ]\n");
465 showTime("Tagging Time",((double)(end
-start
))/CLOCKS_PER_SECOND
,
466 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
)/CLOCKS_PER_SECOND
,
467 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
)/CLOCKS_PER_SECOND
);
468 fprintf(stderr
,"* -------------------------------------------------------------------\n");
469 fprintf(stderr
,"[ Overall Time = Start up Time + Tagging Time ]\n");
470 showTime("Overall Time",((double)(end
-start
+endStartUpTime
-startUpTime
))/CLOCKS_PER_SECOND
,
471 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
+
472 (double)tbuffEndStartUp
.tms_utime
-(double)tbuffStartUp
.tms_utime
)/CLOCKS_PER_SECOND
,
473 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
+
474 (double)tbuffEndStartUp
.tms_stime
-(double)tbuffStartUp
.tms_stime
)/CLOCKS_PER_SECOND
);
475 fprintf(stderr
,"* -------------------------------------------------------------------\n");
476 taggerStadistics(contWords
,contSentences
,
477 ((double)(end
-start
))/CLOCKS_PER_SECOND
,
478 ((double)tbuff2
.tms_utime
-(double)tbuff1
.tms_utime
)/CLOCKS_PER_SECOND
,
479 ((double)tbuff2
.tms_stime
-(double)tbuff1
.tms_stime
)/CLOCKS_PER_SECOND
);
483 /***************************************************************/
// Single-model tagging driver. Depending on flow it runs taggerRightSense()
// ("LR" or "LRL") and/or taggerLeftSense() ("RL" or "LRL"), accumulating the
// word counts they return, and re-initialises the window with sw->iniGeneric().
// On exit *numWords is the right-to-left count if it is non-zero, otherwise
// the left-to-right count, divided by taggerNumLaps; *numSentences receives
// contSentences.
// NOTE(review): the enclosing loop and the statement that increments
// contSentences are not visible in this excerpt.
// NOTE(review): taggerNumLaps is used as a divisor and is set outside this
// excerpt - confirm it can never be 0.
485 void tagger::taggerDoNormal(int *numWords
, int *numSentences
)
487 int contWordsLR
=0,contWordsRL
=0,contSentences
=0,ret
= 1;
491 if (verbose
) taggerShowVerbose(contSentences
,0);
493 if ((strcmp(flow
,"LRL")==0) || (strcmp(flow
,"LR")==0))
494 contWordsLR
= contWordsLR
+taggerRightSense();
495 if ((strcmp(flow
,"LRL")==0) || (strcmp(flow
,"RL")==0))
496 contWordsRL
= contWordsRL
+taggerLeftSense();
500 ret
= sw
->iniGeneric();
502 if (contWordsRL
==0) *numWords
=contWordsLR
/taggerNumLaps
;
503 else *numWords
=contWordsRL
/taggerNumLaps
;
504 *numSentences
= contSentences
;
507 /***************************************************************/
// Tagging driver for the strategy that switches model per element. It has the
// same shape as taggerDoNormal but calls taggerRightSenseSpecialForUnknown()
// and taggerLeftSenseSpecialForUnknown(), then re-initialises the window with
// sw->iniGeneric().
// On exit *numWords is the right-to-left count if it is non-zero, otherwise
// the left-to-right count, divided by taggerNumLaps; *numSentences receives
// contSentences.
// NOTE(review): the enclosing loop and the statement that increments
// contSentences are not visible in this excerpt; taggerNumLaps is a divisor -
// confirm it can never be 0.
509 void tagger::taggerDoSpecialForUnknown(int *numWords
, int *numSentences
)
511 int contWordsLR
=0,contWordsRL
=0,contSentences
=0,ret
= 1;
515 if (verbose
) taggerShowVerbose(contSentences
,0);
517 if ((strcmp(flow
,"LRL")==0) || (strcmp(flow
,"LR")==0))
518 contWordsLR
= contWordsLR
+taggerRightSenseSpecialForUnknown();
519 if ((strcmp(flow
,"LRL")==0) || (strcmp(flow
,"RL")==0))
520 contWordsRL
= contWordsRL
+taggerLeftSenseSpecialForUnknown();
525 ret
= sw
->iniGeneric();
527 if (contWordsRL
==0) *numWords
=contWordsLR
/taggerNumLaps
;
528 else *numWords
=contWordsRL
/taggerNumLaps
;
529 *numSentences
= contSentences
;
532 /***************************************************************/
// Multi-pass tagging driver. For each pass in [0, laps) it selects
// taggerModelList[pass] as the running model and runs the right and/or left
// passes according to flow. With flow "LRL", every pass after the first calls
// sw->winMaterializePOSValues(2) between the two directions. The window is
// re-initialised afterwards with sw->iniGeneric().
// On exit *numWords is the right-to-left count if it is non-zero, otherwise
// the left-to-right count, divided by taggerNumLaps; *numSentences receives
// contSentences.
// NOTE(review): `laps` indexes taggerModelList, which is sized by modelsNeeded
// elsewhere - confirm laps never exceeds that size.
// NOTE(review): the enclosing loop and the statement that increments
// contSentences are not visible in this excerpt.
534 void tagger::taggerDoNTimes(int *numWords
, int *numSentences
,int laps
)
536 int contWordsLR
=0,contWordsRL
=0,contSentences
=0,ret
= 1;
541 if (verbose
) taggerShowVerbose(contSentences
,0);
543 for (int pasadas
=0;pasadas
<laps
;pasadas
++)
546 taggerModelRunning
= &taggerModelList
[pasadas
];
547 if ((strcmp(flow
,"LRL")==0) || (strcmp(flow
,"LR")==0))
548 contWordsLR
= contWordsLR
+taggerRightSense();
550 if (strcmp(flow
,"LRL")==0 && pasadas
>0)
551 sw
->winMaterializePOSValues(2);
552 if ((strcmp(flow
,"LRL")==0) || (strcmp(flow
,"RL")==0))
553 contWordsRL
= contWordsRL
+taggerLeftSense();
560 ret
= sw
->iniGeneric();
562 if (contWordsRL
==0) *numWords
=contWordsLR
/taggerNumLaps
;
563 else *numWords
=contWordsRL
/taggerNumLaps
;
564 *numSentences
= contSentences
;
567 /***************************************************************/
568 /***************************************************************/
// Scores one window element and stores the winning tag in it.
//  1. Looks elem->wrd up in the dictionary `d`. The visible code sets up two
//     configurations: the known-word one (featureList, candidates from
//     taggerCreateWeightNodeArray, repository wr or wr2) and the unknown-word
//     one (featureListUnk, NUM_UNK_POS candidates from weightUnk, repository
//     wrUnk or wrUnk2). The "2" repositories are used when flow is "LRL" and
//     direction == 2.
//  2. Walks the chosen feature list and pushes one feature per entry onto
//     `stk`, dispatching on the entry's mark string. The marks after the
//     is_unk test take a parameter read from the entry's own list `l`.
//  3. Calls taggerSumWeight() to find the index `max` of the best candidate.
//  4. Copies weight[max].pos and weight[max].data into elem; when flow is
//     "LRL" a heap-allocated copy is also pushed onto elem->stackScores.
// The time spent in steps 2 and 3 is added to the global *FexTime and *SVMTime
// counters.
// NOTE(review): the branch that chooses between the known-word and
// unknown-word configurations, and the loop headers, are not visible in this
// excerpt.
570 void tagger::taggerGenerateScore(nodo
*elem
,int direction
)
573 struct tms tbuffStartFex
,tbuffEndFex
;
574 clock_t startFexTime
,endFexTime
;
575 struct tms tbuffStartSVM
,tbuffEndSVM
;
576 clock_t startSVMTime
,endSVMTime
;
578 weight_node_t
*weight
;
579 nodo_feature_list
*aux
;
580 weightRepository
*weightRep
;
582 int i
,numMaybe
,ret
=1,max
=0;
584 simpleList
*featureList
;
586 startFexTime
= times(&tbuffStartFex
);
588 i
= d
->getElement(elem
->wrd
);
591 featureList
= &taggerModelRunning
->featureList
;
592 numMaybe
= d
->getElementNumMaybe(i
);
593 weight
= taggerCreateWeightNodeArray(numMaybe
,i
);
594 if ((strcmp(flow
,"LRL")==0) && (direction
==2))
596 weightRep
= taggerModelRunning
->wr2
; //wr2;
597 //bias = taggerModelRunning->bias2; //taggerBias2;
601 weightRep
= taggerModelRunning
->wr
; //wr;
602 //bias = taggerModelRunning->bias; //taggerBias;
607 numMaybe
= NUM_UNK_POS
;
608 weight
= taggerInitializeWeightNodeArray(numMaybe
,weightUnk
);
609 featureList
= &taggerModelRunning
->featureListUnk
;
612 if ((strcmp(flow
,"LRL")==0) && (direction
==2))
614 weightRep
= taggerModelRunning
->wrUnk2
; //wrUnk2;
615 //bias = taggerModelRunning->biasUnk2; //taggerBiasUnk2;
618 { weightRep
= taggerModelRunning
->wrUnk
; //wrUnk;
619 //bias =taggerModelRunning->biasUnk; //taggerBiasUnk;
628 aux
= (nodo_feature_list
*) featureList
->getIndex();
629 if (strcmp(aux
->mark
,SLASTW
)==0) sw
->winPushSwnFeature(stk
);
630 else if (strcmp(aux
->mark
,WMARK
)==0) sw
->winPushWordFeature((void *)aux
,d
,stk
,direction
);
631 else if (strcmp(aux
->mark
,KMARK
)==0) sw
->winPushAmbiguityFeature((void *)aux
,d
,stk
,direction
);
632 else if (strcmp(aux
->mark
,MMARK
)==0) sw
->winPushMaybeFeature((void *)aux
,d
,stk
,direction
);
633 else if (strcmp(aux
->mark
,PMARK
)==0) sw
->winPushPosFeature((void *)aux
,d
,stk
,direction
);
634 else if (strcmp(aux
->mark
,MFTMARK
)==0) sw
->winPushMFTFeature((void *)aux
,d
,stk
,direction
);
635 else if (is_unk
==TRUE
)
640 param
= (int *) aux
->l
.getIndex();
642 if (strcmp(aux
->mark
,PREFIX_MARK
)==0) sw
->winPushPrefixFeature(elem
->wrd
, stk
, *param
);
643 else if (strcmp(aux
->mark
,SUFFIX_MARK
)==0) sw
->winPushSuffixFeature(elem
->wrd
, stk
, *param
);
644 else if (strcmp(aux
->mark
,CHAR_A_MARK
)==0) sw
->winPushLetterFeature(elem
->wrd
, stk
, *param
, COUNTING_FROM_BEGIN
);
645 else if (strcmp(aux
->mark
,CHAR_Z_MARK
)==0) sw
->winPushLetterFeature(elem
->wrd
, stk
, *param
, COUNTING_FROM_END
);
646 else if (strcmp(aux
->mark
,LENGTH_MARK
)==0) sw
->winPushLenghtFeature(elem
->wrd
,stk
);
647 else if (strcmp(aux
->mark
,START_CAPITAL_MARK
)==0) sw
->winPushStartWithCapFeature(elem
->wrd
,stk
);
648 else if (strcmp(aux
->mark
,START_LOWER_MARK
)==0) sw
->winPushStartWithLowerFeature(elem
->wrd
,stk
);
649 else if (strcmp(aux
->mark
,START_NUMBER_MARK
)==0) sw
->winPushStartWithNumberFeature(elem
->wrd
,stk
);
650 else if (strcmp(aux
->mark
,ALL_UPPER_MARK
)==0) sw
->winPushAllUpFeature(elem
->wrd
,stk
);
651 else if (strcmp(aux
->mark
,ALL_LOWER_MARK
)==0) sw
->winPushAllLowFeature(elem
->wrd
,stk
);
652 else if (strcmp(aux
->mark
,CONTAIN_CAP_MARK
)==0) sw
->winPushContainCapFeature(elem
->wrd
, stk
);
653 else if (strcmp(aux
->mark
,CONTAIN_CAPS_MARK
)==0) sw
->winPushContainCapsFeature(elem
->wrd
, stk
);
654 else if (strcmp(aux
->mark
,CONTAIN_COMMA_MARK
)==0) sw
->winPushContainCommaFeature(elem
->wrd
, stk
);
655 else if (strcmp(aux
->mark
,CONTAIN_NUMBER_MARK
)==0) sw
->winPushContainNumFeature(elem
->wrd
, stk
);
656 else if (strcmp(aux
->mark
,CONTAIN_PERIOD_MARK
)==0) sw
->winPushContainPeriodFeature(elem
->wrd
, stk
);
657 else if (strcmp(aux
->mark
,MULTIWORD_MARK
)==0) sw
->winPushMultiwordFeature(elem
->wrd
, stk
);
659 ret
= featureList
->next();
// Rewind the shared feature list so the next call starts from its first entry.
661 featureList
->setFirst();
663 endFexTime
= times(&tbuffEndFex
);
664 realFexTime
= realFexTime
+ ((double)(endFexTime
-startFexTime
))/CLOCKS_PER_SECOND
;
665 usrFexTime
= usrFexTime
+ (((double)tbuffEndFex
.tms_utime
-(double)tbuffStartFex
.tms_utime
)/CLOCKS_PER_SECOND
);
666 sysFexTime
= sysFexTime
+ (((double)tbuffEndFex
.tms_stime
-(double)tbuffStartFex
.tms_stime
)/CLOCKS_PER_SECOND
);
668 startSVMTime
= times(&tbuffStartSVM
);
670 taggerSumWeight(weightRep
,bias
,weight
,numMaybe
,&max
);
672 endSVMTime
= times(&tbuffEndSVM
);
673 realSVMTime
= realSVMTime
+ ((double)(endSVMTime
-startSVMTime
))/CLOCKS_PER_SECOND
;
674 usrSVMTime
= usrSVMTime
+ (((double)tbuffEndSVM
.tms_utime
-(double)tbuffStartSVM
.tms_utime
)/CLOCKS_PER_SECOND
);
675 sysSVMTime
= sysSVMTime
+ (((double)tbuffEndSVM
.tms_stime
-(double)tbuffStartSVM
.tms_stime
)/CLOCKS_PER_SECOND
);
678 strcpy(elem
->pos
,weight
[max
].pos
);
679 elem
->weight
= weight
[max
].data
;
681 if (strcmp(flow
,"LRL")==0)
683 weight_node_t
*score
= new weight_node_t
;
684 score
->data
= weight
[max
].data
;
685 strcpy(score
->pos
,weight
[max
].pos
);
686 push(elem
->stackScores
,score
);
// The candidate array is freed only when the dictionary lookup did not return
// HASH_FAIL; presumably the other case uses the shared weightUnk array
// assigned above, which must not be deleted here.
689 if (i
!=HASH_FAIL
) delete[] weight
; //mod delete[] instead of delete
692 /***************************************************************/
// Allocates an array of numMaybe weight_node_t and fills each entry's `pos`
// with the `txt` of an infoDict taken from the list that
// d->getElementMaybe(index) returns.
// NOTE(review): `j` starts at numMaybe, which is one past the last valid
// index; the lines that update `j`, decrement numMaybe, advance the list and
// return the array are not visible in this excerpt - confirm `j` is adjusted
// before weight[j] is written.
694 weight_node_t
*tagger::taggerCreateWeightNodeArray(int numMaybe
,int index
)
696 int ret
=1,j
= numMaybe
;
697 weight_node_t
*weight
= new weight_node_t
[numMaybe
];
698 simpleList
*list
= (simpleList
*) d
->getElementMaybe(index
);
700 while (ret
>=0 && numMaybe
> 0)
702 infoDict
*pInfoDict
= (infoDict
*) list
->getIndex();
704 sprintf(weight
[j
].pos
,"%s",pInfoDict
->txt
);
713 /***************************************************************/
715 weight_node_t
*tagger::taggerInitializeWeightNodeArray(int numMaybe
,weight_node_t
*w
)
717 for (int i
=0;i
<numMaybe
;i
++) w
[i
].data
=0;
721 /***************************************************************/
// Accumulates the score of every candidate tag and tracks the best one.
// Feature strings are popped from `stk`. For each candidate j the weight that
// wRep->wrGetWeight(feature, weight[j].pos) returns is added to weight[j].data.
// The weight stored under "BIASES" for that tag is subtracted on a path whose
// guarding condition is not visible in this excerpt. *max is moved to j
// whenever candidate j's score, compared as float, exceeds the current best.
// Each popped feature string is released with delete[].
// NOTE(review): the `bias` parameter is not referenced in the visible lines.
// NOTE(review): the declarations of `feature`, `b` and `w` and the outer loop
// over the stack are not visible in this excerpt.
723 void tagger::taggerSumWeight(weightRepository
*wRep
,hash_t
*bias
,weight_node_t
*weight
, int numMaybe
, int *max
)
733 feature
= (char *) pop(stk
);
734 for (int j
=0; j
<numMaybe
;j
++)
738 b
= wRep
->wrGetWeight("BIASES",weight
[j
].pos
);
739 weight
[j
].data
= weight
[j
].data
- b
;
741 w
= wRep
->wrGetWeight(feature
,weight
[j
].pos
);
742 weight
[j
].data
=weight
[j
].data
+w
;
743 if (((float)weight
[*max
].data
)<((float)weight
[j
].data
)) *max
=j
;
745 delete[] feature
; //mod delete[] instead of delete
750 /***************************************************************/
751 /***************************************************************/
// Builds the candidate-tag array for unknown words from the text file `name`.
// First pass: NUM_UNK_POS is incremented once per newline character read.
// Second pass: allocates NUM_UNK_POS weight_node_t and fills each entry's
// `pos` with the characters of one line, skipping spaces, tabs and newlines.
// An error message is written to stderr when the file cannot be opened.
// NOTE(review): sprintf() is called with weight[i].pos as both destination and
// "%s" source; overlapping source and destination is undefined behavior.
// NOTE(review): the declarations of `f`, `i` and `c`, the rewind between the
// two passes, the character reads and the return statement are not visible in
// this excerpt.
753 weight_node_t
*tagger::taggerCreateWeightUnkArray(char *name
)
760 if ((f
= fopen(name
, "rt"))== NULL
)
762 fprintf(stderr
, "Error opening file: %s",name
);
767 { if (fgetc(f
)=='\n') NUM_UNK_POS
++;
772 weight_node_t
*weight
= new weight_node_t
[NUM_UNK_POS
];
773 while (!feof(f
) && (i
<NUM_UNK_POS
))
774 { strcpy(weight
[i
].pos
,"");
777 while ((c
!='\n') && (!feof(f
)))
779 if (c
!=' ' && c
!='\n' && c
!='\t') sprintf(weight
[i
].pos
,"%s%c",weight
[i
].pos
,c
);
788 /***************************************************************/
789 /***************************************************************/
791 void tagger::taggerStadistics(int numWords
, int numSentences
, double realTime
,double usrTime
, double sysTime
)
793 char message
[200]="";
795 if (time
!=0) media
= (float) (((double) numWords
)/(sysTime
+usrTime
));
797 sprintf(message
,"%s\n%d sentences were tagged.",message
,numSentences
);
798 sprintf(message
,"%s\n%d words were tagged.",message
,numWords
);
799 sprintf(message
,"%s\n%f words/second were tagged.\n",message
,media
);
800 fwrite(message
,strlen(message
),1,stderr
);
803 /***************************************************************/
805 void tagger::taggerShowVerbose(int num
,int isEnd
)
807 if (isEnd
) { fprintf(stderr
,".%d sentences [DONE]\n\n",num
); return; }
808 else if (num
%100==0) fprintf(stderr
,"%d",num
);
809 else if (num
%10==0) fprintf(stderr
,".");