2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include <sys/types.h>
// Number of rows in lSentence; readInput() reports an error on stderr when
// its line counter reaches this value.
34 #define MAX_SENTENCE_LENGTH 1000
// File-level buffer of raw input lines: MAX_SENTENCE_LENGTH rows of 250 chars
// each (room for 249 characters plus the terminating NUL per row).
36 char lSentence
[MAX_SENTENCE_LENGTH
][250];
39 /*****************************************************************
41 *****************************************************************/
43 void swindow::winPushStartWithLowerFeature(char *wrd
,stack_t
*pila
)
47 //Comienza por Minuscula?
48 if (erLookRegExp2(&erStartLower
,wrd
))
52 //char *feat = new char[strlen(START_LOWER_MARK)+4];
53 char *feat
= new char[5];//mod Correcting dynamic memory errors
54 sprintf(feat
,"%s:%d",START_LOWER_MARK
,startlower
);
61 void swindow::winPushStartWithNumberFeature(char *wrd
,stack_t
*pila
)
65 //Comienza por Numero?
66 if (erLookRegExp2(&erStartNumber
,wrd
))
70 //mod Correcting dynamic memory errors
71 //char *feat = new char[strlen(START_NUMBER_MARK)+4];
72 char *feat
= new char[5];
74 sprintf(feat
,"%s:%d",START_NUMBER_MARK
,startnumber
);
81 void swindow::winPushSuffixFeature(char *wrd
, struct stack_t
*pila
,int longitud
)
83 //Obtenemos la longitud de la palabra
85 char *feat
= new char[longitud
+6];
86 int len
= strlen(wrd
);
91 for (int i
=len
-longitud
; i
<=len
-1; i
++)
93 if (i
>=0) sprintf(suf
,"%s%c",suf
,wrd
[i
]);
94 else sprintf(suf
,"%s~",suf
);
97 sprintf(feat
,"%s%d:%s",SUFFIX_MARK
,longitud
,suf
);
103 * void winPushPrefixFeature ( char *wrd, struct stack_t *pila, int longitud)
104 * esta funcion creara la "feature" de prefijo para la palabra desconocida
105 * <wrd> y la apilara en el parametro <pila>
107 void swindow::winPushPrefixFeature(char *wrd
, struct stack_t
*pila
,int longitud
)
109 //Obtenemos la longitud de la palabra
110 char *feat
= new char[6+longitud
];
111 int len
= strlen(wrd
);
112 char pref
[longitud
+1];
115 for (int i
=0; i
<longitud
; i
++)
117 if (len
> i
) sprintf(pref
,"%s%c",pref
,wrd
[i
]);
118 else /*if (i > len-1 )*/ sprintf(pref
,"%s~",pref
);
121 sprintf(feat
,"%s%d:%s",PREFIX_MARK
,longitud
,pref
);
126 void swindow::winPushStartWithCapFeature(char *wrd
, struct stack_t
*pila
)
130 //Comienza por Mayuscula?
131 if (erLookRegExp2(&erStartCap
,wrd
))
134 //mod Correcting dynamic memory errors
135 //char *feat = new char[strlen(START_CAPITAL_MARK)+4];
136 char *feat
= new char[5];
138 sprintf(feat
,"%s:%d",START_CAPITAL_MARK
,startcap
);
144 void swindow::winPushAllUpFeature(char *wrd
,stack_t
*pila
)
148 //Esta toda la palabra en mayusculas?
149 if (erLookRegExp2(&erAllUp
,wrd
))
153 //mod Correcting dynamic memory errors
154 //char *feat = new char[4];
155 char *feat
= new char[5];
157 sprintf(feat
,"%s:%d",ALL_UPPER_MARK
,allup
);
163 void swindow::winPushAllLowFeature(char *wrd
,stack_t
*pila
)
166 //Esta toda la palabra en minusculas?
167 if (erLookRegExp2(&erAllLow
,wrd
))
171 //mod Correcting dynamic memory errors
172 //char *feat = new char[4];
173 char *feat
= new char[5];
175 sprintf(feat
,"%s:%d",ALL_LOWER_MARK
,alllow
);
181 void swindow::winPushContainCapFeature(char *wrd
, stack_t
*pila
)
184 if (erLookRegExp2(&erContainCap
,wrd
))
188 //mod Correcting dynamic memory errors
189 //char *feat = new char[4];
190 char *feat
= new char[5];
192 sprintf(feat
,"%s:%d",CONTAIN_CAP_MARK
,containcap
);
198 void swindow::winPushContainCapsFeature(char *wrd
, stack_t
*pila
)
201 if (erLookRegExp2(&erContainCaps
,wrd
))
205 //mod Correcting dynamic memory errors
206 //char *feat = new char[5];
207 char *feat
= new char[6];
209 sprintf(feat
,"%s:%d",CONTAIN_CAPS_MARK
,containcaps
);
215 void swindow::winPushContainPeriodFeature(char *wrd
, stack_t
*pila
)
217 int containperiod
= 0;
219 if (erLookRegExp2(&erContainPeriod
,wrd
))
222 char *feat
= new char[5];
223 sprintf(feat
,"%s:%d",CONTAIN_PERIOD_MARK
,containperiod
);
229 void swindow::winPushContainCommaFeature(char *wrd
, stack_t
*pila
)
231 int containcomma
= 0;
233 if (erLookRegExp2(&erContainComma
,wrd
))
236 char *feat
= new char[5];
237 sprintf(feat
,"%s:%d",CONTAIN_COMMA_MARK
,containcomma
);
243 void swindow::winPushContainNumFeature(char *wrd
, stack_t
*pila
)
246 //Contiene un numero?
247 if (erLookRegExp2(&erContainNum
,wrd
))
251 char *feat
= new char[5];
253 //sprintf(feat,"CN:%d",containnum);
254 sprintf(feat
,"%s:%d",CONTAIN_NUMBER_MARK
,containnum
);
261 void swindow::winPushMultiwordFeature(char *wrd
, stack_t
*pila
)
264 //Es una palabra multiple?
265 if (erLookRegExp2(&erMultiWord
,wrd
))
269 //mod Correcting dynamic memory errors
270 //char *feat = new char[6];
271 //sprintf(feat,"MW:%d",multiword);
272 char *feat
= new char[5];
273 sprintf(feat
,"%s:%d",MULTIWORD_MARK
,multiword
);
280 void swindow::winPushLetterFeature(char *wrd
, stack_t
*pila
, int position
,int where
)
282 char *feature
= new char[12];
284 if (COUNTING_FROM_END
==where
)
286 sprintf(feature
,"%s%d:%c",CHAR_Z_MARK
,position
,wrd
[strlen(wrd
)-position
]);
290 sprintf(feature
,"%s%d:%c",CHAR_A_MARK
,position
,wrd
[position
-1]);
297 void swindow::winPushLenghtFeature(char *wrd
, stack_t
*pila
)
299 //Obtenemos la longitud de la palabra
300 int len
= strlen(wrd
);
302 //Longitud de la palabra
303 //mod Correcting dynamic memory errors
304 //char *feat = new char[4];
305 char *feat
= new char[6];
307 sprintf(feat
,"%s:%d",LENGTH_MARK
,len
);
313 * void winPushUnknownFeatures ( char *wrd, struct stack_t *pila)
314 * esta funcion creara las "features" para la palabra desconocida
315 * <wrd> y las apilara en el parametro <pila>
317 void swindow::winPushUnknownFeatures(char *wrd
, struct stack_t
*pila
)
319 int startcap
=0,allup
=0,alllow
=0,wordlength
=0,containnum
=0,multiword
=0,containcap
=0,containcaps
=0,containperiod
=0;
321 //Obtenemos la longitud de la palabra
322 int len
= strlen(wrd
);
325 //Creamos el prefijo de longitud 2
326 char *feat
= new char[6];
327 if (len
> 1) sprintf(ant
,"%c%c",wrd
[0],wrd
[1]);
328 else sprintf(ant
,"%c~",wrd
[0]);
329 sprintf(feat
,"a2:%s",ant
);
332 //Generamos el prefijo de longitud 3
334 if (len
> 2) sprintf(ant
,"%c%c%c",wrd
[0],wrd
[1],wrd
[2]);
335 else sprintf(ant
,"%s~",ant
);
336 sprintf(feat
,"a3:%s",ant
);
339 //Generamos el prefijo de longitud 4
341 if (len
> 3) sprintf(ant
,"%c%c%c%c",wrd
[0],wrd
[1],wrd
[2],wrd
[3]);
342 else sprintf(ant
,"%s~",ant
);
343 sprintf(feat
,"a4:%s",ant
);
346 //Generamos el sufijo de longitud 2
348 if (len
> 1) sprintf(feat
,"z2:%c%c",wrd
[len
-2],wrd
[len
-1]);
349 else sprintf(feat
,"z2:~%c","",wrd
[len
-1]);
352 //generamos el sufijo de longitud 3
354 if (len
> 2) sprintf(feat
,"z3:%c%c%c",wrd
[len
-3],wrd
[len
-2],wrd
[len
-1]);
355 else if (len
> 1) sprintf(feat
,"z3:~%c%c",wrd
[len
-2],wrd
[len
-1]);
356 else sprintf(feat
,"z3:~~%c",wrd
[len
-1]);
359 //generamos el sufijo de longitud 4
361 //strcpy(prefix4,substr(wrd, 0, 4));
362 if (len
> 3) sprintf(feat
,"z4:%c%c%c%c",wrd
[len
-4],wrd
[len
-3],wrd
[len
-2],wrd
[len
-1]);
363 else if (len
> 2) sprintf(feat
,"z4:~%c%c%c",wrd
[len
-3],wrd
[len
-2],wrd
[len
-1]);
364 else if (len
> 1) sprintf(feat
,"z4:~~%c%c",wrd
[len
-2],wrd
[len
-1]);
365 else sprintf(feat
,"z4:~~~%c",wrd
[len
-1]);
368 //Comienza por Mayuscula?
369 if (erLookRegExp2(&erStartCap
,wrd
)) startcap
= 1;
371 sprintf(feat
,"A:%d",startcap
);
374 //Esta toda la palabra en mayusculas?
375 if (erLookRegExp2(&erAllUp
,wrd
)) allup
= 1;
377 sprintf(feat
,"AA:%d",allup
);
380 //Esta toda la palabra en minusculas?
381 if (erLookRegExp2(&erAllLow
,wrd
)) alllow
= 1;
383 sprintf(feat
,"aa:%d",alllow
);
386 //Longitud de la palabra
388 sprintf(feat
,"L:%d",len
);
391 if (erLookRegExp2(&erContainCap
,wrd
)) containcap
= 1;
393 sprintf(feat
,"CA:%d",containcap
);
396 if (erLookRegExp2(&erContainCaps
,wrd
)) containcaps
= 1;
398 sprintf(feat
,"CAA:%d",containcaps
);
402 if (erLookRegExp2(&erContainPeriod
,wrd
)) containperiod
= 1;
404 sprintf(feat
,"CP:%d",containperiod
);
407 //Contiene un numero?
408 if (erLookRegExp2(&erContainNum
,wrd
)) containnum
= 1;
410 sprintf(feat
,"CN:%d",containnum
);
413 //Es una palabra multiple?
414 if (erLookRegExp2(&erMultiWord
,wrd
)) multiword
= 1;
416 sprintf(feat
,"MW:%d",multiword
);
419 //Letra por la que empieza la palabra
421 sprintf(feat
,"c1:%c",wrd
[0]);
424 //Letra por la que acaba la palabra
426 //charn = wrd[len-1]; //substr(wrd, len-1, 1);
427 sprintf(feat
,"cn:%c",wrd
[len
-1]);
433 * void winPushSwnFeature (struct stack_t *pila)
434 * Recibe como parametro <pila>, donde se apilara la "feature"
435 * Swn.Swn es el elemento final de frase que puede ser
438 void swindow::winPushSwnFeature(struct stack_t
*pila
)
440 char *feature
= new char[10];
441 sprintf(feature
,"Swn:%s",last
->wrd
);
447 * void winPushAmbiguityFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
448 * Genera el atributo que representa la ambiguedad de una palabra.
449 * Recibe como parametros:
450 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
451 * aunque se recibe como un void*.
452 * d, es el diccionario con el que estamos trabajando
453 * pila,es la pila donde apilaremos el atributo generado
454 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
// Builds the ambiguity-class attribute for one window position.
// The visible code formats "<mark><offset>:" into `value`, appends text taken
// from dictionary entries (pInfoDict->txt, with a '~' after it when numMaybe>0),
// or "UNKNOWN", or EMPTY_POS, and finally copies `value` into the 100-byte
// heap string `feature`. The branch conditions selecting between those cases
// are not visible in this excerpt.
// NOTE(review): every sprintf(value,"%s...",value,...) below passes the
// destination buffer as a source argument; sprintf's behaviour is undefined
// when objects overlap.
// NOTE(review): `value` and `feature` are fixed 100-byte buffers and nothing
// visible bounds the concatenated length.
457 void swindow::winPushAmbiguityFeature(void *ptr
,dictionary
*d
,struct stack_t
*pila
,int direction
)
459 char value
[100],txt
[5];
// ptr is received as void* and used as a nodo_feature_list node.
460 nodo_feature_list
*p
= (nodo_feature_list
*)ptr
;
468 char *feature
= new char[100];
// Current element of the node's list, read as an int offset.
471 num
= (int *) p
->l
.getIndex();
472 sprintf(value
,"%s%d:",p
->mark
,*num
);
// Window node at that offset for the given reading direction.
473 pn
= get(*num
, direction
);
477 w
= d
->getElement(pn
->wrd
);
480 list
= (simpleList
*) d
->getElementMaybe(w
);
481 int numMaybe
= d
->getElementNumMaybe(w
);
484 pInfoDict
= (infoDict
*) list
->getIndex();
486 if (numMaybe
>0) sprintf(value
,"%s%s~",value
,pInfoDict
->txt
);
487 else sprintf(value
,"%s%s",value
,pInfoDict
->txt
);
493 else sprintf(value
,"%s%s",value
,"UNKNOWN");
495 else sprintf(value
,"%s%s",value
,EMPTY_POS
);
497 strcpy(feature
,value
);
503 * void winPushMFTFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
504 * Genera el atributo con la "Most Frequent Tag", la etiqueta mas frecuente.
505 * Recibe como parametros:
506 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
507 * aunque se recibe como un void*.
508 * d, es el diccionario con el que estamos trabajando
509 * pila,es la pila donde apilaremos el atributo generado
510 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
// Builds the "most frequent tag" attribute for one window position.
// The visible code formats "<mark><offset>:" into `value`, copies
// pInfoDict->txt into `mft` whenever pInfoDict->num exceeds `max`, appends
// `mft` (or "UNKNOWN", or EMPTY_POS) to `value`, and duplicates the result
// into an exactly sized heap string `feature`.
// NOTE(review): `mft` is only 5 bytes and is filled with an unbounded strcpy.
// NOTE(review): whether `max` is updated after a hit is not visible in this
// excerpt -- confirm, otherwise the last entry with num>0 wins.
// NOTE(review): sprintf(value,"%s%s",value,...) passes the destination as a
// source argument, which is undefined behaviour for sprintf.
513 void swindow::winPushMFTFeature(void *ptr
,dictionary
*d
,struct stack_t
*pila
,int direction
)
515 char value
[100],mft
[5];
// ptr is received as void* and used as a nodo_feature_list node.
516 nodo_feature_list
*p
= (nodo_feature_list
*)ptr
;
519 int w
,*num
,max
=0,ret
=0;
523 //strcpy(feature,"");
// Current element of the node's list, read as an int offset.
525 num
= (int *) p
->l
.getIndex();
526 sprintf(value
,"%s%d:",p
->mark
,*num
);
// Window node at that offset for the given reading direction.
527 pn
= get(*num
, direction
);
530 w
= d
->getElement(pn
->wrd
);
533 list
= (simpleList
*) d
->getElementMaybe(w
);
534 int numMaybe
= d
->getElementNumMaybe(w
);
537 pInfoDict
= (infoDict
*) list
->getIndex();
539 if (pInfoDict
->num
>max
) strcpy(mft
,pInfoDict
->txt
);
543 sprintf(value
,"%s%s",value
,mft
);
546 else sprintf(value
,"%s%s",value
,"UNKNOWN");
548 else sprintf(value
,"%s%s",value
,EMPTY_POS
);
// Heap copy sized to the final string (length + NUL).
549 char *feature
= new char[strlen(value
)+1];
550 strcpy(feature
,value
);
556 * void winPushMaybeFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
557 * Genera tantos atributos "maybe" como posibles POS pueda tener la palabra, y los
559 * Recibe como parametros:
560 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
561 * aunque se recibe como un void*.
562 * d, es el diccionario con el que estamos trabajando
563 * pila,es la pila donde apilaremos el atributo generado
564 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
// Builds "maybe" attributes of the form "<mark><offset>~<tag>:1".
// The visible code formats the "<mark><offset>~" prefix into `txt` and then
// allocates a feature string per case: one using pInfoDict->txt (10 bytes),
// one using "UNKNOWN" (15 bytes) and one using EMPTY_POS (10 bytes). The
// conditions and loop selecting those cases are not visible in this excerpt.
// NOTE(review): `txt` is 5 bytes, so mark + offset digits + '~' must not
// exceed 4 characters; nothing visible enforces that.
// NOTE(review): the 10-byte allocations leave at most 3 characters for the
// tag once a 4-character prefix and ":1" are written -- confirm tag lengths.
567 void swindow::winPushMaybeFeature(void *ptr
,dictionary
*d
,struct stack_t
*pila
,int direction
)
569 char value
[100],txt
[5];
// ptr is received as void* and used as a nodo_feature_list node.
570 nodo_feature_list
*p
= (nodo_feature_list
*)ptr
;
// Current element of the node's list, read as an int offset.
578 num
= (int *) p
->l
.getIndex();
579 sprintf(txt
,"%s%d~",p
->mark
,*num
);
// Window node at that offset for the given reading direction.
580 pn
= get(*num
, direction
);
583 w
= d
->getElement(pn
->wrd
);
587 list
= (simpleList
*) d
->getElementMaybe(w
);
591 feature
= new char[10];
593 pInfoDict
= (infoDict
*) list
->getIndex();
594 sprintf(feature
,"%s%s:1",txt
,pInfoDict
->txt
);
602 feature
= new char[15];
604 sprintf(feature
,"%s%s:1",txt
,"UNKNOWN");
610 feature
= new char[10];
611 sprintf(feature
,"%s%s:1",txt
,EMPTY_POS
);
618 * void winPushPosFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
619 * Genera un atributo con la POS de algunos elementos de la ventana.
620 * Recibe como parametros:
621 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
622 * aunque se recibe como un void*.
624 * d, es el diccionario con el que estamos trabajando
624 * pila,es la pila donde apilaremos el atributo generado
625 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
// Builds a part-of-speech attribute "<mark><o1>,<o2>,...:<t1>~<t2>~...".
// For each offset read from the node's list the visible code extends `name`
// with the offset, fetches the window node, and sets `txt` to EMPTY_POS (no
// node), to the node's own pos, to "UNKNOWN", or to a concatenation of
// dictionary tags (each followed by '_' when numMaybe>0); `txt` is then
// appended to `value` with '~' as separator. Finally "name:value" is copied
// into a heap string. The enclosing loop is not visible in this excerpt.
// NOTE(review): sprintf(name,...,name,...), sprintf(txt,...,txt,...) and
// sprintf(value,...,value,...) pass the destination as a source argument,
// which is undefined behaviour for sprintf.
// NOTE(review): name, value and txt are 100 bytes each, and the final
// sprintf writes name + ':' + value back into the 100-byte `name`.
628 void swindow::winPushPosFeature(void *ptr
,dictionary
*d
, struct stack_t
*pila
,int direction
)
630 char value
[100]="",name
[100]="",txt
[100]="";
// ptr is received as void* and used as a nodo_feature_list node.
631 nodo_feature_list
*p
= (nodo_feature_list
*)ptr
;
636 int end
=1,ret
=1,w
,*num
;
641 num
= (int *) p
->l
.getIndex();
// First offset starts the name with the mark; later ones are comma-separated.
643 if (strcmp(name
,EMPTY
)==0) sprintf(name
,"%s%d",p
->mark
,*num
);
644 else sprintf(name
,"%s,%d",name
,*num
);
645 pn
= get(*num
, direction
);
647 if (pn
==NULL
) strcpy(txt
,EMPTY_POS
);
// Node without an assigned pos, or the focus position itself (offset 0).
649 else if ( (strcmp(pn
->pos
,EMPTY
)==0) || (*num
==0) )
652 w
= d
->getElement(pn
->wrd
);
656 simpleList
*list
= (simpleList
*) d
->getElementMaybe(w
);
657 int numMaybe
= d
->getElementNumMaybe(w
);
662 pInfoDict
= (infoDict
*) list
->getIndex();
664 if (numMaybe
>0) sprintf(txt
,"%s%s_",txt
,pInfoDict
->txt
);
665 else sprintf(txt
,"%s%s",txt
,pInfoDict
->txt
);
671 else strcpy(txt
,"UNKNOWN");
673 else strcpy(txt
,pn
->pos
);//AKI3
// Values of successive offsets are joined with '~'.
676 if (strcmp(value
,EMPTY
)==0) sprintf(value
,"%s",txt
);
677 else sprintf(value
,"%s~%s",value
,txt
);
682 sprintf(name
,"%s:%s",name
,value
);
684 feature
= new char[strlen(name
)+2];
685 strcpy (feature
,name
);
686 //fprintf(stderr,"%s\n",feature);
692 * void winPushWordFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
693 * Genera un atributo con la palabra de algunos elementos de la ventana.
694 * Recibe como parametros:
695 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
696 * aunque se recibe como un void*.
697 * d, es el diccionario con el que estamos trabajando
698 * pila,es la pila donde apilaremos el atributo generado
699 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
// Builds a word attribute "<mark><o1>,<o2>,...:<w1>~<w2>~...".
// The first offset of the node's list seeds `name` and `value`; each further
// offset (while p->l.next()>=0) is appended to `name` after a ',' and its
// word (or EMPTY_WORD when no node exists at that offset) is appended to
// `value` after a '~'. The result "name:value" is copied into a heap string.
// NOTE(review): sprintf(name,...,name,...) and sprintf(value,...,value,...)
// pass the destination as a source argument, which is undefined behaviour.
// NOTE(review): `value` may hold up to 199 characters and is written back
// into the 200-byte `name` together with the existing name and ':'.
702 void swindow::winPushWordFeature(void *ptr
,dictionary
*d
, struct stack_t
*pila
,int direction
)
704 char value
[200],name
[200],txt
[100];
// ptr is received as void* and used as a nodo_feature_list node.
705 nodo_feature_list
*p
= (nodo_feature_list
*)ptr
;
709 int *num
= (int *) p
->l
.getIndex();
710 pn
= get(*num
, direction
);
712 if (pn
==NULL
) strcpy(value
,EMPTY_WORD
);
713 else strcpy(value
,pn
->wrd
);
714 sprintf(name
,"%s%d",p
->mark
,*num
);
// Remaining offsets of the list.
716 while (p
->l
.next()>=0)
718 num
= (int *) p
->l
.getIndex();
719 sprintf(name
,"%s,%d",name
,*num
);
720 pn
= get(*num
, direction
);
722 if (pn
==NULL
) strcpy(txt
,EMPTY_WORD
);
723 else strcpy(txt
,pn
->wrd
);
724 sprintf(value
,"%s~%s",value
,txt
);
727 sprintf(name
,"%s%s%s",name
,":",value
);
729 feature
= new char[strlen(name
)+2];
730 strcpy (feature
,name
);
735 /****************************************************************************/
737 int swindow::sentenceLength()
739 //Returns the number of words in the sentence loaded in this object
746 * Elimina todas las palabras existentes en la ventana
747 * (la funcion es void: no retorna el numero de elementos que poseia la ventana)
// Releases the nodes of the window list.
// Does nothing when the list is empty (first==NULL). Inside the loop the
// node *before* `first` is released: its stackScores first, then the node.
// The statement that advances `first` is not visible in this excerpt.
749 void swindow::deleteList()
751 if (first
==NULL
) return;
753 while (first
->next
!=NULL
)
756 delete first
->previous
->stackScores
;
757 delete first
->previous
;
// The final node is handled separately after the loop.
762 delete last
->stackScores
;
// Common initialisation: clears the sentence line buffer, reads the rest of
// the sentence when `ret` is positive, and derives posEnd from `ret`.
// Returns -1 when `ret` is -1; other return paths are not visible here.
780 int swindow::iniGeneric()
791 //Buffer that holds the sentence
792 memset(lSentence
,0,sizeof(lSentence
));
797 if (ret
>0) readSentence();
799 if (ret
==-1) return -1;
// ret==0: window end is placed using the ordinal of the last node.
800 else if (ret
==0) posEnd
= posIndex
+last
->ord
;
801 else posEnd
=posIndex
+ret
;
// Fills the window by calling readInput() for positions posIndex up to
// lengthWin-1, stopping early as soon as readInput() returns a value <= 0.
809 int swindow::iniList()
813 for(j
=posIndex
; ((j
<lengthWin
) && (ret
>0)); j
++) ret
= readInput();
816 // 0 if end of sentence
817 // -1 if there aren't words
// When every read succeeded, ret becomes j-posIndex-1.
819 if (ret
>0) ret
=j
-posIndex
-1;
825 /****************************************************************************/
// Keeps calling readInput() until it returns a value that is not positive.
827 int swindow::readSentence()
830 while (ret
>0) ret
= readInput();
835 /****************************************************************************/
838 * Read one line from corpus and add node to list
839 * Return 1 if it's ok
840 * 0 if end of sentence
841 * -1 if there aren't more words
// Reads one line from `input`, splits it into up to two whitespace-separated
// fields and, unless the line is empty or starts with "##", adds a node via
// winAdd(). The raw line is also stored in lSentence[iLSentence].
// Returns -2 immediately when the stream is already at end of file (note:
// the header comment above only lists 1, 0 and -1).
// NOTE(review): the return value of fgets is ignored; on a read failure
// `line` keeps its initial EMPTY contents.
// NOTE(review): sscanf "%s %s" has no field widths, so a token longer than
// 99 characters (lines may hold 249) overruns value[0] / value[1].
// NOTE(review): in the visible order, strcpy into lSentence[iLSentence]
// happens before the iLSentence >= MAX_SENTENCE_LENGTH check; where
// iLSentence is incremented is not visible here -- confirm the row index can
// never reach MAX_SENTENCE_LENGTH at the time of the copy.
844 int swindow::readInput()
846 if (feof(input
)) return -2;
848 char value
[2][100] ={EMPTY
,EMPTY
};
849 char line
[250] = EMPTY
;
850 int is_comment
= FALSE
;
851 int is_empty_line
= FALSE
;
854 fgets(line
,250,input
);
// Lines beginning with "##" are comments.
855 if ( line
[0] == '#' && line
[1] == '#' ) is_comment
= TRUE
;
857 ret
= sscanf(line
,"%s %s\n",value
[0],value
[1]);
861 if ( line
[0] == '\n' ) is_empty_line
= TRUE
;
865 if ( strlen(value
[0]) > 0 && is_empty_line
== FALSE
&& is_comment
== FALSE
)
866 winAdd(value
[0],value
[1]);
868 //fprintf(stderr,"\t%s %s \n%d %s\n",value[0],value[1],iLSentence,lSentence);
870 strcpy(lSentence
[iLSentence
],line
);
873 //fprintf (stderr,"%s",lSentence[iLSentence-1]);
874 if ( iLSentence
>= MAX_SENTENCE_LENGTH
)
876 //fprintf (stderr,"-------->");
877 //return 0; //If the maximum sentence size is exceeded, 0 is returned
878 fprintf(stderr
,"\nFound a sentence with more than %d words!!\n",MAX_SENTENCE_LENGTH
);
// A token that is exactly ".", "?" or "!" marks the end of the sentence.
882 if ((strcmp(".",value
[0])==0) || (strcmp("?",value
[0])==0) || (strcmp("!",value
[0])==0))
884 //fprintf (stderr,"-------->");
892 /****************************************************************************/
894 /* Read one line from corpus and add node to list
895 * Return 1 if it's ok
896 * 0 if end of sentence
897 * -1 if there aren't more words
// Older character-by-character reader. Reads up to the next '\n', building
// value[0] (and value[1]) one character at a time, then adds a node via
// winAdd(). Returns -2 at end of file, -1 when no word was read and the line
// was not a "##" comment, and 0 when the word is ".", "?" or "!".
// NOTE(review): sprintf(value[w],"%s%c",value[w],c) passes the destination
// as a source argument (undefined behaviour) and never checks the 100-byte
// limit of value[w].
// NOTE(review): fgetc returns int but the result is stored in a char.
// NOTE(review): the test `c==' ' || ... || c==32` checks the space character
// twice (' ' is 32 in ASCII).
900 int swindow::readInput_old()
902 if (feof(input
)) return -2;
904 char value
[2][100]={EMPTY
,EMPTY
};
905 int i
=0,w
=0,ret
=1,isCom
=0,addComAtEnd
=0;
906 char ant
='q',c
= fgetc(input
);
908 while ((!feof(input
)) && (c
!='\n'))
// "##" within the first characters of the line starts a comment.
910 if (i
<2 && ant
=='#' && c
=='#')
913 fgets(garbage
,512,input
);
918 strcpy(value
[0],EMPTY
);
919 strcpy(value
[1],EMPTY
);
// Whitespace while filling the first field.
922 if ((w
==0) && (c
==' ' || c
=='\t' || c
==32))
931 sprintf(value
[w
],"%s%c",value
[w
],c
);
940 if ((strlen(value
[0])<=0) && (!isCom
)) return -1;
942 winAdd(value
[0],value
[1]);
944 if ((strcmp(".",value
[0])==0) || (strcmp("?",value
[0])==0) || (strcmp("!",value
[0])==0)) return 0;
949 /****************************************************************************/
// Appends a new node for the token <wrd> (with trailing comment <com>) to
// the window list. The node's `wrd` is replaced by a placeholder ("@CARD",
// "@CARDSEPS", "@CARDPUNCT", "@CARDSUFFIX") when erLookRegExp classifies the
// token as one of those kinds; otherwise it is the token itself. `realWrd`
// always keeps the original token, `pos`/`posOld` start as EMPTY, and an
// empty score stack is created for the node.
// NOTE(review): all field copies use strcpy without a length check; the
// sizes of nodo::wrd, realWrd and comment are not visible here -- confirm
// they can hold a 99-character token.
951 int swindow::winAdd(char *wrd
, char *com
)
953 nodo
*aux
= new nodo
;
// Link the new node after the current last node.
963 aux
->previous
= last
;
968 int erRet
=erLookRegExp(wrd
);
971 case CARD
: strcpy(aux
->wrd
,"@CARD"); break;
972 case CARDSEPS
: strcpy(aux
->wrd
,"@CARDSEPS"); break;
973 case CARDPUNCT
: strcpy(aux
->wrd
,"@CARDPUNCT"); break;
974 case CARDSUFFIX
: strcpy(aux
->wrd
,"@CARDSUFFIX"); break;
975 default: strcpy(aux
->wrd
,wrd
);
977 strcpy(aux
->realWrd
,wrd
);
978 strcpy(aux
->posOld
,EMPTY
);
979 strcpy(aux
->pos
,EMPTY
);
980 strcpy(aux
->comment
,com
);
981 aux
->stackScores
= new stack_t
;
982 init_stack(aux
->stackScores
);
991 /****************************************************************************/
999 swindow::swindow(FILE *in
)
// Constructor taking the input stream, the window length (`number`) and the
// 1-based position of the interest point (`position`).
// Reports an error on stderr when number<3 or number<=position; what happens
// after the message is not visible in this excerpt.
1009 swindow::swindow(FILE *in
,int number
, int position
)
1013 if ((number
<3) || (number
<=position
))
1015 fprintf(stderr
,"\nWindow Length can not be first or last element.\nLength should be greater than \"Interest Point Position\" or 3.\n");
// posIndex is the 0-based index of the interest point.
1020 posIndex
= position
-1;
// Constructor taking the input stream and the window length (`number`).
// The interest point is placed at index number/2 (integer division).
1026 swindow::swindow(FILE *in
,int number
)
1030 posIndex
= number
/2;
1036 /****************************************************************************/
1038 /* Move Interest Point to next element */
1042 if ((ret
==-1) && (endWin
->next
!=NULL
)) ret
=1;
1044 if ((index
==NULL
) || (index
->next
==NULL
)) return -1;
1045 if ((posIndex
>=posEnd
) && (ret
==-1)) return -1;
1047 if ((posIndex
<posEnd
) && (ret
==-1)) posEnd
--;
1048 if ((posEnd
==lengthWin
-1) && (ret
!=-1)) endWin
= endWin
->next
;
1050 if (posBegin
==0) beginWin
= beginWin
->next
;
1051 else if ((posIndex
>=posBegin
) && (posBegin
>0)) posBegin
--;
1053 index
= index
->next
;
1058 /****************************************************************************/
1060 /* Move Interest Point to previous element */
// Moves the interest point one node towards the start of the list.
// Returns -1 when there is no current node or no previous node.
// The window bounds follow the move: the start pointer steps back while
// posBegin is 0 and a previous node exists, otherwise posBegin grows up to
// posIndex; the end side grows (posEnd++) until it reaches lengthWin-1 and
// from then on the end pointer steps back instead.
1061 int swindow::previous()
1063 if ((index
==NULL
) || (index
->previous
==NULL
)) return -1;
1065 if ((posBegin
==0) && (beginWin
->previous
!=NULL
)) beginWin
= beginWin
->previous
;
1066 else if (posIndex
>posBegin
) posBegin
++;
1068 if (posEnd
<lengthWin
-1) posEnd
++;
1069 else endWin
= endWin
->previous
;
1071 index
= index
->previous
;
1076 /****************************************************************************/
1078 /* Get Interest Point */
1079 nodo
*swindow::getIndex()
1085 /****************************************************************************/
// Returns the window node located `position` steps away from the interest
// point. position==0 returns the interest point itself. When direction is 2
// the sign of `position` is inverted first (presumably the right-to-left
// reading direction -- confirm against the direction constants).
// The visible part of the range check rejects offsets that fall before
// posBegin or after posEnd; the start of that condition and the value
// returned in that case are not visible in this excerpt.
1087 nodo
*swindow::get(int position
,int direction
)
1092 if (position
== 0) return index
;
1093 if (direction
==2) position
= -position
;
1095 || ((position
<0) && (posIndex
+position
+1<posBegin
))
1096 || ((position
>0) && (posIndex
+position
>posEnd
)) )
// Step forward, never walking past the end of the list.
1106 if (aux
->next
!= NULL
) aux
= aux
->next
;
// Step backward, never walking past the start of the list.
1112 if (aux
->previous
!= NULL
) aux
= aux
->previous
;
1121 /****************************************************************************/
1128 if (first
==NULL
) return 0;
1131 nodo
*actual
= first
;
1133 memset(wrd
,0,sizeof(wrd
));
1134 sscanf(lSentence
[i
],"%s",wrd
);
1135 if (strcmp(wrd
,actual
->realWrd
)==0)
1137 printf("%s %s %s\n",actual
->realWrd
,actual
->pos
,actual
->comment
);
1139 else printf(lSentence
[i
]);
1142 while (actual
->next
!=NULL
)
1145 memset(wrd
,0,sizeof(wrd
));
1146 sscanf(lSentence
[i
],"%s",wrd
);
1147 if (strcmp(wrd
,tmp
->realWrd
)==0)
1149 printf("%s %s %s\n",tmp
->realWrd
,tmp
->pos
,tmp
->comment
);
1152 else printf(lSentence
[i
]);
1154 //printf("%s %s %s\n",actual->realWrd,actual->pos,actual->comment);
1160 /****************************************************************************/
1162 void swindow::putLengthWin(int l
)
1168 /****************************************************************************/
1170 void swindow::putIndex(int i
)
1176 /****************************************************************************/
1179 * Modifica el valor de los pesos para una palabra
1181 * action = 0 --> Pone el peso maximo (put max score)
1182 * action = 1 --> Inicializa los pesos (reset values)
1183 * action = 2 --> Restaura el valor de la vuelta anterior(last lap value)
// Walks every node of the list and updates its pos/weight according to
// `action`. Returns 0 immediately when the list is empty.
// In the first visible branch the node's score stack is emptied, keeping the
// entry with the largest `data`; that entry's pos and data become the node's
// pos/weight and are also saved in posOld/weightOld.
// "case 1" clears the node's pos. The last visible branch restores pos and
// weight from posOld/weightOld.
// NOTE(review): `max` is a weight_node_t held by value and only `pos` is
// visibly copied into it; the assignment of max.data is not visible in this
// excerpt.
1185 int swindow::winMaterializePOSValues(int action
)
1187 if (first
==NULL
) return 0;
1190 weight_node_t
*w
,max
;
1193 while (actual
!=NULL
)
1200 while(!empty(actual
->stackScores
))
1202 w
= (weight_node_t
*) pop(actual
->stackScores
);
// `inicio` forces the first popped entry to be taken as the initial maximum.
1204 if (inicio
|| w
->data
>max
.data
)
1207 strcpy(max
.pos
,w
->pos
);
1212 actual
->weight
=max
.data
;
1213 strcpy(actual
->pos
,max
.pos
);
1214 //Added for 2 laps tagging
1215 actual
->weightOld
=max
.data
;
1216 strcpy(actual
->posOld
,max
.pos
);
1218 case 1: //RESET VALUES
1219 strcpy(actual
->pos
,"");
1223 strcpy(actual
->pos
,actual
->posOld
);
1224 actual
->weight
=actual
->weightOld
;
1227 actual
=actual
->next
;
1233 /****************************************************************************/
1236 * int winExistUnkWord(int direction, dictionary *d)
1237 * Esta funcion comprueba si hay parabras desconocidas.
1238 * En caso de que el parametro direction sea:
1239 * LEFT_TO_RIGHT - mira si hay desconocidas a la
1240 * derecha del punto de interes de la ventana.
1241 * RIGHT_TO_LEFT - mira si hay desconocidas a la izquierda
1242 * del punto de interes de la ventana.
1243 * Esta funcion devuelve:
1244 * un entero >=0, si no hay desconocidas
1245 * -1, si hay desconocidas
1247 int swindow::winExistUnkWord(int direction
, dictionary
*d
)
1250 int ret
=0,i
=posIndex
;
1252 if (index
==NULL
) return 1;
1260 if (aux
->next
==NULL
|| aux
==endWin
) ret
=-1;
1261 else aux
= aux
->next
;
1262 if (d
->getElement(aux
->wrd
)==HASH_FAIL
) return -1;
1266 if (aux
->previous
==NULL
|| aux
==beginWin
) ret
=-1;
1267 else aux
= aux
->previous
;
1268 if (d
->getElement(aux
->wrd
)==HASH_FAIL
) return -1;