Reordered files
[svmtool++.git] / src / swindow.cc
blobcb652e3c18e4218250c0da90568e8b873f6a0032
1 /*
2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include <sys/types.h>
20 #include <regex.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include "hash.h"
25 #include "list.h"
26 #include "dict.h"
27 #include "weight.h"
28 #include "stack.h"
29 #include "swindow.h"
30 #include "er.h"
31 #include "common.h"
32 #include "marks.h"
// Maximum number of raw input lines buffered for one sentence; readInput()
// terminates the program once iLSentence reaches this value.
#define MAX_SENTENCE_LENGTH 1000

// Raw text of every line read for the current sentence, in input order
// (readInput() stores each line here, show() prints from it).
char lSentence[MAX_SENTENCE_LENGTH][250];
// Number of lines currently stored in lSentence.
int iLSentence = 0;
39 /*****************************************************************
40 * Feature Generation
41 *****************************************************************/
43 void swindow::winPushStartWithLowerFeature(char *wrd,stack_t *pila)
45 int startlower=0;
47 //Comienza por Minuscula?
48 if (erLookRegExp2(&erStartLower,wrd))
50 startlower = 1;
52 //char *feat = new char[strlen(START_LOWER_MARK)+4];
53 char *feat = new char[5];//mod Correcting dynamic memory errors
54 sprintf(feat,"%s:%d",START_LOWER_MARK,startlower);
56 push(pila,feat);
61 void swindow::winPushStartWithNumberFeature(char *wrd,stack_t *pila)
63 int startnumber=0;
65 //Comienza por Numero?
66 if (erLookRegExp2(&erStartNumber,wrd))
68 startnumber = 1;
70 //mod Correcting dynamic memory errors
71 //char *feat = new char[strlen(START_NUMBER_MARK)+4];
72 char *feat = new char[5];
74 sprintf(feat,"%s:%d",START_NUMBER_MARK,startnumber);
76 push(pila,feat);
81 void swindow::winPushSuffixFeature(char *wrd, struct stack_t *pila,int longitud)
83 //Obtenemos la longitud de la palabra
85 char *feat = new char[longitud+6];
86 int len = strlen(wrd);
87 char suf[longitud+1];
88 //int a=0;
90 strcpy(suf,"");
91 for (int i=len-longitud; i<=len-1; i++)
93 if (i>=0) sprintf(suf,"%s%c",suf,wrd[i]);
94 else sprintf(suf,"%s~",suf);
97 sprintf(feat,"%s%d:%s",SUFFIX_MARK,longitud,suf);
98 push(pila,feat);
103 * void winPushPreffixFeatures ( char *wrd, struct stack_t *pila, int longitud)
104 * esta funcion creara las "features" para la palabra desconocida
105 * <wrd> y las apilara en en el parametro <pila>
107 void swindow::winPushPrefixFeature(char *wrd, struct stack_t *pila,int longitud)
109 //Obtenemos la longitud de la palabra
110 char *feat = new char[6+longitud];
111 int len = strlen(wrd);
112 char pref[longitud+1];
114 strcpy(pref,"");
115 for (int i=0; i<longitud; i++)
117 if (len > i) sprintf(pref,"%s%c",pref,wrd[i]);
118 else /*if (i > len-1 )*/ sprintf(pref,"%s~",pref);
121 sprintf(feat,"%s%d:%s",PREFIX_MARK,longitud,pref);
122 push(pila,feat);
126 void swindow::winPushStartWithCapFeature(char *wrd, struct stack_t *pila)
128 int startcap=0;
130 //Comienza por Mayuscula?
131 if (erLookRegExp2(&erStartCap,wrd))
133 startcap = 1;
134 //mod Correcting dynamic memory errors
135 //char *feat = new char[strlen(START_CAPITAL_MARK)+4];
136 char *feat = new char[5];
138 sprintf(feat,"%s:%d",START_CAPITAL_MARK,startcap);
139 push(pila,feat);
144 void swindow::winPushAllUpFeature(char *wrd,stack_t *pila)
146 int allup=0;
148 //Esta toda la palabra en mayusculas?
149 if (erLookRegExp2(&erAllUp,wrd))
151 allup = 1;
153 //mod Correcting dynamic memory errors
154 //char *feat = new char[4];
155 char *feat = new char[5];
157 sprintf(feat,"%s:%d",ALL_UPPER_MARK,allup);
158 push(pila,feat);
163 void swindow::winPushAllLowFeature(char *wrd,stack_t *pila)
165 int alllow = 0;
166 //Esta toda la palabra en minusculas?
167 if (erLookRegExp2(&erAllLow,wrd))
169 alllow = 1;
171 //mod Correcting dynamic memory errors
172 //char *feat = new char[4];
173 char *feat = new char[5];
175 sprintf(feat,"%s:%d",ALL_LOWER_MARK,alllow);
176 push(pila,feat);
181 void swindow::winPushContainCapFeature(char *wrd, stack_t *pila)
183 int containcap = 0;
184 if (erLookRegExp2(&erContainCap,wrd))
186 containcap = 1;
188 //mod Correcting dynamic memory errors
189 //char *feat = new char[4];
190 char *feat = new char[5];
192 sprintf(feat,"%s:%d",CONTAIN_CAP_MARK,containcap);
193 push(pila,feat);
198 void swindow::winPushContainCapsFeature(char *wrd, stack_t *pila)
200 int containcaps = 0;
201 if (erLookRegExp2(&erContainCaps,wrd))
203 containcaps = 1;
205 //mod Correcting dynamic memory errors
206 //char *feat = new char[5];
207 char *feat = new char[6];
209 sprintf(feat,"%s:%d",CONTAIN_CAPS_MARK,containcaps);
210 push(pila,feat);
215 void swindow::winPushContainPeriodFeature(char *wrd, stack_t *pila)
217 int containperiod = 0;
218 //Contiene un punto?
219 if (erLookRegExp2(&erContainPeriod,wrd))
221 containperiod = 1;
222 char *feat = new char[5];
223 sprintf(feat,"%s:%d",CONTAIN_PERIOD_MARK,containperiod);
224 push(pila,feat);
229 void swindow::winPushContainCommaFeature(char *wrd, stack_t *pila)
231 int containcomma = 0;
232 //Contiene un punto?
233 if (erLookRegExp2(&erContainComma,wrd))
235 containcomma = 1;
236 char *feat = new char[5];
237 sprintf(feat,"%s:%d",CONTAIN_COMMA_MARK,containcomma);
238 push(pila,feat);
243 void swindow::winPushContainNumFeature(char *wrd, stack_t *pila)
245 int containnum = 0;
246 //Contiene un numero?
247 if (erLookRegExp2(&erContainNum,wrd))
249 containnum = 1;
251 char *feat = new char[5];
252 //mod
253 //sprintf(feat,"CN:%d",containnum);
254 sprintf(feat,"%s:%d",CONTAIN_NUMBER_MARK,containnum);
256 push(pila,feat);
261 void swindow::winPushMultiwordFeature(char *wrd, stack_t *pila)
263 int multiword = 0;
264 //Es una palabra multiple?
265 if (erLookRegExp2(&erMultiWord,wrd))
267 multiword = 1;
269 //mod Correcting dynamic memory errors
270 //char *feat = new char[6];
271 //sprintf(feat,"MW:%d",multiword);
272 char *feat = new char[5];
273 sprintf(feat,"%s:%d",MULTIWORD_MARK,multiword);
275 push(pila,feat);
280 void swindow::winPushLetterFeature(char *wrd , stack_t *pila, int position,int where)
282 char *feature = new char[12];
284 if (COUNTING_FROM_END==where)
286 sprintf(feature,"%s%d:%c",CHAR_Z_MARK,position,wrd[strlen(wrd)-position]);
288 else
290 sprintf(feature,"%s%d:%c",CHAR_A_MARK,position,wrd[position-1]);
293 push (pila,feature);
297 void swindow::winPushLenghtFeature(char *wrd, stack_t *pila)
299 //Obtenemos la longitud de la palabra
300 int len = strlen(wrd);
302 //Longitud de la palabra
303 //mod Correcting dynamic memory errors
304 //char *feat = new char[4];
305 char *feat = new char[6];
307 sprintf(feat,"%s:%d",LENGTH_MARK,len);
308 push(pila,feat);
313 * void winPushUnkownoFeatures ( char *wrd, struct stack_t *pila)
314 * esta funcion creara las "features" para la palabra desconocida
315 * <wrd> y las apilara en en el parametro <pila>
317 void swindow::winPushUnknownFeatures(char *wrd, struct stack_t *pila)
319 int startcap=0,allup=0,alllow=0,wordlength=0,containnum=0,multiword=0,containcap=0,containcaps=0,containperiod=0;
321 //Obtenemos la longitud de la palabra
322 int len = strlen(wrd);
323 char ant[10]="";
325 //Creamos el prefijo de longitud 2
326 char *feat = new char[6];
327 if (len > 1) sprintf(ant,"%c%c",wrd[0],wrd[1]);
328 else sprintf(ant,"%c~",wrd[0]);
329 sprintf(feat,"a2:%s",ant);
330 push(pila,feat);
332 //Generamos el prefijo de longitud 3
333 feat = new char[7];
334 if (len > 2) sprintf(ant,"%c%c%c",wrd[0],wrd[1],wrd[2]);
335 else sprintf(ant,"%s~",ant);
336 sprintf(feat,"a3:%s",ant);
337 push(pila,feat);
339 //Generamos el prefijo de longitud 4
340 feat = new char[8];
341 if (len > 3) sprintf(ant,"%c%c%c%c",wrd[0],wrd[1],wrd[2],wrd[3]);
342 else sprintf(ant,"%s~",ant);
343 sprintf(feat,"a4:%s",ant);
344 push(pila,feat);
346 //Generamos el sufijo de longitud 2
347 feat = new char[6];
348 if (len > 1) sprintf(feat,"z2:%c%c",wrd[len-2],wrd[len-1]);
349 else sprintf(feat,"z2:~%c","",wrd[len-1]);
350 push(pila,feat);
352 //generamos el sufijo de longitud 3
353 feat = new char[7];
354 if (len > 2) sprintf(feat,"z3:%c%c%c",wrd[len-3],wrd[len-2],wrd[len-1]);
355 else if (len > 1) sprintf(feat,"z3:~%c%c",wrd[len-2],wrd[len-1]);
356 else sprintf(feat,"z3:~~%c",wrd[len-1]);
357 push(pila,feat);
359 //generamos el sufijo de longitud 4
360 feat = new char[8];
361 //strcpy(prefix4,substr(wrd, 0, 4));
362 if (len > 3) sprintf(feat,"z4:%c%c%c%c",wrd[len-4],wrd[len-3],wrd[len-2],wrd[len-1]);
363 else if (len > 2) sprintf(feat,"z4:~%c%c%c",wrd[len-3],wrd[len-2],wrd[len-1]);
364 else if (len > 1) sprintf(feat,"z4:~~%c%c",wrd[len-2],wrd[len-1]);
365 else sprintf(feat,"z4:~~~%c",wrd[len-1]);
366 push(pila,feat);
368 //Comienza por Mayuscula?
369 if (erLookRegExp2(&erStartCap,wrd)) startcap = 1;
370 feat = new char[4];
371 sprintf(feat,"A:%d",startcap);
372 push(pila,feat);
374 //Esta toda la palabra en mayusculas?
375 if (erLookRegExp2(&erAllUp,wrd)) allup = 1;
376 feat = new char[5];
377 sprintf(feat,"AA:%d",allup);
378 push(pila,feat);
380 //Esta toda la palabra en minusculas?
381 if (erLookRegExp2(&erAllLow,wrd)) alllow = 1;
382 feat = new char[5];
383 sprintf(feat,"aa:%d",alllow);
384 push(pila,feat);
386 //Longitud de la palabra
387 feat = new char[6];
388 sprintf(feat,"L:%d",len);
389 push(pila,feat);
391 if (erLookRegExp2(&erContainCap,wrd)) containcap = 1;
392 feat = new char[5];
393 sprintf(feat,"CA:%d",containcap);
394 push(pila,feat);
396 if (erLookRegExp2(&erContainCaps,wrd)) containcaps = 1;
397 feat = new char[6];
398 sprintf(feat,"CAA:%d",containcaps);
399 push(pila,feat);
401 //Contiene un punto?
402 if (erLookRegExp2(&erContainPeriod,wrd)) containperiod = 1;
403 feat = new char[5];
404 sprintf(feat,"CP:%d",containperiod);
405 push(pila,feat);
407 //Contiene un numero?
408 if (erLookRegExp2(&erContainNum,wrd)) containnum = 1;
409 feat = new char[5];
410 sprintf(feat,"CN:%d",containnum);
411 push(pila,feat);
413 //Es una palabra multiple?
414 if (erLookRegExp2(&erMultiWord,wrd)) multiword = 1;
415 feat = new char[5];
416 sprintf(feat,"MW:%d",multiword);
417 push(pila,feat);
419 //Letra por la que empieza la palabra
420 feat = new char[5];
421 sprintf(feat,"c1:%c",wrd[0]);
422 push(pila,feat);
424 //Letra por la que acaba la palabra
425 feat = new char[5];
426 //charn = wrd[len-1]; //substr(wrd, len-1, 1);
427 sprintf(feat,"cn:%c",wrd[len-1]);
428 push(pila,feat);
433 * void winPushSwnFeature (struct stack_t *pila)
434 * Recibe como parametro <pila>, donde se apilara la "feature"
435 * Swn.Swn es el elemento final de frase que puede ser
436 * ! ? o .
438 void swindow::winPushSwnFeature(struct stack_t *pila)
440 char *feature = new char[10];
441 sprintf(feature,"Swn:%s",last->wrd);
442 push(pila,feature);
447 * void winPushAmbiguityFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
448 * Genera el atributo que representa la ambiguedad de una palabra.
449 * Recibe como parametros:
450 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
451 * aunque se recibe como un void*.
452 * d, es el diccionario con el que estamos trabajarando
453 * pila,es la pila donde apilaremos el atributo generado
454 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
455 * o RIGHT_TO_LEFT).
457 void swindow::winPushAmbiguityFeature(void *ptr,dictionary *d,struct stack_t *pila,int direction)
459 char value[100],txt[5];
460 nodo_feature_list *p = (nodo_feature_list *)ptr;
461 nodo *pn;
462 simpleList *list;
463 int w,*num,ret=0;
464 infoDict *pInfoDict;
466 strcpy(value,"");
468 char *feature = new char[100];
469 strcpy(feature,"");
471 num = (int *) p->l.getIndex();
472 sprintf(value,"%s%d:",p->mark,*num);
473 pn = get(*num, direction);
474 if (pn!=NULL)
477 w = d->getElement(pn->wrd);
478 if (w!=HASH_FAIL)
480 list = (simpleList *) d->getElementMaybe(w);
481 int numMaybe = d->getElementNumMaybe(w);
482 while (ret>=0)
484 pInfoDict = (infoDict *) list->getIndex();
485 numMaybe--;
486 if (numMaybe>0) sprintf(value,"%s%s~",value,pInfoDict->txt);
487 else sprintf(value,"%s%s",value,pInfoDict->txt);
488 ret=list->next();
490 list->setFirst();
492 //is unknown word
493 else sprintf(value,"%s%s",value,"UNKNOWN");
495 else sprintf(value,"%s%s",value,EMPTY_POS);
497 strcpy(feature,value);
498 push (pila,feature);
503 * void winPushMFTFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
504 * Genera el atributo con la "Most Frequent Tag", la etiqueta mas frecuente.
505 * Recibe como parametros:
506 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
507 * aunque se recibe como un void*.
508 * d, es el diccionario con el que estamos trabajarando
509 * pila,es la pila donde apilaremos el atributo generado
510 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
511 * o RIGHT_TO_LEFT).
513 void swindow::winPushMFTFeature(void *ptr,dictionary *d,struct stack_t *pila,int direction)
515 char value[100],mft[5];
516 nodo_feature_list *p = (nodo_feature_list *)ptr;
517 nodo *pn;
518 simpleList *list;
519 int w,*num,max=0,ret=0;
520 infoDict *pInfoDict;
522 strcpy(value,"");
523 //strcpy(feature,"");
525 num = (int *) p->l.getIndex();
526 sprintf(value,"%s%d:",p->mark,*num);
527 pn = get(*num, direction);
528 if (pn!=NULL)
530 w = d->getElement(pn->wrd);
531 if (w!=HASH_FAIL)
533 list = (simpleList *) d->getElementMaybe(w);
534 int numMaybe = d->getElementNumMaybe(w);
535 while (ret>=0)
537 pInfoDict = (infoDict *) list->getIndex();
538 numMaybe--;
539 if (pInfoDict->num>max) strcpy(mft,pInfoDict->txt);
540 ret=list->next();
542 list->setFirst();
543 sprintf(value,"%s%s",value,mft);
545 //is unknown word
546 else sprintf(value,"%s%s",value,"UNKNOWN");
548 else sprintf(value,"%s%s",value,EMPTY_POS);
549 char *feature = new char[strlen(value)+1];
550 strcpy(feature,value);
551 push (pila,feature);
556 * void winPushMaybeFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
557 * Genera tantos atributos "maybe" como posibles POS pueda tener la palabra, y los
558 * apila en <pila>.
559 * Recibe como parametros:
560 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
561 * aunque se recibe como un void*.
562 * d, es el diccionario con el que estamos trabajarando
563 * pila,es la pila donde apilaremos el atributo generado
564 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
565 * o RIGHT_TO_LEFT).
567 void swindow::winPushMaybeFeature(void *ptr,dictionary *d,struct stack_t *pila,int direction)
569 char value[100],txt[5];
570 nodo_feature_list *p = (nodo_feature_list *)ptr;
571 nodo *pn;
572 simpleList *list;
573 int w,*num,ret=0;
574 infoDict *pInfoDict;
575 char *feature;
577 strcpy(value,"");
578 num = (int *) p->l.getIndex();
579 sprintf(txt,"%s%d~",p->mark,*num);
580 pn = get(*num, direction);
581 if (pn!=NULL)
583 w = d->getElement(pn->wrd);
585 if (w!=HASH_FAIL)
587 list = (simpleList *) d->getElementMaybe(w);
589 while (ret>=0)
591 feature = new char[10];
592 strcpy(feature,"");
593 pInfoDict = (infoDict *) list->getIndex();
594 sprintf(feature,"%s%s:1",txt,pInfoDict->txt);
595 push(pila,feature);
596 ret=list->next();
598 list->setFirst();
600 else
602 feature = new char[15];
603 //is unknown word
604 sprintf(feature,"%s%s:1",txt,"UNKNOWN");
605 push(pila,feature);
608 else
610 feature = new char[10];
611 sprintf(feature,"%s%s:1",txt,EMPTY_POS);
612 push(pila,feature);
618 * void winPushPosFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
619 * Genera un atributo con la POS de algunos elementos de la ventana.
620 * Recibe como parametros:
621 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
622 * aunque se recibe como un void*.
623 * d, es el diccionario con el que estamos trabajarando
624 * pila,es la pila donde apilaremos el atributo generado
625 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
626 * o RIGHT_TO_LEFT).
628 void swindow::winPushPosFeature(void *ptr,dictionary *d, struct stack_t *pila,int direction)
630 char value[100]="",name[100]="",txt[100]="";
631 nodo_feature_list *p = (nodo_feature_list *)ptr;
632 nodo *pn;
633 infoDict *pInfoDict;
634 char *feature;
636 int end=1,ret=1,w,*num;
638 while (end>=0)
640 ret=1;
641 num = (int *) p->l.getIndex();
642 //AKI3
643 if (strcmp(name,EMPTY)==0) sprintf(name,"%s%d",p->mark,*num);
644 else sprintf(name,"%s,%d",name,*num);
645 pn = get(*num, direction);
647 if (pn==NULL) strcpy(txt,EMPTY_POS);
648 //AKI3
649 else if ( (strcmp(pn->pos,EMPTY)==0) || (*num==0) )
652 w = d->getElement(pn->wrd);
654 if (w!=HASH_FAIL)
656 simpleList *list = (simpleList *) d->getElementMaybe(w);
657 int numMaybe = d->getElementNumMaybe(w);
659 strcpy(txt,EMPTY);
660 while ( ret>=0 )
662 pInfoDict = (infoDict *) list->getIndex();
663 numMaybe--;
664 if (numMaybe>0) sprintf(txt,"%s%s_",txt,pInfoDict->txt);
665 else sprintf(txt,"%s%s",txt,pInfoDict->txt);
666 ret=list->next();
668 list->setFirst();
670 //is unknown word
671 else strcpy(txt,"UNKNOWN");
673 else strcpy(txt,pn->pos);//AKI3
675 //AKI3
676 if (strcmp(value,EMPTY)==0) sprintf(value,"%s",txt);
677 else sprintf(value,"%s~%s",value,txt);
679 end = p->l.next();
681 p->l.setFirst();
682 sprintf(name,"%s:%s",name,value);
684 feature = new char[strlen(name)+2];
685 strcpy (feature,name);
686 //fprintf(stderr,"%s\n",feature);
687 push (pila,feature);
692 * void winPushPOSFeature(void *ptr, dictionary *d, stack_t *pila, int direction)
693 * Genera un atributo con la palabra de algunos elementos de la ventana.
694 * Recibe como parametros:
695 * ptr, que es un puntero a un nodo de la lista de atributos (nodo_feature_list)
696 * aunque se recibe como un void*.
697 * d, es el diccionario con el que estamos trabajarando
698 * pila,es la pila donde apilaremos el atributo generado
699 * direction, es la direccion en que estamos recorriendo el corpus (LEFT_TO_RIGHT
700 * o RIGHT_TO_LEFT).
702 void swindow::winPushWordFeature(void *ptr,dictionary *d, struct stack_t *pila,int direction)
704 char value[200],name[200],txt[100];
705 nodo_feature_list *p = (nodo_feature_list *)ptr;
706 nodo *pn=NULL;
707 char *feature;
709 int *num = (int *) p->l.getIndex();
710 pn = get(*num, direction);
712 if (pn==NULL) strcpy(value,EMPTY_WORD);
713 else strcpy(value,pn->wrd);
714 sprintf(name,"%s%d",p->mark,*num);
716 while (p->l.next()>=0)
718 num = (int *) p->l.getIndex();
719 sprintf(name,"%s,%d",name,*num);
720 pn = get(*num, direction);
722 if (pn==NULL) strcpy(txt,EMPTY_WORD);
723 else strcpy(txt,pn->wrd);
724 sprintf(value,"%s~%s",value,txt);
726 p->l.setFirst();
727 sprintf(name,"%s%s%s",name,":",value);
729 feature = new char[strlen(name)+2];
730 strcpy (feature,name);
731 push(pila,feature);
735 /****************************************************************************/
737 int swindow::sentenceLength()
739 //Retorna el nmero de palabras que tiene la frase cargada en este objeto
740 return this->numObj;
745 * void deleteList()
746 * Elimina todas las palabras existentes en la ventana
747 * Retorna el nmero de elementos que poseia la ventana
749 void swindow::deleteList()
751 if (first==NULL) return;
753 while (first->next!=NULL)
755 first = first->next;
756 delete first->previous->stackScores;
757 delete first->previous;
760 if ( last != NULL )
762 delete last->stackScores;
763 delete last;
766 first=NULL;
767 last=NULL;
768 index=NULL;
770 return;
774 void swindow::init()
776 iniGeneric();
780 int swindow::iniGeneric()
782 index = NULL;
783 beginWin = NULL;
784 endWin = NULL;
785 first = NULL;
786 last = NULL;
787 numObj = 0;
788 posBegin = posIndex;
789 posEnd = posIndex;
791 //String para contener frase
792 memset(lSentence,0,sizeof(lSentence));
793 iLSentence = 0;
795 int ret = iniList();
796 endWin = last;
797 if (ret>0) readSentence();
799 if (ret==-1) return -1;
800 else if (ret==0) posEnd = posIndex+last->ord;
801 else posEnd=posIndex+ret;
803 beginWin = first;
805 return ret;
809 int swindow::iniList()
811 int j=0,ret=1;
813 for(j=posIndex; ((j<lengthWin) && (ret>0)); j++) ret = readInput();
815 //ret >1 correct
816 // 0 if end of sentence
817 // -1 if there aren't words
818 // -2 if end of file
819 if (ret>0) ret=j-posIndex-1;
821 return ret;
825 /****************************************************************************/
827 int swindow::readSentence()
829 int ret=1;
830 while (ret>0) ret = readInput();
831 return ret;
835 /****************************************************************************/
838 * Read one line from corpus and add node to list
839 * Return 1 if it's ok
840 * 0 if end of sentence
841 * -1 if there aren't more words
842 * -2 if end of file
844 int swindow::readInput()
846 if (feof(input)) return -2;
848 char value[2][100] ={EMPTY,EMPTY};
849 char line[250] = EMPTY;
850 int is_comment = FALSE;
851 int is_empty_line = FALSE;
852 int ret = 0;
854 fgets(line,250,input);
855 if ( line[0] == '#' && line[1] == '#' ) is_comment = TRUE;
857 ret = sscanf(line,"%s %s\n",value[0],value[1]);
859 if ( ret < 0 )
861 if ( line[0] == '\n' ) is_empty_line = TRUE;
862 else return -2;
865 if ( strlen(value[0]) > 0 && is_empty_line == FALSE && is_comment == FALSE )
866 winAdd(value[0],value[1]);
868 //fprintf(stderr,"\t%s %s \n%d %s\n",value[0],value[1],iLSentence,lSentence);
870 strcpy(lSentence[iLSentence],line);
871 iLSentence++;
873 //fprintf (stderr,"%s",lSentence[iLSentence-1]);
874 if ( iLSentence >= MAX_SENTENCE_LENGTH )
876 //fprintf (stderr,"-------->");
877 //return 0; //Si se supera el tamao mximo de frase se devuelve 0
878 fprintf(stderr,"\nFound a sentence with more than %d words!!\n",MAX_SENTENCE_LENGTH);
879 exit(1);
882 if ((strcmp(".",value[0])==0) || (strcmp("?",value[0])==0) || (strcmp("!",value[0])==0))
884 //fprintf (stderr,"-------->");
885 return 0;
888 return 1;
892 /****************************************************************************/
894 /* Read one line from corpus and add node to list
895 * Return 1 if it's ok
896 * 0 if end of sentence
897 * -1 if there aren't more words
898 * -2 if end of file
900 int swindow::readInput_old()
902 if (feof(input)) return -2;
904 char value[2][100]={EMPTY,EMPTY};
905 int i=0,w=0,ret=1,isCom=0,addComAtEnd=0;
906 char ant='q',c = fgetc(input);
908 while ((!feof(input)) && (c!='\n'))
910 if (i<2 && ant=='#' && c=='#')
912 char garbage[512];
913 fgets(garbage,512,input);
914 w=0;
915 ret = 1;
916 i=0;
917 ant='q';
918 strcpy(value[0],EMPTY);
919 strcpy(value[1],EMPTY);
920 c = fgetc(input);
922 if ((w==0) && (c==' ' || c=='\t' || c==32))
925 i=0;
926 ret = 1;
927 w=1;
928 ant='q';
929 c = fgetc(input);
931 sprintf(value[w],"%s%c",value[w],c);
933 i++;
934 ant=c;
935 c = fgetc(input);
938 value[w][i]='\0';
940 if ((strlen(value[0])<=0) && (!isCom)) return -1;
942 winAdd(value[0],value[1]);
944 if ((strcmp(".",value[0])==0) || (strcmp("?",value[0])==0) || (strcmp("!",value[0])==0)) return 0;
945 return 1;
949 /****************************************************************************/
951 int swindow::winAdd(char *wrd, char *com)
953 nodo *aux = new nodo;
954 if(numObj == 0)
956 aux->previous=NULL;
957 first = aux;
958 last = aux;
959 index = aux;
961 else
963 aux->previous = last;
964 last->next = aux;
965 last = aux;
967 aux->ord = numObj;
968 int erRet=erLookRegExp(wrd);
969 switch (erRet)
971 case CARD: strcpy(aux->wrd,"@CARD"); break;
972 case CARDSEPS: strcpy(aux->wrd,"@CARDSEPS"); break;
973 case CARDPUNCT: strcpy(aux->wrd,"@CARDPUNCT"); break;
974 case CARDSUFFIX: strcpy(aux->wrd,"@CARDSUFFIX"); break;
975 default: strcpy(aux->wrd,wrd);
977 strcpy(aux->realWrd,wrd);
978 strcpy(aux->posOld,EMPTY);
979 strcpy(aux->pos,EMPTY);
980 strcpy(aux->comment,com);
981 aux->stackScores = new stack_t;
982 init_stack(aux->stackScores);
983 aux->weight = 0;
984 aux->weightOld = 0;
985 aux->next=NULL;
986 numObj++;
987 return numObj;
991 /****************************************************************************/
993 swindow::~swindow()
995 deleteList();
999 swindow::swindow(FILE *in)
1001 input=in;
1002 lengthWin = 7;
1003 posIndex = 3;
1005 init();
1009 swindow::swindow(FILE *in,int number, int position)
1011 input=in;
1013 if ((number<3) || (number<=position))
1015 fprintf(stderr,"\nWindow Length can not be first or last element.\nLength should be greater than \"Interest Point Position\" or 3.\n");
1016 exit(0);
1019 lengthWin = number;
1020 posIndex = position-1;
1022 init();
1026 swindow::swindow(FILE *in,int number)
1028 input=in;
1029 lengthWin = number;
1030 posIndex = number/2;
1032 init();
1036 /****************************************************************************/
1038 /* Move Interest Point to next element */
1039 int swindow::next()
1041 int ret = -1;
1042 if ((ret==-1) && (endWin->next!=NULL)) ret=1;
1044 if ((index==NULL) || (index->next==NULL)) return -1;
1045 if ((posIndex>=posEnd) && (ret==-1)) return -1;
1047 if ((posIndex<posEnd) && (ret==-1)) posEnd--;
1048 if ((posEnd==lengthWin-1) && (ret!=-1)) endWin = endWin->next;
1050 if (posBegin==0) beginWin = beginWin->next;
1051 else if ((posIndex>=posBegin) && (posBegin>0)) posBegin--;
1053 index = index->next;
1054 return 0;
1058 /****************************************************************************/
1060 /* Move Interest Point to previous element */
1061 int swindow::previous()
1063 if ((index==NULL) || (index->previous==NULL)) return -1;
1065 if ((posBegin==0) && (beginWin->previous!=NULL)) beginWin = beginWin->previous;
1066 else if (posIndex>posBegin) posBegin++;
1068 if (posEnd<lengthWin-1) posEnd++;
1069 else endWin = endWin->previous;
1071 index = index->previous;
1072 return 0;
1076 /****************************************************************************/
1078 /* Get Interest Point */
1079 nodo *swindow::getIndex()
1081 return index;
1085 /****************************************************************************/
1087 nodo *swindow::get(int position,int direction)
1089 nodo *aux=NULL;
1090 int i=0;
1092 if (position == 0) return index;
1093 if (direction==2) position = -position;
1094 if ( (numObj == 0)
1095 || ((position<0) && (posIndex+position+1<posBegin))
1096 || ((position>0) && (posIndex+position>posEnd)) )
1097 return NULL;
1099 aux = index;
1101 while (i!=position)
1103 if (position>0)
1105 i++;
1106 if (aux->next != NULL) aux = aux->next;
1107 else return NULL;
1109 else
1111 i--;
1112 if (aux->previous != NULL) aux = aux->previous;
1113 else return NULL;
1117 return aux;
1121 /****************************************************************************/
1123 int swindow::show()
1125 int i = 0;
1126 char wrd[TAM_WORD];
1128 if (first==NULL) return 0;
1130 nodo *tmp = first;
1131 nodo *actual = first;
1133 memset(wrd,0,sizeof(wrd));
1134 sscanf(lSentence[i],"%s",wrd);
1135 if (strcmp(wrd,actual->realWrd)==0)
1137 printf("%s %s %s\n",actual->realWrd,actual->pos,actual->comment);
1139 else printf(lSentence[i]);
1140 i++;
1142 while (actual->next!=NULL)
1144 tmp=actual->next;
1145 memset(wrd,0,sizeof(wrd));
1146 sscanf(lSentence[i],"%s",wrd);
1147 if (strcmp(wrd,tmp->realWrd)==0)
1149 printf("%s %s %s\n",tmp->realWrd,tmp->pos,tmp->comment);
1150 actual = tmp;
1152 else printf(lSentence[i]);
1153 i++;
1154 //printf("%s %s %s\n",actual->realWrd,actual->pos,actual->comment);
1156 return 0;
1160 /****************************************************************************/
1162 void swindow::putLengthWin(int l)
1164 lengthWin = l;
1168 /****************************************************************************/
1170 void swindow::putIndex(int i)
1172 posIndex = i;
1176 /****************************************************************************/
1179 * Modifica el valor de los pesos para una palabra
1180 * Si:
1181 * action = 0 --> Pone el peso mximo (put max score)
1182 * action = 1 --> Inicializa los pesos (reset values)
1183 * action = 2 --> Restaura el valor de la vuelta anterior(last lap value)
1185 int swindow::winMaterializePOSValues(int action)
1187 if (first==NULL) return 0;
1189 int inicio=1;
1190 weight_node_t *w,max;
1191 nodo *actual=first;
1193 while (actual!=NULL)
1196 switch (action)
1198 case 0: //PUT MAX
1199 inicio = 1;
1200 while(!empty(actual->stackScores))
1202 w = (weight_node_t *) pop(actual->stackScores);
1204 if (inicio || w->data>max.data)
1206 max.data=w->data;
1207 strcpy(max.pos,w->pos);
1208 inicio = 0;
1210 delete w;
1212 actual->weight=max.data;
1213 strcpy(actual->pos,max.pos);
1214 //Added for 2 laps tagging
1215 actual->weightOld=max.data;
1216 strcpy(actual->posOld,max.pos);
1217 break;
1218 case 1: //RESET VALUES
1219 strcpy(actual->pos,"");
1220 actual->weight=0;
1221 break;
1222 case 2: //PUT OLD
1223 strcpy(actual->pos,actual->posOld);
1224 actual->weight=actual->weightOld;
1225 break;
1227 actual=actual->next;
1229 return 0;
1233 /****************************************************************************/
1236 * int winExistUnkWord(int direction, dictionary *d)
1237 * Esta funcion comprueba si hay parabras desconocidas.
1238 * En caso de que el parametro direction sea:
1239 * LEFT_TO_RIGHT - mira si hay desconocidas a la
1240 * derecha del punto de interes de la ventana.
1241 * RIGHT_TO_LEFT - mira si hay desconocidas a la izquierda
1242 * del punto de interes de la ventana.
1243 * Esta funcion devuelve:
1244 * un entero >=0, si no hay desconocidas
1245 * -1, si hay desconocidas
1247 int swindow::winExistUnkWord(int direction, dictionary *d)
1249 nodo *aux=index;
1250 int ret=0,i=posIndex;
1252 if (index==NULL) return 1;
1253 aux = index;
1255 while (ret>=0)
1257 switch (direction)
1259 case LEFT_TO_RIGHT:
1260 if (aux->next==NULL || aux==endWin) ret=-1;
1261 else aux = aux->next;
1262 if (d->getElement(aux->wrd)==HASH_FAIL) return -1;
1263 i++;
1264 break;
1265 case RIGHT_TO_LEFT:
1266 if (aux->previous==NULL || aux==beginWin) ret=-1;
1267 else aux = aux->previous;
1268 if (d->getElement(aux->wrd)==HASH_FAIL) return -1;
1269 i--;
1270 break;
1273 return 0;