1 // kate: replace-tabs on; indent-width 2; indent-mode cstyle; encoding latin15;
3 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 /***********************************************************/
33 * El objeto WeightRepository es el encargado de contener los pesos
34 * para cada pareja POS-feature. Un depósito de pesos está formado por
35 * un hash de objetos weight_struct_t, conteniendo los atributos (key).
36 * Cada uno de estos hash es poseedor de un nuevo hash con todas las POS
37 * para las cuales se ha encontrado el atributo y su respectivo peso.
41 /***********************************************************/
43 // Definition of weight_struct_t
// Per-feature table of weights. Elsewhere in this file it is filled with
// hash_insert(pos, node), i.e. it is keyed by the POS tag and stores
// weight_node_t pointers.
48 hash_t
<weight_node_t
*> *hash
;
50 // free inner memory and then free the hash.
59 /***********************************************************/
61 char weightRepository::wrSaltarBlancs(FILE *in
, char c
,int jmp
)
63 while ((c
==':') || (c
==' ') || (c
=='\n' && jmp
==1)) c
=fgetc(in
);
67 /***********************************************************/
70 * void weightRepository::wrReadMergeModel(FILE *in,float filter)
72 * FILE *in : apuntador al fichero que ha de leer
73 * float filter: Valor para filtrar los pesos que se lean
74 * Este método carga un depósito de pesos de un fichero (in), filtrando
75 * los pesos que estén por debajo del límite marcado (filter)
// Loads weights from the stream `in` into the repository (wr).
//
// in     : open stream to read from.
// filter : threshold; a node is inserted only when |data| > |filter|.
//
// NOTE(review): several statements of this function (local declarations,
// loop bodies, the parsing of `value` into `data`) are not visible in this
// excerpt. The comments below describe only the statements that are visible.
77 void weightRepository::wrReadMergeModel(FILE *in
,float filter
)
// A fresh weight_struct_t is heap-allocated; its key is built one character
// at a time with push_back.
86 weight_struct_t
*obj
= new weight_struct_t
;
91 obj
->key
.push_back(c
);;
// The entry gets its own inner hash, initialised with hash_init(10).
95 obj
->hash
= new hash_t
<weight_node_t
*>();
96 obj
->hash
->hash_init(10);
// Loop until a newline is seen or the stream reports end of file.
98 while ((c
!='\n') && (!feof(in
)))
// Skip ':' and ' ' separators (newlines are NOT skipped: jmp == 0).
102 c
= wrSaltarBlancs(in
,c
,0);
// Scan up to the next ':' — presumably collecting the POS tag; the loop body
// is not visible here, confirm.
103 while ((c
!=':') && (!feof(in
)))
105 // std::cerr << "Adding '" << c << "' to '" << w->pos << "'" << std::endl;
110 c
= wrSaltarBlancs(in
,c
,0);
// Scan up to the next blank or newline — presumably collecting the textual
// weight into `value`; the loop body is not visible here, confirm.
113 while ((c
!=' ') && (c
!='\n') && (!feof(in
)) )
120 std::istringstream
iss(value
);
122 weight_node_t
* node
= new weight_node_t();
125 // std::cerr << "weightRepository::wrReadMergeModel " << obj->key << " " << pos << " " << data << " " << (long)data << std::endl;
// Only weights whose magnitude exceeds the filter's magnitude are stored.
// NOTE(review): `node` is allocated unconditionally above; whether it is
// released when the filter rejects it is not visible here — confirm.
126 if ( fabsf(data
) > fabsf(filter
) )
127 obj
->hash
->hash_insert(pos
,node
);
// Skip separators, this time including newlines (jmp == 1).
131 c
= wrSaltarBlancs(in
,c
,1);
// Register the entry in the repository under its key.
133 wr
.hash_insert(obj
->key
, obj
);
// Reads up to 511 characters or to end of line into `garbage`; fgets's
// return value is not checked.
138 fgets(garbage
,512,in
); //while(c=fgetc(in)!='\n');
144 /***********************************************************/
147 * long double weightRepository::wrGetWeight(std::string feature,std::string pos)
149 * std::string feature: Atributo
150 * std::string pos: Etiqueta morfosintáctica
151 * Lee el peso para el atributo y la etiqueta recibidos como parámetro.
// Looks up `feature` in the repository (wr) and, when that lookup does not
// yield HASH_FAIL, looks up `pos` in that feature's inner hash. The inner
// result is accepted only if it is neither HASH_FAIL nor 0.
//
// NOTE(review): the return statements are not visible in this excerpt —
// presumably the stored weight on a hit and 0 on a miss; confirm.
// NOTE(review): the (long) pointer casts assume a pointer fits in a long —
// confirm against HASH_FAIL's definition and the supported platforms.
153 long double weightRepository::wrGetWeight(const std::string
& feature
,const std::string
& pos
)
155 weight_struct_t
*obj
= wr
.hash_lookup(feature
);
156 if ((long)obj
!=HASH_FAIL
)
158 weight_node_t
*ret
= obj
->hash
->hash_lookup(pos
);
160 if ((long)ret
!=HASH_FAIL
&& ret
!= 0)
168 /***********************************************************/
171 * weightRepository(std::string fileName,float filter)
173 * std::string fileName : Nombre del fichero
174 * float filter: Valor para filtrar los pesos que se lean
175 * Constructor que carga el depósito de pesos del fichero llamado
176 * fileName, filtrando los pesos que estén por debajo del límite
// Constructs a repository from a weights file.
//
// fileName : path of the file, opened with fopen in "rt" mode.
// filter   : threshold forwarded unchanged to wrReadMergeModel.
//
// When fopen fails, an error message naming the file is written to stderr.
// NOTE(review): what follows the error message (return/exit) and whether the
// stream is closed after loading are not visible in this excerpt — confirm.
179 weightRepository::weightRepository(const std::string
& fileName
,float filter
)
181 // std::cerr << "weightRepository::weightRepository " << fileName << std::endl;
183 if ((in
= fopen(fileName
.c_str(), "rt"))== NULL
)
185 fprintf(stderr
, "Error opening weightRepository: %s. It's going to work without it.\n",fileName
.c_str());
189 wrReadMergeModel(in
,filter
);
193 /***********************************************************/
// Default constructor (no file is loaded).
// NOTE(review): the body is not visible in this excerpt.
199 weightRepository::weightRepository()
204 /***********************************************************/
207 * ~weightRepository()
// Destructor.
// NOTE(review): the body is not visible in this excerpt; confirm that the
// weight_struct_t entries allocated with `new` elsewhere in this file are
// released here.
210 weightRepository::~weightRepository()
215 /*******************************************************/
218 * void wrAddPOS(int obj, std::string pos, long double weight)
220 * int obj: Apuntador al objeto que contiene el atributo
221 * std::string pos: Etiqueta a insertar:
222 * long double weight: Peso a asignar a la etiqueta
223 * Insertamos un nuevo peso para la etiqueta pos, en el atributo indicado
224 * por obj. Si la etiqueta ya existe se incrementa el peso con weight. Si
225 * no existe se añade.
227 void weightRepository::wrAddPOS(long unsigned int obj
, const std::string
& pos
, long double weight
)
229 weight_struct_t
*wst
= (weight_struct_t
*)obj
;
230 weight_node_t
*wnt
= wst
->hash
->hash_lookup(pos
);
232 if ((long)wnt
==HASH_FAIL
)
234 //Insertamos Nueva POS
235 weight_node_t
*w
= new weight_node_t
;
238 wst
->hash
->hash_insert(w
->pos
,w
);
241 { //Si POS ya esta, incrementamos el peso
242 wnt
->data
= wnt
->data
+ weight
;
246 /*******************************************************/
249 * void wrAdd(std::string feature, std::string pos, long double weight)
251 * std::string feature: Atributo a insertar
252 * std::string pos: Etiqueta a insertar
253 * long double weight: Peso a asignar a la etiqueta
254 * Insertamos un nuevo peso para el atributo feature y la etiqueta pos.
256 void weightRepository::wrAdd(const std::string
& feature
, const std::string
& pos
, long double weight
)
258 weight_struct_t
*obj
= wr
.hash_lookup(feature
);
260 if ( (long) obj
== HASH_FAIL
)
262 // Creamos nueva entrada en WeightRepository
263 obj
= new weight_struct_t
;
265 obj
->hash
= new hash_t
<weight_node_t
*>;
266 obj
->hash
->hash_init(10);
267 //Añadimos el peso y la etiqueta
268 wrAddPOS((unsigned long)obj
,pos
,weight
);
269 wr
.hash_insert(obj
->key
, obj
);
272 //Añadimos el peso y la etiqueta
273 wrAddPOS((unsigned long)obj
,pos
,weight
);
276 /*******************************************************/
279 * wrWrite(std::string outName)
280 * Escribe el depósito de pesos en el fichero con nombre outName.
282 * Modificación 180705:
283 * Añadimos el parámetro "float filter", se utiliza para filtrar pesos
// Writes the repository to the file named outName.
//
// outName : path of the output file, opened with fopen in "w" mode.
// filter  : threshold forwarded to wrGetMergeInput for every entry.
//
// For each entry of wr, one line "<key><merge-input>\n" is written; entries
// whose merge-input string is empty are skipped. When fopen fails, an error
// message naming the file is written to stderr.
// NOTE(review): what follows the error message and whether the file is
// closed at the end are not visible in this excerpt — confirm.
285 void weightRepository::wrWrite(const std::string
& outName
, float filter
)
289 if ((f
= fopen(outName
.c_str(), "w"))== NULL
)
291 fprintf(stderr
, "Error opening file: %s\n",outName
.c_str());
295 // Walk the hash object by object
296 for (hash_t
<weight_struct_t
*>::iterator it
= wr
.begin(); it
!= wr
.end(); it
++)
298 weight_struct_t
*wst
= (weight_struct_t
*) ((*it
).second
);
300 // Change 180705: weight filtering added
301 //std::string mrg = wrGetMergeInput(wst->hash); //DEL 180705
302 std::string mrg
= wrGetMergeInput(wst
->hash
,filter
); //ADD 180705
304 if (!mrg
.empty()) fprintf(f
,"%s%s\n",wst
->key
.c_str(),mrg
.c_str());
309 /*******************************************************/
311 * std::string wrGetMergeInput(hash_t *tptr)
312 * Devuelve una cadena de caracteres con todas las parejas
313 * POS/PESO contenidas en el hash (tptr) de un atributo.
315 * Modificación 180705:
316 * Añadimos parámetro "float filter" para filtrado de pesos
318 std::string
weightRepository::wrGetMergeInput(hash_t
<weight_node_t
*> *tptr
, float filter
)
320 std::ostringstream out
;
322 for (hash_t
<weight_struct_t
*>::iterator it
= wr
.begin(); it
!= wr
.end(); it
++)
324 weight_node_t
*wnt
= (weight_node_t
*) ((*it
).second
);
326 if ((float)wnt
->data
!=0)
328 //Modificación 180705: Filtrado de pesos
329 //Comprobamos que el peso a insertar en el fichero
330 //cumple con el filtrado de pesos.
331 if ( fabsf(wnt
->data
) > fabsf(filter
) ) //ADD 180705
333 out
<< " " << wnt
->pos
<< ":" << (float) wnt
->data
;
339 /*******************************************************/
342 * void wrWriteHash(hash_t *tptr,FILE *f, char separador)
343 * Escribe el contenido de un hash (tptr), en fichero apuntado por f.
344 * Entre cada pareja POS/PESO pone el caracter separador.
// Writes POS/weight pairs to the stream `f`.
//
// tptr      : hash of a feature (see NOTE below — not used by the visible loop).
// f         : destination stream.
// separador : character printed before each pair; when it is '\n' and `cont`
//             is 0 the pair is printed as "<pos> <weight>" with no leading
//             separator, otherwise as "<separador><pos>:<weight>".
//
// NOTE(review): the visible loop iterates the top-level repository (wr) with
// a weight_struct_t* iterator and casts each value to weight_node_t*; the
// `tptr` parameter is never referenced in the visible code. This looks like
// it should iterate *tptr instead — confirm and fix.
// NOTE(review): the declaration/update of `cont` and the end of this function
// are not visible in this excerpt.
346 void weightRepository::wrWriteHash(hash_t
<weight_node_t
*> *tptr
,FILE *f
, char separador
)
351 for (hash_t
<weight_struct_t
*>::iterator it
= wr
.begin(); it
!= wr
.end(); it
++)
353 weight_node_t
*wnt
= (weight_node_t
*) ((*it
).second
);
355 if (separador
== '\n' && cont
==0) fprintf(f
,"%s %2.10f",wnt
->pos
.c_str(),(float)wnt
->data
);
356 else fprintf(f
,"%c%s:%2.10f",separador
,wnt
->pos
.c_str(),(float)wnt
->data
);