Updated source code from upstream SVN
[svmtool++.git] / src / weight.cc
blobc2ac05002bdce01754916e34a3c46156eacf04ad
1 // kate: replace-tabs on; indent-width 2; indent-mode cstyle; encoding latin15;
2 /*
3 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include <iostream>
24 #include <sstream>
25 #include <cmath>
26 #include "hash.h"
27 #include "weight.h"
/***********************************************************/

/*
 * The WeightRepository object is in charge of holding the weights for
 * every POS-feature pair. A weight repository is a hash of weight_struct_t
 * objects, keyed by attribute (key). Each of those entries owns a further
 * hash holding every POS for which the attribute has been found, together
 * with its weight (weight_node_t).
 */

/***********************************************************/
43 //Definición de weight_struct_t
44 class weight_struct_t
46 public:
47 std::string key;
48 hash_t<weight_node_t*> *hash;
50 // free inner memory and then free the hash.
51 ~weight_struct_t()
53 hash->hash_destroy();
59 /***********************************************************/
61 char weightRepository::wrSaltarBlancs(FILE *in, char c,int jmp)
63 while ((c==':') || (c==' ') || (c=='\n' && jmp==1)) c=fgetc(in);
64 return c;
67 /***********************************************************/
70 * void weightRepository::wrReadMergeModel(FILE *in,float filter)
71 * Parámetros:
72 * FILE *in : apuntador al fichero que ha de leer
73 * float filter: Valor para filtrar los pesos que se lean
74 * Este método carga un depósito de pesos de un fichero (f), filltrando
75 * los pesos que esten por debajo del límite marcado (filter)
77 void weightRepository::wrReadMergeModel(FILE *in,float filter)
79 char c=fgetc(in);
81 while (!feof(in))
83 //c = fgetc(in);
84 if (c!='#')
86 weight_struct_t *obj = new weight_struct_t;
87 obj->key = "";
89 while (c!=' ')
91 obj->key.push_back(c);;
92 c=fgetc(in);
95 obj->hash = new hash_t<weight_node_t*>();
96 obj->hash->hash_init(10);
98 while ((c!='\n') && (!feof(in)))
100 std::string pos;
102 c = wrSaltarBlancs(in,c,0);
103 while ((c!=':') && (!feof(in)))
105 // std::cerr << "Adding '" << c << "' to '" << w->pos << "'" << std::endl;
106 pos.push_back(c);;
107 c=fgetc(in);
110 c = wrSaltarBlancs(in,c,0);
112 std::string value;
113 while ((c!=' ') && (c!='\n') && (!feof(in)) )
115 value.push_back(c);;
116 c=fgetc(in);
119 long double data;
120 std::istringstream iss(value);
121 iss >> data;
122 weight_node_t* node = new weight_node_t();
123 node->data = data;
124 node->pos = pos;
125 // std::cerr << "weightRepository::wrReadMergeModel " << obj->key << " " << pos << " " << data << " " << (long)data << std::endl;
126 if ( fabsf(data) > fabsf(filter) )
127 obj->hash->hash_insert(pos,node);
128 else delete node;
131 c = wrSaltarBlancs(in,c,1);
133 wr.hash_insert(obj->key, obj);
135 else
137 char garbage[512];
138 fgets(garbage,512,in); //while(c=fgetc(in)!='\n');
139 c = fgetc(in);
144 /***********************************************************/
147 * long double weightRepository::wrGetWeight(std::string feature,std::string pos)
148 * Parámetros:
149 * std::string feature: Atributo
150 * std::string pos: Etiqueta morfosintáctica
151 * Lee el peso para el atributo y la etiqueta recibidos como parámetro.
153 long double weightRepository::wrGetWeight(const std::string& feature,const std::string& pos)
155 weight_struct_t *obj = wr.hash_lookup(feature);
156 if ((long)obj!=HASH_FAIL)
158 weight_node_t *ret = obj->hash->hash_lookup(pos);
160 if ((long)ret!=HASH_FAIL && ret != 0)
162 return ret->data;
165 return 0;
168 /***********************************************************/
171 * weightRepository(std::string fileName,float filter)
172 * Parámetros:
173 * std::string fileName : Nombre del fichero
174 * float filter: Valor para filtrar los pesos que se lean
175 * Constructor que carga el depóosito de pesos del fichero llamado
176 * fileName , filltrando los pesos que esten por debajo del límite
177 * marcado (filter)
179 weightRepository::weightRepository(const std::string& fileName,float filter)
181 // std::cerr << "weightRepository::weightRepository " << fileName << std::endl;
182 FILE *in;
183 if ((in = fopen(fileName.c_str(), "rt"))== NULL)
185 fprintf(stderr, "Error opening weightRepository: %s. It's going to work without it.\n",fileName.c_str());
186 exit(0);
188 wr.hash_init(10000);
189 wrReadMergeModel(in,filter);
190 fclose(in);
193 /***********************************************************/
196 * weightRepository()
197 * Contructor
199 weightRepository::weightRepository()
201 wr.hash_init(10000);
204 /***********************************************************/
207 * ~weightRepository()
208 * Destructor
210 weightRepository::~weightRepository()
212 wr.hash_destroy();
215 /*******************************************************/
218 * void wrAddPOS(int obj, std::string pos, long double weight)
219 * Parámetros:
220 * int obj: Apuntador al objeto que contiene el atributo
221 * std::string pos: Etiqueta a insertar:
222 * long double weight: Peso a asignar a la etiqueta
223 * Insertamos un nuevo peso para la etiqueta pos, en el atributo indicado
224 * por obj. Si la etiqueta ya existe se incrementa el peso con weight. Si
225 * no existe se añade.
227 void weightRepository::wrAddPOS(long unsigned int obj, const std::string& pos, long double weight)
229 weight_struct_t *wst = (weight_struct_t *)obj;
230 weight_node_t *wnt = wst->hash->hash_lookup(pos);
232 if ((long)wnt==HASH_FAIL)
234 //Insertamos Nueva POS
235 weight_node_t *w = new weight_node_t;
236 w->pos = pos;
237 w->data=weight;
238 wst->hash->hash_insert(w->pos,w);
240 else
241 { //Si POS ya esta, incrementamos el peso
242 wnt->data = wnt->data + weight;
246 /*******************************************************/
249 * void wrAdd(std::string feature, std::string pos, long double weight)
250 * Parámetros:
251 * std::string feature: Atributo a insertar
252 * std::string pos: Etiqueta a insertar
253 * long double weight: Peso a asignar a la etiqueta
254 * Insertamos un nuevo peso para para el atributo feature y la etiqueta pos.
256 void weightRepository::wrAdd(const std::string& feature, const std::string& pos, long double weight)
258 weight_struct_t *obj = wr.hash_lookup(feature);
260 if ( (long) obj == HASH_FAIL)
262 // Creamos nueva entrada en WeightRepository
263 obj = new weight_struct_t;
264 obj->key = feature;
265 obj->hash = new hash_t<weight_node_t*>;
266 obj->hash->hash_init(10);
267 //Añadimos el peso y la etiqueta
268 wrAddPOS((unsigned long)obj,pos,weight);
269 wr.hash_insert(obj->key, obj);
271 else
272 //Añadimos el peso y la etiqueta
273 wrAddPOS((unsigned long)obj,pos,weight);
276 /*******************************************************/
279 * wrWrite(std::string outName)
280 * Escribe el depósito de pesos en el fichero con nombre outName.
282 * Modificación 180705:
283 * Añadimos el parámetro "float filter", se utiliza para filtrar pesos
285 void weightRepository::wrWrite(const std::string& outName, float filter)
287 FILE *f;
289 if ((f = fopen(outName.c_str(), "w"))== NULL)
291 fprintf(stderr, "Error opening file: %s\n",outName.c_str());
292 exit(0);
295 //Recorremos el hash objeto a objeto
296 for (hash_t<weight_struct_t*>::iterator it = wr.begin(); it != wr.end(); it++)
298 weight_struct_t *wst = (weight_struct_t *) ((*it).second);
300 //Modificación 180705: añadimos filtrado de pesos
301 //std::string mrg = wrGetMergeInput(wst->hash); //DEL 180705
302 std::string mrg = wrGetMergeInput(wst->hash,filter); //ADD 180705
304 if (!mrg.empty()) fprintf(f,"%s%s\n",wst->key.c_str(),mrg.c_str());
305 }//for
306 fclose (f);
309 /*******************************************************/
311 * std::string wrGetMergeInput(hash_t *tptr)
312 * Devuelve una cadena de caracteres con todas las parejas
313 * POS/PESO contenidas en el hash (tptr) de un atributo.
315 * Modificación 180705:
316 * Añadimos parámetro "float filter" para filtrado de pesos
318 std::string weightRepository::wrGetMergeInput(hash_t<weight_node_t*> *tptr, float filter)
320 std::ostringstream out;
322 for (hash_t<weight_struct_t*>::iterator it = wr.begin(); it != wr.end(); it++)
324 weight_node_t *wnt = (weight_node_t *) ((*it).second);
326 if ((float)wnt->data!=0)
328 //Modificación 180705: Filtrado de pesos
329 //Comprobamos que el peso a insertar en el fichero
330 //cumple con el filtrado de pesos.
331 if ( fabsf(wnt->data) > fabsf(filter) ) //ADD 180705
332 // %s %s:%.18E
333 out << " " << wnt->pos << ":" << (float) wnt->data;
335 } //for
336 return out.str();
339 /*******************************************************/
342 * void wrWriteHash(hash_t *tptr,FILE *f, char separador)
343 * Escribe el contenido de un hash (tptr), en fichero apuntado por f.
344 * Entre cada pareja POS/PESO pone el caracter separador.
346 void weightRepository::wrWriteHash(hash_t<weight_node_t*> *tptr,FILE *f, char separador)
349 int cont=0;
351 for (hash_t<weight_struct_t*>::iterator it = wr.begin(); it != wr.end(); it++)
353 weight_node_t *wnt = (weight_node_t *) ((*it).second);
355 if (separador == '\n' && cont==0) fprintf(f,"%s %2.10f",wnt->pos.c_str(),(float)wnt->data);
356 else fprintf(f,"%c%s:%2.10f",separador,wnt->pos.c_str(),(float)wnt->data);
357 cont++;
358 } /* for */