sync master with latest vba changes
[ooovba.git] / lingucomponent / source / thesaurus / mythes / mythes.cxx
blobe4ab197ab9043890c93cbdad0e75f45a958d684c
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: mythes.cxx,v $
10 * $Revision: 1.9 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
32 // MARKER(update_precomp.py): autogen include statement, do not remove
33 #include "precompiled_lingucomponent.hxx"
34 #include "license.readme"
35 #include <stdio.h>
36 #include <string.h>
37 #include <stdlib.h>
38 #include <errno.h>
40 #include "mythes.hxx"
44 MyThes::MyThes(const char* idxpath, const char * datpath)
46 nw = 0;
47 encoding = NULL;
48 list = NULL;
49 offst = NULL;
51 if (thInitialize(idxpath, datpath) != 1) {
52 fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
53 fflush(stderr);
54 thCleanup();
55 // did not initialize properly - throw exception?
60 MyThes::~MyThes()
62 thCleanup();
66 int MyThes::thInitialize(const char* idxpath, const char* datpath)
69 // open the index file
70 FILE * pifile = fopen(idxpath,"r");
71 if (!pifile) {
72 return 0;
75 // parse in encoding and index size */
76 char * wrd;
77 wrd = (char *)calloc(1, MAX_WD_LEN);
78 if (!wrd) {
79 fprintf(stderr,"Error - bad memory allocation\n");
80 fflush(stderr);
81 fclose(pifile);
82 return 0;
84 int len = readLine(pifile,wrd,MAX_WD_LEN);
85 encoding = mystrdup(wrd);
86 len = readLine(pifile,wrd,MAX_WD_LEN);
87 int idxsz = atoi(wrd);
90 // now allocate list, offst for the given size
91 list = (char**) calloc(idxsz,sizeof(char*));
92 offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
94 if ( (!(list)) || (!(offst)) ) {
95 fprintf(stderr,"Error - bad memory allocation\n");
96 fflush(stderr);
97 fclose(pifile);
98 return 0;
101 // now parse the remaining lines of the index
102 len = readLine(pifile,wrd,MAX_WD_LEN);
103 while (len > 0)
105 int np = mystr_indexOfChar(wrd,'|');
106 if (nw < idxsz) {
107 if (np >= 0) {
108 *(wrd+np) = '\0';
109 list[nw] = (char *)calloc(1,(np+1));
110 if (!list[nw]) {
111 fprintf(stderr,"Error - bad memory allocation\n");
112 fflush(stderr);
113 fclose(pifile);
114 return 0;
116 memcpy((list[nw]),wrd,np);
117 offst[nw] = atoi(wrd+np+1);
118 nw++;
121 len = readLine(pifile,wrd,MAX_WD_LEN);
124 free((void *)wrd);
125 fclose(pifile);
127 /* next open the data file */
128 pdfile = fopen(datpath,"r");
129 if (!pdfile) {
130 return 0;
133 return 1;
137 void MyThes::thCleanup()
139 /* first close the data file */
140 if (pdfile) {
141 fclose(pdfile);
142 pdfile=NULL;
145 if (list)
147 /* now free up all the allocated strings on the list */
148 for (int i=0; i < nw; i++)
150 if (list[i]) {
151 free(list[i]);
152 list[i] = 0;
155 free((void*)list);
158 if (encoding) free((void*)encoding);
159 if (offst) free((void*)offst);
161 encoding = NULL;
162 list = NULL;
163 offst = NULL;
164 nw = 0;
169 // lookup text in index and count of meanings and a list of meaning entries
170 // with each entry having a synonym count and pointer to an
171 // array of char * (i.e the synonyms)
173 // note: calling routine should call CleanUpAfterLookup with the original
174 // meaning point and count to properly deallocate memory
176 int MyThes::Lookup(const char * pText, int len, mentry** pme)
179 *pme = NULL;
181 // handle the case of missing file or file related errors
182 if (! pdfile) return 0;
184 long offset = 0;
186 /* copy search word and make sure null terminated */
187 char * wrd = (char *) calloc(1,(len+1));
188 memcpy(wrd,pText,len);
190 /* find it in the list */
191 int idx = nw > 0 ? binsearch(wrd,list,nw) : -1;
192 free(wrd);
193 if (idx < 0) return 0;
195 // now seek to the offset
196 offset = (long) offst[idx];
197 int rc = fseek(pdfile,offset,SEEK_SET);
198 if (rc) {
199 return 0;
202 // grab the count of the number of meanings
203 // and allocate a list of meaning entries
204 char * buf = NULL;
205 buf = (char *) malloc( MAX_LN_LEN );
206 if (!buf) return 0;
207 readLine(pdfile, buf, (MAX_LN_LEN-1));
208 int np = mystr_indexOfChar(buf,'|');
209 if (np < 0) {
210 free(buf);
211 return 0;
213 int nmeanings = atoi(buf+np+1);
214 *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
215 if (!(*pme)) {
216 free(buf);
217 return 0;
220 // now read in each meaning and parse it to get defn, count and synonym lists
221 mentry* pm = *(pme);
222 char dfn[MAX_WD_LEN];
224 for (int j = 0; j < nmeanings; j++) {
225 readLine(pdfile, buf, (MAX_LN_LEN-1));
227 pm->count = 0;
228 pm->psyns = NULL;
229 pm->defn = NULL;
231 // store away the part of speech for later use
232 char * p = buf;
233 char * pos = NULL;
234 np = mystr_indexOfChar(p,'|');
235 if (np >= 0) {
236 *(buf+np) = '\0';
237 pos = mystrdup(p);
238 p = p + np + 1;
239 } else {
240 pos = mystrdup("");
243 // count the number of fields in the remaining line
244 int nf = 1;
245 char * d = p;
246 np = mystr_indexOfChar(d,'|');
247 while ( np >= 0 ) {
248 nf++;
249 d = d + np + 1;
250 np = mystr_indexOfChar(d,'|');
252 pm->count = nf;
253 pm->psyns = (char **) malloc(nf*sizeof(char*));
255 // fill in the synonym list
256 d = p;
257 for (int jj = 0; jj < nf; jj++)
259 np = mystr_indexOfChar(d,'|');
260 if (np > 0)
262 *(d+np) = '\0';
263 pm->psyns[jj] = mystrdup(d);
264 d = d + np + 1;
266 else
268 pm->psyns[jj] = mystrdup(d);
272 // add pos to first synonym to create the definition
273 int k = strlen(pos);
274 int m = strlen(pm->psyns[0]);
275 if ((k+m) < (MAX_WD_LEN - 1)) {
276 strncpy(dfn,pos,k);
277 *(dfn+k) = ' ';
278 strncpy((dfn+k+1),(pm->psyns[0]),m+1);
279 pm->defn = mystrdup(dfn);
280 } else {
281 pm->defn = mystrdup(pm->psyns[0]);
283 free(pos);
284 pm++;
287 free(buf);
289 return nmeanings;
294 void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
297 if (nmeanings == 0) return;
298 if ((*pme) == NULL) return;
300 mentry * pm = *pme;
302 for (int i = 0; i < nmeanings; i++) {
303 int count = pm->count;
304 for (int j = 0; j < count; j++) {
305 if (pm->psyns[j]) free(pm->psyns[j]);
306 pm->psyns[j] = NULL;
308 if (pm->psyns) free(pm->psyns);
309 pm->psyns = NULL;
310 if (pm->defn) free(pm->defn);
311 pm->defn = NULL;
312 pm->count = 0;
313 pm++;
315 pm = *pme;
316 free(pm);
317 *pme = NULL;
318 return;
322 // read a line of text from a text file stripping
323 // off the line terminator and replacing it with
324 // a null string terminator.
325 // returns: -1 on error or the number of characters in
326 // in the returning string
328 // A maximum of nc characters will be returned
330 int MyThes::readLine(FILE * pf, char * buf, int nc)
333 if (fgets(buf,nc,pf)) {
334 mychomp(buf);
335 return strlen(buf);
337 return -1;
342 // performs a binary search on null terminated character
343 // strings
345 // returns: -1 on not found
346 // index of wrd in the list[]
348 int MyThes::binsearch(char * sw, char* _list[], int nlst)
350 int lp, up, mp, j, indx;
351 lp = 0;
352 up = nlst-1;
353 indx = -1;
354 if (strcmp(sw,_list[lp]) < 0) return -1;
355 if (strcmp(sw,_list[up]) > 0) return -1;
356 while (indx < 0 ) {
357 mp = (int)((lp+up) >> 1);
358 j = strcmp(sw,_list[mp]);
359 if ( j > 0) {
360 lp = mp + 1;
361 } else if (j < 0 ) {
362 up = mp - 1;
363 } else {
364 indx = mp;
366 if (lp > up) return -1;
368 return indx;
371 char * MyThes::get_th_encoding()
373 if (encoding) return encoding;
374 return NULL;
378 // string duplication routine
379 char * MyThes::mystrdup(const char * p)
381 int sl = strlen(p) + 1;
382 char * d = (char *)malloc(sl);
383 if (d) {
384 memcpy(d,p,sl);
385 return d;
387 return NULL;
390 // remove cross-platform text line end characters
391 void MyThes::mychomp(char * s)
393 int k = strlen(s);
394 if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
395 if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
399 // return index of char in string
400 int MyThes::mystr_indexOfChar(const char * d, int c)
402 char * p = strchr((char *)d,c);
403 if (p) return (int)(p-d);
404 return -1;