Fix oversight in previous error-reporting patch; mustn't pfree path string
[PostgreSQL.git] / src / backend / tsearch / to_tsany.c
blob99d0b0e9bfd9633d193ca8e874ab239facc70ccd
/*-------------------------------------------------------------------------
 *
 * to_tsany.c
 *		to_ts* function definitions
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
14 #include "postgres.h"
16 #include "catalog/namespace.h"
17 #include "tsearch/ts_cache.h"
18 #include "tsearch/ts_utils.h"
19 #include "utils/builtins.h"
20 #include "utils/syscache.h"
23 Datum
24 get_current_ts_config(PG_FUNCTION_ARGS)
26 PG_RETURN_OID(getTSCurrentConfig(true));
/*
 * to_tsvector
 */
32 static int
33 compareWORD(const void *a, const void *b)
35 int res;
37 res = tsCompareString(
38 ((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
39 ((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
40 false );
42 if (res == 0)
44 if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
45 return 0;
47 res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
50 return res;
53 static int
54 uniqueWORD(ParsedWord *a, int4 l)
56 ParsedWord *ptr,
57 *res;
58 int tmppos;
60 if (l == 1)
62 tmppos = LIMITPOS(a->pos.pos);
63 a->alen = 2;
64 a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
65 a->pos.apos[0] = 1;
66 a->pos.apos[1] = tmppos;
67 return l;
70 res = a;
71 ptr = a + 1;
74 * Sort words with its positions
76 qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
79 * Initialize first word and its first position
81 tmppos = LIMITPOS(a->pos.pos);
82 a->alen = 2;
83 a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
84 a->pos.apos[0] = 1;
85 a->pos.apos[1] = tmppos;
88 * Summarize position information for each word
90 while (ptr - a < l)
92 if (!(ptr->len == res->len &&
93 strncmp(ptr->word, res->word, res->len) == 0))
96 * Got a new word, so put it in result
98 res++;
99 res->len = ptr->len;
100 res->word = ptr->word;
101 tmppos = LIMITPOS(ptr->pos.pos);
102 res->alen = 2;
103 res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
104 res->pos.apos[0] = 1;
105 res->pos.apos[1] = tmppos;
107 else
110 * The word already exists, so adjust position information. But
111 * before we should check size of position's array, max allowed
112 * value for position and uniqueness of position
114 pfree(ptr->word);
115 if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
116 res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
118 if (res->pos.apos[0] + 1 >= res->alen)
120 res->alen *= 2;
121 res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
123 if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
125 res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
126 res->pos.apos[0]++;
130 ptr++;
133 return res + 1 - a;
137 * make value of tsvector, given parsed text
139 TSVector
140 make_tsvector(ParsedText *prs)
142 int i,
144 lenstr = 0,
145 totallen;
146 TSVector in;
147 WordEntry *ptr;
148 char *str;
149 int stroff;
151 prs->curwords = uniqueWORD(prs->words, prs->curwords);
152 for (i = 0; i < prs->curwords; i++)
154 lenstr += prs->words[i].len;
155 if (prs->words[i].alen)
157 lenstr = SHORTALIGN(lenstr);
158 lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
162 if (lenstr > MAXSTRPOS)
163 ereport(ERROR,
164 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
165 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));
167 totallen = CALCDATASIZE(prs->curwords, lenstr);
168 in = (TSVector) palloc0(totallen);
169 SET_VARSIZE(in, totallen);
170 in->size = prs->curwords;
172 ptr = ARRPTR(in);
173 str = STRPTR(in);
174 stroff = 0;
175 for (i = 0; i < prs->curwords; i++)
177 ptr->len = prs->words[i].len;
178 ptr->pos = stroff;
179 memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
180 stroff += prs->words[i].len;
181 pfree(prs->words[i].word);
182 if (prs->words[i].alen)
184 int k = prs->words[i].pos.apos[0];
185 WordEntryPos *wptr;
187 if (k > 0xFFFF)
188 elog(ERROR, "positions array too long");
190 ptr->haspos = 1;
191 stroff = SHORTALIGN(stroff);
192 *(uint16 *) (str + stroff) = (uint16) k;
193 wptr = POSDATAPTR(in, ptr);
194 for (j = 0; j < k; j++)
196 WEP_SETWEIGHT(wptr[j], 0);
197 WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
199 stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
200 pfree(prs->words[i].pos.apos);
202 else
203 ptr->haspos = 0;
204 ptr++;
206 pfree(prs->words);
207 return in;
210 Datum
211 to_tsvector_byid(PG_FUNCTION_ARGS)
213 Oid cfgId = PG_GETARG_OID(0);
214 text *in = PG_GETARG_TEXT_P(1);
215 ParsedText prs;
216 TSVector out;
218 prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6; /* just estimation of
219 * word's number */
220 if (prs.lenwords == 0)
221 prs.lenwords = 2;
222 prs.curwords = 0;
223 prs.pos = 0;
224 prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
226 parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
227 PG_FREE_IF_COPY(in, 1);
229 if (prs.curwords)
230 out = make_tsvector(&prs);
231 else
233 pfree(prs.words);
234 out = palloc(CALCDATASIZE(0, 0));
235 SET_VARSIZE(out, CALCDATASIZE(0, 0));
236 out->size = 0;
239 PG_RETURN_POINTER(out);
242 Datum
243 to_tsvector(PG_FUNCTION_ARGS)
245 text *in = PG_GETARG_TEXT_P(0);
246 Oid cfgId;
248 cfgId = getTSCurrentConfig(true);
249 PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid,
250 ObjectIdGetDatum(cfgId),
251 PointerGetDatum(in)));
255 * to_tsquery
260 * This function is used for morph parsing.
262 * The value is passed to parsetext which will call the right dictionary to
263 * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
264 * to the stack.
266 * All words belonging to the same variant are pushed as an ANDed list,
267 * and different variants are ORred together.
269 static void
270 pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
272 int4 count = 0;
273 ParsedText prs;
274 uint32 variant,
275 pos,
276 cntvar = 0,
277 cntpos = 0,
278 cnt = 0;
279 Oid cfg_id = DatumGetObjectId(opaque); /* the input is actually
280 * an Oid, not a pointer */
282 prs.lenwords = 4;
283 prs.curwords = 0;
284 prs.pos = 0;
285 prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
287 parsetext(cfg_id, &prs, strval, lenval);
289 if (prs.curwords > 0)
292 while (count < prs.curwords)
294 pos = prs.words[count].pos.pos;
295 cntvar = 0;
296 while (count < prs.curwords && pos == prs.words[count].pos.pos)
298 variant = prs.words[count].nvariant;
300 cnt = 0;
301 while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
304 pushValue(state, prs.words[count].word, prs.words[count].len, weight,
305 ( (prs.words[count].flags & TSL_PREFIX) || prefix ) ? true : false );
306 pfree(prs.words[count].word);
307 if (cnt)
308 pushOperator(state, OP_AND);
309 cnt++;
310 count++;
313 if (cntvar)
314 pushOperator(state, OP_OR);
315 cntvar++;
318 if (cntpos)
319 pushOperator(state, OP_AND);
321 cntpos++;
324 pfree(prs.words);
327 else
328 pushStop(state);
331 Datum
332 to_tsquery_byid(PG_FUNCTION_ARGS)
334 Oid cfgid = PG_GETARG_OID(0);
335 text *in = PG_GETARG_TEXT_P(1);
336 TSQuery query;
337 QueryItem *res;
338 int4 len;
340 query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
342 if (query->size == 0)
343 PG_RETURN_TSQUERY(query);
345 res = clean_fakeval(GETQUERY(query), &len);
346 if (!res)
348 SET_VARSIZE(query, HDRSIZETQ);
349 query->size = 0;
350 PG_RETURN_POINTER(query);
352 memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
354 if ( len != query->size ) {
355 char *oldoperand = GETOPERAND(query);
356 int4 lenoperand = VARSIZE(query) - (oldoperand - (char*)query);
358 Assert( len < query->size );
360 query->size = len;
361 memcpy((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char*)query) );
362 SET_VARSIZE(query, COMPUTESIZE( len, lenoperand ));
365 pfree(res);
366 PG_RETURN_TSQUERY(query);
369 Datum
370 to_tsquery(PG_FUNCTION_ARGS)
372 text *in = PG_GETARG_TEXT_P(0);
373 Oid cfgId;
375 cfgId = getTSCurrentConfig(true);
376 PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid,
377 ObjectIdGetDatum(cfgId),
378 PointerGetDatum(in)));
381 Datum
382 plainto_tsquery_byid(PG_FUNCTION_ARGS)
384 Oid cfgid = PG_GETARG_OID(0);
385 text *in = PG_GETARG_TEXT_P(1);
386 TSQuery query;
387 QueryItem *res;
388 int4 len;
390 query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
392 if (query->size == 0)
393 PG_RETURN_TSQUERY(query);
395 res = clean_fakeval(GETQUERY(query), &len);
396 if (!res)
398 SET_VARSIZE(query, HDRSIZETQ);
399 query->size = 0;
400 PG_RETURN_POINTER(query);
402 memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
404 if ( len != query->size ) {
405 char *oldoperand = GETOPERAND(query);
406 int4 lenoperand = VARSIZE(query) - (oldoperand - (char*)query);
408 Assert( len < query->size );
410 query->size = len;
411 memcpy((void *) GETOPERAND(query), oldoperand, lenoperand );
412 SET_VARSIZE(query, COMPUTESIZE( len, lenoperand ));
415 pfree(res);
416 PG_RETURN_POINTER(query);
419 Datum
420 plainto_tsquery(PG_FUNCTION_ARGS)
422 text *in = PG_GETARG_TEXT_P(0);
423 Oid cfgId;
425 cfgId = getTSCurrentConfig(true);
426 PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
427 ObjectIdGetDatum(cfgId),
428 PointerGetDatum(in)));