/*-------------------------------------------------------------------------
 *
 * to_ts* function definitions
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *-------------------------------------------------------------------------
 */
16 #include "catalog/namespace.h"
17 #include "tsearch/ts_cache.h"
18 #include "tsearch/ts_utils.h"
19 #include "utils/builtins.h"
20 #include "utils/syscache.h"
24 get_current_ts_config(PG_FUNCTION_ARGS
)
26 PG_RETURN_OID(getTSCurrentConfig(true));
33 compareWORD(const void *a
, const void *b
)
37 res
= tsCompareString(
38 ((ParsedWord
*) a
)->word
, ((ParsedWord
*) a
)->len
,
39 ((ParsedWord
*) b
)->word
, ((ParsedWord
*) b
)->len
,
44 if (((ParsedWord
*) a
)->pos
.pos
== ((ParsedWord
*) b
)->pos
.pos
)
47 res
= (((ParsedWord
*) a
)->pos
.pos
> ((ParsedWord
*) b
)->pos
.pos
) ? 1 : -1;
54 uniqueWORD(ParsedWord
*a
, int4 l
)
62 tmppos
= LIMITPOS(a
->pos
.pos
);
64 a
->pos
.apos
= (uint16
*) palloc(sizeof(uint16
) * a
->alen
);
66 a
->pos
.apos
[1] = tmppos
;
74 * Sort words with its positions
76 qsort((void *) a
, l
, sizeof(ParsedWord
), compareWORD
);
79 * Initialize first word and its first position
81 tmppos
= LIMITPOS(a
->pos
.pos
);
83 a
->pos
.apos
= (uint16
*) palloc(sizeof(uint16
) * a
->alen
);
85 a
->pos
.apos
[1] = tmppos
;
88 * Summarize position information for each word
92 if (!(ptr
->len
== res
->len
&&
93 strncmp(ptr
->word
, res
->word
, res
->len
) == 0))
96 * Got a new word, so put it in result
100 res
->word
= ptr
->word
;
101 tmppos
= LIMITPOS(ptr
->pos
.pos
);
103 res
->pos
.apos
= (uint16
*) palloc(sizeof(uint16
) * res
->alen
);
104 res
->pos
.apos
[0] = 1;
105 res
->pos
.apos
[1] = tmppos
;
110 * The word already exists, so adjust position information. But
111 * before we should check size of position's array, max allowed
112 * value for position and uniqueness of position
115 if (res
->pos
.apos
[0] < MAXNUMPOS
- 1 && res
->pos
.apos
[res
->pos
.apos
[0]] != MAXENTRYPOS
- 1 &&
116 res
->pos
.apos
[res
->pos
.apos
[0]] != LIMITPOS(ptr
->pos
.pos
))
118 if (res
->pos
.apos
[0] + 1 >= res
->alen
)
121 res
->pos
.apos
= (uint16
*) repalloc(res
->pos
.apos
, sizeof(uint16
) * res
->alen
);
123 if (res
->pos
.apos
[0] == 0 || res
->pos
.apos
[res
->pos
.apos
[0]] != LIMITPOS(ptr
->pos
.pos
))
125 res
->pos
.apos
[res
->pos
.apos
[0] + 1] = LIMITPOS(ptr
->pos
.pos
);
137 * make value of tsvector, given parsed text
140 make_tsvector(ParsedText
*prs
)
151 prs
->curwords
= uniqueWORD(prs
->words
, prs
->curwords
);
152 for (i
= 0; i
< prs
->curwords
; i
++)
154 lenstr
+= prs
->words
[i
].len
;
155 if (prs
->words
[i
].alen
)
157 lenstr
= SHORTALIGN(lenstr
);
158 lenstr
+= sizeof(uint16
) + prs
->words
[i
].pos
.apos
[0] * sizeof(WordEntryPos
);
162 if (lenstr
> MAXSTRPOS
)
164 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
165 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr
, MAXSTRPOS
)));
167 totallen
= CALCDATASIZE(prs
->curwords
, lenstr
);
168 in
= (TSVector
) palloc0(totallen
);
169 SET_VARSIZE(in
, totallen
);
170 in
->size
= prs
->curwords
;
175 for (i
= 0; i
< prs
->curwords
; i
++)
177 ptr
->len
= prs
->words
[i
].len
;
179 memcpy(str
+ stroff
, prs
->words
[i
].word
, prs
->words
[i
].len
);
180 stroff
+= prs
->words
[i
].len
;
181 pfree(prs
->words
[i
].word
);
182 if (prs
->words
[i
].alen
)
184 int k
= prs
->words
[i
].pos
.apos
[0];
188 elog(ERROR
, "positions array too long");
191 stroff
= SHORTALIGN(stroff
);
192 *(uint16
*) (str
+ stroff
) = (uint16
) k
;
193 wptr
= POSDATAPTR(in
, ptr
);
194 for (j
= 0; j
< k
; j
++)
196 WEP_SETWEIGHT(wptr
[j
], 0);
197 WEP_SETPOS(wptr
[j
], prs
->words
[i
].pos
.apos
[j
+ 1]);
199 stroff
+= sizeof(uint16
) + k
* sizeof(WordEntryPos
);
200 pfree(prs
->words
[i
].pos
.apos
);
211 to_tsvector_byid(PG_FUNCTION_ARGS
)
213 Oid cfgId
= PG_GETARG_OID(0);
214 text
*in
= PG_GETARG_TEXT_P(1);
218 prs
.lenwords
= (VARSIZE(in
) - VARHDRSZ
) / 6; /* just estimation of
220 if (prs
.lenwords
== 0)
224 prs
.words
= (ParsedWord
*) palloc(sizeof(ParsedWord
) * prs
.lenwords
);
226 parsetext(cfgId
, &prs
, VARDATA(in
), VARSIZE(in
) - VARHDRSZ
);
227 PG_FREE_IF_COPY(in
, 1);
230 out
= make_tsvector(&prs
);
234 out
= palloc(CALCDATASIZE(0, 0));
235 SET_VARSIZE(out
, CALCDATASIZE(0, 0));
239 PG_RETURN_POINTER(out
);
243 to_tsvector(PG_FUNCTION_ARGS
)
245 text
*in
= PG_GETARG_TEXT_P(0);
248 cfgId
= getTSCurrentConfig(true);
249 PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid
,
250 ObjectIdGetDatum(cfgId
),
251 PointerGetDatum(in
)));
260 * This function is used for morph parsing.
262 * The value is passed to parsetext which will call the right dictionary to
263 * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
266 * All words belonging to the same variant are pushed as an ANDed list,
267 * and different variants are ORred together.
270 pushval_morph(Datum opaque
, TSQueryParserState state
, char *strval
, int lenval
, int2 weight
, bool prefix
)
279 Oid cfg_id
= DatumGetObjectId(opaque
); /* the input is actually
280 * an Oid, not a pointer */
285 prs
.words
= (ParsedWord
*) palloc(sizeof(ParsedWord
) * prs
.lenwords
);
287 parsetext(cfg_id
, &prs
, strval
, lenval
);
289 if (prs
.curwords
> 0)
292 while (count
< prs
.curwords
)
294 pos
= prs
.words
[count
].pos
.pos
;
296 while (count
< prs
.curwords
&& pos
== prs
.words
[count
].pos
.pos
)
298 variant
= prs
.words
[count
].nvariant
;
301 while (count
< prs
.curwords
&& pos
== prs
.words
[count
].pos
.pos
&& variant
== prs
.words
[count
].nvariant
)
304 pushValue(state
, prs
.words
[count
].word
, prs
.words
[count
].len
, weight
,
305 ( (prs
.words
[count
].flags
& TSL_PREFIX
) || prefix
) ? true : false );
306 pfree(prs
.words
[count
].word
);
308 pushOperator(state
, OP_AND
);
314 pushOperator(state
, OP_OR
);
319 pushOperator(state
, OP_AND
);
332 to_tsquery_byid(PG_FUNCTION_ARGS
)
334 Oid cfgid
= PG_GETARG_OID(0);
335 text
*in
= PG_GETARG_TEXT_P(1);
340 query
= parse_tsquery(text_to_cstring(in
), pushval_morph
, ObjectIdGetDatum(cfgid
), false);
342 if (query
->size
== 0)
343 PG_RETURN_TSQUERY(query
);
345 res
= clean_fakeval(GETQUERY(query
), &len
);
348 SET_VARSIZE(query
, HDRSIZETQ
);
350 PG_RETURN_POINTER(query
);
352 memcpy((void *) GETQUERY(query
), (void *) res
, len
* sizeof(QueryItem
));
354 if ( len
!= query
->size
) {
355 char *oldoperand
= GETOPERAND(query
);
356 int4 lenoperand
= VARSIZE(query
) - (oldoperand
- (char*)query
);
358 Assert( len
< query
->size
);
361 memcpy((void *) GETOPERAND(query
), oldoperand
, VARSIZE(query
) - (oldoperand
- (char*)query
) );
362 SET_VARSIZE(query
, COMPUTESIZE( len
, lenoperand
));
366 PG_RETURN_TSQUERY(query
);
370 to_tsquery(PG_FUNCTION_ARGS
)
372 text
*in
= PG_GETARG_TEXT_P(0);
375 cfgId
= getTSCurrentConfig(true);
376 PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid
,
377 ObjectIdGetDatum(cfgId
),
378 PointerGetDatum(in
)));
382 plainto_tsquery_byid(PG_FUNCTION_ARGS
)
384 Oid cfgid
= PG_GETARG_OID(0);
385 text
*in
= PG_GETARG_TEXT_P(1);
390 query
= parse_tsquery(text_to_cstring(in
), pushval_morph
, ObjectIdGetDatum(cfgid
), true);
392 if (query
->size
== 0)
393 PG_RETURN_TSQUERY(query
);
395 res
= clean_fakeval(GETQUERY(query
), &len
);
398 SET_VARSIZE(query
, HDRSIZETQ
);
400 PG_RETURN_POINTER(query
);
402 memcpy((void *) GETQUERY(query
), (void *) res
, len
* sizeof(QueryItem
));
404 if ( len
!= query
->size
) {
405 char *oldoperand
= GETOPERAND(query
);
406 int4 lenoperand
= VARSIZE(query
) - (oldoperand
- (char*)query
);
408 Assert( len
< query
->size
);
411 memcpy((void *) GETOPERAND(query
), oldoperand
, lenoperand
);
412 SET_VARSIZE(query
, COMPUTESIZE( len
, lenoperand
));
416 PG_RETURN_POINTER(query
);
420 plainto_tsquery(PG_FUNCTION_ARGS
)
422 text
*in
= PG_GETARG_TEXT_P(0);
425 cfgId
= getTSCurrentConfig(true);
426 PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid
,
427 ObjectIdGetDatum(cfgId
),
428 PointerGetDatum(in
)));