/*-------------------------------------------------------------------------
 *
 * helper utilities for tsearch
 *
 * Copyright (c) 1998-2025, PostgreSQL Global Development Group
 *
 * src/include/tsearch/ts_utils.h
 *
 *-------------------------------------------------------------------------
 */
12 #ifndef _PG_TS_UTILS_H_
13 #define _PG_TS_UTILS_H_
15 #include "nodes/pg_list.h"
16 #include "tsearch/ts_public.h"
17 #include "tsearch/ts_type.h"
/*
 * Common parse definitions for tsvector and tsquery
 */
/*
 * tsvector parser support.
 *
 * Only a forward declaration of the parser state is exposed here; the struct
 * body is opaque to includers of this header, which therefore handle it
 * solely through the TSVectorParseState pointer type.
 */
struct TSVectorParseStateData;	/* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState;
/*
 * Flag bits that can be passed to init_tsvector_parser.  Each flag occupies
 * a distinct bit, so any combination may be OR'ed together.
 */
#define P_TSV_OPR_IS_DELIM	(1 << 0)
#define P_TSV_IS_TSQUERY	(1 << 1)
#define P_TSV_IS_WEB		(1 << 2)
33 extern TSVectorParseState
init_tsvector_parser(char *input
, int flags
,
35 extern void reset_tsvector_parser(TSVectorParseState state
, char *input
);
36 extern bool gettoken_tsvector(TSVectorParseState state
,
37 char **strval
, int *lenval
,
38 WordEntryPos
**pos_ptr
, int *poslen
,
40 extern void close_tsvector_parser(TSVectorParseState state
);
42 /* phrase operator begins with '<' */
43 #define ISOPERATOR(x) \
44 ( pg_mblen(x) == 1 && ( *(x) == '!' || \
/*
 * tsquery parser state.
 *
 * Only a forward declaration is exposed; includers of this header can pass
 * the TSQueryParserState pointer around but cannot look inside the struct.
 */
struct TSQueryParserStateData;	/* private in backend/utils/adt/tsquery.c */
typedef struct TSQueryParserStateData *TSQueryParserState;
57 typedef void (*PushFunction
) (Datum opaque
, TSQueryParserState state
,
58 char *token
, int tokenlen
,
59 int16 tokenweights
, /* bitmap as described in
60 * QueryOperand struct */
/*
 * Flag bits that can be passed to parse_tsquery.  Each flag occupies a
 * distinct bit.
 */
#define P_TSQ_PLAIN		(1 << 0)
#define P_TSQ_WEB		(1 << 1)
67 extern TSQuery
parse_tsquery(char *buf
,
73 /* Functions for use by PushFunction implementations */
74 extern void pushValue(TSQueryParserState state
,
75 char *strval
, int lenval
, int16 weight
, bool prefix
);
76 extern void pushStop(TSQueryParserState state
);
77 extern void pushOperator(TSQueryParserState state
, int8 oper
, int16 distance
);
/*
 * parse plain text and lexize words
 */
84 uint16 flags
; /* currently, only TSL_PREFIX */
93 * When apos array is used, apos[0] is the number of elements in the
94 * array (excluding apos[0]), and alen is the allocated size of the
95 * array. We do not allow more than MAXNUMPOS array elements.
110 extern void parsetext(Oid cfgId
, ParsedText
*prs
, char *buf
, int32 buflen
);
/*
 * Headline framework.  The common flow for generating a headline is:
 *	1. parse text with hlparsetext
 *	2. parser-specific function to find part
 *	3. generateHeadline to generate result text
 */
119 extern void hlparsetext(Oid cfgId
, HeadlineParsedText
*prs
, TSQuery query
,
120 char *buf
, int32 buflen
);
121 extern text
*generateHeadline(HeadlineParsedText
*prs
);
/*
 * TSQuery execution support
 *
 * TS_execute() executes a tsquery against data that can be represented in
 * various forms.  The TSExecuteCallback callback function is called to check
 * whether a given primitive tsquery value is matched in the data.
 */
/*
 * TS_execute requires ternary logic to handle NOT with phrase matches.
 *
 * The enumerators take the default values 0, 1 and 2 in declaration order.
 */
typedef enum
{
	TS_NO,						/* definitely no match */
	TS_YES,						/* definitely does match */
	TS_MAYBE,					/* can't verify match for lack of pos data */
} TSTernaryValue;
/*
 * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
 * lexeme position data (because of a phrase-match operator in the tsquery).
 * The callback should fill in position data when it returns TS_YES (success).
 * If it cannot return position data, it should leave "data" unchanged and
 * return TS_MAYBE.  The caller of TS_execute() must then arrange for a later
 * recheck with position data available.
 *
 * The reported lexeme positions must be sorted and unique.  Callers must only
 * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
 * This allows the returned "pos" to point directly to the WordEntryPos
 * portion of a tsvector value.  If "allocated" is true then the pos array
 * is palloc'd workspace and caller may free it when done.
 *
 * "negate" means that the pos array contains positions where the query does
 * not match, rather than positions where it does.  "width" is positive when
 * the match is wider than one lexeme.  Neither of these fields normally needs
 * to be touched by TSExecuteCallback functions; they are used for
 * phrase-search processing within TS_execute.
 *
 * All fields of the ExecPhraseData struct are initially zeroed by caller.
 */
161 typedef struct ExecPhraseData
163 int npos
; /* number of positions reported */
164 bool allocated
; /* pos points to palloc'd data? */
165 bool negate
; /* positions are where query is NOT matched */
166 WordEntryPos
*pos
; /* ordered, non-duplicate lexeme positions */
167 int width
; /* width of match in lexemes, less 1 */
/*
 * Signature for TSQuery lexeme check functions
 *
 * arg: opaque value passed through from caller of TS_execute
 * val: lexeme to test for presence of
 * data: to be filled with lexeme positions; NULL if position data not needed
 *
 * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
 * present, TS_NO if it definitely is not present.  If data is not NULL,
 * it must be filled with lexeme positions if available.  If position data
 * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
 */
182 typedef TSTernaryValue (*TSExecuteCallback
) (void *arg
, QueryOperand
*val
,
183 ExecPhraseData
*data
);
/*
 * Flag bits for TS_execute
 */
#define TS_EXEC_EMPTY			(0x00)
/*
 * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
 * evaluated to be true.  This was formerly the default behavior.  It's now
 * deprecated because it tends to give silly answers, but some applications
 * might still have a use for it.
 */
#define TS_EXEC_SKIP_NOT		(0x01)
/*
 * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
 * in the absence of position information: a true result indicates that the
 * phrase might be present.  Without this flag, OP_PHRASE always returns
 * false if lexeme position information is not available.
 */
#define TS_EXEC_PHRASE_NO_POS	(0x02)
204 extern bool TS_execute(QueryItem
*curitem
, void *arg
, uint32 flags
,
205 TSExecuteCallback chkcond
);
206 extern TSTernaryValue
TS_execute_ternary(QueryItem
*curitem
, void *arg
,
208 TSExecuteCallback chkcond
);
209 extern List
*TS_execute_locations(QueryItem
*curitem
, void *arg
,
211 TSExecuteCallback chkcond
);
212 extern bool tsquery_requires_match(QueryItem
*curitem
);
/*
 * to_ts* - text transformation to tsvector, tsquery
 */
217 extern TSVector
make_tsvector(ParsedText
*prs
);
218 extern int32
tsCompareString(char *a
, int lena
, char *b
, int lenb
, bool prefix
);
/*
 * Possible strategy numbers for indexes
 *	  TSearchStrategyNumber  - (tsvector|text) @@ tsquery
 *	  TSearchWithClassStrategyNumber  - tsvector @@@ tsquery
 */
#define TSearchStrategyNumber			1
#define TSearchWithClassStrategyNumber	2
231 extern QueryItem
*clean_NOT(QueryItem
*ptr
, int32
*len
);
232 extern TSQuery
cleanup_tsquery_stopwords(TSQuery in
, bool noisy
);
234 typedef struct QTNode
241 struct QTNode
**child
;
/*
 * Bits in QTNode.flags.  Each occupies a distinct bit.
 */
#define QTN_NEEDFREE	0x01
#define QTN_NOCHANGE	0x02
#define QTN_WORDFREE	0x04
249 typedef uint64 TSQuerySign
;
251 #define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
254 TSQuerySignGetDatum(TSQuerySign X
)
256 return Int64GetDatum((int64
) X
);
259 static inline TSQuerySign
260 DatumGetTSQuerySign(Datum X
)
262 return (TSQuerySign
) DatumGetInt64(X
);
/*
 * Convenience macros for TSQuerySign values.
 *
 * PG_RETURN_TSQUERYSIGN(X) expands to a return statement whose value is
 * TSQuerySignGetDatum(X).  PG_GETARG_TSQUERYSIGN(n) expands to
 * DatumGetTSQuerySign applied to PG_GETARG_DATUM(n).
 */
#define PG_RETURN_TSQUERYSIGN(X)	return TSQuerySignGetDatum(X)
#define PG_GETARG_TSQUERYSIGN(n)	DatumGetTSQuerySign(PG_GETARG_DATUM(n))
269 extern QTNode
*QT2QTN(QueryItem
*in
, char *operand
);
270 extern TSQuery
QTN2QT(QTNode
*in
);
271 extern void QTNFree(QTNode
*in
);
272 extern void QTNSort(QTNode
*in
);
273 extern void QTNTernary(QTNode
*in
);
274 extern void QTNBinary(QTNode
*in
);
275 extern int QTNodeCompare(QTNode
*an
, QTNode
*bn
);
276 extern QTNode
*QTNCopy(QTNode
*in
);
277 extern void QTNClearFlags(QTNode
*in
, uint32 flags
);
278 extern bool QTNEq(QTNode
*a
, QTNode
*b
);
279 extern TSQuerySign
makeTSQuerySign(TSQuery a
);
280 extern QTNode
*findsubquery(QTNode
*root
, QTNode
*ex
, QTNode
*subs
,
283 #endif /* _PG_TS_UTILS_H_ */