/*-------------------------------------------------------------------------
 *
 * ts_utils.h
 *	  helper utilities for tsearch
 *
 * Copyright (c) 1998-2021, PostgreSQL Global Development Group
 *
 * src/include/tsearch/ts_utils.h
 *
 *-------------------------------------------------------------------------
 */
12 #ifndef _PG_TS_UTILS_H_
13 #define _PG_TS_UTILS_H_
15 #include "nodes/pg_list.h"
16 #include "tsearch/ts_public.h"
17 #include "tsearch/ts_type.h"
/*
 * Common parse definitions for tsvector and tsquery
 */
23 /* tsvector parser support. */
/*
 * Handle type for the tsvector parser.  Only a pointer to the state struct
 * is exposed here; the struct itself is opaque and is defined in
 * tsvector_parser.c.
 */
struct TSVectorParseStateData;
typedef struct TSVectorParseStateData *TSVectorParseState;
/*
 * tsvector parser flag bits.  Each flag occupies its own bit, so they can
 * be OR'ed together.
 * NOTE(review): presumably these are the values accepted by the "flags"
 * argument of init_tsvector_parser() -- confirm against its definition.
 */
#define P_TSV_OPR_IS_DELIM	(1 << 0)
#define P_TSV_IS_TSQUERY	(1 << 1)
#define P_TSV_IS_WEB		(1 << 2)
32 extern TSVectorParseState
init_tsvector_parser(char *input
, int flags
);
33 extern void reset_tsvector_parser(TSVectorParseState state
, char *input
);
34 extern bool gettoken_tsvector(TSVectorParseState state
,
35 char **token
, int *len
,
36 WordEntryPos
**pos
, int *poslen
,
38 extern void close_tsvector_parser(TSVectorParseState state
);
40 /* phrase operator begins with '<' */
41 #define ISOPERATOR(x) \
42 ( pg_mblen(x) == 1 && ( *(x) == '!' || \
/*
 * Handle type for the tsquery parser.  The struct body is private to
 * backend/utils/adt/tsquery.c; only the pointer type is visible here.
 */
struct TSQueryParserStateData;
typedef struct TSQueryParserStateData *TSQueryParserState;
55 typedef void (*PushFunction
) (Datum opaque
, TSQueryParserState state
,
56 char *token
, int tokenlen
,
57 int16 tokenweights
, /* bitmap as described in
58 * QueryOperand struct */
/*
 * tsquery parser flag bits; one bit per flag.
 * NOTE(review): presumably passed to parse_tsquery() -- confirm against
 * its full prototype.
 */
#define P_TSQ_PLAIN		(1 << 0)
#define P_TSQ_WEB		(1 << 1)
64 extern TSQuery
parse_tsquery(char *buf
,
69 /* Functions for use by PushFunction implementations */
70 extern void pushValue(TSQueryParserState state
,
71 char *strval
, int lenval
, int16 weight
, bool prefix
);
72 extern void pushStop(TSQueryParserState state
);
73 extern void pushOperator(TSQueryParserState state
, int8 oper
, int16 distance
);
/*
 * parse plain text and lexize words
 */
87 * When apos array is used, apos[0] is the number of elements in the
88 * array (excluding apos[0]), and alen is the allocated size of the
93 uint16 flags
; /* currently, only TSL_PREFIX */
106 extern void parsetext(Oid cfgId
, ParsedText
*prs
, char *buf
, int32 buflen
);
/*
 * headline framework, flow in common to generate:
 *	1 parse text with hlparsetext
 *	2 parser-specific function to find part
 *	3 generateHeadline to generate result text
 */
115 extern void hlparsetext(Oid cfgId
, HeadlineParsedText
*prs
, TSQuery query
,
116 char *buf
, int32 buflen
);
117 extern text
*generateHeadline(HeadlineParsedText
*prs
);
/*
 * TSQuery execution support
 *
 * TS_execute() executes a tsquery against data that can be represented in
 * various forms.  The TSExecuteCallback callback function is called to check
 * whether a given primitive tsquery value is matched in the data.
 */
127 /* TS_execute requires ternary logic to handle NOT with phrase matches */
/*
 * Three-valued match result.  The enumerators are listed in this order, so
 * TS_NO is 0, TS_YES is 1 and TS_MAYBE is 2.
 */
typedef enum
{
	TS_NO,						/* definitely no match */
	TS_YES,						/* definitely does match */
	TS_MAYBE					/* can't verify match for lack of pos data */
} TSTernaryValue;
136 * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
137 * lexeme position data (because of a phrase-match operator in the tsquery).
138 * The callback should fill in position data when it returns TS_YES (success).
139 * If it cannot return position data, it should leave "data" unchanged and
140 * return TS_MAYBE. The caller of TS_execute() must then arrange for a later
141 * recheck with position data available.
143 * The reported lexeme positions must be sorted and unique. Callers must only
144 * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
145 * This allows the returned "pos" to point directly to the WordEntryPos
146 * portion of a tsvector value. If "allocated" is true then the pos array
147 * is palloc'd workspace and caller may free it when done.
149 * "negate" means that the pos array contains positions where the query does
150 * not match, rather than positions where it does. "width" is positive when
151 * the match is wider than one lexeme. Neither of these fields normally need
152 * to be touched by TSExecuteCallback functions; they are used for
153 * phrase-search processing within TS_execute.
155 * All fields of the ExecPhraseData struct are initially zeroed by caller.
157 typedef struct ExecPhraseData
159 int npos
; /* number of positions reported */
160 bool allocated
; /* pos points to palloc'd data? */
161 bool negate
; /* positions are where query is NOT matched */
162 WordEntryPos
*pos
; /* ordered, non-duplicate lexeme positions */
163 int width
; /* width of match in lexemes, less 1 */
167 * Signature for TSQuery lexeme check functions
169 * arg: opaque value passed through from caller of TS_execute
170 * val: lexeme to test for presence of
171 * data: to be filled with lexeme positions; NULL if position data not needed
173 * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
174 * present, TS_NO if it definitely is not present. If data is not NULL,
175 * it must be filled with lexeme positions if available. If position data
176 * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
178 typedef TSTernaryValue (*TSExecuteCallback
) (void *arg
, QueryOperand
*val
,
179 ExecPhraseData
*data
);
/*
 * Flag bits for TS_execute
 */
#define TS_EXEC_EMPTY			(0x00)

/*
 * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
 * evaluated to be true.  This was formerly the default behavior.  It's now
 * deprecated because it tends to give silly answers, but some applications
 * might still have a use for it.
 */
#define TS_EXEC_SKIP_NOT		(0x01)

/*
 * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
 * in the absence of position information: a true result indicates that the
 * phrase might be present.  Without this flag, OP_PHRASE always returns
 * false if lexeme position information is not available.
 */
#define TS_EXEC_PHRASE_NO_POS	(0x02)
200 extern bool TS_execute(QueryItem
*curitem
, void *arg
, uint32 flags
,
201 TSExecuteCallback chkcond
);
202 extern TSTernaryValue
TS_execute_ternary(QueryItem
*curitem
, void *arg
,
204 TSExecuteCallback chkcond
);
205 extern bool tsquery_requires_match(QueryItem
*curitem
);
/*
 * to_ts* - text transformation to tsvector, tsquery
 */
210 extern TSVector
make_tsvector(ParsedText
*prs
);
211 extern int32
tsCompareString(char *a
, int lena
, char *b
, int lenb
, bool prefix
);
/*
 * Possible strategy numbers for indexes
 *	  TSearchStrategyNumber  - (tsvector|text) @@ tsquery
 *	  TSearchWithClassStrategyNumber  - tsvector @@@ tsquery
 */
#define TSearchStrategyNumber			1
#define TSearchWithClassStrategyNumber	2
224 extern QueryItem
*clean_NOT(QueryItem
*ptr
, int32
*len
);
225 extern TSQuery
cleanup_tsquery_stopwords(TSQuery in
);
227 typedef struct QTNode
234 struct QTNode
**child
;
237 /* bits in QTNode.flags */
/* Each flag is a distinct single bit, so the values can be OR'ed together. */
#define QTN_NEEDFREE	0x01
#define QTN_NOCHANGE	0x02
#define QTN_WORDFREE	0x04
242 typedef uint64 TSQuerySign
;
244 #define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
246 #define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X))
247 #define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X))
248 #define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X)
249 #define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n))
252 extern QTNode
*QT2QTN(QueryItem
*in
, char *operand
);
253 extern TSQuery
QTN2QT(QTNode
*in
);
254 extern void QTNFree(QTNode
*in
);
255 extern void QTNSort(QTNode
*in
);
256 extern void QTNTernary(QTNode
*in
);
257 extern void QTNBinary(QTNode
*in
);
258 extern int QTNodeCompare(QTNode
*an
, QTNode
*bn
);
259 extern QTNode
*QTNCopy(QTNode
*in
);
260 extern void QTNClearFlags(QTNode
*in
, uint32 flags
);
261 extern bool QTNEq(QTNode
*a
, QTNode
*b
);
262 extern TSQuerySign
makeTSQuerySign(TSQuery a
);
263 extern QTNode
*findsubquery(QTNode
*root
, QTNode
*ex
, QTNode
*subs
,
266 #endif /* _PG_TS_UTILS_H_ */