Fix obsolete comment regarding FSM truncation.
[PostgreSQL.git] / src / include / tsearch / ts_type.h
blob0fef4072ac8e99459ca63cb5bfb00202e33ff667
1 /*-------------------------------------------------------------------------
3 * ts_type.h
4 * Definitions for the tsvector and tsquery types
6 * Copyright (c) 1998-2008, PostgreSQL Global Development Group
8 * $PostgreSQL$
10 *-------------------------------------------------------------------------
12 #ifndef _PG_TSTYPE_H_
13 #define _PG_TSTYPE_H_
15 #include "fmgr.h"
16 #include "utils/pg_crc.h"
20 * TSVector type.
21 * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
24 typedef struct
26 uint32
27 haspos:1,
28 len:11, /* MAX 2Kb */
29 pos:20; /* MAX 1Mb */
30 } WordEntry;
/* Largest values that fit in WordEntry's 11-bit len and 20-bit pos fields */
#define MAXSTRLEN   ((1 << 11) - 1)
#define MAXSTRPOS   ((1 << 20) - 1)
36 * Equivalent to
37 * typedef struct {
38 * uint16
39 * weight:2,
40 * pos:14;
41 * }
44 typedef uint16 WordEntryPos;
46 typedef struct
48 uint16 npos;
49 WordEntryPos pos[1]; /* var length */
50 } WordEntryPosVector;
/*
 * Accessors for a WordEntryPos: the weight occupies the top two bits and
 * the position the low 14 bits.
 *
 * The SET macros assign to x, so x must be an lvalue; it is evaluated
 * twice.  WEP_SETPOS masks v to 14 bits; WEP_SETWEIGHT does not mask v.
 */
#define WEP_GETWEIGHT(x)    ( (x) >> 14 )
#define WEP_GETPOS(x)       ( (x) & 0x3FFF )

#define WEP_SETWEIGHT(x,v)  ( (x) = ( (x) & 0x3FFF ) | ( (v) << 14 ) )
#define WEP_SETPOS(x,v)     ( (x) = ( (v) & 0x3FFF ) | ( (x) & 0xC000 ) )
/* One more than the largest value that fits in the 14-bit position field */
#define MAXENTRYPOS (1 << 14)
/* NOTE(review): presumably the cap on positions kept per lexeme - confirm */
#define MAXNUMPOS   (256)
/* Clamp x to at most MAXENTRYPOS-1; beware that x is evaluated twice */
#define LIMITPOS(x) ( ( (x) < MAXENTRYPOS ) ? (x) : (MAXENTRYPOS-1) )
64 * Structure of tsvector datatype:
65 * 1) standard varlena header
66 * 2) int4 size - number of lexemes or WordEntry array, which is the same
67 * 3) Array of WordEntry - sorted array, comparison based on word's length
68 * and strncmp(). WordEntry->pos points number of
69 * bytes from end of WordEntry array to start of
70 * corresponding lexeme.
71 * 4) Lexeme's storage:
72 * lexeme (without null-terminator)
73 * if haspos is true:
74 * padding byte if necessary to make the number of positions 2-byte aligned
75 * uint16 number of positions that follow.
76 * uint16[] positions
78 * The positions must be sorted.
81 typedef struct
83 int32 vl_len_; /* varlena header (do not touch directly!) */
84 int32 size;
85 WordEntry entries[1]; /* var size */
86 /* lexemes follow */
87 } TSVectorData;
89 typedef TSVectorData *TSVector;
/*
 * Size and access macros for TSVectorData.  In the accessors, x is a
 * TSVector and e points to one of its WordEntry elements.
 */

/* offset of the WordEntry array, i.e. size of the fixed-length part */
#define DATAHDRSIZE (offsetof(TSVectorData, entries))
/* total size for x WordEntry elements followed by lenstr bytes of storage */
#define CALCDATASIZE(x, lenstr) (DATAHDRSIZE + (x) * sizeof(WordEntry) + (lenstr) )
/* the WordEntry array */
#define ARRPTR(x)   ( (x)->entries )

/*
 * returns a pointer to the beginning of lexemes
 *
 * Both uses of x are parenthesized so that the argument may be any
 * pointer-valued expression (e.g. a cast or pointer arithmetic), not just
 * a plain identifier.
 */
#define STRPTR(x)   ( (char *) &(x)->entries[(x)->size] )

/* position vector of entry e; meaningful only if (e)->haspos is set */
#define _POSVECPTR(x, e)    ((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
/* number of positions stored for entry e, or 0 if it has none */
#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
/* array of positions of entry e; does not itself check (e)->haspos */
#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
/*
 * fmgr interface macros
 *
 * The DatumGet macros run the datum through PG_DETOAST_DATUM (or
 * PG_DETOAST_DATUM_COPY for the Copy variants) and cast the result to
 * TSVector.  PG_RETURN_TSVECTOR expands to a return statement.
 */
#define DatumGetTSVector(X)         ((TSVector) PG_DETOAST_DATUM(X))
#define DatumGetTSVectorCopy(X)     ((TSVector) PG_DETOAST_DATUM_COPY(X))
#define TSVectorGetDatum(X)         PointerGetDatum(X)
#define PG_GETARG_TSVECTOR(n)       DatumGetTSVector(PG_GETARG_DATUM(n))
#define PG_GETARG_TSVECTOR_COPY(n)  DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSVECTOR(x)       return TSVectorGetDatum(x)
114 * I/O
116 extern Datum tsvectorin(PG_FUNCTION_ARGS);
117 extern Datum tsvectorout(PG_FUNCTION_ARGS);
118 extern Datum tsvectorsend(PG_FUNCTION_ARGS);
119 extern Datum tsvectorrecv(PG_FUNCTION_ARGS);
122 * operations with tsvector
124 extern Datum tsvector_lt(PG_FUNCTION_ARGS);
125 extern Datum tsvector_le(PG_FUNCTION_ARGS);
126 extern Datum tsvector_eq(PG_FUNCTION_ARGS);
127 extern Datum tsvector_ne(PG_FUNCTION_ARGS);
128 extern Datum tsvector_ge(PG_FUNCTION_ARGS);
129 extern Datum tsvector_gt(PG_FUNCTION_ARGS);
130 extern Datum tsvector_cmp(PG_FUNCTION_ARGS);
132 extern Datum tsvector_length(PG_FUNCTION_ARGS);
133 extern Datum tsvector_strip(PG_FUNCTION_ARGS);
134 extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
135 extern Datum tsvector_concat(PG_FUNCTION_ARGS);
136 extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
137 extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
139 extern Datum ts_match_vq(PG_FUNCTION_ARGS);
140 extern Datum ts_match_qv(PG_FUNCTION_ARGS);
141 extern Datum ts_match_tt(PG_FUNCTION_ARGS);
142 extern Datum ts_match_tq(PG_FUNCTION_ARGS);
144 extern Datum ts_stat1(PG_FUNCTION_ARGS);
145 extern Datum ts_stat2(PG_FUNCTION_ARGS);
147 extern Datum ts_rank_tt(PG_FUNCTION_ARGS);
148 extern Datum ts_rank_wtt(PG_FUNCTION_ARGS);
149 extern Datum ts_rank_ttf(PG_FUNCTION_ARGS);
150 extern Datum ts_rank_wttf(PG_FUNCTION_ARGS);
151 extern Datum ts_rankcd_tt(PG_FUNCTION_ARGS);
152 extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS);
153 extern Datum ts_rankcd_ttf(PG_FUNCTION_ARGS);
154 extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
156 extern Datum tsmatchsel(PG_FUNCTION_ARGS);
157 extern Datum tsmatchjoinsel(PG_FUNCTION_ARGS);
159 extern Datum ts_typanalyze(PG_FUNCTION_ARGS);
163 * TSQuery
168 typedef int8 QueryItemType;
170 /* Valid values for QueryItemType: */
171 #define QI_VAL 1
172 #define QI_OPR 2
173 #define QI_VALSTOP 3 /* This is only used in an intermediate stack
174 * representation in parse_tsquery. It's not a
175 * legal type elsewhere. */
178 * QueryItem is one node in tsquery - operator or operand.
180 typedef struct
182 QueryItemType type; /* operand or kind of operator (ts_tokentype) */
183 uint8 weight; /* weights of operand to search. It's a
184 * bitmask of allowed weights. if it =0 then
185 * any weight are allowed. Weights and bit
186 * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
187 bool prefix; /* true if it's a prefix search */
188 int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
189 * data type, but we use comparisons to signed
190 * integers in the code. They would need to be
191 * changed as well. */
193 /* pointer to text value of operand, must correlate with WordEntry */
194 uint32
195 length:12,
196 distance:20;
197 } QueryOperand;
/* Legal values for QueryOperator.oper */
#define OP_NOT  1
#define OP_AND  2
#define OP_OR   3
205 typedef struct
207 QueryItemType type;
208 int8 oper; /* see above */
209 uint32 left; /* pointer to left operand. Right operand is
210 * item + 1, left operand is placed
211 * item+item->left */
212 } QueryOperator;
215 * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
216 * inside QueryItem requiring 8-byte alignment, like int64.
218 typedef union
220 QueryItemType type;
221 QueryOperator operator;
222 QueryOperand operand;
223 } QueryItem;
226 * Storage:
227 * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
230 typedef struct
232 int32 vl_len_; /* varlena header (do not touch directly!) */
233 int4 size; /* number of QueryItems */
234 char data[1];
235 } TSQueryData;
237 typedef TSQueryData *TSQuery;
/* Size of what precedes the QueryItem array: varlena header plus int4 size */
#define HDRSIZETQ   ( VARHDRSZ + sizeof(int4) )

/*
 * Computes the total size of a TSQuery: the header, all QueryItems and the
 * operand storage.  size is the number of QueryItems, and lenofoperand is
 * the total length of all operands.
 */
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )

/* Returns a pointer to the first QueryItem in a TSQuery */
#define GETQUERY(x)  ((QueryItem *) ((char *) (x) + HDRSIZETQ))

/* Returns a pointer to the beginning of operands in a TSQuery */
#define GETOPERAND(x)   ( (char *) GETQUERY(x) + ((TSQuery) (x))->size * sizeof(QueryItem) )
/*
 * fmgr interface macros
 *
 * Note: the TSQuery type is marked as plain storage, so it can't be
 * toasted; DatumGetTSQuery therefore just casts the pointer.  The Copy
 * variant still goes through PG_DETOAST_DATUM_COPY for simplicity.
 */
#define DatumGetTSQuery(X)          ((TSQuery) DatumGetPointer(X))
#define DatumGetTSQueryCopy(X)      ((TSQuery) PG_DETOAST_DATUM_COPY(X))
#define TSQueryGetDatum(X)          PointerGetDatum(X)
#define PG_GETARG_TSQUERY(n)        DatumGetTSQuery(PG_GETARG_DATUM(n))
#define PG_GETARG_TSQUERY_COPY(n)   DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSQUERY(x)        return TSQueryGetDatum(x)
266 * I/O
268 extern Datum tsqueryin(PG_FUNCTION_ARGS);
269 extern Datum tsqueryout(PG_FUNCTION_ARGS);
270 extern Datum tsquerysend(PG_FUNCTION_ARGS);
271 extern Datum tsqueryrecv(PG_FUNCTION_ARGS);
274 * operations with tsquery
276 extern Datum tsquery_lt(PG_FUNCTION_ARGS);
277 extern Datum tsquery_le(PG_FUNCTION_ARGS);
278 extern Datum tsquery_eq(PG_FUNCTION_ARGS);
279 extern Datum tsquery_ne(PG_FUNCTION_ARGS);
280 extern Datum tsquery_ge(PG_FUNCTION_ARGS);
281 extern Datum tsquery_gt(PG_FUNCTION_ARGS);
282 extern Datum tsquery_cmp(PG_FUNCTION_ARGS);
284 extern Datum tsquerytree(PG_FUNCTION_ARGS);
285 extern Datum tsquery_numnode(PG_FUNCTION_ARGS);
287 extern Datum tsquery_and(PG_FUNCTION_ARGS);
288 extern Datum tsquery_or(PG_FUNCTION_ARGS);
289 extern Datum tsquery_not(PG_FUNCTION_ARGS);
291 extern Datum tsquery_rewrite(PG_FUNCTION_ARGS);
292 extern Datum tsquery_rewrite_query(PG_FUNCTION_ARGS);
294 extern Datum tsq_mcontains(PG_FUNCTION_ARGS);
295 extern Datum tsq_mcontained(PG_FUNCTION_ARGS);
297 #endif /* _PG_TSTYPE_H_ */