sepgsql: update TAP test to use fat comma style
[pgsql.git] / src / backend / utils / adt / tsquery.c
blobb1bad7bd60cf5e4d5739364f5bfffba8211dbfa4
/*-------------------------------------------------------------------------
 *
 * tsquery.c
 *	  I/O functions for tsquery
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/tsquery.c
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres.h"
17 #include "libpq/pqformat.h"
18 #include "miscadmin.h"
19 #include "nodes/miscnodes.h"
20 #include "tsearch/ts_locale.h"
21 #include "tsearch/ts_type.h"
22 #include "tsearch/ts_utils.h"
23 #include "utils/builtins.h"
24 #include "utils/memutils.h"
25 #include "utils/pg_crc.h"
26 #include "varatt.h"
28 /* FTS operator priorities, see ts_type.h */
29 const int tsearch_op_priority[OP_COUNT] =
31 4, /* OP_NOT */
32 2, /* OP_AND */
33 1, /* OP_OR */
34 3 /* OP_PHRASE */
/*
 * parser's states
 *
 * WAITFIRSTOPERAND is the state a parse starts in; it behaves like
 * WAITOPERAND except that the tokenizers report end-of-input instead of a
 * "no operand" condition when no value can be read.
 */
typedef enum
{
	WAITOPERAND = 1,
	WAITOPERATOR = 2,
	WAITFIRSTOPERAND = 3,
} ts_parserstate;
/*
 * token types for parsing
 *
 * These are the values a tokenizer function (see ts_tokenizer) can return.
 */
typedef enum
{
	PT_END = 0,					/* no more tokens */
	PT_ERR = 1,					/* error */
	PT_VAL = 2,					/* operand value */
	PT_OPR = 3,					/* operator */
	PT_OPEN = 4,				/* opening parenthesis */
	PT_CLOSE = 5,				/* closing parenthesis */
} ts_tokentype;
61 * get token from query string
63 * All arguments except "state" are output arguments.
65 * If return value is PT_OPR, then *operator is filled with an OP_* code
66 * and *weight will contain a distance value in case of phrase operator.
68 * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
69 * are filled.
71 * If PT_ERR is returned then a soft error has occurred. If state->escontext
72 * isn't already filled then this should be reported as a generic parse error.
74 typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
75 int *lenval, char **strval,
76 int16 *weight, bool *prefix);
78 struct TSQueryParserStateData
80 /* Tokenizer used for parsing tsquery */
81 ts_tokenizer gettoken;
83 /* State of tokenizer function */
84 char *buffer; /* entire string we are scanning */
85 char *buf; /* current scan point */
86 int count; /* nesting count, incremented by (,
87 * decremented by ) */
88 ts_parserstate state;
90 /* polish (prefix) notation in list, filled in by push* functions */
91 List *polstr;
94 * Strings from operands are collected in op. curop is a pointer to the
95 * end of used space of op.
97 char *op;
98 char *curop;
99 int lenop; /* allocated size of op */
100 int sumlen; /* used size of op */
102 /* state for value's parser */
103 TSVectorParseState valstate;
105 /* context object for soft errors - must match valstate's escontext */
106 Node *escontext;
110 * subroutine to parse the modifiers (weight and prefix flag currently)
111 * part, like ':AB*' of a query.
113 static char *
114 get_modifiers(char *buf, int16 *weight, bool *prefix)
116 *weight = 0;
117 *prefix = false;
119 if (!t_iseq(buf, ':'))
120 return buf;
122 buf++;
123 while (*buf && pg_mblen(buf) == 1)
125 switch (*buf)
127 case 'a':
128 case 'A':
129 *weight |= 1 << 3;
130 break;
131 case 'b':
132 case 'B':
133 *weight |= 1 << 2;
134 break;
135 case 'c':
136 case 'C':
137 *weight |= 1 << 1;
138 break;
139 case 'd':
140 case 'D':
141 *weight |= 1;
142 break;
143 case '*':
144 *prefix = true;
145 break;
146 default:
147 return buf;
149 buf++;
152 return buf;
156 * Parse phrase operator. The operator
157 * may take the following forms:
159 * a <N> b (distance is exactly N lexemes)
160 * a <-> b (default distance = 1)
162 * The buffer should begin with '<' char
164 static bool
165 parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
167 enum
169 PHRASE_OPEN = 0,
170 PHRASE_DIST,
171 PHRASE_CLOSE,
172 PHRASE_FINISH
173 } state = PHRASE_OPEN;
174 char *ptr = pstate->buf;
175 char *endptr;
176 long l = 1; /* default distance */
178 while (*ptr)
180 switch (state)
182 case PHRASE_OPEN:
183 if (t_iseq(ptr, '<'))
185 state = PHRASE_DIST;
186 ptr++;
188 else
189 return false;
190 break;
192 case PHRASE_DIST:
193 if (t_iseq(ptr, '-'))
195 state = PHRASE_CLOSE;
196 ptr++;
197 continue;
200 if (!isdigit((unsigned char) *ptr))
201 return false;
203 errno = 0;
204 l = strtol(ptr, &endptr, 10);
205 if (ptr == endptr)
206 return false;
207 else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
208 ereturn(pstate->escontext, false,
209 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
210 errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
211 MAXENTRYPOS)));
212 else
214 state = PHRASE_CLOSE;
215 ptr = endptr;
217 break;
219 case PHRASE_CLOSE:
220 if (t_iseq(ptr, '>'))
222 state = PHRASE_FINISH;
223 ptr++;
225 else
226 return false;
227 break;
229 case PHRASE_FINISH:
230 *distance = (int16) l;
231 pstate->buf = ptr;
232 return true;
236 return false;
240 * Parse OR operator used in websearch_to_tsquery(), returns true if we
241 * believe that "OR" literal could be an operator OR
243 static bool
244 parse_or_operator(TSQueryParserState pstate)
246 char *ptr = pstate->buf;
248 /* it should begin with "OR" literal */
249 if (pg_strncasecmp(ptr, "or", 2) != 0)
250 return false;
252 ptr += 2;
255 * it shouldn't be a part of any word but somewhere later it should be
256 * some operand
258 if (*ptr == '\0') /* no operand */
259 return false;
261 /* it shouldn't be a part of any word */
262 if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
263 return false;
265 for (;;)
267 ptr += pg_mblen(ptr);
269 if (*ptr == '\0') /* got end of string without operand */
270 return false;
273 * Suppose, we found an operand, but could be a not correct operand.
274 * So we still treat OR literal as operation with possibly incorrect
275 * operand and will not search it as lexeme
277 if (!isspace((unsigned char) *ptr))
278 break;
281 pstate->buf += 2;
282 return true;
285 static ts_tokentype
286 gettoken_query_standard(TSQueryParserState state, int8 *operator,
287 int *lenval, char **strval,
288 int16 *weight, bool *prefix)
290 *weight = 0;
291 *prefix = false;
293 while (true)
295 switch (state->state)
297 case WAITFIRSTOPERAND:
298 case WAITOPERAND:
299 if (t_iseq(state->buf, '!'))
301 state->buf++;
302 state->state = WAITOPERAND;
303 *operator = OP_NOT;
304 return PT_OPR;
306 else if (t_iseq(state->buf, '('))
308 state->buf++;
309 state->state = WAITOPERAND;
310 state->count++;
311 return PT_OPEN;
313 else if (t_iseq(state->buf, ':'))
315 /* generic syntax error message is fine */
316 return PT_ERR;
318 else if (!isspace((unsigned char) *state->buf))
321 * We rely on the tsvector parser to parse the value for
322 * us
324 reset_tsvector_parser(state->valstate, state->buf);
325 if (gettoken_tsvector(state->valstate, strval, lenval,
326 NULL, NULL, &state->buf))
328 state->buf = get_modifiers(state->buf, weight, prefix);
329 state->state = WAITOPERATOR;
330 return PT_VAL;
332 else if (SOFT_ERROR_OCCURRED(state->escontext))
334 /* gettoken_tsvector reported a soft error */
335 return PT_ERR;
337 else if (state->state == WAITFIRSTOPERAND)
339 return PT_END;
341 else
342 ereturn(state->escontext, PT_ERR,
343 (errcode(ERRCODE_SYNTAX_ERROR),
344 errmsg("no operand in tsquery: \"%s\"",
345 state->buffer)));
347 break;
349 case WAITOPERATOR:
350 if (t_iseq(state->buf, '&'))
352 state->buf++;
353 state->state = WAITOPERAND;
354 *operator = OP_AND;
355 return PT_OPR;
357 else if (t_iseq(state->buf, '|'))
359 state->buf++;
360 state->state = WAITOPERAND;
361 *operator = OP_OR;
362 return PT_OPR;
364 else if (parse_phrase_operator(state, weight))
366 /* weight var is used as storage for distance */
367 state->state = WAITOPERAND;
368 *operator = OP_PHRASE;
369 return PT_OPR;
371 else if (SOFT_ERROR_OCCURRED(state->escontext))
373 /* parse_phrase_operator reported a soft error */
374 return PT_ERR;
376 else if (t_iseq(state->buf, ')'))
378 state->buf++;
379 state->count--;
380 return (state->count < 0) ? PT_ERR : PT_CLOSE;
382 else if (*state->buf == '\0')
384 return (state->count) ? PT_ERR : PT_END;
386 else if (!isspace((unsigned char) *state->buf))
388 return PT_ERR;
390 break;
393 state->buf += pg_mblen(state->buf);
397 static ts_tokentype
398 gettoken_query_websearch(TSQueryParserState state, int8 *operator,
399 int *lenval, char **strval,
400 int16 *weight, bool *prefix)
402 *weight = 0;
403 *prefix = false;
405 while (true)
407 switch (state->state)
409 case WAITFIRSTOPERAND:
410 case WAITOPERAND:
411 if (t_iseq(state->buf, '-'))
413 state->buf++;
414 state->state = WAITOPERAND;
416 *operator = OP_NOT;
417 return PT_OPR;
419 else if (t_iseq(state->buf, '"'))
421 /* Everything in quotes is processed as a single token */
423 /* skip opening quote */
424 state->buf++;
425 *strval = state->buf;
427 /* iterate to the closing quote or end of the string */
428 while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
429 state->buf++;
430 *lenval = state->buf - *strval;
432 /* skip closing quote if not end of the string */
433 if (*state->buf != '\0')
434 state->buf++;
436 state->state = WAITOPERATOR;
437 state->count++;
438 return PT_VAL;
440 else if (ISOPERATOR(state->buf))
442 /* ignore, else gettoken_tsvector() will raise an error */
443 state->buf++;
444 state->state = WAITOPERAND;
445 continue;
447 else if (!isspace((unsigned char) *state->buf))
450 * We rely on the tsvector parser to parse the value for
451 * us
453 reset_tsvector_parser(state->valstate, state->buf);
454 if (gettoken_tsvector(state->valstate, strval, lenval,
455 NULL, NULL, &state->buf))
457 state->state = WAITOPERATOR;
458 return PT_VAL;
460 else if (SOFT_ERROR_OCCURRED(state->escontext))
462 /* gettoken_tsvector reported a soft error */
463 return PT_ERR;
465 else if (state->state == WAITFIRSTOPERAND)
467 return PT_END;
469 else
471 /* finally, we have to provide an operand */
472 pushStop(state);
473 return PT_END;
476 break;
478 case WAITOPERATOR:
479 if (*state->buf == '\0')
481 return PT_END;
483 else if (parse_or_operator(state))
485 state->state = WAITOPERAND;
486 *operator = OP_OR;
487 return PT_OPR;
489 else if (ISOPERATOR(state->buf))
491 /* ignore other operators in this state too */
492 state->buf++;
493 continue;
495 else if (!isspace((unsigned char) *state->buf))
497 /* insert implicit AND between operands */
498 state->state = WAITOPERAND;
499 *operator = OP_AND;
500 return PT_OPR;
502 break;
505 state->buf += pg_mblen(state->buf);
509 static ts_tokentype
510 gettoken_query_plain(TSQueryParserState state, int8 *operator,
511 int *lenval, char **strval,
512 int16 *weight, bool *prefix)
514 *weight = 0;
515 *prefix = false;
517 if (*state->buf == '\0')
518 return PT_END;
520 *strval = state->buf;
521 *lenval = strlen(state->buf);
522 state->buf += *lenval;
523 state->count++;
524 return PT_VAL;
528 * Push an operator to state->polstr
530 void
531 pushOperator(TSQueryParserState state, int8 oper, int16 distance)
533 QueryOperator *tmp;
535 Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
537 tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
538 tmp->type = QI_OPR;
539 tmp->oper = oper;
540 tmp->distance = (oper == OP_PHRASE) ? distance : 0;
541 /* left is filled in later with findoprnd */
543 state->polstr = lcons(tmp, state->polstr);
546 static void
547 pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
549 QueryOperand *tmp;
551 if (distance >= MAXSTRPOS)
552 ereturn(state->escontext,,
553 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
554 errmsg("value is too big in tsquery: \"%s\"",
555 state->buffer)));
556 if (lenval >= MAXSTRLEN)
557 ereturn(state->escontext,,
558 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
559 errmsg("operand is too long in tsquery: \"%s\"",
560 state->buffer)));
562 tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
563 tmp->type = QI_VAL;
564 tmp->weight = weight;
565 tmp->prefix = prefix;
566 tmp->valcrc = (int32) valcrc;
567 tmp->length = lenval;
568 tmp->distance = distance;
570 state->polstr = lcons(tmp, state->polstr);
574 * Push an operand to state->polstr.
576 * strval must point to a string equal to state->curop. lenval is the length
577 * of the string.
579 void
580 pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
582 pg_crc32 valcrc;
584 if (lenval >= MAXSTRLEN)
585 ereturn(state->escontext,,
586 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
587 errmsg("word is too long in tsquery: \"%s\"",
588 state->buffer)));
590 INIT_LEGACY_CRC32(valcrc);
591 COMP_LEGACY_CRC32(valcrc, strval, lenval);
592 FIN_LEGACY_CRC32(valcrc);
593 pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
595 /* append the value string to state.op, enlarging buffer if needed first */
596 while (state->curop - state->op + lenval + 1 >= state->lenop)
598 int used = state->curop - state->op;
600 state->lenop *= 2;
601 state->op = (char *) repalloc(state->op, state->lenop);
602 state->curop = state->op + used;
604 memcpy(state->curop, strval, lenval);
605 state->curop += lenval;
606 *(state->curop) = '\0';
607 state->curop++;
608 state->sumlen += lenval + 1 /* \0 */ ;
613 * Push a stopword placeholder to state->polstr
615 void
616 pushStop(TSQueryParserState state)
618 QueryOperand *tmp;
620 tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
621 tmp->type = QI_VALSTOP;
623 state->polstr = lcons(tmp, state->polstr);
627 #define STACKDEPTH 32
629 typedef struct OperatorElement
631 int8 op;
632 int16 distance;
633 } OperatorElement;
635 static void
636 pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
638 if (*lenstack == STACKDEPTH) /* internal error */
639 elog(ERROR, "tsquery stack too small");
641 stack[*lenstack].op = op;
642 stack[*lenstack].distance = distance;
644 (*lenstack)++;
647 static void
648 cleanOpStack(TSQueryParserState state,
649 OperatorElement *stack, int *lenstack, int8 op)
651 int opPriority = OP_PRIORITY(op);
653 while (*lenstack)
655 /* NOT is right associative unlike to others */
656 if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
657 (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
658 break;
660 (*lenstack)--;
661 pushOperator(state, stack[*lenstack].op,
662 stack[*lenstack].distance);
667 * Make polish (prefix) notation of query.
669 * See parse_tsquery for explanation of pushval.
671 static void
672 makepol(TSQueryParserState state,
673 PushFunction pushval,
674 Datum opaque)
676 int8 operator = 0;
677 ts_tokentype type;
678 int lenval = 0;
679 char *strval = NULL;
680 OperatorElement opstack[STACKDEPTH];
681 int lenstack = 0;
682 int16 weight = 0;
683 bool prefix;
685 /* since this function recurses, it could be driven to stack overflow */
686 check_stack_depth();
688 while ((type = state->gettoken(state, &operator,
689 &lenval, &strval,
690 &weight, &prefix)) != PT_END)
692 switch (type)
694 case PT_VAL:
695 pushval(opaque, state, strval, lenval, weight, prefix);
696 break;
697 case PT_OPR:
698 cleanOpStack(state, opstack, &lenstack, operator);
699 pushOpStack(opstack, &lenstack, operator, weight);
700 break;
701 case PT_OPEN:
702 makepol(state, pushval, opaque);
703 break;
704 case PT_CLOSE:
705 cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
706 return;
707 case PT_ERR:
708 default:
709 /* don't overwrite a soft error saved by gettoken function */
710 if (!SOFT_ERROR_OCCURRED(state->escontext))
711 errsave(state->escontext,
712 (errcode(ERRCODE_SYNTAX_ERROR),
713 errmsg("syntax error in tsquery: \"%s\"",
714 state->buffer)));
715 return;
717 /* detect soft error in pushval or recursion */
718 if (SOFT_ERROR_OCCURRED(state->escontext))
719 return;
722 cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
725 static void
726 findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
728 /* since this function recurses, it could be driven to stack overflow. */
729 check_stack_depth();
731 if (*pos >= nnodes)
732 elog(ERROR, "malformed tsquery: operand not found");
734 if (ptr[*pos].type == QI_VAL)
736 (*pos)++;
738 else if (ptr[*pos].type == QI_VALSTOP)
740 *needcleanup = true; /* we'll have to remove stop words */
741 (*pos)++;
743 else
745 Assert(ptr[*pos].type == QI_OPR);
747 if (ptr[*pos].qoperator.oper == OP_NOT)
749 ptr[*pos].qoperator.left = 1; /* fixed offset */
750 (*pos)++;
752 /* process the only argument */
753 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
755 else
757 QueryOperator *curitem = &ptr[*pos].qoperator;
758 int tmp = *pos; /* save current position */
760 Assert(curitem->oper == OP_AND ||
761 curitem->oper == OP_OR ||
762 curitem->oper == OP_PHRASE);
764 (*pos)++;
766 /* process RIGHT argument */
767 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
769 curitem->left = *pos - tmp; /* set LEFT arg's offset */
771 /* process LEFT argument */
772 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
779 * Fill in the left-fields previously left unfilled.
780 * The input QueryItems must be in polish (prefix) notation.
781 * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
783 static void
784 findoprnd(QueryItem *ptr, int size, bool *needcleanup)
786 uint32 pos;
788 *needcleanup = false;
789 pos = 0;
790 findoprnd_recurse(ptr, &pos, size, needcleanup);
792 if (pos != size)
793 elog(ERROR, "malformed tsquery: extra nodes");
798 * Parse the tsquery stored in "buf".
800 * Each value (operand) in the query is passed to pushval. pushval can
801 * transform the simple value to an arbitrarily complex expression using
802 * pushValue and pushOperator. It must push a single value with pushValue,
803 * a complete expression with all operands, or a stopword placeholder
804 * with pushStop, otherwise the prefix notation representation will be broken,
805 * having an operator with no operand.
807 * opaque is passed on to pushval as is, pushval can use it to store its
808 * private state.
810 * The pushval function can record soft errors via escontext.
811 * Callers must check SOFT_ERROR_OCCURRED to detect that.
813 * A bitmask of flags (see ts_utils.h) and an error context object
814 * can be provided as well. If a soft error occurs, NULL is returned.
816 TSQuery
817 parse_tsquery(char *buf,
818 PushFunction pushval,
819 Datum opaque,
820 int flags,
821 Node *escontext)
823 struct TSQueryParserStateData state;
824 int i;
825 TSQuery query;
826 int commonlen;
827 QueryItem *ptr;
828 ListCell *cell;
829 bool noisy;
830 bool needcleanup;
831 int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
833 /* plain should not be used with web */
834 Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
836 /* select suitable tokenizer */
837 if (flags & P_TSQ_PLAIN)
838 state.gettoken = gettoken_query_plain;
839 else if (flags & P_TSQ_WEB)
841 state.gettoken = gettoken_query_websearch;
842 tsv_flags |= P_TSV_IS_WEB;
844 else
845 state.gettoken = gettoken_query_standard;
847 /* emit nuisance NOTICEs only if not doing soft errors */
848 noisy = !(escontext && IsA(escontext, ErrorSaveContext));
850 /* init state */
851 state.buffer = buf;
852 state.buf = buf;
853 state.count = 0;
854 state.state = WAITFIRSTOPERAND;
855 state.polstr = NIL;
856 state.escontext = escontext;
858 /* init value parser's state */
859 state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
861 /* init list of operand */
862 state.sumlen = 0;
863 state.lenop = 64;
864 state.curop = state.op = (char *) palloc(state.lenop);
865 *(state.curop) = '\0';
867 /* parse query & make polish notation (postfix, but in reverse order) */
868 makepol(&state, pushval, opaque);
870 close_tsvector_parser(state.valstate);
872 if (SOFT_ERROR_OCCURRED(escontext))
873 return NULL;
875 if (state.polstr == NIL)
877 if (noisy)
878 ereport(NOTICE,
879 (errmsg("text-search query doesn't contain lexemes: \"%s\"",
880 state.buffer)));
881 query = (TSQuery) palloc(HDRSIZETQ);
882 SET_VARSIZE(query, HDRSIZETQ);
883 query->size = 0;
884 return query;
887 if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
888 ereturn(escontext, NULL,
889 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
890 errmsg("tsquery is too large")));
891 commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
893 /* Pack the QueryItems in the final TSQuery struct to return to caller */
894 query = (TSQuery) palloc0(commonlen);
895 SET_VARSIZE(query, commonlen);
896 query->size = list_length(state.polstr);
897 ptr = GETQUERY(query);
899 /* Copy QueryItems to TSQuery */
900 i = 0;
901 foreach(cell, state.polstr)
903 QueryItem *item = (QueryItem *) lfirst(cell);
905 switch (item->type)
907 case QI_VAL:
908 memcpy(&ptr[i], item, sizeof(QueryOperand));
909 break;
910 case QI_VALSTOP:
911 ptr[i].type = QI_VALSTOP;
912 break;
913 case QI_OPR:
914 memcpy(&ptr[i], item, sizeof(QueryOperator));
915 break;
916 default:
917 elog(ERROR, "unrecognized QueryItem type: %d", item->type);
919 i++;
922 /* Copy all the operand strings to TSQuery */
923 memcpy(GETOPERAND(query), state.op, state.sumlen);
924 pfree(state.op);
927 * Set left operand pointers for every operator. While we're at it,
928 * detect whether there are any QI_VALSTOP nodes.
930 findoprnd(ptr, query->size, &needcleanup);
933 * If there are QI_VALSTOP nodes, delete them and simplify the tree.
935 if (needcleanup)
936 query = cleanup_tsquery_stopwords(query, noisy);
938 return query;
941 static void
942 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
943 int16 weight, bool prefix)
945 pushValue(state, strval, lenval, weight, prefix);
949 * in without morphology
951 Datum
952 tsqueryin(PG_FUNCTION_ARGS)
954 char *in = PG_GETARG_CSTRING(0);
955 Node *escontext = fcinfo->context;
957 PG_RETURN_TSQUERY(parse_tsquery(in,
958 pushval_asis,
959 PointerGetDatum(NULL),
961 escontext));
965 * out function
967 typedef struct
969 QueryItem *curpol;
970 char *buf;
971 char *cur;
972 char *op;
973 int buflen;
974 } INFIX;
/*
 * Makes sure inf->buf is large enough for adding 'addsize' bytes
 *
 * The buffer is doubled until there is room for addsize bytes plus a
 * terminator; inf->cur is re-pointed into the (possibly moved) buffer.
 * Wrapped in do { } while (0) so that the macro acts as a single statement.
 */
#define RESIZEBUF(inf, addsize) \
do { \
	while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
	{ \
		int len = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
		(inf)->cur = (inf)->buf + len; \
	} \
} while (0)
987 * recursively traverse the tree and
988 * print it in infix (human-readable) form
990 static void
991 infix(INFIX *in, int parentPriority, bool rightPhraseOp)
993 /* since this function recurses, it could be driven to stack overflow. */
994 check_stack_depth();
996 if (in->curpol->type == QI_VAL)
998 QueryOperand *curpol = &in->curpol->qoperand;
999 char *op = in->op + curpol->distance;
1000 int clen;
1002 RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
1003 *(in->cur) = '\'';
1004 in->cur++;
1005 while (*op)
1007 if (t_iseq(op, '\''))
1009 *(in->cur) = '\'';
1010 in->cur++;
1012 else if (t_iseq(op, '\\'))
1014 *(in->cur) = '\\';
1015 in->cur++;
1017 COPYCHAR(in->cur, op);
1019 clen = pg_mblen(op);
1020 op += clen;
1021 in->cur += clen;
1023 *(in->cur) = '\'';
1024 in->cur++;
1025 if (curpol->weight || curpol->prefix)
1027 *(in->cur) = ':';
1028 in->cur++;
1029 if (curpol->prefix)
1031 *(in->cur) = '*';
1032 in->cur++;
1034 if (curpol->weight & (1 << 3))
1036 *(in->cur) = 'A';
1037 in->cur++;
1039 if (curpol->weight & (1 << 2))
1041 *(in->cur) = 'B';
1042 in->cur++;
1044 if (curpol->weight & (1 << 1))
1046 *(in->cur) = 'C';
1047 in->cur++;
1049 if (curpol->weight & 1)
1051 *(in->cur) = 'D';
1052 in->cur++;
1055 *(in->cur) = '\0';
1056 in->curpol++;
1058 else if (in->curpol->qoperator.oper == OP_NOT)
1060 int priority = QO_PRIORITY(in->curpol);
1062 if (priority < parentPriority)
1064 RESIZEBUF(in, 2);
1065 sprintf(in->cur, "( ");
1066 in->cur = strchr(in->cur, '\0');
1068 RESIZEBUF(in, 1);
1069 *(in->cur) = '!';
1070 in->cur++;
1071 *(in->cur) = '\0';
1072 in->curpol++;
1074 infix(in, priority, false);
1075 if (priority < parentPriority)
1077 RESIZEBUF(in, 2);
1078 sprintf(in->cur, " )");
1079 in->cur = strchr(in->cur, '\0');
1082 else
1084 int8 op = in->curpol->qoperator.oper;
1085 int priority = QO_PRIORITY(in->curpol);
1086 int16 distance = in->curpol->qoperator.distance;
1087 INFIX nrm;
1088 bool needParenthesis = false;
1090 in->curpol++;
1091 if (priority < parentPriority ||
1092 /* phrase operator depends on order */
1093 (op == OP_PHRASE && rightPhraseOp))
1095 needParenthesis = true;
1096 RESIZEBUF(in, 2);
1097 sprintf(in->cur, "( ");
1098 in->cur = strchr(in->cur, '\0');
1101 nrm.curpol = in->curpol;
1102 nrm.op = in->op;
1103 nrm.buflen = 16;
1104 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1106 /* get right operand */
1107 infix(&nrm, priority, (op == OP_PHRASE));
1109 /* get & print left operand */
1110 in->curpol = nrm.curpol;
1111 infix(in, priority, false);
1113 /* print operator & right operand */
1114 RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
1115 switch (op)
1117 case OP_OR:
1118 sprintf(in->cur, " | %s", nrm.buf);
1119 break;
1120 case OP_AND:
1121 sprintf(in->cur, " & %s", nrm.buf);
1122 break;
1123 case OP_PHRASE:
1124 if (distance != 1)
1125 sprintf(in->cur, " <%d> %s", distance, nrm.buf);
1126 else
1127 sprintf(in->cur, " <-> %s", nrm.buf);
1128 break;
1129 default:
1130 /* OP_NOT is handled in above if-branch */
1131 elog(ERROR, "unrecognized operator type: %d", op);
1133 in->cur = strchr(in->cur, '\0');
1134 pfree(nrm.buf);
1136 if (needParenthesis)
1138 RESIZEBUF(in, 2);
1139 sprintf(in->cur, " )");
1140 in->cur = strchr(in->cur, '\0');
1145 Datum
1146 tsqueryout(PG_FUNCTION_ARGS)
1148 TSQuery query = PG_GETARG_TSQUERY(0);
1149 INFIX nrm;
1151 if (query->size == 0)
1153 char *b = palloc(1);
1155 *b = '\0';
1156 PG_RETURN_POINTER(b);
1158 nrm.curpol = GETQUERY(query);
1159 nrm.buflen = 32;
1160 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1161 *(nrm.cur) = '\0';
1162 nrm.op = GETOPERAND(query);
1163 infix(&nrm, -1 /* lowest priority */ , false);
1165 PG_FREE_IF_COPY(query, 0);
1166 PG_RETURN_CSTRING(nrm.buf);
1170 * Binary Input / Output functions. The binary format is as follows:
1172 * uint32 number of operators/operands in the query
1174 * Followed by the operators and operands, in prefix notation. For each
1175 * operand:
1177 * uint8 type, QI_VAL
1178 * uint8 weight
1179 * operand text in client encoding, null-terminated
1180 * uint8 prefix
1182 * For each operator:
1183 * uint8 type, QI_OPR
1184 * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT.
1185 * uint16 distance (only for OP_PHRASE)
1187 Datum
1188 tsquerysend(PG_FUNCTION_ARGS)
1190 TSQuery query = PG_GETARG_TSQUERY(0);
1191 StringInfoData buf;
1192 int i;
1193 QueryItem *item = GETQUERY(query);
1195 pq_begintypsend(&buf);
1197 pq_sendint32(&buf, query->size);
1198 for (i = 0; i < query->size; i++)
1200 pq_sendint8(&buf, item->type);
1202 switch (item->type)
1204 case QI_VAL:
1205 pq_sendint8(&buf, item->qoperand.weight);
1206 pq_sendint8(&buf, item->qoperand.prefix);
1207 pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
1208 break;
1209 case QI_OPR:
1210 pq_sendint8(&buf, item->qoperator.oper);
1211 if (item->qoperator.oper == OP_PHRASE)
1212 pq_sendint16(&buf, item->qoperator.distance);
1213 break;
1214 default:
1215 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1217 item++;
1220 PG_FREE_IF_COPY(query, 0);
1222 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
1225 Datum
1226 tsqueryrecv(PG_FUNCTION_ARGS)
1228 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
1229 TSQuery query;
1230 int i,
1231 len;
1232 QueryItem *item;
1233 int datalen;
1234 char *ptr;
1235 uint32 size;
1236 const char **operands;
1237 bool needcleanup;
1239 size = pq_getmsgint(buf, sizeof(uint32));
1240 if (size > (MaxAllocSize / sizeof(QueryItem)))
1241 elog(ERROR, "invalid size of tsquery");
1243 /* Allocate space to temporarily hold operand strings */
1244 operands = palloc(size * sizeof(char *));
1246 /* Allocate space for all the QueryItems. */
1247 len = HDRSIZETQ + sizeof(QueryItem) * size;
1248 query = (TSQuery) palloc0(len);
1249 query->size = size;
1250 item = GETQUERY(query);
1252 datalen = 0;
1253 for (i = 0; i < size; i++)
1255 item->type = (int8) pq_getmsgint(buf, sizeof(int8));
1257 if (item->type == QI_VAL)
1259 size_t val_len; /* length after recoding to server
1260 * encoding */
1261 uint8 weight;
1262 uint8 prefix;
1263 const char *val;
1264 pg_crc32 valcrc;
1266 weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
1267 prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
1268 val = pq_getmsgstring(buf);
1269 val_len = strlen(val);
1271 /* Sanity checks */
1273 if (weight > 0xF)
1274 elog(ERROR, "invalid tsquery: invalid weight bitmap");
1276 if (val_len > MAXSTRLEN)
1277 elog(ERROR, "invalid tsquery: operand too long");
1279 if (datalen > MAXSTRPOS)
1280 elog(ERROR, "invalid tsquery: total operand length exceeded");
1282 /* Looks valid. */
1284 INIT_LEGACY_CRC32(valcrc);
1285 COMP_LEGACY_CRC32(valcrc, val, val_len);
1286 FIN_LEGACY_CRC32(valcrc);
1288 item->qoperand.weight = weight;
1289 item->qoperand.prefix = (prefix) ? true : false;
1290 item->qoperand.valcrc = (int32) valcrc;
1291 item->qoperand.length = val_len;
1292 item->qoperand.distance = datalen;
1295 * Operand strings are copied to the final struct after this loop;
1296 * here we just collect them to an array
1298 operands[i] = val;
1300 datalen += val_len + 1; /* + 1 for the '\0' terminator */
1302 else if (item->type == QI_OPR)
1304 int8 oper;
1306 oper = (int8) pq_getmsgint(buf, sizeof(int8));
1307 if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
1308 elog(ERROR, "invalid tsquery: unrecognized operator type %d",
1309 (int) oper);
1310 if (i == size - 1)
1311 elog(ERROR, "invalid pointer to right operand");
1313 item->qoperator.oper = oper;
1314 if (oper == OP_PHRASE)
1315 item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
1317 else
1318 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1320 item++;
1323 /* Enlarge buffer to make room for the operand values. */
1324 query = (TSQuery) repalloc(query, len + datalen);
1325 item = GETQUERY(query);
1326 ptr = GETOPERAND(query);
1329 * Fill in the left-pointers. Checks that the tree is well-formed as a
1330 * side-effect.
1332 findoprnd(item, size, &needcleanup);
1334 /* Can't have found any QI_VALSTOP nodes */
1335 Assert(!needcleanup);
1337 /* Copy operands to output struct */
1338 for (i = 0; i < size; i++)
1340 if (item->type == QI_VAL)
1342 memcpy(ptr, operands[i], item->qoperand.length + 1);
1343 ptr += item->qoperand.length + 1;
1345 item++;
1348 pfree(operands);
1350 Assert(ptr - GETOPERAND(query) == datalen);
1352 SET_VARSIZE(query, len + datalen);
1354 PG_RETURN_TSQUERY(query);
1358 * debug function, used only for view query
1359 * which will be executed in non-leaf pages in index
1361 Datum
1362 tsquerytree(PG_FUNCTION_ARGS)
1364 TSQuery query = PG_GETARG_TSQUERY(0);
1365 INFIX nrm;
1366 text *res;
1367 QueryItem *q;
1368 int len;
1370 if (query->size == 0)
1372 res = (text *) palloc(VARHDRSZ);
1373 SET_VARSIZE(res, VARHDRSZ);
1374 PG_RETURN_POINTER(res);
1377 q = clean_NOT(GETQUERY(query), &len);
1379 if (!q)
1381 res = cstring_to_text("T");
1383 else
1385 nrm.curpol = q;
1386 nrm.buflen = 32;
1387 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1388 *(nrm.cur) = '\0';
1389 nrm.op = GETOPERAND(query);
1390 infix(&nrm, -1, false);
1391 res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
1392 pfree(q);
1395 PG_FREE_IF_COPY(query, 0);
1397 PG_RETURN_TEXT_P(res);