4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This module contains code that implements a parser for fts3 query strings
14 ** (the right-hand argument to the MATCH operator). Because the supported
15 ** syntax is relatively simple, the whole tokenizer/parser system is
19 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
22 ** By default, this module parses the legacy syntax that has been
23 ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
24 ** is defined, then it uses the new syntax. The differences between
25 ** the new and the old syntaxes are:
27 ** a) The new syntax supports parenthesis. The old does not.
29 ** b) The new syntax supports the AND and NOT operators. The old does not.
31 ** c) The old syntax supports the "-" token qualifier. This is not
32 ** supported by the new syntax (it is replaced by the NOT operator).
34 ** d) When using the old syntax, the OR operator has a greater precedence
35 ** than an implicit AND. When using the new, both implicity and explicit
36 ** AND operators have a higher precedence than OR.
38 ** If compiled with SQLITE_TEST defined, then this module exports the
39 ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
40 ** to zero causes the module to use the old syntax. If it is set to
41 ** non-zero the new syntax is activated. This is so both syntaxes can
42 ** be tested using a single build of testfixture.
44 ** The following describes the syntax supported by the fts3 MATCH
45 ** operator in a similar format to that used by the lemon parser
46 ** generator. This module does not use actually lemon, it uses a
49 ** query ::= andexpr (OR andexpr)*.
51 ** andexpr ::= notexpr (AND? notexpr)*.
53 ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
54 ** notexpr ::= LP query RP.
56 ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
59 ** distance_opt ::= / INTEGER.
62 ** phrase ::= COLUMN:TOKEN.
63 ** phrase ::= "TOKEN TOKEN TOKEN...".
67 int sqlite3_fts3_enable_parentheses
= 0;
69 # ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
70 # define sqlite3_fts3_enable_parentheses 1
72 # define sqlite3_fts3_enable_parentheses 0
77 ** Default span for NEAR operators.
79 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
86 ** This variable is used by function getNextNode(). When getNextNode() is
87 ** called, it sets ParseContext.isNot to true if the 'next node' is a
88 ** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
89 ** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
92 typedef struct ParseContext ParseContext
;
94 sqlite3_tokenizer
*pTokenizer
; /* Tokenizer module */
95 int iLangid
; /* Language id used with tokenizer */
96 const char **azCol
; /* Array of column names for fts3 table */
97 int bFts4
; /* True to allow FTS4-only syntax */
98 int nCol
; /* Number of entries in azCol[] */
99 int iDefaultCol
; /* Default column to query */
100 int isNot
; /* True if getNextNode() sees a unary - */
101 sqlite3_context
*pCtx
; /* Write error message here */
102 int nNest
; /* Number of nested brackets */
106 ** This function is equivalent to the standard isspace() function.
108 ** The standard isspace() can be awkward to use safely, because although it
109 ** is defined to accept an argument of type int, its behavior when passed
110 ** an integer that falls outside of the range of the unsigned char type
111 ** is undefined (and sometimes, "undefined" means segfault). This wrapper
112 ** is defined to accept an argument of type char, and always returns 0 for
113 ** any values that fall outside of the range of the unsigned char type (i.e.
116 static int fts3isspace(char c
){
117 return c
==' ' || c
=='\t' || c
=='\n' || c
=='\r' || c
=='\v' || c
=='\f';
121 ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
122 ** zero the memory before returning a pointer to it. If unsuccessful,
125 static void *fts3MallocZero(int nByte
){
126 void *pRet
= sqlite3_malloc(nByte
);
127 if( pRet
) memset(pRet
, 0, nByte
);
131 int sqlite3Fts3OpenTokenizer(
132 sqlite3_tokenizer
*pTokenizer
,
136 sqlite3_tokenizer_cursor
**ppCsr
138 sqlite3_tokenizer_module
const *pModule
= pTokenizer
->pModule
;
139 sqlite3_tokenizer_cursor
*pCsr
= 0;
142 rc
= pModule
->xOpen(pTokenizer
, z
, n
, &pCsr
);
143 assert( rc
==SQLITE_OK
|| pCsr
==0 );
145 pCsr
->pTokenizer
= pTokenizer
;
146 if( pModule
->iVersion
>=1 ){
147 rc
= pModule
->xLanguageid(pCsr
, iLangid
);
149 pModule
->xClose(pCsr
);
159 ** Function getNextNode(), which is called by fts3ExprParse(), may itself
160 ** call fts3ExprParse(). So this forward declaration is required.
162 static int fts3ExprParse(ParseContext
*, const char *, int, Fts3Expr
**, int *);
165 ** Extract the next token from buffer z (length n) using the tokenizer
166 ** and other information (column names etc.) in pParse. Create an Fts3Expr
167 ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
168 ** single token and set *ppExpr to point to it. If the end of the buffer is
169 ** reached before a token is found, set *ppExpr to zero. It is the
170 ** responsibility of the caller to eventually deallocate the allocated
171 ** Fts3Expr structure (if any) by passing it to sqlite3_free().
173 ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
176 static int getNextToken(
177 ParseContext
*pParse
, /* fts3 query parse context */
178 int iCol
, /* Value for Fts3Phrase.iColumn */
179 const char *z
, int n
, /* Input string */
180 Fts3Expr
**ppExpr
, /* OUT: expression */
181 int *pnConsumed
/* OUT: Number of bytes consumed */
183 sqlite3_tokenizer
*pTokenizer
= pParse
->pTokenizer
;
184 sqlite3_tokenizer_module
const *pModule
= pTokenizer
->pModule
;
186 sqlite3_tokenizer_cursor
*pCursor
;
190 /* Set variable i to the maximum number of bytes of input to tokenize. */
192 if( sqlite3_fts3_enable_parentheses
&& (z
[i
]=='(' || z
[i
]==')') ) break;
193 if( z
[i
]=='"' ) break;
197 rc
= sqlite3Fts3OpenTokenizer(pTokenizer
, pParse
->iLangid
, z
, i
, &pCursor
);
200 int nToken
= 0, iStart
= 0, iEnd
= 0, iPosition
= 0;
201 int nByte
; /* total space to allocate */
203 rc
= pModule
->xNext(pCursor
, &zToken
, &nToken
, &iStart
, &iEnd
, &iPosition
);
205 nByte
= sizeof(Fts3Expr
) + sizeof(Fts3Phrase
) + nToken
;
206 pRet
= (Fts3Expr
*)fts3MallocZero(nByte
);
210 pRet
->eType
= FTSQUERY_PHRASE
;
211 pRet
->pPhrase
= (Fts3Phrase
*)&pRet
[1];
212 pRet
->pPhrase
->nToken
= 1;
213 pRet
->pPhrase
->iColumn
= iCol
;
214 pRet
->pPhrase
->aToken
[0].n
= nToken
;
215 pRet
->pPhrase
->aToken
[0].z
= (char *)&pRet
->pPhrase
[1];
216 memcpy(pRet
->pPhrase
->aToken
[0].z
, zToken
, nToken
);
218 if( iEnd
<n
&& z
[iEnd
]=='*' ){
219 pRet
->pPhrase
->aToken
[0].isPrefix
= 1;
224 if( !sqlite3_fts3_enable_parentheses
225 && iStart
>0 && z
[iStart
-1]=='-'
229 }else if( pParse
->bFts4
&& iStart
>0 && z
[iStart
-1]=='^' ){
230 pRet
->pPhrase
->aToken
[0].bFirst
= 1;
239 }else if( i
&& rc
==SQLITE_DONE
){
243 pModule
->xClose(pCursor
);
252 ** Enlarge a memory allocation. If an out-of-memory allocation occurs,
253 ** then free the old allocation.
255 static void *fts3ReallocOrFree(void *pOrig
, int nNew
){
256 void *pRet
= sqlite3_realloc(pOrig
, nNew
);
264 ** Buffer zInput, length nInput, contains the contents of a quoted string
265 ** that appeared as part of an fts3 query expression. Neither quote character
266 ** is included in the buffer. This function attempts to tokenize the entire
267 ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
268 ** containing the results.
270 ** If successful, SQLITE_OK is returned and *ppExpr set to point at the
271 ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
272 ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
275 static int getNextString(
276 ParseContext
*pParse
, /* fts3 query parse context */
277 const char *zInput
, int nInput
, /* Input string */
278 Fts3Expr
**ppExpr
/* OUT: expression */
280 sqlite3_tokenizer
*pTokenizer
= pParse
->pTokenizer
;
281 sqlite3_tokenizer_module
const *pModule
= pTokenizer
->pModule
;
284 sqlite3_tokenizer_cursor
*pCursor
= 0;
288 const int nSpace
= sizeof(Fts3Expr
) + sizeof(Fts3Phrase
);
291 /* The final Fts3Expr data structure, including the Fts3Phrase,
292 ** Fts3PhraseToken structures token buffers are all stored as a single
293 ** allocation so that the expression can be freed with a single call to
294 ** sqlite3_free(). Setting this up requires a two pass approach.
296 ** The first pass, in the block below, uses a tokenizer cursor to iterate
297 ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
298 ** to assemble data in two dynamic buffers:
300 ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
301 ** structure, followed by the array of Fts3PhraseToken
302 ** structures. This pass only populates the Fts3PhraseToken array.
304 ** Buffer zTemp: Contains copies of all tokens.
306 ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
307 ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
310 rc
= sqlite3Fts3OpenTokenizer(
311 pTokenizer
, pParse
->iLangid
, zInput
, nInput
, &pCursor
);
314 for(ii
=0; rc
==SQLITE_OK
; ii
++){
316 int nByte
= 0, iBegin
= 0, iEnd
= 0, iPos
= 0;
317 rc
= pModule
->xNext(pCursor
, &zByte
, &nByte
, &iBegin
, &iEnd
, &iPos
);
319 Fts3PhraseToken
*pToken
;
321 p
= fts3ReallocOrFree(p
, nSpace
+ ii
*sizeof(Fts3PhraseToken
));
322 if( !p
) goto no_mem
;
324 zTemp
= fts3ReallocOrFree(zTemp
, nTemp
+ nByte
);
325 if( !zTemp
) goto no_mem
;
327 assert( nToken
==ii
);
328 pToken
= &((Fts3Phrase
*)(&p
[1]))->aToken
[ii
];
329 memset(pToken
, 0, sizeof(Fts3PhraseToken
));
331 memcpy(&zTemp
[nTemp
], zByte
, nByte
);
335 pToken
->isPrefix
= (iEnd
<nInput
&& zInput
[iEnd
]=='*');
336 pToken
->bFirst
= (iBegin
>0 && zInput
[iBegin
-1]=='^');
341 pModule
->xClose(pCursor
);
345 if( rc
==SQLITE_DONE
){
349 p
= fts3ReallocOrFree(p
, nSpace
+ nToken
*sizeof(Fts3PhraseToken
) + nTemp
);
350 if( !p
) goto no_mem
;
351 memset(p
, 0, (char *)&(((Fts3Phrase
*)&p
[1])->aToken
[0])-(char *)p
);
352 p
->eType
= FTSQUERY_PHRASE
;
353 p
->pPhrase
= (Fts3Phrase
*)&p
[1];
354 p
->pPhrase
->iColumn
= pParse
->iDefaultCol
;
355 p
->pPhrase
->nToken
= nToken
;
357 zBuf
= (char *)&p
->pPhrase
->aToken
[nToken
];
359 memcpy(zBuf
, zTemp
, nTemp
);
365 for(jj
=0; jj
<p
->pPhrase
->nToken
; jj
++){
366 p
->pPhrase
->aToken
[jj
].z
= zBuf
;
367 zBuf
+= p
->pPhrase
->aToken
[jj
].n
;
377 pModule
->xClose(pCursor
);
386 ** The output variable *ppExpr is populated with an allocated Fts3Expr
387 ** structure, or set to 0 if the end of the input buffer is reached.
389 ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
390 ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
391 ** If SQLITE_ERROR is returned, pContext is populated with an error message.
393 static int getNextNode(
394 ParseContext
*pParse
, /* fts3 query parse context */
395 const char *z
, int n
, /* Input string */
396 Fts3Expr
**ppExpr
, /* OUT: expression */
397 int *pnConsumed
/* OUT: Number of bytes consumed */
399 static const struct Fts3Keyword
{
400 char *z
; /* Keyword text */
401 unsigned char n
; /* Length of the keyword */
402 unsigned char parenOnly
; /* Only valid in paren mode */
403 unsigned char eType
; /* Keyword code */
405 { "OR" , 2, 0, FTSQUERY_OR
},
406 { "AND", 3, 1, FTSQUERY_AND
},
407 { "NOT", 3, 1, FTSQUERY_NOT
},
408 { "NEAR", 4, 0, FTSQUERY_NEAR
}
416 const char *zInput
= z
;
421 /* Skip over any whitespace before checking for a keyword, an open or
422 ** close bracket, or a quoted string.
424 while( nInput
>0 && fts3isspace(*zInput
) ){
432 /* See if we are dealing with a keyword. */
433 for(ii
=0; ii
<(int)(sizeof(aKeyword
)/sizeof(struct Fts3Keyword
)); ii
++){
434 const struct Fts3Keyword
*pKey
= &aKeyword
[ii
];
436 if( (pKey
->parenOnly
& ~sqlite3_fts3_enable_parentheses
)!=0 ){
440 if( nInput
>=pKey
->n
&& 0==memcmp(zInput
, pKey
->z
, pKey
->n
) ){
441 int nNear
= SQLITE_FTS3_DEFAULT_NEAR_PARAM
;
445 /* If this is a "NEAR" keyword, check for an explicit nearness. */
446 if( pKey
->eType
==FTSQUERY_NEAR
){
448 if( zInput
[4]=='/' && zInput
[5]>='0' && zInput
[5]<='9' ){
450 for(nKey
=5; zInput
[nKey
]>='0' && zInput
[nKey
]<='9'; nKey
++){
451 nNear
= nNear
* 10 + (zInput
[nKey
] - '0');
456 /* At this point this is probably a keyword. But for that to be true,
457 ** the next byte must contain either whitespace, an open or close
458 ** parenthesis, a quote character, or EOF.
460 cNext
= zInput
[nKey
];
461 if( fts3isspace(cNext
)
462 || cNext
=='"' || cNext
=='(' || cNext
==')' || cNext
==0
464 pRet
= (Fts3Expr
*)fts3MallocZero(sizeof(Fts3Expr
));
468 pRet
->eType
= pKey
->eType
;
471 *pnConsumed
= (int)((zInput
- z
) + nKey
);
475 /* Turns out that wasn't a keyword after all. This happens if the
476 ** user has supplied a token such as "ORacle". Continue.
481 /* See if we are dealing with a quoted phrase. If this is the case, then
482 ** search for the closing quote and pass the whole string to getNextString()
483 ** for processing. This is easy to do, as fts3 has no syntax for escaping
484 ** a quote character embedded in a string.
487 for(ii
=1; ii
<nInput
&& zInput
[ii
]!='"'; ii
++);
488 *pnConsumed
= (int)((zInput
- z
) + ii
+ 1);
492 return getNextString(pParse
, &zInput
[1], ii
-1, ppExpr
);
495 if( sqlite3_fts3_enable_parentheses
){
499 rc
= fts3ExprParse(pParse
, zInput
+1, nInput
-1, ppExpr
, &nConsumed
);
500 if( rc
==SQLITE_OK
&& !*ppExpr
){ rc
= SQLITE_DONE
; }
501 *pnConsumed
= (int)(zInput
- z
) + 1 + nConsumed
;
503 }else if( *zInput
==')' ){
505 *pnConsumed
= (int)((zInput
- z
) + 1);
511 /* If control flows to this point, this must be a regular token, or
512 ** the end of the input. Read a regular token using the sqlite3_tokenizer
513 ** interface. Before doing so, figure out if there is an explicit
514 ** column specifier for the token.
516 ** TODO: Strangely, it is not possible to associate a column specifier
517 ** with a quoted phrase, only with a single token. Not sure if this was
518 ** an implementation artifact or an intentional decision when fts3 was
519 ** first implemented. Whichever it was, this module duplicates the
522 iCol
= pParse
->iDefaultCol
;
524 for(ii
=0; ii
<pParse
->nCol
; ii
++){
525 const char *zStr
= pParse
->azCol
[ii
];
526 int nStr
= (int)strlen(zStr
);
527 if( nInput
>nStr
&& zInput
[nStr
]==':'
528 && sqlite3_strnicmp(zStr
, zInput
, nStr
)==0
531 iColLen
= (int)((zInput
- z
) + nStr
+ 1);
535 rc
= getNextToken(pParse
, iCol
, &z
[iColLen
], n
-iColLen
, ppExpr
, pnConsumed
);
536 *pnConsumed
+= iColLen
;
541 ** The argument is an Fts3Expr structure for a binary operator (any type
542 ** except an FTSQUERY_PHRASE). Return an integer value representing the
543 ** precedence of the operator. Lower values have a higher precedence (i.e.
544 ** group more tightly). For example, in the C language, the == operator
545 ** groups more tightly than ||, and would therefore have a higher precedence.
547 ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
548 ** is defined), the order of the operators in precedence from highest to
553 ** AND (including implicit ANDs)
556 ** Note that when using the old query syntax, the OR operator has a higher
557 ** precedence than the AND operator.
559 static int opPrecedence(Fts3Expr
*p
){
560 assert( p
->eType
!=FTSQUERY_PHRASE
);
561 if( sqlite3_fts3_enable_parentheses
){
563 }else if( p
->eType
==FTSQUERY_NEAR
){
565 }else if( p
->eType
==FTSQUERY_OR
){
568 assert( p
->eType
==FTSQUERY_AND
);
573 ** Argument ppHead contains a pointer to the current head of a query
574 ** expression tree being parsed. pPrev is the expression node most recently
575 ** inserted into the tree. This function adds pNew, which is always a binary
576 ** operator node, into the expression tree based on the relative precedence
577 ** of pNew and the existing nodes of the tree. This may result in the head
578 ** of the tree changing, in which case *ppHead is set to the new root node.
580 static void insertBinaryOperator(
581 Fts3Expr
**ppHead
, /* Pointer to the root node of a tree */
582 Fts3Expr
*pPrev
, /* Node most recently inserted into the tree */
583 Fts3Expr
*pNew
/* New binary node to insert into expression tree */
585 Fts3Expr
*pSplit
= pPrev
;
586 while( pSplit
->pParent
&& opPrecedence(pSplit
->pParent
)<=opPrecedence(pNew
) ){
587 pSplit
= pSplit
->pParent
;
590 if( pSplit
->pParent
){
591 assert( pSplit
->pParent
->pRight
==pSplit
);
592 pSplit
->pParent
->pRight
= pNew
;
593 pNew
->pParent
= pSplit
->pParent
;
597 pNew
->pLeft
= pSplit
;
598 pSplit
->pParent
= pNew
;
602 ** Parse the fts3 query expression found in buffer z, length n. This function
603 ** returns either when the end of the buffer is reached or an unmatched
604 ** closing bracket - ')' - is encountered.
606 ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
607 ** parsed form of the expression and *pnConsumed is set to the number of
608 ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
609 ** (out of memory error) or SQLITE_ERROR (parse error) is returned.
611 static int fts3ExprParse(
612 ParseContext
*pParse
, /* fts3 query parse context */
613 const char *z
, int n
, /* Text of MATCH query */
614 Fts3Expr
**ppExpr
, /* OUT: Parsed query structure */
615 int *pnConsumed
/* OUT: Number of bytes consumed */
619 Fts3Expr
*pNotBranch
= 0; /* Only used in legacy parse mode */
623 int isRequirePhrase
= 1;
625 while( rc
==SQLITE_OK
){
629 rc
= getNextNode(pParse
, zIn
, nIn
, &p
, &nByte
);
630 assert( nByte
>0 || (rc
!=SQLITE_OK
&& p
==0) );
635 if( !sqlite3_fts3_enable_parentheses
636 && p
->eType
==FTSQUERY_PHRASE
&& pParse
->isNot
638 /* Create an implicit NOT operator. */
639 Fts3Expr
*pNot
= fts3MallocZero(sizeof(Fts3Expr
));
641 sqlite3Fts3ExprFree(p
);
645 pNot
->eType
= FTSQUERY_NOT
;
649 pNot
->pLeft
= pNotBranch
;
650 pNotBranch
->pParent
= pNot
;
655 int eType
= p
->eType
;
656 isPhrase
= (eType
==FTSQUERY_PHRASE
|| p
->pLeft
);
658 /* The isRequirePhrase variable is set to true if a phrase or
659 ** an expression contained in parenthesis is required. If a
660 ** binary operator (AND, OR, NOT or NEAR) is encounted when
661 ** isRequirePhrase is set, this is a syntax error.
663 if( !isPhrase
&& isRequirePhrase
){
664 sqlite3Fts3ExprFree(p
);
669 if( isPhrase
&& !isRequirePhrase
){
670 /* Insert an implicit AND operator. */
672 assert( pRet
&& pPrev
);
673 pAnd
= fts3MallocZero(sizeof(Fts3Expr
));
675 sqlite3Fts3ExprFree(p
);
679 pAnd
->eType
= FTSQUERY_AND
;
680 insertBinaryOperator(&pRet
, pPrev
, pAnd
);
684 /* This test catches attempts to make either operand of a NEAR
685 ** operator something other than a phrase. For example, either of
688 ** (bracketed expression) NEAR phrase
689 ** phrase NEAR (bracketed expression)
691 ** Return an error in either case.
694 (eType
==FTSQUERY_NEAR
&& !isPhrase
&& pPrev
->eType
!=FTSQUERY_PHRASE
)
695 || (eType
!=FTSQUERY_PHRASE
&& isPhrase
&& pPrev
->eType
==FTSQUERY_NEAR
)
697 sqlite3Fts3ExprFree(p
);
704 assert( pPrev
&& pPrev
->pLeft
&& pPrev
->pRight
==0 );
711 insertBinaryOperator(&pRet
, pPrev
, p
);
713 isRequirePhrase
= !isPhrase
;
719 assert( rc
!=SQLITE_OK
|| (nByte
>0 && nByte
<=nIn
) );
724 if( rc
==SQLITE_DONE
&& pRet
&& isRequirePhrase
){
728 if( rc
==SQLITE_DONE
){
730 if( !sqlite3_fts3_enable_parentheses
&& pNotBranch
){
734 Fts3Expr
*pIter
= pNotBranch
;
735 while( pIter
->pLeft
){
736 pIter
= pIter
->pLeft
;
739 pRet
->pParent
= pIter
;
744 *pnConsumed
= n
- nIn
;
748 sqlite3Fts3ExprFree(pRet
);
749 sqlite3Fts3ExprFree(pNotBranch
);
757 ** Return SQLITE_ERROR if the maximum depth of the expression tree passed
758 ** as the only argument is more than nMaxDepth.
760 static int fts3ExprCheckDepth(Fts3Expr
*p
, int nMaxDepth
){
766 rc
= fts3ExprCheckDepth(p
->pLeft
, nMaxDepth
-1);
768 rc
= fts3ExprCheckDepth(p
->pRight
, nMaxDepth
-1);
776 ** This function attempts to transform the expression tree at (*pp) to
777 ** an equivalent but more balanced form. The tree is modified in place.
778 ** If successful, SQLITE_OK is returned and (*pp) set to point to the
779 ** new root expression node.
781 ** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
783 ** Otherwise, if an error occurs, an SQLite error code is returned and
784 ** expression (*pp) freed.
786 static int fts3ExprBalance(Fts3Expr
**pp
, int nMaxDepth
){
787 int rc
= SQLITE_OK
; /* Return code */
788 Fts3Expr
*pRoot
= *pp
; /* Initial root node */
789 Fts3Expr
*pFree
= 0; /* List of free nodes. Linked by pParent. */
790 int eType
= pRoot
->eType
; /* Type of node in this tree */
796 if( rc
==SQLITE_OK
&& (eType
==FTSQUERY_AND
|| eType
==FTSQUERY_OR
) ){
798 apLeaf
= (Fts3Expr
**)sqlite3_malloc(sizeof(Fts3Expr
*) * nMaxDepth
);
802 memset(apLeaf
, 0, sizeof(Fts3Expr
*) * nMaxDepth
);
809 /* Set $p to point to the left-most leaf in the tree of eType nodes. */
810 for(p
=pRoot
; p
->eType
==eType
; p
=p
->pLeft
){
811 assert( p
->pParent
==0 || p
->pParent
->pLeft
==p
);
812 assert( p
->pLeft
&& p
->pRight
);
815 /* This loop runs once for each leaf in the tree of eType nodes. */
818 Fts3Expr
*pParent
= p
->pParent
; /* Current parent of p */
820 assert( pParent
==0 || pParent
->pLeft
==p
);
827 rc
= fts3ExprBalance(&p
, nMaxDepth
-1);
828 if( rc
!=SQLITE_OK
) break;
830 for(iLvl
=0; p
&& iLvl
<nMaxDepth
; iLvl
++){
831 if( apLeaf
[iLvl
]==0 ){
836 pFree
->pLeft
= apLeaf
[iLvl
];
838 pFree
->pLeft
->pParent
= pFree
;
839 pFree
->pRight
->pParent
= pFree
;
842 pFree
= pFree
->pParent
;
848 sqlite3Fts3ExprFree(p
);
853 /* If that was the last leaf node, break out of the loop */
854 if( pParent
==0 ) break;
856 /* Set $p to point to the next leaf in the tree of eType nodes */
857 for(p
=pParent
->pRight
; p
->eType
==eType
; p
=p
->pLeft
);
859 /* Remove pParent from the original tree. */
860 assert( pParent
->pParent
==0 || pParent
->pParent
->pLeft
==pParent
);
861 pParent
->pRight
->pParent
= pParent
->pParent
;
862 if( pParent
->pParent
){
863 pParent
->pParent
->pLeft
= pParent
->pRight
;
865 assert( pParent
==pRoot
);
866 pRoot
= pParent
->pRight
;
869 /* Link pParent into the free node list. It will be used as an
870 ** internal node of the new tree. */
871 pParent
->pParent
= pFree
;
877 for(i
=0; i
<nMaxDepth
; i
++){
885 pFree
->pLeft
= apLeaf
[i
];
886 pFree
->pLeft
->pParent
= pFree
;
887 pFree
->pRight
->pParent
= pFree
;
890 pFree
= pFree
->pParent
;
897 /* An error occurred. Delete the contents of the apLeaf[] array
898 ** and pFree list. Everything else is cleaned up by the call to
899 ** sqlite3Fts3ExprFree(pRoot) below. */
901 for(i
=0; i
<nMaxDepth
; i
++){
902 sqlite3Fts3ExprFree(apLeaf
[i
]);
904 while( (pDel
=pFree
)!=0 ){
905 pFree
= pDel
->pParent
;
911 sqlite3_free( apLeaf
);
916 sqlite3Fts3ExprFree(pRoot
);
924 ** This function is similar to sqlite3Fts3ExprParse(), with the following
927 ** 1. It does not do expression rebalancing.
928 ** 2. It does not check that the expression does not exceed the
929 ** maximum allowable depth.
930 ** 3. Even if it fails, *ppExpr may still be set to point to an
931 ** expression tree. It should be deleted using sqlite3Fts3ExprFree()
934 static int fts3ExprParseUnbalanced(
935 sqlite3_tokenizer
*pTokenizer
, /* Tokenizer module */
936 int iLangid
, /* Language id for tokenizer */
937 char **azCol
, /* Array of column names for fts3 table */
938 int bFts4
, /* True to allow FTS4-only syntax */
939 int nCol
, /* Number of entries in azCol[] */
940 int iDefaultCol
, /* Default column to query */
941 const char *z
, int n
, /* Text of MATCH query */
942 Fts3Expr
**ppExpr
/* OUT: Parsed query structure */
948 memset(&sParse
, 0, sizeof(ParseContext
));
949 sParse
.pTokenizer
= pTokenizer
;
950 sParse
.iLangid
= iLangid
;
951 sParse
.azCol
= (const char **)azCol
;
953 sParse
.iDefaultCol
= iDefaultCol
;
954 sParse
.bFts4
= bFts4
;
962 rc
= fts3ExprParse(&sParse
, z
, n
, ppExpr
, &nParsed
);
963 assert( rc
==SQLITE_OK
|| *ppExpr
==0 );
965 /* Check for mismatched parenthesis */
966 if( rc
==SQLITE_OK
&& sParse
.nNest
){
974 ** Parameters z and n contain a pointer to and length of a buffer containing
975 ** an fts3 query expression, respectively. This function attempts to parse the
976 ** query expression and create a tree of Fts3Expr structures representing the
977 ** parsed expression. If successful, *ppExpr is set to point to the head
978 ** of the parsed expression tree and SQLITE_OK is returned. If an error
979 ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
980 ** error) is returned and *ppExpr is set to 0.
982 ** If parameter n is a negative number, then z is assumed to point to a
983 ** nul-terminated string and the length is determined using strlen().
985 ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
986 ** use to normalize query tokens while parsing the expression. The azCol[]
987 ** array, which is assumed to contain nCol entries, should contain the names
988 ** of each column in the target fts3 table, in order from left to right.
989 ** Column names must be nul-terminated strings.
991 ** The iDefaultCol parameter should be passed the index of the table column
992 ** that appears on the left-hand-side of the MATCH operator (the default
993 ** column to match against for tokens for which a column name is not explicitly
994 ** specified as part of the query string), or -1 if tokens may by default
995 ** match any table column.
997 int sqlite3Fts3ExprParse(
998 sqlite3_tokenizer
*pTokenizer
, /* Tokenizer module */
999 int iLangid
, /* Language id for tokenizer */
1000 char **azCol
, /* Array of column names for fts3 table */
1001 int bFts4
, /* True to allow FTS4-only syntax */
1002 int nCol
, /* Number of entries in azCol[] */
1003 int iDefaultCol
, /* Default column to query */
1004 const char *z
, int n
, /* Text of MATCH query */
1005 Fts3Expr
**ppExpr
, /* OUT: Parsed query structure */
1006 char **pzErr
/* OUT: Error message (sqlite3_malloc) */
1008 int rc
= fts3ExprParseUnbalanced(
1009 pTokenizer
, iLangid
, azCol
, bFts4
, nCol
, iDefaultCol
, z
, n
, ppExpr
1012 /* Rebalance the expression. And check that its depth does not exceed
1013 ** SQLITE_FTS3_MAX_EXPR_DEPTH. */
1014 if( rc
==SQLITE_OK
&& *ppExpr
){
1015 rc
= fts3ExprBalance(ppExpr
, SQLITE_FTS3_MAX_EXPR_DEPTH
);
1016 if( rc
==SQLITE_OK
){
1017 rc
= fts3ExprCheckDepth(*ppExpr
, SQLITE_FTS3_MAX_EXPR_DEPTH
);
1021 if( rc
!=SQLITE_OK
){
1022 sqlite3Fts3ExprFree(*ppExpr
);
1024 if( rc
==SQLITE_TOOBIG
){
1025 *pzErr
= sqlite3_mprintf(
1026 "FTS expression tree is too large (maximum depth %d)",
1027 SQLITE_FTS3_MAX_EXPR_DEPTH
1030 }else if( rc
==SQLITE_ERROR
){
1031 *pzErr
= sqlite3_mprintf("malformed MATCH expression: [%s]", z
);
1039 ** Free a single node of an expression tree.
1041 static void fts3FreeExprNode(Fts3Expr
*p
){
1042 assert( p
->eType
==FTSQUERY_PHRASE
|| p
->pPhrase
==0 );
1043 sqlite3Fts3EvalPhraseCleanup(p
->pPhrase
);
1044 sqlite3_free(p
->aMI
);
1049 ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
1051 ** This function would be simpler if it recursively called itself. But
1052 ** that would mean passing a sufficiently large expression to ExprParse()
1053 ** could cause a stack overflow.
1055 void sqlite3Fts3ExprFree(Fts3Expr
*pDel
){
1057 assert( pDel
==0 || pDel
->pParent
==0 );
1058 for(p
=pDel
; p
&& (p
->pLeft
||p
->pRight
); p
=(p
->pLeft
? p
->pLeft
: p
->pRight
)){
1059 assert( p
->pParent
==0 || p
==p
->pParent
->pRight
|| p
==p
->pParent
->pLeft
);
1062 Fts3Expr
*pParent
= p
->pParent
;
1063 fts3FreeExprNode(p
);
1064 if( pParent
&& p
==pParent
->pLeft
&& pParent
->pRight
){
1065 p
= pParent
->pRight
;
1066 while( p
&& (p
->pLeft
|| p
->pRight
) ){
1067 assert( p
==p
->pParent
->pRight
|| p
==p
->pParent
->pLeft
);
1068 p
= (p
->pLeft
? p
->pLeft
: p
->pRight
);
1076 /****************************************************************************
1077 *****************************************************************************
1078 ** Everything after this point is just test code.
1086 ** Function to query the hash-table of tokenizers (see README.tokenizers).
1088 static int queryTestTokenizer(
1091 const sqlite3_tokenizer_module
**pp
1094 sqlite3_stmt
*pStmt
;
1095 const char zSql
[] = "SELECT fts3_tokenizer(?)";
1098 rc
= sqlite3_prepare_v2(db
, zSql
, -1, &pStmt
, 0);
1099 if( rc
!=SQLITE_OK
){
1103 sqlite3_bind_text(pStmt
, 1, zName
, -1, SQLITE_STATIC
);
1104 if( SQLITE_ROW
==sqlite3_step(pStmt
) ){
1105 if( sqlite3_column_type(pStmt
, 0)==SQLITE_BLOB
){
1106 memcpy((void *)pp
, sqlite3_column_blob(pStmt
, 0), sizeof(*pp
));
1110 return sqlite3_finalize(pStmt
);
1114 ** Return a pointer to a buffer containing a text representation of the
1115 ** expression passed as the first argument. The buffer is obtained from
1116 ** sqlite3_malloc(). It is the responsibility of the caller to use
1117 ** sqlite3_free() to release the memory. If an OOM condition is encountered,
1118 ** NULL is returned.
1120 ** If the second argument is not NULL, then its contents are prepended to
1121 ** the returned expression text and then freed using sqlite3_free().
1123 static char *exprToString(Fts3Expr
*pExpr
, char *zBuf
){
1125 return sqlite3_mprintf("");
1127 switch( pExpr
->eType
){
1128 case FTSQUERY_PHRASE
: {
1129 Fts3Phrase
*pPhrase
= pExpr
->pPhrase
;
1131 zBuf
= sqlite3_mprintf(
1132 "%zPHRASE %d 0", zBuf
, pPhrase
->iColumn
);
1133 for(i
=0; zBuf
&& i
<pPhrase
->nToken
; i
++){
1134 zBuf
= sqlite3_mprintf("%z %.*s%s", zBuf
,
1135 pPhrase
->aToken
[i
].n
, pPhrase
->aToken
[i
].z
,
1136 (pPhrase
->aToken
[i
].isPrefix
?"+":"")
1143 zBuf
= sqlite3_mprintf("%zNEAR/%d ", zBuf
, pExpr
->nNear
);
1146 zBuf
= sqlite3_mprintf("%zNOT ", zBuf
);
1149 zBuf
= sqlite3_mprintf("%zAND ", zBuf
);
1152 zBuf
= sqlite3_mprintf("%zOR ", zBuf
);
1156 if( zBuf
) zBuf
= sqlite3_mprintf("%z{", zBuf
);
1157 if( zBuf
) zBuf
= exprToString(pExpr
->pLeft
, zBuf
);
1158 if( zBuf
) zBuf
= sqlite3_mprintf("%z} {", zBuf
);
1160 if( zBuf
) zBuf
= exprToString(pExpr
->pRight
, zBuf
);
1161 if( zBuf
) zBuf
= sqlite3_mprintf("%z}", zBuf
);
1167 ** This is the implementation of a scalar SQL function used to test the
1168 ** expression parser. It should be called as follows:
1170 ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
1172 ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
1173 ** to parse the query expression (see README.tokenizers). The second argument
1174 ** is the query expression to parse. Each subsequent argument is the name
1175 ** of a column of the fts3 table that the query expression may refer to.
1178 ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
1180 static void fts3ExprTest(
1181 sqlite3_context
*context
,
1183 sqlite3_value
**argv
1185 sqlite3_tokenizer_module
const *pModule
= 0;
1186 sqlite3_tokenizer
*pTokenizer
= 0;
1195 sqlite3
*db
= sqlite3_context_db_handle(context
);
1198 sqlite3_result_error(context
,
1199 "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
1204 rc
= queryTestTokenizer(db
,
1205 (const char *)sqlite3_value_text(argv
[0]), &pModule
);
1206 if( rc
==SQLITE_NOMEM
){
1207 sqlite3_result_error_nomem(context
);
1209 }else if( !pModule
){
1210 sqlite3_result_error(context
, "No such tokenizer module", -1);
1214 rc
= pModule
->xCreate(0, 0, &pTokenizer
);
1215 assert( rc
==SQLITE_NOMEM
|| rc
==SQLITE_OK
);
1216 if( rc
==SQLITE_NOMEM
){
1217 sqlite3_result_error_nomem(context
);
1220 pTokenizer
->pModule
= pModule
;
1222 zExpr
= (const char *)sqlite3_value_text(argv
[1]);
1223 nExpr
= sqlite3_value_bytes(argv
[1]);
1225 azCol
= (char **)sqlite3_malloc(nCol
*sizeof(char *));
1227 sqlite3_result_error_nomem(context
);
1230 for(ii
=0; ii
<nCol
; ii
++){
1231 azCol
[ii
] = (char *)sqlite3_value_text(argv
[ii
+2]);
1234 if( sqlite3_user_data(context
) ){
1236 rc
= sqlite3Fts3ExprParse(
1237 pTokenizer
, 0, azCol
, 0, nCol
, nCol
, zExpr
, nExpr
, &pExpr
, &zDummy
1239 assert( rc
==SQLITE_OK
|| pExpr
==0 );
1240 sqlite3_free(zDummy
);
1242 rc
= fts3ExprParseUnbalanced(
1243 pTokenizer
, 0, azCol
, 0, nCol
, nCol
, zExpr
, nExpr
, &pExpr
1247 if( rc
!=SQLITE_OK
&& rc
!=SQLITE_NOMEM
){
1248 sqlite3Fts3ExprFree(pExpr
);
1249 sqlite3_result_error(context
, "Error parsing expression", -1);
1250 }else if( rc
==SQLITE_NOMEM
|| !(zBuf
= exprToString(pExpr
, 0)) ){
1251 sqlite3_result_error_nomem(context
);
1253 sqlite3_result_text(context
, zBuf
, -1, SQLITE_TRANSIENT
);
1257 sqlite3Fts3ExprFree(pExpr
);
1260 if( pModule
&& pTokenizer
){
1261 rc
= pModule
->xDestroy(pTokenizer
);
1263 sqlite3_free(azCol
);
1267 ** Register the query expression parser test function fts3_exprtest()
1268 ** with database connection db.
1270 int sqlite3Fts3ExprInitTestInterface(sqlite3
* db
){
1271 int rc
= sqlite3_create_function(
1272 db
, "fts3_exprtest", -1, SQLITE_UTF8
, 0, fts3ExprTest
, 0, 0
1274 if( rc
==SQLITE_OK
){
1275 rc
= sqlite3_create_function(db
, "fts3_exprtest_rebalance",
1276 -1, SQLITE_UTF8
, (void *)1, fts3ExprTest
, 0, 0
1283 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */