4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
20 #ifndef SQLITE_AMALGAMATION
21 typedef sqlite3_int64 i64
;
25 ** Characters that may appear in the second argument to matchinfo().
27 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */
28 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */
29 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */
30 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */
31 #define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */
32 #define FTS3_MATCHINFO_LCS 's' /* nCol values */
33 #define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
34 #define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */
35 #define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */
38 ** The default value for the second argument to matchinfo().
40 #define FTS3_MATCHINFO_DEFAULT "pcx"
44 ** Used as an sqlite3Fts3ExprIterate() context when loading phrase doclists to
45 ** Fts3Expr.aDoclist[]/nDoclist.
47 typedef struct LoadDoclistCtx LoadDoclistCtx
;
48 struct LoadDoclistCtx
{
49 Fts3Cursor
*pCsr
; /* FTS3 Cursor */
50 int nPhrase
; /* Number of phrases seen so far */
51 int nToken
; /* Number of tokens seen so far */
55 ** The following types are used as part of the implementation of the
56 ** fts3BestSnippet() routine.
58 typedef struct SnippetIter SnippetIter
;
59 typedef struct SnippetPhrase SnippetPhrase
;
60 typedef struct SnippetFragment SnippetFragment
;
63 Fts3Cursor
*pCsr
; /* Cursor snippet is being generated from */
64 int iCol
; /* Extract snippet from this column */
65 int nSnippet
; /* Requested snippet length (in tokens) */
66 int nPhrase
; /* Number of phrases in query */
67 SnippetPhrase
*aPhrase
; /* Array of size nPhrase */
68 int iCurrent
; /* First token of current snippet */
71 struct SnippetPhrase
{
72 int nToken
; /* Number of tokens in phrase */
73 char *pList
; /* Pointer to start of phrase position list */
74 i64 iHead
; /* Next value in position list */
75 char *pHead
; /* Position list data following iHead */
76 i64 iTail
; /* Next value in trailing position list */
77 char *pTail
; /* Position list data following iTail */
80 struct SnippetFragment
{
81 int iCol
; /* Column snippet is extracted from */
82 int iPos
; /* Index of first token in snippet */
83 u64 covered
; /* Mask of query phrases covered */
84 u64 hlmask
; /* Mask of snippet terms to highlight */
88 ** This type is used as an sqlite3Fts3ExprIterate() context object while
89 ** accumulating the data returned by the matchinfo() function.
91 typedef struct MatchInfo MatchInfo
;
93 Fts3Cursor
*pCursor
; /* FTS3 Cursor */
94 int nCol
; /* Number of columns in table */
95 int nPhrase
; /* Number of matchable phrases in query */
96 sqlite3_int64 nDoc
; /* Number of docs in database */
98 u32
*aMatchinfo
; /* Pre-allocated buffer */
102 ** An instance of this structure is used to manage a pair of buffers, each
103 ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
106 struct MatchinfoBuffer
{
109 int bGlobal
; /* Set if global data is loaded */
116 ** The snippet() and offsets() functions both return text values. An instance
117 ** of the following structure is used to accumulate those values while the
118 ** functions are running. See fts3StringAppend() for details.
120 typedef struct StrBuffer StrBuffer
;
122 char *z
; /* Pointer to buffer containing string */
123 int n
; /* Length of z in bytes (excl. nul-term) */
124 int nAlloc
; /* Allocated size of buffer z in bytes */
128 /*************************************************************************
129 ** Start of MatchinfoBuffer code.
133 ** Allocate a two-slot MatchinfoBuffer object.
135 static MatchinfoBuffer
*fts3MIBufferNew(size_t nElem
, const char *zMatchinfo
){
136 MatchinfoBuffer
*pRet
;
137 sqlite3_int64 nByte
= sizeof(u32
) * (2*(sqlite3_int64
)nElem
+ 1)
138 + sizeof(MatchinfoBuffer
);
139 sqlite3_int64 nStr
= strlen(zMatchinfo
);
141 pRet
= sqlite3Fts3MallocZero(nByte
+ nStr
+1);
143 pRet
->aMatchinfo
[0] = (u8
*)(&pRet
->aMatchinfo
[1]) - (u8
*)pRet
;
144 pRet
->aMatchinfo
[1+nElem
] = pRet
->aMatchinfo
[0]
145 + sizeof(u32
)*((int)nElem
+1);
146 pRet
->nElem
= (int)nElem
;
147 pRet
->zMatchinfo
= ((char*)pRet
) + nByte
;
148 memcpy(pRet
->zMatchinfo
, zMatchinfo
, nStr
+1);
155 static void fts3MIBufferFree(void *p
){
156 MatchinfoBuffer
*pBuf
= (MatchinfoBuffer
*)((u8
*)p
- ((u32
*)p
)[-1]);
158 assert( (u32
*)p
==&pBuf
->aMatchinfo
[1]
159 || (u32
*)p
==&pBuf
->aMatchinfo
[pBuf
->nElem
+2]
161 if( (u32
*)p
==&pBuf
->aMatchinfo
[1] ){
167 if( pBuf
->aRef
[0]==0 && pBuf
->aRef
[1]==0 && pBuf
->aRef
[2]==0 ){
172 static void (*fts3MIBufferAlloc(MatchinfoBuffer
*p
, u32
**paOut
))(void*){
173 void (*xRet
)(void*) = 0;
178 aOut
= &p
->aMatchinfo
[1];
179 xRet
= fts3MIBufferFree
;
181 else if( p
->aRef
[2]==0 ){
183 aOut
= &p
->aMatchinfo
[p
->nElem
+2];
184 xRet
= fts3MIBufferFree
;
186 aOut
= (u32
*)sqlite3_malloc64(p
->nElem
* sizeof(u32
));
189 if( p
->bGlobal
) memcpy(aOut
, &p
->aMatchinfo
[1], p
->nElem
*sizeof(u32
));
197 static void fts3MIBufferSetGlobal(MatchinfoBuffer
*p
){
199 memcpy(&p
->aMatchinfo
[2+p
->nElem
], &p
->aMatchinfo
[1], p
->nElem
*sizeof(u32
));
203 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
205 void sqlite3Fts3MIBufferFree(MatchinfoBuffer
*p
){
207 assert( p
->aRef
[0]==1 );
209 if( p
->aRef
[0]==0 && p
->aRef
[1]==0 && p
->aRef
[2]==0 ){
216 ** End of MatchinfoBuffer code.
217 *************************************************************************/
221 ** This function is used to help iterate through a position-list. A position
222 ** list is a list of unique integers, sorted from smallest to largest. Each
223 ** element of the list is represented by an FTS3 varint that takes the value
224 ** of the difference between the current element and the previous one plus
225 ** two. For example, to store the position-list:
229 ** the three varints:
235 ** When this function is called, *pp points to the start of an element of
236 ** the list. *piPos contains the value of the previous entry in the list.
237 ** After it returns, *piPos contains the value of the next element of the
238 ** list and *pp is advanced to the following varint.
240 static void fts3GetDeltaPosition(char **pp
, i64
*piPos
){
242 *pp
+= fts3GetVarint32(*pp
, &iVal
);
247 ** Helper function for sqlite3Fts3ExprIterate() (see below).
249 static int fts3ExprIterate2(
250 Fts3Expr
*pExpr
, /* Expression to iterate phrases of */
251 int *piPhrase
, /* Pointer to phrase counter */
252 int (*x
)(Fts3Expr
*,int,void*), /* Callback function to invoke for phrases */
253 void *pCtx
/* Second argument to pass to callback */
255 int rc
; /* Return code */
256 int eType
= pExpr
->eType
; /* Type of expression node pExpr */
258 if( eType
!=FTSQUERY_PHRASE
){
259 assert( pExpr
->pLeft
&& pExpr
->pRight
);
260 rc
= fts3ExprIterate2(pExpr
->pLeft
, piPhrase
, x
, pCtx
);
261 if( rc
==SQLITE_OK
&& eType
!=FTSQUERY_NOT
){
262 rc
= fts3ExprIterate2(pExpr
->pRight
, piPhrase
, x
, pCtx
);
265 rc
= x(pExpr
, *piPhrase
, pCtx
);
272 ** Iterate through all phrase nodes in an FTS3 query, except those that
273 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
274 ** For each phrase node found, the supplied callback function is invoked.
276 ** If the callback function returns anything other than SQLITE_OK,
277 ** the iteration is abandoned and the error code returned immediately.
278 ** Otherwise, SQLITE_OK is returned after a callback has been made for
279 ** all eligible phrase nodes.
281 int sqlite3Fts3ExprIterate(
282 Fts3Expr
*pExpr
, /* Expression to iterate phrases of */
283 int (*x
)(Fts3Expr
*,int,void*), /* Callback function to invoke for phrases */
284 void *pCtx
/* Second argument to pass to callback */
286 int iPhrase
= 0; /* Variable used as the phrase counter */
287 return fts3ExprIterate2(pExpr
, &iPhrase
, x
, pCtx
);
291 ** This is an sqlite3Fts3ExprIterate() callback used while loading the
292 ** doclists for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
293 ** fts3ExprLoadDoclists().
295 static int fts3ExprLoadDoclistsCb(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
297 Fts3Phrase
*pPhrase
= pExpr
->pPhrase
;
298 LoadDoclistCtx
*p
= (LoadDoclistCtx
*)ctx
;
300 UNUSED_PARAMETER(iPhrase
);
303 p
->nToken
+= pPhrase
->nToken
;
309 ** Load the doclists for each phrase in the query associated with FTS3 cursor
312 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
313 ** phrases in the expression (all phrases except those directly or
314 ** indirectly descended from the right-hand-side of a NOT operator). If
315 ** pnToken is not NULL, then it is set to the number of tokens in all
316 ** matchable phrases of the expression.
318 static int fts3ExprLoadDoclists(
319 Fts3Cursor
*pCsr
, /* Fts3 cursor for current query */
320 int *pnPhrase
, /* OUT: Number of phrases in query */
321 int *pnToken
/* OUT: Number of tokens in query */
323 int rc
; /* Return Code */
324 LoadDoclistCtx sCtx
= {0,0,0}; /* Context for sqlite3Fts3ExprIterate() */
326 rc
= sqlite3Fts3ExprIterate(pCsr
->pExpr
,fts3ExprLoadDoclistsCb
,(void*)&sCtx
);
327 if( pnPhrase
) *pnPhrase
= sCtx
.nPhrase
;
328 if( pnToken
) *pnToken
= sCtx
.nToken
;
332 static int fts3ExprPhraseCountCb(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
334 pExpr
->iPhrase
= iPhrase
;
337 static int fts3ExprPhraseCount(Fts3Expr
*pExpr
){
339 (void)sqlite3Fts3ExprIterate(pExpr
, fts3ExprPhraseCountCb
, (void *)&nPhrase
);
344 ** Advance the position list iterator specified by the first two
345 ** arguments so that it points to the first element with a value greater
346 ** than or equal to parameter iNext.
348 static void fts3SnippetAdvance(char **ppIter
, i64
*piIter
, int iNext
){
349 char *pIter
= *ppIter
;
353 while( iIter
<iNext
){
354 if( 0==(*pIter
& 0xFE) ){
359 fts3GetDeltaPosition(&pIter
, &iIter
);
368 ** Advance the snippet iterator to the next candidate snippet.
370 static int fts3SnippetNextCandidate(SnippetIter
*pIter
){
371 int i
; /* Loop counter */
373 if( pIter
->iCurrent
<0 ){
374 /* The SnippetIter object has just been initialized. The first snippet
375 ** candidate always starts at offset 0 (even if this candidate has a
380 /* Advance the 'head' iterator of each phrase to the first offset that
381 ** is greater than or equal to (iNext+nSnippet).
383 for(i
=0; i
<pIter
->nPhrase
; i
++){
384 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
385 fts3SnippetAdvance(&pPhrase
->pHead
, &pPhrase
->iHead
, pIter
->nSnippet
);
389 int iEnd
= 0x7FFFFFFF;
391 for(i
=0; i
<pIter
->nPhrase
; i
++){
392 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
393 if( pPhrase
->pHead
&& pPhrase
->iHead
<iEnd
){
394 iEnd
= pPhrase
->iHead
;
397 if( iEnd
==0x7FFFFFFF ){
401 pIter
->iCurrent
= iStart
= iEnd
- pIter
->nSnippet
+ 1;
402 for(i
=0; i
<pIter
->nPhrase
; i
++){
403 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
404 fts3SnippetAdvance(&pPhrase
->pHead
, &pPhrase
->iHead
, iEnd
+1);
405 fts3SnippetAdvance(&pPhrase
->pTail
, &pPhrase
->iTail
, iStart
);
413 ** Retrieve information about the current candidate snippet of snippet
416 static void fts3SnippetDetails(
417 SnippetIter
*pIter
, /* Snippet iterator */
418 u64 mCovered
, /* Bitmask of phrases already covered */
419 int *piToken
, /* OUT: First token of proposed snippet */
420 int *piScore
, /* OUT: "Score" for this snippet */
421 u64
*pmCover
, /* OUT: Bitmask of phrases covered */
422 u64
*pmHighlight
/* OUT: Bitmask of terms to highlight */
424 int iStart
= pIter
->iCurrent
; /* First token of snippet */
425 int iScore
= 0; /* Score of this snippet */
426 int i
; /* Loop counter */
427 u64 mCover
= 0; /* Mask of phrases covered by this snippet */
428 u64 mHighlight
= 0; /* Mask of tokens to highlight in snippet */
430 for(i
=0; i
<pIter
->nPhrase
; i
++){
431 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
432 if( pPhrase
->pTail
){
433 char *pCsr
= pPhrase
->pTail
;
434 i64 iCsr
= pPhrase
->iTail
;
436 while( iCsr
<(iStart
+pIter
->nSnippet
) && iCsr
>=iStart
){
438 u64 mPhrase
= (u64
)1 << (i
%64);
439 u64 mPos
= (u64
)1 << (iCsr
- iStart
);
440 assert( iCsr
>=iStart
&& (iCsr
- iStart
)<=64 );
442 if( (mCover
|mCovered
)&mPhrase
){
449 for(j
=0; j
<pPhrase
->nToken
&& j
<pIter
->nSnippet
; j
++){
450 mHighlight
|= (mPos
>>j
);
453 if( 0==(*pCsr
& 0x0FE) ) break;
454 fts3GetDeltaPosition(&pCsr
, &iCsr
);
459 /* Set the output variables before returning. */
463 *pmHighlight
= mHighlight
;
467 ** This function is an sqlite3Fts3ExprIterate() callback used by
468 ** fts3BestSnippet(). Each invocation populates an element of the
469 ** SnippetIter.aPhrase[] array.
471 static int fts3SnippetFindPositions(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
472 SnippetIter
*p
= (SnippetIter
*)ctx
;
473 SnippetPhrase
*pPhrase
= &p
->aPhrase
[iPhrase
];
477 pPhrase
->nToken
= pExpr
->pPhrase
->nToken
;
478 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCsr
, pExpr
, p
->iCol
, &pCsr
);
479 assert( rc
==SQLITE_OK
|| pCsr
==0 );
482 pPhrase
->pList
= pCsr
;
483 fts3GetDeltaPosition(&pCsr
, &iFirst
);
485 rc
= FTS_CORRUPT_VTAB
;
487 pPhrase
->pHead
= pCsr
;
488 pPhrase
->pTail
= pCsr
;
489 pPhrase
->iHead
= iFirst
;
490 pPhrase
->iTail
= iFirst
;
493 assert( rc
!=SQLITE_OK
|| (
494 pPhrase
->pList
==0 && pPhrase
->pHead
==0 && pPhrase
->pTail
==0
502 ** Select the fragment of text consisting of nFragment contiguous tokens
503 ** from column iCol that represent the "best" snippet. The best snippet
504 ** is the snippet with the highest score, where scores are calculated
507 ** (a) +1 point for each occurrence of a matchable phrase in the snippet.
509 ** (b) +1000 points for the first occurrence of each matchable phrase in
510 ** the snippet for which the corresponding mCovered bit is not set.
512 ** The selected snippet parameters are stored in structure *pFragment before
513 ** returning. The score of the selected snippet is stored in *piScore
516 static int fts3BestSnippet(
517 int nSnippet
, /* Desired snippet length */
518 Fts3Cursor
*pCsr
, /* Cursor to create snippet for */
519 int iCol
, /* Index of column to create snippet from */
520 u64 mCovered
, /* Mask of phrases already covered */
521 u64
*pmSeen
, /* IN/OUT: Mask of phrases seen */
522 SnippetFragment
*pFragment
, /* OUT: Best snippet found */
523 int *piScore
/* OUT: Score of snippet pFragment */
525 int rc
; /* Return Code */
526 int nList
; /* Number of phrases in expression */
527 SnippetIter sIter
; /* Iterates through snippet candidates */
528 sqlite3_int64 nByte
; /* Number of bytes of space to allocate */
529 int iBestScore
= -1; /* Best snippet score found so far */
530 int i
; /* Loop counter */
532 memset(&sIter
, 0, sizeof(sIter
));
534 /* Iterate through the phrases in the expression to count them. The same
535 ** callback makes sure the doclists are loaded for each phrase.
537 rc
= fts3ExprLoadDoclists(pCsr
, &nList
, 0);
542 /* Now that it is known how many phrases there are, allocate and zero
543 ** the required space using malloc().
545 nByte
= sizeof(SnippetPhrase
) * nList
;
546 sIter
.aPhrase
= (SnippetPhrase
*)sqlite3Fts3MallocZero(nByte
);
547 if( !sIter
.aPhrase
){
551 /* Initialize the contents of the SnippetIter object. Then iterate through
552 ** the set of phrases in the expression to populate the aPhrase[] array.
556 sIter
.nSnippet
= nSnippet
;
557 sIter
.nPhrase
= nList
;
559 rc
= sqlite3Fts3ExprIterate(
560 pCsr
->pExpr
, fts3SnippetFindPositions
, (void*)&sIter
564 /* Set the *pmSeen output variable. */
565 for(i
=0; i
<nList
; i
++){
566 if( sIter
.aPhrase
[i
].pHead
){
567 *pmSeen
|= (u64
)1 << (i
%64);
571 /* Loop through all candidate snippets. Store the best snippet in
572 ** *pFragment. Store its associated 'score' in iBestScore.
574 pFragment
->iCol
= iCol
;
575 while( !fts3SnippetNextCandidate(&sIter
) ){
580 fts3SnippetDetails(&sIter
, mCovered
, &iPos
, &iScore
, &mCover
,&mHighlite
);
582 if( iScore
>iBestScore
){
583 pFragment
->iPos
= iPos
;
584 pFragment
->hlmask
= mHighlite
;
585 pFragment
->covered
= mCover
;
590 *piScore
= iBestScore
;
592 sqlite3_free(sIter
.aPhrase
);
598 ** Append a string to the string-buffer passed as the first argument.
600 ** If nAppend is negative, then the length of the string zAppend is
601 ** determined using strlen().
603 static int fts3StringAppend(
604 StrBuffer
*pStr
, /* Buffer to append to */
605 const char *zAppend
, /* Pointer to data to append to buffer */
606 int nAppend
/* Size of zAppend in bytes (or -1) */
609 nAppend
= (int)strlen(zAppend
);
612 /* If there is insufficient space allocated at StrBuffer.z, use realloc()
613 ** to grow the buffer so that it is big enough to accommodate the
616 if( pStr
->n
+nAppend
+1>=pStr
->nAlloc
){
617 sqlite3_int64 nAlloc
= pStr
->nAlloc
+(sqlite3_int64
)nAppend
+100;
618 char *zNew
= sqlite3_realloc64(pStr
->z
, nAlloc
);
623 pStr
->nAlloc
= nAlloc
;
625 assert( pStr
->z
!=0 && (pStr
->nAlloc
>= pStr
->n
+nAppend
+1) );
627 /* Append the data to the string buffer. */
628 memcpy(&pStr
->z
[pStr
->n
], zAppend
, nAppend
);
630 pStr
->z
[pStr
->n
] = '\0';
636 ** The fts3BestSnippet() function often selects snippets that end with a
637 ** query term. That is, the final term of the snippet is always a term
638 ** that requires highlighting. For example, if 'X' is a highlighted term
639 ** and '.' is a non-highlighted term, BestSnippet() may select:
643 ** This function "shifts" the beginning of the snippet forward in the
644 ** document so that there are approximately the same number of
645 ** non-highlighted terms to the right of the final highlighted term as there
646 ** are to the left of the first highlighted term. For example, to this:
650 ** This is done as part of extracting the snippet text, not when selecting
651 ** the snippet. Snippet selection is done based on doclists only, so there
652 ** is no way for fts3BestSnippet() to know whether or not the document
653 ** actually contains terms that follow the final highlighted term.
655 static int fts3SnippetShift(
656 Fts3Table
*pTab
, /* FTS3 table snippet comes from */
657 int iLangid
, /* Language id to use in tokenizing */
658 int nSnippet
, /* Number of tokens desired for snippet */
659 const char *zDoc
, /* Document text to extract snippet from */
660 int nDoc
, /* Size of buffer zDoc in bytes */
661 int *piPos
, /* IN/OUT: First token of snippet */
662 u64
*pHlmask
/* IN/OUT: Mask of tokens to highlight */
664 u64 hlmask
= *pHlmask
; /* Local copy of initial highlight-mask */
667 int nLeft
; /* Tokens to the left of first highlight */
668 int nRight
; /* Tokens to the right of last highlight */
669 int nDesired
; /* Ideal number of tokens to shift forward */
671 for(nLeft
=0; !(hlmask
& ((u64
)1 << nLeft
)); nLeft
++);
672 for(nRight
=0; !(hlmask
& ((u64
)1 << (nSnippet
-1-nRight
))); nRight
++);
673 assert( (nSnippet
-1-nRight
)<=63 && (nSnippet
-1-nRight
)>=0 );
674 nDesired
= (nLeft
-nRight
)/2;
676 /* Ideally, the start of the snippet should be pushed forward in the
677 ** document nDesired tokens. This block checks if there are actually
678 ** nDesired tokens to the right of the snippet. If so, *piPos and
679 ** *pHlMask are updated to shift the snippet nDesired tokens to the
680 ** right. Otherwise, the snippet is shifted by the number of tokens
684 int nShift
; /* Number of tokens to shift snippet by */
685 int iCurrent
= 0; /* Token counter */
686 int rc
; /* Return Code */
687 sqlite3_tokenizer_module
*pMod
;
688 sqlite3_tokenizer_cursor
*pC
;
689 pMod
= (sqlite3_tokenizer_module
*)pTab
->pTokenizer
->pModule
;
691 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
692 ** or more tokens in zDoc/nDoc.
694 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, iLangid
, zDoc
, nDoc
, &pC
);
698 while( rc
==SQLITE_OK
&& iCurrent
<(nSnippet
+nDesired
) ){
699 const char *ZDUMMY
; int DUMMY1
= 0, DUMMY2
= 0, DUMMY3
= 0;
700 rc
= pMod
->xNext(pC
, &ZDUMMY
, &DUMMY1
, &DUMMY2
, &DUMMY3
, &iCurrent
);
703 if( rc
!=SQLITE_OK
&& rc
!=SQLITE_DONE
){ return rc
; }
705 nShift
= (rc
==SQLITE_DONE
)+iCurrent
-nSnippet
;
706 assert( nShift
<=nDesired
);
709 *pHlmask
= hlmask
>> nShift
;
717 ** Extract the snippet text for fragment pFragment from cursor pCsr and
718 ** append it to string buffer pOut.
720 static int fts3SnippetText(
721 Fts3Cursor
*pCsr
, /* FTS3 Cursor */
722 SnippetFragment
*pFragment
, /* Snippet to extract */
723 int iFragment
, /* Fragment number */
724 int isLast
, /* True for final fragment in snippet */
725 int nSnippet
, /* Number of tokens in extracted snippet */
726 const char *zOpen
, /* String inserted before highlighted term */
727 const char *zClose
, /* String inserted after highlighted term */
728 const char *zEllipsis
, /* String inserted between snippets */
729 StrBuffer
*pOut
/* Write output here */
731 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
732 int rc
; /* Return code */
733 const char *zDoc
; /* Document text to extract snippet from */
734 int nDoc
; /* Size of zDoc in bytes */
735 int iCurrent
= 0; /* Current token number of document */
736 int iEnd
= 0; /* Byte offset of end of current token */
737 int isShiftDone
= 0; /* True after snippet is shifted */
738 int iPos
= pFragment
->iPos
; /* First token of snippet */
739 u64 hlmask
= pFragment
->hlmask
; /* Highlight-mask for snippet */
740 int iCol
= pFragment
->iCol
+1; /* Query column to extract text from */
741 sqlite3_tokenizer_module
*pMod
; /* Tokenizer module methods object */
742 sqlite3_tokenizer_cursor
*pC
; /* Tokenizer cursor open on zDoc/nDoc */
744 zDoc
= (const char *)sqlite3_column_text(pCsr
->pStmt
, iCol
);
746 if( sqlite3_column_type(pCsr
->pStmt
, iCol
)!=SQLITE_NULL
){
751 nDoc
= sqlite3_column_bytes(pCsr
->pStmt
, iCol
);
753 /* Open a token cursor on the document. */
754 pMod
= (sqlite3_tokenizer_module
*)pTab
->pTokenizer
->pModule
;
755 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, pCsr
->iLangid
, zDoc
,nDoc
,&pC
);
760 while( rc
==SQLITE_OK
){
761 const char *ZDUMMY
; /* Dummy argument used with tokenizer */
762 int DUMMY1
= -1; /* Dummy argument used with tokenizer */
763 int iBegin
= 0; /* Offset in zDoc of start of token */
764 int iFin
= 0; /* Offset in zDoc of end of token */
765 int isHighlight
= 0; /* True for highlighted terms */
767 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
768 ** in the FTS code the variable that the third argument to xNext points to
769 ** is initialized to zero before the first (*but not necessarily
770 ** subsequent*) call to xNext(). This is done for a particular application
771 ** that needs to know whether or not the tokenizer is being used for
772 ** snippet generation or for some other purpose.
774 ** Extreme care is required when writing code to depend on this
775 ** initialization. It is not a documented part of the tokenizer interface.
776 ** If a tokenizer is used directly by any code outside of FTS, this
777 ** convention might not be respected. */
778 rc
= pMod
->xNext(pC
, &ZDUMMY
, &DUMMY1
, &iBegin
, &iFin
, &iCurrent
);
780 if( rc
==SQLITE_DONE
){
781 /* Special case - the last token of the snippet is also the last token
782 ** of the column. Append any punctuation that occurred between the end
783 ** of the previous token and the end of the document to the output.
784 ** Then break out of the loop. */
785 rc
= fts3StringAppend(pOut
, &zDoc
[iEnd
], -1);
789 if( iCurrent
<iPos
){ continue; }
792 int n
= nDoc
- iBegin
;
793 rc
= fts3SnippetShift(
794 pTab
, pCsr
->iLangid
, nSnippet
, &zDoc
[iBegin
], n
, &iPos
, &hlmask
798 /* Now that the shift has been done, check if the initial "..." are
799 ** required. They are required if (a) this is not the first fragment,
800 ** or (b) this fragment does not begin at position 0 of its column.
803 if( iPos
>0 || iFragment
>0 ){
804 rc
= fts3StringAppend(pOut
, zEllipsis
, -1);
806 rc
= fts3StringAppend(pOut
, zDoc
, iBegin
);
809 if( rc
!=SQLITE_OK
|| iCurrent
<iPos
) continue;
812 if( iCurrent
>=(iPos
+nSnippet
) ){
814 rc
= fts3StringAppend(pOut
, zEllipsis
, -1);
819 /* Set isHighlight to true if this term should be highlighted. */
820 isHighlight
= (hlmask
& ((u64
)1 << (iCurrent
-iPos
)))!=0;
822 if( iCurrent
>iPos
) rc
= fts3StringAppend(pOut
, &zDoc
[iEnd
], iBegin
-iEnd
);
823 if( rc
==SQLITE_OK
&& isHighlight
) rc
= fts3StringAppend(pOut
, zOpen
, -1);
824 if( rc
==SQLITE_OK
) rc
= fts3StringAppend(pOut
, &zDoc
[iBegin
], iFin
-iBegin
);
825 if( rc
==SQLITE_OK
&& isHighlight
) rc
= fts3StringAppend(pOut
, zClose
, -1);
836 ** This function is used to count the entries in a column-list (a
837 ** delta-encoded list of term offsets within a single column of a single
838 ** row). When this function is called, *ppCollist should point to the
839 ** beginning of the first varint in the column-list (the varint that
840 ** contains the position of the first matching term in the column data).
841 ** Before returning, *ppCollist is set to point to the first byte after
842 ** the last varint in the column-list (either the 0x00 signifying the end
843 ** of the position-list, or the 0x01 that precedes the column number of
844 ** the next column in the position-list).
846 ** The number of elements in the column-list is returned.
848 static int fts3ColumnlistCount(char **ppCollist
){
849 char *pEnd
= *ppCollist
;
853 /* A column-list is terminated by either a 0x01 or 0x00. */
854 while( 0xFE & (*pEnd
| c
) ){
864 ** This function gathers 'y' or 'b' data for a single phrase.
866 static int fts3ExprLHits(
867 Fts3Expr
*pExpr
, /* Phrase expression node */
868 MatchInfo
*p
/* Matchinfo context */
870 Fts3Table
*pTab
= (Fts3Table
*)p
->pCursor
->base
.pVtab
;
872 Fts3Phrase
*pPhrase
= pExpr
->pPhrase
;
873 char *pIter
= pPhrase
->doclist
.pList
;
876 assert( p
->flag
==FTS3_MATCHINFO_LHITS_BM
|| p
->flag
==FTS3_MATCHINFO_LHITS
);
877 if( p
->flag
==FTS3_MATCHINFO_LHITS
){
878 iStart
= pExpr
->iPhrase
* p
->nCol
;
880 iStart
= pExpr
->iPhrase
* ((p
->nCol
+ 31) / 32);
883 if( pIter
) while( 1 ){
884 int nHit
= fts3ColumnlistCount(&pIter
);
885 if( (pPhrase
->iColumn
>=pTab
->nColumn
|| pPhrase
->iColumn
==iCol
) ){
886 if( p
->flag
==FTS3_MATCHINFO_LHITS
){
887 p
->aMatchinfo
[iStart
+ iCol
] = (u32
)nHit
;
889 p
->aMatchinfo
[iStart
+ (iCol
+1)/32] |= (1 << (iCol
&0x1F));
892 assert( *pIter
==0x00 || *pIter
==0x01 );
893 if( *pIter
!=0x01 ) break;
895 pIter
+= fts3GetVarint32(pIter
, &iCol
);
896 if( iCol
>=p
->nCol
) return FTS_CORRUPT_VTAB
;
902 ** Gather the results for matchinfo directives 'y' and 'b'.
904 static int fts3ExprLHitGather(
909 assert( (pExpr
->pLeft
==0)==(pExpr
->pRight
==0) );
910 if( pExpr
->bEof
==0 && pExpr
->iDocid
==p
->pCursor
->iPrevId
){
912 rc
= fts3ExprLHitGather(pExpr
->pLeft
, p
);
913 if( rc
==SQLITE_OK
) rc
= fts3ExprLHitGather(pExpr
->pRight
, p
);
915 rc
= fts3ExprLHits(pExpr
, p
);
922 ** sqlite3Fts3ExprIterate() callback used to collect the "global" matchinfo
923 ** stats for a single query.
925 ** sqlite3Fts3ExprIterate() callback to load the 'global' elements of a
926 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
927 ** of the matchinfo array that are constant for all rows returned by the
930 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
931 ** function populates Matchinfo.aMatchinfo[] as follows:
933 ** for(iCol=0; iCol<nCol; iCol++){
934 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
935 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
938 ** where X is the number of matches for phrase iPhrase in column iCol of all
939 ** rows of the table. Y is the number of rows for which column iCol contains
940 ** at least one instance of phrase iPhrase.
942 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
943 ** Y values are set to nDoc, where nDoc is the number of documents in the
944 ** file system. This is done because the full-text index doclist is required
945 ** to calculate these values properly, and the full-text index doclist is
946 ** not available for deferred tokens.
948 static int fts3ExprGlobalHitsCb(
949 Fts3Expr
*pExpr
, /* Phrase expression node */
950 int iPhrase
, /* Phrase number (numbered from zero) */
951 void *pCtx
/* Pointer to MatchInfo structure */
953 MatchInfo
*p
= (MatchInfo
*)pCtx
;
954 return sqlite3Fts3EvalPhraseStats(
955 p
->pCursor
, pExpr
, &p
->aMatchinfo
[3*iPhrase
*p
->nCol
]
960 ** sqlite3Fts3ExprIterate() callback used to collect the "local" part of the
961 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
962 ** array that are different for each row returned by the query.
964 static int fts3ExprLocalHitsCb(
965 Fts3Expr
*pExpr
, /* Phrase expression node */
966 int iPhrase
, /* Phrase number */
967 void *pCtx
/* Pointer to MatchInfo structure */
970 MatchInfo
*p
= (MatchInfo
*)pCtx
;
971 int iStart
= iPhrase
* p
->nCol
* 3;
974 for(i
=0; i
<p
->nCol
&& rc
==SQLITE_OK
; i
++){
976 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCursor
, pExpr
, i
, &pCsr
);
978 p
->aMatchinfo
[iStart
+i
*3] = fts3ColumnlistCount(&pCsr
);
980 p
->aMatchinfo
[iStart
+i
*3] = 0;
987 static int fts3MatchinfoCheck(
992 if( (cArg
==FTS3_MATCHINFO_NPHRASE
)
993 || (cArg
==FTS3_MATCHINFO_NCOL
)
994 || (cArg
==FTS3_MATCHINFO_NDOC
&& pTab
->bFts4
)
995 || (cArg
==FTS3_MATCHINFO_AVGLENGTH
&& pTab
->bFts4
)
996 || (cArg
==FTS3_MATCHINFO_LENGTH
&& pTab
->bHasDocsize
)
997 || (cArg
==FTS3_MATCHINFO_LCS
)
998 || (cArg
==FTS3_MATCHINFO_HITS
)
999 || (cArg
==FTS3_MATCHINFO_LHITS
)
1000 || (cArg
==FTS3_MATCHINFO_LHITS_BM
)
1004 sqlite3Fts3ErrMsg(pzErr
, "unrecognized matchinfo request: %c", cArg
);
1005 return SQLITE_ERROR
;
1008 static size_t fts3MatchinfoSize(MatchInfo
*pInfo
, char cArg
){
1009 size_t nVal
; /* Number of integers output by cArg */
1012 case FTS3_MATCHINFO_NDOC
:
1013 case FTS3_MATCHINFO_NPHRASE
:
1014 case FTS3_MATCHINFO_NCOL
:
1018 case FTS3_MATCHINFO_AVGLENGTH
:
1019 case FTS3_MATCHINFO_LENGTH
:
1020 case FTS3_MATCHINFO_LCS
:
1024 case FTS3_MATCHINFO_LHITS
:
1025 nVal
= pInfo
->nCol
* pInfo
->nPhrase
;
1028 case FTS3_MATCHINFO_LHITS_BM
:
1029 nVal
= pInfo
->nPhrase
* ((pInfo
->nCol
+ 31) / 32);
1033 assert( cArg
==FTS3_MATCHINFO_HITS
);
1034 nVal
= pInfo
->nCol
* pInfo
->nPhrase
* 3;
1041 static int fts3MatchinfoSelectDoctotal(
1043 sqlite3_stmt
**ppStmt
,
1044 sqlite3_int64
*pnDoc
,
1048 sqlite3_stmt
*pStmt
;
1056 int rc
= sqlite3Fts3SelectDoctotal(pTab
, ppStmt
);
1057 if( rc
!=SQLITE_OK
) return rc
;
1060 assert( sqlite3_data_count(pStmt
)==1 );
1062 n
= sqlite3_column_bytes(pStmt
, 0);
1063 a
= sqlite3_column_blob(pStmt
, 0);
1065 return FTS_CORRUPT_VTAB
;
1068 a
+= sqlite3Fts3GetVarintBounded(a
, pEnd
, &nDoc
);
1069 if( nDoc
<=0 || a
>pEnd
){
1070 return FTS_CORRUPT_VTAB
;
1074 if( paLen
) *paLen
= a
;
1075 if( ppEnd
) *ppEnd
= pEnd
;
1080 ** An instance of the following structure is used to store state while
1081 ** iterating through a multi-column position-list corresponding to the
1082 ** hits for a single phrase on a single row in order to calculate the
1083 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
1085 typedef struct LcsIterator LcsIterator
;
1086 struct LcsIterator
{
1087 Fts3Expr
*pExpr
; /* Pointer to phrase expression */
1088 int iPosOffset
; /* Tokens count up to end of this phrase */
1089 char *pRead
; /* Cursor used to iterate through aDoclist */
1090 int iPos
; /* Current position */
1094 ** If LcsIterator.iCol is set to the following value, the iterator has
1095 ** finished iterating through all offsets for all columns.
/*
** Sentinel value: the largest positive 32-bit signed integer.
**
** The definition deliberately has no trailing semicolon. The macro must
** expand to a plain constant expression so that it can appear inside
** comparisons, initializers and argument lists; a ';' in the replacement
** text would terminate the enclosing statement early and break such uses.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
1099 static int fts3MatchinfoLcsCb(
1100 Fts3Expr
*pExpr
, /* Phrase expression node */
1101 int iPhrase
, /* Phrase number (numbered from zero) */
1102 void *pCtx
/* Pointer to array of LcsIterator objects */
1104 LcsIterator
*aIter
= (LcsIterator
*)pCtx
;
1105 aIter
[iPhrase
].pExpr
= pExpr
;
1110 ** Advance the iterator passed as an argument to the next position. Return
1111 ** 1 if the iterator is at EOF or if it now points to the start of the
1112 ** position list for the next column.
1114 static int fts3LcsIteratorAdvance(LcsIterator
*pIter
){
1116 sqlite3_int64 iRead
;
1119 if( NEVER(pIter
==0) ) return 1;
1120 pRead
= pIter
->pRead
;
1121 pRead
+= sqlite3Fts3GetVarint(pRead
, &iRead
);
1122 if( iRead
==0 || iRead
==1 ){
1126 pIter
->iPos
+= (int)(iRead
-2);
1129 pIter
->pRead
= pRead
;
1134 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
1136 ** If the call is successful, the longest-common-substring lengths for each
1137 ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
1138 ** array before returning. SQLITE_OK is returned in this case.
1140 ** Otherwise, if an error occurs, an SQLite error code is returned and the
1141 ** data written to the first nCol elements of pInfo->aMatchinfo[] is undefined.
1144 static int fts3MatchinfoLcs(Fts3Cursor
*pCsr
, MatchInfo
*pInfo
){
1151 /* Allocate and populate the array of LcsIterator objects. The array
1152 ** contains one element for each matchable phrase in the query.
1154 aIter
= sqlite3Fts3MallocZero(sizeof(LcsIterator
) * pCsr
->nPhrase
);
1155 if( !aIter
) return SQLITE_NOMEM
;
1156 (void)sqlite3Fts3ExprIterate(pCsr
->pExpr
, fts3MatchinfoLcsCb
, (void*)aIter
);
1158 for(i
=0; i
<pInfo
->nPhrase
; i
++){
1159 LcsIterator
*pIter
= &aIter
[i
];
1160 nToken
-= pIter
->pExpr
->pPhrase
->nToken
;
1161 pIter
->iPosOffset
= nToken
;
1164 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1165 int nLcs
= 0; /* LCS value for this column */
1166 int nLive
= 0; /* Number of iterators in aIter not at EOF */
1168 for(i
=0; i
<pInfo
->nPhrase
; i
++){
1169 LcsIterator
*pIt
= &aIter
[i
];
1170 rc
= sqlite3Fts3EvalPhrasePoslist(pCsr
, pIt
->pExpr
, iCol
, &pIt
->pRead
);
1171 if( rc
!=SQLITE_OK
) goto matchinfo_lcs_out
;
1173 pIt
->iPos
= pIt
->iPosOffset
;
1174 fts3LcsIteratorAdvance(pIt
);
1175 if( pIt
->pRead
==0 ){
1176 rc
= FTS_CORRUPT_VTAB
;
1177 goto matchinfo_lcs_out
;
1184 LcsIterator
*pAdv
= 0; /* The iterator to advance by one position */
1185 int nThisLcs
= 0; /* LCS for the current iterator positions */
1187 for(i
=0; i
<pInfo
->nPhrase
; i
++){
1188 LcsIterator
*pIter
= &aIter
[i
];
1189 if( pIter
->pRead
==0 ){
1190 /* This iterator is already at EOF for this column. */
1193 if( pAdv
==0 || pIter
->iPos
<pAdv
->iPos
){
1196 if( nThisLcs
==0 || pIter
->iPos
==pIter
[-1].iPos
){
1201 if( nThisLcs
>nLcs
) nLcs
= nThisLcs
;
1204 if( fts3LcsIteratorAdvance(pAdv
) ) nLive
--;
1207 pInfo
->aMatchinfo
[iCol
] = nLcs
;
1211 sqlite3_free(aIter
);
1216 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1217 ** be returned by the matchinfo() function. Argument zArg contains the
1218 ** format string passed as the second argument to matchinfo (or the
1219 ** default value "pcx" if no second argument was specified). The format
1220 ** string has already been validated and the pInfo->aMatchinfo[] array
1221 ** is guaranteed to be large enough for the output.
1223 ** If bGlobal is true, then populate all fields of the matchinfo() output.
1224 ** If it is false, then assume that those fields that do not change between
1225 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1226 ** have already been populated.
1228 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1229 ** occurs. If a value other than SQLITE_OK is returned, the state the
1230 ** pInfo->aMatchinfo[] buffer is left in is undefined.
1232 static int fts3MatchinfoValues(
1233 Fts3Cursor
*pCsr
, /* FTS3 cursor object */
1234 int bGlobal
, /* True to grab the global stats */
1235 MatchInfo
*pInfo
, /* Matchinfo context object */
1236 const char *zArg
/* Matchinfo format string */
1240 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1241 sqlite3_stmt
*pSelect
= 0;
1243 for(i
=0; rc
==SQLITE_OK
&& zArg
[i
]; i
++){
1244 pInfo
->flag
= zArg
[i
];
1246 case FTS3_MATCHINFO_NPHRASE
:
1247 if( bGlobal
) pInfo
->aMatchinfo
[0] = pInfo
->nPhrase
;
1250 case FTS3_MATCHINFO_NCOL
:
1251 if( bGlobal
) pInfo
->aMatchinfo
[0] = pInfo
->nCol
;
1254 case FTS3_MATCHINFO_NDOC
:
1256 sqlite3_int64 nDoc
= 0;
1257 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &nDoc
, 0, 0);
1258 pInfo
->aMatchinfo
[0] = (u32
)nDoc
;
1262 case FTS3_MATCHINFO_AVGLENGTH
:
1264 sqlite3_int64 nDoc
; /* Number of rows in table */
1265 const char *a
; /* Aggregate column length array */
1266 const char *pEnd
; /* First byte past end of length array */
1268 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &nDoc
, &a
, &pEnd
);
1269 if( rc
==SQLITE_OK
){
1271 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1273 sqlite3_int64 nToken
;
1274 a
+= sqlite3Fts3GetVarint(a
, &nToken
);
1276 rc
= SQLITE_CORRUPT_VTAB
;
1279 iVal
= (u32
)(((u32
)(nToken
&0xffffffff)+nDoc
/2)/nDoc
);
1280 pInfo
->aMatchinfo
[iCol
] = iVal
;
1286 case FTS3_MATCHINFO_LENGTH
: {
1287 sqlite3_stmt
*pSelectDocsize
= 0;
1288 rc
= sqlite3Fts3SelectDocsize(pTab
, pCsr
->iPrevId
, &pSelectDocsize
);
1289 if( rc
==SQLITE_OK
){
1291 const char *a
= sqlite3_column_blob(pSelectDocsize
, 0);
1292 const char *pEnd
= a
+ sqlite3_column_bytes(pSelectDocsize
, 0);
1293 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1294 sqlite3_int64 nToken
;
1295 a
+= sqlite3Fts3GetVarintBounded(a
, pEnd
, &nToken
);
1297 rc
= SQLITE_CORRUPT_VTAB
;
1300 pInfo
->aMatchinfo
[iCol
] = (u32
)nToken
;
1303 sqlite3_reset(pSelectDocsize
);
1307 case FTS3_MATCHINFO_LCS
:
1308 rc
= fts3ExprLoadDoclists(pCsr
, 0, 0);
1309 if( rc
==SQLITE_OK
){
1310 rc
= fts3MatchinfoLcs(pCsr
, pInfo
);
1314 case FTS3_MATCHINFO_LHITS_BM
:
1315 case FTS3_MATCHINFO_LHITS
: {
1316 size_t nZero
= fts3MatchinfoSize(pInfo
, zArg
[i
]) * sizeof(u32
);
1317 memset(pInfo
->aMatchinfo
, 0, nZero
);
1318 rc
= fts3ExprLHitGather(pCsr
->pExpr
, pInfo
);
1324 assert( zArg
[i
]==FTS3_MATCHINFO_HITS
);
1325 pExpr
= pCsr
->pExpr
;
1326 rc
= fts3ExprLoadDoclists(pCsr
, 0, 0);
1327 if( rc
!=SQLITE_OK
) break;
1329 if( pCsr
->pDeferred
){
1330 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &pInfo
->nDoc
,0,0);
1331 if( rc
!=SQLITE_OK
) break;
1333 rc
= sqlite3Fts3ExprIterate(pExpr
, fts3ExprGlobalHitsCb
,(void*)pInfo
);
1334 sqlite3Fts3EvalTestDeferred(pCsr
, &rc
);
1335 if( rc
!=SQLITE_OK
) break;
1337 (void)sqlite3Fts3ExprIterate(pExpr
, fts3ExprLocalHitsCb
,(void*)pInfo
);
1342 pInfo
->aMatchinfo
+= fts3MatchinfoSize(pInfo
, zArg
[i
]);
1345 sqlite3_reset(pSelect
);
1351 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1352 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1354 static void fts3GetMatchinfo(
1355 sqlite3_context
*pCtx
, /* Return results here */
1356 Fts3Cursor
*pCsr
, /* FTS3 Cursor object */
1357 const char *zArg
/* Second argument to matchinfo() function */
1360 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1362 int bGlobal
= 0; /* Collect 'global' stats as well as local */
1365 void (*xDestroyOut
)(void*) = 0;
1367 memset(&sInfo
, 0, sizeof(MatchInfo
));
1368 sInfo
.pCursor
= pCsr
;
1369 sInfo
.nCol
= pTab
->nColumn
;
1371 /* If there is cached matchinfo() data, but the format string for the
1372 ** cache does not match the format string for this request, discard
1373 ** the cached data. */
1374 if( pCsr
->pMIBuffer
&& strcmp(pCsr
->pMIBuffer
->zMatchinfo
, zArg
) ){
1375 sqlite3Fts3MIBufferFree(pCsr
->pMIBuffer
);
1376 pCsr
->pMIBuffer
= 0;
1379 /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
1380 ** matchinfo function has been called for this query. In this case
1381 ** allocate the array used to accumulate the matchinfo data and
1382 ** initialize those elements that are constant for every row.
1384 if( pCsr
->pMIBuffer
==0 ){
1385 size_t nMatchinfo
= 0; /* Number of u32 elements in match-info */
1386 int i
; /* Used to iterate through zArg */
1388 /* Determine the number of phrases in the query */
1389 pCsr
->nPhrase
= fts3ExprPhraseCount(pCsr
->pExpr
);
1390 sInfo
.nPhrase
= pCsr
->nPhrase
;
1392 /* Determine the number of integers in the buffer returned by this call. */
1393 for(i
=0; zArg
[i
]; i
++){
1395 if( fts3MatchinfoCheck(pTab
, zArg
[i
], &zErr
) ){
1396 sqlite3_result_error(pCtx
, zErr
, -1);
1400 nMatchinfo
+= fts3MatchinfoSize(&sInfo
, zArg
[i
]);
1403 /* Allocate the matchinfo buffer object, Fts3Cursor.pMIBuffer. */
1404 pCsr
->pMIBuffer
= fts3MIBufferNew(nMatchinfo
, zArg
);
1405 if( !pCsr
->pMIBuffer
) rc
= SQLITE_NOMEM
;
1407 pCsr
->isMatchinfoNeeded
= 1;
1411 if( rc
==SQLITE_OK
){
1412 xDestroyOut
= fts3MIBufferAlloc(pCsr
->pMIBuffer
, &aOut
);
1413 if( xDestroyOut
==0 ){
1418 if( rc
==SQLITE_OK
){
1419 sInfo
.aMatchinfo
= aOut
;
1420 sInfo
.nPhrase
= pCsr
->nPhrase
;
1421 rc
= fts3MatchinfoValues(pCsr
, bGlobal
, &sInfo
, zArg
);
1423 fts3MIBufferSetGlobal(pCsr
->pMIBuffer
);
1427 if( rc
!=SQLITE_OK
){
1428 sqlite3_result_error_code(pCtx
, rc
);
1429 if( xDestroyOut
) xDestroyOut(aOut
);
1431 int n
= pCsr
->pMIBuffer
->nElem
* sizeof(u32
);
1432 sqlite3_result_blob(pCtx
, aOut
, n
, xDestroyOut
);
1437 ** Implementation of snippet() function.
1439 void sqlite3Fts3Snippet(
1440 sqlite3_context
*pCtx
, /* SQLite function call context */
1441 Fts3Cursor
*pCsr
, /* Cursor object */
1442 const char *zStart
, /* Snippet start text - "<b>" */
1443 const char *zEnd
, /* Snippet end text - "</b>" */
1444 const char *zEllipsis
, /* Snippet ellipsis text - "<b>...</b>" */
1445 int iCol
, /* Extract snippet from this column */
1446 int nToken
/* Approximate number of tokens in snippet */
1448 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1451 StrBuffer res
= {0, 0, 0};
1453 /* The returned text includes up to four fragments of text extracted from
1454 ** the data in the current row. The first iteration of the for(...) loop
1455 ** below attempts to locate a single fragment of text nToken tokens in
1456 ** size that contains at least one instance of all phrases in the query
1457 ** expression that appear in the current row. If such a fragment of text
1458 ** cannot be found, the second iteration of the loop attempts to locate
1459 ** a pair of fragments, and so on.
1461 int nSnippet
= 0; /* Number of fragments in this snippet */
1462 SnippetFragment aSnippet
[4]; /* Maximum of 4 fragments per snippet */
1463 int nFToken
= -1; /* Number of tokens in each fragment */
1466 sqlite3_result_text(pCtx
, "", 0, SQLITE_STATIC
);
1470 /* Limit the snippet length to 64 tokens. */
1471 if( nToken
<-64 ) nToken
= -64;
1472 if( nToken
>+64 ) nToken
= +64;
1474 for(nSnippet
=1; 1; nSnippet
++){
1476 int iSnip
; /* Loop counter 0..nSnippet-1 */
1477 u64 mCovered
= 0; /* Bitmask of phrases covered by snippet */
1478 u64 mSeen
= 0; /* Bitmask of phrases seen by BestSnippet() */
1481 nFToken
= (nToken
+nSnippet
-1) / nSnippet
;
1483 nFToken
= -1 * nToken
;
1486 for(iSnip
=0; iSnip
<nSnippet
; iSnip
++){
1487 int iBestScore
= -1; /* Best score of columns checked so far */
1488 int iRead
; /* Used to iterate through columns */
1489 SnippetFragment
*pFragment
= &aSnippet
[iSnip
];
1491 memset(pFragment
, 0, sizeof(*pFragment
));
1493 /* Loop through all columns of the table being considered for snippets.
1494 ** If the iCol argument to this function was negative, this means all
1495 ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1497 for(iRead
=0; iRead
<pTab
->nColumn
; iRead
++){
1498 SnippetFragment sF
= {0, 0, 0, 0};
1500 if( iCol
>=0 && iRead
!=iCol
) continue;
1502 /* Find the best snippet of nFToken tokens in column iRead. */
1503 rc
= fts3BestSnippet(nFToken
, pCsr
, iRead
, mCovered
, &mSeen
, &sF
, &iS
);
1504 if( rc
!=SQLITE_OK
){
1507 if( iS
>iBestScore
){
1513 mCovered
|= pFragment
->covered
;
1516 /* If all query phrases seen by fts3BestSnippet() are present in at least
1517 ** one of the nSnippet snippet fragments, break out of the loop.
1519 assert( (mCovered
&mSeen
)==mCovered
);
1520 if( mSeen
==mCovered
|| nSnippet
==SizeofArray(aSnippet
) ) break;
1523 assert( nFToken
>0 );
1525 for(i
=0; i
<nSnippet
&& rc
==SQLITE_OK
; i
++){
1526 rc
= fts3SnippetText(pCsr
, &aSnippet
[i
],
1527 i
, (i
==nSnippet
-1), nFToken
, zStart
, zEnd
, zEllipsis
, &res
1532 sqlite3Fts3SegmentsClose(pTab
);
1533 if( rc
!=SQLITE_OK
){
1534 sqlite3_result_error_code(pCtx
, rc
);
1535 sqlite3_free(res
.z
);
1537 sqlite3_result_text(pCtx
, res
.z
, -1, sqlite3_free
);
1542 typedef struct TermOffset TermOffset
;
1543 typedef struct TermOffsetCtx TermOffsetCtx
;
1546 char *pList
; /* Position-list */
1547 i64 iPos
; /* Position just read from pList */
1548 i64 iOff
; /* Offset of this term from read positions */
1551 struct TermOffsetCtx
{
1553 int iCol
; /* Column of table to populate aTerm for */
1555 sqlite3_int64 iDocid
;
1560 ** This function is an sqlite3Fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1562 static int fts3ExprTermOffsetInit(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
1563 TermOffsetCtx
*p
= (TermOffsetCtx
*)ctx
;
1564 int nTerm
; /* Number of tokens in phrase */
1565 int iTerm
; /* For looping through nTerm phrase terms */
1566 char *pList
; /* Pointer to position list for phrase */
1567 i64 iPos
= 0; /* First position in position-list */
1570 UNUSED_PARAMETER(iPhrase
);
1571 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCsr
, pExpr
, p
->iCol
, &pList
);
1572 nTerm
= pExpr
->pPhrase
->nToken
;
1574 fts3GetDeltaPosition(&pList
, &iPos
);
1575 assert_fts3_nc( iPos
>=0 );
1578 for(iTerm
=0; iTerm
<nTerm
; iTerm
++){
1579 TermOffset
*pT
= &p
->aTerm
[p
->iTerm
++];
1580 pT
->iOff
= nTerm
-iTerm
-1;
1589 ** Implementation of offsets() function.
1591 void sqlite3Fts3Offsets(
1592 sqlite3_context
*pCtx
, /* SQLite function call context */
1593 Fts3Cursor
*pCsr
/* Cursor object */
1595 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1596 sqlite3_tokenizer_module
const *pMod
= pTab
->pTokenizer
->pModule
;
1597 int rc
; /* Return Code */
1598 int nToken
; /* Number of tokens in query */
1599 int iCol
; /* Column currently being processed */
1600 StrBuffer res
= {0, 0, 0}; /* Result string */
1601 TermOffsetCtx sCtx
; /* Context for fts3ExprTermOffsetInit() */
1604 sqlite3_result_text(pCtx
, "", 0, SQLITE_STATIC
);
1608 memset(&sCtx
, 0, sizeof(sCtx
));
1609 assert( pCsr
->isRequireSeek
==0 );
1611 /* Count the number of terms in the query */
1612 rc
= fts3ExprLoadDoclists(pCsr
, 0, &nToken
);
1613 if( rc
!=SQLITE_OK
) goto offsets_out
;
1615 /* Allocate the array of TermOffset iterators. */
1616 sCtx
.aTerm
= (TermOffset
*)sqlite3Fts3MallocZero(sizeof(TermOffset
)*nToken
);
1617 if( 0==sCtx
.aTerm
){
1621 sCtx
.iDocid
= pCsr
->iPrevId
;
1624 /* Loop through the table columns, appending offset information to
1625 ** string-buffer res for each column.
1627 for(iCol
=0; iCol
<pTab
->nColumn
; iCol
++){
1628 sqlite3_tokenizer_cursor
*pC
; /* Tokenizer cursor */
1629 const char *ZDUMMY
; /* Dummy argument used with xNext() */
1630 int NDUMMY
= 0; /* Dummy argument used with xNext() */
1637 /* Initialize the contents of sCtx.aTerm[] for column iCol. This
1638 ** operation may fail if the database contains corrupt records.
1642 rc
= sqlite3Fts3ExprIterate(
1643 pCsr
->pExpr
, fts3ExprTermOffsetInit
, (void*)&sCtx
1645 if( rc
!=SQLITE_OK
) goto offsets_out
;
1647 /* Retrieve the text stored in column iCol. If an SQL NULL is stored
1648 ** in column iCol, jump immediately to the next iteration of the loop.
1649 ** If an OOM occurs while retrieving the data (this can happen if SQLite
1650 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1653 zDoc
= (const char *)sqlite3_column_text(pCsr
->pStmt
, iCol
+1);
1654 nDoc
= sqlite3_column_bytes(pCsr
->pStmt
, iCol
+1);
1656 if( sqlite3_column_type(pCsr
->pStmt
, iCol
+1)==SQLITE_NULL
){
1663 /* Initialize a tokenizer iterator to iterate through column iCol. */
1664 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, pCsr
->iLangid
,
1667 if( rc
!=SQLITE_OK
) goto offsets_out
;
1669 rc
= pMod
->xNext(pC
, &ZDUMMY
, &NDUMMY
, &iStart
, &iEnd
, &iCurrent
);
1670 while( rc
==SQLITE_OK
){
1671 int i
; /* Used to loop through terms */
1672 int iMinPos
= 0x7FFFFFFF; /* Position of next token */
1673 TermOffset
*pTerm
= 0; /* TermOffset associated with next token */
1675 for(i
=0; i
<nToken
; i
++){
1676 TermOffset
*pT
= &sCtx
.aTerm
[i
];
1677 if( pT
->pList
&& (pT
->iPos
-pT
->iOff
)<iMinPos
){
1678 iMinPos
= pT
->iPos
-pT
->iOff
;
1684 /* All offsets for this column have been gathered. */
1687 assert_fts3_nc( iCurrent
<=iMinPos
);
1688 if( 0==(0xFE&*pTerm
->pList
) ){
1691 fts3GetDeltaPosition(&pTerm
->pList
, &pTerm
->iPos
);
1693 while( rc
==SQLITE_OK
&& iCurrent
<iMinPos
){
1694 rc
= pMod
->xNext(pC
, &ZDUMMY
, &NDUMMY
, &iStart
, &iEnd
, &iCurrent
);
1696 if( rc
==SQLITE_OK
){
1698 sqlite3_snprintf(sizeof(aBuffer
), aBuffer
,
1699 "%d %d %d %d ", iCol
, pTerm
-sCtx
.aTerm
, iStart
, iEnd
-iStart
1701 rc
= fts3StringAppend(&res
, aBuffer
, -1);
1702 }else if( rc
==SQLITE_DONE
&& pTab
->zContentTbl
==0 ){
1703 rc
= FTS_CORRUPT_VTAB
;
1707 if( rc
==SQLITE_DONE
){
1712 if( rc
!=SQLITE_OK
) goto offsets_out
;
1716 sqlite3_free(sCtx
.aTerm
);
1717 assert( rc
!=SQLITE_DONE
);
1718 sqlite3Fts3SegmentsClose(pTab
);
1719 if( rc
!=SQLITE_OK
){
1720 sqlite3_result_error_code(pCtx
, rc
);
1721 sqlite3_free(res
.z
);
1723 sqlite3_result_text(pCtx
, res
.z
, res
.n
-1, sqlite3_free
);
1729 ** Implementation of matchinfo() function.
1731 void sqlite3Fts3Matchinfo(
1732 sqlite3_context
*pContext
, /* Function call context */
1733 Fts3Cursor
*pCsr
, /* FTS3 table cursor */
1734 const char *zArg
/* Second arg to matchinfo() function */
1736 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1737 const char *zFormat
;
1742 zFormat
= FTS3_MATCHINFO_DEFAULT
;
1746 sqlite3_result_blob(pContext
, "", 0, SQLITE_STATIC
);
1749 /* Retrieve matchinfo() data. */
1750 fts3GetMatchinfo(pContext
, pCsr
, zFormat
);
1751 sqlite3Fts3SegmentsClose(pTab
);