4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
/*
** Flag characters accepted in the format string that is passed as the
** second argument to matchinfo(). The trailing comment on each line gives
** the number of values the flag contributes to the matchinfo() output.
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */

/*
** Format string used for the second argument to matchinfo() when the
** caller does not supply one.
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
/*
** Context object handed to fts3ExprIterate() while the doclist of each
** phrase is being loaded into Fts3Expr.aDoclist[]/nDoclist.
*/
typedef struct LoadDoclistCtx LoadDoclistCtx;
42 struct LoadDoclistCtx
{
43 Fts3Cursor
*pCsr
; /* FTS3 Cursor */
44 int nPhrase
; /* Number of phrases seen so far */
45 int nToken
; /* Number of tokens seen so far */
/*
** Helper types that form part of the implementation of the
** fts3BestSnippet() routine.
*/
typedef struct SnippetIter SnippetIter;
typedef struct SnippetPhrase SnippetPhrase;
typedef struct SnippetFragment SnippetFragment;
57 Fts3Cursor
*pCsr
; /* Cursor snippet is being generated from */
58 int iCol
; /* Extract snippet from this column */
59 int nSnippet
; /* Requested snippet length (in tokens) */
60 int nPhrase
; /* Number of phrases in query */
61 SnippetPhrase
*aPhrase
; /* Array of size nPhrase */
62 int iCurrent
; /* First token of current snippet */
65 struct SnippetPhrase
{
66 int nToken
; /* Number of tokens in phrase */
67 char *pList
; /* Pointer to start of phrase position list */
68 int iHead
; /* Next value in position list */
69 char *pHead
; /* Position list data following iHead */
70 int iTail
; /* Next value in trailing position list */
71 char *pTail
; /* Position list data following iTail */
74 struct SnippetFragment
{
75 int iCol
; /* Column snippet is extracted from */
76 int iPos
; /* Index of first token in snippet */
77 u64 covered
; /* Mask of query phrases covered */
78 u64 hlmask
; /* Mask of snippet terms to highlight */
/*
** Context object passed to fts3ExprIterate() callbacks while the data
** returned by the matchinfo() function is being accumulated.
*/
typedef struct MatchInfo MatchInfo;
87 Fts3Cursor
*pCursor
; /* FTS3 Cursor */
88 int nCol
; /* Number of columns in table */
89 int nPhrase
; /* Number of matchable phrases in query */
90 sqlite3_int64 nDoc
; /* Number of docs in database */
91 u32
*aMatchinfo
; /* Pre-allocated buffer */
/*
** Both snippet() and offsets() return text values. While either function
** is running, its result is accumulated in an instance of the structure
** declared here. See fts3StringAppend() for details.
*/
typedef struct StrBuffer StrBuffer;
103 char *z
; /* Pointer to buffer containing string */
104 int n
; /* Length of z in bytes (excl. nul-term) */
105 int nAlloc
; /* Allocated size of buffer z in bytes */
110 ** This function is used to help iterate through a position-list. A position
111 ** list is a list of unique integers, sorted from smallest to largest. Each
112 ** element of the list is represented by an FTS3 varint that takes the value
113 ** of the difference between the current element and the previous one plus
114 ** two. For example, to store the position-list 4 9 113, the three varints
118 ** 6 7 106 are encoded (4-0+2, 9-4+2 and 113-9+2).
124 ** When this function is called, *pp points to the start of an element of
125 ** the list. *piPos contains the value of the previous entry in the list.
126 ** After it returns, *piPos contains the value of the next element of the
127 ** list and *pp is advanced to the following varint.
129 static void fts3GetDeltaPosition(char **pp
, int *piPos
){
131 *pp
+= fts3GetVarint32(*pp
, &iVal
);
136 ** Helper function for fts3ExprIterate() (see below).
138 static int fts3ExprIterate2(
139 Fts3Expr
*pExpr
, /* Expression to iterate phrases of */
140 int *piPhrase
, /* Pointer to phrase counter */
141 int (*x
)(Fts3Expr
*,int,void*), /* Callback function to invoke for phrases */
142 void *pCtx
/* Second argument to pass to callback */
144 int rc
; /* Return code */
145 int eType
= pExpr
->eType
; /* Type of expression node pExpr */
147 if( eType
!=FTSQUERY_PHRASE
){
148 assert( pExpr
->pLeft
&& pExpr
->pRight
);
149 rc
= fts3ExprIterate2(pExpr
->pLeft
, piPhrase
, x
, pCtx
);
150 if( rc
==SQLITE_OK
&& eType
!=FTSQUERY_NOT
){
151 rc
= fts3ExprIterate2(pExpr
->pRight
, piPhrase
, x
, pCtx
);
154 rc
= x(pExpr
, *piPhrase
, pCtx
);
161 ** Iterate through all phrase nodes in an FTS3 query, except those that
162 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
163 ** For each phrase node found, the supplied callback function is invoked.
165 ** If the callback function returns anything other than SQLITE_OK,
166 ** the iteration is abandoned and the error code returned immediately.
167 ** Otherwise, SQLITE_OK is returned after a callback has been made for
168 ** all eligible phrase nodes.
170 static int fts3ExprIterate(
171 Fts3Expr
*pExpr
, /* Expression to iterate phrases of */
172 int (*x
)(Fts3Expr
*,int,void*), /* Callback function to invoke for phrases */
173 void *pCtx
/* Second argument to pass to callback */
175 int iPhrase
= 0; /* Variable used as the phrase counter */
176 return fts3ExprIterate2(pExpr
, &iPhrase
, x
, pCtx
);
180 ** This is an fts3ExprIterate() callback used while loading the doclists
181 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
182 ** fts3ExprLoadDoclists().
184 static int fts3ExprLoadDoclistsCb(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
186 Fts3Phrase
*pPhrase
= pExpr
->pPhrase
;
187 LoadDoclistCtx
*p
= (LoadDoclistCtx
*)ctx
;
189 UNUSED_PARAMETER(iPhrase
);
192 p
->nToken
+= pPhrase
->nToken
;
198 ** Load the doclists for each phrase in the query associated with FTS3 cursor
201 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
202 ** phrases in the expression (all phrases except those directly or
203 ** indirectly descended from the right-hand-side of a NOT operator). If
204 ** pnToken is not NULL, then it is set to the number of tokens in all
205 ** matchable phrases of the expression.
207 static int fts3ExprLoadDoclists(
208 Fts3Cursor
*pCsr
, /* Fts3 cursor for current query */
209 int *pnPhrase
, /* OUT: Number of phrases in query */
210 int *pnToken
/* OUT: Number of tokens in query */
212 int rc
; /* Return Code */
213 LoadDoclistCtx sCtx
= {0,0,0}; /* Context for fts3ExprIterate() */
215 rc
= fts3ExprIterate(pCsr
->pExpr
, fts3ExprLoadDoclistsCb
, (void *)&sCtx
);
216 if( pnPhrase
) *pnPhrase
= sCtx
.nPhrase
;
217 if( pnToken
) *pnToken
= sCtx
.nToken
;
221 static int fts3ExprPhraseCountCb(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
223 UNUSED_PARAMETER(pExpr
);
224 UNUSED_PARAMETER(iPhrase
);
227 static int fts3ExprPhraseCount(Fts3Expr
*pExpr
){
229 (void)fts3ExprIterate(pExpr
, fts3ExprPhraseCountCb
, (void *)&nPhrase
);
234 ** Advance the position list iterator specified by the first two
235 ** arguments so that it points to the first element with a value greater
236 ** than or equal to parameter iNext.
238 static void fts3SnippetAdvance(char **ppIter
, int *piIter
, int iNext
){
239 char *pIter
= *ppIter
;
243 while( iIter
<iNext
){
244 if( 0==(*pIter
& 0xFE) ){
249 fts3GetDeltaPosition(&pIter
, &iIter
);
258 ** Advance the snippet iterator to the next candidate snippet.
260 static int fts3SnippetNextCandidate(SnippetIter
*pIter
){
261 int i
; /* Loop counter */
263 if( pIter
->iCurrent
<0 ){
264 /* The SnippetIter object has just been initialized. The first snippet
265 ** candidate always starts at offset 0 (even if this candidate has a
270 /* Advance the 'head' iterator of each phrase to the first offset that
271 ** is greater than or equal to (iNext+nSnippet).
273 for(i
=0; i
<pIter
->nPhrase
; i
++){
274 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
275 fts3SnippetAdvance(&pPhrase
->pHead
, &pPhrase
->iHead
, pIter
->nSnippet
);
279 int iEnd
= 0x7FFFFFFF;
281 for(i
=0; i
<pIter
->nPhrase
; i
++){
282 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
283 if( pPhrase
->pHead
&& pPhrase
->iHead
<iEnd
){
284 iEnd
= pPhrase
->iHead
;
287 if( iEnd
==0x7FFFFFFF ){
291 pIter
->iCurrent
= iStart
= iEnd
- pIter
->nSnippet
+ 1;
292 for(i
=0; i
<pIter
->nPhrase
; i
++){
293 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
294 fts3SnippetAdvance(&pPhrase
->pHead
, &pPhrase
->iHead
, iEnd
+1);
295 fts3SnippetAdvance(&pPhrase
->pTail
, &pPhrase
->iTail
, iStart
);
303 ** Retrieve information about the current candidate snippet of snippet
306 static void fts3SnippetDetails(
307 SnippetIter
*pIter
, /* Snippet iterator */
308 u64 mCovered
, /* Bitmask of phrases already covered */
309 int *piToken
, /* OUT: First token of proposed snippet */
310 int *piScore
, /* OUT: "Score" for this snippet */
311 u64
*pmCover
, /* OUT: Bitmask of phrases covered */
312 u64
*pmHighlight
/* OUT: Bitmask of terms to highlight */
314 int iStart
= pIter
->iCurrent
; /* First token of snippet */
315 int iScore
= 0; /* Score of this snippet */
316 int i
; /* Loop counter */
317 u64 mCover
= 0; /* Mask of phrases covered by this snippet */
318 u64 mHighlight
= 0; /* Mask of tokens to highlight in snippet */
320 for(i
=0; i
<pIter
->nPhrase
; i
++){
321 SnippetPhrase
*pPhrase
= &pIter
->aPhrase
[i
];
322 if( pPhrase
->pTail
){
323 char *pCsr
= pPhrase
->pTail
;
324 int iCsr
= pPhrase
->iTail
;
326 while( iCsr
<(iStart
+pIter
->nSnippet
) ){
328 u64 mPhrase
= (u64
)1 << i
;
329 u64 mPos
= (u64
)1 << (iCsr
- iStart
);
330 assert( iCsr
>=iStart
);
331 if( (mCover
|mCovered
)&mPhrase
){
338 for(j
=0; j
<pPhrase
->nToken
; j
++){
339 mHighlight
|= (mPos
>>j
);
342 if( 0==(*pCsr
& 0x0FE) ) break;
343 fts3GetDeltaPosition(&pCsr
, &iCsr
);
348 /* Set the output variables before returning. */
352 *pmHighlight
= mHighlight
;
356 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
357 ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
359 static int fts3SnippetFindPositions(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
360 SnippetIter
*p
= (SnippetIter
*)ctx
;
361 SnippetPhrase
*pPhrase
= &p
->aPhrase
[iPhrase
];
365 pPhrase
->nToken
= pExpr
->pPhrase
->nToken
;
366 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCsr
, pExpr
, p
->iCol
, &pCsr
);
367 assert( rc
==SQLITE_OK
|| pCsr
==0 );
370 pPhrase
->pList
= pCsr
;
371 fts3GetDeltaPosition(&pCsr
, &iFirst
);
373 pPhrase
->pHead
= pCsr
;
374 pPhrase
->pTail
= pCsr
;
375 pPhrase
->iHead
= iFirst
;
376 pPhrase
->iTail
= iFirst
;
378 assert( rc
!=SQLITE_OK
|| (
379 pPhrase
->pList
==0 && pPhrase
->pHead
==0 && pPhrase
->pTail
==0
387 ** Select the fragment of text consisting of nFragment contiguous tokens
388 ** from column iCol that represent the "best" snippet. The best snippet
389 ** is the snippet with the highest score, where scores are calculated
392 ** (a) +1 point for each occurrence of a matchable phrase in the snippet.
394 ** (b) +1000 points for the first occurrence of each matchable phrase in
395 ** the snippet for which the corresponding mCovered bit is not set.
397 ** The selected snippet parameters are stored in structure *pFragment before
398 ** returning. The score of the selected snippet is stored in *piScore
401 static int fts3BestSnippet(
402 int nSnippet
, /* Desired snippet length */
403 Fts3Cursor
*pCsr
, /* Cursor to create snippet for */
404 int iCol
, /* Index of column to create snippet from */
405 u64 mCovered
, /* Mask of phrases already covered */
406 u64
*pmSeen
, /* IN/OUT: Mask of phrases seen */
407 SnippetFragment
*pFragment
, /* OUT: Best snippet found */
408 int *piScore
/* OUT: Score of snippet pFragment */
410 int rc
; /* Return Code */
411 int nList
; /* Number of phrases in expression */
412 SnippetIter sIter
; /* Iterates through snippet candidates */
413 int nByte
; /* Number of bytes of space to allocate */
414 int iBestScore
= -1; /* Best snippet score found so far */
415 int i
; /* Loop counter */
417 memset(&sIter
, 0, sizeof(sIter
));
419 /* Iterate through the phrases in the expression to count them. The same
420 ** callback makes sure the doclists are loaded for each phrase.
422 rc
= fts3ExprLoadDoclists(pCsr
, &nList
, 0);
427 /* Now that it is known how many phrases there are, allocate and zero
428 ** the required space using malloc().
430 nByte
= sizeof(SnippetPhrase
) * nList
;
431 sIter
.aPhrase
= (SnippetPhrase
*)sqlite3_malloc(nByte
);
432 if( !sIter
.aPhrase
){
435 memset(sIter
.aPhrase
, 0, nByte
);
437 /* Initialize the contents of the SnippetIter object. Then iterate through
438 ** the set of phrases in the expression to populate the aPhrase[] array.
442 sIter
.nSnippet
= nSnippet
;
443 sIter
.nPhrase
= nList
;
445 (void)fts3ExprIterate(pCsr
->pExpr
, fts3SnippetFindPositions
, (void *)&sIter
);
447 /* Set the *pmSeen output variable. */
448 for(i
=0; i
<nList
; i
++){
449 if( sIter
.aPhrase
[i
].pHead
){
450 *pmSeen
|= (u64
)1 << i
;
454 /* Loop through all candidate snippets. Store the best snippet in
455 ** *pFragment. Store its associated 'score' in iBestScore.
457 pFragment
->iCol
= iCol
;
458 while( !fts3SnippetNextCandidate(&sIter
) ){
463 fts3SnippetDetails(&sIter
, mCovered
, &iPos
, &iScore
, &mCover
, &mHighlight
);
465 if( iScore
>iBestScore
){
466 pFragment
->iPos
= iPos
;
467 pFragment
->hlmask
= mHighlight
;
468 pFragment
->covered
= mCover
;
473 sqlite3_free(sIter
.aPhrase
);
474 *piScore
= iBestScore
;
480 ** Append a string to the string-buffer passed as the first argument.
482 ** If nAppend is negative, then the length of the string zAppend is
483 ** determined using strlen().
485 static int fts3StringAppend(
486 StrBuffer
*pStr
, /* Buffer to append to */
487 const char *zAppend
, /* Pointer to data to append to buffer */
488 int nAppend
/* Size of zAppend in bytes (or -1) */
491 nAppend
= (int)strlen(zAppend
);
494 /* If there is insufficient space allocated at StrBuffer.z, use realloc()
495 ** to grow the buffer so that it is big enough to accommodate the
498 if( pStr
->n
+nAppend
+1>=pStr
->nAlloc
){
499 int nAlloc
= pStr
->nAlloc
+nAppend
+100;
500 char *zNew
= sqlite3_realloc(pStr
->z
, nAlloc
);
505 pStr
->nAlloc
= nAlloc
;
507 assert( pStr
->z
!=0 && (pStr
->nAlloc
>= pStr
->n
+nAppend
+1) );
509 /* Append the data to the string buffer. */
510 memcpy(&pStr
->z
[pStr
->n
], zAppend
, nAppend
);
512 pStr
->z
[pStr
->n
] = '\0';
518 ** The fts3BestSnippet() function often selects snippets that end with a
519 ** query term. That is, the final term of the snippet is always a term
520 ** that requires highlighting. For example, if 'X' is a highlighted term
521 ** and '.' is a non-highlighted term, BestSnippet() may select:
525 ** This function "shifts" the beginning of the snippet forward in the
526 ** document so that there are approximately the same number of
527 ** non-highlighted terms to the right of the final highlighted term as there
528 ** are to the left of the first highlighted term. For example, to this:
532 ** This is done as part of extracting the snippet text, not when selecting
533 ** the snippet. Snippet selection is done based on doclists only, so there
534 ** is no way for fts3BestSnippet() to know whether or not the document
535 ** actually contains terms that follow the final highlighted term.
537 static int fts3SnippetShift(
538 Fts3Table
*pTab
, /* FTS3 table snippet comes from */
539 int iLangid
, /* Language id to use in tokenizing */
540 int nSnippet
, /* Number of tokens desired for snippet */
541 const char *zDoc
, /* Document text to extract snippet from */
542 int nDoc
, /* Size of buffer zDoc in bytes */
543 int *piPos
, /* IN/OUT: First token of snippet */
544 u64
*pHlmask
/* IN/OUT: Mask of tokens to highlight */
546 u64 hlmask
= *pHlmask
; /* Local copy of initial highlight-mask */
549 int nLeft
; /* Tokens to the left of first highlight */
550 int nRight
; /* Tokens to the right of last highlight */
551 int nDesired
; /* Ideal number of tokens to shift forward */
553 for(nLeft
=0; !(hlmask
& ((u64
)1 << nLeft
)); nLeft
++);
554 for(nRight
=0; !(hlmask
& ((u64
)1 << (nSnippet
-1-nRight
))); nRight
++);
555 nDesired
= (nLeft
-nRight
)/2;
557 /* Ideally, the start of the snippet should be pushed forward in the
558 ** document nDesired tokens. This block checks if there are actually
559 ** nDesired tokens to the right of the snippet. If so, *piPos and
560 ** *pHlMask are updated to shift the snippet nDesired tokens to the
561 ** right. Otherwise, the snippet is shifted by the number of tokens
565 int nShift
; /* Number of tokens to shift snippet by */
566 int iCurrent
= 0; /* Token counter */
567 int rc
; /* Return Code */
568 sqlite3_tokenizer_module
*pMod
;
569 sqlite3_tokenizer_cursor
*pC
;
570 pMod
= (sqlite3_tokenizer_module
*)pTab
->pTokenizer
->pModule
;
572 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
573 ** or more tokens in zDoc/nDoc.
575 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, iLangid
, zDoc
, nDoc
, &pC
);
579 while( rc
==SQLITE_OK
&& iCurrent
<(nSnippet
+nDesired
) ){
580 const char *ZDUMMY
; int DUMMY1
= 0, DUMMY2
= 0, DUMMY3
= 0;
581 rc
= pMod
->xNext(pC
, &ZDUMMY
, &DUMMY1
, &DUMMY2
, &DUMMY3
, &iCurrent
);
584 if( rc
!=SQLITE_OK
&& rc
!=SQLITE_DONE
){ return rc
; }
586 nShift
= (rc
==SQLITE_DONE
)+iCurrent
-nSnippet
;
587 assert( nShift
<=nDesired
);
590 *pHlmask
= hlmask
>> nShift
;
598 ** Extract the snippet text for fragment pFragment from cursor pCsr and
599 ** append it to string buffer pOut.
601 static int fts3SnippetText(
602 Fts3Cursor
*pCsr
, /* FTS3 Cursor */
603 SnippetFragment
*pFragment
, /* Snippet to extract */
604 int iFragment
, /* Fragment number */
605 int isLast
, /* True for final fragment in snippet */
606 int nSnippet
, /* Number of tokens in extracted snippet */
607 const char *zOpen
, /* String inserted before highlighted term */
608 const char *zClose
, /* String inserted after highlighted term */
609 const char *zEllipsis
, /* String inserted between snippets */
610 StrBuffer
*pOut
/* Write output here */
612 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
613 int rc
; /* Return code */
614 const char *zDoc
; /* Document text to extract snippet from */
615 int nDoc
; /* Size of zDoc in bytes */
616 int iCurrent
= 0; /* Current token number of document */
617 int iEnd
= 0; /* Byte offset of end of current token */
618 int isShiftDone
= 0; /* True after snippet is shifted */
619 int iPos
= pFragment
->iPos
; /* First token of snippet */
620 u64 hlmask
= pFragment
->hlmask
; /* Highlight-mask for snippet */
621 int iCol
= pFragment
->iCol
+1; /* Query column to extract text from */
622 sqlite3_tokenizer_module
*pMod
; /* Tokenizer module methods object */
623 sqlite3_tokenizer_cursor
*pC
; /* Tokenizer cursor open on zDoc/nDoc */
625 zDoc
= (const char *)sqlite3_column_text(pCsr
->pStmt
, iCol
);
627 if( sqlite3_column_type(pCsr
->pStmt
, iCol
)!=SQLITE_NULL
){
632 nDoc
= sqlite3_column_bytes(pCsr
->pStmt
, iCol
);
634 /* Open a token cursor on the document. */
635 pMod
= (sqlite3_tokenizer_module
*)pTab
->pTokenizer
->pModule
;
636 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, pCsr
->iLangid
, zDoc
,nDoc
,&pC
);
641 while( rc
==SQLITE_OK
){
642 const char *ZDUMMY
; /* Dummy argument used with tokenizer */
643 int DUMMY1
= -1; /* Dummy argument used with tokenizer */
644 int iBegin
= 0; /* Offset in zDoc of start of token */
645 int iFin
= 0; /* Offset in zDoc of end of token */
646 int isHighlight
= 0; /* True for highlighted terms */
648 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
649 ** in the FTS code the variable that the third argument to xNext points to
650 ** is initialized to zero before the first (*but not necessarily
651 ** subsequent*) call to xNext(). This is done for a particular application
652 ** that needs to know whether or not the tokenizer is being used for
653 ** snippet generation or for some other purpose.
655 ** Extreme care is required when writing code to depend on this
656 ** initialization. It is not a documented part of the tokenizer interface.
657 ** If a tokenizer is used directly by any code outside of FTS, this
658 ** convention might not be respected. */
659 rc
= pMod
->xNext(pC
, &ZDUMMY
, &DUMMY1
, &iBegin
, &iFin
, &iCurrent
);
661 if( rc
==SQLITE_DONE
){
662 /* Special case - the last token of the snippet is also the last token
663 ** of the column. Append any punctuation that occurred between the end
664 ** of the previous token and the end of the document to the output.
665 ** Then break out of the loop. */
666 rc
= fts3StringAppend(pOut
, &zDoc
[iEnd
], -1);
670 if( iCurrent
<iPos
){ continue; }
673 int n
= nDoc
- iBegin
;
674 rc
= fts3SnippetShift(
675 pTab
, pCsr
->iLangid
, nSnippet
, &zDoc
[iBegin
], n
, &iPos
, &hlmask
679 /* Now that the shift has been done, check if the initial "..." are
680 ** required. They are required if (a) this is not the first fragment,
681 ** or (b) this fragment does not begin at position 0 of its column.
683 if( rc
==SQLITE_OK
&& (iPos
>0 || iFragment
>0) ){
684 rc
= fts3StringAppend(pOut
, zEllipsis
, -1);
686 if( rc
!=SQLITE_OK
|| iCurrent
<iPos
) continue;
689 if( iCurrent
>=(iPos
+nSnippet
) ){
691 rc
= fts3StringAppend(pOut
, zEllipsis
, -1);
696 /* Set isHighlight to true if this term should be highlighted. */
697 isHighlight
= (hlmask
& ((u64
)1 << (iCurrent
-iPos
)))!=0;
699 if( iCurrent
>iPos
) rc
= fts3StringAppend(pOut
, &zDoc
[iEnd
], iBegin
-iEnd
);
700 if( rc
==SQLITE_OK
&& isHighlight
) rc
= fts3StringAppend(pOut
, zOpen
, -1);
701 if( rc
==SQLITE_OK
) rc
= fts3StringAppend(pOut
, &zDoc
[iBegin
], iFin
-iBegin
);
702 if( rc
==SQLITE_OK
&& isHighlight
) rc
= fts3StringAppend(pOut
, zClose
, -1);
713 ** This function is used to count the entries in a column-list (a
714 ** delta-encoded list of term offsets within a single column of a single
715 ** row). When this function is called, *ppCollist should point to the
716 ** beginning of the first varint in the column-list (the varint that
717 ** contains the position of the first matching term in the column data).
718 ** Before returning, *ppCollist is set to point to the first byte after
719 ** the last varint in the column-list (either the 0x00 signifying the end
720 ** of the position-list, or the 0x01 that precedes the column number of
721 ** the next column in the position-list).
723 ** The number of elements in the column-list is returned.
725 static int fts3ColumnlistCount(char **ppCollist
){
726 char *pEnd
= *ppCollist
;
730 /* A column-list is terminated by either a 0x01 or 0x00. */
731 while( 0xFE & (*pEnd
| c
) ){
741 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
742 ** for a single query.
744 ** fts3ExprIterate() callback to load the 'global' elements of a
745 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
746 ** of the matchinfo array that are constant for all rows returned by the
749 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
750 ** function populates Matchinfo.aMatchinfo[] as follows:
752 ** for(iCol=0; iCol<nCol; iCol++){
753 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
754 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
757 ** where X is the number of matches for phrase iPhrase is column iCol of all
758 ** rows of the table. Y is the number of rows for which column iCol contains
759 ** at least one instance of phrase iPhrase.
761 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
762 ** Y values are set to nDoc, where nDoc is the number of documents in the
763 ** file system. This is done because the full-text index doclist is required
764 ** to calculate these values properly, and the full-text index doclist is
765 ** not available for deferred tokens.
767 static int fts3ExprGlobalHitsCb(
768 Fts3Expr
*pExpr
, /* Phrase expression node */
769 int iPhrase
, /* Phrase number (numbered from zero) */
770 void *pCtx
/* Pointer to MatchInfo structure */
772 MatchInfo
*p
= (MatchInfo
*)pCtx
;
773 return sqlite3Fts3EvalPhraseStats(
774 p
->pCursor
, pExpr
, &p
->aMatchinfo
[3*iPhrase
*p
->nCol
]
779 ** fts3ExprIterate() callback used to collect the "local" part of the
780 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
781 ** array that are different for each row returned by the query.
783 static int fts3ExprLocalHitsCb(
784 Fts3Expr
*pExpr
, /* Phrase expression node */
785 int iPhrase
, /* Phrase number */
786 void *pCtx
/* Pointer to MatchInfo structure */
789 MatchInfo
*p
= (MatchInfo
*)pCtx
;
790 int iStart
= iPhrase
* p
->nCol
* 3;
793 for(i
=0; i
<p
->nCol
&& rc
==SQLITE_OK
; i
++){
795 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCursor
, pExpr
, i
, &pCsr
);
797 p
->aMatchinfo
[iStart
+i
*3] = fts3ColumnlistCount(&pCsr
);
799 p
->aMatchinfo
[iStart
+i
*3] = 0;
806 static int fts3MatchinfoCheck(
811 if( (cArg
==FTS3_MATCHINFO_NPHRASE
)
812 || (cArg
==FTS3_MATCHINFO_NCOL
)
813 || (cArg
==FTS3_MATCHINFO_NDOC
&& pTab
->bFts4
)
814 || (cArg
==FTS3_MATCHINFO_AVGLENGTH
&& pTab
->bFts4
)
815 || (cArg
==FTS3_MATCHINFO_LENGTH
&& pTab
->bHasDocsize
)
816 || (cArg
==FTS3_MATCHINFO_LCS
)
817 || (cArg
==FTS3_MATCHINFO_HITS
)
821 *pzErr
= sqlite3_mprintf("unrecognized matchinfo request: %c", cArg
);
825 static int fts3MatchinfoSize(MatchInfo
*pInfo
, char cArg
){
826 int nVal
; /* Number of integers output by cArg */
829 case FTS3_MATCHINFO_NDOC
:
830 case FTS3_MATCHINFO_NPHRASE
:
831 case FTS3_MATCHINFO_NCOL
:
835 case FTS3_MATCHINFO_AVGLENGTH
:
836 case FTS3_MATCHINFO_LENGTH
:
837 case FTS3_MATCHINFO_LCS
:
842 assert( cArg
==FTS3_MATCHINFO_HITS
);
843 nVal
= pInfo
->nCol
* pInfo
->nPhrase
* 3;
850 static int fts3MatchinfoSelectDoctotal(
852 sqlite3_stmt
**ppStmt
,
853 sqlite3_int64
*pnDoc
,
861 int rc
= sqlite3Fts3SelectDoctotal(pTab
, ppStmt
);
862 if( rc
!=SQLITE_OK
) return rc
;
865 assert( sqlite3_data_count(pStmt
)==1 );
867 a
= sqlite3_column_blob(pStmt
, 0);
868 a
+= sqlite3Fts3GetVarint(a
, &nDoc
);
869 if( nDoc
==0 ) return FTS_CORRUPT_VTAB
;
872 if( paLen
) *paLen
= a
;
/*
** State kept while walking the multi-column position-list that holds the
** hits of one phrase on one row. One instance of the structure declared
** here is used per phrase in order to calculate the values for a
** matchinfo() FTS3_MATCHINFO_LCS request.
*/
typedef struct LcsIterator LcsIterator;
884 Fts3Expr
*pExpr
; /* Pointer to phrase expression */
885 int iPosOffset
; /* Tokens count up to end of this phrase */
886 char *pRead
; /* Cursor used to iterate through aDoclist */
887 int iPos
; /* Current position */
/*
** If LcsIterator.iCol is set to the following value, the iterator has
** finished iterating through all offsets for all columns.
**
** NOTE(review): no iCol member is visible among the LcsIterator fields in
** this file, so the sentence above may be stale - confirm.
**
** The definition deliberately carries no trailing semicolon, so that the
** constant can be used inside comparisons and other expressions as well as
** on the right-hand side of an assignment statement.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
896 static int fts3MatchinfoLcsCb(
897 Fts3Expr
*pExpr
, /* Phrase expression node */
898 int iPhrase
, /* Phrase number (numbered from zero) */
899 void *pCtx
/* Pointer to MatchInfo structure */
901 LcsIterator
*aIter
= (LcsIterator
*)pCtx
;
902 aIter
[iPhrase
].pExpr
= pExpr
;
907 ** Advance the iterator passed as an argument to the next position. Return
908 ** 1 if the iterator is at EOF or if it now points to the start of the
909 ** position list for the next column.
911 static int fts3LcsIteratorAdvance(LcsIterator
*pIter
){
912 char *pRead
= pIter
->pRead
;
916 pRead
+= sqlite3Fts3GetVarint(pRead
, &iRead
);
917 if( iRead
==0 || iRead
==1 ){
921 pIter
->iPos
+= (int)(iRead
-2);
924 pIter
->pRead
= pRead
;
929 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
931 ** If the call is successful, the longest-common-substring lengths for each
932 ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
933 ** array before returning. SQLITE_OK is returned in this case.
935 ** Otherwise, if an error occurs, an SQLite error code is returned and the
936 ** data written to the first nCol elements of pInfo->aMatchinfo[] is
939 static int fts3MatchinfoLcs(Fts3Cursor
*pCsr
, MatchInfo
*pInfo
){
945 /* Allocate and populate the array of LcsIterator objects. The array
946 ** contains one element for each matchable phrase in the query.
948 aIter
= sqlite3_malloc(sizeof(LcsIterator
) * pCsr
->nPhrase
);
949 if( !aIter
) return SQLITE_NOMEM
;
950 memset(aIter
, 0, sizeof(LcsIterator
) * pCsr
->nPhrase
);
951 (void)fts3ExprIterate(pCsr
->pExpr
, fts3MatchinfoLcsCb
, (void*)aIter
);
953 for(i
=0; i
<pInfo
->nPhrase
; i
++){
954 LcsIterator
*pIter
= &aIter
[i
];
955 nToken
-= pIter
->pExpr
->pPhrase
->nToken
;
956 pIter
->iPosOffset
= nToken
;
959 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
960 int nLcs
= 0; /* LCS value for this column */
961 int nLive
= 0; /* Number of iterators in aIter not at EOF */
963 for(i
=0; i
<pInfo
->nPhrase
; i
++){
965 LcsIterator
*pIt
= &aIter
[i
];
966 rc
= sqlite3Fts3EvalPhrasePoslist(pCsr
, pIt
->pExpr
, iCol
, &pIt
->pRead
);
967 if( rc
!=SQLITE_OK
) return rc
;
969 pIt
->iPos
= pIt
->iPosOffset
;
970 fts3LcsIteratorAdvance(&aIter
[i
]);
976 LcsIterator
*pAdv
= 0; /* The iterator to advance by one position */
977 int nThisLcs
= 0; /* LCS for the current iterator positions */
979 for(i
=0; i
<pInfo
->nPhrase
; i
++){
980 LcsIterator
*pIter
= &aIter
[i
];
981 if( pIter
->pRead
==0 ){
982 /* This iterator is already at EOF for this column. */
985 if( pAdv
==0 || pIter
->iPos
<pAdv
->iPos
){
988 if( nThisLcs
==0 || pIter
->iPos
==pIter
[-1].iPos
){
993 if( nThisLcs
>nLcs
) nLcs
= nThisLcs
;
996 if( fts3LcsIteratorAdvance(pAdv
) ) nLive
--;
999 pInfo
->aMatchinfo
[iCol
] = nLcs
;
1002 sqlite3_free(aIter
);
1007 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1008 ** be returned by the matchinfo() function. Argument zArg contains the
1009 ** format string passed as the second argument to matchinfo (or the
1010 ** default value "pcx" if no second argument was specified). The format
1011 ** string has already been validated and the pInfo->aMatchinfo[] array
1012 ** is guaranteed to be large enough for the output.
1014 ** If bGlobal is true, then populate all fields of the matchinfo() output.
1015 ** If it is false, then assume that those fields that do not change between
1016 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1017 ** have already been populated.
1019 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1020 ** occurs. If a value other than SQLITE_OK is returned, the state the
1021 ** pInfo->aMatchinfo[] buffer is left in is undefined.
1023 static int fts3MatchinfoValues(
1024 Fts3Cursor
*pCsr
, /* FTS3 cursor object */
1025 int bGlobal
, /* True to grab the global stats */
1026 MatchInfo
*pInfo
, /* Matchinfo context object */
1027 const char *zArg
/* Matchinfo format string */
1031 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1032 sqlite3_stmt
*pSelect
= 0;
1034 for(i
=0; rc
==SQLITE_OK
&& zArg
[i
]; i
++){
1037 case FTS3_MATCHINFO_NPHRASE
:
1038 if( bGlobal
) pInfo
->aMatchinfo
[0] = pInfo
->nPhrase
;
1041 case FTS3_MATCHINFO_NCOL
:
1042 if( bGlobal
) pInfo
->aMatchinfo
[0] = pInfo
->nCol
;
1045 case FTS3_MATCHINFO_NDOC
:
1047 sqlite3_int64 nDoc
= 0;
1048 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &nDoc
, 0);
1049 pInfo
->aMatchinfo
[0] = (u32
)nDoc
;
1053 case FTS3_MATCHINFO_AVGLENGTH
:
1055 sqlite3_int64 nDoc
; /* Number of rows in table */
1056 const char *a
; /* Aggregate column length array */
1058 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &nDoc
, &a
);
1059 if( rc
==SQLITE_OK
){
1061 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1063 sqlite3_int64 nToken
;
1064 a
+= sqlite3Fts3GetVarint(a
, &nToken
);
1065 iVal
= (u32
)(((u32
)(nToken
&0xffffffff)+nDoc
/2)/nDoc
);
1066 pInfo
->aMatchinfo
[iCol
] = iVal
;
1072 case FTS3_MATCHINFO_LENGTH
: {
1073 sqlite3_stmt
*pSelectDocsize
= 0;
1074 rc
= sqlite3Fts3SelectDocsize(pTab
, pCsr
->iPrevId
, &pSelectDocsize
);
1075 if( rc
==SQLITE_OK
){
1077 const char *a
= sqlite3_column_blob(pSelectDocsize
, 0);
1078 for(iCol
=0; iCol
<pInfo
->nCol
; iCol
++){
1079 sqlite3_int64 nToken
;
1080 a
+= sqlite3Fts3GetVarint(a
, &nToken
);
1081 pInfo
->aMatchinfo
[iCol
] = (u32
)nToken
;
1084 sqlite3_reset(pSelectDocsize
);
1088 case FTS3_MATCHINFO_LCS
:
1089 rc
= fts3ExprLoadDoclists(pCsr
, 0, 0);
1090 if( rc
==SQLITE_OK
){
1091 rc
= fts3MatchinfoLcs(pCsr
, pInfo
);
1097 assert( zArg
[i
]==FTS3_MATCHINFO_HITS
);
1098 pExpr
= pCsr
->pExpr
;
1099 rc
= fts3ExprLoadDoclists(pCsr
, 0, 0);
1100 if( rc
!=SQLITE_OK
) break;
1102 if( pCsr
->pDeferred
){
1103 rc
= fts3MatchinfoSelectDoctotal(pTab
, &pSelect
, &pInfo
->nDoc
, 0);
1104 if( rc
!=SQLITE_OK
) break;
1106 rc
= fts3ExprIterate(pExpr
, fts3ExprGlobalHitsCb
,(void*)pInfo
);
1107 if( rc
!=SQLITE_OK
) break;
1109 (void)fts3ExprIterate(pExpr
, fts3ExprLocalHitsCb
,(void*)pInfo
);
1114 pInfo
->aMatchinfo
+= fts3MatchinfoSize(pInfo
, zArg
[i
]);
1117 sqlite3_reset(pSelect
);
1123 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1124 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1126 static int fts3GetMatchinfo(
1127 Fts3Cursor
*pCsr
, /* FTS3 Cursor object */
1128 const char *zArg
/* Second argument to matchinfo() function */
1131 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1133 int bGlobal
= 0; /* Collect 'global' stats as well as local */
1135 memset(&sInfo
, 0, sizeof(MatchInfo
));
1136 sInfo
.pCursor
= pCsr
;
1137 sInfo
.nCol
= pTab
->nColumn
;
1139 /* If there is cached matchinfo() data, but the format string for the
1140 ** cache does not match the format string for this request, discard
1141 ** the cached data. */
1142 if( pCsr
->zMatchinfo
&& strcmp(pCsr
->zMatchinfo
, zArg
) ){
1143 assert( pCsr
->aMatchinfo
);
1144 sqlite3_free(pCsr
->aMatchinfo
);
1145 pCsr
->zMatchinfo
= 0;
1146 pCsr
->aMatchinfo
= 0;
1149 /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
1150 ** matchinfo function has been called for this query. In this case
1151 ** allocate the array used to accumulate the matchinfo data and
1152 ** initialize those elements that are constant for every row.
1154 if( pCsr
->aMatchinfo
==0 ){
1155 int nMatchinfo
= 0; /* Number of u32 elements in match-info */
1156 int nArg
; /* Bytes in zArg */
1157 int i
; /* Used to iterate through zArg */
1159 /* Determine the number of phrases in the query */
1160 pCsr
->nPhrase
= fts3ExprPhraseCount(pCsr
->pExpr
);
1161 sInfo
.nPhrase
= pCsr
->nPhrase
;
1163 /* Determine the number of integers in the buffer returned by this call. */
1164 for(i
=0; zArg
[i
]; i
++){
1165 nMatchinfo
+= fts3MatchinfoSize(&sInfo
, zArg
[i
]);
1168 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1169 nArg
= (int)strlen(zArg
);
1170 pCsr
->aMatchinfo
= (u32
*)sqlite3_malloc(sizeof(u32
)*nMatchinfo
+ nArg
+ 1);
1171 if( !pCsr
->aMatchinfo
) return SQLITE_NOMEM
;
1173 pCsr
->zMatchinfo
= (char *)&pCsr
->aMatchinfo
[nMatchinfo
];
1174 pCsr
->nMatchinfo
= nMatchinfo
;
1175 memcpy(pCsr
->zMatchinfo
, zArg
, nArg
+1);
1176 memset(pCsr
->aMatchinfo
, 0, sizeof(u32
)*nMatchinfo
);
1177 pCsr
->isMatchinfoNeeded
= 1;
1181 sInfo
.aMatchinfo
= pCsr
->aMatchinfo
;
1182 sInfo
.nPhrase
= pCsr
->nPhrase
;
1183 if( pCsr
->isMatchinfoNeeded
){
1184 rc
= fts3MatchinfoValues(pCsr
, bGlobal
, &sInfo
, zArg
);
1185 pCsr
->isMatchinfoNeeded
= 0;
1192 ** Implementation of snippet() function.
1194 void sqlite3Fts3Snippet(
1195 sqlite3_context
*pCtx
, /* SQLite function call context */
1196 Fts3Cursor
*pCsr
, /* Cursor object */
1197 const char *zStart
, /* Snippet start text - "<b>" */
1198 const char *zEnd
, /* Snippet end text - "</b>" */
1199 const char *zEllipsis
, /* Snippet ellipsis text - "<b>...</b>" */
1200 int iCol
, /* Extract snippet from this column */
1201 int nToken
/* Approximate number of tokens in snippet */
1203 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1206 StrBuffer res
= {0, 0, 0};
1208 /* The returned text includes up to four fragments of text extracted from
1209 ** the data in the current row. The first iteration of the for(...) loop
1210 ** below attempts to locate a single fragment of text nToken tokens in
1211 ** size that contains at least one instance of all phrases in the query
1212 ** expression that appear in the current row. If such a fragment of text
1213 ** cannot be found, the second iteration of the loop attempts to locate
1214 ** a pair of fragments, and so on.
1216 int nSnippet
= 0; /* Number of fragments in this snippet */
1217 SnippetFragment aSnippet
[4]; /* Maximum of 4 fragments per snippet */
1218 int nFToken
= -1; /* Number of tokens in each fragment */
1221 sqlite3_result_text(pCtx
, "", 0, SQLITE_STATIC
);
1225 for(nSnippet
=1; 1; nSnippet
++){
1227 int iSnip
; /* Loop counter 0..nSnippet-1 */
1228 u64 mCovered
= 0; /* Bitmask of phrases covered by snippet */
1229 u64 mSeen
= 0; /* Bitmask of phrases seen by BestSnippet() */
1232 nFToken
= (nToken
+nSnippet
-1) / nSnippet
;
1234 nFToken
= -1 * nToken
;
1237 for(iSnip
=0; iSnip
<nSnippet
; iSnip
++){
1238 int iBestScore
= -1; /* Best score of columns checked so far */
1239 int iRead
; /* Used to iterate through columns */
1240 SnippetFragment
*pFragment
= &aSnippet
[iSnip
];
1242 memset(pFragment
, 0, sizeof(*pFragment
));
1244 /* Loop through all columns of the table being considered for snippets.
1245 ** If the iCol argument to this function was negative, this means all
1246 ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1248 for(iRead
=0; iRead
<pTab
->nColumn
; iRead
++){
1249 SnippetFragment sF
= {0, 0, 0, 0};
1251 if( iCol
>=0 && iRead
!=iCol
) continue;
1253 /* Find the best snippet of nFToken tokens in column iRead. */
1254 rc
= fts3BestSnippet(nFToken
, pCsr
, iRead
, mCovered
, &mSeen
, &sF
, &iS
);
1255 if( rc
!=SQLITE_OK
){
1258 if( iS
>iBestScore
){
1264 mCovered
|= pFragment
->covered
;
1267 /* If all query phrases seen by fts3BestSnippet() are present in at least
1268 ** one of the nSnippet snippet fragments, break out of the loop.
1270 assert( (mCovered
&mSeen
)==mCovered
);
1271 if( mSeen
==mCovered
|| nSnippet
==SizeofArray(aSnippet
) ) break;
1274 assert( nFToken
>0 );
1276 for(i
=0; i
<nSnippet
&& rc
==SQLITE_OK
; i
++){
1277 rc
= fts3SnippetText(pCsr
, &aSnippet
[i
],
1278 i
, (i
==nSnippet
-1), nFToken
, zStart
, zEnd
, zEllipsis
, &res
1283 sqlite3Fts3SegmentsClose(pTab
);
1284 if( rc
!=SQLITE_OK
){
1285 sqlite3_result_error_code(pCtx
, rc
);
1286 sqlite3_free(res
.z
);
1288 sqlite3_result_text(pCtx
, res
.z
, -1, sqlite3_free
);
1293 typedef struct TermOffset TermOffset
;
1294 typedef struct TermOffsetCtx TermOffsetCtx
;
1297 char *pList
; /* Position-list */
1298 int iPos
; /* Position just read from pList */
1299 int iOff
; /* Offset of this term from read positions */
1302 struct TermOffsetCtx
{
1304 int iCol
; /* Column of table to populate aTerm for */
1306 sqlite3_int64 iDocid
;
1311 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1313 static int fts3ExprTermOffsetInit(Fts3Expr
*pExpr
, int iPhrase
, void *ctx
){
1314 TermOffsetCtx
*p
= (TermOffsetCtx
*)ctx
;
1315 int nTerm
; /* Number of tokens in phrase */
1316 int iTerm
; /* For looping through nTerm phrase terms */
1317 char *pList
; /* Pointer to position list for phrase */
1318 int iPos
= 0; /* First position in position-list */
1321 UNUSED_PARAMETER(iPhrase
);
1322 rc
= sqlite3Fts3EvalPhrasePoslist(p
->pCsr
, pExpr
, p
->iCol
, &pList
);
1323 nTerm
= pExpr
->pPhrase
->nToken
;
1325 fts3GetDeltaPosition(&pList
, &iPos
);
1329 for(iTerm
=0; iTerm
<nTerm
; iTerm
++){
1330 TermOffset
*pT
= &p
->aTerm
[p
->iTerm
++];
1331 pT
->iOff
= nTerm
-iTerm
-1;
1340 ** Implementation of offsets() function.
1342 void sqlite3Fts3Offsets(
1343 sqlite3_context
*pCtx
, /* SQLite function call context */
1344 Fts3Cursor
*pCsr
/* Cursor object */
1346 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1347 sqlite3_tokenizer_module
const *pMod
= pTab
->pTokenizer
->pModule
;
1348 int rc
; /* Return Code */
1349 int nToken
; /* Number of tokens in query */
1350 int iCol
; /* Column currently being processed */
1351 StrBuffer res
= {0, 0, 0}; /* Result string */
1352 TermOffsetCtx sCtx
; /* Context for fts3ExprTermOffsetInit() */
1355 sqlite3_result_text(pCtx
, "", 0, SQLITE_STATIC
);
1359 memset(&sCtx
, 0, sizeof(sCtx
));
1360 assert( pCsr
->isRequireSeek
==0 );
1362 /* Count the number of terms in the query */
1363 rc
= fts3ExprLoadDoclists(pCsr
, 0, &nToken
);
1364 if( rc
!=SQLITE_OK
) goto offsets_out
;
1366 /* Allocate the array of TermOffset iterators. */
1367 sCtx
.aTerm
= (TermOffset
*)sqlite3_malloc(sizeof(TermOffset
)*nToken
);
1368 if( 0==sCtx
.aTerm
){
1372 sCtx
.iDocid
= pCsr
->iPrevId
;
1375 /* Loop through the table columns, appending offset information to
1376 ** string-buffer res for each column.
1378 for(iCol
=0; iCol
<pTab
->nColumn
; iCol
++){
1379 sqlite3_tokenizer_cursor
*pC
; /* Tokenizer cursor */
1380 const char *ZDUMMY
; /* Dummy argument used with xNext() */
1381 int NDUMMY
= 0; /* Dummy argument used with xNext() */
1388 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
1389 ** no way that this operation can fail, so the return code from
1390 ** fts3ExprIterate() can be discarded.
1394 (void)fts3ExprIterate(pCsr
->pExpr
, fts3ExprTermOffsetInit
, (void *)&sCtx
);
1396 /* Retrieve the text stored in column iCol. If an SQL NULL is stored
1397 ** in column iCol, jump immediately to the next iteration of the loop.
1398 ** If an OOM occurs while retrieving the data (this can happen if SQLite
1399 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1402 zDoc
= (const char *)sqlite3_column_text(pCsr
->pStmt
, iCol
+1);
1403 nDoc
= sqlite3_column_bytes(pCsr
->pStmt
, iCol
+1);
1405 if( sqlite3_column_type(pCsr
->pStmt
, iCol
+1)==SQLITE_NULL
){
1412 /* Initialize a tokenizer iterator to iterate through column iCol. */
1413 rc
= sqlite3Fts3OpenTokenizer(pTab
->pTokenizer
, pCsr
->iLangid
,
1416 if( rc
!=SQLITE_OK
) goto offsets_out
;
1418 rc
= pMod
->xNext(pC
, &ZDUMMY
, &NDUMMY
, &iStart
, &iEnd
, &iCurrent
);
1419 while( rc
==SQLITE_OK
){
1420 int i
; /* Used to loop through terms */
1421 int iMinPos
= 0x7FFFFFFF; /* Position of next token */
1422 TermOffset
*pTerm
= 0; /* TermOffset associated with next token */
1424 for(i
=0; i
<nToken
; i
++){
1425 TermOffset
*pT
= &sCtx
.aTerm
[i
];
1426 if( pT
->pList
&& (pT
->iPos
-pT
->iOff
)<iMinPos
){
1427 iMinPos
= pT
->iPos
-pT
->iOff
;
1433 /* All offsets for this column have been gathered. */
1436 assert( iCurrent
<=iMinPos
);
1437 if( 0==(0xFE&*pTerm
->pList
) ){
1440 fts3GetDeltaPosition(&pTerm
->pList
, &pTerm
->iPos
);
1442 while( rc
==SQLITE_OK
&& iCurrent
<iMinPos
){
1443 rc
= pMod
->xNext(pC
, &ZDUMMY
, &NDUMMY
, &iStart
, &iEnd
, &iCurrent
);
1445 if( rc
==SQLITE_OK
){
1447 sqlite3_snprintf(sizeof(aBuffer
), aBuffer
,
1448 "%d %d %d %d ", iCol
, pTerm
-sCtx
.aTerm
, iStart
, iEnd
-iStart
1450 rc
= fts3StringAppend(&res
, aBuffer
, -1);
1451 }else if( rc
==SQLITE_DONE
&& pTab
->zContentTbl
==0 ){
1452 rc
= FTS_CORRUPT_VTAB
;
1456 if( rc
==SQLITE_DONE
){
1461 if( rc
!=SQLITE_OK
) goto offsets_out
;
1465 sqlite3_free(sCtx
.aTerm
);
1466 assert( rc
!=SQLITE_DONE
);
1467 sqlite3Fts3SegmentsClose(pTab
);
1468 if( rc
!=SQLITE_OK
){
1469 sqlite3_result_error_code(pCtx
, rc
);
1470 sqlite3_free(res
.z
);
1472 sqlite3_result_text(pCtx
, res
.z
, res
.n
-1, sqlite3_free
);
1478 ** Implementation of matchinfo() function.
1480 void sqlite3Fts3Matchinfo(
1481 sqlite3_context
*pContext
, /* Function call context */
1482 Fts3Cursor
*pCsr
, /* FTS3 table cursor */
1483 const char *zArg
/* Second arg to matchinfo() function */
1485 Fts3Table
*pTab
= (Fts3Table
*)pCsr
->base
.pVtab
;
1488 const char *zFormat
;
1491 for(i
=0; zArg
[i
]; i
++){
1493 if( fts3MatchinfoCheck(pTab
, zArg
[i
], &zErr
) ){
1494 sqlite3_result_error(pContext
, zErr
, -1);
1501 zFormat
= FTS3_MATCHINFO_DEFAULT
;
1505 sqlite3_result_blob(pContext
, "", 0, SQLITE_STATIC
);
1509 /* Retrieve matchinfo() data. */
1510 rc
= fts3GetMatchinfo(pCsr
, zFormat
);
1511 sqlite3Fts3SegmentsClose(pTab
);
1513 if( rc
!=SQLITE_OK
){
1514 sqlite3_result_error_code(pContext
, rc
);
1516 int n
= pCsr
->nMatchinfo
* sizeof(u32
);
1517 sqlite3_result_blob(pContext
, pCsr
->aMatchinfo
, n
, SQLITE_TRANSIENT
);