Snapshot of upstream SQLite 3.46.1
[sqlcipher.git] / ext / fts3 / fts3_snippet.c
blobf6caabf4c9f6659bc721920aa47c71d0d0353b3f
/*
** 2009 Oct 23
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/
14 #include "fts3Int.h"
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
17 #include <string.h>
18 #include <assert.h>
20 #ifndef SQLITE_AMALGAMATION
21 typedef sqlite3_int64 i64;
22 #endif
/*
** Characters that may appear in the second argument to matchinfo().
*/
27 #define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */
28 #define FTS3_MATCHINFO_NCOL 'c' /* 1 value */
29 #define FTS3_MATCHINFO_NDOC 'n' /* 1 value */
30 #define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */
31 #define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */
32 #define FTS3_MATCHINFO_LCS 's' /* nCol values */
33 #define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
34 #define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */
35 #define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */
/*
** The default value for the second argument to matchinfo().
*/
40 #define FTS3_MATCHINFO_DEFAULT "pcx"
44 ** Used as an sqlite3Fts3ExprIterate() context when loading phrase doclists to
45 ** Fts3Expr.aDoclist[]/nDoclist.
47 typedef struct LoadDoclistCtx LoadDoclistCtx;
48 struct LoadDoclistCtx {
49 Fts3Cursor *pCsr; /* FTS3 Cursor */
50 int nPhrase; /* Number of phrases seen so far */
51 int nToken; /* Number of tokens seen so far */
55 ** The following types are used as part of the implementation of the
56 ** fts3BestSnippet() routine.
58 typedef struct SnippetIter SnippetIter;
59 typedef struct SnippetPhrase SnippetPhrase;
60 typedef struct SnippetFragment SnippetFragment;
62 struct SnippetIter {
63 Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
64 int iCol; /* Extract snippet from this column */
65 int nSnippet; /* Requested snippet length (in tokens) */
66 int nPhrase; /* Number of phrases in query */
67 SnippetPhrase *aPhrase; /* Array of size nPhrase */
68 int iCurrent; /* First token of current snippet */
71 struct SnippetPhrase {
72 int nToken; /* Number of tokens in phrase */
73 char *pList; /* Pointer to start of phrase position list */
74 i64 iHead; /* Next value in position list */
75 char *pHead; /* Position list data following iHead */
76 i64 iTail; /* Next value in trailing position list */
77 char *pTail; /* Position list data following iTail */
80 struct SnippetFragment {
81 int iCol; /* Column snippet is extracted from */
82 int iPos; /* Index of first token in snippet */
83 u64 covered; /* Mask of query phrases covered */
84 u64 hlmask; /* Mask of snippet terms to highlight */
88 ** This type is used as an sqlite3Fts3ExprIterate() context object while
89 ** accumulating the data returned by the matchinfo() function.
91 typedef struct MatchInfo MatchInfo;
92 struct MatchInfo {
93 Fts3Cursor *pCursor; /* FTS3 Cursor */
94 int nCol; /* Number of columns in table */
95 int nPhrase; /* Number of matchable phrases in query */
96 sqlite3_int64 nDoc; /* Number of docs in database */
97 char flag;
98 u32 *aMatchinfo; /* Pre-allocated buffer */
102 ** An instance of this structure is used to manage a pair of buffers, each
103 ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
104 ** for details.
106 struct MatchinfoBuffer {
107 u8 aRef[3];
108 int nElem;
109 int bGlobal; /* Set if global data is loaded */
110 char *zMatchinfo;
111 u32 aMatchinfo[1];
/*
** Growable string accumulator. Both snippet() and offsets() return text;
** they build that text in one of these while running. The buffer is
** extended by fts3StringAppend().
*/
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Buffer holding the string */
  int n;                          /* Bytes used in z, not counting the nul */
  int nAlloc;                     /* Bytes allocated for z */
};
/*************************************************************************
** Start of MatchinfoBuffer code.
*/
133 ** Allocate a two-slot MatchinfoBuffer object.
135 static MatchinfoBuffer *fts3MIBufferNew(size_t nElem, const char *zMatchinfo){
136 MatchinfoBuffer *pRet;
137 sqlite3_int64 nByte = sizeof(u32) * (2*(sqlite3_int64)nElem + 1)
138 + sizeof(MatchinfoBuffer);
139 sqlite3_int64 nStr = strlen(zMatchinfo);
141 pRet = sqlite3Fts3MallocZero(nByte + nStr+1);
142 if( pRet ){
143 pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet;
144 pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0]
145 + sizeof(u32)*((int)nElem+1);
146 pRet->nElem = (int)nElem;
147 pRet->zMatchinfo = ((char*)pRet) + nByte;
148 memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1);
149 pRet->aRef[0] = 1;
152 return pRet;
155 static void fts3MIBufferFree(void *p){
156 MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]);
158 assert( (u32*)p==&pBuf->aMatchinfo[1]
159 || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2]
161 if( (u32*)p==&pBuf->aMatchinfo[1] ){
162 pBuf->aRef[1] = 0;
163 }else{
164 pBuf->aRef[2] = 0;
167 if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){
168 sqlite3_free(pBuf);
172 static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
173 void (*xRet)(void*) = 0;
174 u32 *aOut = 0;
176 if( p->aRef[1]==0 ){
177 p->aRef[1] = 1;
178 aOut = &p->aMatchinfo[1];
179 xRet = fts3MIBufferFree;
181 else if( p->aRef[2]==0 ){
182 p->aRef[2] = 1;
183 aOut = &p->aMatchinfo[p->nElem+2];
184 xRet = fts3MIBufferFree;
185 }else{
186 aOut = (u32*)sqlite3_malloc64(p->nElem * sizeof(u32));
187 if( aOut ){
188 xRet = sqlite3_free;
189 if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32));
193 *paOut = aOut;
194 return xRet;
197 static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
198 p->bGlobal = 1;
199 memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32));
203 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
205 void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
206 if( p ){
207 assert( p->aRef[0]==1 );
208 p->aRef[0] = 0;
209 if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){
210 sqlite3_free(p);
/*
** End of MatchinfoBuffer code.
*************************************************************************/
221 ** This function is used to help iterate through a position-list. A position
222 ** list is a list of unique integers, sorted from smallest to largest. Each
223 ** element of the list is represented by an FTS3 varint that takes the value
224 ** of the difference between the current element and the previous one plus
225 ** two. For example, to store the position-list:
227 ** 4 9 113
229 ** the three varints:
231 ** 6 7 106
233 ** are encoded.
235 ** When this function is called, *pp points to the start of an element of
236 ** the list. *piPos contains the value of the previous entry in the list.
237 ** After it returns, *piPos contains the value of the next element of the
238 ** list and *pp is advanced to the following varint.
240 static void fts3GetDeltaPosition(char **pp, i64 *piPos){
241 int iVal;
242 *pp += fts3GetVarint32(*pp, &iVal);
243 *piPos += (iVal-2);
247 ** Helper function for sqlite3Fts3ExprIterate() (see below).
249 static int fts3ExprIterate2(
250 Fts3Expr *pExpr, /* Expression to iterate phrases of */
251 int *piPhrase, /* Pointer to phrase counter */
252 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
253 void *pCtx /* Second argument to pass to callback */
255 int rc; /* Return code */
256 int eType = pExpr->eType; /* Type of expression node pExpr */
258 if( eType!=FTSQUERY_PHRASE ){
259 assert( pExpr->pLeft && pExpr->pRight );
260 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
261 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
262 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
264 }else{
265 rc = x(pExpr, *piPhrase, pCtx);
266 (*piPhrase)++;
268 return rc;
272 ** Iterate through all phrase nodes in an FTS3 query, except those that
273 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
274 ** For each phrase node found, the supplied callback function is invoked.
276 ** If the callback function returns anything other than SQLITE_OK,
277 ** the iteration is abandoned and the error code returned immediately.
278 ** Otherwise, SQLITE_OK is returned after a callback has been made for
279 ** all eligible phrase nodes.
281 int sqlite3Fts3ExprIterate(
282 Fts3Expr *pExpr, /* Expression to iterate phrases of */
283 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
284 void *pCtx /* Second argument to pass to callback */
286 int iPhrase = 0; /* Variable used as the phrase counter */
287 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
291 ** This is an sqlite3Fts3ExprIterate() callback used while loading the
292 ** doclists for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
293 ** fts3ExprLoadDoclists().
295 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
296 int rc = SQLITE_OK;
297 Fts3Phrase *pPhrase = pExpr->pPhrase;
298 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
300 UNUSED_PARAMETER(iPhrase);
302 p->nPhrase++;
303 p->nToken += pPhrase->nToken;
305 return rc;
309 ** Load the doclists for each phrase in the query associated with FTS3 cursor
310 ** pCsr.
312 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
313 ** phrases in the expression (all phrases except those directly or
314 ** indirectly descended from the right-hand-side of a NOT operator). If
315 ** pnToken is not NULL, then it is set to the number of tokens in all
316 ** matchable phrases of the expression.
318 static int fts3ExprLoadDoclists(
319 Fts3Cursor *pCsr, /* Fts3 cursor for current query */
320 int *pnPhrase, /* OUT: Number of phrases in query */
321 int *pnToken /* OUT: Number of tokens in query */
323 int rc; /* Return Code */
324 LoadDoclistCtx sCtx = {0,0,0}; /* Context for sqlite3Fts3ExprIterate() */
325 sCtx.pCsr = pCsr;
326 rc = sqlite3Fts3ExprIterate(pCsr->pExpr,fts3ExprLoadDoclistsCb,(void*)&sCtx);
327 if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
328 if( pnToken ) *pnToken = sCtx.nToken;
329 return rc;
332 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
333 (*(int *)ctx)++;
334 pExpr->iPhrase = iPhrase;
335 return SQLITE_OK;
337 static int fts3ExprPhraseCount(Fts3Expr *pExpr){
338 int nPhrase = 0;
339 (void)sqlite3Fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
340 return nPhrase;
344 ** Advance the position list iterator specified by the first two
345 ** arguments so that it points to the first element with a value greater
346 ** than or equal to parameter iNext.
348 static void fts3SnippetAdvance(char **ppIter, i64 *piIter, int iNext){
349 char *pIter = *ppIter;
350 if( pIter ){
351 i64 iIter = *piIter;
353 while( iIter<iNext ){
354 if( 0==(*pIter & 0xFE) ){
355 iIter = -1;
356 pIter = 0;
357 break;
359 fts3GetDeltaPosition(&pIter, &iIter);
362 *piIter = iIter;
363 *ppIter = pIter;
368 ** Advance the snippet iterator to the next candidate snippet.
370 static int fts3SnippetNextCandidate(SnippetIter *pIter){
371 int i; /* Loop counter */
373 if( pIter->iCurrent<0 ){
374 /* The SnippetIter object has just been initialized. The first snippet
375 ** candidate always starts at offset 0 (even if this candidate has a
376 ** score of 0.0).
378 pIter->iCurrent = 0;
380 /* Advance the 'head' iterator of each phrase to the first offset that
381 ** is greater than or equal to (iNext+nSnippet).
383 for(i=0; i<pIter->nPhrase; i++){
384 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
385 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
387 }else{
388 int iStart;
389 int iEnd = 0x7FFFFFFF;
391 for(i=0; i<pIter->nPhrase; i++){
392 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
393 if( pPhrase->pHead && pPhrase->iHead<iEnd ){
394 iEnd = pPhrase->iHead;
397 if( iEnd==0x7FFFFFFF ){
398 return 1;
401 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
402 for(i=0; i<pIter->nPhrase; i++){
403 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
404 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
405 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
409 return 0;
413 ** Retrieve information about the current candidate snippet of snippet
414 ** iterator pIter.
416 static void fts3SnippetDetails(
417 SnippetIter *pIter, /* Snippet iterator */
418 u64 mCovered, /* Bitmask of phrases already covered */
419 int *piToken, /* OUT: First token of proposed snippet */
420 int *piScore, /* OUT: "Score" for this snippet */
421 u64 *pmCover, /* OUT: Bitmask of phrases covered */
422 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
424 int iStart = pIter->iCurrent; /* First token of snippet */
425 int iScore = 0; /* Score of this snippet */
426 int i; /* Loop counter */
427 u64 mCover = 0; /* Mask of phrases covered by this snippet */
428 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
430 for(i=0; i<pIter->nPhrase; i++){
431 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
432 if( pPhrase->pTail ){
433 char *pCsr = pPhrase->pTail;
434 i64 iCsr = pPhrase->iTail;
436 while( iCsr<(iStart+pIter->nSnippet) && iCsr>=iStart ){
437 int j;
438 u64 mPhrase = (u64)1 << (i%64);
439 u64 mPos = (u64)1 << (iCsr - iStart);
440 assert( iCsr>=iStart && (iCsr - iStart)<=64 );
441 assert( i>=0 );
442 if( (mCover|mCovered)&mPhrase ){
443 iScore++;
444 }else{
445 iScore += 1000;
447 mCover |= mPhrase;
449 for(j=0; j<pPhrase->nToken && j<pIter->nSnippet; j++){
450 mHighlight |= (mPos>>j);
453 if( 0==(*pCsr & 0x0FE) ) break;
454 fts3GetDeltaPosition(&pCsr, &iCsr);
459 /* Set the output variables before returning. */
460 *piToken = iStart;
461 *piScore = iScore;
462 *pmCover = mCover;
463 *pmHighlight = mHighlight;
467 ** This function is an sqlite3Fts3ExprIterate() callback used by
468 ** fts3BestSnippet(). Each invocation populates an element of the
469 ** SnippetIter.aPhrase[] array.
471 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
472 SnippetIter *p = (SnippetIter *)ctx;
473 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
474 char *pCsr;
475 int rc;
477 pPhrase->nToken = pExpr->pPhrase->nToken;
478 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
479 assert( rc==SQLITE_OK || pCsr==0 );
480 if( pCsr ){
481 i64 iFirst = 0;
482 pPhrase->pList = pCsr;
483 fts3GetDeltaPosition(&pCsr, &iFirst);
484 if( iFirst<0 ){
485 rc = FTS_CORRUPT_VTAB;
486 }else{
487 pPhrase->pHead = pCsr;
488 pPhrase->pTail = pCsr;
489 pPhrase->iHead = iFirst;
490 pPhrase->iTail = iFirst;
492 }else{
493 assert( rc!=SQLITE_OK || (
494 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
498 return rc;
502 ** Select the fragment of text consisting of nFragment contiguous tokens
503 ** from column iCol that represent the "best" snippet. The best snippet
504 ** is the snippet with the highest score, where scores are calculated
505 ** by adding:
507 ** (a) +1 point for each occurrence of a matchable phrase in the snippet.
509 ** (b) +1000 points for the first occurrence of each matchable phrase in
510 ** the snippet for which the corresponding mCovered bit is not set.
512 ** The selected snippet parameters are stored in structure *pFragment before
513 ** returning. The score of the selected snippet is stored in *piScore
514 ** before returning.
516 static int fts3BestSnippet(
517 int nSnippet, /* Desired snippet length */
518 Fts3Cursor *pCsr, /* Cursor to create snippet for */
519 int iCol, /* Index of column to create snippet from */
520 u64 mCovered, /* Mask of phrases already covered */
521 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */
522 SnippetFragment *pFragment, /* OUT: Best snippet found */
523 int *piScore /* OUT: Score of snippet pFragment */
525 int rc; /* Return Code */
526 int nList; /* Number of phrases in expression */
527 SnippetIter sIter; /* Iterates through snippet candidates */
528 sqlite3_int64 nByte; /* Number of bytes of space to allocate */
529 int iBestScore = -1; /* Best snippet score found so far */
530 int i; /* Loop counter */
532 memset(&sIter, 0, sizeof(sIter));
534 /* Iterate through the phrases in the expression to count them. The same
535 ** callback makes sure the doclists are loaded for each phrase.
537 rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
538 if( rc!=SQLITE_OK ){
539 return rc;
542 /* Now that it is known how many phrases there are, allocate and zero
543 ** the required space using malloc().
545 nByte = sizeof(SnippetPhrase) * nList;
546 sIter.aPhrase = (SnippetPhrase *)sqlite3Fts3MallocZero(nByte);
547 if( !sIter.aPhrase ){
548 return SQLITE_NOMEM;
551 /* Initialize the contents of the SnippetIter object. Then iterate through
552 ** the set of phrases in the expression to populate the aPhrase[] array.
554 sIter.pCsr = pCsr;
555 sIter.iCol = iCol;
556 sIter.nSnippet = nSnippet;
557 sIter.nPhrase = nList;
558 sIter.iCurrent = -1;
559 rc = sqlite3Fts3ExprIterate(
560 pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter
562 if( rc==SQLITE_OK ){
564 /* Set the *pmSeen output variable. */
565 for(i=0; i<nList; i++){
566 if( sIter.aPhrase[i].pHead ){
567 *pmSeen |= (u64)1 << (i%64);
571 /* Loop through all candidate snippets. Store the best snippet in
572 ** *pFragment. Store its associated 'score' in iBestScore.
574 pFragment->iCol = iCol;
575 while( !fts3SnippetNextCandidate(&sIter) ){
576 int iPos;
577 int iScore;
578 u64 mCover;
579 u64 mHighlite;
580 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite);
581 assert( iScore>=0 );
582 if( iScore>iBestScore ){
583 pFragment->iPos = iPos;
584 pFragment->hlmask = mHighlite;
585 pFragment->covered = mCover;
586 iBestScore = iScore;
590 *piScore = iBestScore;
592 sqlite3_free(sIter.aPhrase);
593 return rc;
598 ** Append a string to the string-buffer passed as the first argument.
600 ** If nAppend is negative, then the length of the string zAppend is
601 ** determined using strlen().
603 static int fts3StringAppend(
604 StrBuffer *pStr, /* Buffer to append to */
605 const char *zAppend, /* Pointer to data to append to buffer */
606 int nAppend /* Size of zAppend in bytes (or -1) */
608 if( nAppend<0 ){
609 nAppend = (int)strlen(zAppend);
612 /* If there is insufficient space allocated at StrBuffer.z, use realloc()
613 ** to grow the buffer until so that it is big enough to accomadate the
614 ** appended data.
616 if( pStr->n+nAppend+1>=pStr->nAlloc ){
617 sqlite3_int64 nAlloc = pStr->nAlloc+(sqlite3_int64)nAppend+100;
618 char *zNew = sqlite3_realloc64(pStr->z, nAlloc);
619 if( !zNew ){
620 return SQLITE_NOMEM;
622 pStr->z = zNew;
623 pStr->nAlloc = nAlloc;
625 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
627 /* Append the data to the string buffer. */
628 memcpy(&pStr->z[pStr->n], zAppend, nAppend);
629 pStr->n += nAppend;
630 pStr->z[pStr->n] = '\0';
632 return SQLITE_OK;
636 ** The fts3BestSnippet() function often selects snippets that end with a
637 ** query term. That is, the final term of the snippet is always a term
638 ** that requires highlighting. For example, if 'X' is a highlighted term
639 ** and '.' is a non-highlighted term, BestSnippet() may select:
641 ** ........X.....X
643 ** This function "shifts" the beginning of the snippet forward in the
644 ** document so that there are approximately the same number of
645 ** non-highlighted terms to the right of the final highlighted term as there
646 ** are to the left of the first highlighted term. For example, to this:
648 ** ....X.....X....
650 ** This is done as part of extracting the snippet text, not when selecting
651 ** the snippet. Snippet selection is done based on doclists only, so there
652 ** is no way for fts3BestSnippet() to know whether or not the document
653 ** actually contains terms that follow the final highlighted term.
655 static int fts3SnippetShift(
656 Fts3Table *pTab, /* FTS3 table snippet comes from */
657 int iLangid, /* Language id to use in tokenizing */
658 int nSnippet, /* Number of tokens desired for snippet */
659 const char *zDoc, /* Document text to extract snippet from */
660 int nDoc, /* Size of buffer zDoc in bytes */
661 int *piPos, /* IN/OUT: First token of snippet */
662 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
664 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
666 if( hlmask ){
667 int nLeft; /* Tokens to the left of first highlight */
668 int nRight; /* Tokens to the right of last highlight */
669 int nDesired; /* Ideal number of tokens to shift forward */
671 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
672 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
673 assert( (nSnippet-1-nRight)<=63 && (nSnippet-1-nRight)>=0 );
674 nDesired = (nLeft-nRight)/2;
676 /* Ideally, the start of the snippet should be pushed forward in the
677 ** document nDesired tokens. This block checks if there are actually
678 ** nDesired tokens to the right of the snippet. If so, *piPos and
679 ** *pHlMask are updated to shift the snippet nDesired tokens to the
680 ** right. Otherwise, the snippet is shifted by the number of tokens
681 ** available.
683 if( nDesired>0 ){
684 int nShift; /* Number of tokens to shift snippet by */
685 int iCurrent = 0; /* Token counter */
686 int rc; /* Return Code */
687 sqlite3_tokenizer_module *pMod;
688 sqlite3_tokenizer_cursor *pC;
689 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
691 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
692 ** or more tokens in zDoc/nDoc.
694 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
695 if( rc!=SQLITE_OK ){
696 return rc;
698 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
699 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
700 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
702 pMod->xClose(pC);
703 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
705 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
706 assert( nShift<=nDesired );
707 if( nShift>0 ){
708 *piPos += nShift;
709 *pHlmask = hlmask >> nShift;
713 return SQLITE_OK;
717 ** Extract the snippet text for fragment pFragment from cursor pCsr and
718 ** append it to string buffer pOut.
720 static int fts3SnippetText(
721 Fts3Cursor *pCsr, /* FTS3 Cursor */
722 SnippetFragment *pFragment, /* Snippet to extract */
723 int iFragment, /* Fragment number */
724 int isLast, /* True for final fragment in snippet */
725 int nSnippet, /* Number of tokens in extracted snippet */
726 const char *zOpen, /* String inserted before highlighted term */
727 const char *zClose, /* String inserted after highlighted term */
728 const char *zEllipsis, /* String inserted between snippets */
729 StrBuffer *pOut /* Write output here */
731 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
732 int rc; /* Return code */
733 const char *zDoc; /* Document text to extract snippet from */
734 int nDoc; /* Size of zDoc in bytes */
735 int iCurrent = 0; /* Current token number of document */
736 int iEnd = 0; /* Byte offset of end of current token */
737 int isShiftDone = 0; /* True after snippet is shifted */
738 int iPos = pFragment->iPos; /* First token of snippet */
739 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
740 int iCol = pFragment->iCol+1; /* Query column to extract text from */
741 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
742 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
744 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
745 if( zDoc==0 ){
746 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
747 return SQLITE_NOMEM;
749 return SQLITE_OK;
751 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
753 /* Open a token cursor on the document. */
754 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
755 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
756 if( rc!=SQLITE_OK ){
757 return rc;
760 while( rc==SQLITE_OK ){
761 const char *ZDUMMY; /* Dummy argument used with tokenizer */
762 int DUMMY1 = -1; /* Dummy argument used with tokenizer */
763 int iBegin = 0; /* Offset in zDoc of start of token */
764 int iFin = 0; /* Offset in zDoc of end of token */
765 int isHighlight = 0; /* True for highlighted terms */
767 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
768 ** in the FTS code the variable that the third argument to xNext points to
769 ** is initialized to zero before the first (*but not necessarily
770 ** subsequent*) call to xNext(). This is done for a particular application
771 ** that needs to know whether or not the tokenizer is being used for
772 ** snippet generation or for some other purpose.
774 ** Extreme care is required when writing code to depend on this
775 ** initialization. It is not a documented part of the tokenizer interface.
776 ** If a tokenizer is used directly by any code outside of FTS, this
777 ** convention might not be respected. */
778 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
779 if( rc!=SQLITE_OK ){
780 if( rc==SQLITE_DONE ){
781 /* Special case - the last token of the snippet is also the last token
782 ** of the column. Append any punctuation that occurred between the end
783 ** of the previous token and the end of the document to the output.
784 ** Then break out of the loop. */
785 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
787 break;
789 if( iCurrent<iPos ){ continue; }
791 if( !isShiftDone ){
792 int n = nDoc - iBegin;
793 rc = fts3SnippetShift(
794 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
796 isShiftDone = 1;
798 /* Now that the shift has been done, check if the initial "..." are
799 ** required. They are required if (a) this is not the first fragment,
800 ** or (b) this fragment does not begin at position 0 of its column.
802 if( rc==SQLITE_OK ){
803 if( iPos>0 || iFragment>0 ){
804 rc = fts3StringAppend(pOut, zEllipsis, -1);
805 }else if( iBegin ){
806 rc = fts3StringAppend(pOut, zDoc, iBegin);
809 if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
812 if( iCurrent>=(iPos+nSnippet) ){
813 if( isLast ){
814 rc = fts3StringAppend(pOut, zEllipsis, -1);
816 break;
819 /* Set isHighlight to true if this term should be highlighted. */
820 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
822 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
823 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
824 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
825 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
827 iEnd = iFin;
830 pMod->xClose(pC);
831 return rc;
/*
** Count the entries in a column-list (a delta-encoded list of term
** offsets within a single column of a single row). On entry *ppCollist
** points at the first varint of the column-list, that is the varint
** holding the position of the first matching term in the column.
**
** On return *ppCollist points at the first byte after the last varint of
** the column-list: either the 0x00 that ends the position-list, or the
** 0x01 that precedes the column number of the next column.
**
** The number of elements in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *pCsr = *ppCollist;        /* Read cursor */
  char bMore = 0;                 /* 0x80 if previous byte had its high bit set */
  int nCount = 0;                 /* Number of complete varints seen */

  /* A column-list ends at a 0x00 or 0x01 byte that is not the trailing
  ** byte of a multi-byte varint. */
  for(;;){
    char ch = *pCsr;
    if( (0xFE & (ch | bMore))==0 ) break;
    pCsr++;
    bMore = ch & 0x80;
    if( !bMore ) nCount++;
  }

  *ppCollist = pCsr;
  return nCount;
}
864 ** This function gathers 'y' or 'b' data for a single phrase.
866 static int fts3ExprLHits(
867 Fts3Expr *pExpr, /* Phrase expression node */
868 MatchInfo *p /* Matchinfo context */
870 Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
871 int iStart;
872 Fts3Phrase *pPhrase = pExpr->pPhrase;
873 char *pIter = pPhrase->doclist.pList;
874 int iCol = 0;
876 assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
877 if( p->flag==FTS3_MATCHINFO_LHITS ){
878 iStart = pExpr->iPhrase * p->nCol;
879 }else{
880 iStart = pExpr->iPhrase * ((p->nCol + 31) / 32);
883 if( pIter ) while( 1 ){
884 int nHit = fts3ColumnlistCount(&pIter);
885 if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
886 if( p->flag==FTS3_MATCHINFO_LHITS ){
887 p->aMatchinfo[iStart + iCol] = (u32)nHit;
888 }else if( nHit ){
889 p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F));
892 assert( *pIter==0x00 || *pIter==0x01 );
893 if( *pIter!=0x01 ) break;
894 pIter++;
895 pIter += fts3GetVarint32(pIter, &iCol);
896 if( iCol>=p->nCol ) return FTS_CORRUPT_VTAB;
898 return SQLITE_OK;
902 ** Gather the results for matchinfo directives 'y' and 'b'.
904 static int fts3ExprLHitGather(
905 Fts3Expr *pExpr,
906 MatchInfo *p
908 int rc = SQLITE_OK;
909 assert( (pExpr->pLeft==0)==(pExpr->pRight==0) );
910 if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
911 if( pExpr->pLeft ){
912 rc = fts3ExprLHitGather(pExpr->pLeft, p);
913 if( rc==SQLITE_OK ) rc = fts3ExprLHitGather(pExpr->pRight, p);
914 }else{
915 rc = fts3ExprLHits(pExpr, p);
918 return rc;
922 ** sqlite3Fts3ExprIterate() callback used to collect the "global" matchinfo
923 ** stats for a single query.
925 ** sqlite3Fts3ExprIterate() callback to load the 'global' elements of a
926 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
927 ** of the matchinfo array that are constant for all rows returned by the
928 ** current query.
930 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
931 ** function populates Matchinfo.aMatchinfo[] as follows:
933 ** for(iCol=0; iCol<nCol; iCol++){
934 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
935 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
936 ** }
938 ** where X is the number of matches for phrase iPhrase is column iCol of all
939 ** rows of the table. Y is the number of rows for which column iCol contains
940 ** at least one instance of phrase iPhrase.
942 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
943 ** Y values are set to nDoc, where nDoc is the number of documents in the
944 ** file system. This is done because the full-text index doclist is required
945 ** to calculate these values properly, and the full-text index doclist is
946 ** not available for deferred tokens.
948 static int fts3ExprGlobalHitsCb(
949 Fts3Expr *pExpr, /* Phrase expression node */
950 int iPhrase, /* Phrase number (numbered from zero) */
951 void *pCtx /* Pointer to MatchInfo structure */
953 MatchInfo *p = (MatchInfo *)pCtx;
954 return sqlite3Fts3EvalPhraseStats(
955 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
960 ** sqlite3Fts3ExprIterate() callback used to collect the "local" part of the
961 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
962 ** array that are different for each row returned by the query.
964 static int fts3ExprLocalHitsCb(
965 Fts3Expr *pExpr, /* Phrase expression node */
966 int iPhrase, /* Phrase number */
967 void *pCtx /* Pointer to MatchInfo structure */
969 int rc = SQLITE_OK;
970 MatchInfo *p = (MatchInfo *)pCtx;
971 int iStart = iPhrase * p->nCol * 3;
972 int i;
974 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
975 char *pCsr;
976 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
977 if( pCsr ){
978 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
979 }else{
980 p->aMatchinfo[iStart+i*3] = 0;
984 return rc;
987 static int fts3MatchinfoCheck(
988 Fts3Table *pTab,
989 char cArg,
990 char **pzErr
992 if( (cArg==FTS3_MATCHINFO_NPHRASE)
993 || (cArg==FTS3_MATCHINFO_NCOL)
994 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
995 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
996 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
997 || (cArg==FTS3_MATCHINFO_LCS)
998 || (cArg==FTS3_MATCHINFO_HITS)
999 || (cArg==FTS3_MATCHINFO_LHITS)
1000 || (cArg==FTS3_MATCHINFO_LHITS_BM)
1002 return SQLITE_OK;
1004 sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
1005 return SQLITE_ERROR;
1008 static size_t fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
1009 size_t nVal; /* Number of integers output by cArg */
1011 switch( cArg ){
1012 case FTS3_MATCHINFO_NDOC:
1013 case FTS3_MATCHINFO_NPHRASE:
1014 case FTS3_MATCHINFO_NCOL:
1015 nVal = 1;
1016 break;
1018 case FTS3_MATCHINFO_AVGLENGTH:
1019 case FTS3_MATCHINFO_LENGTH:
1020 case FTS3_MATCHINFO_LCS:
1021 nVal = pInfo->nCol;
1022 break;
1024 case FTS3_MATCHINFO_LHITS:
1025 nVal = pInfo->nCol * pInfo->nPhrase;
1026 break;
1028 case FTS3_MATCHINFO_LHITS_BM:
1029 nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32);
1030 break;
1032 default:
1033 assert( cArg==FTS3_MATCHINFO_HITS );
1034 nVal = pInfo->nCol * pInfo->nPhrase * 3;
1035 break;
1038 return nVal;
1041 static int fts3MatchinfoSelectDoctotal(
1042 Fts3Table *pTab,
1043 sqlite3_stmt **ppStmt,
1044 sqlite3_int64 *pnDoc,
1045 const char **paLen,
1046 const char **ppEnd
1048 sqlite3_stmt *pStmt;
1049 const char *a;
1050 const char *pEnd;
1051 sqlite3_int64 nDoc;
1052 int n;
1055 if( !*ppStmt ){
1056 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
1057 if( rc!=SQLITE_OK ) return rc;
1059 pStmt = *ppStmt;
1060 assert( sqlite3_data_count(pStmt)==1 );
1062 n = sqlite3_column_bytes(pStmt, 0);
1063 a = sqlite3_column_blob(pStmt, 0);
1064 if( a==0 ){
1065 return FTS_CORRUPT_VTAB;
1067 pEnd = a + n;
1068 a += sqlite3Fts3GetVarintBounded(a, pEnd, &nDoc);
1069 if( nDoc<=0 || a>pEnd ){
1070 return FTS_CORRUPT_VTAB;
1072 *pnDoc = nDoc;
1074 if( paLen ) *paLen = a;
1075 if( ppEnd ) *ppEnd = pEnd;
1076 return SQLITE_OK;
1080 ** An instance of the following structure is used to store state while
1081 ** iterating through a multi-column position-list corresponding to the
1082 ** hits for a single phrase on a single row in order to calculate the
1083 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
1085 typedef struct LcsIterator LcsIterator;
1086 struct LcsIterator {
1087 Fts3Expr *pExpr; /* Pointer to phrase expression */
1088 int iPosOffset; /* Tokens count up to end of this phrase */
1089 char *pRead; /* Cursor used to iterate through aDoclist */
1090 int iPos; /* Current position */
/*
** Sentinel value meaning that an LCS iterator has finished iterating
** through all offsets for all columns.
**
** NOTE(review): the original comment says this value is stored in
** LcsIterator.iCol, but struct LcsIterator has no iCol member; confirm
** whether this macro is still used anywhere.
**
** The definition deliberately has no trailing semicolon so that the macro
** can be used inside expressions such as (x==LCS_ITERATOR_FINISHED).
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
1099 static int fts3MatchinfoLcsCb(
1100 Fts3Expr *pExpr, /* Phrase expression node */
1101 int iPhrase, /* Phrase number (numbered from zero) */
1102 void *pCtx /* Pointer to MatchInfo structure */
1104 LcsIterator *aIter = (LcsIterator *)pCtx;
1105 aIter[iPhrase].pExpr = pExpr;
1106 return SQLITE_OK;
1110 ** Advance the iterator passed as an argument to the next position. Return
1111 ** 1 if the iterator is at EOF or if it now points to the start of the
1112 ** position list for the next column.
1114 static int fts3LcsIteratorAdvance(LcsIterator *pIter){
1115 char *pRead;
1116 sqlite3_int64 iRead;
1117 int rc = 0;
1119 if( NEVER(pIter==0) ) return 1;
1120 pRead = pIter->pRead;
1121 pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1122 if( iRead==0 || iRead==1 ){
1123 pRead = 0;
1124 rc = 1;
1125 }else{
1126 pIter->iPos += (int)(iRead-2);
1129 pIter->pRead = pRead;
1130 return rc;
1134 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
1136 ** If the call is successful, the longest-common-substring lengths for each
1137 ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
1138 ** array before returning. SQLITE_OK is returned in this case.
1140 ** Otherwise, if an error occurs, an SQLite error code is returned and the
1141 ** data written to the first nCol elements of pInfo->aMatchinfo[] is
1142 ** undefined.
1144 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
1145 LcsIterator *aIter;
1146 int i;
1147 int iCol;
1148 int nToken = 0;
1149 int rc = SQLITE_OK;
1151 /* Allocate and populate the array of LcsIterator objects. The array
1152 ** contains one element for each matchable phrase in the query.
1154 aIter = sqlite3Fts3MallocZero(sizeof(LcsIterator) * pCsr->nPhrase);
1155 if( !aIter ) return SQLITE_NOMEM;
1156 (void)sqlite3Fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
1158 for(i=0; i<pInfo->nPhrase; i++){
1159 LcsIterator *pIter = &aIter[i];
1160 nToken -= pIter->pExpr->pPhrase->nToken;
1161 pIter->iPosOffset = nToken;
1164 for(iCol=0; iCol<pInfo->nCol; iCol++){
1165 int nLcs = 0; /* LCS value for this column */
1166 int nLive = 0; /* Number of iterators in aIter not at EOF */
1168 for(i=0; i<pInfo->nPhrase; i++){
1169 LcsIterator *pIt = &aIter[i];
1170 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
1171 if( rc!=SQLITE_OK ) goto matchinfo_lcs_out;
1172 if( pIt->pRead ){
1173 pIt->iPos = pIt->iPosOffset;
1174 fts3LcsIteratorAdvance(pIt);
1175 if( pIt->pRead==0 ){
1176 rc = FTS_CORRUPT_VTAB;
1177 goto matchinfo_lcs_out;
1179 nLive++;
1183 while( nLive>0 ){
1184 LcsIterator *pAdv = 0; /* The iterator to advance by one position */
1185 int nThisLcs = 0; /* LCS for the current iterator positions */
1187 for(i=0; i<pInfo->nPhrase; i++){
1188 LcsIterator *pIter = &aIter[i];
1189 if( pIter->pRead==0 ){
1190 /* This iterator is already at EOF for this column. */
1191 nThisLcs = 0;
1192 }else{
1193 if( pAdv==0 || pIter->iPos<pAdv->iPos ){
1194 pAdv = pIter;
1196 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
1197 nThisLcs++;
1198 }else{
1199 nThisLcs = 1;
1201 if( nThisLcs>nLcs ) nLcs = nThisLcs;
1204 if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
1207 pInfo->aMatchinfo[iCol] = nLcs;
1210 matchinfo_lcs_out:
1211 sqlite3_free(aIter);
1212 return rc;
1216 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1217 ** be returned by the matchinfo() function. Argument zArg contains the
1218 ** format string passed as the second argument to matchinfo (or the
1219 ** default value "pcx" if no second argument was specified). The format
1220 ** string has already been validated and the pInfo->aMatchinfo[] array
1221 ** is guaranteed to be large enough for the output.
1223 ** If bGlobal is true, then populate all fields of the matchinfo() output.
1224 ** If it is false, then assume that those fields that do not change between
1225 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1226 ** have already been populated.
1228 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1229 ** occurs. If a value other than SQLITE_OK is returned, the state the
1230 ** pInfo->aMatchinfo[] buffer is left in is undefined.
1232 static int fts3MatchinfoValues(
1233 Fts3Cursor *pCsr, /* FTS3 cursor object */
1234 int bGlobal, /* True to grab the global stats */
1235 MatchInfo *pInfo, /* Matchinfo context object */
1236 const char *zArg /* Matchinfo format string */
1238 int rc = SQLITE_OK;
1239 int i;
1240 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1241 sqlite3_stmt *pSelect = 0;
1243 for(i=0; rc==SQLITE_OK && zArg[i]; i++){
1244 pInfo->flag = zArg[i];
1245 switch( zArg[i] ){
1246 case FTS3_MATCHINFO_NPHRASE:
1247 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
1248 break;
1250 case FTS3_MATCHINFO_NCOL:
1251 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
1252 break;
1254 case FTS3_MATCHINFO_NDOC:
1255 if( bGlobal ){
1256 sqlite3_int64 nDoc = 0;
1257 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0, 0);
1258 pInfo->aMatchinfo[0] = (u32)nDoc;
1260 break;
1262 case FTS3_MATCHINFO_AVGLENGTH:
1263 if( bGlobal ){
1264 sqlite3_int64 nDoc; /* Number of rows in table */
1265 const char *a; /* Aggregate column length array */
1266 const char *pEnd; /* First byte past end of length array */
1268 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a, &pEnd);
1269 if( rc==SQLITE_OK ){
1270 int iCol;
1271 for(iCol=0; iCol<pInfo->nCol; iCol++){
1272 u32 iVal;
1273 sqlite3_int64 nToken;
1274 a += sqlite3Fts3GetVarint(a, &nToken);
1275 if( a>pEnd ){
1276 rc = SQLITE_CORRUPT_VTAB;
1277 break;
1279 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
1280 pInfo->aMatchinfo[iCol] = iVal;
1284 break;
1286 case FTS3_MATCHINFO_LENGTH: {
1287 sqlite3_stmt *pSelectDocsize = 0;
1288 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
1289 if( rc==SQLITE_OK ){
1290 int iCol;
1291 const char *a = sqlite3_column_blob(pSelectDocsize, 0);
1292 const char *pEnd = a + sqlite3_column_bytes(pSelectDocsize, 0);
1293 for(iCol=0; iCol<pInfo->nCol; iCol++){
1294 sqlite3_int64 nToken;
1295 a += sqlite3Fts3GetVarintBounded(a, pEnd, &nToken);
1296 if( a>pEnd ){
1297 rc = SQLITE_CORRUPT_VTAB;
1298 break;
1300 pInfo->aMatchinfo[iCol] = (u32)nToken;
1303 sqlite3_reset(pSelectDocsize);
1304 break;
1307 case FTS3_MATCHINFO_LCS:
1308 rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1309 if( rc==SQLITE_OK ){
1310 rc = fts3MatchinfoLcs(pCsr, pInfo);
1312 break;
1314 case FTS3_MATCHINFO_LHITS_BM:
1315 case FTS3_MATCHINFO_LHITS: {
1316 size_t nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
1317 memset(pInfo->aMatchinfo, 0, nZero);
1318 rc = fts3ExprLHitGather(pCsr->pExpr, pInfo);
1319 break;
1322 default: {
1323 Fts3Expr *pExpr;
1324 assert( zArg[i]==FTS3_MATCHINFO_HITS );
1325 pExpr = pCsr->pExpr;
1326 rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1327 if( rc!=SQLITE_OK ) break;
1328 if( bGlobal ){
1329 if( pCsr->pDeferred ){
1330 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0);
1331 if( rc!=SQLITE_OK ) break;
1333 rc = sqlite3Fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
1334 sqlite3Fts3EvalTestDeferred(pCsr, &rc);
1335 if( rc!=SQLITE_OK ) break;
1337 (void)sqlite3Fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
1338 break;
1342 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
1345 sqlite3_reset(pSelect);
1346 return rc;
1351 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1352 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1354 static void fts3GetMatchinfo(
1355 sqlite3_context *pCtx, /* Return results here */
1356 Fts3Cursor *pCsr, /* FTS3 Cursor object */
1357 const char *zArg /* Second argument to matchinfo() function */
1359 MatchInfo sInfo;
1360 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1361 int rc = SQLITE_OK;
1362 int bGlobal = 0; /* Collect 'global' stats as well as local */
1364 u32 *aOut = 0;
1365 void (*xDestroyOut)(void*) = 0;
1367 memset(&sInfo, 0, sizeof(MatchInfo));
1368 sInfo.pCursor = pCsr;
1369 sInfo.nCol = pTab->nColumn;
1371 /* If there is cached matchinfo() data, but the format string for the
1372 ** cache does not match the format string for this request, discard
1373 ** the cached data. */
1374 if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){
1375 sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
1376 pCsr->pMIBuffer = 0;
1379 /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
1380 ** matchinfo function has been called for this query. In this case
1381 ** allocate the array used to accumulate the matchinfo data and
1382 ** initialize those elements that are constant for every row.
1384 if( pCsr->pMIBuffer==0 ){
1385 size_t nMatchinfo = 0; /* Number of u32 elements in match-info */
1386 int i; /* Used to iterate through zArg */
1388 /* Determine the number of phrases in the query */
1389 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
1390 sInfo.nPhrase = pCsr->nPhrase;
1392 /* Determine the number of integers in the buffer returned by this call. */
1393 for(i=0; zArg[i]; i++){
1394 char *zErr = 0;
1395 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
1396 sqlite3_result_error(pCtx, zErr, -1);
1397 sqlite3_free(zErr);
1398 return;
1400 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
1403 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1404 pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg);
1405 if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM;
1407 pCsr->isMatchinfoNeeded = 1;
1408 bGlobal = 1;
1411 if( rc==SQLITE_OK ){
1412 xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut);
1413 if( xDestroyOut==0 ){
1414 rc = SQLITE_NOMEM;
1418 if( rc==SQLITE_OK ){
1419 sInfo.aMatchinfo = aOut;
1420 sInfo.nPhrase = pCsr->nPhrase;
1421 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
1422 if( bGlobal ){
1423 fts3MIBufferSetGlobal(pCsr->pMIBuffer);
1427 if( rc!=SQLITE_OK ){
1428 sqlite3_result_error_code(pCtx, rc);
1429 if( xDestroyOut ) xDestroyOut(aOut);
1430 }else{
1431 int n = pCsr->pMIBuffer->nElem * sizeof(u32);
1432 sqlite3_result_blob(pCtx, aOut, n, xDestroyOut);
1437 ** Implementation of snippet() function.
1439 void sqlite3Fts3Snippet(
1440 sqlite3_context *pCtx, /* SQLite function call context */
1441 Fts3Cursor *pCsr, /* Cursor object */
1442 const char *zStart, /* Snippet start text - "<b>" */
1443 const char *zEnd, /* Snippet end text - "</b>" */
1444 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */
1445 int iCol, /* Extract snippet from this column */
1446 int nToken /* Approximate number of tokens in snippet */
1448 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1449 int rc = SQLITE_OK;
1450 int i;
1451 StrBuffer res = {0, 0, 0};
1453 /* The returned text includes up to four fragments of text extracted from
1454 ** the data in the current row. The first iteration of the for(...) loop
1455 ** below attempts to locate a single fragment of text nToken tokens in
1456 ** size that contains at least one instance of all phrases in the query
1457 ** expression that appear in the current row. If such a fragment of text
1458 ** cannot be found, the second iteration of the loop attempts to locate
1459 ** a pair of fragments, and so on.
1461 int nSnippet = 0; /* Number of fragments in this snippet */
1462 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */
1463 int nFToken = -1; /* Number of tokens in each fragment */
1465 if( !pCsr->pExpr ){
1466 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1467 return;
1470 /* Limit the snippet length to 64 tokens. */
1471 if( nToken<-64 ) nToken = -64;
1472 if( nToken>+64 ) nToken = +64;
1474 for(nSnippet=1; 1; nSnippet++){
1476 int iSnip; /* Loop counter 0..nSnippet-1 */
1477 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */
1478 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */
1480 if( nToken>=0 ){
1481 nFToken = (nToken+nSnippet-1) / nSnippet;
1482 }else{
1483 nFToken = -1 * nToken;
1486 for(iSnip=0; iSnip<nSnippet; iSnip++){
1487 int iBestScore = -1; /* Best score of columns checked so far */
1488 int iRead; /* Used to iterate through columns */
1489 SnippetFragment *pFragment = &aSnippet[iSnip];
1491 memset(pFragment, 0, sizeof(*pFragment));
1493 /* Loop through all columns of the table being considered for snippets.
1494 ** If the iCol argument to this function was negative, this means all
1495 ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1497 for(iRead=0; iRead<pTab->nColumn; iRead++){
1498 SnippetFragment sF = {0, 0, 0, 0};
1499 int iS = 0;
1500 if( iCol>=0 && iRead!=iCol ) continue;
1502 /* Find the best snippet of nFToken tokens in column iRead. */
1503 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
1504 if( rc!=SQLITE_OK ){
1505 goto snippet_out;
1507 if( iS>iBestScore ){
1508 *pFragment = sF;
1509 iBestScore = iS;
1513 mCovered |= pFragment->covered;
1516 /* If all query phrases seen by fts3BestSnippet() are present in at least
1517 ** one of the nSnippet snippet fragments, break out of the loop.
1519 assert( (mCovered&mSeen)==mCovered );
1520 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
1523 assert( nFToken>0 );
1525 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
1526 rc = fts3SnippetText(pCsr, &aSnippet[i],
1527 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
1531 snippet_out:
1532 sqlite3Fts3SegmentsClose(pTab);
1533 if( rc!=SQLITE_OK ){
1534 sqlite3_result_error_code(pCtx, rc);
1535 sqlite3_free(res.z);
1536 }else{
1537 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
1542 typedef struct TermOffset TermOffset;
1543 typedef struct TermOffsetCtx TermOffsetCtx;
1545 struct TermOffset {
1546 char *pList; /* Position-list */
1547 i64 iPos; /* Position just read from pList */
1548 i64 iOff; /* Offset of this term from read positions */
1551 struct TermOffsetCtx {
1552 Fts3Cursor *pCsr;
1553 int iCol; /* Column of table to populate aTerm for */
1554 int iTerm;
1555 sqlite3_int64 iDocid;
1556 TermOffset *aTerm;
1560 ** This function is an sqlite3Fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1562 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
1563 TermOffsetCtx *p = (TermOffsetCtx *)ctx;
1564 int nTerm; /* Number of tokens in phrase */
1565 int iTerm; /* For looping through nTerm phrase terms */
1566 char *pList; /* Pointer to position list for phrase */
1567 i64 iPos = 0; /* First position in position-list */
1568 int rc;
1570 UNUSED_PARAMETER(iPhrase);
1571 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
1572 nTerm = pExpr->pPhrase->nToken;
1573 if( pList ){
1574 fts3GetDeltaPosition(&pList, &iPos);
1575 assert_fts3_nc( iPos>=0 );
1578 for(iTerm=0; iTerm<nTerm; iTerm++){
1579 TermOffset *pT = &p->aTerm[p->iTerm++];
1580 pT->iOff = nTerm-iTerm-1;
1581 pT->pList = pList;
1582 pT->iPos = iPos;
1585 return rc;
1589 ** Implementation of offsets() function.
1591 void sqlite3Fts3Offsets(
1592 sqlite3_context *pCtx, /* SQLite function call context */
1593 Fts3Cursor *pCsr /* Cursor object */
1595 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1596 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
1597 int rc; /* Return Code */
1598 int nToken; /* Number of tokens in query */
1599 int iCol; /* Column currently being processed */
1600 StrBuffer res = {0, 0, 0}; /* Result string */
1601 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
1603 if( !pCsr->pExpr ){
1604 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1605 return;
1608 memset(&sCtx, 0, sizeof(sCtx));
1609 assert( pCsr->isRequireSeek==0 );
1611 /* Count the number of terms in the query */
1612 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
1613 if( rc!=SQLITE_OK ) goto offsets_out;
1615 /* Allocate the array of TermOffset iterators. */
1616 sCtx.aTerm = (TermOffset *)sqlite3Fts3MallocZero(sizeof(TermOffset)*nToken);
1617 if( 0==sCtx.aTerm ){
1618 rc = SQLITE_NOMEM;
1619 goto offsets_out;
1621 sCtx.iDocid = pCsr->iPrevId;
1622 sCtx.pCsr = pCsr;
1624 /* Loop through the table columns, appending offset information to
1625 ** string-buffer res for each column.
1627 for(iCol=0; iCol<pTab->nColumn; iCol++){
1628 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
1629 const char *ZDUMMY; /* Dummy argument used with xNext() */
1630 int NDUMMY = 0; /* Dummy argument used with xNext() */
1631 int iStart = 0;
1632 int iEnd = 0;
1633 int iCurrent = 0;
1634 const char *zDoc;
1635 int nDoc;
1637 /* Initialize the contents of sCtx.aTerm[] for column iCol. This
1638 ** operation may fail if the database contains corrupt records.
1640 sCtx.iCol = iCol;
1641 sCtx.iTerm = 0;
1642 rc = sqlite3Fts3ExprIterate(
1643 pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx
1645 if( rc!=SQLITE_OK ) goto offsets_out;
1647 /* Retreive the text stored in column iCol. If an SQL NULL is stored
1648 ** in column iCol, jump immediately to the next iteration of the loop.
1649 ** If an OOM occurs while retrieving the data (this can happen if SQLite
1650 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1651 ** to the caller.
1653 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
1654 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
1655 if( zDoc==0 ){
1656 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
1657 continue;
1659 rc = SQLITE_NOMEM;
1660 goto offsets_out;
1663 /* Initialize a tokenizer iterator to iterate through column iCol. */
1664 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
1665 zDoc, nDoc, &pC
1667 if( rc!=SQLITE_OK ) goto offsets_out;
1669 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1670 while( rc==SQLITE_OK ){
1671 int i; /* Used to loop through terms */
1672 int iMinPos = 0x7FFFFFFF; /* Position of next token */
1673 TermOffset *pTerm = 0; /* TermOffset associated with next token */
1675 for(i=0; i<nToken; i++){
1676 TermOffset *pT = &sCtx.aTerm[i];
1677 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
1678 iMinPos = pT->iPos-pT->iOff;
1679 pTerm = pT;
1683 if( !pTerm ){
1684 /* All offsets for this column have been gathered. */
1685 rc = SQLITE_DONE;
1686 }else{
1687 assert_fts3_nc( iCurrent<=iMinPos );
1688 if( 0==(0xFE&*pTerm->pList) ){
1689 pTerm->pList = 0;
1690 }else{
1691 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
1693 while( rc==SQLITE_OK && iCurrent<iMinPos ){
1694 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1696 if( rc==SQLITE_OK ){
1697 char aBuffer[64];
1698 sqlite3_snprintf(sizeof(aBuffer), aBuffer,
1699 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
1701 rc = fts3StringAppend(&res, aBuffer, -1);
1702 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
1703 rc = FTS_CORRUPT_VTAB;
1707 if( rc==SQLITE_DONE ){
1708 rc = SQLITE_OK;
1711 pMod->xClose(pC);
1712 if( rc!=SQLITE_OK ) goto offsets_out;
1715 offsets_out:
1716 sqlite3_free(sCtx.aTerm);
1717 assert( rc!=SQLITE_DONE );
1718 sqlite3Fts3SegmentsClose(pTab);
1719 if( rc!=SQLITE_OK ){
1720 sqlite3_result_error_code(pCtx, rc);
1721 sqlite3_free(res.z);
1722 }else{
1723 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
1725 return;
1729 ** Implementation of matchinfo() function.
1731 void sqlite3Fts3Matchinfo(
1732 sqlite3_context *pContext, /* Function call context */
1733 Fts3Cursor *pCsr, /* FTS3 table cursor */
1734 const char *zArg /* Second arg to matchinfo() function */
1736 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1737 const char *zFormat;
1739 if( zArg ){
1740 zFormat = zArg;
1741 }else{
1742 zFormat = FTS3_MATCHINFO_DEFAULT;
1745 if( !pCsr->pExpr ){
1746 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
1747 return;
1748 }else{
1749 /* Retrieve matchinfo() data. */
1750 fts3GetMatchinfo(pContext, pCsr, zFormat);
1751 sqlite3Fts3SegmentsClose(pTab);
1755 #endif