Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / sqlite / src / ext / fts3 / fts3_snippet.c
blobaa8779fa61f22491ae1f357d697778e636aaac88
/*
** 2009 Oct 23
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/
14 #include "fts3Int.h"
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
17 #include <string.h>
18 #include <assert.h>
/*
** Characters that may appear in the second argument to matchinfo().
** The trailing comment on each line gives the number of 32-bit values
** that the corresponding request contributes to the matchinfo() output.
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */

/*
** The default value for the second argument to matchinfo().
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
38 ** Used as an fts3ExprIterate() context when loading phrase doclists to
39 ** Fts3Expr.aDoclist[]/nDoclist.
41 typedef struct LoadDoclistCtx LoadDoclistCtx;
42 struct LoadDoclistCtx {
43 Fts3Cursor *pCsr; /* FTS3 Cursor */
44 int nPhrase; /* Number of phrases seen so far */
45 int nToken; /* Number of tokens seen so far */
49 ** The following types are used as part of the implementation of the
50 ** fts3BestSnippet() routine.
52 typedef struct SnippetIter SnippetIter;
53 typedef struct SnippetPhrase SnippetPhrase;
54 typedef struct SnippetFragment SnippetFragment;
56 struct SnippetIter {
57 Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
58 int iCol; /* Extract snippet from this column */
59 int nSnippet; /* Requested snippet length (in tokens) */
60 int nPhrase; /* Number of phrases in query */
61 SnippetPhrase *aPhrase; /* Array of size nPhrase */
62 int iCurrent; /* First token of current snippet */
65 struct SnippetPhrase {
66 int nToken; /* Number of tokens in phrase */
67 char *pList; /* Pointer to start of phrase position list */
68 int iHead; /* Next value in position list */
69 char *pHead; /* Position list data following iHead */
70 int iTail; /* Next value in trailing position list */
71 char *pTail; /* Position list data following iTail */
74 struct SnippetFragment {
75 int iCol; /* Column snippet is extracted from */
76 int iPos; /* Index of first token in snippet */
77 u64 covered; /* Mask of query phrases covered */
78 u64 hlmask; /* Mask of snippet terms to highlight */
82 ** This type is used as an fts3ExprIterate() context object while
83 ** accumulating the data returned by the matchinfo() function.
85 typedef struct MatchInfo MatchInfo;
86 struct MatchInfo {
87 Fts3Cursor *pCursor; /* FTS3 Cursor */
88 int nCol; /* Number of columns in table */
89 int nPhrase; /* Number of matchable phrases in query */
90 sqlite3_int64 nDoc; /* Number of docs in database */
91 u32 *aMatchinfo; /* Pre-allocated buffer */
/*
** The snippet() and offsets() functions both return text values. An instance
** of the following structure is used to accumulate those values while the
** functions are running. See fts3StringAppend() for details.
*/
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Pointer to buffer containing string */
  int n;                          /* Length of z in bytes (excl. nul-term) */
  int nAlloc;                     /* Allocated size of buffer z in bytes */
};
/*
** This function is used to help iterate through a position-list. A position
** list is a list of unique integers, sorted from smallest to largest. Each
** element of the list is represented by an FTS3 varint that takes the value
** of the difference between the current element and the previous one plus
** two. For example, to store the position-list:
**
**     4 9 113
**
** the three varints:
**
**     6 7 106
**
** are encoded.
**
** When this function is called, *pp points to the start of an element of
** the list. *piPos contains the value of the previous entry in the list.
** After it returns, *piPos contains the value of the next element of the
** list and *pp is advanced to the following varint.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iVal;
  *pp += fts3GetVarint32(*pp, &iVal);
  *piPos += (iVal-2);             /* Undo the "+2" bias applied on encoding */
}
136 ** Helper function for fts3ExprIterate() (see below).
138 static int fts3ExprIterate2(
139 Fts3Expr *pExpr, /* Expression to iterate phrases of */
140 int *piPhrase, /* Pointer to phrase counter */
141 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
142 void *pCtx /* Second argument to pass to callback */
144 int rc; /* Return code */
145 int eType = pExpr->eType; /* Type of expression node pExpr */
147 if( eType!=FTSQUERY_PHRASE ){
148 assert( pExpr->pLeft && pExpr->pRight );
149 rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
150 if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
151 rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
153 }else{
154 rc = x(pExpr, *piPhrase, pCtx);
155 (*piPhrase)++;
157 return rc;
161 ** Iterate through all phrase nodes in an FTS3 query, except those that
162 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
163 ** For each phrase node found, the supplied callback function is invoked.
165 ** If the callback function returns anything other than SQLITE_OK,
166 ** the iteration is abandoned and the error code returned immediately.
167 ** Otherwise, SQLITE_OK is returned after a callback has been made for
168 ** all eligible phrase nodes.
170 static int fts3ExprIterate(
171 Fts3Expr *pExpr, /* Expression to iterate phrases of */
172 int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
173 void *pCtx /* Second argument to pass to callback */
175 int iPhrase = 0; /* Variable used as the phrase counter */
176 return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
180 ** This is an fts3ExprIterate() callback used while loading the doclists
181 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
182 ** fts3ExprLoadDoclists().
184 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
185 int rc = SQLITE_OK;
186 Fts3Phrase *pPhrase = pExpr->pPhrase;
187 LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
189 UNUSED_PARAMETER(iPhrase);
191 p->nPhrase++;
192 p->nToken += pPhrase->nToken;
194 return rc;
198 ** Load the doclists for each phrase in the query associated with FTS3 cursor
199 ** pCsr.
201 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
202 ** phrases in the expression (all phrases except those directly or
203 ** indirectly descended from the right-hand-side of a NOT operator). If
204 ** pnToken is not NULL, then it is set to the number of tokens in all
205 ** matchable phrases of the expression.
207 static int fts3ExprLoadDoclists(
208 Fts3Cursor *pCsr, /* Fts3 cursor for current query */
209 int *pnPhrase, /* OUT: Number of phrases in query */
210 int *pnToken /* OUT: Number of tokens in query */
212 int rc; /* Return Code */
213 LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
214 sCtx.pCsr = pCsr;
215 rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
216 if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
217 if( pnToken ) *pnToken = sCtx.nToken;
218 return rc;
221 static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
222 (*(int *)ctx)++;
223 UNUSED_PARAMETER(pExpr);
224 UNUSED_PARAMETER(iPhrase);
225 return SQLITE_OK;
227 static int fts3ExprPhraseCount(Fts3Expr *pExpr){
228 int nPhrase = 0;
229 (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
230 return nPhrase;
/*
** Advance the position list iterator specified by the first two
** arguments so that it points to the first element with a value greater
** than or equal to parameter iNext.
**
** If *ppIter is NULL on entry this is a no-op. If the end of the list is
** reached before such an element is found, *ppIter is set to NULL and
** *piIter to -1.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pIter = *ppIter;
  if( pIter ){
    int iIter = *piIter;

    while( iIter<iNext ){
      /* A byte of 0x00 or 0x01 terminates the list for this column. */
      if( 0==(*pIter & 0xFE) ){
        iIter = -1;
        pIter = 0;
        break;
      }
      fts3GetDeltaPosition(&pIter, &iIter);
    }

    *piIter = iIter;
    *ppIter = pIter;
  }
}
258 ** Advance the snippet iterator to the next candidate snippet.
260 static int fts3SnippetNextCandidate(SnippetIter *pIter){
261 int i; /* Loop counter */
263 if( pIter->iCurrent<0 ){
264 /* The SnippetIter object has just been initialized. The first snippet
265 ** candidate always starts at offset 0 (even if this candidate has a
266 ** score of 0.0).
268 pIter->iCurrent = 0;
270 /* Advance the 'head' iterator of each phrase to the first offset that
271 ** is greater than or equal to (iNext+nSnippet).
273 for(i=0; i<pIter->nPhrase; i++){
274 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
275 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
277 }else{
278 int iStart;
279 int iEnd = 0x7FFFFFFF;
281 for(i=0; i<pIter->nPhrase; i++){
282 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
283 if( pPhrase->pHead && pPhrase->iHead<iEnd ){
284 iEnd = pPhrase->iHead;
287 if( iEnd==0x7FFFFFFF ){
288 return 1;
291 pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
292 for(i=0; i<pIter->nPhrase; i++){
293 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
294 fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
295 fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
299 return 0;
303 ** Retrieve information about the current candidate snippet of snippet
304 ** iterator pIter.
306 static void fts3SnippetDetails(
307 SnippetIter *pIter, /* Snippet iterator */
308 u64 mCovered, /* Bitmask of phrases already covered */
309 int *piToken, /* OUT: First token of proposed snippet */
310 int *piScore, /* OUT: "Score" for this snippet */
311 u64 *pmCover, /* OUT: Bitmask of phrases covered */
312 u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
314 int iStart = pIter->iCurrent; /* First token of snippet */
315 int iScore = 0; /* Score of this snippet */
316 int i; /* Loop counter */
317 u64 mCover = 0; /* Mask of phrases covered by this snippet */
318 u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
320 for(i=0; i<pIter->nPhrase; i++){
321 SnippetPhrase *pPhrase = &pIter->aPhrase[i];
322 if( pPhrase->pTail ){
323 char *pCsr = pPhrase->pTail;
324 int iCsr = pPhrase->iTail;
326 while( iCsr<(iStart+pIter->nSnippet) ){
327 int j;
328 u64 mPhrase = (u64)1 << i;
329 u64 mPos = (u64)1 << (iCsr - iStart);
330 assert( iCsr>=iStart );
331 if( (mCover|mCovered)&mPhrase ){
332 iScore++;
333 }else{
334 iScore += 1000;
336 mCover |= mPhrase;
338 for(j=0; j<pPhrase->nToken; j++){
339 mHighlight |= (mPos>>j);
342 if( 0==(*pCsr & 0x0FE) ) break;
343 fts3GetDeltaPosition(&pCsr, &iCsr);
348 /* Set the output variables before returning. */
349 *piToken = iStart;
350 *piScore = iScore;
351 *pmCover = mCover;
352 *pmHighlight = mHighlight;
356 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
357 ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
359 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
360 SnippetIter *p = (SnippetIter *)ctx;
361 SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
362 char *pCsr;
363 int rc;
365 pPhrase->nToken = pExpr->pPhrase->nToken;
366 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
367 assert( rc==SQLITE_OK || pCsr==0 );
368 if( pCsr ){
369 int iFirst = 0;
370 pPhrase->pList = pCsr;
371 fts3GetDeltaPosition(&pCsr, &iFirst);
372 assert( iFirst>=0 );
373 pPhrase->pHead = pCsr;
374 pPhrase->pTail = pCsr;
375 pPhrase->iHead = iFirst;
376 pPhrase->iTail = iFirst;
377 }else{
378 assert( rc!=SQLITE_OK || (
379 pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
383 return rc;
387 ** Select the fragment of text consisting of nFragment contiguous tokens
388 ** from column iCol that represent the "best" snippet. The best snippet
389 ** is the snippet with the highest score, where scores are calculated
390 ** by adding:
392 ** (a) +1 point for each occurrence of a matchable phrase in the snippet.
394 ** (b) +1000 points for the first occurrence of each matchable phrase in
395 ** the snippet for which the corresponding mCovered bit is not set.
397 ** The selected snippet parameters are stored in structure *pFragment before
398 ** returning. The score of the selected snippet is stored in *piScore
399 ** before returning.
401 static int fts3BestSnippet(
402 int nSnippet, /* Desired snippet length */
403 Fts3Cursor *pCsr, /* Cursor to create snippet for */
404 int iCol, /* Index of column to create snippet from */
405 u64 mCovered, /* Mask of phrases already covered */
406 u64 *pmSeen, /* IN/OUT: Mask of phrases seen */
407 SnippetFragment *pFragment, /* OUT: Best snippet found */
408 int *piScore /* OUT: Score of snippet pFragment */
410 int rc; /* Return Code */
411 int nList; /* Number of phrases in expression */
412 SnippetIter sIter; /* Iterates through snippet candidates */
413 int nByte; /* Number of bytes of space to allocate */
414 int iBestScore = -1; /* Best snippet score found so far */
415 int i; /* Loop counter */
417 memset(&sIter, 0, sizeof(sIter));
419 /* Iterate through the phrases in the expression to count them. The same
420 ** callback makes sure the doclists are loaded for each phrase.
422 rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
423 if( rc!=SQLITE_OK ){
424 return rc;
427 /* Now that it is known how many phrases there are, allocate and zero
428 ** the required space using malloc().
430 nByte = sizeof(SnippetPhrase) * nList;
431 sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
432 if( !sIter.aPhrase ){
433 return SQLITE_NOMEM;
435 memset(sIter.aPhrase, 0, nByte);
437 /* Initialize the contents of the SnippetIter object. Then iterate through
438 ** the set of phrases in the expression to populate the aPhrase[] array.
440 sIter.pCsr = pCsr;
441 sIter.iCol = iCol;
442 sIter.nSnippet = nSnippet;
443 sIter.nPhrase = nList;
444 sIter.iCurrent = -1;
445 (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
447 /* Set the *pmSeen output variable. */
448 for(i=0; i<nList; i++){
449 if( sIter.aPhrase[i].pHead ){
450 *pmSeen |= (u64)1 << i;
454 /* Loop through all candidate snippets. Store the best snippet in
455 ** *pFragment. Store its associated 'score' in iBestScore.
457 pFragment->iCol = iCol;
458 while( !fts3SnippetNextCandidate(&sIter) ){
459 int iPos;
460 int iScore;
461 u64 mCover;
462 u64 mHighlight;
463 fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
464 assert( iScore>=0 );
465 if( iScore>iBestScore ){
466 pFragment->iPos = iPos;
467 pFragment->hlmask = mHighlight;
468 pFragment->covered = mCover;
469 iBestScore = iScore;
473 sqlite3_free(sIter.aPhrase);
474 *piScore = iBestScore;
475 return SQLITE_OK;
480 ** Append a string to the string-buffer passed as the first argument.
482 ** If nAppend is negative, then the length of the string zAppend is
483 ** determined using strlen().
485 static int fts3StringAppend(
486 StrBuffer *pStr, /* Buffer to append to */
487 const char *zAppend, /* Pointer to data to append to buffer */
488 int nAppend /* Size of zAppend in bytes (or -1) */
490 if( nAppend<0 ){
491 nAppend = (int)strlen(zAppend);
494 /* If there is insufficient space allocated at StrBuffer.z, use realloc()
495 ** to grow the buffer until so that it is big enough to accomadate the
496 ** appended data.
498 if( pStr->n+nAppend+1>=pStr->nAlloc ){
499 int nAlloc = pStr->nAlloc+nAppend+100;
500 char *zNew = sqlite3_realloc(pStr->z, nAlloc);
501 if( !zNew ){
502 return SQLITE_NOMEM;
504 pStr->z = zNew;
505 pStr->nAlloc = nAlloc;
507 assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
509 /* Append the data to the string buffer. */
510 memcpy(&pStr->z[pStr->n], zAppend, nAppend);
511 pStr->n += nAppend;
512 pStr->z[pStr->n] = '\0';
514 return SQLITE_OK;
518 ** The fts3BestSnippet() function often selects snippets that end with a
519 ** query term. That is, the final term of the snippet is always a term
520 ** that requires highlighting. For example, if 'X' is a highlighted term
521 ** and '.' is a non-highlighted term, BestSnippet() may select:
523 ** ........X.....X
525 ** This function "shifts" the beginning of the snippet forward in the
526 ** document so that there are approximately the same number of
527 ** non-highlighted terms to the right of the final highlighted term as there
528 ** are to the left of the first highlighted term. For example, to this:
530 ** ....X.....X....
532 ** This is done as part of extracting the snippet text, not when selecting
533 ** the snippet. Snippet selection is done based on doclists only, so there
534 ** is no way for fts3BestSnippet() to know whether or not the document
535 ** actually contains terms that follow the final highlighted term.
537 static int fts3SnippetShift(
538 Fts3Table *pTab, /* FTS3 table snippet comes from */
539 int iLangid, /* Language id to use in tokenizing */
540 int nSnippet, /* Number of tokens desired for snippet */
541 const char *zDoc, /* Document text to extract snippet from */
542 int nDoc, /* Size of buffer zDoc in bytes */
543 int *piPos, /* IN/OUT: First token of snippet */
544 u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
546 u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
548 if( hlmask ){
549 int nLeft; /* Tokens to the left of first highlight */
550 int nRight; /* Tokens to the right of last highlight */
551 int nDesired; /* Ideal number of tokens to shift forward */
553 for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
554 for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
555 nDesired = (nLeft-nRight)/2;
557 /* Ideally, the start of the snippet should be pushed forward in the
558 ** document nDesired tokens. This block checks if there are actually
559 ** nDesired tokens to the right of the snippet. If so, *piPos and
560 ** *pHlMask are updated to shift the snippet nDesired tokens to the
561 ** right. Otherwise, the snippet is shifted by the number of tokens
562 ** available.
564 if( nDesired>0 ){
565 int nShift; /* Number of tokens to shift snippet by */
566 int iCurrent = 0; /* Token counter */
567 int rc; /* Return Code */
568 sqlite3_tokenizer_module *pMod;
569 sqlite3_tokenizer_cursor *pC;
570 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
572 /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
573 ** or more tokens in zDoc/nDoc.
575 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
576 if( rc!=SQLITE_OK ){
577 return rc;
579 while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
580 const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
581 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
583 pMod->xClose(pC);
584 if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
586 nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
587 assert( nShift<=nDesired );
588 if( nShift>0 ){
589 *piPos += nShift;
590 *pHlmask = hlmask >> nShift;
594 return SQLITE_OK;
598 ** Extract the snippet text for fragment pFragment from cursor pCsr and
599 ** append it to string buffer pOut.
601 static int fts3SnippetText(
602 Fts3Cursor *pCsr, /* FTS3 Cursor */
603 SnippetFragment *pFragment, /* Snippet to extract */
604 int iFragment, /* Fragment number */
605 int isLast, /* True for final fragment in snippet */
606 int nSnippet, /* Number of tokens in extracted snippet */
607 const char *zOpen, /* String inserted before highlighted term */
608 const char *zClose, /* String inserted after highlighted term */
609 const char *zEllipsis, /* String inserted between snippets */
610 StrBuffer *pOut /* Write output here */
612 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
613 int rc; /* Return code */
614 const char *zDoc; /* Document text to extract snippet from */
615 int nDoc; /* Size of zDoc in bytes */
616 int iCurrent = 0; /* Current token number of document */
617 int iEnd = 0; /* Byte offset of end of current token */
618 int isShiftDone = 0; /* True after snippet is shifted */
619 int iPos = pFragment->iPos; /* First token of snippet */
620 u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
621 int iCol = pFragment->iCol+1; /* Query column to extract text from */
622 sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
623 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
625 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
626 if( zDoc==0 ){
627 if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
628 return SQLITE_NOMEM;
630 return SQLITE_OK;
632 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
634 /* Open a token cursor on the document. */
635 pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
636 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
637 if( rc!=SQLITE_OK ){
638 return rc;
641 while( rc==SQLITE_OK ){
642 const char *ZDUMMY; /* Dummy argument used with tokenizer */
643 int DUMMY1 = -1; /* Dummy argument used with tokenizer */
644 int iBegin = 0; /* Offset in zDoc of start of token */
645 int iFin = 0; /* Offset in zDoc of end of token */
646 int isHighlight = 0; /* True for highlighted terms */
648 /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
649 ** in the FTS code the variable that the third argument to xNext points to
650 ** is initialized to zero before the first (*but not necessarily
651 ** subsequent*) call to xNext(). This is done for a particular application
652 ** that needs to know whether or not the tokenizer is being used for
653 ** snippet generation or for some other purpose.
655 ** Extreme care is required when writing code to depend on this
656 ** initialization. It is not a documented part of the tokenizer interface.
657 ** If a tokenizer is used directly by any code outside of FTS, this
658 ** convention might not be respected. */
659 rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
660 if( rc!=SQLITE_OK ){
661 if( rc==SQLITE_DONE ){
662 /* Special case - the last token of the snippet is also the last token
663 ** of the column. Append any punctuation that occurred between the end
664 ** of the previous token and the end of the document to the output.
665 ** Then break out of the loop. */
666 rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
668 break;
670 if( iCurrent<iPos ){ continue; }
672 if( !isShiftDone ){
673 int n = nDoc - iBegin;
674 rc = fts3SnippetShift(
675 pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
677 isShiftDone = 1;
679 /* Now that the shift has been done, check if the initial "..." are
680 ** required. They are required if (a) this is not the first fragment,
681 ** or (b) this fragment does not begin at position 0 of its column.
683 if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
684 rc = fts3StringAppend(pOut, zEllipsis, -1);
686 if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
689 if( iCurrent>=(iPos+nSnippet) ){
690 if( isLast ){
691 rc = fts3StringAppend(pOut, zEllipsis, -1);
693 break;
696 /* Set isHighlight to true if this term should be highlighted. */
697 isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
699 if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
700 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
701 if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
702 if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
704 iEnd = iFin;
707 pMod->xClose(pC);
708 return rc;
/*
** This function is used to count the entries in a column-list (a
** delta-encoded list of term offsets within a single column of a single
** row). When this function is called, *ppCollist should point to the
** beginning of the first varint in the column-list (the varint that
** contains the position of the first matching term in the column data).
** Before returning, *ppCollist is set to point to the first byte after
** the last varint in the column-list (either the 0x00 signifying the end
** of the position-list, or the 0x01 that precedes the column number of
** the next column in the position-list).
**
** The number of elements in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *pEnd = *ppCollist;
  char c = 0;                     /* Non-zero if previous byte had 0x80 set */
  int nEntry = 0;

  /* A column-list is terminated by either a 0x01 or 0x00. A 0x00 or 0x01
  ** byte that follows a byte with the 0x80 continuation bit set is part of
  ** a multi-byte varint, not a terminator - hence the OR with c. */
  while( 0xFE & (*pEnd | c) ){
    c = *pEnd++ & 0x80;
    if( !c ) nEntry++;            /* Final byte of a varint: one more entry */
  }

  *ppCollist = pEnd;
  return nEntry;
}
741 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
742 ** for a single query.
744 ** fts3ExprIterate() callback to load the 'global' elements of a
745 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
746 ** of the matchinfo array that are constant for all rows returned by the
747 ** current query.
749 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
750 ** function populates Matchinfo.aMatchinfo[] as follows:
752 ** for(iCol=0; iCol<nCol; iCol++){
753 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
754 ** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
755 ** }
757 ** where X is the number of matches for phrase iPhrase is column iCol of all
758 ** rows of the table. Y is the number of rows for which column iCol contains
759 ** at least one instance of phrase iPhrase.
761 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
762 ** Y values are set to nDoc, where nDoc is the number of documents in the
763 ** file system. This is done because the full-text index doclist is required
764 ** to calculate these values properly, and the full-text index doclist is
765 ** not available for deferred tokens.
767 static int fts3ExprGlobalHitsCb(
768 Fts3Expr *pExpr, /* Phrase expression node */
769 int iPhrase, /* Phrase number (numbered from zero) */
770 void *pCtx /* Pointer to MatchInfo structure */
772 MatchInfo *p = (MatchInfo *)pCtx;
773 return sqlite3Fts3EvalPhraseStats(
774 p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
779 ** fts3ExprIterate() callback used to collect the "local" part of the
780 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
781 ** array that are different for each row returned by the query.
783 static int fts3ExprLocalHitsCb(
784 Fts3Expr *pExpr, /* Phrase expression node */
785 int iPhrase, /* Phrase number */
786 void *pCtx /* Pointer to MatchInfo structure */
788 int rc = SQLITE_OK;
789 MatchInfo *p = (MatchInfo *)pCtx;
790 int iStart = iPhrase * p->nCol * 3;
791 int i;
793 for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
794 char *pCsr;
795 rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
796 if( pCsr ){
797 p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
798 }else{
799 p->aMatchinfo[iStart+i*3] = 0;
803 return rc;
806 static int fts3MatchinfoCheck(
807 Fts3Table *pTab,
808 char cArg,
809 char **pzErr
811 if( (cArg==FTS3_MATCHINFO_NPHRASE)
812 || (cArg==FTS3_MATCHINFO_NCOL)
813 || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
814 || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
815 || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
816 || (cArg==FTS3_MATCHINFO_LCS)
817 || (cArg==FTS3_MATCHINFO_HITS)
819 return SQLITE_OK;
821 *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
822 return SQLITE_ERROR;
825 static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
826 int nVal; /* Number of integers output by cArg */
828 switch( cArg ){
829 case FTS3_MATCHINFO_NDOC:
830 case FTS3_MATCHINFO_NPHRASE:
831 case FTS3_MATCHINFO_NCOL:
832 nVal = 1;
833 break;
835 case FTS3_MATCHINFO_AVGLENGTH:
836 case FTS3_MATCHINFO_LENGTH:
837 case FTS3_MATCHINFO_LCS:
838 nVal = pInfo->nCol;
839 break;
841 default:
842 assert( cArg==FTS3_MATCHINFO_HITS );
843 nVal = pInfo->nCol * pInfo->nPhrase * 3;
844 break;
847 return nVal;
850 static int fts3MatchinfoSelectDoctotal(
851 Fts3Table *pTab,
852 sqlite3_stmt **ppStmt,
853 sqlite3_int64 *pnDoc,
854 const char **paLen
856 sqlite3_stmt *pStmt;
857 const char *a;
858 sqlite3_int64 nDoc;
860 if( !*ppStmt ){
861 int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
862 if( rc!=SQLITE_OK ) return rc;
864 pStmt = *ppStmt;
865 assert( sqlite3_data_count(pStmt)==1 );
867 a = sqlite3_column_blob(pStmt, 0);
868 a += sqlite3Fts3GetVarint(a, &nDoc);
869 if( nDoc==0 ) return FTS_CORRUPT_VTAB;
870 *pnDoc = (u32)nDoc;
872 if( paLen ) *paLen = a;
873 return SQLITE_OK;
877 ** An instance of the following structure is used to store state while
878 ** iterating through a multi-column position-list corresponding to the
879 ** hits for a single phrase on a single row in order to calculate the
880 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
882 typedef struct LcsIterator LcsIterator;
883 struct LcsIterator {
884 Fts3Expr *pExpr; /* Pointer to phrase expression */
885 int iPosOffset; /* Tokens count up to end of this phrase */
886 char *pRead; /* Cursor used to iterate through aDoclist */
887 int iPos; /* Current position */
/*
** Sentinel value indicating that an LCS iterator has finished iterating
** through all offsets for all columns.
**
** NOTE(review): the original comment says this value is stored in
** "LcsIterator.iCol", but no member of that name is visible in this part
** of the file - confirm the intended use before relying on it.
**
** The macro body deliberately has no trailing semicolon so that it can be
** used inside expressions.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
896 static int fts3MatchinfoLcsCb(
897 Fts3Expr *pExpr, /* Phrase expression node */
898 int iPhrase, /* Phrase number (numbered from zero) */
899 void *pCtx /* Pointer to MatchInfo structure */
901 LcsIterator *aIter = (LcsIterator *)pCtx;
902 aIter[iPhrase].pExpr = pExpr;
903 return SQLITE_OK;
907 ** Advance the iterator passed as an argument to the next position. Return
908 ** 1 if the iterator is at EOF or if it now points to the start of the
909 ** position list for the next column.
911 static int fts3LcsIteratorAdvance(LcsIterator *pIter){
912 char *pRead = pIter->pRead;
913 sqlite3_int64 iRead;
914 int rc = 0;
916 pRead += sqlite3Fts3GetVarint(pRead, &iRead);
917 if( iRead==0 || iRead==1 ){
918 pRead = 0;
919 rc = 1;
920 }else{
921 pIter->iPos += (int)(iRead-2);
924 pIter->pRead = pRead;
925 return rc;
929 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
931 ** If the call is successful, the longest-common-substring lengths for each
932 ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
933 ** array before returning. SQLITE_OK is returned in this case.
935 ** Otherwise, if an error occurs, an SQLite error code is returned and the
936 ** data written to the first nCol elements of pInfo->aMatchinfo[] is
937 ** undefined.
939 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
940 LcsIterator *aIter;
941 int i;
942 int iCol;
943 int nToken = 0;
945 /* Allocate and populate the array of LcsIterator objects. The array
946 ** contains one element for each matchable phrase in the query.
948 aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
949 if( !aIter ) return SQLITE_NOMEM;
950 memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
951 (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
953 for(i=0; i<pInfo->nPhrase; i++){
954 LcsIterator *pIter = &aIter[i];
955 nToken -= pIter->pExpr->pPhrase->nToken;
956 pIter->iPosOffset = nToken;
959 for(iCol=0; iCol<pInfo->nCol; iCol++){
960 int nLcs = 0; /* LCS value for this column */
961 int nLive = 0; /* Number of iterators in aIter not at EOF */
963 for(i=0; i<pInfo->nPhrase; i++){
964 int rc;
965 LcsIterator *pIt = &aIter[i];
966 rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
967 if( rc!=SQLITE_OK ) return rc;
968 if( pIt->pRead ){
969 pIt->iPos = pIt->iPosOffset;
970 fts3LcsIteratorAdvance(&aIter[i]);
971 nLive++;
975 while( nLive>0 ){
976 LcsIterator *pAdv = 0; /* The iterator to advance by one position */
977 int nThisLcs = 0; /* LCS for the current iterator positions */
979 for(i=0; i<pInfo->nPhrase; i++){
980 LcsIterator *pIter = &aIter[i];
981 if( pIter->pRead==0 ){
982 /* This iterator is already at EOF for this column. */
983 nThisLcs = 0;
984 }else{
985 if( pAdv==0 || pIter->iPos<pAdv->iPos ){
986 pAdv = pIter;
988 if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
989 nThisLcs++;
990 }else{
991 nThisLcs = 1;
993 if( nThisLcs>nLcs ) nLcs = nThisLcs;
996 if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
999 pInfo->aMatchinfo[iCol] = nLcs;
1002 sqlite3_free(aIter);
1003 return SQLITE_OK;
1007 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1008 ** be returned by the matchinfo() function. Argument zArg contains the
1009 ** format string passed as the second argument to matchinfo (or the
1010 ** default value "pcx" if no second argument was specified). The format
1011 ** string has already been validated and the pInfo->aMatchinfo[] array
1012 ** is guaranteed to be large enough for the output.
1014 ** If bGlobal is true, then populate all fields of the matchinfo() output.
1015 ** If it is false, then assume that those fields that do not change between
1016 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1017 ** have already been populated.
1019 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1020 ** occurs. If a value other than SQLITE_OK is returned, the state the
1021 ** pInfo->aMatchinfo[] buffer is left in is undefined.
1023 static int fts3MatchinfoValues(
1024 Fts3Cursor *pCsr, /* FTS3 cursor object */
1025 int bGlobal, /* True to grab the global stats */
1026 MatchInfo *pInfo, /* Matchinfo context object */
1027 const char *zArg /* Matchinfo format string */
1029 int rc = SQLITE_OK;
1030 int i;
1031 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1032 sqlite3_stmt *pSelect = 0;
1034 for(i=0; rc==SQLITE_OK && zArg[i]; i++){
1036 switch( zArg[i] ){
1037 case FTS3_MATCHINFO_NPHRASE:
1038 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
1039 break;
1041 case FTS3_MATCHINFO_NCOL:
1042 if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
1043 break;
1045 case FTS3_MATCHINFO_NDOC:
1046 if( bGlobal ){
1047 sqlite3_int64 nDoc = 0;
1048 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
1049 pInfo->aMatchinfo[0] = (u32)nDoc;
1051 break;
1053 case FTS3_MATCHINFO_AVGLENGTH:
1054 if( bGlobal ){
1055 sqlite3_int64 nDoc; /* Number of rows in table */
1056 const char *a; /* Aggregate column length array */
1058 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
1059 if( rc==SQLITE_OK ){
1060 int iCol;
1061 for(iCol=0; iCol<pInfo->nCol; iCol++){
1062 u32 iVal;
1063 sqlite3_int64 nToken;
1064 a += sqlite3Fts3GetVarint(a, &nToken);
1065 iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
1066 pInfo->aMatchinfo[iCol] = iVal;
1070 break;
1072 case FTS3_MATCHINFO_LENGTH: {
1073 sqlite3_stmt *pSelectDocsize = 0;
1074 rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
1075 if( rc==SQLITE_OK ){
1076 int iCol;
1077 const char *a = sqlite3_column_blob(pSelectDocsize, 0);
1078 for(iCol=0; iCol<pInfo->nCol; iCol++){
1079 sqlite3_int64 nToken;
1080 a += sqlite3Fts3GetVarint(a, &nToken);
1081 pInfo->aMatchinfo[iCol] = (u32)nToken;
1084 sqlite3_reset(pSelectDocsize);
1085 break;
1088 case FTS3_MATCHINFO_LCS:
1089 rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1090 if( rc==SQLITE_OK ){
1091 rc = fts3MatchinfoLcs(pCsr, pInfo);
1093 break;
1095 default: {
1096 Fts3Expr *pExpr;
1097 assert( zArg[i]==FTS3_MATCHINFO_HITS );
1098 pExpr = pCsr->pExpr;
1099 rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1100 if( rc!=SQLITE_OK ) break;
1101 if( bGlobal ){
1102 if( pCsr->pDeferred ){
1103 rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
1104 if( rc!=SQLITE_OK ) break;
1106 rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
1107 if( rc!=SQLITE_OK ) break;
1109 (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
1110 break;
1114 pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
1117 sqlite3_reset(pSelect);
1118 return rc;
1123 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1124 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1126 static int fts3GetMatchinfo(
1127 Fts3Cursor *pCsr, /* FTS3 Cursor object */
1128 const char *zArg /* Second argument to matchinfo() function */
1130 MatchInfo sInfo;
1131 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1132 int rc = SQLITE_OK;
1133 int bGlobal = 0; /* Collect 'global' stats as well as local */
1135 memset(&sInfo, 0, sizeof(MatchInfo));
1136 sInfo.pCursor = pCsr;
1137 sInfo.nCol = pTab->nColumn;
1139 /* If there is cached matchinfo() data, but the format string for the
1140 ** cache does not match the format string for this request, discard
1141 ** the cached data. */
1142 if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
1143 assert( pCsr->aMatchinfo );
1144 sqlite3_free(pCsr->aMatchinfo);
1145 pCsr->zMatchinfo = 0;
1146 pCsr->aMatchinfo = 0;
1149 /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
1150 ** matchinfo function has been called for this query. In this case
1151 ** allocate the array used to accumulate the matchinfo data and
1152 ** initialize those elements that are constant for every row.
1154 if( pCsr->aMatchinfo==0 ){
1155 int nMatchinfo = 0; /* Number of u32 elements in match-info */
1156 int nArg; /* Bytes in zArg */
1157 int i; /* Used to iterate through zArg */
1159 /* Determine the number of phrases in the query */
1160 pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
1161 sInfo.nPhrase = pCsr->nPhrase;
1163 /* Determine the number of integers in the buffer returned by this call. */
1164 for(i=0; zArg[i]; i++){
1165 nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
1168 /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1169 nArg = (int)strlen(zArg);
1170 pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
1171 if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
1173 pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
1174 pCsr->nMatchinfo = nMatchinfo;
1175 memcpy(pCsr->zMatchinfo, zArg, nArg+1);
1176 memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
1177 pCsr->isMatchinfoNeeded = 1;
1178 bGlobal = 1;
1181 sInfo.aMatchinfo = pCsr->aMatchinfo;
1182 sInfo.nPhrase = pCsr->nPhrase;
1183 if( pCsr->isMatchinfoNeeded ){
1184 rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
1185 pCsr->isMatchinfoNeeded = 0;
1188 return rc;
1192 ** Implementation of snippet() function.
1194 void sqlite3Fts3Snippet(
1195 sqlite3_context *pCtx, /* SQLite function call context */
1196 Fts3Cursor *pCsr, /* Cursor object */
1197 const char *zStart, /* Snippet start text - "<b>" */
1198 const char *zEnd, /* Snippet end text - "</b>" */
1199 const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */
1200 int iCol, /* Extract snippet from this column */
1201 int nToken /* Approximate number of tokens in snippet */
1203 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1204 int rc = SQLITE_OK;
1205 int i;
1206 StrBuffer res = {0, 0, 0};
1208 /* The returned text includes up to four fragments of text extracted from
1209 ** the data in the current row. The first iteration of the for(...) loop
1210 ** below attempts to locate a single fragment of text nToken tokens in
1211 ** size that contains at least one instance of all phrases in the query
1212 ** expression that appear in the current row. If such a fragment of text
1213 ** cannot be found, the second iteration of the loop attempts to locate
1214 ** a pair of fragments, and so on.
1216 int nSnippet = 0; /* Number of fragments in this snippet */
1217 SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */
1218 int nFToken = -1; /* Number of tokens in each fragment */
1220 if( !pCsr->pExpr ){
1221 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1222 return;
1225 for(nSnippet=1; 1; nSnippet++){
1227 int iSnip; /* Loop counter 0..nSnippet-1 */
1228 u64 mCovered = 0; /* Bitmask of phrases covered by snippet */
1229 u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */
1231 if( nToken>=0 ){
1232 nFToken = (nToken+nSnippet-1) / nSnippet;
1233 }else{
1234 nFToken = -1 * nToken;
1237 for(iSnip=0; iSnip<nSnippet; iSnip++){
1238 int iBestScore = -1; /* Best score of columns checked so far */
1239 int iRead; /* Used to iterate through columns */
1240 SnippetFragment *pFragment = &aSnippet[iSnip];
1242 memset(pFragment, 0, sizeof(*pFragment));
1244 /* Loop through all columns of the table being considered for snippets.
1245 ** If the iCol argument to this function was negative, this means all
1246 ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1248 for(iRead=0; iRead<pTab->nColumn; iRead++){
1249 SnippetFragment sF = {0, 0, 0, 0};
1250 int iS;
1251 if( iCol>=0 && iRead!=iCol ) continue;
1253 /* Find the best snippet of nFToken tokens in column iRead. */
1254 rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
1255 if( rc!=SQLITE_OK ){
1256 goto snippet_out;
1258 if( iS>iBestScore ){
1259 *pFragment = sF;
1260 iBestScore = iS;
1264 mCovered |= pFragment->covered;
1267 /* If all query phrases seen by fts3BestSnippet() are present in at least
1268 ** one of the nSnippet snippet fragments, break out of the loop.
1270 assert( (mCovered&mSeen)==mCovered );
1271 if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
1274 assert( nFToken>0 );
1276 for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
1277 rc = fts3SnippetText(pCsr, &aSnippet[i],
1278 i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
1282 snippet_out:
1283 sqlite3Fts3SegmentsClose(pTab);
1284 if( rc!=SQLITE_OK ){
1285 sqlite3_result_error_code(pCtx, rc);
1286 sqlite3_free(res.z);
1287 }else{
1288 sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
1293 typedef struct TermOffset TermOffset;
1294 typedef struct TermOffsetCtx TermOffsetCtx;
1296 struct TermOffset {
1297 char *pList; /* Position-list */
1298 int iPos; /* Position just read from pList */
1299 int iOff; /* Offset of this term from read positions */
1302 struct TermOffsetCtx {
1303 Fts3Cursor *pCsr;
1304 int iCol; /* Column of table to populate aTerm for */
1305 int iTerm;
1306 sqlite3_int64 iDocid;
1307 TermOffset *aTerm;
1311 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1313 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
1314 TermOffsetCtx *p = (TermOffsetCtx *)ctx;
1315 int nTerm; /* Number of tokens in phrase */
1316 int iTerm; /* For looping through nTerm phrase terms */
1317 char *pList; /* Pointer to position list for phrase */
1318 int iPos = 0; /* First position in position-list */
1319 int rc;
1321 UNUSED_PARAMETER(iPhrase);
1322 rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
1323 nTerm = pExpr->pPhrase->nToken;
1324 if( pList ){
1325 fts3GetDeltaPosition(&pList, &iPos);
1326 assert( iPos>=0 );
1329 for(iTerm=0; iTerm<nTerm; iTerm++){
1330 TermOffset *pT = &p->aTerm[p->iTerm++];
1331 pT->iOff = nTerm-iTerm-1;
1332 pT->pList = pList;
1333 pT->iPos = iPos;
1336 return rc;
1340 ** Implementation of offsets() function.
1342 void sqlite3Fts3Offsets(
1343 sqlite3_context *pCtx, /* SQLite function call context */
1344 Fts3Cursor *pCsr /* Cursor object */
1346 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1347 sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
1348 int rc; /* Return Code */
1349 int nToken; /* Number of tokens in query */
1350 int iCol; /* Column currently being processed */
1351 StrBuffer res = {0, 0, 0}; /* Result string */
1352 TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
1354 if( !pCsr->pExpr ){
1355 sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1356 return;
1359 memset(&sCtx, 0, sizeof(sCtx));
1360 assert( pCsr->isRequireSeek==0 );
1362 /* Count the number of terms in the query */
1363 rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
1364 if( rc!=SQLITE_OK ) goto offsets_out;
1366 /* Allocate the array of TermOffset iterators. */
1367 sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
1368 if( 0==sCtx.aTerm ){
1369 rc = SQLITE_NOMEM;
1370 goto offsets_out;
1372 sCtx.iDocid = pCsr->iPrevId;
1373 sCtx.pCsr = pCsr;
1375 /* Loop through the table columns, appending offset information to
1376 ** string-buffer res for each column.
1378 for(iCol=0; iCol<pTab->nColumn; iCol++){
1379 sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
1380 const char *ZDUMMY; /* Dummy argument used with xNext() */
1381 int NDUMMY = 0; /* Dummy argument used with xNext() */
1382 int iStart = 0;
1383 int iEnd = 0;
1384 int iCurrent = 0;
1385 const char *zDoc;
1386 int nDoc;
1388 /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
1389 ** no way that this operation can fail, so the return code from
1390 ** fts3ExprIterate() can be discarded.
1392 sCtx.iCol = iCol;
1393 sCtx.iTerm = 0;
1394 (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
1396 /* Retreive the text stored in column iCol. If an SQL NULL is stored
1397 ** in column iCol, jump immediately to the next iteration of the loop.
1398 ** If an OOM occurs while retrieving the data (this can happen if SQLite
1399 ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
1400 ** to the caller.
1402 zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
1403 nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
1404 if( zDoc==0 ){
1405 if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
1406 continue;
1408 rc = SQLITE_NOMEM;
1409 goto offsets_out;
1412 /* Initialize a tokenizer iterator to iterate through column iCol. */
1413 rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
1414 zDoc, nDoc, &pC
1416 if( rc!=SQLITE_OK ) goto offsets_out;
1418 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1419 while( rc==SQLITE_OK ){
1420 int i; /* Used to loop through terms */
1421 int iMinPos = 0x7FFFFFFF; /* Position of next token */
1422 TermOffset *pTerm = 0; /* TermOffset associated with next token */
1424 for(i=0; i<nToken; i++){
1425 TermOffset *pT = &sCtx.aTerm[i];
1426 if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
1427 iMinPos = pT->iPos-pT->iOff;
1428 pTerm = pT;
1432 if( !pTerm ){
1433 /* All offsets for this column have been gathered. */
1434 rc = SQLITE_DONE;
1435 }else{
1436 assert( iCurrent<=iMinPos );
1437 if( 0==(0xFE&*pTerm->pList) ){
1438 pTerm->pList = 0;
1439 }else{
1440 fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
1442 while( rc==SQLITE_OK && iCurrent<iMinPos ){
1443 rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
1445 if( rc==SQLITE_OK ){
1446 char aBuffer[64];
1447 sqlite3_snprintf(sizeof(aBuffer), aBuffer,
1448 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
1450 rc = fts3StringAppend(&res, aBuffer, -1);
1451 }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
1452 rc = FTS_CORRUPT_VTAB;
1456 if( rc==SQLITE_DONE ){
1457 rc = SQLITE_OK;
1460 pMod->xClose(pC);
1461 if( rc!=SQLITE_OK ) goto offsets_out;
1464 offsets_out:
1465 sqlite3_free(sCtx.aTerm);
1466 assert( rc!=SQLITE_DONE );
1467 sqlite3Fts3SegmentsClose(pTab);
1468 if( rc!=SQLITE_OK ){
1469 sqlite3_result_error_code(pCtx, rc);
1470 sqlite3_free(res.z);
1471 }else{
1472 sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
1474 return;
1478 ** Implementation of matchinfo() function.
1480 void sqlite3Fts3Matchinfo(
1481 sqlite3_context *pContext, /* Function call context */
1482 Fts3Cursor *pCsr, /* FTS3 table cursor */
1483 const char *zArg /* Second arg to matchinfo() function */
1485 Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1486 int rc;
1487 int i;
1488 const char *zFormat;
1490 if( zArg ){
1491 for(i=0; zArg[i]; i++){
1492 char *zErr = 0;
1493 if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
1494 sqlite3_result_error(pContext, zErr, -1);
1495 sqlite3_free(zErr);
1496 return;
1499 zFormat = zArg;
1500 }else{
1501 zFormat = FTS3_MATCHINFO_DEFAULT;
1504 if( !pCsr->pExpr ){
1505 sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
1506 return;
1509 /* Retrieve matchinfo() data. */
1510 rc = fts3GetMatchinfo(pCsr, zFormat);
1511 sqlite3Fts3SegmentsClose(pTab);
1513 if( rc!=SQLITE_OK ){
1514 sqlite3_result_error_code(pContext, rc);
1515 }else{
1516 int n = pCsr->nMatchinfo * sizeof(u32);
1517 sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
1521 #endif