Snapshot of upstream SQLite 3.41.0
[sqlcipher.git] / ext / fts5 / fts5_index.c
blob694cc16e45e1338bd62a8f05ce00921785b89fbd
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
20 #include "fts5Int.h"
23 ** Overview:
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
46 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
47 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
49 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
51 #define FTS5_MAIN_PREFIX '0'
53 #if FTS5_MAX_PREFIX_INDEXES > 31
54 # error "FTS5_MAX_PREFIX_INDEXES is too large"
55 #endif
57 #define FTS5_MAX_LEVEL 64
60 ** Details:
62 ** The %_data table managed by this module,
64 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
66 ** , contains the following 5 types of records. See the comments surrounding
67 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
68 ** assigned to each fo them.
70 ** 1. Structure Records:
72 ** The set of segments that make up an index - the index structure - are
73 ** recorded in a single record within the %_data table. The record consists
74 ** of a single 32-bit configuration cookie value followed by a list of
75 ** SQLite varints. If the FTS table features more than one index (because
76 ** there are one or more prefix indexes), it is guaranteed that all share
77 ** the same cookie value.
79 ** Immediately following the configuration cookie, the record begins with
80 ** three varints:
82 ** + number of levels,
83 ** + total number of segments on all levels,
84 ** + value of write counter.
86 ** Then, for each level from 0 to nMax:
88 ** + number of input segments in ongoing merge.
89 ** + total number of segments in level.
90 ** + for each segment from oldest to newest:
91 ** + segment id (always > 0)
92 ** + first leaf page number (often 1, always greater than 0)
93 ** + final leaf page number
95 ** 2. The Averages Record:
97 ** A single record within the %_data table. The data is a list of varints.
98 ** The first value is the number of rows in the index. Then, for each column
99 ** from left to right, the total number of tokens in the column for all
100 ** rows of the table.
102 ** 3. Segment leaves:
104 ** TERM/DOCLIST FORMAT:
106 ** Most of each segment leaf is taken up by term/doclist data. The
107 ** general format of term/doclist, starting with the first term
108 ** on the leaf page, is:
110 ** varint : size of first term
111 ** blob: first term data
112 ** doclist: first doclist
113 ** zero-or-more {
114 ** varint: number of bytes in common with previous term
115 ** varint: number of bytes of new term data (nNew)
116 ** blob: nNew bytes of new term data
117 ** doclist: next doclist
118 ** }
120 ** doclist format:
122 ** varint: first rowid
123 ** poslist: first poslist
124 ** zero-or-more {
125 ** varint: rowid delta (always > 0)
126 ** poslist: next poslist
127 ** }
129 ** poslist format:
131 ** varint: size of poslist in bytes multiplied by 2, not including
132 ** this field. Plus 1 if this entry carries the "delete" flag.
133 ** collist: collist for column 0
134 ** zero-or-more {
135 ** 0x01 byte
136 ** varint: column number (I)
137 ** collist: collist for column I
138 ** }
140 ** collist format:
142 ** varint: first offset + 2
143 ** zero-or-more {
144 ** varint: offset delta + 2
145 ** }
147 ** PAGE FORMAT
149 ** Each leaf page begins with a 4-byte header containing 2 16-bit
150 ** unsigned integer fields in big-endian format. They are:
152 ** * The byte offset of the first rowid on the page, if it exists
153 ** and occurs before the first term (otherwise 0).
155 ** * The byte offset of the start of the page footer. If the page
156 ** footer is 0 bytes in size, then this field is the same as the
157 ** size of the leaf page in bytes.
159 ** The page footer consists of a single varint for each term located
160 ** on the page. Each varint is the byte offset of the current term
161 ** within the page, delta-compressed against the previous value. In
162 ** other words, the first varint in the footer is the byte offset of
163 ** the first term, the second is the byte offset of the second less that
164 ** of the first, and so on.
166 ** The term/doclist format described above is accurate if the entire
167 ** term/doclist data fits on a single leaf page. If this is not the case,
168 ** the format is changed in two ways:
170 ** + if the first rowid on a page occurs before the first term, it
171 ** is stored as a literal value:
173 ** varint: first rowid
175 ** + the first term on each page is stored in the same way as the
176 ** very first term of the segment:
178 ** varint : size of first term
179 ** blob: first term data
181 ** 5. Segment doclist indexes:
183 ** Doclist indexes are themselves b-trees, however they usually consist of
184 ** a single leaf record only. The format of each doclist index leaf page
185 ** is:
187 ** * Flags byte. Bits are:
188 ** 0x01: Clear if leaf is also the root page, otherwise set.
190 ** * Page number of fts index leaf page. As a varint.
192 ** * First rowid on page indicated by previous field. As a varint.
194 ** * A list of varints, one for each subsequent termless page. A
195 ** positive delta if the termless page contains at least one rowid,
196 ** or an 0x00 byte otherwise.
198 ** Internal doclist index nodes are:
200 ** * Flags byte. Bits are:
201 ** 0x01: Clear for root page, otherwise set.
203 ** * Page number of first child page. As a varint.
205 ** * Copy of first rowid on page indicated by previous field. As a varint.
207 ** * A list of delta-encoded varints - the first rowid on each subsequent
208 ** child page.
213 ** Rowids for the averages and structure records in the %_data table.
215 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
216 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */
219 ** Macros determining the rowids used by segment leaves and dlidx leaves
220 ** and nodes. All nodes and leaves are stored in the %_data table with large
221 ** positive rowids.
223 ** Each segment has a unique non-zero 16-bit id.
225 ** The rowid for each segment leaf is found by passing the segment id and
226 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
227 ** sequentially starting from 1.
229 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
230 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
231 #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
232 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
234 #define fts5_dri(segid, dlidx, height, pgno) ( \
235 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
236 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
237 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \
238 ((i64)(pgno)) \
241 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
242 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
244 #ifdef SQLITE_DEBUG
245 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
246 #endif
250 ** Each time a blob is read from the %_data table, it is padded with this
251 ** many zero bytes. This makes it easier to decode the various record formats
252 ** without overreading if the records are corrupt.
254 #define FTS5_DATA_ZERO_PADDING 8
255 #define FTS5_DATA_PADDING 20
257 typedef struct Fts5Data Fts5Data;
258 typedef struct Fts5DlidxIter Fts5DlidxIter;
259 typedef struct Fts5DlidxLvl Fts5DlidxLvl;
260 typedef struct Fts5DlidxWriter Fts5DlidxWriter;
261 typedef struct Fts5Iter Fts5Iter;
262 typedef struct Fts5PageWriter Fts5PageWriter;
263 typedef struct Fts5SegIter Fts5SegIter;
264 typedef struct Fts5DoclistIter Fts5DoclistIter;
265 typedef struct Fts5SegWriter Fts5SegWriter;
266 typedef struct Fts5Structure Fts5Structure;
267 typedef struct Fts5StructureLevel Fts5StructureLevel;
268 typedef struct Fts5StructureSegment Fts5StructureSegment;
270 struct Fts5Data {
271 u8 *p; /* Pointer to buffer containing record */
272 int nn; /* Size of record in bytes */
273 int szLeaf; /* Size of leaf without page-index */
277 ** One object per %_data table.
279 struct Fts5Index {
280 Fts5Config *pConfig; /* Virtual table configuration */
281 char *zDataTbl; /* Name of %_data table */
282 int nWorkUnit; /* Leaf pages in a "unit" of work */
285 ** Variables related to the accumulation of tokens and doclists within the
286 ** in-memory hash tables before they are flushed to disk.
288 Fts5Hash *pHash; /* Hash table for in-memory data */
289 int nPendingData; /* Current bytes of pending data */
290 i64 iWriteRowid; /* Rowid for current doc being written */
291 int bDelete; /* Current write is a delete */
293 /* Error state. */
294 int rc; /* Current error code */
296 /* State used by the fts5DataXXX() functions. */
297 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
298 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
299 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
300 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
301 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
302 sqlite3_stmt *pIdxSelect;
303 int nRead; /* Total number of blocks read */
305 sqlite3_stmt *pDataVersion;
306 i64 iStructVersion; /* data_version when pStruct read */
307 Fts5Structure *pStruct; /* Current db structure (or NULL) */
310 struct Fts5DoclistIter {
311 u8 *aEof; /* Pointer to 1 byte past end of doclist */
313 /* Output variables. aPoslist==0 at EOF */
314 i64 iRowid;
315 u8 *aPoslist;
316 int nPoslist;
317 int nSize;
321 ** The contents of the "structure" record for each index are represented
322 ** using an Fts5Structure record in memory. Which uses instances of the
323 ** other Fts5StructureXXX types as components.
325 struct Fts5StructureSegment {
326 int iSegid; /* Segment id */
327 int pgnoFirst; /* First leaf page number in segment */
328 int pgnoLast; /* Last leaf page number in segment */
330 struct Fts5StructureLevel {
331 int nMerge; /* Number of segments in incr-merge */
332 int nSeg; /* Total number of segments on level */
333 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
335 struct Fts5Structure {
336 int nRef; /* Object reference count */
337 u64 nWriteCounter; /* Total leaves written to level 0 */
338 int nSegment; /* Total segments in this structure */
339 int nLevel; /* Number of levels in this index */
340 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
344 ** An object of type Fts5SegWriter is used to write to segments.
346 struct Fts5PageWriter {
347 int pgno; /* Page number for this page */
348 int iPrevPgidx; /* Previous value written into pgidx */
349 Fts5Buffer buf; /* Buffer containing leaf data */
350 Fts5Buffer pgidx; /* Buffer containing page-index */
351 Fts5Buffer term; /* Buffer containing previous term on page */
353 struct Fts5DlidxWriter {
354 int pgno; /* Page number for this page */
355 int bPrevValid; /* True if iPrev is valid */
356 i64 iPrev; /* Previous rowid value written to page */
357 Fts5Buffer buf; /* Buffer containing page data */
359 struct Fts5SegWriter {
360 int iSegid; /* Segid to write to */
361 Fts5PageWriter writer; /* PageWriter object */
362 i64 iPrevRowid; /* Previous rowid written to current leaf */
363 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
364 u8 bFirstRowidInPage; /* True if next rowid is first in page */
365 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
366 u8 bFirstTermInPage; /* True if next term will be first in leaf */
367 int nLeafWritten; /* Number of leaf pages written */
368 int nEmpty; /* Number of contiguous term-less nodes */
370 int nDlidx; /* Allocated size of aDlidx[] array */
371 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
373 /* Values to insert into the %_idx table */
374 Fts5Buffer btterm; /* Next term to insert into %_idx table */
375 int iBtPage; /* Page number corresponding to btterm */
378 typedef struct Fts5CResult Fts5CResult;
379 struct Fts5CResult {
380 u16 iFirst; /* aSeg[] index of firstest iterator */
381 u8 bTermEq; /* True if the terms are equal */
385 ** Object for iterating through a single segment, visiting each term/rowid
386 ** pair in the segment.
388 ** pSeg:
389 ** The segment to iterate through.
391 ** iLeafPgno:
392 ** Current leaf page number within segment.
394 ** iLeafOffset:
395 ** Byte offset within the current leaf that is the first byte of the
396 ** position list data (one byte passed the position-list size field).
397 ** rowid field of the current entry. Usually this is the size field of the
398 ** position list data. The exception is if the rowid for the current entry
399 ** is the last thing on the leaf page.
401 ** pLeaf:
402 ** Buffer containing current leaf page data. Set to NULL at EOF.
404 ** iTermLeafPgno, iTermLeafOffset:
405 ** Leaf page number containing the last term read from the segment. And
406 ** the offset immediately following the term data.
408 ** flags:
409 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
411 ** FTS5_SEGITER_ONETERM:
412 ** If set, set the iterator to point to EOF after the current doclist
413 ** has been exhausted. Do not proceed to the next term in the segment.
415 ** FTS5_SEGITER_REVERSE:
416 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
417 ** it is set, iterate through rowid in descending order instead of the
418 ** default ascending order.
420 ** iRowidOffset/nRowidOffset/aRowidOffset:
421 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
423 ** For each rowid on the page corresponding to the current term, the
424 ** corresponding aRowidOffset[] entry is set to the byte offset of the
425 ** start of the "position-list-size" field within the page.
427 ** iTermIdx:
428 ** Index of current term on iTermLeafPgno.
430 struct Fts5SegIter {
431 Fts5StructureSegment *pSeg; /* Segment to iterate through */
432 int flags; /* Mask of configuration flags */
433 int iLeafPgno; /* Current leaf page number */
434 Fts5Data *pLeaf; /* Current leaf data */
435 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
436 i64 iLeafOffset; /* Byte offset within current leaf */
438 /* Next method */
439 void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
441 /* The page and offset from which the current term was read. The offset
442 ** is the offset of the first rowid in the current doclist. */
443 int iTermLeafPgno;
444 int iTermLeafOffset;
446 int iPgidxOff; /* Next offset in pgidx */
447 int iEndofDoclist;
449 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
450 int iRowidOffset; /* Current entry in aRowidOffset[] */
451 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
452 int *aRowidOffset; /* Array of offset to rowid fields */
454 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
456 /* Variables populated based on current entry. */
457 Fts5Buffer term; /* Current term */
458 i64 iRowid; /* Current rowid */
459 int nPos; /* Number of bytes in current position list */
460 u8 bDel; /* True if the delete flag is set */
464 ** Argument is a pointer to an Fts5Data structure that contains a
465 ** leaf page.
467 #define ASSERT_SZLEAF_OK(x) assert( \
468 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
471 #define FTS5_SEGITER_ONETERM 0x01
472 #define FTS5_SEGITER_REVERSE 0x02
475 ** Argument is a pointer to an Fts5Data structure that contains a leaf
476 ** page. This macro evaluates to true if the leaf contains no terms, or
477 ** false if it contains at least one term.
479 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
481 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
483 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
486 ** Object for iterating through the merged results of one or more segments,
487 ** visiting each term/rowid pair in the merged data.
489 ** nSeg is always a power of two greater than or equal to the number of
490 ** segments that this object is merging data from. Both the aSeg[] and
491 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
492 ** with zeroed objects - these are handled as if they were iterators opened
493 ** on empty segments.
495 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
496 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
497 ** comparison in this context is the index of the iterator that currently
498 ** points to the smaller term/rowid combination. Iterators at EOF are
499 ** considered to be greater than all other iterators.
501 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
502 ** the smallest key overall. aFirst[0] is unused.
504 ** poslist:
505 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
506 ** There is no way to tell if this is populated or not.
508 struct Fts5Iter {
509 Fts5IndexIter base; /* Base class containing output vars */
511 Fts5Index *pIndex; /* Index that owns this iterator */
512 Fts5Buffer poslist; /* Buffer containing current poslist */
513 Fts5Colset *pColset; /* Restrict matches to these columns */
515 /* Invoked to set output variables. */
516 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
518 int nSeg; /* Size of aSeg[] array */
519 int bRev; /* True to iterate in reverse order */
520 u8 bSkipEmpty; /* True to skip deleted entries */
522 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
523 Fts5CResult *aFirst; /* Current merge state (see above) */
524 Fts5SegIter aSeg[1]; /* Array of segment iterators */
529 ** An instance of the following type is used to iterate through the contents
530 ** of a doclist-index record.
532 ** pData:
533 ** Record containing the doclist-index data.
535 ** bEof:
536 ** Set to true once iterator has reached EOF.
538 ** iOff:
539 ** Set to the current offset within record pData.
541 struct Fts5DlidxLvl {
542 Fts5Data *pData; /* Data for current page of this level */
543 int iOff; /* Current offset into pData */
544 int bEof; /* At EOF already */
545 int iFirstOff; /* Used by reverse iterators */
547 /* Output variables */
548 int iLeafPgno; /* Page number of current leaf page */
549 i64 iRowid; /* First rowid on leaf iLeafPgno */
551 struct Fts5DlidxIter {
552 int nLvl;
553 int iSegid;
554 Fts5DlidxLvl aLvl[1];
557 static void fts5PutU16(u8 *aOut, u16 iVal){
558 aOut[0] = (iVal>>8);
559 aOut[1] = (iVal&0xFF);
562 static u16 fts5GetU16(const u8 *aIn){
563 return ((u16)aIn[0] << 8) + aIn[1];
567 ** Allocate and return a buffer at least nByte bytes in size.
569 ** If an OOM error is encountered, return NULL and set the error code in
570 ** the Fts5Index handle passed as the first argument.
572 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
573 return sqlite3Fts5MallocZero(&p->rc, nByte);
577 ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
579 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
580 ** +ve if pRight is smaller than pLeft. In other words:
582 ** res = *pLeft - *pRight
584 #ifdef SQLITE_DEBUG
585 static int fts5BufferCompareBlob(
586 Fts5Buffer *pLeft, /* Left hand side of comparison */
587 const u8 *pRight, int nRight /* Right hand side of comparison */
589 int nCmp = MIN(pLeft->n, nRight);
590 int res = memcmp(pLeft->p, pRight, nCmp);
591 return (res==0 ? (pLeft->n - nRight) : res);
593 #endif
596 ** Compare the contents of the two buffers using memcmp(). If one buffer
597 ** is a prefix of the other, it is considered the lesser.
599 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
600 ** +ve if pRight is smaller than pLeft. In other words:
602 ** res = *pLeft - *pRight
604 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
605 int nCmp, res;
606 nCmp = MIN(pLeft->n, pRight->n);
607 assert( nCmp<=0 || pLeft->p!=0 );
608 assert( nCmp<=0 || pRight->p!=0 );
609 res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
610 return (res==0 ? (pLeft->n - pRight->n) : res);
613 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
614 int ret;
615 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
616 return ret;
620 ** Close the read-only blob handle, if it is open.
622 void sqlite3Fts5IndexCloseReader(Fts5Index *p){
623 if( p->pReader ){
624 sqlite3_blob *pReader = p->pReader;
625 p->pReader = 0;
626 sqlite3_blob_close(pReader);
631 ** Retrieve a record from the %_data table.
633 ** If an error occurs, NULL is returned and an error left in the
634 ** Fts5Index object.
636 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
637 Fts5Data *pRet = 0;
638 if( p->rc==SQLITE_OK ){
639 int rc = SQLITE_OK;
641 if( p->pReader ){
642 /* This call may return SQLITE_ABORT if there has been a savepoint
643 ** rollback since it was last used. In this case a new blob handle
644 ** is required. */
645 sqlite3_blob *pBlob = p->pReader;
646 p->pReader = 0;
647 rc = sqlite3_blob_reopen(pBlob, iRowid);
648 assert( p->pReader==0 );
649 p->pReader = pBlob;
650 if( rc!=SQLITE_OK ){
651 sqlite3Fts5IndexCloseReader(p);
653 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
656 /* If the blob handle is not open at this point, open it and seek
657 ** to the requested entry. */
658 if( p->pReader==0 && rc==SQLITE_OK ){
659 Fts5Config *pConfig = p->pConfig;
660 rc = sqlite3_blob_open(pConfig->db,
661 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
665 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
666 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
667 ** All the reasons those functions might return SQLITE_ERROR - missing
668 ** table, missing row, non-blob/text in block column - indicate
669 ** backing store corruption. */
670 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
672 if( rc==SQLITE_OK ){
673 u8 *aOut = 0; /* Read blob data into this buffer */
674 int nByte = sqlite3_blob_bytes(p->pReader);
675 sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
676 pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
677 if( pRet ){
678 pRet->nn = nByte;
679 aOut = pRet->p = (u8*)&pRet[1];
680 }else{
681 rc = SQLITE_NOMEM;
684 if( rc==SQLITE_OK ){
685 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
687 if( rc!=SQLITE_OK ){
688 sqlite3_free(pRet);
689 pRet = 0;
690 }else{
691 /* TODO1: Fix this */
692 pRet->p[nByte] = 0x00;
693 pRet->p[nByte+1] = 0x00;
694 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
697 p->rc = rc;
698 p->nRead++;
701 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
702 return pRet;
707 ** Release a reference to data record returned by an earlier call to
708 ** fts5DataRead().
710 static void fts5DataRelease(Fts5Data *pData){
711 sqlite3_free(pData);
714 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
715 Fts5Data *pRet = fts5DataRead(p, iRowid);
716 if( pRet ){
717 if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
718 p->rc = FTS5_CORRUPT;
719 fts5DataRelease(pRet);
720 pRet = 0;
723 return pRet;
726 static int fts5IndexPrepareStmt(
727 Fts5Index *p,
728 sqlite3_stmt **ppStmt,
729 char *zSql
731 if( p->rc==SQLITE_OK ){
732 if( zSql ){
733 p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
734 SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
735 ppStmt, 0);
736 }else{
737 p->rc = SQLITE_NOMEM;
740 sqlite3_free(zSql);
741 return p->rc;
746 ** INSERT OR REPLACE a record into the %_data table.
748 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
749 if( p->rc!=SQLITE_OK ) return;
751 if( p->pWriter==0 ){
752 Fts5Config *pConfig = p->pConfig;
753 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
754 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
755 pConfig->zDb, pConfig->zName
757 if( p->rc ) return;
760 sqlite3_bind_int64(p->pWriter, 1, iRowid);
761 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
762 sqlite3_step(p->pWriter);
763 p->rc = sqlite3_reset(p->pWriter);
764 sqlite3_bind_null(p->pWriter, 2);
768 ** Execute the following SQL:
770 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
772 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
773 if( p->rc!=SQLITE_OK ) return;
775 if( p->pDeleter==0 ){
776 Fts5Config *pConfig = p->pConfig;
777 char *zSql = sqlite3_mprintf(
778 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
779 pConfig->zDb, pConfig->zName
781 if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
784 sqlite3_bind_int64(p->pDeleter, 1, iFirst);
785 sqlite3_bind_int64(p->pDeleter, 2, iLast);
786 sqlite3_step(p->pDeleter);
787 p->rc = sqlite3_reset(p->pDeleter);
791 ** Remove all records associated with segment iSegid.
793 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
794 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
795 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
796 fts5DataDelete(p, iFirst, iLast);
797 if( p->pIdxDeleter==0 ){
798 Fts5Config *pConfig = p->pConfig;
799 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
800 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
801 pConfig->zDb, pConfig->zName
804 if( p->rc==SQLITE_OK ){
805 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
806 sqlite3_step(p->pIdxDeleter);
807 p->rc = sqlite3_reset(p->pIdxDeleter);
812 ** Release a reference to an Fts5Structure object returned by an earlier
813 ** call to fts5StructureRead() or fts5StructureDecode().
815 static void fts5StructureRelease(Fts5Structure *pStruct){
816 if( pStruct && 0>=(--pStruct->nRef) ){
817 int i;
818 assert( pStruct->nRef==0 );
819 for(i=0; i<pStruct->nLevel; i++){
820 sqlite3_free(pStruct->aLevel[i].aSeg);
822 sqlite3_free(pStruct);
826 static void fts5StructureRef(Fts5Structure *pStruct){
827 pStruct->nRef++;
830 void *sqlite3Fts5StructureRef(Fts5Index *p){
831 fts5StructureRef(p->pStruct);
832 return (void*)p->pStruct;
834 void sqlite3Fts5StructureRelease(void *p){
835 if( p ){
836 fts5StructureRelease((Fts5Structure*)p);
839 int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
840 if( p->pStruct!=(Fts5Structure*)pStruct ){
841 return SQLITE_ABORT;
843 return SQLITE_OK;
847 ** Ensure that structure object (*pp) is writable.
849 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
850 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
852 static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
853 Fts5Structure *p = *pp;
854 if( *pRc==SQLITE_OK && p->nRef>1 ){
855 i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
856 Fts5Structure *pNew;
857 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
858 if( pNew ){
859 int i;
860 memcpy(pNew, p, nByte);
861 for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
862 for(i=0; i<p->nLevel; i++){
863 Fts5StructureLevel *pLvl = &pNew->aLevel[i];
864 nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
865 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
866 if( pLvl->aSeg==0 ){
867 for(i=0; i<p->nLevel; i++){
868 sqlite3_free(pNew->aLevel[i].aSeg);
870 sqlite3_free(pNew);
871 return;
873 memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
875 p->nRef--;
876 pNew->nRef = 1;
878 *pp = pNew;
883 ** Deserialize and return the structure record currently stored in serialized
884 ** form within buffer pData/nData.
886 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
887 ** are over-allocated by one slot. This allows the structure contents
888 ** to be more easily edited.
890 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
891 ** returned. Otherwise, *ppOut is set to point to the new object and
892 ** SQLITE_OK returned.
894 static int fts5StructureDecode(
895 const u8 *pData, /* Buffer containing serialized structure */
896 int nData, /* Size of buffer pData in bytes */
897 int *piCookie, /* Configuration cookie value */
898 Fts5Structure **ppOut /* OUT: Deserialized object */
900 int rc = SQLITE_OK;
901 int i = 0;
902 int iLvl;
903 int nLevel = 0;
904 int nSegment = 0;
905 sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
906 Fts5Structure *pRet = 0; /* Structure object to return */
908 /* Grab the cookie value */
909 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
910 i = 4;
912 /* Read the total number of levels and segments from the start of the
913 ** structure record. */
914 i += fts5GetVarint32(&pData[i], nLevel);
915 i += fts5GetVarint32(&pData[i], nSegment);
916 if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
917 || nSegment>FTS5_MAX_SEGMENT || nSegment<0
919 return FTS5_CORRUPT;
921 nByte = (
922 sizeof(Fts5Structure) + /* Main structure */
923 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
925 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
927 if( pRet ){
928 pRet->nRef = 1;
929 pRet->nLevel = nLevel;
930 pRet->nSegment = nSegment;
931 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
933 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
934 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
935 int nTotal = 0;
936 int iSeg;
938 if( i>=nData ){
939 rc = FTS5_CORRUPT;
940 }else{
941 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
942 i += fts5GetVarint32(&pData[i], nTotal);
943 if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
944 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
945 nTotal * sizeof(Fts5StructureSegment)
947 nSegment -= nTotal;
950 if( rc==SQLITE_OK ){
951 pLvl->nSeg = nTotal;
952 for(iSeg=0; iSeg<nTotal; iSeg++){
953 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
954 if( i>=nData ){
955 rc = FTS5_CORRUPT;
956 break;
958 i += fts5GetVarint32(&pData[i], pSeg->iSegid);
959 i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
960 i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
961 if( pSeg->pgnoLast<pSeg->pgnoFirst ){
962 rc = FTS5_CORRUPT;
963 break;
966 if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
967 if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
970 if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
972 if( rc!=SQLITE_OK ){
973 fts5StructureRelease(pRet);
974 pRet = 0;
978 *ppOut = pRet;
979 return rc;
983 ** Add a level to the Fts5Structure.aLevel[] array of structure object
984 ** (*ppStruct).
986 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
987 fts5StructureMakeWritable(pRc, ppStruct);
988 if( *pRc==SQLITE_OK ){
989 Fts5Structure *pStruct = *ppStruct;
990 int nLevel = pStruct->nLevel;
991 sqlite3_int64 nByte = (
992 sizeof(Fts5Structure) + /* Main structure */
993 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
996 pStruct = sqlite3_realloc64(pStruct, nByte);
997 if( pStruct ){
998 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
999 pStruct->nLevel++;
1000 *ppStruct = pStruct;
1001 }else{
1002 *pRc = SQLITE_NOMEM;
1008 ** Extend level iLvl so that there is room for at least nExtra more
1009 ** segments.
1011 static void fts5StructureExtendLevel(
1012 int *pRc,
1013 Fts5Structure *pStruct,
1014 int iLvl,
1015 int nExtra,
1016 int bInsert
1018 if( *pRc==SQLITE_OK ){
1019 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1020 Fts5StructureSegment *aNew;
1021 sqlite3_int64 nByte;
1023 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
1024 aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
1025 if( aNew ){
1026 if( bInsert==0 ){
1027 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
1028 }else{
1029 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
1030 memmove(&aNew[nExtra], aNew, nMove);
1031 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
1033 pLvl->aSeg = aNew;
1034 }else{
1035 *pRc = SQLITE_NOMEM;
1040 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
1041 Fts5Structure *pRet = 0;
1042 Fts5Config *pConfig = p->pConfig;
1043 int iCookie; /* Configuration cookie */
1044 Fts5Data *pData;
1046 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
1047 if( p->rc==SQLITE_OK ){
1048 /* TODO: Do we need this if the leaf-index is appended? Probably... */
1049 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
1050 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
1051 if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
1052 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
1054 fts5DataRelease(pData);
1055 if( p->rc!=SQLITE_OK ){
1056 fts5StructureRelease(pRet);
1057 pRet = 0;
1061 return pRet;
1064 static i64 fts5IndexDataVersion(Fts5Index *p){
1065 i64 iVersion = 0;
1067 if( p->rc==SQLITE_OK ){
1068 if( p->pDataVersion==0 ){
1069 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1070 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1072 if( p->rc ) return 0;
1075 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1076 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1078 p->rc = sqlite3_reset(p->pDataVersion);
1081 return iVersion;
1085 ** Read, deserialize and return the structure record.
1087 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1088 ** are over-allocated as described for function fts5StructureDecode()
1089 ** above.
1091 ** If an error occurs, NULL is returned and an error code left in the
1092 ** Fts5Index handle. If an error has already occurred when this function
1093 ** is called, it is a no-op.
1095 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1097 if( p->pStruct==0 ){
1098 p->iStructVersion = fts5IndexDataVersion(p);
1099 if( p->rc==SQLITE_OK ){
1100 p->pStruct = fts5StructureReadUncached(p);
1104 #if 0
1105 else{
1106 Fts5Structure *pTest = fts5StructureReadUncached(p);
1107 if( pTest ){
1108 int i, j;
1109 assert_nc( p->pStruct->nSegment==pTest->nSegment );
1110 assert_nc( p->pStruct->nLevel==pTest->nLevel );
1111 for(i=0; i<pTest->nLevel; i++){
1112 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1113 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1114 for(j=0; j<pTest->aLevel[i].nSeg; j++){
1115 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1116 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1117 assert_nc( p1->iSegid==p2->iSegid );
1118 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1119 assert_nc( p1->pgnoLast==p2->pgnoLast );
1122 fts5StructureRelease(pTest);
1125 #endif
1127 if( p->rc!=SQLITE_OK ) return 0;
1128 assert( p->iStructVersion!=0 );
1129 assert( p->pStruct!=0 );
1130 fts5StructureRef(p->pStruct);
1131 return p->pStruct;
1134 static void fts5StructureInvalidate(Fts5Index *p){
1135 if( p->pStruct ){
1136 fts5StructureRelease(p->pStruct);
1137 p->pStruct = 0;
1142 ** Return the total number of segments in index structure pStruct. This
1143 ** function is only ever used as part of assert() conditions.
1145 #ifdef SQLITE_DEBUG
1146 static int fts5StructureCountSegments(Fts5Structure *pStruct){
1147 int nSegment = 0; /* Total number of segments */
1148 if( pStruct ){
1149 int iLvl; /* Used to iterate through levels */
1150 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1151 nSegment += pStruct->aLevel[iLvl].nSeg;
1155 return nSegment;
1157 #endif
1159 #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
1160 assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \
1161 memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \
1162 (pBuf)->n += nBlob; \
1165 #define fts5BufferSafeAppendVarint(pBuf, iVal) { \
1166 (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
1167 assert( (pBuf)->nSpace>=(pBuf)->n ); \
1172 ** Serialize and store the "structure" record.
1174 ** If an error occurs, leave an error code in the Fts5Index object. If an
1175 ** error has already occurred, this function is a no-op.
1177 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1178 if( p->rc==SQLITE_OK ){
1179 Fts5Buffer buf; /* Buffer to serialize record into */
1180 int iLvl; /* Used to iterate through levels */
1181 int iCookie; /* Cookie value to store */
1183 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1184 memset(&buf, 0, sizeof(Fts5Buffer));
1186 /* Append the current configuration cookie */
1187 iCookie = p->pConfig->iCookie;
1188 if( iCookie<0 ) iCookie = 0;
1190 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
1191 sqlite3Fts5Put32(buf.p, iCookie);
1192 buf.n = 4;
1193 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1194 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1195 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1198 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1199 int iSeg; /* Used to iterate through segments */
1200 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1201 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1202 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1203 assert( pLvl->nMerge<=pLvl->nSeg );
1205 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1206 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
1207 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
1208 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
1212 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1213 fts5BufferFree(&buf);
1217 #if 0
1218 static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
1219 static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
1220 int rc = SQLITE_OK;
1221 Fts5Buffer buf;
1222 memset(&buf, 0, sizeof(buf));
1223 fts5DebugStructure(&rc, &buf, pStruct);
1224 fprintf(stdout, "%s: %s\n", zCaption, buf.p);
1225 fflush(stdout);
1226 fts5BufferFree(&buf);
1228 #else
1229 # define fts5PrintStructure(x,y)
1230 #endif
1232 static int fts5SegmentSize(Fts5StructureSegment *pSeg){
1233 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
1237 ** Return a copy of index structure pStruct. Except, promote as many
1238 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1239 ** returned.
1241 static void fts5StructurePromoteTo(
1242 Fts5Index *p,
1243 int iPromote,
1244 int szPromote,
1245 Fts5Structure *pStruct
1247 int il, is;
1248 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1250 if( pOut->nMerge==0 ){
1251 for(il=iPromote+1; il<pStruct->nLevel; il++){
1252 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1253 if( pLvl->nMerge ) return;
1254 for(is=pLvl->nSeg-1; is>=0; is--){
1255 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1256 if( sz>szPromote ) return;
1257 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1258 if( p->rc ) return;
1259 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1260 pOut->nSeg++;
1261 pLvl->nSeg--;
1268 ** A new segment has just been written to level iLvl of index structure
1269 ** pStruct. This function determines if any segments should be promoted
1270 ** as a result. Segments are promoted in two scenarios:
1272 ** a) If the segment just written is smaller than one or more segments
1273 ** within the previous populated level, it is promoted to the previous
1274 ** populated level.
1276 ** b) If the segment just written is larger than the newest segment on
1277 ** the next populated level, then that segment, and any other adjacent
1278 ** segments that are also smaller than the one just written, are
1279 ** promoted.
1281 ** If one or more segments are promoted, the structure object is updated
1282 ** to reflect this.
1284 static void fts5StructurePromote(
1285 Fts5Index *p, /* FTS5 backend object */
1286 int iLvl, /* Index level just updated */
1287 Fts5Structure *pStruct /* Index structure */
1289 if( p->rc==SQLITE_OK ){
1290 int iTst;
1291 int iPromote = -1;
1292 int szPromote = 0; /* Promote anything this size or smaller */
1293 Fts5StructureSegment *pSeg; /* Segment just written */
1294 int szSeg; /* Size of segment just written */
1295 int nSeg = pStruct->aLevel[iLvl].nSeg;
1297 if( nSeg==0 ) return;
1298 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1299 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1301 /* Check for condition (a) */
1302 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1303 if( iTst>=0 ){
1304 int i;
1305 int szMax = 0;
1306 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1307 assert( pTst->nMerge==0 );
1308 for(i=0; i<pTst->nSeg; i++){
1309 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1310 if( sz>szMax ) szMax = sz;
1312 if( szMax>=szSeg ){
1313 /* Condition (a) is true. Promote the newest segment on level
1314 ** iLvl to level iTst. */
1315 iPromote = iTst;
1316 szPromote = szMax;
1320 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1321 ** is a no-op if it is not. */
1322 if( iPromote<0 ){
1323 iPromote = iLvl;
1324 szPromote = szSeg;
1326 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1332 ** Advance the iterator passed as the only argument. If the end of the
1333 ** doclist-index page is reached, return non-zero.
1335 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1336 Fts5Data *pData = pLvl->pData;
1338 if( pLvl->iOff==0 ){
1339 assert( pLvl->bEof==0 );
1340 pLvl->iOff = 1;
1341 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1342 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1343 pLvl->iFirstOff = pLvl->iOff;
1344 }else{
1345 int iOff;
1346 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1347 if( pData->p[iOff] ) break;
1350 if( iOff<pData->nn ){
1351 i64 iVal;
1352 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1353 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
1354 pLvl->iRowid += iVal;
1355 pLvl->iOff = iOff;
1356 }else{
1357 pLvl->bEof = 1;
1361 return pLvl->bEof;
1365 ** Advance the iterator passed as the only argument.
1367 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1368 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1370 assert( iLvl<pIter->nLvl );
1371 if( fts5DlidxLvlNext(pLvl) ){
1372 if( (iLvl+1) < pIter->nLvl ){
1373 fts5DlidxIterNextR(p, pIter, iLvl+1);
1374 if( pLvl[1].bEof==0 ){
1375 fts5DataRelease(pLvl->pData);
1376 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1377 pLvl->pData = fts5DataRead(p,
1378 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1380 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1385 return pIter->aLvl[0].bEof;
1387 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
1388 return fts5DlidxIterNextR(p, pIter, 0);
1392 ** The iterator passed as the first argument has the following fields set
1393 ** as follows. This function sets up the rest of the iterator so that it
1394 ** points to the first rowid in the doclist-index.
1396 ** pData:
1397 ** pointer to doclist-index record,
1399 ** When this function is called pIter->iLeafPgno is the page number the
1400 ** doclist is associated with (the one featuring the term).
1402 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1403 int i;
1404 for(i=0; i<pIter->nLvl; i++){
1405 fts5DlidxLvlNext(&pIter->aLvl[i]);
1407 return pIter->aLvl[0].bEof;
1411 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
1412 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
1415 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
1416 int i;
1418 /* Advance each level to the last entry on the last page */
1419 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
1420 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
1421 while( fts5DlidxLvlNext(pLvl)==0 );
1422 pLvl->bEof = 0;
1424 if( i>0 ){
1425 Fts5DlidxLvl *pChild = &pLvl[-1];
1426 fts5DataRelease(pChild->pData);
1427 memset(pChild, 0, sizeof(Fts5DlidxLvl));
1428 pChild->pData = fts5DataRead(p,
1429 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
1436 ** Move the iterator passed as the only argument to the previous entry.
1438 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
1439 int iOff = pLvl->iOff;
1441 assert( pLvl->bEof==0 );
1442 if( iOff<=pLvl->iFirstOff ){
1443 pLvl->bEof = 1;
1444 }else{
1445 u8 *a = pLvl->pData->p;
1446 i64 iVal;
1447 int iLimit;
1448 int ii;
1449 int nZero = 0;
1451 /* Currently iOff points to the first byte of a varint. This block
1452 ** decrements iOff until it points to the first byte of the previous
1453 ** varint. Taking care not to read any memory locations that occur
1454 ** before the buffer in memory. */
1455 iLimit = (iOff>9 ? iOff-9 : 0);
1456 for(iOff--; iOff>iLimit; iOff--){
1457 if( (a[iOff-1] & 0x80)==0 ) break;
1460 fts5GetVarint(&a[iOff], (u64*)&iVal);
1461 pLvl->iRowid -= iVal;
1462 pLvl->iLeafPgno--;
1464 /* Skip backwards past any 0x00 varints. */
1465 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
1466 nZero++;
1468 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
1469 /* The byte immediately before the last 0x00 byte has the 0x80 bit
1470 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
1471 ** bytes before a[ii]. */
1472 int bZero = 0; /* True if last 0x00 counts */
1473 if( (ii-8)>=pLvl->iFirstOff ){
1474 int j;
1475 for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
1476 bZero = (j>8);
1478 if( bZero==0 ) nZero--;
1480 pLvl->iLeafPgno -= nZero;
1481 pLvl->iOff = iOff - nZero;
1484 return pLvl->bEof;
1487 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1488 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1490 assert( iLvl<pIter->nLvl );
1491 if( fts5DlidxLvlPrev(pLvl) ){
1492 if( (iLvl+1) < pIter->nLvl ){
1493 fts5DlidxIterPrevR(p, pIter, iLvl+1);
1494 if( pLvl[1].bEof==0 ){
1495 fts5DataRelease(pLvl->pData);
1496 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1497 pLvl->pData = fts5DataRead(p,
1498 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1500 if( pLvl->pData ){
1501 while( fts5DlidxLvlNext(pLvl)==0 );
1502 pLvl->bEof = 0;
1508 return pIter->aLvl[0].bEof;
1510 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
1511 return fts5DlidxIterPrevR(p, pIter, 0);
1515 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1517 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1518 if( pIter ){
1519 int i;
1520 for(i=0; i<pIter->nLvl; i++){
1521 fts5DataRelease(pIter->aLvl[i].pData);
1523 sqlite3_free(pIter);
1527 static Fts5DlidxIter *fts5DlidxIterInit(
1528 Fts5Index *p, /* Fts5 Backend to iterate within */
1529 int bRev, /* True for ORDER BY ASC */
1530 int iSegid, /* Segment id */
1531 int iLeafPg /* Leaf page number to load dlidx for */
1533 Fts5DlidxIter *pIter = 0;
1534 int i;
1535 int bDone = 0;
1537 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1538 sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1539 Fts5DlidxIter *pNew;
1541 pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
1542 if( pNew==0 ){
1543 p->rc = SQLITE_NOMEM;
1544 }else{
1545 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1546 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1547 pIter = pNew;
1548 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1549 pLvl->pData = fts5DataRead(p, iRowid);
1550 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1551 bDone = 1;
1553 pIter->nLvl = i+1;
1557 if( p->rc==SQLITE_OK ){
1558 pIter->iSegid = iSegid;
1559 if( bRev==0 ){
1560 fts5DlidxIterFirst(pIter);
1561 }else{
1562 fts5DlidxIterLast(p, pIter);
1566 if( p->rc!=SQLITE_OK ){
1567 fts5DlidxIterFree(pIter);
1568 pIter = 0;
1571 return pIter;
1574 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1575 return pIter->aLvl[0].iRowid;
1577 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
1578 return pIter->aLvl[0].iLeafPgno;
1582 ** Load the next leaf page into the segment iterator.
1584 static void fts5SegIterNextPage(
1585 Fts5Index *p, /* FTS5 backend object */
1586 Fts5SegIter *pIter /* Iterator to advance to next page */
1588 Fts5Data *pLeaf;
1589 Fts5StructureSegment *pSeg = pIter->pSeg;
1590 fts5DataRelease(pIter->pLeaf);
1591 pIter->iLeafPgno++;
1592 if( pIter->pNextLeaf ){
1593 pIter->pLeaf = pIter->pNextLeaf;
1594 pIter->pNextLeaf = 0;
1595 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1596 pIter->pLeaf = fts5LeafRead(p,
1597 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1599 }else{
1600 pIter->pLeaf = 0;
1602 pLeaf = pIter->pLeaf;
1604 if( pLeaf ){
1605 pIter->iPgidxOff = pLeaf->szLeaf;
1606 if( fts5LeafIsTermless(pLeaf) ){
1607 pIter->iEndofDoclist = pLeaf->nn+1;
1608 }else{
1609 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1610 pIter->iEndofDoclist
1617 ** Argument p points to a buffer containing a varint to be interpreted as a
1618 ** position list size field. Read the varint and return the number of bytes
1619 ** read. Before returning, set *pnSz to the number of bytes in the position
1620 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1622 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1623 int nSz;
1624 int n = 0;
1625 fts5FastGetVarint32(p, n, nSz);
1626 assert_nc( nSz>=0 );
1627 *pnSz = nSz/2;
1628 *pbDel = nSz & 0x0001;
1629 return n;
1633 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1634 ** position-list size field. Read the value of the field and store it
1635 ** in the following variables:
1637 ** Fts5SegIter.nPos
1638 ** Fts5SegIter.bDel
1640 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1641 ** position list content (if any).
1643 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1644 if( p->rc==SQLITE_OK ){
1645 int iOff = pIter->iLeafOffset; /* Offset to read at */
1646 ASSERT_SZLEAF_OK(pIter->pLeaf);
1647 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1648 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1649 pIter->bDel = 0;
1650 pIter->nPos = 1;
1651 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1652 pIter->bDel = 1;
1653 iOff++;
1654 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1655 pIter->nPos = 1;
1656 iOff++;
1657 }else{
1658 pIter->nPos = 0;
1661 }else{
1662 int nSz;
1663 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1664 pIter->bDel = (nSz & 0x0001);
1665 pIter->nPos = nSz>>1;
1666 assert_nc( pIter->nPos>=0 );
1668 pIter->iLeafOffset = iOff;
1672 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
1673 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1674 i64 iOff = pIter->iLeafOffset;
1676 ASSERT_SZLEAF_OK(pIter->pLeaf);
1677 if( iOff>=pIter->pLeaf->szLeaf ){
1678 fts5SegIterNextPage(p, pIter);
1679 if( pIter->pLeaf==0 ){
1680 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
1681 return;
1683 iOff = 4;
1684 a = pIter->pLeaf->p;
1686 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
1687 pIter->iLeafOffset = iOff;
1691 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1692 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1693 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1694 ** the first term in the segment).
1696 ** This function populates:
1698 ** Fts5SegIter.term
1699 ** Fts5SegIter.rowid
1701 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1702 ** the first position list. The position list belonging to document
1703 ** (Fts5SegIter.iRowid).
1705 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1706 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1707 i64 iOff = pIter->iLeafOffset; /* Offset to read at */
1708 int nNew; /* Bytes of new data */
1710 iOff += fts5GetVarint32(&a[iOff], nNew);
1711 if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
1712 p->rc = FTS5_CORRUPT;
1713 return;
1715 pIter->term.n = nKeep;
1716 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1717 assert( pIter->term.n<=pIter->term.nSpace );
1718 iOff += nNew;
1719 pIter->iTermLeafOffset = iOff;
1720 pIter->iTermLeafPgno = pIter->iLeafPgno;
1721 pIter->iLeafOffset = iOff;
1723 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1724 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1725 }else{
1726 int nExtra;
1727 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1728 pIter->iEndofDoclist += nExtra;
1731 fts5SegIterLoadRowid(p, pIter);
1734 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1735 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1736 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1738 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
1739 if( pIter->flags & FTS5_SEGITER_REVERSE ){
1740 pIter->xNext = fts5SegIterNext_Reverse;
1741 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1742 pIter->xNext = fts5SegIterNext_None;
1743 }else{
1744 pIter->xNext = fts5SegIterNext;
1749 ** Initialize the iterator object pIter to iterate through the entries in
1750 ** segment pSeg. The iterator is left pointing to the first entry when
1751 ** this function returns.
1753 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1754 ** an error has already occurred when this function is called, it is a no-op.
1756 static void fts5SegIterInit(
1757 Fts5Index *p, /* FTS index object */
1758 Fts5StructureSegment *pSeg, /* Description of segment */
1759 Fts5SegIter *pIter /* Object to populate */
1761 if( pSeg->pgnoFirst==0 ){
1762 /* This happens if the segment is being used as an input to an incremental
1763 ** merge and all data has already been "trimmed". See function
1764 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1765 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1766 ** at EOF already. */
1767 assert( pIter->pLeaf==0 );
1768 return;
1771 if( p->rc==SQLITE_OK ){
1772 memset(pIter, 0, sizeof(*pIter));
1773 fts5SegIterSetNext(p, pIter);
1774 pIter->pSeg = pSeg;
1775 pIter->iLeafPgno = pSeg->pgnoFirst-1;
1776 fts5SegIterNextPage(p, pIter);
1779 if( p->rc==SQLITE_OK ){
1780 pIter->iLeafOffset = 4;
1781 assert( pIter->pLeaf!=0 );
1782 assert_nc( pIter->pLeaf->nn>4 );
1783 assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1784 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1785 fts5SegIterLoadTerm(p, pIter, 0);
1786 fts5SegIterLoadNPos(p, pIter);
1791 ** This function is only ever called on iterators created by calls to
1792 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1794 ** The iterator is in an unusual state when this function is called: the
1795 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1796 ** the position-list size field for the first relevant rowid on the page.
1797 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1799 ** This function advances the iterator so that it points to the last
1800 ** relevant rowid on the page and, if necessary, initializes the
1801 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1802 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1803 ** byte of the position list content associated with said rowid.
1805 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1806 int eDetail = p->pConfig->eDetail;
1807 int n = pIter->pLeaf->szLeaf;
1808 int i = pIter->iLeafOffset;
1809 u8 *a = pIter->pLeaf->p;
1810 int iRowidOffset = 0;
1812 if( n>pIter->iEndofDoclist ){
1813 n = pIter->iEndofDoclist;
1816 ASSERT_SZLEAF_OK(pIter->pLeaf);
1817 while( 1 ){
1818 u64 iDelta = 0;
1820 if( eDetail==FTS5_DETAIL_NONE ){
1821 /* todo */
1822 if( i<n && a[i]==0 ){
1823 i++;
1824 if( i<n && a[i]==0 ) i++;
1826 }else{
1827 int nPos;
1828 int bDummy;
1829 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1830 i += nPos;
1832 if( i>=n ) break;
1833 i += fts5GetVarint(&a[i], &iDelta);
1834 pIter->iRowid += iDelta;
1836 /* If necessary, grow the pIter->aRowidOffset[] array. */
1837 if( iRowidOffset>=pIter->nRowidOffset ){
1838 int nNew = pIter->nRowidOffset + 8;
1839 int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
1840 if( aNew==0 ){
1841 p->rc = SQLITE_NOMEM;
1842 break;
1844 pIter->aRowidOffset = aNew;
1845 pIter->nRowidOffset = nNew;
1848 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
1849 pIter->iLeafOffset = i;
1851 pIter->iRowidOffset = iRowidOffset;
1852 fts5SegIterLoadNPos(p, pIter);
1858 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
1859 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1860 assert( pIter->flags & FTS5_SEGITER_ONETERM );
1862 fts5DataRelease(pIter->pLeaf);
1863 pIter->pLeaf = 0;
1864 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
1865 Fts5Data *pNew;
1866 pIter->iLeafPgno--;
1867 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
1868 pIter->pSeg->iSegid, pIter->iLeafPgno
1870 if( pNew ){
1871 /* iTermLeafOffset may be equal to szLeaf if the term is the last
1872 ** thing on the page - i.e. the first rowid is on the following page.
1873 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1874 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
1875 assert( pIter->pLeaf==0 );
1876 if( pIter->iTermLeafOffset<pNew->szLeaf ){
1877 pIter->pLeaf = pNew;
1878 pIter->iLeafOffset = pIter->iTermLeafOffset;
1880 }else{
1881 int iRowidOff;
1882 iRowidOff = fts5LeafFirstRowidOff(pNew);
1883 if( iRowidOff ){
1884 if( iRowidOff>=pNew->szLeaf ){
1885 p->rc = FTS5_CORRUPT;
1886 }else{
1887 pIter->pLeaf = pNew;
1888 pIter->iLeafOffset = iRowidOff;
1893 if( pIter->pLeaf ){
1894 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
1895 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
1896 break;
1897 }else{
1898 fts5DataRelease(pNew);
1903 if( pIter->pLeaf ){
1904 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1905 fts5SegIterReverseInitPage(p, pIter);
1910 ** Return true if the iterator passed as the second argument currently
1911 ** points to a delete marker. A delete marker is an entry with a 0 byte
1912 ** position-list.
1914 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
1915 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1916 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1920 ** Advance iterator pIter to the next entry.
1922 ** This version of fts5SegIterNext() is only used by reverse iterators.
1924 static void fts5SegIterNext_Reverse(
1925 Fts5Index *p, /* FTS5 backend object */
1926 Fts5SegIter *pIter, /* Iterator to advance */
1927 int *pbUnused /* Unused */
1929 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1930 assert( pIter->pNextLeaf==0 );
1931 UNUSED_PARAM(pbUnused);
1933 if( pIter->iRowidOffset>0 ){
1934 u8 *a = pIter->pLeaf->p;
1935 int iOff;
1936 u64 iDelta;
1938 pIter->iRowidOffset--;
1939 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
1940 fts5SegIterLoadNPos(p, pIter);
1941 iOff = pIter->iLeafOffset;
1942 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
1943 iOff += pIter->nPos;
1945 fts5GetVarint(&a[iOff], &iDelta);
1946 pIter->iRowid -= iDelta;
1947 }else{
1948 fts5SegIterReverseNewPage(p, pIter);
1953 ** Advance iterator pIter to the next entry.
1955 ** This version of fts5SegIterNext() is only used if detail=none and the
1956 ** iterator is not a reverse direction iterator.
1958 static void fts5SegIterNext_None(
1959 Fts5Index *p, /* FTS5 backend object */
1960 Fts5SegIter *pIter, /* Iterator to advance */
1961 int *pbNewTerm /* OUT: Set for new term */
1963 int iOff;
1965 assert( p->rc==SQLITE_OK );
1966 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
1967 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
1969 ASSERT_SZLEAF_OK(pIter->pLeaf);
1970 iOff = pIter->iLeafOffset;
1972 /* Next entry is on the next page */
1973 if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
1974 fts5SegIterNextPage(p, pIter);
1975 if( p->rc || pIter->pLeaf==0 ) return;
1976 pIter->iRowid = 0;
1977 iOff = 4;
1980 if( iOff<pIter->iEndofDoclist ){
1981 /* Next entry is on the current page */
1982 i64 iDelta;
1983 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
1984 pIter->iLeafOffset = iOff;
1985 pIter->iRowid += iDelta;
1986 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
1987 if( pIter->pSeg ){
1988 int nKeep = 0;
1989 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
1990 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
1992 pIter->iLeafOffset = iOff;
1993 fts5SegIterLoadTerm(p, pIter, nKeep);
1994 }else{
1995 const u8 *pList = 0;
1996 const char *zTerm = 0;
1997 int nList;
1998 sqlite3Fts5HashScanNext(p->pHash);
1999 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2000 if( pList==0 ) goto next_none_eof;
2001 pIter->pLeaf->p = (u8*)pList;
2002 pIter->pLeaf->nn = nList;
2003 pIter->pLeaf->szLeaf = nList;
2004 pIter->iEndofDoclist = nList;
2005 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
2006 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2009 if( pbNewTerm ) *pbNewTerm = 1;
2010 }else{
2011 goto next_none_eof;
2014 fts5SegIterLoadNPos(p, pIter);
2016 return;
2017 next_none_eof:
2018 fts5DataRelease(pIter->pLeaf);
2019 pIter->pLeaf = 0;
2024 ** Advance iterator pIter to the next entry.
2026 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
2027 ** is not considered an error if the iterator reaches EOF. If an error has
2028 ** already occurred when this function is called, it is a no-op.
2030 static void fts5SegIterNext(
2031 Fts5Index *p, /* FTS5 backend object */
2032 Fts5SegIter *pIter, /* Iterator to advance */
2033 int *pbNewTerm /* OUT: Set for new term */
2035 Fts5Data *pLeaf = pIter->pLeaf;
2036 int iOff;
2037 int bNewTerm = 0;
2038 int nKeep = 0;
2039 u8 *a;
2040 int n;
2042 assert( pbNewTerm==0 || *pbNewTerm==0 );
2043 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
2045 /* Search for the end of the position list within the current page. */
2046 a = pLeaf->p;
2047 n = pLeaf->szLeaf;
2049 ASSERT_SZLEAF_OK(pLeaf);
2050 iOff = pIter->iLeafOffset + pIter->nPos;
2052 if( iOff<n ){
2053 /* The next entry is on the current page. */
2054 assert_nc( iOff<=pIter->iEndofDoclist );
2055 if( iOff>=pIter->iEndofDoclist ){
2056 bNewTerm = 1;
2057 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
2058 iOff += fts5GetVarint32(&a[iOff], nKeep);
2060 }else{
2061 u64 iDelta;
2062 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
2063 pIter->iRowid += iDelta;
2064 assert_nc( iDelta>0 );
2066 pIter->iLeafOffset = iOff;
2068 }else if( pIter->pSeg==0 ){
2069 const u8 *pList = 0;
2070 const char *zTerm = 0;
2071 int nList = 0;
2072 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
2073 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
2074 sqlite3Fts5HashScanNext(p->pHash);
2075 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2077 if( pList==0 ){
2078 fts5DataRelease(pIter->pLeaf);
2079 pIter->pLeaf = 0;
2080 }else{
2081 pIter->pLeaf->p = (u8*)pList;
2082 pIter->pLeaf->nn = nList;
2083 pIter->pLeaf->szLeaf = nList;
2084 pIter->iEndofDoclist = nList+1;
2085 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
2086 (u8*)zTerm);
2087 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2088 *pbNewTerm = 1;
2090 }else{
2091 iOff = 0;
2092 /* Next entry is not on the current page */
2093 while( iOff==0 ){
2094 fts5SegIterNextPage(p, pIter);
2095 pLeaf = pIter->pLeaf;
2096 if( pLeaf==0 ) break;
2097 ASSERT_SZLEAF_OK(pLeaf);
2098 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
2099 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
2100 pIter->iLeafOffset = iOff;
2102 if( pLeaf->nn>pLeaf->szLeaf ){
2103 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2104 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
2108 else if( pLeaf->nn>pLeaf->szLeaf ){
2109 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2110 &pLeaf->p[pLeaf->szLeaf], iOff
2112 pIter->iLeafOffset = iOff;
2113 pIter->iEndofDoclist = iOff;
2114 bNewTerm = 1;
2116 assert_nc( iOff<pLeaf->szLeaf );
2117 if( iOff>pLeaf->szLeaf ){
2118 p->rc = FTS5_CORRUPT;
2119 return;
2124 /* Check if the iterator is now at EOF. If so, return early. */
2125 if( pIter->pLeaf ){
2126 if( bNewTerm ){
2127 if( pIter->flags & FTS5_SEGITER_ONETERM ){
2128 fts5DataRelease(pIter->pLeaf);
2129 pIter->pLeaf = 0;
2130 }else{
2131 fts5SegIterLoadTerm(p, pIter, nKeep);
2132 fts5SegIterLoadNPos(p, pIter);
2133 if( pbNewTerm ) *pbNewTerm = 1;
2135 }else{
2136 /* The following could be done by calling fts5SegIterLoadNPos(). But
2137 ** this block is particularly performance critical, so equivalent
2138 ** code is inlined. */
2139 int nSz;
2140 assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
2141 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
2142 pIter->bDel = (nSz & 0x0001);
2143 pIter->nPos = nSz>>1;
2144 assert_nc( pIter->nPos>=0 );
2149 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
2151 #define fts5IndexSkipVarint(a, iOff) { \
2152 int iEnd = iOff+9; \
2153 while( (a[iOff++] & 0x80) && iOff<iEnd ); \
2157 ** Iterator pIter currently points to the first rowid in a doclist. This
2158 ** function sets the iterator up so that iterates in reverse order through
2159 ** the doclist.
2161 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2162 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2163 Fts5Data *pLast = 0;
2164 int pgnoLast = 0;
2166 if( pDlidx ){
2167 int iSegid = pIter->pSeg->iSegid;
2168 pgnoLast = fts5DlidxIterPgno(pDlidx);
2169 pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2170 }else{
2171 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2173 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2174 ** position-list content for the current rowid. Back it up so that it
2175 ** points to the start of the position-list size field. */
2176 int iPoslist;
2177 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2178 iPoslist = pIter->iTermLeafOffset;
2179 }else{
2180 iPoslist = 4;
2182 fts5IndexSkipVarint(pLeaf->p, iPoslist);
2183 pIter->iLeafOffset = iPoslist;
2185 /* If this condition is true then the largest rowid for the current
2186 ** term may not be stored on the current page. So search forward to
2187 ** see where said rowid really is. */
2188 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2189 int pgno;
2190 Fts5StructureSegment *pSeg = pIter->pSeg;
2192 /* The last rowid in the doclist may not be on the current page. Search
2193 ** forward to find the page containing the last rowid. */
2194 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2195 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2196 Fts5Data *pNew = fts5LeafRead(p, iAbs);
2197 if( pNew ){
2198 int iRowid, bTermless;
2199 iRowid = fts5LeafFirstRowidOff(pNew);
2200 bTermless = fts5LeafIsTermless(pNew);
2201 if( iRowid ){
2202 SWAPVAL(Fts5Data*, pNew, pLast);
2203 pgnoLast = pgno;
2205 fts5DataRelease(pNew);
2206 if( bTermless==0 ) break;
2212 /* If pLast is NULL at this point, then the last rowid for this doclist
2213 ** lies on the page currently indicated by the iterator. In this case
2214 ** pIter->iLeafOffset is already set to point to the position-list size
2215 ** field associated with the first relevant rowid on the page.
2217 ** Or, if pLast is non-NULL, then it is the page that contains the last
2218 ** rowid. In this case configure the iterator so that it points to the
2219 ** first rowid on this page.
2221 if( pLast ){
2222 int iOff;
2223 fts5DataRelease(pIter->pLeaf);
2224 pIter->pLeaf = pLast;
2225 pIter->iLeafPgno = pgnoLast;
2226 iOff = fts5LeafFirstRowidOff(pLast);
2227 if( iOff>pLast->szLeaf ){
2228 p->rc = FTS5_CORRUPT;
2229 return;
2231 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2232 pIter->iLeafOffset = iOff;
2234 if( fts5LeafIsTermless(pLast) ){
2235 pIter->iEndofDoclist = pLast->nn+1;
2236 }else{
2237 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2241 fts5SegIterReverseInitPage(p, pIter);
2245 ** Iterator pIter currently points to the first rowid of a doclist.
2246 ** There is a doclist-index associated with the final term on the current
2247 ** page. If the current term is the last term on the page, load the
2248 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2250 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2251 int iSeg = pIter->pSeg->iSegid;
2252 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2253 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2255 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2256 assert( pIter->pDlidx==0 );
2258 /* Check if the current doclist ends on this page. If it does, return
2259 ** early without loading the doclist-index (as it belongs to a different
2260 ** term. */
2261 if( pIter->iTermLeafPgno==pIter->iLeafPgno
2262 && pIter->iEndofDoclist<pLeaf->szLeaf
2264 return;
2267 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2271 ** The iterator object passed as the second argument currently contains
2272 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2273 ** function searches the leaf page for a term matching (pTerm/nTerm).
2275 ** If the specified term is found on the page, then the iterator is left
2276 ** pointing to it. If argument bGe is zero and the term is not found,
2277 ** the iterator is left pointing at EOF.
2279 ** If bGe is non-zero and the specified term is not found, then the
2280 ** iterator is left pointing to the smallest term in the segment that
2281 ** is larger than the specified term, even if this term is not on the
2282 ** current page.
2284 static void fts5LeafSeek(
2285 Fts5Index *p, /* Leave any error code here */
2286 int bGe, /* True for a >= search */
2287 Fts5SegIter *pIter, /* Iterator to seek */
2288 const u8 *pTerm, int nTerm /* Term to search for */
2290 u32 iOff;
2291 const u8 *a = pIter->pLeaf->p;
2292 u32 n = (u32)pIter->pLeaf->nn;
2294 u32 nMatch = 0;
2295 u32 nKeep = 0;
2296 u32 nNew = 0;
2297 u32 iTermOff;
2298 u32 iPgidx; /* Current offset in pgidx */
2299 int bEndOfPage = 0;
2301 assert( p->rc==SQLITE_OK );
2303 iPgidx = (u32)pIter->pLeaf->szLeaf;
2304 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2305 iOff = iTermOff;
2306 if( iOff>n ){
2307 p->rc = FTS5_CORRUPT;
2308 return;
2311 while( 1 ){
2313 /* Figure out how many new bytes are in this term */
2314 fts5FastGetVarint32(a, iOff, nNew);
2315 if( nKeep<nMatch ){
2316 goto search_failed;
2319 assert( nKeep>=nMatch );
2320 if( nKeep==nMatch ){
2321 u32 nCmp;
2322 u32 i;
2323 nCmp = (u32)MIN(nNew, nTerm-nMatch);
2324 for(i=0; i<nCmp; i++){
2325 if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2327 nMatch += i;
2329 if( (u32)nTerm==nMatch ){
2330 if( i==nNew ){
2331 goto search_success;
2332 }else{
2333 goto search_failed;
2335 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2336 goto search_failed;
2340 if( iPgidx>=n ){
2341 bEndOfPage = 1;
2342 break;
2345 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2346 iTermOff += nKeep;
2347 iOff = iTermOff;
2349 if( iOff>=n ){
2350 p->rc = FTS5_CORRUPT;
2351 return;
2354 /* Read the nKeep field of the next term. */
2355 fts5FastGetVarint32(a, iOff, nKeep);
2358 search_failed:
2359 if( bGe==0 ){
2360 fts5DataRelease(pIter->pLeaf);
2361 pIter->pLeaf = 0;
2362 return;
2363 }else if( bEndOfPage ){
2364 do {
2365 fts5SegIterNextPage(p, pIter);
2366 if( pIter->pLeaf==0 ) return;
2367 a = pIter->pLeaf->p;
2368 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2369 iPgidx = (u32)pIter->pLeaf->szLeaf;
2370 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2371 if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
2372 p->rc = FTS5_CORRUPT;
2373 return;
2374 }else{
2375 nKeep = 0;
2376 iTermOff = iOff;
2377 n = (u32)pIter->pLeaf->nn;
2378 iOff += fts5GetVarint32(&a[iOff], nNew);
2379 break;
2382 }while( 1 );
2385 search_success:
2386 if( (i64)iOff+nNew>n || nNew<1 ){
2387 p->rc = FTS5_CORRUPT;
2388 return;
2390 pIter->iLeafOffset = iOff + nNew;
2391 pIter->iTermLeafOffset = pIter->iLeafOffset;
2392 pIter->iTermLeafPgno = pIter->iLeafPgno;
2394 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2395 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2397 if( iPgidx>=n ){
2398 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2399 }else{
2400 int nExtra;
2401 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2402 pIter->iEndofDoclist = iTermOff + nExtra;
2404 pIter->iPgidxOff = iPgidx;
2406 fts5SegIterLoadRowid(p, pIter);
2407 fts5SegIterLoadNPos(p, pIter);
2410 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2411 if( p->pIdxSelect==0 ){
2412 Fts5Config *pConfig = p->pConfig;
2413 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2414 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2415 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2416 pConfig->zDb, pConfig->zName
2419 return p->pIdxSelect;
2423 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2424 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2426 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2427 ** an error has already occurred when this function is called, it is a no-op.
2429 static void fts5SegIterSeekInit(
2430 Fts5Index *p, /* FTS5 backend */
2431 const u8 *pTerm, int nTerm, /* Term to seek to */
2432 int flags, /* Mask of FTS5INDEX_XXX flags */
2433 Fts5StructureSegment *pSeg, /* Description of segment */
2434 Fts5SegIter *pIter /* Object to populate */
2436 int iPg = 1;
2437 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2438 int bDlidx = 0; /* True if there is a doclist-index */
2439 sqlite3_stmt *pIdxSelect = 0;
2441 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2442 assert( pTerm && nTerm );
2443 memset(pIter, 0, sizeof(*pIter));
2444 pIter->pSeg = pSeg;
2446 /* This block sets stack variable iPg to the leaf page number that may
2447 ** contain term (pTerm/nTerm), if it is present in the segment. */
2448 pIdxSelect = fts5IdxSelectStmt(p);
2449 if( p->rc ) return;
2450 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2451 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2452 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2453 i64 val = sqlite3_column_int(pIdxSelect, 0);
2454 iPg = (int)(val>>1);
2455 bDlidx = (val & 0x0001);
2457 p->rc = sqlite3_reset(pIdxSelect);
2458 sqlite3_bind_null(pIdxSelect, 2);
2460 if( iPg<pSeg->pgnoFirst ){
2461 iPg = pSeg->pgnoFirst;
2462 bDlidx = 0;
2465 pIter->iLeafPgno = iPg - 1;
2466 fts5SegIterNextPage(p, pIter);
2468 if( pIter->pLeaf ){
2469 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2472 if( p->rc==SQLITE_OK && bGe==0 ){
2473 pIter->flags |= FTS5_SEGITER_ONETERM;
2474 if( pIter->pLeaf ){
2475 if( flags & FTS5INDEX_QUERY_DESC ){
2476 pIter->flags |= FTS5_SEGITER_REVERSE;
2478 if( bDlidx ){
2479 fts5SegIterLoadDlidx(p, pIter);
2481 if( flags & FTS5INDEX_QUERY_DESC ){
2482 fts5SegIterReverse(p, pIter);
2487 fts5SegIterSetNext(p, pIter);
2489 /* Either:
2491 ** 1) an error has occurred, or
2492 ** 2) the iterator points to EOF, or
2493 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2494 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2495 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2497 assert_nc( p->rc!=SQLITE_OK /* 1 */
2498 || pIter->pLeaf==0 /* 2 */
2499 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
2500 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
2505 ** Initialize the object pIter to point to term pTerm/nTerm within the
2506 ** in-memory hash table. If there is no such term in the hash-table, the
2507 ** iterator is set to EOF.
2509 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2510 ** an error has already occurred when this function is called, it is a no-op.
2512 static void fts5SegIterHashInit(
2513 Fts5Index *p, /* FTS5 backend */
2514 const u8 *pTerm, int nTerm, /* Term to seek to */
2515 int flags, /* Mask of FTS5INDEX_XXX flags */
2516 Fts5SegIter *pIter /* Object to populate */
2518 int nList = 0;
2519 const u8 *z = 0;
2520 int n = 0;
2521 Fts5Data *pLeaf = 0;
2523 assert( p->pHash );
2524 assert( p->rc==SQLITE_OK );
2526 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2527 const u8 *pList = 0;
2529 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2530 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
2531 n = (z ? (int)strlen((const char*)z) : 0);
2532 if( pList ){
2533 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2534 if( pLeaf ){
2535 pLeaf->p = (u8*)pList;
2538 }else{
2539 p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
2540 (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
2542 if( pLeaf ){
2543 pLeaf->p = (u8*)&pLeaf[1];
2545 z = pTerm;
2546 n = nTerm;
2547 pIter->flags |= FTS5_SEGITER_ONETERM;
2550 if( pLeaf ){
2551 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2552 pLeaf->nn = pLeaf->szLeaf = nList;
2553 pIter->pLeaf = pLeaf;
2554 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2555 pIter->iEndofDoclist = pLeaf->nn;
2557 if( flags & FTS5INDEX_QUERY_DESC ){
2558 pIter->flags |= FTS5_SEGITER_REVERSE;
2559 fts5SegIterReverseInitPage(p, pIter);
2560 }else{
2561 fts5SegIterLoadNPos(p, pIter);
2565 fts5SegIterSetNext(p, pIter);
2569 ** Zero the iterator passed as the only argument.
2571 static void fts5SegIterClear(Fts5SegIter *pIter){
2572 fts5BufferFree(&pIter->term);
2573 fts5DataRelease(pIter->pLeaf);
2574 fts5DataRelease(pIter->pNextLeaf);
2575 fts5DlidxIterFree(pIter->pDlidx);
2576 sqlite3_free(pIter->aRowidOffset);
2577 memset(pIter, 0, sizeof(Fts5SegIter));
2580 #ifdef SQLITE_DEBUG
2583 ** This function is used as part of the big assert() procedure implemented by
2584 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2585 ** in *pRes is the correct result of comparing the current positions of the
2586 ** two iterators.
2588 static void fts5AssertComparisonResult(
2589 Fts5Iter *pIter,
2590 Fts5SegIter *p1,
2591 Fts5SegIter *p2,
2592 Fts5CResult *pRes
2594 int i1 = p1 - pIter->aSeg;
2595 int i2 = p2 - pIter->aSeg;
2597 if( p1->pLeaf || p2->pLeaf ){
2598 if( p1->pLeaf==0 ){
2599 assert( pRes->iFirst==i2 );
2600 }else if( p2->pLeaf==0 ){
2601 assert( pRes->iFirst==i1 );
2602 }else{
2603 int nMin = MIN(p1->term.n, p2->term.n);
2604 int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
2605 if( res==0 ) res = p1->term.n - p2->term.n;
2607 if( res==0 ){
2608 assert( pRes->bTermEq==1 );
2609 assert( p1->iRowid!=p2->iRowid );
2610 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
2611 }else{
2612 assert( pRes->bTermEq==0 );
2615 if( res<0 ){
2616 assert( pRes->iFirst==i1 );
2617 }else{
2618 assert( pRes->iFirst==i2 );
2625 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2626 ** is compiled. In that case, this function is essentially an assert()
2627 ** statement used to verify that the contents of the pIter->aFirst[] array
2628 ** are correct.
2630 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
2631 if( p->rc==SQLITE_OK ){
2632 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2633 int i;
2635 assert( (pFirst->pLeaf==0)==pIter->base.bEof );
2637 /* Check that pIter->iSwitchRowid is set correctly. */
2638 for(i=0; i<pIter->nSeg; i++){
2639 Fts5SegIter *p1 = &pIter->aSeg[i];
2640 assert( p1==pFirst
2641 || p1->pLeaf==0
2642 || fts5BufferCompare(&pFirst->term, &p1->term)
2643 || p1->iRowid==pIter->iSwitchRowid
2644 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2648 for(i=0; i<pIter->nSeg; i+=2){
2649 Fts5SegIter *p1 = &pIter->aSeg[i];
2650 Fts5SegIter *p2 = &pIter->aSeg[i+1];
2651 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
2652 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2655 for(i=1; i<(pIter->nSeg / 2); i+=2){
2656 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
2657 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
2658 Fts5CResult *pRes = &pIter->aFirst[i];
2659 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2663 #else
2664 # define fts5AssertMultiIterSetup(x,y)
2665 #endif
2668 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2670 ** If the returned value is non-zero, then it is the index of an entry
2671 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2672 ** to a key that is a duplicate of another, higher priority,
2673 ** segment-iterator in the pSeg->aSeg[] array.
2675 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2676 int i1; /* Index of left-hand Fts5SegIter */
2677 int i2; /* Index of right-hand Fts5SegIter */
2678 int iRes;
2679 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
2680 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
2681 Fts5CResult *pRes = &pIter->aFirst[iOut];
2683 assert( iOut<pIter->nSeg && iOut>0 );
2684 assert( pIter->bRev==0 || pIter->bRev==1 );
2686 if( iOut>=(pIter->nSeg/2) ){
2687 i1 = (iOut - pIter->nSeg/2) * 2;
2688 i2 = i1 + 1;
2689 }else{
2690 i1 = pIter->aFirst[iOut*2].iFirst;
2691 i2 = pIter->aFirst[iOut*2+1].iFirst;
2693 p1 = &pIter->aSeg[i1];
2694 p2 = &pIter->aSeg[i2];
2696 pRes->bTermEq = 0;
2697 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
2698 iRes = i2;
2699 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
2700 iRes = i1;
2701 }else{
2702 int res = fts5BufferCompare(&p1->term, &p2->term);
2703 if( res==0 ){
2704 assert_nc( i2>i1 );
2705 assert_nc( i2!=0 );
2706 pRes->bTermEq = 1;
2707 if( p1->iRowid==p2->iRowid ){
2708 p1->bDel = p2->bDel;
2709 return i2;
2711 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2713 assert( res!=0 );
2714 if( res<0 ){
2715 iRes = i1;
2716 }else{
2717 iRes = i2;
2721 pRes->iFirst = (u16)iRes;
2722 return 0;
2726 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2727 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
2729 static void fts5SegIterGotoPage(
2730 Fts5Index *p, /* FTS5 backend object */
2731 Fts5SegIter *pIter, /* Iterator to advance */
2732 int iLeafPgno
2734 assert( iLeafPgno>pIter->iLeafPgno );
2736 if( iLeafPgno>pIter->pSeg->pgnoLast ){
2737 p->rc = FTS5_CORRUPT;
2738 }else{
2739 fts5DataRelease(pIter->pNextLeaf);
2740 pIter->pNextLeaf = 0;
2741 pIter->iLeafPgno = iLeafPgno-1;
2742 fts5SegIterNextPage(p, pIter);
2743 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
2745 if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){
2746 int iOff;
2747 u8 *a = pIter->pLeaf->p;
2748 int n = pIter->pLeaf->szLeaf;
2750 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2751 if( iOff<4 || iOff>=n ){
2752 p->rc = FTS5_CORRUPT;
2753 }else{
2754 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2755 pIter->iLeafOffset = iOff;
2756 fts5SegIterLoadNPos(p, pIter);
2763 ** Advance the iterator passed as the second argument until it is at or
2764 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2765 ** always advanced at least once.
2767 static void fts5SegIterNextFrom(
2768 Fts5Index *p, /* FTS5 backend object */
2769 Fts5SegIter *pIter, /* Iterator to advance */
2770 i64 iMatch /* Advance iterator at least this far */
2772 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2773 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2774 int iLeafPgno = pIter->iLeafPgno;
2775 int bMove = 1;
2777 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2778 assert( pIter->pDlidx );
2779 assert( pIter->pLeaf );
2781 if( bRev==0 ){
2782 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
2783 iLeafPgno = fts5DlidxIterPgno(pDlidx);
2784 fts5DlidxIterNext(p, pDlidx);
2786 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
2787 if( iLeafPgno>pIter->iLeafPgno ){
2788 fts5SegIterGotoPage(p, pIter, iLeafPgno);
2789 bMove = 0;
2791 }else{
2792 assert( pIter->pNextLeaf==0 );
2793 assert( iMatch<pIter->iRowid );
2794 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
2795 fts5DlidxIterPrev(p, pDlidx);
2797 iLeafPgno = fts5DlidxIterPgno(pDlidx);
2799 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
2801 if( iLeafPgno<pIter->iLeafPgno ){
2802 pIter->iLeafPgno = iLeafPgno+1;
2803 fts5SegIterReverseNewPage(p, pIter);
2804 bMove = 0;
2809 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
2810 if( pIter->pLeaf==0 ) break;
2811 if( bRev==0 && pIter->iRowid>=iMatch ) break;
2812 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
2813 bMove = 1;
2814 }while( p->rc==SQLITE_OK );
2819 ** Free the iterator object passed as the second argument.
2821 static void fts5MultiIterFree(Fts5Iter *pIter){
2822 if( pIter ){
2823 int i;
2824 for(i=0; i<pIter->nSeg; i++){
2825 fts5SegIterClear(&pIter->aSeg[i]);
2827 fts5BufferFree(&pIter->poslist);
2828 sqlite3_free(pIter);
2832 static void fts5MultiIterAdvanced(
2833 Fts5Index *p, /* FTS5 backend to iterate within */
2834 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
2835 int iChanged, /* Index of sub-iterator just advanced */
2836 int iMinset /* Minimum entry in aFirst[] to set */
2838 int i;
2839 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
2840 int iEq;
2841 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
2842 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
2843 assert( p->rc==SQLITE_OK );
2844 pSeg->xNext(p, pSeg, 0);
2845 i = pIter->nSeg + iEq;
2851 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2852 ** points to the same term though - just a different rowid. This function
2853 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2854 ** If it does so successfully, 0 is returned. Otherwise 1.
2856 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2857 ** on the iterator instead. That function does the same as this one, except
2858 ** that it deals with more complicated cases as well.
2860 static int fts5MultiIterAdvanceRowid(
2861 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
2862 int iChanged, /* Index of sub-iterator just advanced */
2863 Fts5SegIter **ppFirst
2865 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
2867 if( pNew->iRowid==pIter->iSwitchRowid
2868 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
2870 int i;
2871 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
2872 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
2873 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
2874 Fts5CResult *pRes = &pIter->aFirst[i];
2876 assert( pNew->pLeaf );
2877 assert( pRes->bTermEq==0 || pOther->pLeaf );
2879 if( pRes->bTermEq ){
2880 if( pNew->iRowid==pOther->iRowid ){
2881 return 1;
2882 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
2883 pIter->iSwitchRowid = pOther->iRowid;
2884 pNew = pOther;
2885 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
2886 pIter->iSwitchRowid = pOther->iRowid;
2889 pRes->iFirst = (u16)(pNew - pIter->aSeg);
2890 if( i==1 ) break;
2892 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
2896 *ppFirst = pNew;
2897 return 0;
2901 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2903 static void fts5MultiIterSetEof(Fts5Iter *pIter){
2904 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2905 pIter->base.bEof = pSeg->pLeaf==0;
2906 pIter->iSwitchRowid = pSeg->iRowid;
2910 ** Move the iterator to the next entry.
2912 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2913 ** considered an error if the iterator reaches EOF, or if it is already at
2914 ** EOF when this function is called.
2916 static void fts5MultiIterNext(
2917 Fts5Index *p,
2918 Fts5Iter *pIter,
2919 int bFrom, /* True if argument iFrom is valid */
2920 i64 iFrom /* Advance at least as far as this */
2922 int bUseFrom = bFrom;
2923 assert( pIter->base.bEof==0 );
2924 while( p->rc==SQLITE_OK ){
2925 int iFirst = pIter->aFirst[1].iFirst;
2926 int bNewTerm = 0;
2927 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2928 assert( p->rc==SQLITE_OK );
2929 if( bUseFrom && pSeg->pDlidx ){
2930 fts5SegIterNextFrom(p, pSeg, iFrom);
2931 }else{
2932 pSeg->xNext(p, pSeg, &bNewTerm);
2935 if( pSeg->pLeaf==0 || bNewTerm
2936 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2938 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2939 fts5MultiIterSetEof(pIter);
2940 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2941 if( pSeg->pLeaf==0 ) return;
2944 fts5AssertMultiIterSetup(p, pIter);
2945 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
2946 if( pIter->bSkipEmpty==0 || pSeg->nPos ){
2947 pIter->xSetOutputs(pIter, pSeg);
2948 return;
2950 bUseFrom = 0;
2954 static void fts5MultiIterNext2(
2955 Fts5Index *p,
2956 Fts5Iter *pIter,
2957 int *pbNewTerm /* OUT: True if *might* be new term */
2959 assert( pIter->bSkipEmpty );
2960 if( p->rc==SQLITE_OK ){
2961 *pbNewTerm = 0;
2963 int iFirst = pIter->aFirst[1].iFirst;
2964 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2965 int bNewTerm = 0;
2967 assert( p->rc==SQLITE_OK );
2968 pSeg->xNext(p, pSeg, &bNewTerm);
2969 if( pSeg->pLeaf==0 || bNewTerm
2970 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2972 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2973 fts5MultiIterSetEof(pIter);
2974 *pbNewTerm = 1;
2976 fts5AssertMultiIterSetup(p, pIter);
2978 }while( fts5MultiIterIsEmpty(p, pIter) );
2982 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
2983 UNUSED_PARAM2(pUnused1, pUnused2);
2986 static Fts5Iter *fts5MultiIterAlloc(
2987 Fts5Index *p, /* FTS5 backend to iterate within */
2988 int nSeg
2990 Fts5Iter *pNew;
2991 int nSlot; /* Power of two >= nSeg */
2993 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2994 pNew = fts5IdxMalloc(p,
2995 sizeof(Fts5Iter) + /* pNew */
2996 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
2997 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
2999 if( pNew ){
3000 pNew->nSeg = nSlot;
3001 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
3002 pNew->pIndex = p;
3003 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
3005 return pNew;
3008 static void fts5PoslistCallback(
3009 Fts5Index *pUnused,
3010 void *pContext,
3011 const u8 *pChunk, int nChunk
3013 UNUSED_PARAM(pUnused);
3014 assert_nc( nChunk>=0 );
3015 if( nChunk>0 ){
3016 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
3020 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
3021 struct PoslistCallbackCtx {
3022 Fts5Buffer *pBuf; /* Append to this buffer */
3023 Fts5Colset *pColset; /* Restrict matches to this column */
3024 int eState; /* See above */
3027 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
3028 struct PoslistOffsetsCtx {
3029 Fts5Buffer *pBuf; /* Append to this buffer */
3030 Fts5Colset *pColset; /* Restrict matches to this column */
3031 int iRead;
3032 int iWrite;
3036 ** TODO: Make this more efficient!
3038 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
3039 int i;
3040 for(i=0; i<pColset->nCol; i++){
3041 if( pColset->aiCol[i]==iCol ) return 1;
3043 return 0;
3046 static void fts5PoslistOffsetsCallback(
3047 Fts5Index *pUnused,
3048 void *pContext,
3049 const u8 *pChunk, int nChunk
3051 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
3052 UNUSED_PARAM(pUnused);
3053 assert_nc( nChunk>=0 );
3054 if( nChunk>0 ){
3055 int i = 0;
3056 while( i<nChunk ){
3057 int iVal;
3058 i += fts5GetVarint32(&pChunk[i], iVal);
3059 iVal += pCtx->iRead - 2;
3060 pCtx->iRead = iVal;
3061 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
3062 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
3063 pCtx->iWrite = iVal;
3069 static void fts5PoslistFilterCallback(
3070 Fts5Index *pUnused,
3071 void *pContext,
3072 const u8 *pChunk, int nChunk
3074 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
3075 UNUSED_PARAM(pUnused);
3076 assert_nc( nChunk>=0 );
3077 if( nChunk>0 ){
3078 /* Search through to find the first varint with value 1. This is the
3079 ** start of the next columns hits. */
3080 int i = 0;
3081 int iStart = 0;
3083 if( pCtx->eState==2 ){
3084 int iCol;
3085 fts5FastGetVarint32(pChunk, i, iCol);
3086 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
3087 pCtx->eState = 1;
3088 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
3089 }else{
3090 pCtx->eState = 0;
3094 do {
3095 while( i<nChunk && pChunk[i]!=0x01 ){
3096 while( pChunk[i] & 0x80 ) i++;
3097 i++;
3099 if( pCtx->eState ){
3100 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3102 if( i<nChunk ){
3103 int iCol;
3104 iStart = i;
3105 i++;
3106 if( i>=nChunk ){
3107 pCtx->eState = 2;
3108 }else{
3109 fts5FastGetVarint32(pChunk, i, iCol);
3110 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
3111 if( pCtx->eState ){
3112 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3113 iStart = i;
3117 }while( i<nChunk );
3121 static void fts5ChunkIterate(
3122 Fts5Index *p, /* Index object */
3123 Fts5SegIter *pSeg, /* Poslist of this iterator */
3124 void *pCtx, /* Context pointer for xChunk callback */
3125 void (*xChunk)(Fts5Index*, void*, const u8*, int)
3127 int nRem = pSeg->nPos; /* Number of bytes still to come */
3128 Fts5Data *pData = 0;
3129 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3130 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
3131 int pgno = pSeg->iLeafPgno;
3132 int pgnoSave = 0;
3134 /* This function does not work with detail=none databases. */
3135 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
3137 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
3138 pgnoSave = pgno+1;
3141 while( 1 ){
3142 xChunk(p, pCtx, pChunk, nChunk);
3143 nRem -= nChunk;
3144 fts5DataRelease(pData);
3145 if( nRem<=0 ){
3146 break;
3147 }else if( pSeg->pSeg==0 ){
3148 p->rc = FTS5_CORRUPT;
3149 return;
3150 }else{
3151 pgno++;
3152 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
3153 if( pData==0 ) break;
3154 pChunk = &pData->p[4];
3155 nChunk = MIN(nRem, pData->szLeaf - 4);
3156 if( pgno==pgnoSave ){
3157 assert( pSeg->pNextLeaf==0 );
3158 pSeg->pNextLeaf = pData;
3159 pData = 0;
3166 ** Iterator pIter currently points to a valid entry (not EOF). This
3167 ** function appends the position list data for the current entry to
3168 ** buffer pBuf. It does not make a copy of the position-list size
3169 ** field.
3171 static void fts5SegiterPoslist(
3172 Fts5Index *p,
3173 Fts5SegIter *pSeg,
3174 Fts5Colset *pColset,
3175 Fts5Buffer *pBuf
3177 assert( pBuf!=0 );
3178 assert( pSeg!=0 );
3179 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
3180 assert( pBuf->p!=0 );
3181 assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
3182 memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
3183 if( pColset==0 ){
3184 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3185 }else{
3186 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3187 PoslistCallbackCtx sCtx;
3188 sCtx.pBuf = pBuf;
3189 sCtx.pColset = pColset;
3190 sCtx.eState = fts5IndexColsetTest(pColset, 0);
3191 assert( sCtx.eState==0 || sCtx.eState==1 );
3192 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3193 }else{
3194 PoslistOffsetsCtx sCtx;
3195 memset(&sCtx, 0, sizeof(sCtx));
3196 sCtx.pBuf = pBuf;
3197 sCtx.pColset = pColset;
3198 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3205 ** Parameter pPos points to a buffer containing a position list, size nPos.
3206 ** This function filters it according to pColset (which must be non-NULL)
3207 ** and sets pIter->base.pData/nData to point to the new position list.
3208 ** If memory is required for the new position list, use buffer pIter->poslist.
3209 ** Or, if the new position list is a contiguous subset of the input, set
3210 ** pIter->base.pData/nData to point directly to it.
3212 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
3213 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
3214 ** before returning.
3216 static void fts5IndexExtractColset(
3217 int *pRc,
3218 Fts5Colset *pColset, /* Colset to filter on */
3219 const u8 *pPos, int nPos, /* Position list */
3220 Fts5Iter *pIter
3222 if( *pRc==SQLITE_OK ){
3223 const u8 *p = pPos;
3224 const u8 *aCopy = p;
3225 const u8 *pEnd = &p[nPos]; /* One byte past end of position list */
3226 int i = 0;
3227 int iCurrent = 0;
3229 if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
3230 return;
3233 while( 1 ){
3234 while( pColset->aiCol[i]<iCurrent ){
3235 i++;
3236 if( i==pColset->nCol ){
3237 pIter->base.pData = pIter->poslist.p;
3238 pIter->base.nData = pIter->poslist.n;
3239 return;
3243 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3244 ** not part of a varint */
3245 while( p<pEnd && *p!=0x01 ){
3246 while( *p++ & 0x80 );
3249 if( pColset->aiCol[i]==iCurrent ){
3250 if( pColset->nCol==1 ){
3251 pIter->base.pData = aCopy;
3252 pIter->base.nData = p-aCopy;
3253 return;
3255 fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
3257 if( p>=pEnd ){
3258 pIter->base.pData = pIter->poslist.p;
3259 pIter->base.nData = pIter->poslist.n;
3260 return;
3262 aCopy = p++;
3263 iCurrent = *p++;
3264 if( iCurrent & 0x80 ){
3265 p--;
3266 p += fts5GetVarint32(p, iCurrent);
3274 ** xSetOutputs callback used by detail=none tables.
3276 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3277 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3278 pIter->base.iRowid = pSeg->iRowid;
3279 pIter->base.nData = pSeg->nPos;
3283 ** xSetOutputs callback used by detail=full and detail=col tables when no
3284 ** column filters are specified.
3286 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3287 pIter->base.iRowid = pSeg->iRowid;
3288 pIter->base.nData = pSeg->nPos;
3290 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3291 assert( pIter->pColset==0 );
3293 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3294 /* All data is stored on the current page. Populate the output
3295 ** variables to point into the body of the page object. */
3296 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3297 }else{
3298 /* The data is distributed over two or more pages. Copy it into the
3299 ** Fts5Iter.poslist buffer and then set the output pointer to point
3300 ** to this buffer. */
3301 fts5BufferZero(&pIter->poslist);
3302 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3303 pIter->base.pData = pIter->poslist.p;
3308 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3309 ** against no columns at all).
3311 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3312 UNUSED_PARAM(pSeg);
3313 pIter->base.nData = 0;
3317 ** xSetOutputs callback used by detail=col when there is a column filter
3318 ** and there are 100 or more columns. Also called as a fallback from
3319 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3321 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3322 fts5BufferZero(&pIter->poslist);
3323 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3324 pIter->base.iRowid = pSeg->iRowid;
3325 pIter->base.pData = pIter->poslist.p;
3326 pIter->base.nData = pIter->poslist.n;
3330 ** xSetOutputs callback used when:
3332 ** * detail=col,
3333 ** * there is a column filter, and
3334 ** * the table contains 100 or fewer columns.
3336 ** The last point is to ensure all column numbers are stored as
3337 ** single-byte varints.
3339 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3341 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3342 assert( pIter->pColset );
3344 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3345 fts5IterSetOutputs_Col(pIter, pSeg);
3346 }else{
3347 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3348 u8 *pEnd = (u8*)&a[pSeg->nPos];
3349 int iPrev = 0;
3350 int *aiCol = pIter->pColset->aiCol;
3351 int *aiColEnd = &aiCol[pIter->pColset->nCol];
3353 u8 *aOut = pIter->poslist.p;
3354 int iPrevOut = 0;
3356 pIter->base.iRowid = pSeg->iRowid;
3358 while( a<pEnd ){
3359 iPrev += (int)a++[0] - 2;
3360 while( *aiCol<iPrev ){
3361 aiCol++;
3362 if( aiCol==aiColEnd ) goto setoutputs_col_out;
3364 if( *aiCol==iPrev ){
3365 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3366 iPrevOut = iPrev;
3370 setoutputs_col_out:
3371 pIter->base.pData = pIter->poslist.p;
3372 pIter->base.nData = aOut - pIter->poslist.p;
3377 ** xSetOutputs callback used by detail=full when there is a column filter.
3379 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3380 Fts5Colset *pColset = pIter->pColset;
3381 pIter->base.iRowid = pSeg->iRowid;
3383 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3384 assert( pColset );
3386 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3387 /* All data is stored on the current page. Populate the output
3388 ** variables to point into the body of the page object. */
3389 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3390 int *pRc = &pIter->pIndex->rc;
3391 fts5BufferZero(&pIter->poslist);
3392 fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
3393 }else{
3394 /* The data is distributed over two or more pages. Copy it into the
3395 ** Fts5Iter.poslist buffer and then set the output pointer to point
3396 ** to this buffer. */
3397 fts5BufferZero(&pIter->poslist);
3398 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3399 pIter->base.pData = pIter->poslist.p;
3400 pIter->base.nData = pIter->poslist.n;
3404 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
3405 assert( pIter!=0 || (*pRc)!=SQLITE_OK );
3406 if( *pRc==SQLITE_OK ){
3407 Fts5Config *pConfig = pIter->pIndex->pConfig;
3408 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
3409 pIter->xSetOutputs = fts5IterSetOutputs_None;
3412 else if( pIter->pColset==0 ){
3413 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
3416 else if( pIter->pColset->nCol==0 ){
3417 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
3420 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
3421 pIter->xSetOutputs = fts5IterSetOutputs_Full;
3424 else{
3425 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3426 if( pConfig->nCol<=100 ){
3427 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
3428 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
3429 }else{
3430 pIter->xSetOutputs = fts5IterSetOutputs_Col;
3438 ** Allocate a new Fts5Iter object.
3440 ** The new object will be used to iterate through data in structure pStruct.
3441 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3442 ** is zero or greater, data from the first nSegment segments on level iLevel
3443 ** is merged.
3445 ** The iterator initially points to the first term/rowid entry in the
3446 ** iterated data.
3448 static void fts5MultiIterNew(
3449 Fts5Index *p, /* FTS5 backend to iterate within */
3450 Fts5Structure *pStruct, /* Structure of specific index */
3451 int flags, /* FTS5INDEX_QUERY_XXX flags */
3452 Fts5Colset *pColset, /* Colset to filter on (or NULL) */
3453 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
3454 int iLevel, /* Level to iterate (-1 for all) */
3455 int nSegment, /* Number of segments to merge (iLevel>=0) */
3456 Fts5Iter **ppOut /* New object */
3458 int nSeg = 0; /* Number of segment-iters in use */
3459 int iIter = 0; /* */
3460 int iSeg; /* Used to iterate through segments */
3461 Fts5StructureLevel *pLvl;
3462 Fts5Iter *pNew;
3464 assert( (pTerm==0 && nTerm==0) || iLevel<0 );
3466 /* Allocate space for the new multi-seg-iterator. */
3467 if( p->rc==SQLITE_OK ){
3468 if( iLevel<0 ){
3469 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3470 nSeg = pStruct->nSegment;
3471 nSeg += (p->pHash ? 1 : 0);
3472 }else{
3473 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
3476 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
3477 if( pNew==0 ){
3478 assert( p->rc!=SQLITE_OK );
3479 goto fts5MultiIterNew_post_check;
3481 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
3482 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
3483 pNew->pColset = pColset;
3484 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
3485 fts5IterSetOutputCb(&p->rc, pNew);
3488 /* Initialize each of the component segment iterators. */
3489 if( p->rc==SQLITE_OK ){
3490 if( iLevel<0 ){
3491 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
3492 if( p->pHash ){
3493 /* Add a segment iterator for the current contents of the hash table. */
3494 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3495 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
3497 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
3498 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
3499 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
3500 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3501 if( pTerm==0 ){
3502 fts5SegIterInit(p, pSeg, pIter);
3503 }else{
3504 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
3508 }else{
3509 pLvl = &pStruct->aLevel[iLevel];
3510 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
3511 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
3514 assert( iIter==nSeg );
3517 /* If the above was successful, each component iterators now points
3518 ** to the first entry in its segment. In this case initialize the
3519 ** aFirst[] array. Or, if an error has occurred, free the iterator
3520 ** object and set the output variable to NULL. */
3521 if( p->rc==SQLITE_OK ){
3522 for(iIter=pNew->nSeg-1; iIter>0; iIter--){
3523 int iEq;
3524 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
3525 Fts5SegIter *pSeg = &pNew->aSeg[iEq];
3526 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
3527 fts5MultiIterAdvanced(p, pNew, iEq, iIter);
3530 fts5MultiIterSetEof(pNew);
3531 fts5AssertMultiIterSetup(p, pNew);
3533 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
3534 fts5MultiIterNext(p, pNew, 0, 0);
3535 }else if( pNew->base.bEof==0 ){
3536 Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
3537 pNew->xSetOutputs(pNew, pSeg);
3540 }else{
3541 fts5MultiIterFree(pNew);
3542 *ppOut = 0;
3545 fts5MultiIterNew_post_check:
3546 assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
3547 return;
3551 ** Create an Fts5Iter that iterates through the doclist provided
3552 ** as the second argument.
3554 static void fts5MultiIterNew2(
3555 Fts5Index *p, /* FTS5 backend to iterate within */
3556 Fts5Data *pData, /* Doclist to iterate through */
3557 int bDesc, /* True for descending rowid order */
3558 Fts5Iter **ppOut /* New object */
3560 Fts5Iter *pNew;
3561 pNew = fts5MultiIterAlloc(p, 2);
3562 if( pNew ){
3563 Fts5SegIter *pIter = &pNew->aSeg[1];
3565 pIter->flags = FTS5_SEGITER_ONETERM;
3566 if( pData->szLeaf>0 ){
3567 pIter->pLeaf = pData;
3568 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
3569 pIter->iEndofDoclist = pData->nn;
3570 pNew->aFirst[1].iFirst = 1;
3571 if( bDesc ){
3572 pNew->bRev = 1;
3573 pIter->flags |= FTS5_SEGITER_REVERSE;
3574 fts5SegIterReverseInitPage(p, pIter);
3575 }else{
3576 fts5SegIterLoadNPos(p, pIter);
3578 pData = 0;
3579 }else{
3580 pNew->base.bEof = 1;
3582 fts5SegIterSetNext(p, pIter);
3584 *ppOut = pNew;
3587 fts5DataRelease(pData);
3591 ** Return true if the iterator is at EOF or if an error has occurred.
3592 ** False otherwise.
3594 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
3595 assert( pIter!=0 || p->rc!=SQLITE_OK );
3596 assert( p->rc!=SQLITE_OK
3597 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
3599 return (p->rc || pIter->base.bEof);
3603 ** Return the rowid of the entry that the iterator currently points
3604 ** to. If the iterator points to EOF when this function is called the
3605 ** results are undefined.
3607 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
3608 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
3609 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
3613 ** Move the iterator to the next entry at or following iMatch.
3615 static void fts5MultiIterNextFrom(
3616 Fts5Index *p,
3617 Fts5Iter *pIter,
3618 i64 iMatch
3620 while( 1 ){
3621 i64 iRowid;
3622 fts5MultiIterNext(p, pIter, 1, iMatch);
3623 if( fts5MultiIterEof(p, pIter) ) break;
3624 iRowid = fts5MultiIterRowid(pIter);
3625 if( pIter->bRev==0 && iRowid>=iMatch ) break;
3626 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
3631 ** Return a pointer to a buffer containing the term associated with the
3632 ** entry that the iterator currently points to.
3634 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
3635 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3636 *pn = p->term.n;
3637 return p->term.p;
3641 ** Allocate a new segment-id for the structure pStruct. The new segment
3642 ** id must be between 1 and 65335 inclusive, and must not be used by
3643 ** any currently existing segment. If a free segment id cannot be found,
3644 ** SQLITE_FULL is returned.
3646 ** If an error has already occurred, this function is a no-op. 0 is
3647 ** returned in this case.
3649 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
3650 int iSegid = 0;
3652 if( p->rc==SQLITE_OK ){
3653 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
3654 p->rc = SQLITE_FULL;
3655 }else{
3656 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
3657 ** array is 63 elements, or 252 bytes, in size. */
3658 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
3659 int iLvl, iSeg;
3660 int i;
3661 u32 mask;
3662 memset(aUsed, 0, sizeof(aUsed));
3663 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3664 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3665 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
3666 if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
3667 aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
3672 for(i=0; aUsed[i]==0xFFFFFFFF; i++);
3673 mask = aUsed[i];
3674 for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
3675 iSegid += 1 + i*32;
3677 #ifdef SQLITE_DEBUG
3678 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3679 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3680 assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
3683 assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
3686 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
3687 if( p->rc==SQLITE_OK ){
3688 u8 aBlob[2] = {0xff, 0xff};
3689 sqlite3_bind_int(pIdxSelect, 1, iSegid);
3690 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
3691 assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
3692 p->rc = sqlite3_reset(pIdxSelect);
3693 sqlite3_bind_null(pIdxSelect, 2);
3696 #endif
3700 return iSegid;
3704 ** Discard all data currently cached in the hash-tables.
3706 static void fts5IndexDiscardData(Fts5Index *p){
3707 assert( p->pHash || p->nPendingData==0 );
3708 if( p->pHash ){
3709 sqlite3Fts5HashClear(p->pHash);
3710 p->nPendingData = 0;
3715 ** Return the size of the prefix, in bytes, that buffer
3716 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3718 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3719 ** than buffer (pOld/nOld).
3721 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
3722 int i;
3723 for(i=0; i<nOld; i++){
3724 if( pOld[i]!=pNew[i] ) break;
3726 return i;
3729 static void fts5WriteDlidxClear(
3730 Fts5Index *p,
3731 Fts5SegWriter *pWriter,
3732 int bFlush /* If true, write dlidx to disk */
3734 int i;
3735 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
3736 for(i=0; i<pWriter->nDlidx; i++){
3737 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3738 if( pDlidx->buf.n==0 ) break;
3739 if( bFlush ){
3740 assert( pDlidx->pgno!=0 );
3741 fts5DataWrite(p,
3742 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3743 pDlidx->buf.p, pDlidx->buf.n
3746 sqlite3Fts5BufferZero(&pDlidx->buf);
3747 pDlidx->bPrevValid = 0;
3752 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
3753 ** Any new array elements are zeroed before returning.
3755 static int fts5WriteDlidxGrow(
3756 Fts5Index *p,
3757 Fts5SegWriter *pWriter,
3758 int nLvl
3760 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
3761 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
3762 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
3764 if( aDlidx==0 ){
3765 p->rc = SQLITE_NOMEM;
3766 }else{
3767 size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
3768 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
3769 pWriter->aDlidx = aDlidx;
3770 pWriter->nDlidx = nLvl;
3773 return p->rc;
3777 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3778 ** enough, flush it to disk and return 1. Otherwise discard it and return
3779 ** zero.
3781 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
3782 int bFlag = 0;
3784 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
3785 ** to the database, also write the doclist-index to disk. */
3786 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
3787 bFlag = 1;
3789 fts5WriteDlidxClear(p, pWriter, bFlag);
3790 pWriter->nEmpty = 0;
3791 return bFlag;
3795 ** This function is called whenever processing of the doclist for the
3796 ** last term on leaf page (pWriter->iBtPage) is completed.
3798 ** The doclist-index for that term is currently stored in-memory within the
3799 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3800 ** writes it out to disk. Or, if it is too small to bother with, discards
3801 ** it.
3803 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3805 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
3806 int bFlag;
3808 assert( pWriter->iBtPage || pWriter->nEmpty==0 );
3809 if( pWriter->iBtPage==0 ) return;
3810 bFlag = fts5WriteFlushDlidx(p, pWriter);
3812 if( p->rc==SQLITE_OK ){
3813 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
3814 /* The following was already done in fts5WriteInit(): */
3815 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
3816 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
3817 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
3818 sqlite3_step(p->pIdxWriter);
3819 p->rc = sqlite3_reset(p->pIdxWriter);
3820 sqlite3_bind_null(p->pIdxWriter, 2);
3822 pWriter->iBtPage = 0;
3826 ** This is called once for each leaf page except the first that contains
3827 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
3828 ** is larger than all terms written to earlier leaves, and equal to or
3829 ** smaller than the first term on the new leaf.
3831 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
3832 ** has already occurred when this function is called, it is a no-op.
3834 static void fts5WriteBtreeTerm(
3835 Fts5Index *p, /* FTS5 backend object */
3836 Fts5SegWriter *pWriter, /* Writer object */
3837 int nTerm, const u8 *pTerm /* First term on new page */
3839 fts5WriteFlushBtree(p, pWriter);
3840 if( p->rc==SQLITE_OK ){
3841 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
3842 pWriter->iBtPage = pWriter->writer.pgno;
3847 ** This function is called when flushing a leaf page that contains no
3848 ** terms at all to disk.
3850 static void fts5WriteBtreeNoTerm(
3851 Fts5Index *p, /* FTS5 backend object */
3852 Fts5SegWriter *pWriter /* Writer object */
3854 /* If there were no rowids on the leaf page either and the doclist-index
3855 ** has already been started, append an 0x00 byte to it. */
3856 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
3857 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
3858 assert( pDlidx->bPrevValid );
3859 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
3862 /* Increment the "number of sequential leaves without a term" counter. */
3863 pWriter->nEmpty++;
3866 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
3867 i64 iRowid;
3868 int iOff;
3870 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
3871 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
3872 return iRowid;
3876 ** Rowid iRowid has just been appended to the current leaf page. It is the
3877 ** first on the page. This function appends an appropriate entry to the current
3878 ** doclist-index.
3880 static void fts5WriteDlidxAppend(
3881 Fts5Index *p,
3882 Fts5SegWriter *pWriter,
3883 i64 iRowid
3885 int i;
3886 int bDone = 0;
3888 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
3889 i64 iVal;
3890 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3892 if( pDlidx->buf.n>=p->pConfig->pgsz ){
3893 /* The current doclist-index page is full. Write it to disk and push
3894 ** a copy of iRowid (which will become the first rowid on the next
3895 ** doclist-index leaf page) up into the next level of the b-tree
3896 ** hierarchy. If the node being flushed is currently the root node,
3897 ** also push its first rowid upwards. */
3898 pDlidx->buf.p[0] = 0x01; /* Not the root node */
3899 fts5DataWrite(p,
3900 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3901 pDlidx->buf.p, pDlidx->buf.n
3903 fts5WriteDlidxGrow(p, pWriter, i+2);
3904 pDlidx = &pWriter->aDlidx[i];
3905 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
3906 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
3908 /* This was the root node. Push its first rowid up to the new root. */
3909 pDlidx[1].pgno = pDlidx->pgno;
3910 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
3911 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
3912 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
3913 pDlidx[1].bPrevValid = 1;
3914 pDlidx[1].iPrev = iFirst;
3917 sqlite3Fts5BufferZero(&pDlidx->buf);
3918 pDlidx->bPrevValid = 0;
3919 pDlidx->pgno++;
3920 }else{
3921 bDone = 1;
3924 if( pDlidx->bPrevValid ){
3925 iVal = iRowid - pDlidx->iPrev;
3926 }else{
3927 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
3928 assert( pDlidx->buf.n==0 );
3929 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
3930 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
3931 iVal = iRowid;
3934 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
3935 pDlidx->bPrevValid = 1;
3936 pDlidx->iPrev = iRowid;
3940 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
3941 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
3942 Fts5PageWriter *pPage = &pWriter->writer;
3943 i64 iRowid;
3945 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
3947 /* Set the szLeaf header field. */
3948 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
3949 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
3951 if( pWriter->bFirstTermInPage ){
3952 /* No term was written to this page. */
3953 assert( pPage->pgidx.n==0 );
3954 fts5WriteBtreeNoTerm(p, pWriter);
3955 }else{
3956 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
3957 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
3960 /* Write the page out to disk */
3961 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
3962 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
3964 /* Initialize the next page. */
3965 fts5BufferZero(&pPage->buf);
3966 fts5BufferZero(&pPage->pgidx);
3967 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
3968 pPage->iPrevPgidx = 0;
3969 pPage->pgno++;
3971 /* Increase the leaves written counter */
3972 pWriter->nLeafWritten++;
3974 /* The new leaf holds no terms or rowids */
3975 pWriter->bFirstTermInPage = 1;
3976 pWriter->bFirstRowidInPage = 1;
3980 ** Append term pTerm/nTerm to the segment being written by the writer passed
3981 ** as the second argument.
3983 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3984 ** already occurred, this function is a no-op.
3986 static void fts5WriteAppendTerm(
3987 Fts5Index *p,
3988 Fts5SegWriter *pWriter,
3989 int nTerm, const u8 *pTerm
3991 int nPrefix; /* Bytes of prefix compression for term */
3992 Fts5PageWriter *pPage = &pWriter->writer;
3993 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
3994 int nMin = MIN(pPage->term.n, nTerm);
3996 assert( p->rc==SQLITE_OK );
3997 assert( pPage->buf.n>=4 );
3998 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
4000 /* If the current leaf page is full, flush it to disk. */
4001 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
4002 if( pPage->buf.n>4 ){
4003 fts5WriteFlushLeaf(p, pWriter);
4004 if( p->rc!=SQLITE_OK ) return;
4006 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
4009 /* TODO1: Updating pgidx here. */
4010 pPgidx->n += sqlite3Fts5PutVarint(
4011 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
4013 pPage->iPrevPgidx = pPage->buf.n;
4014 #if 0
4015 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
4016 pPgidx->n += 2;
4017 #endif
4019 if( pWriter->bFirstTermInPage ){
4020 nPrefix = 0;
4021 if( pPage->pgno!=1 ){
4022 /* This is the first term on a leaf that is not the leftmost leaf in
4023 ** the segment b-tree. In this case it is necessary to add a term to
4024 ** the b-tree hierarchy that is (a) larger than the largest term
4025 ** already written to the segment and (b) smaller than or equal to
4026 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
4027 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
4028 ** previous term.
4030 ** Usually, the previous term is available in pPage->term. The exception
4031 ** is if this is the first term written in an incremental-merge step.
4032 ** In this case the previous term is not available, so just write a
4033 ** copy of (pTerm/nTerm) into the parent node. This is slightly
4034 ** inefficient, but still correct. */
4035 int n = nTerm;
4036 if( pPage->term.n ){
4037 n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4039 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
4040 if( p->rc!=SQLITE_OK ) return;
4041 pPage = &pWriter->writer;
4043 }else{
4044 nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4045 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
4048 /* Append the number of bytes of new data, then the term data itself
4049 ** to the page. */
4050 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
4051 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
4053 /* Update the Fts5PageWriter.term field. */
4054 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
4055 pWriter->bFirstTermInPage = 0;
4057 pWriter->bFirstRowidInPage = 0;
4058 pWriter->bFirstRowidInDoclist = 1;
4060 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
4061 pWriter->aDlidx[0].pgno = pPage->pgno;
4065 ** Append a rowid and position-list size field to the writers output.
4067 static void fts5WriteAppendRowid(
4068 Fts5Index *p,
4069 Fts5SegWriter *pWriter,
4070 i64 iRowid
4072 if( p->rc==SQLITE_OK ){
4073 Fts5PageWriter *pPage = &pWriter->writer;
4075 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
4076 fts5WriteFlushLeaf(p, pWriter);
4079 /* If this is to be the first rowid written to the page, set the
4080 ** rowid-pointer in the page-header. Also append a value to the dlidx
4081 ** buffer, in case a doclist-index is required. */
4082 if( pWriter->bFirstRowidInPage ){
4083 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
4084 fts5WriteDlidxAppend(p, pWriter, iRowid);
4087 /* Write the rowid. */
4088 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
4089 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
4090 }else{
4091 assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
4092 fts5BufferAppendVarint(&p->rc, &pPage->buf,
4093 (u64)iRowid - (u64)pWriter->iPrevRowid
4096 pWriter->iPrevRowid = iRowid;
4097 pWriter->bFirstRowidInDoclist = 0;
4098 pWriter->bFirstRowidInPage = 0;
4102 static void fts5WriteAppendPoslistData(
4103 Fts5Index *p,
4104 Fts5SegWriter *pWriter,
4105 const u8 *aData,
4106 int nData
4108 Fts5PageWriter *pPage = &pWriter->writer;
4109 const u8 *a = aData;
4110 int n = nData;
4112 assert( p->pConfig->pgsz>0 );
4113 while( p->rc==SQLITE_OK
4114 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
4116 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
4117 int nCopy = 0;
4118 while( nCopy<nReq ){
4119 i64 dummy;
4120 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
4122 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
4123 a += nCopy;
4124 n -= nCopy;
4125 fts5WriteFlushLeaf(p, pWriter);
4127 if( n>0 ){
4128 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
4133 ** Flush any data cached by the writer object to the database. Free any
4134 ** allocations associated with the writer.
4136 static void fts5WriteFinish(
4137 Fts5Index *p,
4138 Fts5SegWriter *pWriter, /* Writer object */
4139 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
4141 int i;
4142 Fts5PageWriter *pLeaf = &pWriter->writer;
4143 if( p->rc==SQLITE_OK ){
4144 assert( pLeaf->pgno>=1 );
4145 if( pLeaf->buf.n>4 ){
4146 fts5WriteFlushLeaf(p, pWriter);
4148 *pnLeaf = pLeaf->pgno-1;
4149 if( pLeaf->pgno>1 ){
4150 fts5WriteFlushBtree(p, pWriter);
4153 fts5BufferFree(&pLeaf->term);
4154 fts5BufferFree(&pLeaf->buf);
4155 fts5BufferFree(&pLeaf->pgidx);
4156 fts5BufferFree(&pWriter->btterm);
4158 for(i=0; i<pWriter->nDlidx; i++){
4159 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4161 sqlite3_free(pWriter->aDlidx);
4164 static void fts5WriteInit(
4165 Fts5Index *p,
4166 Fts5SegWriter *pWriter,
4167 int iSegid
4169 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
4171 memset(pWriter, 0, sizeof(Fts5SegWriter));
4172 pWriter->iSegid = iSegid;
4174 fts5WriteDlidxGrow(p, pWriter, 1);
4175 pWriter->writer.pgno = 1;
4176 pWriter->bFirstTermInPage = 1;
4177 pWriter->iBtPage = 1;
4179 assert( pWriter->writer.buf.n==0 );
4180 assert( pWriter->writer.pgidx.n==0 );
4182 /* Grow the two buffers to pgsz + padding bytes in size. */
4183 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
4184 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
4186 if( p->pIdxWriter==0 ){
4187 Fts5Config *pConfig = p->pConfig;
4188 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
4189 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
4190 pConfig->zDb, pConfig->zName
4194 if( p->rc==SQLITE_OK ){
4195 /* Initialize the 4-byte leaf-page header to 0x00. */
4196 memset(pWriter->writer.buf.p, 0, 4);
4197 pWriter->writer.buf.n = 4;
4199 /* Bind the current output segment id to the index-writer. This is an
4200 ** optimization over binding the same value over and over as rows are
4201 ** inserted into %_idx by the current writer. */
4202 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
4207 ** Iterator pIter was used to iterate through the input segments of on an
4208 ** incremental merge operation. This function is called if the incremental
4209 ** merge step has finished but the input has not been completely exhausted.
4211 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
4212 int i;
4213 Fts5Buffer buf;
4214 memset(&buf, 0, sizeof(Fts5Buffer));
4215 for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
4216 Fts5SegIter *pSeg = &pIter->aSeg[i];
4217 if( pSeg->pSeg==0 ){
4218 /* no-op */
4219 }else if( pSeg->pLeaf==0 ){
4220 /* All keys from this input segment have been transfered to the output.
4221 ** Set both the first and last page-numbers to 0 to indicate that the
4222 ** segment is now empty. */
4223 pSeg->pSeg->pgnoLast = 0;
4224 pSeg->pSeg->pgnoFirst = 0;
4225 }else{
4226 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
4227 i64 iLeafRowid;
4228 Fts5Data *pData;
4229 int iId = pSeg->pSeg->iSegid;
4230 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
4232 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
4233 pData = fts5LeafRead(p, iLeafRowid);
4234 if( pData ){
4235 if( iOff>pData->szLeaf ){
4236 /* This can occur if the pages that the segments occupy overlap - if
4237 ** a single page has been assigned to more than one segment. In
4238 ** this case a prior iteration of this loop may have corrupted the
4239 ** segment currently being trimmed. */
4240 p->rc = FTS5_CORRUPT;
4241 }else{
4242 fts5BufferZero(&buf);
4243 fts5BufferGrow(&p->rc, &buf, pData->nn);
4244 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
4245 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
4246 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
4247 fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]);
4248 if( p->rc==SQLITE_OK ){
4249 /* Set the szLeaf field */
4250 fts5PutU16(&buf.p[2], (u16)buf.n);
4253 /* Set up the new page-index array */
4254 fts5BufferAppendVarint(&p->rc, &buf, 4);
4255 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
4256 && pSeg->iEndofDoclist<pData->szLeaf
4257 && pSeg->iPgidxOff<=pData->nn
4259 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
4260 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
4261 fts5BufferAppendBlob(&p->rc, &buf,
4262 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
4266 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
4267 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
4268 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
4270 fts5DataRelease(pData);
4274 fts5BufferFree(&buf);
4277 static void fts5MergeChunkCallback(
4278 Fts5Index *p,
4279 void *pCtx,
4280 const u8 *pChunk, int nChunk
4282 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
4283 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
4289 static void fts5IndexMergeLevel(
4290 Fts5Index *p, /* FTS5 backend object */
4291 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
4292 int iLvl, /* Level to read input from */
4293 int *pnRem /* Write up to this many output leaves */
4295 Fts5Structure *pStruct = *ppStruct;
4296 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4297 Fts5StructureLevel *pLvlOut;
4298 Fts5Iter *pIter = 0; /* Iterator to read input data */
4299 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
4300 int nInput; /* Number of input segments */
4301 Fts5SegWriter writer; /* Writer object */
4302 Fts5StructureSegment *pSeg; /* Output segment */
4303 Fts5Buffer term;
4304 int bOldest; /* True if the output segment is the oldest */
4305 int eDetail = p->pConfig->eDetail;
4306 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
4307 int bTermWritten = 0; /* True if current term already output */
4309 assert( iLvl<pStruct->nLevel );
4310 assert( pLvl->nMerge<=pLvl->nSeg );
4312 memset(&writer, 0, sizeof(Fts5SegWriter));
4313 memset(&term, 0, sizeof(Fts5Buffer));
4314 if( pLvl->nMerge ){
4315 pLvlOut = &pStruct->aLevel[iLvl+1];
4316 assert( pLvlOut->nSeg>0 );
4317 nInput = pLvl->nMerge;
4318 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
4320 fts5WriteInit(p, &writer, pSeg->iSegid);
4321 writer.writer.pgno = pSeg->pgnoLast+1;
4322 writer.iBtPage = 0;
4323 }else{
4324 int iSegid = fts5AllocateSegid(p, pStruct);
4326 /* Extend the Fts5Structure object as required to ensure the output
4327 ** segment exists. */
4328 if( iLvl==pStruct->nLevel-1 ){
4329 fts5StructureAddLevel(&p->rc, ppStruct);
4330 pStruct = *ppStruct;
4332 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
4333 if( p->rc ) return;
4334 pLvl = &pStruct->aLevel[iLvl];
4335 pLvlOut = &pStruct->aLevel[iLvl+1];
4337 fts5WriteInit(p, &writer, iSegid);
4339 /* Add the new segment to the output level */
4340 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
4341 pLvlOut->nSeg++;
4342 pSeg->pgnoFirst = 1;
4343 pSeg->iSegid = iSegid;
4344 pStruct->nSegment++;
4346 /* Read input from all segments in the input level */
4347 nInput = pLvl->nSeg;
4349 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
4351 assert( iLvl>=0 );
4352 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
4353 fts5MultiIterEof(p, pIter)==0;
4354 fts5MultiIterNext(p, pIter, 0, 0)
4356 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4357 int nPos; /* position-list size field value */
4358 int nTerm;
4359 const u8 *pTerm;
4361 pTerm = fts5MultiIterTerm(pIter, &nTerm);
4362 if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
4363 if( pnRem && writer.nLeafWritten>nRem ){
4364 break;
4366 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
4367 bTermWritten =0;
4370 /* Check for key annihilation. */
4371 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
4373 if( p->rc==SQLITE_OK && bTermWritten==0 ){
4374 /* This is a new term. Append a term to the output segment. */
4375 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
4376 bTermWritten = 1;
4379 /* Append the rowid to the output */
4380 /* WRITEPOSLISTSIZE */
4381 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
4383 if( eDetail==FTS5_DETAIL_NONE ){
4384 if( pSegIter->bDel ){
4385 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4386 if( pSegIter->nPos>0 ){
4387 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4390 }else{
4391 /* Append the position-list data to the output */
4392 nPos = pSegIter->nPos*2 + pSegIter->bDel;
4393 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4394 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4398 /* Flush the last leaf page to disk. Set the output segment b-tree height
4399 ** and last leaf page number at the same time. */
4400 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4402 assert( pIter!=0 || p->rc!=SQLITE_OK );
4403 if( fts5MultiIterEof(p, pIter) ){
4404 int i;
4406 /* Remove the redundant segments from the %_data table */
4407 for(i=0; i<nInput; i++){
4408 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
4411 /* Remove the redundant segments from the input level */
4412 if( pLvl->nSeg!=nInput ){
4413 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
4414 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
4416 pStruct->nSegment -= nInput;
4417 pLvl->nSeg -= nInput;
4418 pLvl->nMerge = 0;
4419 if( pSeg->pgnoLast==0 ){
4420 pLvlOut->nSeg--;
4421 pStruct->nSegment--;
4423 }else{
4424 assert( pSeg->pgnoLast>0 );
4425 fts5TrimSegments(p, pIter);
4426 pLvl->nMerge = nInput;
4429 fts5MultiIterFree(pIter);
4430 fts5BufferFree(&term);
4431 if( pnRem ) *pnRem -= writer.nLeafWritten;
4435 ** Do up to nPg pages of automerge work on the index.
4437 ** Return true if any changes were actually made, or false otherwise.
4439 static int fts5IndexMerge(
4440 Fts5Index *p, /* FTS5 backend object */
4441 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4442 int nPg, /* Pages of work to do */
4443 int nMin /* Minimum number of segments to merge */
4445 int nRem = nPg;
4446 int bRet = 0;
4447 Fts5Structure *pStruct = *ppStruct;
4448 while( nRem>0 && p->rc==SQLITE_OK ){
4449 int iLvl; /* To iterate through levels */
4450 int iBestLvl = 0; /* Level offering the most input segments */
4451 int nBest = 0; /* Number of input segments on best level */
4453 /* Set iBestLvl to the level to read input segments from. */
4454 assert( pStruct->nLevel>0 );
4455 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4456 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4457 if( pLvl->nMerge ){
4458 if( pLvl->nMerge>nBest ){
4459 iBestLvl = iLvl;
4460 nBest = pLvl->nMerge;
4462 break;
4464 if( pLvl->nSeg>nBest ){
4465 nBest = pLvl->nSeg;
4466 iBestLvl = iLvl;
4470 /* If nBest is still 0, then the index must be empty. */
4471 #ifdef SQLITE_DEBUG
4472 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
4473 assert( pStruct->aLevel[iLvl].nSeg==0 );
4475 #endif
4477 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
4478 break;
4480 bRet = 1;
4481 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4482 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4483 fts5StructurePromote(p, iBestLvl+1, pStruct);
4486 *ppStruct = pStruct;
4487 return bRet;
4491 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4492 ** segment. This function updates the write-counter accordingly and, if
4493 ** necessary, performs incremental merge work.
4495 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4496 ** already occurred, this function is a no-op.
4498 static void fts5IndexAutomerge(
4499 Fts5Index *p, /* FTS5 backend object */
4500 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4501 int nLeaf /* Number of output leaves just written */
4503 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
4504 Fts5Structure *pStruct = *ppStruct;
4505 u64 nWrite; /* Initial value of write-counter */
4506 int nWork; /* Number of work-quanta to perform */
4507 int nRem; /* Number of leaf pages left to write */
4509 /* Update the write-counter. While doing so, set nWork. */
4510 nWrite = pStruct->nWriteCounter;
4511 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4512 pStruct->nWriteCounter += nLeaf;
4513 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4515 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4519 static void fts5IndexCrisismerge(
4520 Fts5Index *p, /* FTS5 backend object */
4521 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
4523 const int nCrisis = p->pConfig->nCrisisMerge;
4524 Fts5Structure *pStruct = *ppStruct;
4525 int iLvl = 0;
4527 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
4528 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
4529 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
4530 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
4531 fts5StructurePromote(p, iLvl+1, pStruct);
4532 iLvl++;
4534 *ppStruct = pStruct;
4537 static int fts5IndexReturn(Fts5Index *p){
4538 int rc = p->rc;
4539 p->rc = SQLITE_OK;
4540 return rc;
4543 typedef struct Fts5FlushCtx Fts5FlushCtx;
4544 struct Fts5FlushCtx {
4545 Fts5Index *pIdx;
4546 Fts5SegWriter writer;
4550 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4551 ** in a 32-bit integer. Return the size of the largest prefix of this
4552 ** list nMax bytes or less in size.
4554 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
4555 int ret;
4556 u32 dummy;
4557 ret = fts5GetVarint32(aBuf, dummy);
4558 if( ret<nMax ){
4559 while( 1 ){
4560 int i = fts5GetVarint32(&aBuf[ret], dummy);
4561 if( (ret + i) > nMax ) break;
4562 ret += i;
4565 return ret;
4569 ** Flush the contents of in-memory hash table iHash to a new level-0
4570 ** segment on disk. Also update the corresponding structure record.
4572 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4573 ** already occurred, this function is a no-op.
4575 static void fts5FlushOneHash(Fts5Index *p){
4576 Fts5Hash *pHash = p->pHash;
4577 Fts5Structure *pStruct;
4578 int iSegid;
4579 int pgnoLast = 0; /* Last leaf page number in segment */
4581 /* Obtain a reference to the index structure and allocate a new segment-id
4582 ** for the new level-0 segment. */
4583 pStruct = fts5StructureRead(p);
4584 iSegid = fts5AllocateSegid(p, pStruct);
4585 fts5StructureInvalidate(p);
4587 if( iSegid ){
4588 const int pgsz = p->pConfig->pgsz;
4589 int eDetail = p->pConfig->eDetail;
4590 Fts5StructureSegment *pSeg; /* New segment within pStruct */
4591 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
4592 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
4594 Fts5SegWriter writer;
4595 fts5WriteInit(p, &writer, iSegid);
4597 pBuf = &writer.writer.buf;
4598 pPgidx = &writer.writer.pgidx;
4600 /* fts5WriteInit() should have initialized the buffers to (most likely)
4601 ** the maximum space required. */
4602 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
4603 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
4605 /* Begin scanning through hash table entries. This loop runs once for each
4606 ** term/doclist currently stored within the hash table. */
4607 if( p->rc==SQLITE_OK ){
4608 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
4610 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
4611 const char *zTerm; /* Buffer containing term */
4612 const u8 *pDoclist; /* Pointer to doclist for this term */
4613 int nDoclist; /* Size of doclist in bytes */
4615 /* Write the term for this entry to disk. */
4616 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
4617 fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
4618 if( p->rc!=SQLITE_OK ) break;
4620 assert( writer.bFirstRowidInPage==0 );
4621 if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
4622 /* The entire doclist will fit on the current leaf. */
4623 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
4624 }else{
4625 i64 iRowid = 0;
4626 u64 iDelta = 0;
4627 int iOff = 0;
4629 /* The entire doclist will not fit on this leaf. The following
4630 ** loop iterates through the poslists that make up the current
4631 ** doclist. */
4632 while( p->rc==SQLITE_OK && iOff<nDoclist ){
4633 iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
4634 iRowid += iDelta;
4636 if( writer.bFirstRowidInPage ){
4637 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
4638 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
4639 writer.bFirstRowidInPage = 0;
4640 fts5WriteDlidxAppend(p, &writer, iRowid);
4641 if( p->rc!=SQLITE_OK ) break;
4642 }else{
4643 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
4645 assert( pBuf->n<=pBuf->nSpace );
4647 if( eDetail==FTS5_DETAIL_NONE ){
4648 if( iOff<nDoclist && pDoclist[iOff]==0 ){
4649 pBuf->p[pBuf->n++] = 0;
4650 iOff++;
4651 if( iOff<nDoclist && pDoclist[iOff]==0 ){
4652 pBuf->p[pBuf->n++] = 0;
4653 iOff++;
4656 if( (pBuf->n + pPgidx->n)>=pgsz ){
4657 fts5WriteFlushLeaf(p, &writer);
4659 }else{
4660 int bDummy;
4661 int nPos;
4662 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
4663 nCopy += nPos;
4664 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
4665 /* The entire poslist will fit on the current leaf. So copy
4666 ** it in one go. */
4667 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
4668 }else{
4669 /* The entire poslist will not fit on this leaf. So it needs
4670 ** to be broken into sections. The only qualification being
4671 ** that each varint must be stored contiguously. */
4672 const u8 *pPoslist = &pDoclist[iOff];
4673 int iPos = 0;
4674 while( p->rc==SQLITE_OK ){
4675 int nSpace = pgsz - pBuf->n - pPgidx->n;
4676 int n = 0;
4677 if( (nCopy - iPos)<=nSpace ){
4678 n = nCopy - iPos;
4679 }else{
4680 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
4682 assert( n>0 );
4683 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
4684 iPos += n;
4685 if( (pBuf->n + pPgidx->n)>=pgsz ){
4686 fts5WriteFlushLeaf(p, &writer);
4688 if( iPos>=nCopy ) break;
4691 iOff += nCopy;
4696 /* TODO2: Doclist terminator written here. */
4697 /* pBuf->p[pBuf->n++] = '\0'; */
4698 assert( pBuf->n<=pBuf->nSpace );
4699 if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
4701 sqlite3Fts5HashClear(pHash);
4702 fts5WriteFinish(p, &writer, &pgnoLast);
4704 /* Update the Fts5Structure. It is written back to the database by the
4705 ** fts5StructureRelease() call below. */
4706 if( pStruct->nLevel==0 ){
4707 fts5StructureAddLevel(&p->rc, &pStruct);
4709 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
4710 if( p->rc==SQLITE_OK ){
4711 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
4712 pSeg->iSegid = iSegid;
4713 pSeg->pgnoFirst = 1;
4714 pSeg->pgnoLast = pgnoLast;
4715 pStruct->nSegment++;
4717 fts5StructurePromote(p, 0, pStruct);
4720 fts5IndexAutomerge(p, &pStruct, pgnoLast);
4721 fts5IndexCrisismerge(p, &pStruct);
4722 fts5StructureWrite(p, pStruct);
4723 fts5StructureRelease(pStruct);
4727 ** Flush any data stored in the in-memory hash tables to the database.
4729 static void fts5IndexFlush(Fts5Index *p){
4730 /* Unless it is empty, flush the hash table to disk */
4731 if( p->nPendingData ){
4732 assert( p->pHash );
4733 p->nPendingData = 0;
4734 fts5FlushOneHash(p);
4738 static Fts5Structure *fts5IndexOptimizeStruct(
4739 Fts5Index *p,
4740 Fts5Structure *pStruct
4742 Fts5Structure *pNew = 0;
4743 sqlite3_int64 nByte = sizeof(Fts5Structure);
4744 int nSeg = pStruct->nSegment;
4745 int i;
4747 /* Figure out if this structure requires optimization. A structure does
4748 ** not require optimization if either:
4750 ** + it consists of fewer than two segments, or
4751 ** + all segments are on the same level, or
4752 ** + all segments except one are currently inputs to a merge operation.
4754 ** In the first case, return NULL. In the second, increment the ref-count
4755 ** on *pStruct and return a copy of the pointer to it.
4757 if( nSeg<2 ) return 0;
4758 for(i=0; i<pStruct->nLevel; i++){
4759 int nThis = pStruct->aLevel[i].nSeg;
4760 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
4761 fts5StructureRef(pStruct);
4762 return pStruct;
4764 assert( pStruct->aLevel[i].nMerge<=nThis );
4767 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
4768 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
4770 if( pNew ){
4771 Fts5StructureLevel *pLvl;
4772 nByte = nSeg * sizeof(Fts5StructureSegment);
4773 pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL);
4774 pNew->nRef = 1;
4775 pNew->nWriteCounter = pStruct->nWriteCounter;
4776 pLvl = &pNew->aLevel[pNew->nLevel-1];
4777 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
4778 if( pLvl->aSeg ){
4779 int iLvl, iSeg;
4780 int iSegOut = 0;
4781 /* Iterate through all segments, from oldest to newest. Add them to
4782 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
4783 ** segment in the data structure. */
4784 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
4785 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
4786 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
4787 iSegOut++;
4790 pNew->nSegment = pLvl->nSeg = nSeg;
4791 }else{
4792 sqlite3_free(pNew);
4793 pNew = 0;
4797 return pNew;
4800 int sqlite3Fts5IndexOptimize(Fts5Index *p){
4801 Fts5Structure *pStruct;
4802 Fts5Structure *pNew = 0;
4804 assert( p->rc==SQLITE_OK );
4805 fts5IndexFlush(p);
4806 pStruct = fts5StructureRead(p);
4807 fts5StructureInvalidate(p);
4809 if( pStruct ){
4810 pNew = fts5IndexOptimizeStruct(p, pStruct);
4812 fts5StructureRelease(pStruct);
4814 assert( pNew==0 || pNew->nSegment>0 );
4815 if( pNew ){
4816 int iLvl;
4817 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
4818 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
4819 int nRem = FTS5_OPT_WORK_UNIT;
4820 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
4823 fts5StructureWrite(p, pNew);
4824 fts5StructureRelease(pNew);
4827 return fts5IndexReturn(p);
4831 ** This is called to implement the special "VALUES('merge', $nMerge)"
4832 ** INSERT command.
4834 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
4835 Fts5Structure *pStruct = fts5StructureRead(p);
4836 if( pStruct ){
4837 int nMin = p->pConfig->nUsermerge;
4838 fts5StructureInvalidate(p);
4839 if( nMerge<0 ){
4840 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
4841 fts5StructureRelease(pStruct);
4842 pStruct = pNew;
4843 nMin = 2;
4844 nMerge = nMerge*-1;
4846 if( pStruct && pStruct->nLevel ){
4847 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
4848 fts5StructureWrite(p, pStruct);
4851 fts5StructureRelease(pStruct);
4853 return fts5IndexReturn(p);
4856 static void fts5AppendRowid(
4857 Fts5Index *p,
4858 u64 iDelta,
4859 Fts5Iter *pUnused,
4860 Fts5Buffer *pBuf
4862 UNUSED_PARAM(pUnused);
4863 fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
4866 static void fts5AppendPoslist(
4867 Fts5Index *p,
4868 u64 iDelta,
4869 Fts5Iter *pMulti,
4870 Fts5Buffer *pBuf
4872 int nData = pMulti->base.nData;
4873 int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
4874 assert( nData>0 );
4875 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
4876 fts5BufferSafeAppendVarint(pBuf, iDelta);
4877 fts5BufferSafeAppendVarint(pBuf, nData*2);
4878 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
4879 memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
4884 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
4885 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
4887 assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
4888 if( p>=pIter->aEof ){
4889 pIter->aPoslist = 0;
4890 }else{
4891 i64 iDelta;
4893 p += fts5GetVarint(p, (u64*)&iDelta);
4894 pIter->iRowid += iDelta;
4896 /* Read position list size */
4897 if( p[0] & 0x80 ){
4898 int nPos;
4899 pIter->nSize = fts5GetVarint32(p, nPos);
4900 pIter->nPoslist = (nPos>>1);
4901 }else{
4902 pIter->nPoslist = ((int)(p[0])) >> 1;
4903 pIter->nSize = 1;
4906 pIter->aPoslist = p;
4907 if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
4908 pIter->aPoslist = 0;
4913 static void fts5DoclistIterInit(
4914 Fts5Buffer *pBuf,
4915 Fts5DoclistIter *pIter
4917 memset(pIter, 0, sizeof(*pIter));
4918 if( pBuf->n>0 ){
4919 pIter->aPoslist = pBuf->p;
4920 pIter->aEof = &pBuf->p[pBuf->n];
4921 fts5DoclistIterNext(pIter);
4925 #if 0
4927 ** Append a doclist to buffer pBuf.
4929 ** This function assumes that space within the buffer has already been
4930 ** allocated.
4932 static void fts5MergeAppendDocid(
4933 Fts5Buffer *pBuf, /* Buffer to write to */
4934 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
4935 i64 iRowid /* Rowid to append */
4937 assert( pBuf->n!=0 || (*piLastRowid)==0 );
4938 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
4939 *piLastRowid = iRowid;
4941 #endif
4943 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
4944 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
4945 fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
4946 (iLastRowid) = (iRowid); \
4950 ** Swap the contents of buffer *p1 with that of *p2.
4952 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4953 Fts5Buffer tmp = *p1;
4954 *p1 = *p2;
4955 *p2 = tmp;
4958 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
4959 int i = *piOff;
4960 if( i>=pBuf->n ){
4961 *piOff = -1;
4962 }else{
4963 u64 iVal;
4964 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
4965 *piRowid += iVal;
4970 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
4971 ** In this case the buffers consist of a delta-encoded list of rowids only.
4973 static void fts5MergeRowidLists(
4974 Fts5Index *p, /* FTS5 backend object */
4975 Fts5Buffer *p1, /* First list to merge */
4976 int nBuf, /* Number of entries in apBuf[] */
4977 Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
4979 int i1 = 0;
4980 int i2 = 0;
4981 i64 iRowid1 = 0;
4982 i64 iRowid2 = 0;
4983 i64 iOut = 0;
4984 Fts5Buffer *p2 = &aBuf[0];
4985 Fts5Buffer out;
4987 (void)nBuf;
4988 memset(&out, 0, sizeof(out));
4989 assert( nBuf==1 );
4990 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
4991 if( p->rc ) return;
4993 fts5NextRowid(p1, &i1, &iRowid1);
4994 fts5NextRowid(p2, &i2, &iRowid2);
4995 while( i1>=0 || i2>=0 ){
4996 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
4997 assert( iOut==0 || iRowid1>iOut );
4998 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
4999 iOut = iRowid1;
5000 fts5NextRowid(p1, &i1, &iRowid1);
5001 }else{
5002 assert( iOut==0 || iRowid2>iOut );
5003 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
5004 iOut = iRowid2;
5005 if( i1>=0 && iRowid1==iRowid2 ){
5006 fts5NextRowid(p1, &i1, &iRowid1);
5008 fts5NextRowid(p2, &i2, &iRowid2);
5012 fts5BufferSwap(&out, p1);
5013 fts5BufferFree(&out);
5016 typedef struct PrefixMerger PrefixMerger;
5017 struct PrefixMerger {
5018 Fts5DoclistIter iter; /* Doclist iterator */
5019 i64 iPos; /* For iterating through a position list */
5020 int iOff;
5021 u8 *aPos;
5022 PrefixMerger *pNext; /* Next in docid/poslist order */
5025 static void fts5PrefixMergerInsertByRowid(
5026 PrefixMerger **ppHead,
5027 PrefixMerger *p
5029 if( p->iter.aPoslist ){
5030 PrefixMerger **pp = ppHead;
5031 while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
5032 pp = &(*pp)->pNext;
5034 p->pNext = *pp;
5035 *pp = p;
5039 static void fts5PrefixMergerInsertByPosition(
5040 PrefixMerger **ppHead,
5041 PrefixMerger *p
5043 if( p->iPos>=0 ){
5044 PrefixMerger **pp = ppHead;
5045 while( *pp && p->iPos>(*pp)->iPos ){
5046 pp = &(*pp)->pNext;
5048 p->pNext = *pp;
5049 *pp = p;
5055 ** Array aBuf[] contains nBuf doclists. These are all merged in with the
5056 ** doclist in buffer p1.
5058 static void fts5MergePrefixLists(
5059 Fts5Index *p, /* FTS5 backend object */
5060 Fts5Buffer *p1, /* First list to merge */
5061 int nBuf, /* Number of buffers in array aBuf[] */
5062 Fts5Buffer *aBuf /* Other lists to merge in */
5064 #define fts5PrefixMergerNextPosition(p) \
5065 sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
5066 #define FTS5_MERGE_NLIST 16
5067 PrefixMerger aMerger[FTS5_MERGE_NLIST];
5068 PrefixMerger *pHead = 0;
5069 int i;
5070 int nOut = 0;
5071 Fts5Buffer out = {0, 0, 0};
5072 Fts5Buffer tmp = {0, 0, 0};
5073 i64 iLastRowid = 0;
5075 /* Initialize a doclist-iterator for each input buffer. Arrange them in
5076 ** a linked-list starting at pHead in ascending order of rowid. Avoid
5077 ** linking any iterators already at EOF into the linked list at all. */
5078 assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
5079 memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
5080 pHead = &aMerger[nBuf];
5081 fts5DoclistIterInit(p1, &pHead->iter);
5082 for(i=0; i<nBuf; i++){
5083 fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
5084 fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
5085 nOut += aBuf[i].n;
5087 if( nOut==0 ) return;
5088 nOut += p1->n + 9 + 10*nBuf;
5090 /* The maximum size of the output is equal to the sum of the
5091 ** input sizes + 1 varint (9 bytes). The extra varint is because if the
5092 ** first rowid in one input is a large negative number, and the first in
5093 ** the other a non-negative number, the delta for the non-negative
5094 ** number will be larger on disk than the literal integer value
5095 ** was.
5097 ** Or, if the input position-lists are corrupt, then the output might
5098 ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
5099 ** (the value PoslistNext64() uses for EOF) as a position and appending
5100 ** it to the output. This can happen at most once for each input
5101 ** position-list, hence (nBuf+1) 10 byte paddings. */
5102 if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;
5104 while( pHead ){
5105 fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);
5107 if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
5108 /* Merge data from two or more poslists */
5109 i64 iPrev = 0;
5110 int nTmp = FTS5_DATA_ZERO_PADDING;
5111 int nMerge = 0;
5112 PrefixMerger *pSave = pHead;
5113 PrefixMerger *pThis = 0;
5114 int nTail = 0;
5116 pHead = 0;
5117 while( pSave && pSave->iter.iRowid==iLastRowid ){
5118 PrefixMerger *pNext = pSave->pNext;
5119 pSave->iOff = 0;
5120 pSave->iPos = 0;
5121 pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
5122 fts5PrefixMergerNextPosition(pSave);
5123 nTmp += pSave->iter.nPoslist + 10;
5124 nMerge++;
5125 fts5PrefixMergerInsertByPosition(&pHead, pSave);
5126 pSave = pNext;
5129 if( pHead==0 || pHead->pNext==0 ){
5130 p->rc = FTS5_CORRUPT;
5131 break;
5134 /* See the earlier comment in this function for an explanation of why
5135 ** corrupt input position lists might cause the output to consume
5136 ** at most nMerge*10 bytes of unexpected space. */
5137 if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
5138 break;
5140 fts5BufferZero(&tmp);
5142 pThis = pHead;
5143 pHead = pThis->pNext;
5144 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5145 fts5PrefixMergerNextPosition(pThis);
5146 fts5PrefixMergerInsertByPosition(&pHead, pThis);
5148 while( pHead->pNext ){
5149 pThis = pHead;
5150 if( pThis->iPos!=iPrev ){
5151 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5153 fts5PrefixMergerNextPosition(pThis);
5154 pHead = pThis->pNext;
5155 fts5PrefixMergerInsertByPosition(&pHead, pThis);
5158 if( pHead->iPos!=iPrev ){
5159 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
5161 nTail = pHead->iter.nPoslist - pHead->iOff;
5163 /* WRITEPOSLISTSIZE */
5164 assert_nc( tmp.n+nTail<=nTmp );
5165 assert( tmp.n+nTail<=nTmp+nMerge*10 );
5166 if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
5167 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
5168 break;
5170 fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
5171 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
5172 if( nTail>0 ){
5173 fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
5176 pHead = pSave;
5177 for(i=0; i<nBuf+1; i++){
5178 PrefixMerger *pX = &aMerger[i];
5179 if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
5180 fts5DoclistIterNext(&pX->iter);
5181 fts5PrefixMergerInsertByRowid(&pHead, pX);
5185 }else{
5186 /* Copy poslist from pHead to output */
5187 PrefixMerger *pThis = pHead;
5188 Fts5DoclistIter *pI = &pThis->iter;
5189 fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
5190 fts5DoclistIterNext(pI);
5191 pHead = pThis->pNext;
5192 fts5PrefixMergerInsertByRowid(&pHead, pThis);
5196 fts5BufferFree(p1);
5197 fts5BufferFree(&tmp);
5198 memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
5199 *p1 = out;
5202 static void fts5SetupPrefixIter(
5203 Fts5Index *p, /* Index to read from */
5204 int bDesc, /* True for "ORDER BY rowid DESC" */
5205 int iIdx, /* Index to scan for data */
5206 u8 *pToken, /* Buffer containing prefix to match */
5207 int nToken, /* Size of buffer pToken in bytes */
5208 Fts5Colset *pColset, /* Restrict matches to these columns */
5209 Fts5Iter **ppIter /* OUT: New iterator */
5211 Fts5Structure *pStruct;
5212 Fts5Buffer *aBuf;
5213 int nBuf = 32;
5214 int nMerge = 1;
5216 void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
5217 void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
5218 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
5219 xMerge = fts5MergeRowidLists;
5220 xAppend = fts5AppendRowid;
5221 }else{
5222 nMerge = FTS5_MERGE_NLIST-1;
5223 nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
5224 xMerge = fts5MergePrefixLists;
5225 xAppend = fts5AppendPoslist;
5228 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
5229 pStruct = fts5StructureRead(p);
5231 if( aBuf && pStruct ){
5232 const int flags = FTS5INDEX_QUERY_SCAN
5233 | FTS5INDEX_QUERY_SKIPEMPTY
5234 | FTS5INDEX_QUERY_NOOUTPUT;
5235 int i;
5236 i64 iLastRowid = 0;
5237 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
5238 Fts5Data *pData;
5239 Fts5Buffer doclist;
5240 int bNewTerm = 1;
5242 memset(&doclist, 0, sizeof(doclist));
5243 if( iIdx!=0 ){
5244 int dummy = 0;
5245 const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
5246 pToken[0] = FTS5_MAIN_PREFIX;
5247 fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
5248 fts5IterSetOutputCb(&p->rc, p1);
5249 for(;
5250 fts5MultiIterEof(p, p1)==0;
5251 fts5MultiIterNext2(p, p1, &dummy)
5253 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
5254 p1->xSetOutputs(p1, pSeg);
5255 if( p1->base.nData ){
5256 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
5257 iLastRowid = p1->base.iRowid;
5260 fts5MultiIterFree(p1);
5263 pToken[0] = FTS5_MAIN_PREFIX + iIdx;
5264 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
5265 fts5IterSetOutputCb(&p->rc, p1);
5266 for( /* no-op */ ;
5267 fts5MultiIterEof(p, p1)==0;
5268 fts5MultiIterNext2(p, p1, &bNewTerm)
5270 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
5271 int nTerm = pSeg->term.n;
5272 const u8 *pTerm = pSeg->term.p;
5273 p1->xSetOutputs(p1, pSeg);
5275 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
5276 if( bNewTerm ){
5277 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
5280 if( p1->base.nData==0 ) continue;
5282 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
5283 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
5284 int i1 = i*nMerge;
5285 int iStore;
5286 assert( i1+nMerge<=nBuf );
5287 for(iStore=i1; iStore<i1+nMerge; iStore++){
5288 if( aBuf[iStore].n==0 ){
5289 fts5BufferSwap(&doclist, &aBuf[iStore]);
5290 fts5BufferZero(&doclist);
5291 break;
5294 if( iStore==i1+nMerge ){
5295 xMerge(p, &doclist, nMerge, &aBuf[i1]);
5296 for(iStore=i1; iStore<i1+nMerge; iStore++){
5297 fts5BufferZero(&aBuf[iStore]);
5301 iLastRowid = 0;
5304 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
5305 iLastRowid = p1->base.iRowid;
5308 assert( (nBuf%nMerge)==0 );
5309 for(i=0; i<nBuf; i+=nMerge){
5310 int iFree;
5311 if( p->rc==SQLITE_OK ){
5312 xMerge(p, &doclist, nMerge, &aBuf[i]);
5314 for(iFree=i; iFree<i+nMerge; iFree++){
5315 fts5BufferFree(&aBuf[iFree]);
5318 fts5MultiIterFree(p1);
5320 pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
5321 if( pData ){
5322 pData->p = (u8*)&pData[1];
5323 pData->nn = pData->szLeaf = doclist.n;
5324 if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
5325 fts5MultiIterNew2(p, pData, bDesc, ppIter);
5327 fts5BufferFree(&doclist);
5330 fts5StructureRelease(pStruct);
5331 sqlite3_free(aBuf);
5336 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
5337 ** to the document with rowid iRowid.
5339 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
5340 assert( p->rc==SQLITE_OK );
5342 /* Allocate the hash table if it has not already been allocated */
5343 if( p->pHash==0 ){
5344 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
5347 /* Flush the hash table to disk if required */
5348 if( iRowid<p->iWriteRowid
5349 || (iRowid==p->iWriteRowid && p->bDelete==0)
5350 || (p->nPendingData > p->pConfig->nHashSize)
5352 fts5IndexFlush(p);
5355 p->iWriteRowid = iRowid;
5356 p->bDelete = bDelete;
5357 return fts5IndexReturn(p);
5361 ** Commit data to disk.
5363 int sqlite3Fts5IndexSync(Fts5Index *p){
5364 assert( p->rc==SQLITE_OK );
5365 fts5IndexFlush(p);
5366 sqlite3Fts5IndexCloseReader(p);
5367 return fts5IndexReturn(p);
5371 ** Discard any data stored in the in-memory hash tables. Do not write it
5372 ** to the database. Additionally, assume that the contents of the %_data
5373 ** table may have changed on disk. So any in-memory caches of %_data
5374 ** records must be invalidated.
5376 int sqlite3Fts5IndexRollback(Fts5Index *p){
5377 sqlite3Fts5IndexCloseReader(p);
5378 fts5IndexDiscardData(p);
5379 fts5StructureInvalidate(p);
5380 /* assert( p->rc==SQLITE_OK ); */
5381 return SQLITE_OK;
5385 ** The %_data table is completely empty when this function is called. This
5386 ** function populates it with the initial structure objects for each index,
5387 ** and the initial version of the "averages" record (a zero-byte blob).
5389 int sqlite3Fts5IndexReinit(Fts5Index *p){
5390 Fts5Structure s;
5391 fts5StructureInvalidate(p);
5392 fts5IndexDiscardData(p);
5393 memset(&s, 0, sizeof(Fts5Structure));
5394 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
5395 fts5StructureWrite(p, &s);
5396 return fts5IndexReturn(p);
5400 ** Open a new Fts5Index handle. If the bCreate argument is true, create
5401 ** and initialize the underlying %_data table.
5403 ** If successful, set *pp to point to the new object and return SQLITE_OK.
5404 ** Otherwise, set *pp to NULL and return an SQLite error code.
5406 int sqlite3Fts5IndexOpen(
5407 Fts5Config *pConfig,
5408 int bCreate,
5409 Fts5Index **pp,
5410 char **pzErr
5412 int rc = SQLITE_OK;
5413 Fts5Index *p; /* New object */
5415 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
5416 if( rc==SQLITE_OK ){
5417 p->pConfig = pConfig;
5418 p->nWorkUnit = FTS5_WORK_UNIT;
5419 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
5420 if( p->zDataTbl && bCreate ){
5421 rc = sqlite3Fts5CreateTable(
5422 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
5424 if( rc==SQLITE_OK ){
5425 rc = sqlite3Fts5CreateTable(pConfig, "idx",
5426 "segid, term, pgno, PRIMARY KEY(segid, term)",
5427 1, pzErr
5430 if( rc==SQLITE_OK ){
5431 rc = sqlite3Fts5IndexReinit(p);
5436 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
5437 if( rc ){
5438 sqlite3Fts5IndexClose(p);
5439 *pp = 0;
5441 return rc;
5445 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
5447 int sqlite3Fts5IndexClose(Fts5Index *p){
5448 int rc = SQLITE_OK;
5449 if( p ){
5450 assert( p->pReader==0 );
5451 fts5StructureInvalidate(p);
5452 sqlite3_finalize(p->pWriter);
5453 sqlite3_finalize(p->pDeleter);
5454 sqlite3_finalize(p->pIdxWriter);
5455 sqlite3_finalize(p->pIdxDeleter);
5456 sqlite3_finalize(p->pIdxSelect);
5457 sqlite3_finalize(p->pDataVersion);
5458 sqlite3Fts5HashFree(p->pHash);
5459 sqlite3_free(p->zDataTbl);
5460 sqlite3_free(p);
5462 return rc;
5466 ** Argument p points to a buffer containing utf-8 text that is n bytes in
5467 ** size. Return the number of bytes in the nChar character prefix of the
5468 ** buffer, or 0 if there are less than nChar characters in total.
5470 int sqlite3Fts5IndexCharlenToBytelen(
5471 const char *p,
5472 int nByte,
5473 int nChar
5475 int n = 0;
5476 int i;
5477 for(i=0; i<nChar; i++){
5478 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */
5479 if( (unsigned char)p[n++]>=0xc0 ){
5480 if( n>=nByte ) return 0;
5481 while( (p[n] & 0xc0)==0x80 ){
5482 n++;
5483 if( n>=nByte ){
5484 if( i+1==nChar ) break;
5485 return 0;
5490 return n;
5494 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
5495 ** unicode characters in the string.
5497 static int fts5IndexCharlen(const char *pIn, int nIn){
5498 int nChar = 0;
5499 int i = 0;
5500 while( i<nIn ){
5501 if( (unsigned char)pIn[i++]>=0xc0 ){
5502 while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
5504 nChar++;
5506 return nChar;
5510 ** Insert or remove data to or from the index. Each time a document is
5511 ** added to or removed from the index, this function is called one or more
5512 ** times.
5514 ** For an insert, it must be called once for each token in the new document.
5515 ** If the operation is a delete, it must be called (at least) once for each
5516 ** unique token in the document with an iCol value less than zero. The iPos
5517 ** argument is ignored for a delete.
5519 int sqlite3Fts5IndexWrite(
5520 Fts5Index *p, /* Index to write to */
5521 int iCol, /* Column token appears in (-ve -> delete) */
5522 int iPos, /* Position of token within column */
5523 const char *pToken, int nToken /* Token to add or remove to or from index */
5525 int i; /* Used to iterate through indexes */
5526 int rc = SQLITE_OK; /* Return code */
5527 Fts5Config *pConfig = p->pConfig;
5529 assert( p->rc==SQLITE_OK );
5530 assert( (iCol<0)==p->bDelete );
5532 /* Add the entry to the main terms index. */
5533 rc = sqlite3Fts5HashWrite(
5534 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
5537 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
5538 const int nChar = pConfig->aPrefix[i];
5539 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
5540 if( nByte ){
5541 rc = sqlite3Fts5HashWrite(p->pHash,
5542 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
5543 nByte
5548 return rc;
5552 ** Open a new iterator to iterate though all rowid that match the
5553 ** specified token or token prefix.
5555 int sqlite3Fts5IndexQuery(
5556 Fts5Index *p, /* FTS index to query */
5557 const char *pToken, int nToken, /* Token (or prefix) to query for */
5558 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
5559 Fts5Colset *pColset, /* Match these columns only */
5560 Fts5IndexIter **ppIter /* OUT: New iterator object */
5562 Fts5Config *pConfig = p->pConfig;
5563 Fts5Iter *pRet = 0;
5564 Fts5Buffer buf = {0, 0, 0};
5566 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
5567 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
5569 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
5570 int iIdx = 0; /* Index to search */
5571 int iPrefixIdx = 0; /* +1 prefix index */
5572 if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
5574 /* Figure out which index to search and set iIdx accordingly. If this
5575 ** is a prefix query for which there is no prefix index, set iIdx to
5576 ** greater than pConfig->nPrefix to indicate that the query will be
5577 ** satisfied by scanning multiple terms in the main index.
5579 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
5580 ** prefix-query. Instead of using a prefix-index (if one exists),
5581 ** evaluate the prefix query using the main FTS index. This is used
5582 ** for internal sanity checking by the integrity-check in debug
5583 ** mode only. */
5584 #ifdef SQLITE_DEBUG
5585 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
5586 assert( flags & FTS5INDEX_QUERY_PREFIX );
5587 iIdx = 1+pConfig->nPrefix;
5588 }else
5589 #endif
5590 if( flags & FTS5INDEX_QUERY_PREFIX ){
5591 int nChar = fts5IndexCharlen(pToken, nToken);
5592 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
5593 int nIdxChar = pConfig->aPrefix[iIdx-1];
5594 if( nIdxChar==nChar ) break;
5595 if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
5599 if( iIdx<=pConfig->nPrefix ){
5600 /* Straight index lookup */
5601 Fts5Structure *pStruct = fts5StructureRead(p);
5602 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
5603 if( pStruct ){
5604 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
5605 pColset, buf.p, nToken+1, -1, 0, &pRet
5607 fts5StructureRelease(pStruct);
5609 }else{
5610 /* Scan multiple terms in the main index */
5611 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
5612 fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
5613 if( pRet==0 ){
5614 assert( p->rc!=SQLITE_OK );
5615 }else{
5616 assert( pRet->pColset==0 );
5617 fts5IterSetOutputCb(&p->rc, pRet);
5618 if( p->rc==SQLITE_OK ){
5619 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
5620 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
5625 if( p->rc ){
5626 sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
5627 pRet = 0;
5628 sqlite3Fts5IndexCloseReader(p);
5631 *ppIter = (Fts5IndexIter*)pRet;
5632 sqlite3Fts5BufferFree(&buf);
5634 return fts5IndexReturn(p);
5638 ** Return true if the iterator passed as the only argument is at EOF.
5641 ** Move to the next matching rowid.
5643 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
5644 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5645 assert( pIter->pIndex->rc==SQLITE_OK );
5646 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
5647 return fts5IndexReturn(pIter->pIndex);
5651 ** Move to the next matching term/rowid. Used by the fts5vocab module.
5653 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
5654 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5655 Fts5Index *p = pIter->pIndex;
5657 assert( pIter->pIndex->rc==SQLITE_OK );
5659 fts5MultiIterNext(p, pIter, 0, 0);
5660 if( p->rc==SQLITE_OK ){
5661 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
5662 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
5663 fts5DataRelease(pSeg->pLeaf);
5664 pSeg->pLeaf = 0;
5665 pIter->base.bEof = 1;
5669 return fts5IndexReturn(pIter->pIndex);
5673 ** Move to the next matching rowid that occurs at or after iMatch. The
5674 ** definition of "at or after" depends on whether this iterator iterates
5675 ** in ascending or descending rowid order.
5677 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
5678 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5679 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
5680 return fts5IndexReturn(pIter->pIndex);
5684 ** Return the current term.
5686 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
5687 int n;
5688 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
5689 assert_nc( z || n<=1 );
5690 *pn = n-1;
5691 return (z ? &z[1] : 0);
5695 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
5697 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
5698 if( pIndexIter ){
5699 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5700 Fts5Index *pIndex = pIter->pIndex;
5701 fts5MultiIterFree(pIter);
5702 sqlite3Fts5IndexCloseReader(pIndex);
5707 ** Read and decode the "averages" record from the database.
5709 ** Parameter anSize must point to an array of size nCol, where nCol is
5710 ** the number of user defined columns in the FTS table.
5712 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
5713 int nCol = p->pConfig->nCol;
5714 Fts5Data *pData;
5716 *pnRow = 0;
5717 memset(anSize, 0, sizeof(i64) * nCol);
5718 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
5719 if( p->rc==SQLITE_OK && pData->nn ){
5720 int i = 0;
5721 int iCol;
5722 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
5723 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
5724 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
5728 fts5DataRelease(pData);
5729 return fts5IndexReturn(p);
5733 ** Replace the current "averages" record with the contents of the buffer
5734 ** supplied as the second argument.
5736 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
5737 assert( p->rc==SQLITE_OK );
5738 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
5739 return fts5IndexReturn(p);
5743 ** Return the total number of blocks this module has read from the %_data
5744 ** table since it was created.
5746 int sqlite3Fts5IndexReads(Fts5Index *p){
5747 return p->nRead;
5751 ** Set the 32-bit cookie value stored at the start of all structure
5752 ** records to the value passed as the second argument.
5754 ** Return SQLITE_OK if successful, or an SQLite error code if an error
5755 ** occurs.
5757 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
5758 int rc; /* Return code */
5759 Fts5Config *pConfig = p->pConfig; /* Configuration object */
5760 u8 aCookie[4]; /* Binary representation of iNew */
5761 sqlite3_blob *pBlob = 0;
5763 assert( p->rc==SQLITE_OK );
5764 sqlite3Fts5Put32(aCookie, iNew);
5766 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
5767 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
5769 if( rc==SQLITE_OK ){
5770 sqlite3_blob_write(pBlob, aCookie, 4, 0);
5771 rc = sqlite3_blob_close(pBlob);
5774 return rc;
5777 int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
5778 Fts5Structure *pStruct;
5779 pStruct = fts5StructureRead(p);
5780 fts5StructureRelease(pStruct);
5781 return fts5IndexReturn(p);
5785 /*************************************************************************
5786 **************************************************************************
5787 ** Below this point is the implementation of the integrity-check
5788 ** functionality.
5792 ** Return a simple checksum value based on the arguments.
5794 u64 sqlite3Fts5IndexEntryCksum(
5795 i64 iRowid,
5796 int iCol,
5797 int iPos,
5798 int iIdx,
5799 const char *pTerm,
5800 int nTerm
5802 int i;
5803 u64 ret = iRowid;
5804 ret += (ret<<3) + iCol;
5805 ret += (ret<<3) + iPos;
5806 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
5807 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
5808 return ret;
5811 #ifdef SQLITE_DEBUG
5813 ** This function is purely an internal test. It does not contribute to
5814 ** FTS functionality, or even the integrity-check, in any way.
5816 ** Instead, it tests that the same set of pgno/rowid combinations are
5817 ** visited regardless of whether the doclist-index identified by parameters
5818 ** iSegid/iLeaf is iterated in forwards or reverse order.
5820 static void fts5TestDlidxReverse(
5821 Fts5Index *p,
5822 int iSegid, /* Segment id to load from */
5823 int iLeaf /* Load doclist-index for this leaf */
5825 Fts5DlidxIter *pDlidx = 0;
5826 u64 cksum1 = 13;
5827 u64 cksum2 = 13;
5829 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
5830 fts5DlidxIterEof(p, pDlidx)==0;
5831 fts5DlidxIterNext(p, pDlidx)
5833 i64 iRowid = fts5DlidxIterRowid(pDlidx);
5834 int pgno = fts5DlidxIterPgno(pDlidx);
5835 assert( pgno>iLeaf );
5836 cksum1 += iRowid + ((i64)pgno<<32);
5838 fts5DlidxIterFree(pDlidx);
5839 pDlidx = 0;
5841 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
5842 fts5DlidxIterEof(p, pDlidx)==0;
5843 fts5DlidxIterPrev(p, pDlidx)
5845 i64 iRowid = fts5DlidxIterRowid(pDlidx);
5846 int pgno = fts5DlidxIterPgno(pDlidx);
5847 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
5848 cksum2 += iRowid + ((i64)pgno<<32);
5850 fts5DlidxIterFree(pDlidx);
5851 pDlidx = 0;
5853 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
5856 static int fts5QueryCksum(
5857 Fts5Index *p, /* Fts5 index object */
5858 int iIdx,
5859 const char *z, /* Index key to query for */
5860 int n, /* Size of index key in bytes */
5861 int flags, /* Flags for Fts5IndexQuery */
5862 u64 *pCksum /* IN/OUT: Checksum value */
5864 int eDetail = p->pConfig->eDetail;
5865 u64 cksum = *pCksum;
5866 Fts5IndexIter *pIter = 0;
5867 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
5869 while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
5870 i64 rowid = pIter->iRowid;
5872 if( eDetail==FTS5_DETAIL_NONE ){
5873 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
5874 }else{
5875 Fts5PoslistReader sReader;
5876 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
5877 sReader.bEof==0;
5878 sqlite3Fts5PoslistReaderNext(&sReader)
5880 int iCol = FTS5_POS2COLUMN(sReader.iPos);
5881 int iOff = FTS5_POS2OFFSET(sReader.iPos);
5882 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
5885 if( rc==SQLITE_OK ){
5886 rc = sqlite3Fts5IterNext(pIter);
5889 sqlite3Fts5IterClose(pIter);
5891 *pCksum = cksum;
5892 return rc;
5896 ** Check if buffer z[], size n bytes, contains as series of valid utf-8
5897 ** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
5898 ** contain valid utf-8, return non-zero.
5900 static int fts5TestUtf8(const char *z, int n){
5901 int i = 0;
5902 assert_nc( n>0 );
5903 while( i<n ){
5904 if( (z[i] & 0x80)==0x00 ){
5905 i++;
5906 }else
5907 if( (z[i] & 0xE0)==0xC0 ){
5908 if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
5909 i += 2;
5910 }else
5911 if( (z[i] & 0xF0)==0xE0 ){
5912 if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
5913 i += 3;
5914 }else
5915 if( (z[i] & 0xF8)==0xF0 ){
5916 if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
5917 if( (z[i+2] & 0xC0)!=0x80 ) return 1;
5918 i += 3;
5919 }else{
5920 return 1;
5924 return 0;
5928 ** This function is also purely an internal test. It does not contribute to
5929 ** FTS functionality, or even the integrity-check, in any way.
5931 static void fts5TestTerm(
5932 Fts5Index *p,
5933 Fts5Buffer *pPrev, /* Previous term */
5934 const char *z, int n, /* Possibly new term to test */
5935 u64 expected,
5936 u64 *pCksum
5938 int rc = p->rc;
5939 if( pPrev->n==0 ){
5940 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5941 }else
5942 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
5943 u64 cksum3 = *pCksum;
5944 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
5945 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
5946 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
5947 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
5948 u64 ck1 = 0;
5949 u64 ck2 = 0;
5951 /* Check that the results returned for ASC and DESC queries are
5952 ** the same. If not, call this corruption. */
5953 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
5954 if( rc==SQLITE_OK ){
5955 int f = flags|FTS5INDEX_QUERY_DESC;
5956 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5958 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5960 /* If this is a prefix query, check that the results returned if the
5961 ** the index is disabled are the same. In both ASC and DESC order.
5963 ** This check may only be performed if the hash table is empty. This
5964 ** is because the hash table only supports a single scan query at
5965 ** a time, and the multi-iter loop from which this function is called
5966 ** is already performing such a scan.
5968 ** Also only do this if buffer zTerm contains nTerm bytes of valid
5969 ** utf-8. Otherwise, the last part of the buffer contents might contain
5970 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
5971 ** character stored in the main fts index, which will cause the
5972 ** test to fail. */
5973 if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
5974 if( iIdx>0 && rc==SQLITE_OK ){
5975 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
5976 ck2 = 0;
5977 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5978 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5980 if( iIdx>0 && rc==SQLITE_OK ){
5981 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
5982 ck2 = 0;
5983 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5984 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5988 cksum3 ^= ck1;
5989 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5991 if( rc==SQLITE_OK && cksum3!=expected ){
5992 rc = FTS5_CORRUPT;
5994 *pCksum = cksum3;
5996 p->rc = rc;
5999 #else
6000 # define fts5TestDlidxReverse(x,y,z)
6001 # define fts5TestTerm(u,v,w,x,y,z)
6002 #endif
6005 ** Check that:
6007 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
6008 ** contain zero terms.
6009 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
6010 ** contain zero rowids.
6012 static void fts5IndexIntegrityCheckEmpty(
6013 Fts5Index *p,
6014 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
6015 int iFirst,
6016 int iNoRowid,
6017 int iLast
6019 int i;
6021 /* Now check that the iter.nEmpty leaves following the current leaf
6022 ** (a) exist and (b) contain no terms. */
6023 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
6024 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
6025 if( pLeaf ){
6026 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
6027 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
6029 fts5DataRelease(pLeaf);
6033 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
6034 int iTermOff = 0;
6035 int ii;
6037 Fts5Buffer buf1 = {0,0,0};
6038 Fts5Buffer buf2 = {0,0,0};
6040 ii = pLeaf->szLeaf;
6041 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
6042 int res;
6043 int iOff;
6044 int nIncr;
6046 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
6047 iTermOff += nIncr;
6048 iOff = iTermOff;
6050 if( iOff>=pLeaf->szLeaf ){
6051 p->rc = FTS5_CORRUPT;
6052 }else if( iTermOff==nIncr ){
6053 int nByte;
6054 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
6055 if( (iOff+nByte)>pLeaf->szLeaf ){
6056 p->rc = FTS5_CORRUPT;
6057 }else{
6058 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
6060 }else{
6061 int nKeep, nByte;
6062 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
6063 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
6064 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
6065 p->rc = FTS5_CORRUPT;
6066 }else{
6067 buf1.n = nKeep;
6068 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
6071 if( p->rc==SQLITE_OK ){
6072 res = fts5BufferCompare(&buf1, &buf2);
6073 if( res<=0 ) p->rc = FTS5_CORRUPT;
6076 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
6079 fts5BufferFree(&buf1);
6080 fts5BufferFree(&buf2);
6083 static void fts5IndexIntegrityCheckSegment(
6084 Fts5Index *p, /* FTS5 backend object */
6085 Fts5StructureSegment *pSeg /* Segment to check internal consistency */
6087 Fts5Config *pConfig = p->pConfig;
6088 sqlite3_stmt *pStmt = 0;
6089 int rc2;
6090 int iIdxPrevLeaf = pSeg->pgnoFirst-1;
6091 int iDlidxPrevLeaf = pSeg->pgnoLast;
6093 if( pSeg->pgnoFirst==0 ) return;
6095 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
6096 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
6097 "ORDER BY 1, 2",
6098 pConfig->zDb, pConfig->zName, pSeg->iSegid
6101 /* Iterate through the b-tree hierarchy. */
6102 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
6103 i64 iRow; /* Rowid for this leaf */
6104 Fts5Data *pLeaf; /* Data for this leaf */
6106 const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
6107 int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
6108 int iIdxLeaf = sqlite3_column_int(pStmt, 2);
6109 int bIdxDlidx = sqlite3_column_int(pStmt, 3);
6111 /* If the leaf in question has already been trimmed from the segment,
6112 ** ignore this b-tree entry. Otherwise, load it into memory. */
6113 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
6114 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
6115 pLeaf = fts5LeafRead(p, iRow);
6116 if( pLeaf==0 ) break;
6118 /* Check that the leaf contains at least one term, and that it is equal
6119 ** to or larger than the split-key in zIdxTerm. Also check that if there
6120 ** is also a rowid pointer within the leaf page header, it points to a
6121 ** location before the term. */
6122 if( pLeaf->nn<=pLeaf->szLeaf ){
6123 p->rc = FTS5_CORRUPT;
6124 }else{
6125 int iOff; /* Offset of first term on leaf */
6126 int iRowidOff; /* Offset of first rowid on leaf */
6127 int nTerm; /* Size of term on leaf in bytes */
6128 int res; /* Comparison of term and split-key */
6130 iOff = fts5LeafFirstTermOff(pLeaf);
6131 iRowidOff = fts5LeafFirstRowidOff(pLeaf);
6132 if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
6133 p->rc = FTS5_CORRUPT;
6134 }else{
6135 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
6136 res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
6137 if( res==0 ) res = nTerm - nIdxTerm;
6138 if( res<0 ) p->rc = FTS5_CORRUPT;
6141 fts5IntegrityCheckPgidx(p, pLeaf);
6143 fts5DataRelease(pLeaf);
6144 if( p->rc ) break;
6146 /* Now check that the iter.nEmpty leaves following the current leaf
6147 ** (a) exist and (b) contain no terms. */
6148 fts5IndexIntegrityCheckEmpty(
6149 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
6151 if( p->rc ) break;
6153 /* If there is a doclist-index, check that it looks right. */
6154 if( bIdxDlidx ){
6155 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
6156 int iPrevLeaf = iIdxLeaf;
6157 int iSegid = pSeg->iSegid;
6158 int iPg = 0;
6159 i64 iKey;
6161 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
6162 fts5DlidxIterEof(p, pDlidx)==0;
6163 fts5DlidxIterNext(p, pDlidx)
6166 /* Check any rowid-less pages that occur before the current leaf. */
6167 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
6168 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
6169 pLeaf = fts5DataRead(p, iKey);
6170 if( pLeaf ){
6171 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
6172 fts5DataRelease(pLeaf);
6175 iPrevLeaf = fts5DlidxIterPgno(pDlidx);
6177 /* Check that the leaf page indicated by the iterator really does
6178 ** contain the rowid suggested by the same. */
6179 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
6180 pLeaf = fts5DataRead(p, iKey);
6181 if( pLeaf ){
6182 i64 iRowid;
6183 int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
6184 ASSERT_SZLEAF_OK(pLeaf);
6185 if( iRowidOff>=pLeaf->szLeaf ){
6186 p->rc = FTS5_CORRUPT;
6187 }else{
6188 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
6189 if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
6191 fts5DataRelease(pLeaf);
6195 iDlidxPrevLeaf = iPg;
6196 fts5DlidxIterFree(pDlidx);
6197 fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
6198 }else{
6199 iDlidxPrevLeaf = pSeg->pgnoLast;
6200 /* TODO: Check there is no doclist index */
6203 iIdxPrevLeaf = iIdxLeaf;
6206 rc2 = sqlite3_finalize(pStmt);
6207 if( p->rc==SQLITE_OK ) p->rc = rc2;
6209 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
6210 #if 0
6211 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
6212 p->rc = FTS5_CORRUPT;
6214 #endif
6219 ** Run internal checks to ensure that the FTS index (a) is internally
6220 ** consistent and (b) contains entries for which the XOR of the checksums
6221 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
6223 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
6224 ** checksum does not match. Return SQLITE_OK if all checks pass without
6225 ** error, or some other SQLite error code if another error (e.g. OOM)
6226 ** occurs.
6228 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
6229 int eDetail = p->pConfig->eDetail;
6230 u64 cksum2 = 0; /* Checksum based on contents of indexes */
6231 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
6232 Fts5Iter *pIter; /* Used to iterate through entire index */
6233 Fts5Structure *pStruct; /* Index structure */
6234 int iLvl, iSeg;
6236 #ifdef SQLITE_DEBUG
6237 /* Used by extra internal tests only run if NDEBUG is not defined */
6238 u64 cksum3 = 0; /* Checksum based on contents of indexes */
6239 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
6240 #endif
6241 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
6243 /* Load the FTS index structure */
6244 pStruct = fts5StructureRead(p);
6245 if( pStruct==0 ){
6246 assert( p->rc!=SQLITE_OK );
6247 return fts5IndexReturn(p);
6250 /* Check that the internal nodes of each segment match the leaves */
6251 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
6252 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
6253 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
6254 fts5IndexIntegrityCheckSegment(p, pSeg);
6258 /* The cksum argument passed to this function is a checksum calculated
6259 ** based on all expected entries in the FTS index (including prefix index
6260 ** entries). This block checks that a checksum calculated based on the
6261 ** actual contents of FTS index is identical.
6263 ** Two versions of the same checksum are calculated. The first (stack
6264 ** variable cksum2) based on entries extracted from the full-text index
6265 ** while doing a linear scan of each individual index in turn.
6267 ** As each term visited by the linear scans, a separate query for the
6268 ** same term is performed. cksum3 is calculated based on the entries
6269 ** extracted by these queries.
6271 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
6272 fts5MultiIterEof(p, pIter)==0;
6273 fts5MultiIterNext(p, pIter, 0, 0)
6275 int n; /* Size of term in bytes */
6276 i64 iPos = 0; /* Position read from poslist */
6277 int iOff = 0; /* Offset within poslist */
6278 i64 iRowid = fts5MultiIterRowid(pIter);
6279 char *z = (char*)fts5MultiIterTerm(pIter, &n);
6281 /* If this is a new term, query for it. Update cksum3 with the results. */
6282 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
6283 if( p->rc ) break;
6285 if( eDetail==FTS5_DETAIL_NONE ){
6286 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
6287 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
6289 }else{
6290 poslist.n = 0;
6291 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
6292 fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
6293 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
6294 int iCol = FTS5_POS2COLUMN(iPos);
6295 int iTokOff = FTS5_POS2OFFSET(iPos);
6296 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
6300 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
6302 fts5MultiIterFree(pIter);
6303 if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
6305 fts5StructureRelease(pStruct);
6306 #ifdef SQLITE_DEBUG
6307 fts5BufferFree(&term);
6308 #endif
6309 fts5BufferFree(&poslist);
6310 return fts5IndexReturn(p);
6313 /*************************************************************************
6314 **************************************************************************
6315 ** Below this point is the implementation of the fts5_decode() scalar
6316 ** function only.
6319 #ifdef SQLITE_TEST
6321 ** Decode a segment-data rowid from the %_data table. This function is
6322 ** the opposite of macro FTS5_SEGMENT_ROWID().
6324 static void fts5DecodeRowid(
6325 i64 iRowid, /* Rowid from %_data table */
6326 int *piSegid, /* OUT: Segment id */
6327 int *pbDlidx, /* OUT: Dlidx flag */
6328 int *piHeight, /* OUT: Height */
6329 int *piPgno /* OUT: Page number */
6331 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
6332 iRowid >>= FTS5_DATA_PAGE_B;
6334 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
6335 iRowid >>= FTS5_DATA_HEIGHT_B;
6337 *pbDlidx = (int)(iRowid & 0x0001);
6338 iRowid >>= FTS5_DATA_DLI_B;
6340 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
6342 #endif /* SQLITE_TEST */
6344 #ifdef SQLITE_TEST
6345 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
6346 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */
6347 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
6349 if( iSegid==0 ){
6350 if( iKey==FTS5_AVERAGES_ROWID ){
6351 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
6352 }else{
6353 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
6356 else{
6357 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
6358 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
6362 #endif /* SQLITE_TEST */
6364 #ifdef SQLITE_TEST
6365 static void fts5DebugStructure(
6366 int *pRc, /* IN/OUT: error code */
6367 Fts5Buffer *pBuf,
6368 Fts5Structure *p
6370 int iLvl, iSeg; /* Iterate through levels, segments */
6372 for(iLvl=0; iLvl<p->nLevel; iLvl++){
6373 Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
6374 sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
6375 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
6377 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
6378 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
6379 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
6380 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
6383 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
6386 #endif /* SQLITE_TEST */
6388 #ifdef SQLITE_TEST
6390 ** This is part of the fts5_decode() debugging aid.
6392 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
6393 ** function appends a human-readable representation of the same object
6394 ** to the buffer passed as the second argument.
6396 static void fts5DecodeStructure(
6397 int *pRc, /* IN/OUT: error code */
6398 Fts5Buffer *pBuf,
6399 const u8 *pBlob, int nBlob
6401 int rc; /* Return code */
6402 Fts5Structure *p = 0; /* Decoded structure object */
6404 rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
6405 if( rc!=SQLITE_OK ){
6406 *pRc = rc;
6407 return;
6410 fts5DebugStructure(pRc, pBuf, p);
6411 fts5StructureRelease(p);
6413 #endif /* SQLITE_TEST */
6415 #ifdef SQLITE_TEST
6417 ** This is part of the fts5_decode() debugging aid.
6419 ** Arguments pBlob/nBlob contain an "averages" record. This function
6420 ** appends a human-readable representation of record to the buffer passed
6421 ** as the second argument.
6423 static void fts5DecodeAverages(
6424 int *pRc, /* IN/OUT: error code */
6425 Fts5Buffer *pBuf,
6426 const u8 *pBlob, int nBlob
6428 int i = 0;
6429 const char *zSpace = "";
6431 while( i<nBlob ){
6432 u64 iVal;
6433 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
6434 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
6435 zSpace = " ";
6438 #endif /* SQLITE_TEST */
6440 #ifdef SQLITE_TEST
6442 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
6443 ** each varint and append its string representation to buffer pBuf. Return
6444 ** after either the input buffer is exhausted or a 0 value is read.
6446 ** The return value is the number of bytes read from the input buffer.
6448 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6449 int iOff = 0;
6450 while( iOff<n ){
6451 int iVal;
6452 iOff += fts5GetVarint32(&a[iOff], iVal);
6453 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
6455 return iOff;
6457 #endif /* SQLITE_TEST */
6459 #ifdef SQLITE_TEST
6461 ** The start of buffer (a/n) contains the start of a doclist. The doclist
6462 ** may or may not finish within the buffer. This function appends a text
6463 ** representation of the part of the doclist that is present to buffer
6464 ** pBuf.
6466 ** The return value is the number of bytes read from the input buffer.
6468 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6469 i64 iDocid = 0;
6470 int iOff = 0;
6472 if( n>0 ){
6473 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
6474 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6476 while( iOff<n ){
6477 int nPos;
6478 int bDel;
6479 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
6480 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
6481 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
6482 if( iOff<n ){
6483 i64 iDelta;
6484 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
6485 iDocid += iDelta;
6486 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6490 return iOff;
6492 #endif /* SQLITE_TEST */
6494 #ifdef SQLITE_TEST
6496 ** This function is part of the fts5_decode() debugging function. It is
6497 ** only ever used with detail=none tables.
6499 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6500 ** tables. This function appends a human-readable version of that list to
6501 ** buffer pBuf.
6503 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6504 ** no-op. If an OOM or other error occurs within this function, *pRc is
6505 ** set to an SQLite error code before returning. The final state of buffer
6506 ** pBuf is undefined in this case.
6508 static void fts5DecodeRowidList(
6509 int *pRc, /* IN/OUT: Error code */
6510 Fts5Buffer *pBuf, /* Buffer to append text to */
6511 const u8 *pData, int nData /* Data to decode list-of-rowids from */
6513 int i = 0;
6514 i64 iRowid = 0;
6516 while( i<nData ){
6517 const char *zApp = "";
6518 u64 iVal;
6519 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
6520 iRowid += iVal;
6522 if( i<nData && pData[i]==0x00 ){
6523 i++;
6524 if( i<nData && pData[i]==0x00 ){
6525 i++;
6526 zApp = "+";
6527 }else{
6528 zApp = "*";
6532 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
6535 #endif /* SQLITE_TEST */
6537 #ifdef SQLITE_TEST
6539 ** The implementation of user-defined scalar function fts5_decode().
6541 static void fts5DecodeFunction(
6542 sqlite3_context *pCtx, /* Function call context */
6543 int nArg, /* Number of args (always 2) */
6544 sqlite3_value **apVal /* Function arguments */
6546 i64 iRowid; /* Rowid for record being decoded */
6547 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
6548 const u8 *aBlob; int n; /* Record to decode */
6549 u8 *a = 0;
6550 Fts5Buffer s; /* Build up text to return here */
6551 int rc = SQLITE_OK; /* Return code */
6552 sqlite3_int64 nSpace = 0;
6553 int eDetailNone = (sqlite3_user_data(pCtx)!=0);
6555 assert( nArg==2 );
6556 UNUSED_PARAM(nArg);
6557 memset(&s, 0, sizeof(Fts5Buffer));
6558 iRowid = sqlite3_value_int64(apVal[0]);
6560 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
6561 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
6562 ** buffer overreads even if the record is corrupt. */
6563 n = sqlite3_value_bytes(apVal[1]);
6564 aBlob = sqlite3_value_blob(apVal[1]);
6565 nSpace = n + FTS5_DATA_ZERO_PADDING;
6566 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
6567 if( a==0 ) goto decode_out;
6568 if( n>0 ) memcpy(a, aBlob, n);
6570 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
6572 fts5DebugRowid(&rc, &s, iRowid);
6573 if( bDlidx ){
6574 Fts5Data dlidx;
6575 Fts5DlidxLvl lvl;
6577 dlidx.p = a;
6578 dlidx.nn = n;
6580 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
6581 lvl.pData = &dlidx;
6582 lvl.iLeafPgno = iPgno;
6584 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
6585 sqlite3Fts5BufferAppendPrintf(&rc, &s,
6586 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
6589 }else if( iSegid==0 ){
6590 if( iRowid==FTS5_AVERAGES_ROWID ){
6591 fts5DecodeAverages(&rc, &s, a, n);
6592 }else{
6593 fts5DecodeStructure(&rc, &s, a, n);
6595 }else if( eDetailNone ){
6596 Fts5Buffer term; /* Current term read from page */
6597 int szLeaf;
6598 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6599 int iTermOff;
6600 int nKeep = 0;
6601 int iOff;
6603 memset(&term, 0, sizeof(Fts5Buffer));
6605 /* Decode any entries that occur before the first term. */
6606 if( szLeaf<n ){
6607 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
6608 }else{
6609 iTermOff = szLeaf;
6611 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
6613 iOff = iTermOff;
6614 while( iOff<szLeaf ){
6615 int nAppend;
6617 /* Read the term data for the next term*/
6618 iOff += fts5GetVarint32(&a[iOff], nAppend);
6619 term.n = nKeep;
6620 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
6621 sqlite3Fts5BufferAppendPrintf(
6622 &rc, &s, " term=%.*s", term.n, (const char*)term.p
6624 iOff += nAppend;
6626 /* Figure out where the doclist for this term ends */
6627 if( iPgidxOff<n ){
6628 int nIncr;
6629 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
6630 iTermOff += nIncr;
6631 }else{
6632 iTermOff = szLeaf;
6635 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
6636 iOff = iTermOff;
6637 if( iOff<szLeaf ){
6638 iOff += fts5GetVarint32(&a[iOff], nKeep);
6642 fts5BufferFree(&term);
6643 }else{
6644 Fts5Buffer term; /* Current term read from page */
6645 int szLeaf; /* Offset of pgidx in a[] */
6646 int iPgidxOff;
6647 int iPgidxPrev = 0; /* Previous value read from pgidx */
6648 int iTermOff = 0;
6649 int iRowidOff = 0;
6650 int iOff;
6651 int nDoclist;
6653 memset(&term, 0, sizeof(Fts5Buffer));
6655 if( n<4 ){
6656 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
6657 goto decode_out;
6658 }else{
6659 iRowidOff = fts5GetU16(&a[0]);
6660 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6661 if( iPgidxOff<n ){
6662 fts5GetVarint32(&a[iPgidxOff], iTermOff);
6663 }else if( iPgidxOff>n ){
6664 rc = FTS5_CORRUPT;
6665 goto decode_out;
6669 /* Decode the position list tail at the start of the page */
6670 if( iRowidOff!=0 ){
6671 iOff = iRowidOff;
6672 }else if( iTermOff!=0 ){
6673 iOff = iTermOff;
6674 }else{
6675 iOff = szLeaf;
6677 if( iOff>n ){
6678 rc = FTS5_CORRUPT;
6679 goto decode_out;
6681 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
6683 /* Decode any more doclist data that appears on the page before the
6684 ** first term. */
6685 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
6686 if( nDoclist+iOff>n ){
6687 rc = FTS5_CORRUPT;
6688 goto decode_out;
6690 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
6692 while( iPgidxOff<n && rc==SQLITE_OK ){
6693 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
6694 int nByte; /* Bytes of data */
6695 int iEnd;
6697 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
6698 iPgidxPrev += nByte;
6699 iOff = iPgidxPrev;
6701 if( iPgidxOff<n ){
6702 fts5GetVarint32(&a[iPgidxOff], nByte);
6703 iEnd = iPgidxPrev + nByte;
6704 }else{
6705 iEnd = szLeaf;
6707 if( iEnd>szLeaf ){
6708 rc = FTS5_CORRUPT;
6709 break;
6712 if( bFirst==0 ){
6713 iOff += fts5GetVarint32(&a[iOff], nByte);
6714 if( nByte>term.n ){
6715 rc = FTS5_CORRUPT;
6716 break;
6718 term.n = nByte;
6720 iOff += fts5GetVarint32(&a[iOff], nByte);
6721 if( iOff+nByte>n ){
6722 rc = FTS5_CORRUPT;
6723 break;
6725 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
6726 iOff += nByte;
6728 sqlite3Fts5BufferAppendPrintf(
6729 &rc, &s, " term=%.*s", term.n, (const char*)term.p
6731 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
6734 fts5BufferFree(&term);
6737 decode_out:
6738 sqlite3_free(a);
6739 if( rc==SQLITE_OK ){
6740 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
6741 }else{
6742 sqlite3_result_error_code(pCtx, rc);
6744 fts5BufferFree(&s);
6746 #endif /* SQLITE_TEST */
6748 #ifdef SQLITE_TEST
6750 ** The implementation of user-defined scalar function fts5_rowid().
6752 static void fts5RowidFunction(
6753 sqlite3_context *pCtx, /* Function call context */
6754 int nArg, /* Number of args (always 2) */
6755 sqlite3_value **apVal /* Function arguments */
6757 const char *zArg;
6758 if( nArg==0 ){
6759 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
6760 }else{
6761 zArg = (const char*)sqlite3_value_text(apVal[0]);
6762 if( 0==sqlite3_stricmp(zArg, "segment") ){
6763 i64 iRowid;
6764 int segid, pgno;
6765 if( nArg!=3 ){
6766 sqlite3_result_error(pCtx,
6767 "should be: fts5_rowid('segment', segid, pgno))", -1
6769 }else{
6770 segid = sqlite3_value_int(apVal[1]);
6771 pgno = sqlite3_value_int(apVal[2]);
6772 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
6773 sqlite3_result_int64(pCtx, iRowid);
6775 }else{
6776 sqlite3_result_error(pCtx,
6777 "first arg to fts5_rowid() must be 'segment'" , -1
6782 #endif /* SQLITE_TEST */
6785 ** This is called as part of registering the FTS5 module with database
6786 ** connection db. It registers several user-defined scalar functions useful
6787 ** with FTS5.
6789 ** If successful, SQLITE_OK is returned. If an error occurs, some other
6790 ** SQLite error code is returned instead.
6792 int sqlite3Fts5IndexInit(sqlite3 *db){
6793 #ifdef SQLITE_TEST
6794 int rc = sqlite3_create_function(
6795 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
6798 if( rc==SQLITE_OK ){
6799 rc = sqlite3_create_function(
6800 db, "fts5_decode_none", 2,
6801 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
6805 if( rc==SQLITE_OK ){
6806 rc = sqlite3_create_function(
6807 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
6810 return rc;
6811 #else
6812 return SQLITE_OK;
6813 UNUSED_PARAM(db);
6814 #endif
6818 int sqlite3Fts5IndexReset(Fts5Index *p){
6819 assert( p->pStruct==0 || p->iStructVersion!=0 );
6820 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
6821 fts5StructureInvalidate(p);
6823 return fts5IndexReturn(p);