Snapshot of upstream SQLite 3.42.0
[sqlcipher.git] / ext / fts5 / fts5_index.c
blobeaeeeff4f7469d13c11011df7ab62cfb65b577b7
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
20 #include "fts5Int.h"
23 ** Overview:
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
46 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
47 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
49 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
51 #define FTS5_MAIN_PREFIX '0'
53 #if FTS5_MAX_PREFIX_INDEXES > 31
54 # error "FTS5_MAX_PREFIX_INDEXES is too large"
55 #endif
57 #define FTS5_MAX_LEVEL 64
60 ** Details:
62 ** The %_data table managed by this module,
64 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
66 ** , contains the following 5 types of records. See the comments surrounding
67 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
68 ** assigned to each fo them.
70 ** 1. Structure Records:
72 ** The set of segments that make up an index - the index structure - are
73 ** recorded in a single record within the %_data table. The record consists
74 ** of a single 32-bit configuration cookie value followed by a list of
75 ** SQLite varints. If the FTS table features more than one index (because
76 ** there are one or more prefix indexes), it is guaranteed that all share
77 ** the same cookie value.
79 ** Immediately following the configuration cookie, the record begins with
80 ** three varints:
82 ** + number of levels,
83 ** + total number of segments on all levels,
84 ** + value of write counter.
86 ** Then, for each level from 0 to nMax:
88 ** + number of input segments in ongoing merge.
89 ** + total number of segments in level.
90 ** + for each segment from oldest to newest:
91 ** + segment id (always > 0)
92 ** + first leaf page number (often 1, always greater than 0)
93 ** + final leaf page number
95 ** 2. The Averages Record:
97 ** A single record within the %_data table. The data is a list of varints.
98 ** The first value is the number of rows in the index. Then, for each column
99 ** from left to right, the total number of tokens in the column for all
100 ** rows of the table.
102 ** 3. Segment leaves:
104 ** TERM/DOCLIST FORMAT:
106 ** Most of each segment leaf is taken up by term/doclist data. The
107 ** general format of term/doclist, starting with the first term
108 ** on the leaf page, is:
110 ** varint : size of first term
111 ** blob: first term data
112 ** doclist: first doclist
113 ** zero-or-more {
114 ** varint: number of bytes in common with previous term
115 ** varint: number of bytes of new term data (nNew)
116 ** blob: nNew bytes of new term data
117 ** doclist: next doclist
118 ** }
120 ** doclist format:
122 ** varint: first rowid
123 ** poslist: first poslist
124 ** zero-or-more {
125 ** varint: rowid delta (always > 0)
126 ** poslist: next poslist
127 ** }
129 ** poslist format:
131 ** varint: size of poslist in bytes multiplied by 2, not including
132 ** this field. Plus 1 if this entry carries the "delete" flag.
133 ** collist: collist for column 0
134 ** zero-or-more {
135 ** 0x01 byte
136 ** varint: column number (I)
137 ** collist: collist for column I
138 ** }
140 ** collist format:
142 ** varint: first offset + 2
143 ** zero-or-more {
144 ** varint: offset delta + 2
145 ** }
147 ** PAGE FORMAT
149 ** Each leaf page begins with a 4-byte header containing 2 16-bit
150 ** unsigned integer fields in big-endian format. They are:
152 ** * The byte offset of the first rowid on the page, if it exists
153 ** and occurs before the first term (otherwise 0).
155 ** * The byte offset of the start of the page footer. If the page
156 ** footer is 0 bytes in size, then this field is the same as the
157 ** size of the leaf page in bytes.
159 ** The page footer consists of a single varint for each term located
160 ** on the page. Each varint is the byte offset of the current term
161 ** within the page, delta-compressed against the previous value. In
162 ** other words, the first varint in the footer is the byte offset of
163 ** the first term, the second is the byte offset of the second less that
164 ** of the first, and so on.
166 ** The term/doclist format described above is accurate if the entire
167 ** term/doclist data fits on a single leaf page. If this is not the case,
168 ** the format is changed in two ways:
170 ** + if the first rowid on a page occurs before the first term, it
171 ** is stored as a literal value:
173 ** varint: first rowid
175 ** + the first term on each page is stored in the same way as the
176 ** very first term of the segment:
178 ** varint : size of first term
179 ** blob: first term data
181 ** 5. Segment doclist indexes:
183 ** Doclist indexes are themselves b-trees, however they usually consist of
184 ** a single leaf record only. The format of each doclist index leaf page
185 ** is:
187 ** * Flags byte. Bits are:
188 ** 0x01: Clear if leaf is also the root page, otherwise set.
190 ** * Page number of fts index leaf page. As a varint.
192 ** * First rowid on page indicated by previous field. As a varint.
194 ** * A list of varints, one for each subsequent termless page. A
195 ** positive delta if the termless page contains at least one rowid,
196 ** or an 0x00 byte otherwise.
198 ** Internal doclist index nodes are:
200 ** * Flags byte. Bits are:
201 ** 0x01: Clear for root page, otherwise set.
203 ** * Page number of first child page. As a varint.
205 ** * Copy of first rowid on page indicated by previous field. As a varint.
207 ** * A list of delta-encoded varints - the first rowid on each subsequent
208 ** child page.
213 ** Rowids for the averages and structure records in the %_data table.
215 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
216 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */
219 ** Macros determining the rowids used by segment leaves and dlidx leaves
220 ** and nodes. All nodes and leaves are stored in the %_data table with large
221 ** positive rowids.
223 ** Each segment has a unique non-zero 16-bit id.
225 ** The rowid for each segment leaf is found by passing the segment id and
226 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
227 ** sequentially starting from 1.
229 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
230 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
231 #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
232 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
234 #define fts5_dri(segid, dlidx, height, pgno) ( \
235 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
236 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
237 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \
238 ((i64)(pgno)) \
241 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
242 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
244 #ifdef SQLITE_DEBUG
245 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
246 #endif
250 ** Each time a blob is read from the %_data table, it is padded with this
251 ** many zero bytes. This makes it easier to decode the various record formats
252 ** without overreading if the records are corrupt.
254 #define FTS5_DATA_ZERO_PADDING 8
255 #define FTS5_DATA_PADDING 20
257 typedef struct Fts5Data Fts5Data;
258 typedef struct Fts5DlidxIter Fts5DlidxIter;
259 typedef struct Fts5DlidxLvl Fts5DlidxLvl;
260 typedef struct Fts5DlidxWriter Fts5DlidxWriter;
261 typedef struct Fts5Iter Fts5Iter;
262 typedef struct Fts5PageWriter Fts5PageWriter;
263 typedef struct Fts5SegIter Fts5SegIter;
264 typedef struct Fts5DoclistIter Fts5DoclistIter;
265 typedef struct Fts5SegWriter Fts5SegWriter;
266 typedef struct Fts5Structure Fts5Structure;
267 typedef struct Fts5StructureLevel Fts5StructureLevel;
268 typedef struct Fts5StructureSegment Fts5StructureSegment;
270 struct Fts5Data {
271 u8 *p; /* Pointer to buffer containing record */
272 int nn; /* Size of record in bytes */
273 int szLeaf; /* Size of leaf without page-index */
277 ** One object per %_data table.
279 struct Fts5Index {
280 Fts5Config *pConfig; /* Virtual table configuration */
281 char *zDataTbl; /* Name of %_data table */
282 int nWorkUnit; /* Leaf pages in a "unit" of work */
285 ** Variables related to the accumulation of tokens and doclists within the
286 ** in-memory hash tables before they are flushed to disk.
288 Fts5Hash *pHash; /* Hash table for in-memory data */
289 int nPendingData; /* Current bytes of pending data */
290 i64 iWriteRowid; /* Rowid for current doc being written */
291 int bDelete; /* Current write is a delete */
293 /* Error state. */
294 int rc; /* Current error code */
296 /* State used by the fts5DataXXX() functions. */
297 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
298 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
299 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
300 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
301 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
302 sqlite3_stmt *pIdxSelect;
303 int nRead; /* Total number of blocks read */
305 sqlite3_stmt *pDeleteFromIdx;
307 sqlite3_stmt *pDataVersion;
308 i64 iStructVersion; /* data_version when pStruct read */
309 Fts5Structure *pStruct; /* Current db structure (or NULL) */
312 struct Fts5DoclistIter {
313 u8 *aEof; /* Pointer to 1 byte past end of doclist */
315 /* Output variables. aPoslist==0 at EOF */
316 i64 iRowid;
317 u8 *aPoslist;
318 int nPoslist;
319 int nSize;
323 ** The contents of the "structure" record for each index are represented
324 ** using an Fts5Structure record in memory. Which uses instances of the
325 ** other Fts5StructureXXX types as components.
327 struct Fts5StructureSegment {
328 int iSegid; /* Segment id */
329 int pgnoFirst; /* First leaf page number in segment */
330 int pgnoLast; /* Last leaf page number in segment */
332 struct Fts5StructureLevel {
333 int nMerge; /* Number of segments in incr-merge */
334 int nSeg; /* Total number of segments on level */
335 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
337 struct Fts5Structure {
338 int nRef; /* Object reference count */
339 u64 nWriteCounter; /* Total leaves written to level 0 */
340 int nSegment; /* Total segments in this structure */
341 int nLevel; /* Number of levels in this index */
342 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
346 ** An object of type Fts5SegWriter is used to write to segments.
348 struct Fts5PageWriter {
349 int pgno; /* Page number for this page */
350 int iPrevPgidx; /* Previous value written into pgidx */
351 Fts5Buffer buf; /* Buffer containing leaf data */
352 Fts5Buffer pgidx; /* Buffer containing page-index */
353 Fts5Buffer term; /* Buffer containing previous term on page */
355 struct Fts5DlidxWriter {
356 int pgno; /* Page number for this page */
357 int bPrevValid; /* True if iPrev is valid */
358 i64 iPrev; /* Previous rowid value written to page */
359 Fts5Buffer buf; /* Buffer containing page data */
361 struct Fts5SegWriter {
362 int iSegid; /* Segid to write to */
363 Fts5PageWriter writer; /* PageWriter object */
364 i64 iPrevRowid; /* Previous rowid written to current leaf */
365 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
366 u8 bFirstRowidInPage; /* True if next rowid is first in page */
367 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
368 u8 bFirstTermInPage; /* True if next term will be first in leaf */
369 int nLeafWritten; /* Number of leaf pages written */
370 int nEmpty; /* Number of contiguous term-less nodes */
372 int nDlidx; /* Allocated size of aDlidx[] array */
373 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
375 /* Values to insert into the %_idx table */
376 Fts5Buffer btterm; /* Next term to insert into %_idx table */
377 int iBtPage; /* Page number corresponding to btterm */
380 typedef struct Fts5CResult Fts5CResult;
381 struct Fts5CResult {
382 u16 iFirst; /* aSeg[] index of firstest iterator */
383 u8 bTermEq; /* True if the terms are equal */
387 ** Object for iterating through a single segment, visiting each term/rowid
388 ** pair in the segment.
390 ** pSeg:
391 ** The segment to iterate through.
393 ** iLeafPgno:
394 ** Current leaf page number within segment.
396 ** iLeafOffset:
397 ** Byte offset within the current leaf that is the first byte of the
398 ** position list data (one byte passed the position-list size field).
400 ** pLeaf:
401 ** Buffer containing current leaf page data. Set to NULL at EOF.
403 ** iTermLeafPgno, iTermLeafOffset:
404 ** Leaf page number containing the last term read from the segment. And
405 ** the offset immediately following the term data.
407 ** flags:
408 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
410 ** FTS5_SEGITER_ONETERM:
411 ** If set, set the iterator to point to EOF after the current doclist
412 ** has been exhausted. Do not proceed to the next term in the segment.
414 ** FTS5_SEGITER_REVERSE:
415 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
416 ** it is set, iterate through rowid in descending order instead of the
417 ** default ascending order.
419 ** iRowidOffset/nRowidOffset/aRowidOffset:
420 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
422 ** For each rowid on the page corresponding to the current term, the
423 ** corresponding aRowidOffset[] entry is set to the byte offset of the
424 ** start of the "position-list-size" field within the page.
426 ** iTermIdx:
427 ** Index of current term on iTermLeafPgno.
429 struct Fts5SegIter {
430 Fts5StructureSegment *pSeg; /* Segment to iterate through */
431 int flags; /* Mask of configuration flags */
432 int iLeafPgno; /* Current leaf page number */
433 Fts5Data *pLeaf; /* Current leaf data */
434 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
435 i64 iLeafOffset; /* Byte offset within current leaf */
437 /* Next method */
438 void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
440 /* The page and offset from which the current term was read. The offset
441 ** is the offset of the first rowid in the current doclist. */
442 int iTermLeafPgno;
443 int iTermLeafOffset;
445 int iPgidxOff; /* Next offset in pgidx */
446 int iEndofDoclist;
448 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
449 int iRowidOffset; /* Current entry in aRowidOffset[] */
450 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
451 int *aRowidOffset; /* Array of offset to rowid fields */
453 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
455 /* Variables populated based on current entry. */
456 Fts5Buffer term; /* Current term */
457 i64 iRowid; /* Current rowid */
458 int nPos; /* Number of bytes in current position list */
459 u8 bDel; /* True if the delete flag is set */
463 ** Argument is a pointer to an Fts5Data structure that contains a
464 ** leaf page.
466 #define ASSERT_SZLEAF_OK(x) assert( \
467 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
470 #define FTS5_SEGITER_ONETERM 0x01
471 #define FTS5_SEGITER_REVERSE 0x02
474 ** Argument is a pointer to an Fts5Data structure that contains a leaf
475 ** page. This macro evaluates to true if the leaf contains no terms, or
476 ** false if it contains at least one term.
478 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
480 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
482 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
485 ** Object for iterating through the merged results of one or more segments,
486 ** visiting each term/rowid pair in the merged data.
488 ** nSeg is always a power of two greater than or equal to the number of
489 ** segments that this object is merging data from. Both the aSeg[] and
490 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
491 ** with zeroed objects - these are handled as if they were iterators opened
492 ** on empty segments.
494 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
495 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
496 ** comparison in this context is the index of the iterator that currently
497 ** points to the smaller term/rowid combination. Iterators at EOF are
498 ** considered to be greater than all other iterators.
500 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
501 ** the smallest key overall. aFirst[0] is unused.
503 ** poslist:
504 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
505 ** There is no way to tell if this is populated or not.
507 struct Fts5Iter {
508 Fts5IndexIter base; /* Base class containing output vars */
510 Fts5Index *pIndex; /* Index that owns this iterator */
511 Fts5Buffer poslist; /* Buffer containing current poslist */
512 Fts5Colset *pColset; /* Restrict matches to these columns */
514 /* Invoked to set output variables. */
515 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
517 int nSeg; /* Size of aSeg[] array */
518 int bRev; /* True to iterate in reverse order */
519 u8 bSkipEmpty; /* True to skip deleted entries */
521 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
522 Fts5CResult *aFirst; /* Current merge state (see above) */
523 Fts5SegIter aSeg[1]; /* Array of segment iterators */
528 ** An instance of the following type is used to iterate through the contents
529 ** of a doclist-index record.
531 ** pData:
532 ** Record containing the doclist-index data.
534 ** bEof:
535 ** Set to true once iterator has reached EOF.
537 ** iOff:
538 ** Set to the current offset within record pData.
540 struct Fts5DlidxLvl {
541 Fts5Data *pData; /* Data for current page of this level */
542 int iOff; /* Current offset into pData */
543 int bEof; /* At EOF already */
544 int iFirstOff; /* Used by reverse iterators */
546 /* Output variables */
547 int iLeafPgno; /* Page number of current leaf page */
548 i64 iRowid; /* First rowid on leaf iLeafPgno */
550 struct Fts5DlidxIter {
551 int nLvl;
552 int iSegid;
553 Fts5DlidxLvl aLvl[1];
556 static void fts5PutU16(u8 *aOut, u16 iVal){
557 aOut[0] = (iVal>>8);
558 aOut[1] = (iVal&0xFF);
561 static u16 fts5GetU16(const u8 *aIn){
562 return ((u16)aIn[0] << 8) + aIn[1];
566 ** Allocate and return a buffer at least nByte bytes in size.
568 ** If an OOM error is encountered, return NULL and set the error code in
569 ** the Fts5Index handle passed as the first argument.
571 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
572 return sqlite3Fts5MallocZero(&p->rc, nByte);
576 ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
578 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
579 ** +ve if pRight is smaller than pLeft. In other words:
581 ** res = *pLeft - *pRight
583 #ifdef SQLITE_DEBUG
584 static int fts5BufferCompareBlob(
585 Fts5Buffer *pLeft, /* Left hand side of comparison */
586 const u8 *pRight, int nRight /* Right hand side of comparison */
588 int nCmp = MIN(pLeft->n, nRight);
589 int res = memcmp(pLeft->p, pRight, nCmp);
590 return (res==0 ? (pLeft->n - nRight) : res);
592 #endif
595 ** Compare the contents of the two buffers using memcmp(). If one buffer
596 ** is a prefix of the other, it is considered the lesser.
598 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
599 ** +ve if pRight is smaller than pLeft. In other words:
601 ** res = *pLeft - *pRight
603 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
604 int nCmp, res;
605 nCmp = MIN(pLeft->n, pRight->n);
606 assert( nCmp<=0 || pLeft->p!=0 );
607 assert( nCmp<=0 || pRight->p!=0 );
608 res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
609 return (res==0 ? (pLeft->n - pRight->n) : res);
612 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
613 int ret;
614 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
615 return ret;
619 ** Close the read-only blob handle, if it is open.
621 void sqlite3Fts5IndexCloseReader(Fts5Index *p){
622 if( p->pReader ){
623 sqlite3_blob *pReader = p->pReader;
624 p->pReader = 0;
625 sqlite3_blob_close(pReader);
630 ** Retrieve a record from the %_data table.
632 ** If an error occurs, NULL is returned and an error left in the
633 ** Fts5Index object.
635 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
636 Fts5Data *pRet = 0;
637 if( p->rc==SQLITE_OK ){
638 int rc = SQLITE_OK;
640 if( p->pReader ){
641 /* This call may return SQLITE_ABORT if there has been a savepoint
642 ** rollback since it was last used. In this case a new blob handle
643 ** is required. */
644 sqlite3_blob *pBlob = p->pReader;
645 p->pReader = 0;
646 rc = sqlite3_blob_reopen(pBlob, iRowid);
647 assert( p->pReader==0 );
648 p->pReader = pBlob;
649 if( rc!=SQLITE_OK ){
650 sqlite3Fts5IndexCloseReader(p);
652 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
655 /* If the blob handle is not open at this point, open it and seek
656 ** to the requested entry. */
657 if( p->pReader==0 && rc==SQLITE_OK ){
658 Fts5Config *pConfig = p->pConfig;
659 rc = sqlite3_blob_open(pConfig->db,
660 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
664 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
665 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
666 ** All the reasons those functions might return SQLITE_ERROR - missing
667 ** table, missing row, non-blob/text in block column - indicate
668 ** backing store corruption. */
669 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
671 if( rc==SQLITE_OK ){
672 u8 *aOut = 0; /* Read blob data into this buffer */
673 int nByte = sqlite3_blob_bytes(p->pReader);
674 sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
675 pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
676 if( pRet ){
677 pRet->nn = nByte;
678 aOut = pRet->p = (u8*)&pRet[1];
679 }else{
680 rc = SQLITE_NOMEM;
683 if( rc==SQLITE_OK ){
684 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
686 if( rc!=SQLITE_OK ){
687 sqlite3_free(pRet);
688 pRet = 0;
689 }else{
690 /* TODO1: Fix this */
691 pRet->p[nByte] = 0x00;
692 pRet->p[nByte+1] = 0x00;
693 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
696 p->rc = rc;
697 p->nRead++;
700 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
701 return pRet;
706 ** Release a reference to data record returned by an earlier call to
707 ** fts5DataRead().
709 static void fts5DataRelease(Fts5Data *pData){
710 sqlite3_free(pData);
713 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
714 Fts5Data *pRet = fts5DataRead(p, iRowid);
715 if( pRet ){
716 if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
717 p->rc = FTS5_CORRUPT;
718 fts5DataRelease(pRet);
719 pRet = 0;
722 return pRet;
725 static int fts5IndexPrepareStmt(
726 Fts5Index *p,
727 sqlite3_stmt **ppStmt,
728 char *zSql
730 if( p->rc==SQLITE_OK ){
731 if( zSql ){
732 p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
733 SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
734 ppStmt, 0);
735 }else{
736 p->rc = SQLITE_NOMEM;
739 sqlite3_free(zSql);
740 return p->rc;
745 ** INSERT OR REPLACE a record into the %_data table.
747 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
748 if( p->rc!=SQLITE_OK ) return;
750 if( p->pWriter==0 ){
751 Fts5Config *pConfig = p->pConfig;
752 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
753 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
754 pConfig->zDb, pConfig->zName
756 if( p->rc ) return;
759 sqlite3_bind_int64(p->pWriter, 1, iRowid);
760 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
761 sqlite3_step(p->pWriter);
762 p->rc = sqlite3_reset(p->pWriter);
763 sqlite3_bind_null(p->pWriter, 2);
767 ** Execute the following SQL:
769 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
771 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
772 if( p->rc!=SQLITE_OK ) return;
774 if( p->pDeleter==0 ){
775 Fts5Config *pConfig = p->pConfig;
776 char *zSql = sqlite3_mprintf(
777 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
778 pConfig->zDb, pConfig->zName
780 if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
783 sqlite3_bind_int64(p->pDeleter, 1, iFirst);
784 sqlite3_bind_int64(p->pDeleter, 2, iLast);
785 sqlite3_step(p->pDeleter);
786 p->rc = sqlite3_reset(p->pDeleter);
790 ** Remove all records associated with segment iSegid.
792 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
793 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
794 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
795 fts5DataDelete(p, iFirst, iLast);
796 if( p->pIdxDeleter==0 ){
797 Fts5Config *pConfig = p->pConfig;
798 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
799 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
800 pConfig->zDb, pConfig->zName
803 if( p->rc==SQLITE_OK ){
804 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
805 sqlite3_step(p->pIdxDeleter);
806 p->rc = sqlite3_reset(p->pIdxDeleter);
811 ** Release a reference to an Fts5Structure object returned by an earlier
812 ** call to fts5StructureRead() or fts5StructureDecode().
814 static void fts5StructureRelease(Fts5Structure *pStruct){
815 if( pStruct && 0>=(--pStruct->nRef) ){
816 int i;
817 assert( pStruct->nRef==0 );
818 for(i=0; i<pStruct->nLevel; i++){
819 sqlite3_free(pStruct->aLevel[i].aSeg);
821 sqlite3_free(pStruct);
825 static void fts5StructureRef(Fts5Structure *pStruct){
826 pStruct->nRef++;
829 void *sqlite3Fts5StructureRef(Fts5Index *p){
830 fts5StructureRef(p->pStruct);
831 return (void*)p->pStruct;
833 void sqlite3Fts5StructureRelease(void *p){
834 if( p ){
835 fts5StructureRelease((Fts5Structure*)p);
838 int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
839 if( p->pStruct!=(Fts5Structure*)pStruct ){
840 return SQLITE_ABORT;
842 return SQLITE_OK;
846 ** Ensure that structure object (*pp) is writable.
848 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
849 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
851 static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
852 Fts5Structure *p = *pp;
853 if( *pRc==SQLITE_OK && p->nRef>1 ){
854 i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
855 Fts5Structure *pNew;
856 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
857 if( pNew ){
858 int i;
859 memcpy(pNew, p, nByte);
860 for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
861 for(i=0; i<p->nLevel; i++){
862 Fts5StructureLevel *pLvl = &pNew->aLevel[i];
863 nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
864 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
865 if( pLvl->aSeg==0 ){
866 for(i=0; i<p->nLevel; i++){
867 sqlite3_free(pNew->aLevel[i].aSeg);
869 sqlite3_free(pNew);
870 return;
872 memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
874 p->nRef--;
875 pNew->nRef = 1;
877 *pp = pNew;
882 ** Deserialize and return the structure record currently stored in serialized
883 ** form within buffer pData/nData.
885 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
886 ** are over-allocated by one slot. This allows the structure contents
887 ** to be more easily edited.
889 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
890 ** returned. Otherwise, *ppOut is set to point to the new object and
891 ** SQLITE_OK returned.
893 static int fts5StructureDecode(
894 const u8 *pData, /* Buffer containing serialized structure */
895 int nData, /* Size of buffer pData in bytes */
896 int *piCookie, /* Configuration cookie value */
897 Fts5Structure **ppOut /* OUT: Deserialized object */
899 int rc = SQLITE_OK;
900 int i = 0;
901 int iLvl;
902 int nLevel = 0;
903 int nSegment = 0;
904 sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
905 Fts5Structure *pRet = 0; /* Structure object to return */
907 /* Grab the cookie value */
908 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
909 i = 4;
911 /* Read the total number of levels and segments from the start of the
912 ** structure record. */
913 i += fts5GetVarint32(&pData[i], nLevel);
914 i += fts5GetVarint32(&pData[i], nSegment);
915 if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
916 || nSegment>FTS5_MAX_SEGMENT || nSegment<0
918 return FTS5_CORRUPT;
920 nByte = (
921 sizeof(Fts5Structure) + /* Main structure */
922 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
924 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
926 if( pRet ){
927 pRet->nRef = 1;
928 pRet->nLevel = nLevel;
929 pRet->nSegment = nSegment;
930 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
932 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
933 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
934 int nTotal = 0;
935 int iSeg;
937 if( i>=nData ){
938 rc = FTS5_CORRUPT;
939 }else{
940 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
941 i += fts5GetVarint32(&pData[i], nTotal);
942 if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
943 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
944 nTotal * sizeof(Fts5StructureSegment)
946 nSegment -= nTotal;
949 if( rc==SQLITE_OK ){
950 pLvl->nSeg = nTotal;
951 for(iSeg=0; iSeg<nTotal; iSeg++){
952 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
953 if( i>=nData ){
954 rc = FTS5_CORRUPT;
955 break;
957 assert( pSeg!=0 );
958 i += fts5GetVarint32(&pData[i], pSeg->iSegid);
959 i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
960 i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
961 if( pSeg->pgnoLast<pSeg->pgnoFirst ){
962 rc = FTS5_CORRUPT;
963 break;
966 if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
967 if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
970 if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
972 if( rc!=SQLITE_OK ){
973 fts5StructureRelease(pRet);
974 pRet = 0;
978 *ppOut = pRet;
979 return rc;
983 ** Add a level to the Fts5Structure.aLevel[] array of structure object
984 ** (*ppStruct).
986 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
987 fts5StructureMakeWritable(pRc, ppStruct);
988 assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK );
989 if( *pRc==SQLITE_OK ){
990 Fts5Structure *pStruct = *ppStruct;
991 int nLevel = pStruct->nLevel;
992 sqlite3_int64 nByte = (
993 sizeof(Fts5Structure) + /* Main structure */
994 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
997 pStruct = sqlite3_realloc64(pStruct, nByte);
998 if( pStruct ){
999 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
1000 pStruct->nLevel++;
1001 *ppStruct = pStruct;
1002 }else{
1003 *pRc = SQLITE_NOMEM;
1009 ** Extend level iLvl so that there is room for at least nExtra more
1010 ** segments.
1012 static void fts5StructureExtendLevel(
1013 int *pRc,
1014 Fts5Structure *pStruct,
1015 int iLvl,
1016 int nExtra,
1017 int bInsert
1019 if( *pRc==SQLITE_OK ){
1020 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1021 Fts5StructureSegment *aNew;
1022 sqlite3_int64 nByte;
1024 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
1025 aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
1026 if( aNew ){
1027 if( bInsert==0 ){
1028 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
1029 }else{
1030 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
1031 memmove(&aNew[nExtra], aNew, nMove);
1032 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
1034 pLvl->aSeg = aNew;
1035 }else{
1036 *pRc = SQLITE_NOMEM;
1041 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
1042 Fts5Structure *pRet = 0;
1043 Fts5Config *pConfig = p->pConfig;
1044 int iCookie; /* Configuration cookie */
1045 Fts5Data *pData;
1047 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
1048 if( p->rc==SQLITE_OK ){
1049 /* TODO: Do we need this if the leaf-index is appended? Probably... */
1050 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
1051 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
1052 if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
1053 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
1055 fts5DataRelease(pData);
1056 if( p->rc!=SQLITE_OK ){
1057 fts5StructureRelease(pRet);
1058 pRet = 0;
1062 return pRet;
1065 static i64 fts5IndexDataVersion(Fts5Index *p){
1066 i64 iVersion = 0;
1068 if( p->rc==SQLITE_OK ){
1069 if( p->pDataVersion==0 ){
1070 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1071 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1073 if( p->rc ) return 0;
1076 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1077 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1079 p->rc = sqlite3_reset(p->pDataVersion);
1082 return iVersion;
1086 ** Read, deserialize and return the structure record.
1088 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1089 ** are over-allocated as described for function fts5StructureDecode()
1090 ** above.
1092 ** If an error occurs, NULL is returned and an error code left in the
1093 ** Fts5Index handle. If an error has already occurred when this function
1094 ** is called, it is a no-op.
1096 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1098 if( p->pStruct==0 ){
1099 p->iStructVersion = fts5IndexDataVersion(p);
1100 if( p->rc==SQLITE_OK ){
1101 p->pStruct = fts5StructureReadUncached(p);
1105 #if 0
1106 else{
1107 Fts5Structure *pTest = fts5StructureReadUncached(p);
1108 if( pTest ){
1109 int i, j;
1110 assert_nc( p->pStruct->nSegment==pTest->nSegment );
1111 assert_nc( p->pStruct->nLevel==pTest->nLevel );
1112 for(i=0; i<pTest->nLevel; i++){
1113 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1114 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1115 for(j=0; j<pTest->aLevel[i].nSeg; j++){
1116 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1117 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1118 assert_nc( p1->iSegid==p2->iSegid );
1119 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1120 assert_nc( p1->pgnoLast==p2->pgnoLast );
1123 fts5StructureRelease(pTest);
1126 #endif
1128 if( p->rc!=SQLITE_OK ) return 0;
1129 assert( p->iStructVersion!=0 );
1130 assert( p->pStruct!=0 );
1131 fts5StructureRef(p->pStruct);
1132 return p->pStruct;
1135 static void fts5StructureInvalidate(Fts5Index *p){
1136 if( p->pStruct ){
1137 fts5StructureRelease(p->pStruct);
1138 p->pStruct = 0;
1143 ** Return the total number of segments in index structure pStruct. This
1144 ** function is only ever used as part of assert() conditions.
1146 #ifdef SQLITE_DEBUG
1147 static int fts5StructureCountSegments(Fts5Structure *pStruct){
1148 int nSegment = 0; /* Total number of segments */
1149 if( pStruct ){
1150 int iLvl; /* Used to iterate through levels */
1151 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1152 nSegment += pStruct->aLevel[iLvl].nSeg;
1156 return nSegment;
1158 #endif
1160 #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
1161 assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \
1162 memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \
1163 (pBuf)->n += nBlob; \
1166 #define fts5BufferSafeAppendVarint(pBuf, iVal) { \
1167 (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
1168 assert( (pBuf)->nSpace>=(pBuf)->n ); \
1173 ** Serialize and store the "structure" record.
1175 ** If an error occurs, leave an error code in the Fts5Index object. If an
1176 ** error has already occurred, this function is a no-op.
1178 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1179 if( p->rc==SQLITE_OK ){
1180 Fts5Buffer buf; /* Buffer to serialize record into */
1181 int iLvl; /* Used to iterate through levels */
1182 int iCookie; /* Cookie value to store */
1184 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1185 memset(&buf, 0, sizeof(Fts5Buffer));
1187 /* Append the current configuration cookie */
1188 iCookie = p->pConfig->iCookie;
1189 if( iCookie<0 ) iCookie = 0;
1191 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
1192 sqlite3Fts5Put32(buf.p, iCookie);
1193 buf.n = 4;
1194 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1195 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1196 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1199 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1200 int iSeg; /* Used to iterate through segments */
1201 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1202 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1203 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1204 assert( pLvl->nMerge<=pLvl->nSeg );
1206 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1207 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
1208 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
1209 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
1213 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1214 fts5BufferFree(&buf);
1218 #if 0
1219 static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
1220 static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
1221 int rc = SQLITE_OK;
1222 Fts5Buffer buf;
1223 memset(&buf, 0, sizeof(buf));
1224 fts5DebugStructure(&rc, &buf, pStruct);
1225 fprintf(stdout, "%s: %s\n", zCaption, buf.p);
1226 fflush(stdout);
1227 fts5BufferFree(&buf);
1229 #else
1230 # define fts5PrintStructure(x,y)
1231 #endif
1233 static int fts5SegmentSize(Fts5StructureSegment *pSeg){
1234 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
1238 ** Return a copy of index structure pStruct. Except, promote as many
1239 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1240 ** returned.
1242 static void fts5StructurePromoteTo(
1243 Fts5Index *p,
1244 int iPromote,
1245 int szPromote,
1246 Fts5Structure *pStruct
1248 int il, is;
1249 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1251 if( pOut->nMerge==0 ){
1252 for(il=iPromote+1; il<pStruct->nLevel; il++){
1253 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1254 if( pLvl->nMerge ) return;
1255 for(is=pLvl->nSeg-1; is>=0; is--){
1256 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1257 if( sz>szPromote ) return;
1258 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1259 if( p->rc ) return;
1260 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1261 pOut->nSeg++;
1262 pLvl->nSeg--;
1269 ** A new segment has just been written to level iLvl of index structure
1270 ** pStruct. This function determines if any segments should be promoted
1271 ** as a result. Segments are promoted in two scenarios:
1273 ** a) If the segment just written is smaller than one or more segments
1274 ** within the previous populated level, it is promoted to the previous
1275 ** populated level.
1277 ** b) If the segment just written is larger than the newest segment on
1278 ** the next populated level, then that segment, and any other adjacent
1279 ** segments that are also smaller than the one just written, are
1280 ** promoted.
1282 ** If one or more segments are promoted, the structure object is updated
1283 ** to reflect this.
1285 static void fts5StructurePromote(
1286 Fts5Index *p, /* FTS5 backend object */
1287 int iLvl, /* Index level just updated */
1288 Fts5Structure *pStruct /* Index structure */
1290 if( p->rc==SQLITE_OK ){
1291 int iTst;
1292 int iPromote = -1;
1293 int szPromote = 0; /* Promote anything this size or smaller */
1294 Fts5StructureSegment *pSeg; /* Segment just written */
1295 int szSeg; /* Size of segment just written */
1296 int nSeg = pStruct->aLevel[iLvl].nSeg;
1298 if( nSeg==0 ) return;
1299 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1300 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1302 /* Check for condition (a) */
1303 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1304 if( iTst>=0 ){
1305 int i;
1306 int szMax = 0;
1307 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1308 assert( pTst->nMerge==0 );
1309 for(i=0; i<pTst->nSeg; i++){
1310 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1311 if( sz>szMax ) szMax = sz;
1313 if( szMax>=szSeg ){
1314 /* Condition (a) is true. Promote the newest segment on level
1315 ** iLvl to level iTst. */
1316 iPromote = iTst;
1317 szPromote = szMax;
1321 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1322 ** is a no-op if it is not. */
1323 if( iPromote<0 ){
1324 iPromote = iLvl;
1325 szPromote = szSeg;
1327 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1333 ** Advance the iterator passed as the only argument. If the end of the
1334 ** doclist-index page is reached, return non-zero.
1336 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1337 Fts5Data *pData = pLvl->pData;
1339 if( pLvl->iOff==0 ){
1340 assert( pLvl->bEof==0 );
1341 pLvl->iOff = 1;
1342 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1343 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1344 pLvl->iFirstOff = pLvl->iOff;
1345 }else{
1346 int iOff;
1347 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1348 if( pData->p[iOff] ) break;
1351 if( iOff<pData->nn ){
1352 i64 iVal;
1353 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1354 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
1355 pLvl->iRowid += iVal;
1356 pLvl->iOff = iOff;
1357 }else{
1358 pLvl->bEof = 1;
1362 return pLvl->bEof;
1366 ** Advance the iterator passed as the only argument.
1368 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1369 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1371 assert( iLvl<pIter->nLvl );
1372 if( fts5DlidxLvlNext(pLvl) ){
1373 if( (iLvl+1) < pIter->nLvl ){
1374 fts5DlidxIterNextR(p, pIter, iLvl+1);
1375 if( pLvl[1].bEof==0 ){
1376 fts5DataRelease(pLvl->pData);
1377 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1378 pLvl->pData = fts5DataRead(p,
1379 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1381 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1386 return pIter->aLvl[0].bEof;
1388 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
1389 return fts5DlidxIterNextR(p, pIter, 0);
1393 ** The iterator passed as the first argument has the following fields set
1394 ** as follows. This function sets up the rest of the iterator so that it
1395 ** points to the first rowid in the doclist-index.
1397 ** pData:
1398 ** pointer to doclist-index record,
1400 ** When this function is called pIter->iLeafPgno is the page number the
1401 ** doclist is associated with (the one featuring the term).
1403 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1404 int i;
1405 for(i=0; i<pIter->nLvl; i++){
1406 fts5DlidxLvlNext(&pIter->aLvl[i]);
1408 return pIter->aLvl[0].bEof;
1412 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
1413 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
1416 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
1417 int i;
1419 /* Advance each level to the last entry on the last page */
1420 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
1421 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
1422 while( fts5DlidxLvlNext(pLvl)==0 );
1423 pLvl->bEof = 0;
1425 if( i>0 ){
1426 Fts5DlidxLvl *pChild = &pLvl[-1];
1427 fts5DataRelease(pChild->pData);
1428 memset(pChild, 0, sizeof(Fts5DlidxLvl));
1429 pChild->pData = fts5DataRead(p,
1430 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
1437 ** Move the iterator passed as the only argument to the previous entry.
1439 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
1440 int iOff = pLvl->iOff;
1442 assert( pLvl->bEof==0 );
1443 if( iOff<=pLvl->iFirstOff ){
1444 pLvl->bEof = 1;
1445 }else{
1446 u8 *a = pLvl->pData->p;
1448 pLvl->iOff = 0;
1449 fts5DlidxLvlNext(pLvl);
1450 while( 1 ){
1451 int nZero = 0;
1452 int ii = pLvl->iOff;
1453 u64 delta = 0;
1455 while( a[ii]==0 ){
1456 nZero++;
1457 ii++;
1459 ii += sqlite3Fts5GetVarint(&a[ii], &delta);
1461 if( ii>=iOff ) break;
1462 pLvl->iLeafPgno += nZero+1;
1463 pLvl->iRowid += delta;
1464 pLvl->iOff = ii;
1468 return pLvl->bEof;
1471 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1472 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1474 assert( iLvl<pIter->nLvl );
1475 if( fts5DlidxLvlPrev(pLvl) ){
1476 if( (iLvl+1) < pIter->nLvl ){
1477 fts5DlidxIterPrevR(p, pIter, iLvl+1);
1478 if( pLvl[1].bEof==0 ){
1479 fts5DataRelease(pLvl->pData);
1480 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1481 pLvl->pData = fts5DataRead(p,
1482 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1484 if( pLvl->pData ){
1485 while( fts5DlidxLvlNext(pLvl)==0 );
1486 pLvl->bEof = 0;
1492 return pIter->aLvl[0].bEof;
1494 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
1495 return fts5DlidxIterPrevR(p, pIter, 0);
1499 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1501 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1502 if( pIter ){
1503 int i;
1504 for(i=0; i<pIter->nLvl; i++){
1505 fts5DataRelease(pIter->aLvl[i].pData);
1507 sqlite3_free(pIter);
1511 static Fts5DlidxIter *fts5DlidxIterInit(
1512 Fts5Index *p, /* Fts5 Backend to iterate within */
1513 int bRev, /* True for ORDER BY ASC */
1514 int iSegid, /* Segment id */
1515 int iLeafPg /* Leaf page number to load dlidx for */
1517 Fts5DlidxIter *pIter = 0;
1518 int i;
1519 int bDone = 0;
1521 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1522 sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1523 Fts5DlidxIter *pNew;
1525 pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
1526 if( pNew==0 ){
1527 p->rc = SQLITE_NOMEM;
1528 }else{
1529 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1530 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1531 pIter = pNew;
1532 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1533 pLvl->pData = fts5DataRead(p, iRowid);
1534 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1535 bDone = 1;
1537 pIter->nLvl = i+1;
1541 if( p->rc==SQLITE_OK ){
1542 pIter->iSegid = iSegid;
1543 if( bRev==0 ){
1544 fts5DlidxIterFirst(pIter);
1545 }else{
1546 fts5DlidxIterLast(p, pIter);
1550 if( p->rc!=SQLITE_OK ){
1551 fts5DlidxIterFree(pIter);
1552 pIter = 0;
1555 return pIter;
1558 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1559 return pIter->aLvl[0].iRowid;
1561 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
1562 return pIter->aLvl[0].iLeafPgno;
1566 ** Load the next leaf page into the segment iterator.
1568 static void fts5SegIterNextPage(
1569 Fts5Index *p, /* FTS5 backend object */
1570 Fts5SegIter *pIter /* Iterator to advance to next page */
1572 Fts5Data *pLeaf;
1573 Fts5StructureSegment *pSeg = pIter->pSeg;
1574 fts5DataRelease(pIter->pLeaf);
1575 pIter->iLeafPgno++;
1576 if( pIter->pNextLeaf ){
1577 pIter->pLeaf = pIter->pNextLeaf;
1578 pIter->pNextLeaf = 0;
1579 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1580 pIter->pLeaf = fts5LeafRead(p,
1581 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1583 }else{
1584 pIter->pLeaf = 0;
1586 pLeaf = pIter->pLeaf;
1588 if( pLeaf ){
1589 pIter->iPgidxOff = pLeaf->szLeaf;
1590 if( fts5LeafIsTermless(pLeaf) ){
1591 pIter->iEndofDoclist = pLeaf->nn+1;
1592 }else{
1593 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1594 pIter->iEndofDoclist
1601 ** Argument p points to a buffer containing a varint to be interpreted as a
1602 ** position list size field. Read the varint and return the number of bytes
1603 ** read. Before returning, set *pnSz to the number of bytes in the position
1604 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1606 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1607 int nSz;
1608 int n = 0;
1609 fts5FastGetVarint32(p, n, nSz);
1610 assert_nc( nSz>=0 );
1611 *pnSz = nSz/2;
1612 *pbDel = nSz & 0x0001;
1613 return n;
1617 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1618 ** position-list size field. Read the value of the field and store it
1619 ** in the following variables:
1621 ** Fts5SegIter.nPos
1622 ** Fts5SegIter.bDel
1624 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1625 ** position list content (if any).
1627 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1628 if( p->rc==SQLITE_OK ){
1629 int iOff = pIter->iLeafOffset; /* Offset to read at */
1630 ASSERT_SZLEAF_OK(pIter->pLeaf);
1631 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1632 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1633 pIter->bDel = 0;
1634 pIter->nPos = 1;
1635 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1636 pIter->bDel = 1;
1637 iOff++;
1638 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1639 pIter->nPos = 1;
1640 iOff++;
1641 }else{
1642 pIter->nPos = 0;
1645 }else{
1646 int nSz;
1647 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1648 pIter->bDel = (nSz & 0x0001);
1649 pIter->nPos = nSz>>1;
1650 assert_nc( pIter->nPos>=0 );
1652 pIter->iLeafOffset = iOff;
1656 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
1657 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1658 i64 iOff = pIter->iLeafOffset;
1660 ASSERT_SZLEAF_OK(pIter->pLeaf);
1661 while( iOff>=pIter->pLeaf->szLeaf ){
1662 fts5SegIterNextPage(p, pIter);
1663 if( pIter->pLeaf==0 ){
1664 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
1665 return;
1667 iOff = 4;
1668 a = pIter->pLeaf->p;
1670 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
1671 pIter->iLeafOffset = iOff;
1675 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1676 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1677 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1678 ** the first term in the segment).
1680 ** This function populates:
1682 ** Fts5SegIter.term
1683 ** Fts5SegIter.rowid
1685 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1686 ** the first position list. The position list belonging to document
1687 ** (Fts5SegIter.iRowid).
1689 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1690 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1691 i64 iOff = pIter->iLeafOffset; /* Offset to read at */
1692 int nNew; /* Bytes of new data */
1694 iOff += fts5GetVarint32(&a[iOff], nNew);
1695 if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
1696 p->rc = FTS5_CORRUPT;
1697 return;
1699 pIter->term.n = nKeep;
1700 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1701 assert( pIter->term.n<=pIter->term.nSpace );
1702 iOff += nNew;
1703 pIter->iTermLeafOffset = iOff;
1704 pIter->iTermLeafPgno = pIter->iLeafPgno;
1705 pIter->iLeafOffset = iOff;
1707 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1708 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1709 }else{
1710 int nExtra;
1711 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1712 pIter->iEndofDoclist += nExtra;
1715 fts5SegIterLoadRowid(p, pIter);
1718 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1719 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1720 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1722 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
1723 if( pIter->flags & FTS5_SEGITER_REVERSE ){
1724 pIter->xNext = fts5SegIterNext_Reverse;
1725 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1726 pIter->xNext = fts5SegIterNext_None;
1727 }else{
1728 pIter->xNext = fts5SegIterNext;
1733 ** Initialize the iterator object pIter to iterate through the entries in
1734 ** segment pSeg. The iterator is left pointing to the first entry when
1735 ** this function returns.
1737 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1738 ** an error has already occurred when this function is called, it is a no-op.
1740 static void fts5SegIterInit(
1741 Fts5Index *p, /* FTS index object */
1742 Fts5StructureSegment *pSeg, /* Description of segment */
1743 Fts5SegIter *pIter /* Object to populate */
1745 if( pSeg->pgnoFirst==0 ){
1746 /* This happens if the segment is being used as an input to an incremental
1747 ** merge and all data has already been "trimmed". See function
1748 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1749 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1750 ** at EOF already. */
1751 assert( pIter->pLeaf==0 );
1752 return;
1755 if( p->rc==SQLITE_OK ){
1756 memset(pIter, 0, sizeof(*pIter));
1757 fts5SegIterSetNext(p, pIter);
1758 pIter->pSeg = pSeg;
1759 pIter->iLeafPgno = pSeg->pgnoFirst-1;
1760 do {
1761 fts5SegIterNextPage(p, pIter);
1762 }while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 );
1765 if( p->rc==SQLITE_OK && pIter->pLeaf ){
1766 pIter->iLeafOffset = 4;
1767 assert( pIter->pLeaf!=0 );
1768 assert_nc( pIter->pLeaf->nn>4 );
1769 assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1770 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1771 fts5SegIterLoadTerm(p, pIter, 0);
1772 fts5SegIterLoadNPos(p, pIter);
1777 ** This function is only ever called on iterators created by calls to
1778 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1780 ** The iterator is in an unusual state when this function is called: the
1781 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1782 ** the position-list size field for the first relevant rowid on the page.
1783 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1785 ** This function advances the iterator so that it points to the last
1786 ** relevant rowid on the page and, if necessary, initializes the
1787 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1788 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1789 ** byte of the position list content associated with said rowid.
1791 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1792 int eDetail = p->pConfig->eDetail;
1793 int n = pIter->pLeaf->szLeaf;
1794 int i = pIter->iLeafOffset;
1795 u8 *a = pIter->pLeaf->p;
1796 int iRowidOffset = 0;
1798 if( n>pIter->iEndofDoclist ){
1799 n = pIter->iEndofDoclist;
1802 ASSERT_SZLEAF_OK(pIter->pLeaf);
1803 while( 1 ){
1804 u64 iDelta = 0;
1806 if( eDetail==FTS5_DETAIL_NONE ){
1807 /* todo */
1808 if( i<n && a[i]==0 ){
1809 i++;
1810 if( i<n && a[i]==0 ) i++;
1812 }else{
1813 int nPos;
1814 int bDummy;
1815 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1816 i += nPos;
1818 if( i>=n ) break;
1819 i += fts5GetVarint(&a[i], &iDelta);
1820 pIter->iRowid += iDelta;
1822 /* If necessary, grow the pIter->aRowidOffset[] array. */
1823 if( iRowidOffset>=pIter->nRowidOffset ){
1824 int nNew = pIter->nRowidOffset + 8;
1825 int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
1826 if( aNew==0 ){
1827 p->rc = SQLITE_NOMEM;
1828 break;
1830 pIter->aRowidOffset = aNew;
1831 pIter->nRowidOffset = nNew;
1834 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
1835 pIter->iLeafOffset = i;
1837 pIter->iRowidOffset = iRowidOffset;
1838 fts5SegIterLoadNPos(p, pIter);
1844 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
1845 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1846 assert( pIter->flags & FTS5_SEGITER_ONETERM );
1848 fts5DataRelease(pIter->pLeaf);
1849 pIter->pLeaf = 0;
1850 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
1851 Fts5Data *pNew;
1852 pIter->iLeafPgno--;
1853 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
1854 pIter->pSeg->iSegid, pIter->iLeafPgno
1856 if( pNew ){
1857 /* iTermLeafOffset may be equal to szLeaf if the term is the last
1858 ** thing on the page - i.e. the first rowid is on the following page.
1859 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1860 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
1861 assert( pIter->pLeaf==0 );
1862 if( pIter->iTermLeafOffset<pNew->szLeaf ){
1863 pIter->pLeaf = pNew;
1864 pIter->iLeafOffset = pIter->iTermLeafOffset;
1866 }else{
1867 int iRowidOff;
1868 iRowidOff = fts5LeafFirstRowidOff(pNew);
1869 if( iRowidOff ){
1870 if( iRowidOff>=pNew->szLeaf ){
1871 p->rc = FTS5_CORRUPT;
1872 }else{
1873 pIter->pLeaf = pNew;
1874 pIter->iLeafOffset = iRowidOff;
1879 if( pIter->pLeaf ){
1880 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
1881 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
1882 break;
1883 }else{
1884 fts5DataRelease(pNew);
1889 if( pIter->pLeaf ){
1890 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1891 fts5SegIterReverseInitPage(p, pIter);
1896 ** Return true if the iterator passed as the second argument currently
1897 ** points to a delete marker. A delete marker is an entry with a 0 byte
1898 ** position-list.
1900 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
1901 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1902 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1906 ** Advance iterator pIter to the next entry.
1908 ** This version of fts5SegIterNext() is only used by reverse iterators.
1910 static void fts5SegIterNext_Reverse(
1911 Fts5Index *p, /* FTS5 backend object */
1912 Fts5SegIter *pIter, /* Iterator to advance */
1913 int *pbUnused /* Unused */
1915 assert( pIter->flags & FTS5_SEGITER_REVERSE );
1916 assert( pIter->pNextLeaf==0 );
1917 UNUSED_PARAM(pbUnused);
1919 if( pIter->iRowidOffset>0 ){
1920 u8 *a = pIter->pLeaf->p;
1921 int iOff;
1922 u64 iDelta;
1924 pIter->iRowidOffset--;
1925 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
1926 fts5SegIterLoadNPos(p, pIter);
1927 iOff = pIter->iLeafOffset;
1928 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
1929 iOff += pIter->nPos;
1931 fts5GetVarint(&a[iOff], &iDelta);
1932 pIter->iRowid -= iDelta;
1933 }else{
1934 fts5SegIterReverseNewPage(p, pIter);
1939 ** Advance iterator pIter to the next entry.
1941 ** This version of fts5SegIterNext() is only used if detail=none and the
1942 ** iterator is not a reverse direction iterator.
1944 static void fts5SegIterNext_None(
1945 Fts5Index *p, /* FTS5 backend object */
1946 Fts5SegIter *pIter, /* Iterator to advance */
1947 int *pbNewTerm /* OUT: Set for new term */
1949 int iOff;
1951 assert( p->rc==SQLITE_OK );
1952 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
1953 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
1955 ASSERT_SZLEAF_OK(pIter->pLeaf);
1956 iOff = pIter->iLeafOffset;
1958 /* Next entry is on the next page */
1959 while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
1960 fts5SegIterNextPage(p, pIter);
1961 if( p->rc || pIter->pLeaf==0 ) return;
1962 pIter->iRowid = 0;
1963 iOff = 4;
1966 if( iOff<pIter->iEndofDoclist ){
1967 /* Next entry is on the current page */
1968 i64 iDelta;
1969 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
1970 pIter->iLeafOffset = iOff;
1971 pIter->iRowid += iDelta;
1972 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
1973 if( pIter->pSeg ){
1974 int nKeep = 0;
1975 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
1976 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
1978 pIter->iLeafOffset = iOff;
1979 fts5SegIterLoadTerm(p, pIter, nKeep);
1980 }else{
1981 const u8 *pList = 0;
1982 const char *zTerm = 0;
1983 int nList;
1984 sqlite3Fts5HashScanNext(p->pHash);
1985 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1986 if( pList==0 ) goto next_none_eof;
1987 pIter->pLeaf->p = (u8*)pList;
1988 pIter->pLeaf->nn = nList;
1989 pIter->pLeaf->szLeaf = nList;
1990 pIter->iEndofDoclist = nList;
1991 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
1992 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
1995 if( pbNewTerm ) *pbNewTerm = 1;
1996 }else{
1997 goto next_none_eof;
2000 fts5SegIterLoadNPos(p, pIter);
2002 return;
2003 next_none_eof:
2004 fts5DataRelease(pIter->pLeaf);
2005 pIter->pLeaf = 0;
2010 ** Advance iterator pIter to the next entry.
2012 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
2013 ** is not considered an error if the iterator reaches EOF. If an error has
2014 ** already occurred when this function is called, it is a no-op.
2016 static void fts5SegIterNext(
2017 Fts5Index *p, /* FTS5 backend object */
2018 Fts5SegIter *pIter, /* Iterator to advance */
2019 int *pbNewTerm /* OUT: Set for new term */
2021 Fts5Data *pLeaf = pIter->pLeaf;
2022 int iOff;
2023 int bNewTerm = 0;
2024 int nKeep = 0;
2025 u8 *a;
2026 int n;
2028 assert( pbNewTerm==0 || *pbNewTerm==0 );
2029 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
2031 /* Search for the end of the position list within the current page. */
2032 a = pLeaf->p;
2033 n = pLeaf->szLeaf;
2035 ASSERT_SZLEAF_OK(pLeaf);
2036 iOff = pIter->iLeafOffset + pIter->nPos;
2038 if( iOff<n ){
2039 /* The next entry is on the current page. */
2040 assert_nc( iOff<=pIter->iEndofDoclist );
2041 if( iOff>=pIter->iEndofDoclist ){
2042 bNewTerm = 1;
2043 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
2044 iOff += fts5GetVarint32(&a[iOff], nKeep);
2046 }else{
2047 u64 iDelta;
2048 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
2049 pIter->iRowid += iDelta;
2050 assert_nc( iDelta>0 );
2052 pIter->iLeafOffset = iOff;
2054 }else if( pIter->pSeg==0 ){
2055 const u8 *pList = 0;
2056 const char *zTerm = 0;
2057 int nList = 0;
2058 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
2059 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
2060 sqlite3Fts5HashScanNext(p->pHash);
2061 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2063 if( pList==0 ){
2064 fts5DataRelease(pIter->pLeaf);
2065 pIter->pLeaf = 0;
2066 }else{
2067 pIter->pLeaf->p = (u8*)pList;
2068 pIter->pLeaf->nn = nList;
2069 pIter->pLeaf->szLeaf = nList;
2070 pIter->iEndofDoclist = nList+1;
2071 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
2072 (u8*)zTerm);
2073 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2074 *pbNewTerm = 1;
2076 }else{
2077 iOff = 0;
2078 /* Next entry is not on the current page */
2079 while( iOff==0 ){
2080 fts5SegIterNextPage(p, pIter);
2081 pLeaf = pIter->pLeaf;
2082 if( pLeaf==0 ) break;
2083 ASSERT_SZLEAF_OK(pLeaf);
2084 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
2085 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
2086 pIter->iLeafOffset = iOff;
2088 if( pLeaf->nn>pLeaf->szLeaf ){
2089 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2090 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
2094 else if( pLeaf->nn>pLeaf->szLeaf ){
2095 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2096 &pLeaf->p[pLeaf->szLeaf], iOff
2098 pIter->iLeafOffset = iOff;
2099 pIter->iEndofDoclist = iOff;
2100 bNewTerm = 1;
2102 assert_nc( iOff<pLeaf->szLeaf );
2103 if( iOff>pLeaf->szLeaf ){
2104 p->rc = FTS5_CORRUPT;
2105 return;
2110 /* Check if the iterator is now at EOF. If so, return early. */
2111 if( pIter->pLeaf ){
2112 if( bNewTerm ){
2113 if( pIter->flags & FTS5_SEGITER_ONETERM ){
2114 fts5DataRelease(pIter->pLeaf);
2115 pIter->pLeaf = 0;
2116 }else{
2117 fts5SegIterLoadTerm(p, pIter, nKeep);
2118 fts5SegIterLoadNPos(p, pIter);
2119 if( pbNewTerm ) *pbNewTerm = 1;
2121 }else{
2122 /* The following could be done by calling fts5SegIterLoadNPos(). But
2123 ** this block is particularly performance critical, so equivalent
2124 ** code is inlined. */
2125 int nSz;
2126 assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
2127 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
2128 pIter->bDel = (nSz & 0x0001);
2129 pIter->nPos = nSz>>1;
2130 assert_nc( pIter->nPos>=0 );
2135 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
2137 #define fts5IndexSkipVarint(a, iOff) { \
2138 int iEnd = iOff+9; \
2139 while( (a[iOff++] & 0x80) && iOff<iEnd ); \
2143 ** Iterator pIter currently points to the first rowid in a doclist. This
2144 ** function sets the iterator up so that iterates in reverse order through
2145 ** the doclist.
2147 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2148 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2149 Fts5Data *pLast = 0;
2150 int pgnoLast = 0;
2152 if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){
2153 int iSegid = pIter->pSeg->iSegid;
2154 pgnoLast = fts5DlidxIterPgno(pDlidx);
2155 pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2156 }else{
2157 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2159 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2160 ** position-list content for the current rowid. Back it up so that it
2161 ** points to the start of the position-list size field. */
2162 int iPoslist;
2163 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2164 iPoslist = pIter->iTermLeafOffset;
2165 }else{
2166 iPoslist = 4;
2168 fts5IndexSkipVarint(pLeaf->p, iPoslist);
2169 pIter->iLeafOffset = iPoslist;
2171 /* If this condition is true then the largest rowid for the current
2172 ** term may not be stored on the current page. So search forward to
2173 ** see where said rowid really is. */
2174 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2175 int pgno;
2176 Fts5StructureSegment *pSeg = pIter->pSeg;
2178 /* The last rowid in the doclist may not be on the current page. Search
2179 ** forward to find the page containing the last rowid. */
2180 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2181 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2182 Fts5Data *pNew = fts5LeafRead(p, iAbs);
2183 if( pNew ){
2184 int iRowid, bTermless;
2185 iRowid = fts5LeafFirstRowidOff(pNew);
2186 bTermless = fts5LeafIsTermless(pNew);
2187 if( iRowid ){
2188 SWAPVAL(Fts5Data*, pNew, pLast);
2189 pgnoLast = pgno;
2191 fts5DataRelease(pNew);
2192 if( bTermless==0 ) break;
2198 /* If pLast is NULL at this point, then the last rowid for this doclist
2199 ** lies on the page currently indicated by the iterator. In this case
2200 ** pIter->iLeafOffset is already set to point to the position-list size
2201 ** field associated with the first relevant rowid on the page.
2203 ** Or, if pLast is non-NULL, then it is the page that contains the last
2204 ** rowid. In this case configure the iterator so that it points to the
2205 ** first rowid on this page.
2207 if( pLast ){
2208 int iOff;
2209 fts5DataRelease(pIter->pLeaf);
2210 pIter->pLeaf = pLast;
2211 pIter->iLeafPgno = pgnoLast;
2212 iOff = fts5LeafFirstRowidOff(pLast);
2213 if( iOff>pLast->szLeaf ){
2214 p->rc = FTS5_CORRUPT;
2215 return;
2217 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2218 pIter->iLeafOffset = iOff;
2220 if( fts5LeafIsTermless(pLast) ){
2221 pIter->iEndofDoclist = pLast->nn+1;
2222 }else{
2223 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2227 fts5SegIterReverseInitPage(p, pIter);
2231 ** Iterator pIter currently points to the first rowid of a doclist.
2232 ** There is a doclist-index associated with the final term on the current
2233 ** page. If the current term is the last term on the page, load the
2234 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2236 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2237 int iSeg = pIter->pSeg->iSegid;
2238 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2239 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2241 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2242 assert( pIter->pDlidx==0 );
2244 /* Check if the current doclist ends on this page. If it does, return
2245 ** early without loading the doclist-index (as it belongs to a different
2246 ** term. */
2247 if( pIter->iTermLeafPgno==pIter->iLeafPgno
2248 && pIter->iEndofDoclist<pLeaf->szLeaf
2250 return;
2253 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2257 ** The iterator object passed as the second argument currently contains
2258 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2259 ** function searches the leaf page for a term matching (pTerm/nTerm).
2261 ** If the specified term is found on the page, then the iterator is left
2262 ** pointing to it. If argument bGe is zero and the term is not found,
2263 ** the iterator is left pointing at EOF.
2265 ** If bGe is non-zero and the specified term is not found, then the
2266 ** iterator is left pointing to the smallest term in the segment that
2267 ** is larger than the specified term, even if this term is not on the
2268 ** current page.
2270 static void fts5LeafSeek(
2271 Fts5Index *p, /* Leave any error code here */
2272 int bGe, /* True for a >= search */
2273 Fts5SegIter *pIter, /* Iterator to seek */
2274 const u8 *pTerm, int nTerm /* Term to search for */
2276 u32 iOff;
2277 const u8 *a = pIter->pLeaf->p;
2278 u32 n = (u32)pIter->pLeaf->nn;
2280 u32 nMatch = 0;
2281 u32 nKeep = 0;
2282 u32 nNew = 0;
2283 u32 iTermOff;
2284 u32 iPgidx; /* Current offset in pgidx */
2285 int bEndOfPage = 0;
2287 assert( p->rc==SQLITE_OK );
2289 iPgidx = (u32)pIter->pLeaf->szLeaf;
2290 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2291 iOff = iTermOff;
2292 if( iOff>n ){
2293 p->rc = FTS5_CORRUPT;
2294 return;
2297 while( 1 ){
2299 /* Figure out how many new bytes are in this term */
2300 fts5FastGetVarint32(a, iOff, nNew);
2301 if( nKeep<nMatch ){
2302 goto search_failed;
2305 assert( nKeep>=nMatch );
2306 if( nKeep==nMatch ){
2307 u32 nCmp;
2308 u32 i;
2309 nCmp = (u32)MIN(nNew, nTerm-nMatch);
2310 for(i=0; i<nCmp; i++){
2311 if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2313 nMatch += i;
2315 if( (u32)nTerm==nMatch ){
2316 if( i==nNew ){
2317 goto search_success;
2318 }else{
2319 goto search_failed;
2321 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2322 goto search_failed;
2326 if( iPgidx>=n ){
2327 bEndOfPage = 1;
2328 break;
2331 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2332 iTermOff += nKeep;
2333 iOff = iTermOff;
2335 if( iOff>=n ){
2336 p->rc = FTS5_CORRUPT;
2337 return;
2340 /* Read the nKeep field of the next term. */
2341 fts5FastGetVarint32(a, iOff, nKeep);
2344 search_failed:
2345 if( bGe==0 ){
2346 fts5DataRelease(pIter->pLeaf);
2347 pIter->pLeaf = 0;
2348 return;
2349 }else if( bEndOfPage ){
2350 do {
2351 fts5SegIterNextPage(p, pIter);
2352 if( pIter->pLeaf==0 ) return;
2353 a = pIter->pLeaf->p;
2354 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2355 iPgidx = (u32)pIter->pLeaf->szLeaf;
2356 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2357 if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
2358 p->rc = FTS5_CORRUPT;
2359 return;
2360 }else{
2361 nKeep = 0;
2362 iTermOff = iOff;
2363 n = (u32)pIter->pLeaf->nn;
2364 iOff += fts5GetVarint32(&a[iOff], nNew);
2365 break;
2368 }while( 1 );
2371 search_success:
2372 if( (i64)iOff+nNew>n || nNew<1 ){
2373 p->rc = FTS5_CORRUPT;
2374 return;
2376 pIter->iLeafOffset = iOff + nNew;
2377 pIter->iTermLeafOffset = pIter->iLeafOffset;
2378 pIter->iTermLeafPgno = pIter->iLeafPgno;
2380 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2381 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2383 if( iPgidx>=n ){
2384 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2385 }else{
2386 int nExtra;
2387 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2388 pIter->iEndofDoclist = iTermOff + nExtra;
2390 pIter->iPgidxOff = iPgidx;
2392 fts5SegIterLoadRowid(p, pIter);
2393 fts5SegIterLoadNPos(p, pIter);
2396 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2397 if( p->pIdxSelect==0 ){
2398 Fts5Config *pConfig = p->pConfig;
2399 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2400 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2401 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2402 pConfig->zDb, pConfig->zName
2405 return p->pIdxSelect;
2409 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2410 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2412 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2413 ** an error has already occurred when this function is called, it is a no-op.
2415 static void fts5SegIterSeekInit(
2416 Fts5Index *p, /* FTS5 backend */
2417 const u8 *pTerm, int nTerm, /* Term to seek to */
2418 int flags, /* Mask of FTS5INDEX_XXX flags */
2419 Fts5StructureSegment *pSeg, /* Description of segment */
2420 Fts5SegIter *pIter /* Object to populate */
2422 int iPg = 1;
2423 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2424 int bDlidx = 0; /* True if there is a doclist-index */
2425 sqlite3_stmt *pIdxSelect = 0;
2427 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2428 assert( pTerm && nTerm );
2429 memset(pIter, 0, sizeof(*pIter));
2430 pIter->pSeg = pSeg;
2432 /* This block sets stack variable iPg to the leaf page number that may
2433 ** contain term (pTerm/nTerm), if it is present in the segment. */
2434 pIdxSelect = fts5IdxSelectStmt(p);
2435 if( p->rc ) return;
2436 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2437 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2438 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2439 i64 val = sqlite3_column_int(pIdxSelect, 0);
2440 iPg = (int)(val>>1);
2441 bDlidx = (val & 0x0001);
2443 p->rc = sqlite3_reset(pIdxSelect);
2444 sqlite3_bind_null(pIdxSelect, 2);
2446 if( iPg<pSeg->pgnoFirst ){
2447 iPg = pSeg->pgnoFirst;
2448 bDlidx = 0;
2451 pIter->iLeafPgno = iPg - 1;
2452 fts5SegIterNextPage(p, pIter);
2454 if( pIter->pLeaf ){
2455 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2458 if( p->rc==SQLITE_OK && bGe==0 ){
2459 pIter->flags |= FTS5_SEGITER_ONETERM;
2460 if( pIter->pLeaf ){
2461 if( flags & FTS5INDEX_QUERY_DESC ){
2462 pIter->flags |= FTS5_SEGITER_REVERSE;
2464 if( bDlidx ){
2465 fts5SegIterLoadDlidx(p, pIter);
2467 if( flags & FTS5INDEX_QUERY_DESC ){
2468 fts5SegIterReverse(p, pIter);
2473 fts5SegIterSetNext(p, pIter);
2475 /* Either:
2477 ** 1) an error has occurred, or
2478 ** 2) the iterator points to EOF, or
2479 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2480 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2481 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2483 assert_nc( p->rc!=SQLITE_OK /* 1 */
2484 || pIter->pLeaf==0 /* 2 */
2485 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
2486 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
2491 ** Initialize the object pIter to point to term pTerm/nTerm within the
2492 ** in-memory hash table. If there is no such term in the hash-table, the
2493 ** iterator is set to EOF.
2495 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2496 ** an error has already occurred when this function is called, it is a no-op.
2498 static void fts5SegIterHashInit(
2499 Fts5Index *p, /* FTS5 backend */
2500 const u8 *pTerm, int nTerm, /* Term to seek to */
2501 int flags, /* Mask of FTS5INDEX_XXX flags */
2502 Fts5SegIter *pIter /* Object to populate */
2504 int nList = 0;
2505 const u8 *z = 0;
2506 int n = 0;
2507 Fts5Data *pLeaf = 0;
2509 assert( p->pHash );
2510 assert( p->rc==SQLITE_OK );
2512 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2513 const u8 *pList = 0;
2515 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2516 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
2517 n = (z ? (int)strlen((const char*)z) : 0);
2518 if( pList ){
2519 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2520 if( pLeaf ){
2521 pLeaf->p = (u8*)pList;
2524 }else{
2525 p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
2526 (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
2528 if( pLeaf ){
2529 pLeaf->p = (u8*)&pLeaf[1];
2531 z = pTerm;
2532 n = nTerm;
2533 pIter->flags |= FTS5_SEGITER_ONETERM;
2536 if( pLeaf ){
2537 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2538 pLeaf->nn = pLeaf->szLeaf = nList;
2539 pIter->pLeaf = pLeaf;
2540 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2541 pIter->iEndofDoclist = pLeaf->nn;
2543 if( flags & FTS5INDEX_QUERY_DESC ){
2544 pIter->flags |= FTS5_SEGITER_REVERSE;
2545 fts5SegIterReverseInitPage(p, pIter);
2546 }else{
2547 fts5SegIterLoadNPos(p, pIter);
2551 fts5SegIterSetNext(p, pIter);
2555 ** Zero the iterator passed as the only argument.
2557 static void fts5SegIterClear(Fts5SegIter *pIter){
2558 fts5BufferFree(&pIter->term);
2559 fts5DataRelease(pIter->pLeaf);
2560 fts5DataRelease(pIter->pNextLeaf);
2561 fts5DlidxIterFree(pIter->pDlidx);
2562 sqlite3_free(pIter->aRowidOffset);
2563 memset(pIter, 0, sizeof(Fts5SegIter));
2566 #ifdef SQLITE_DEBUG
2569 ** This function is used as part of the big assert() procedure implemented by
2570 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2571 ** in *pRes is the correct result of comparing the current positions of the
2572 ** two iterators.
2574 static void fts5AssertComparisonResult(
2575 Fts5Iter *pIter,
2576 Fts5SegIter *p1,
2577 Fts5SegIter *p2,
2578 Fts5CResult *pRes
2580 int i1 = p1 - pIter->aSeg;
2581 int i2 = p2 - pIter->aSeg;
2583 if( p1->pLeaf || p2->pLeaf ){
2584 if( p1->pLeaf==0 ){
2585 assert( pRes->iFirst==i2 );
2586 }else if( p2->pLeaf==0 ){
2587 assert( pRes->iFirst==i1 );
2588 }else{
2589 int nMin = MIN(p1->term.n, p2->term.n);
2590 int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
2591 if( res==0 ) res = p1->term.n - p2->term.n;
2593 if( res==0 ){
2594 assert( pRes->bTermEq==1 );
2595 assert( p1->iRowid!=p2->iRowid );
2596 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
2597 }else{
2598 assert( pRes->bTermEq==0 );
2601 if( res<0 ){
2602 assert( pRes->iFirst==i1 );
2603 }else{
2604 assert( pRes->iFirst==i2 );
2611 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2612 ** is compiled. In that case, this function is essentially an assert()
2613 ** statement used to verify that the contents of the pIter->aFirst[] array
2614 ** are correct.
2616 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
2617 if( p->rc==SQLITE_OK ){
2618 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2619 int i;
2621 assert( (pFirst->pLeaf==0)==pIter->base.bEof );
2623 /* Check that pIter->iSwitchRowid is set correctly. */
2624 for(i=0; i<pIter->nSeg; i++){
2625 Fts5SegIter *p1 = &pIter->aSeg[i];
2626 assert( p1==pFirst
2627 || p1->pLeaf==0
2628 || fts5BufferCompare(&pFirst->term, &p1->term)
2629 || p1->iRowid==pIter->iSwitchRowid
2630 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2634 for(i=0; i<pIter->nSeg; i+=2){
2635 Fts5SegIter *p1 = &pIter->aSeg[i];
2636 Fts5SegIter *p2 = &pIter->aSeg[i+1];
2637 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
2638 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2641 for(i=1; i<(pIter->nSeg / 2); i+=2){
2642 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
2643 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
2644 Fts5CResult *pRes = &pIter->aFirst[i];
2645 fts5AssertComparisonResult(pIter, p1, p2, pRes);
2649 #else
2650 # define fts5AssertMultiIterSetup(x,y)
2651 #endif
2654 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2656 ** If the returned value is non-zero, then it is the index of an entry
2657 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2658 ** to a key that is a duplicate of another, higher priority,
2659 ** segment-iterator in the pSeg->aSeg[] array.
2661 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2662 int i1; /* Index of left-hand Fts5SegIter */
2663 int i2; /* Index of right-hand Fts5SegIter */
2664 int iRes;
2665 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
2666 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
2667 Fts5CResult *pRes = &pIter->aFirst[iOut];
2669 assert( iOut<pIter->nSeg && iOut>0 );
2670 assert( pIter->bRev==0 || pIter->bRev==1 );
2672 if( iOut>=(pIter->nSeg/2) ){
2673 i1 = (iOut - pIter->nSeg/2) * 2;
2674 i2 = i1 + 1;
2675 }else{
2676 i1 = pIter->aFirst[iOut*2].iFirst;
2677 i2 = pIter->aFirst[iOut*2+1].iFirst;
2679 p1 = &pIter->aSeg[i1];
2680 p2 = &pIter->aSeg[i2];
2682 pRes->bTermEq = 0;
2683 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
2684 iRes = i2;
2685 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
2686 iRes = i1;
2687 }else{
2688 int res = fts5BufferCompare(&p1->term, &p2->term);
2689 if( res==0 ){
2690 assert_nc( i2>i1 );
2691 assert_nc( i2!=0 );
2692 pRes->bTermEq = 1;
2693 if( p1->iRowid==p2->iRowid ){
2694 p1->bDel = p2->bDel;
2695 return i2;
2697 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2699 assert( res!=0 );
2700 if( res<0 ){
2701 iRes = i1;
2702 }else{
2703 iRes = i2;
2707 pRes->iFirst = (u16)iRes;
2708 return 0;
2712 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2713 ** It is an error if leaf iLeafPgno does not exist. Unless the db is
2714 ** a 'secure-delete' db, if it contains no rowids then this is also an error.
2716 static void fts5SegIterGotoPage(
2717 Fts5Index *p, /* FTS5 backend object */
2718 Fts5SegIter *pIter, /* Iterator to advance */
2719 int iLeafPgno
2721 assert( iLeafPgno>pIter->iLeafPgno );
2723 if( iLeafPgno>pIter->pSeg->pgnoLast ){
2724 p->rc = FTS5_CORRUPT;
2725 }else{
2726 fts5DataRelease(pIter->pNextLeaf);
2727 pIter->pNextLeaf = 0;
2728 pIter->iLeafPgno = iLeafPgno-1;
2730 while( p->rc==SQLITE_OK ){
2731 int iOff;
2732 fts5SegIterNextPage(p, pIter);
2733 if( pIter->pLeaf==0 ) break;
2734 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2735 if( iOff>0 ){
2736 u8 *a = pIter->pLeaf->p;
2737 int n = pIter->pLeaf->szLeaf;
2738 if( iOff<4 || iOff>=n ){
2739 p->rc = FTS5_CORRUPT;
2740 }else{
2741 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2742 pIter->iLeafOffset = iOff;
2743 fts5SegIterLoadNPos(p, pIter);
2745 break;
2752 ** Advance the iterator passed as the second argument until it is at or
2753 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2754 ** always advanced at least once.
2756 static void fts5SegIterNextFrom(
2757 Fts5Index *p, /* FTS5 backend object */
2758 Fts5SegIter *pIter, /* Iterator to advance */
2759 i64 iMatch /* Advance iterator at least this far */
2761 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2762 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2763 int iLeafPgno = pIter->iLeafPgno;
2764 int bMove = 1;
2766 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2767 assert( pIter->pDlidx );
2768 assert( pIter->pLeaf );
2770 if( bRev==0 ){
2771 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
2772 iLeafPgno = fts5DlidxIterPgno(pDlidx);
2773 fts5DlidxIterNext(p, pDlidx);
2775 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
2776 if( iLeafPgno>pIter->iLeafPgno ){
2777 fts5SegIterGotoPage(p, pIter, iLeafPgno);
2778 bMove = 0;
2780 }else{
2781 assert( pIter->pNextLeaf==0 );
2782 assert( iMatch<pIter->iRowid );
2783 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
2784 fts5DlidxIterPrev(p, pDlidx);
2786 iLeafPgno = fts5DlidxIterPgno(pDlidx);
2788 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
2790 if( iLeafPgno<pIter->iLeafPgno ){
2791 pIter->iLeafPgno = iLeafPgno+1;
2792 fts5SegIterReverseNewPage(p, pIter);
2793 bMove = 0;
2798 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
2799 if( pIter->pLeaf==0 ) break;
2800 if( bRev==0 && pIter->iRowid>=iMatch ) break;
2801 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
2802 bMove = 1;
2803 }while( p->rc==SQLITE_OK );
2808 ** Free the iterator object passed as the second argument.
2810 static void fts5MultiIterFree(Fts5Iter *pIter){
2811 if( pIter ){
2812 int i;
2813 for(i=0; i<pIter->nSeg; i++){
2814 fts5SegIterClear(&pIter->aSeg[i]);
2816 fts5BufferFree(&pIter->poslist);
2817 sqlite3_free(pIter);
2821 static void fts5MultiIterAdvanced(
2822 Fts5Index *p, /* FTS5 backend to iterate within */
2823 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
2824 int iChanged, /* Index of sub-iterator just advanced */
2825 int iMinset /* Minimum entry in aFirst[] to set */
2827 int i;
2828 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
2829 int iEq;
2830 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
2831 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
2832 assert( p->rc==SQLITE_OK );
2833 pSeg->xNext(p, pSeg, 0);
2834 i = pIter->nSeg + iEq;
2840 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2841 ** points to the same term though - just a different rowid. This function
2842 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2843 ** If it does so successfully, 0 is returned. Otherwise 1.
2845 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2846 ** on the iterator instead. That function does the same as this one, except
2847 ** that it deals with more complicated cases as well.
2849 static int fts5MultiIterAdvanceRowid(
2850 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
2851 int iChanged, /* Index of sub-iterator just advanced */
2852 Fts5SegIter **ppFirst
2854 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
2856 if( pNew->iRowid==pIter->iSwitchRowid
2857 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
2859 int i;
2860 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
2861 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
2862 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
2863 Fts5CResult *pRes = &pIter->aFirst[i];
2865 assert( pNew->pLeaf );
2866 assert( pRes->bTermEq==0 || pOther->pLeaf );
2868 if( pRes->bTermEq ){
2869 if( pNew->iRowid==pOther->iRowid ){
2870 return 1;
2871 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
2872 pIter->iSwitchRowid = pOther->iRowid;
2873 pNew = pOther;
2874 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
2875 pIter->iSwitchRowid = pOther->iRowid;
2878 pRes->iFirst = (u16)(pNew - pIter->aSeg);
2879 if( i==1 ) break;
2881 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
2885 *ppFirst = pNew;
2886 return 0;
2890 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2892 static void fts5MultiIterSetEof(Fts5Iter *pIter){
2893 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2894 pIter->base.bEof = pSeg->pLeaf==0;
2895 pIter->iSwitchRowid = pSeg->iRowid;
2899 ** Move the iterator to the next entry.
2901 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2902 ** considered an error if the iterator reaches EOF, or if it is already at
2903 ** EOF when this function is called.
2905 static void fts5MultiIterNext(
2906 Fts5Index *p,
2907 Fts5Iter *pIter,
2908 int bFrom, /* True if argument iFrom is valid */
2909 i64 iFrom /* Advance at least as far as this */
2911 int bUseFrom = bFrom;
2912 assert( pIter->base.bEof==0 );
2913 while( p->rc==SQLITE_OK ){
2914 int iFirst = pIter->aFirst[1].iFirst;
2915 int bNewTerm = 0;
2916 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2917 assert( p->rc==SQLITE_OK );
2918 if( bUseFrom && pSeg->pDlidx ){
2919 fts5SegIterNextFrom(p, pSeg, iFrom);
2920 }else{
2921 pSeg->xNext(p, pSeg, &bNewTerm);
2924 if( pSeg->pLeaf==0 || bNewTerm
2925 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2927 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2928 fts5MultiIterSetEof(pIter);
2929 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2930 if( pSeg->pLeaf==0 ) return;
2933 fts5AssertMultiIterSetup(p, pIter);
2934 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
2935 if( pIter->bSkipEmpty==0 || pSeg->nPos ){
2936 pIter->xSetOutputs(pIter, pSeg);
2937 return;
2939 bUseFrom = 0;
2943 static void fts5MultiIterNext2(
2944 Fts5Index *p,
2945 Fts5Iter *pIter,
2946 int *pbNewTerm /* OUT: True if *might* be new term */
2948 assert( pIter->bSkipEmpty );
2949 if( p->rc==SQLITE_OK ){
2950 *pbNewTerm = 0;
2952 int iFirst = pIter->aFirst[1].iFirst;
2953 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2954 int bNewTerm = 0;
2956 assert( p->rc==SQLITE_OK );
2957 pSeg->xNext(p, pSeg, &bNewTerm);
2958 if( pSeg->pLeaf==0 || bNewTerm
2959 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2961 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2962 fts5MultiIterSetEof(pIter);
2963 *pbNewTerm = 1;
2965 fts5AssertMultiIterSetup(p, pIter);
2967 }while( fts5MultiIterIsEmpty(p, pIter) );
2971 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
2972 UNUSED_PARAM2(pUnused1, pUnused2);
2975 static Fts5Iter *fts5MultiIterAlloc(
2976 Fts5Index *p, /* FTS5 backend to iterate within */
2977 int nSeg
2979 Fts5Iter *pNew;
2980 int nSlot; /* Power of two >= nSeg */
2982 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2983 pNew = fts5IdxMalloc(p,
2984 sizeof(Fts5Iter) + /* pNew */
2985 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
2986 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
2988 if( pNew ){
2989 pNew->nSeg = nSlot;
2990 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
2991 pNew->pIndex = p;
2992 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
2994 return pNew;
2997 static void fts5PoslistCallback(
2998 Fts5Index *pUnused,
2999 void *pContext,
3000 const u8 *pChunk, int nChunk
3002 UNUSED_PARAM(pUnused);
3003 assert_nc( nChunk>=0 );
3004 if( nChunk>0 ){
3005 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
3009 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
3010 struct PoslistCallbackCtx {
3011 Fts5Buffer *pBuf; /* Append to this buffer */
3012 Fts5Colset *pColset; /* Restrict matches to this column */
3013 int eState; /* See above */
3016 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
3017 struct PoslistOffsetsCtx {
3018 Fts5Buffer *pBuf; /* Append to this buffer */
3019 Fts5Colset *pColset; /* Restrict matches to this column */
3020 int iRead;
3021 int iWrite;
3025 ** TODO: Make this more efficient!
3027 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
3028 int i;
3029 for(i=0; i<pColset->nCol; i++){
3030 if( pColset->aiCol[i]==iCol ) return 1;
3032 return 0;
3035 static void fts5PoslistOffsetsCallback(
3036 Fts5Index *pUnused,
3037 void *pContext,
3038 const u8 *pChunk, int nChunk
3040 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
3041 UNUSED_PARAM(pUnused);
3042 assert_nc( nChunk>=0 );
3043 if( nChunk>0 ){
3044 int i = 0;
3045 while( i<nChunk ){
3046 int iVal;
3047 i += fts5GetVarint32(&pChunk[i], iVal);
3048 iVal += pCtx->iRead - 2;
3049 pCtx->iRead = iVal;
3050 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
3051 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
3052 pCtx->iWrite = iVal;
3058 static void fts5PoslistFilterCallback(
3059 Fts5Index *pUnused,
3060 void *pContext,
3061 const u8 *pChunk, int nChunk
3063 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
3064 UNUSED_PARAM(pUnused);
3065 assert_nc( nChunk>=0 );
3066 if( nChunk>0 ){
3067 /* Search through to find the first varint with value 1. This is the
3068 ** start of the next columns hits. */
3069 int i = 0;
3070 int iStart = 0;
3072 if( pCtx->eState==2 ){
3073 int iCol;
3074 fts5FastGetVarint32(pChunk, i, iCol);
3075 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
3076 pCtx->eState = 1;
3077 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
3078 }else{
3079 pCtx->eState = 0;
3083 do {
3084 while( i<nChunk && pChunk[i]!=0x01 ){
3085 while( pChunk[i] & 0x80 ) i++;
3086 i++;
3088 if( pCtx->eState ){
3089 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3091 if( i<nChunk ){
3092 int iCol;
3093 iStart = i;
3094 i++;
3095 if( i>=nChunk ){
3096 pCtx->eState = 2;
3097 }else{
3098 fts5FastGetVarint32(pChunk, i, iCol);
3099 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
3100 if( pCtx->eState ){
3101 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3102 iStart = i;
3106 }while( i<nChunk );
3110 static void fts5ChunkIterate(
3111 Fts5Index *p, /* Index object */
3112 Fts5SegIter *pSeg, /* Poslist of this iterator */
3113 void *pCtx, /* Context pointer for xChunk callback */
3114 void (*xChunk)(Fts5Index*, void*, const u8*, int)
3116 int nRem = pSeg->nPos; /* Number of bytes still to come */
3117 Fts5Data *pData = 0;
3118 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3119 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
3120 int pgno = pSeg->iLeafPgno;
3121 int pgnoSave = 0;
3123 /* This function does not work with detail=none databases. */
3124 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
3126 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
3127 pgnoSave = pgno+1;
3130 while( 1 ){
3131 xChunk(p, pCtx, pChunk, nChunk);
3132 nRem -= nChunk;
3133 fts5DataRelease(pData);
3134 if( nRem<=0 ){
3135 break;
3136 }else if( pSeg->pSeg==0 ){
3137 p->rc = FTS5_CORRUPT;
3138 return;
3139 }else{
3140 pgno++;
3141 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
3142 if( pData==0 ) break;
3143 pChunk = &pData->p[4];
3144 nChunk = MIN(nRem, pData->szLeaf - 4);
3145 if( pgno==pgnoSave ){
3146 assert( pSeg->pNextLeaf==0 );
3147 pSeg->pNextLeaf = pData;
3148 pData = 0;
3155 ** Iterator pIter currently points to a valid entry (not EOF). This
3156 ** function appends the position list data for the current entry to
3157 ** buffer pBuf. It does not make a copy of the position-list size
3158 ** field.
3160 static void fts5SegiterPoslist(
3161 Fts5Index *p,
3162 Fts5SegIter *pSeg,
3163 Fts5Colset *pColset,
3164 Fts5Buffer *pBuf
3166 assert( pBuf!=0 );
3167 assert( pSeg!=0 );
3168 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
3169 assert( pBuf->p!=0 );
3170 assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
3171 memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
3172 if( pColset==0 ){
3173 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3174 }else{
3175 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3176 PoslistCallbackCtx sCtx;
3177 sCtx.pBuf = pBuf;
3178 sCtx.pColset = pColset;
3179 sCtx.eState = fts5IndexColsetTest(pColset, 0);
3180 assert( sCtx.eState==0 || sCtx.eState==1 );
3181 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3182 }else{
3183 PoslistOffsetsCtx sCtx;
3184 memset(&sCtx, 0, sizeof(sCtx));
3185 sCtx.pBuf = pBuf;
3186 sCtx.pColset = pColset;
3187 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3194 ** Parameter pPos points to a buffer containing a position list, size nPos.
3195 ** This function filters it according to pColset (which must be non-NULL)
3196 ** and sets pIter->base.pData/nData to point to the new position list.
3197 ** If memory is required for the new position list, use buffer pIter->poslist.
3198 ** Or, if the new position list is a contiguous subset of the input, set
3199 ** pIter->base.pData/nData to point directly to it.
3201 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
3202 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
3203 ** before returning.
3205 static void fts5IndexExtractColset(
3206 int *pRc,
3207 Fts5Colset *pColset, /* Colset to filter on */
3208 const u8 *pPos, int nPos, /* Position list */
3209 Fts5Iter *pIter
3211 if( *pRc==SQLITE_OK ){
3212 const u8 *p = pPos;
3213 const u8 *aCopy = p;
3214 const u8 *pEnd = &p[nPos]; /* One byte past end of position list */
3215 int i = 0;
3216 int iCurrent = 0;
3218 if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
3219 return;
3222 while( 1 ){
3223 while( pColset->aiCol[i]<iCurrent ){
3224 i++;
3225 if( i==pColset->nCol ){
3226 pIter->base.pData = pIter->poslist.p;
3227 pIter->base.nData = pIter->poslist.n;
3228 return;
3232 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3233 ** not part of a varint */
3234 while( p<pEnd && *p!=0x01 ){
3235 while( *p++ & 0x80 );
3238 if( pColset->aiCol[i]==iCurrent ){
3239 if( pColset->nCol==1 ){
3240 pIter->base.pData = aCopy;
3241 pIter->base.nData = p-aCopy;
3242 return;
3244 fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
3246 if( p>=pEnd ){
3247 pIter->base.pData = pIter->poslist.p;
3248 pIter->base.nData = pIter->poslist.n;
3249 return;
3251 aCopy = p++;
3252 iCurrent = *p++;
3253 if( iCurrent & 0x80 ){
3254 p--;
3255 p += fts5GetVarint32(p, iCurrent);
3263 ** xSetOutputs callback used by detail=none tables.
3265 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3266 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3267 pIter->base.iRowid = pSeg->iRowid;
3268 pIter->base.nData = pSeg->nPos;
3272 ** xSetOutputs callback used by detail=full and detail=col tables when no
3273 ** column filters are specified.
3275 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3276 pIter->base.iRowid = pSeg->iRowid;
3277 pIter->base.nData = pSeg->nPos;
3279 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3280 assert( pIter->pColset==0 );
3282 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3283 /* All data is stored on the current page. Populate the output
3284 ** variables to point into the body of the page object. */
3285 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3286 }else{
3287 /* The data is distributed over two or more pages. Copy it into the
3288 ** Fts5Iter.poslist buffer and then set the output pointer to point
3289 ** to this buffer. */
3290 fts5BufferZero(&pIter->poslist);
3291 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3292 pIter->base.pData = pIter->poslist.p;
3297 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3298 ** against no columns at all).
3300 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3301 UNUSED_PARAM(pSeg);
3302 pIter->base.nData = 0;
3306 ** xSetOutputs callback used by detail=col when there is a column filter
3307 ** and there are 100 or more columns. Also called as a fallback from
3308 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3310 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3311 fts5BufferZero(&pIter->poslist);
3312 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3313 pIter->base.iRowid = pSeg->iRowid;
3314 pIter->base.pData = pIter->poslist.p;
3315 pIter->base.nData = pIter->poslist.n;
3319 ** xSetOutputs callback used when:
3321 ** * detail=col,
3322 ** * there is a column filter, and
3323 ** * the table contains 100 or fewer columns.
3325 ** The last point is to ensure all column numbers are stored as
3326 ** single-byte varints.
3328 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3330 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3331 assert( pIter->pColset );
3333 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3334 fts5IterSetOutputs_Col(pIter, pSeg);
3335 }else{
3336 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3337 u8 *pEnd = (u8*)&a[pSeg->nPos];
3338 int iPrev = 0;
3339 int *aiCol = pIter->pColset->aiCol;
3340 int *aiColEnd = &aiCol[pIter->pColset->nCol];
3342 u8 *aOut = pIter->poslist.p;
3343 int iPrevOut = 0;
3345 pIter->base.iRowid = pSeg->iRowid;
3347 while( a<pEnd ){
3348 iPrev += (int)a++[0] - 2;
3349 while( *aiCol<iPrev ){
3350 aiCol++;
3351 if( aiCol==aiColEnd ) goto setoutputs_col_out;
3353 if( *aiCol==iPrev ){
3354 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3355 iPrevOut = iPrev;
3359 setoutputs_col_out:
3360 pIter->base.pData = pIter->poslist.p;
3361 pIter->base.nData = aOut - pIter->poslist.p;
3366 ** xSetOutputs callback used by detail=full when there is a column filter.
3368 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3369 Fts5Colset *pColset = pIter->pColset;
3370 pIter->base.iRowid = pSeg->iRowid;
3372 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3373 assert( pColset );
3375 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3376 /* All data is stored on the current page. Populate the output
3377 ** variables to point into the body of the page object. */
3378 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3379 int *pRc = &pIter->pIndex->rc;
3380 fts5BufferZero(&pIter->poslist);
3381 fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
3382 }else{
3383 /* The data is distributed over two or more pages. Copy it into the
3384 ** Fts5Iter.poslist buffer and then set the output pointer to point
3385 ** to this buffer. */
3386 fts5BufferZero(&pIter->poslist);
3387 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3388 pIter->base.pData = pIter->poslist.p;
3389 pIter->base.nData = pIter->poslist.n;
3393 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
3394 assert( pIter!=0 || (*pRc)!=SQLITE_OK );
3395 if( *pRc==SQLITE_OK ){
3396 Fts5Config *pConfig = pIter->pIndex->pConfig;
3397 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
3398 pIter->xSetOutputs = fts5IterSetOutputs_None;
3401 else if( pIter->pColset==0 ){
3402 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
3405 else if( pIter->pColset->nCol==0 ){
3406 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
3409 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
3410 pIter->xSetOutputs = fts5IterSetOutputs_Full;
3413 else{
3414 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3415 if( pConfig->nCol<=100 ){
3416 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
3417 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
3418 }else{
3419 pIter->xSetOutputs = fts5IterSetOutputs_Col;
3427 ** Allocate a new Fts5Iter object.
3429 ** The new object will be used to iterate through data in structure pStruct.
3430 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3431 ** is zero or greater, data from the first nSegment segments on level iLevel
3432 ** is merged.
3434 ** The iterator initially points to the first term/rowid entry in the
3435 ** iterated data.
3437 static void fts5MultiIterNew(
3438 Fts5Index *p, /* FTS5 backend to iterate within */
3439 Fts5Structure *pStruct, /* Structure of specific index */
3440 int flags, /* FTS5INDEX_QUERY_XXX flags */
3441 Fts5Colset *pColset, /* Colset to filter on (or NULL) */
3442 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
3443 int iLevel, /* Level to iterate (-1 for all) */
3444 int nSegment, /* Number of segments to merge (iLevel>=0) */
3445 Fts5Iter **ppOut /* New object */
3447 int nSeg = 0; /* Number of segment-iters in use */
3448 int iIter = 0; /* */
3449 int iSeg; /* Used to iterate through segments */
3450 Fts5StructureLevel *pLvl;
3451 Fts5Iter *pNew;
3453 assert( (pTerm==0 && nTerm==0) || iLevel<0 );
3455 /* Allocate space for the new multi-seg-iterator. */
3456 if( p->rc==SQLITE_OK ){
3457 if( iLevel<0 ){
3458 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3459 nSeg = pStruct->nSegment;
3460 nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH));
3461 }else{
3462 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
3465 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
3466 if( pNew==0 ){
3467 assert( p->rc!=SQLITE_OK );
3468 goto fts5MultiIterNew_post_check;
3470 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
3471 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
3472 pNew->pColset = pColset;
3473 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
3474 fts5IterSetOutputCb(&p->rc, pNew);
3477 /* Initialize each of the component segment iterators. */
3478 if( p->rc==SQLITE_OK ){
3479 if( iLevel<0 ){
3480 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
3481 if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH) ){
3482 /* Add a segment iterator for the current contents of the hash table. */
3483 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3484 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
3486 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
3487 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
3488 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
3489 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3490 if( pTerm==0 ){
3491 fts5SegIterInit(p, pSeg, pIter);
3492 }else{
3493 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
3497 }else{
3498 pLvl = &pStruct->aLevel[iLevel];
3499 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
3500 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
3503 assert( iIter==nSeg );
3506 /* If the above was successful, each component iterators now points
3507 ** to the first entry in its segment. In this case initialize the
3508 ** aFirst[] array. Or, if an error has occurred, free the iterator
3509 ** object and set the output variable to NULL. */
3510 if( p->rc==SQLITE_OK ){
3511 for(iIter=pNew->nSeg-1; iIter>0; iIter--){
3512 int iEq;
3513 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
3514 Fts5SegIter *pSeg = &pNew->aSeg[iEq];
3515 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
3516 fts5MultiIterAdvanced(p, pNew, iEq, iIter);
3519 fts5MultiIterSetEof(pNew);
3520 fts5AssertMultiIterSetup(p, pNew);
3522 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
3523 fts5MultiIterNext(p, pNew, 0, 0);
3524 }else if( pNew->base.bEof==0 ){
3525 Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
3526 pNew->xSetOutputs(pNew, pSeg);
3529 }else{
3530 fts5MultiIterFree(pNew);
3531 *ppOut = 0;
3534 fts5MultiIterNew_post_check:
3535 assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
3536 return;
3540 ** Create an Fts5Iter that iterates through the doclist provided
3541 ** as the second argument.
3543 static void fts5MultiIterNew2(
3544 Fts5Index *p, /* FTS5 backend to iterate within */
3545 Fts5Data *pData, /* Doclist to iterate through */
3546 int bDesc, /* True for descending rowid order */
3547 Fts5Iter **ppOut /* New object */
3549 Fts5Iter *pNew;
3550 pNew = fts5MultiIterAlloc(p, 2);
3551 if( pNew ){
3552 Fts5SegIter *pIter = &pNew->aSeg[1];
3554 pIter->flags = FTS5_SEGITER_ONETERM;
3555 if( pData->szLeaf>0 ){
3556 pIter->pLeaf = pData;
3557 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
3558 pIter->iEndofDoclist = pData->nn;
3559 pNew->aFirst[1].iFirst = 1;
3560 if( bDesc ){
3561 pNew->bRev = 1;
3562 pIter->flags |= FTS5_SEGITER_REVERSE;
3563 fts5SegIterReverseInitPage(p, pIter);
3564 }else{
3565 fts5SegIterLoadNPos(p, pIter);
3567 pData = 0;
3568 }else{
3569 pNew->base.bEof = 1;
3571 fts5SegIterSetNext(p, pIter);
3573 *ppOut = pNew;
3576 fts5DataRelease(pData);
3580 ** Return true if the iterator is at EOF or if an error has occurred.
3581 ** False otherwise.
3583 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
3584 assert( pIter!=0 || p->rc!=SQLITE_OK );
3585 assert( p->rc!=SQLITE_OK
3586 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
3588 return (p->rc || pIter->base.bEof);
3592 ** Return the rowid of the entry that the iterator currently points
3593 ** to. If the iterator points to EOF when this function is called the
3594 ** results are undefined.
3596 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
3597 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
3598 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
3602 ** Move the iterator to the next entry at or following iMatch.
3604 static void fts5MultiIterNextFrom(
3605 Fts5Index *p,
3606 Fts5Iter *pIter,
3607 i64 iMatch
3609 while( 1 ){
3610 i64 iRowid;
3611 fts5MultiIterNext(p, pIter, 1, iMatch);
3612 if( fts5MultiIterEof(p, pIter) ) break;
3613 iRowid = fts5MultiIterRowid(pIter);
3614 if( pIter->bRev==0 && iRowid>=iMatch ) break;
3615 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
3620 ** Return a pointer to a buffer containing the term associated with the
3621 ** entry that the iterator currently points to.
3623 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
3624 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3625 *pn = p->term.n;
3626 return p->term.p;
3630 ** Allocate a new segment-id for the structure pStruct. The new segment
3631 ** id must be between 1 and 65335 inclusive, and must not be used by
3632 ** any currently existing segment. If a free segment id cannot be found,
3633 ** SQLITE_FULL is returned.
3635 ** If an error has already occurred, this function is a no-op. 0 is
3636 ** returned in this case.
3638 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
3639 int iSegid = 0;
3641 if( p->rc==SQLITE_OK ){
3642 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
3643 p->rc = SQLITE_FULL;
3644 }else{
3645 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
3646 ** array is 63 elements, or 252 bytes, in size. */
3647 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
3648 int iLvl, iSeg;
3649 int i;
3650 u32 mask;
3651 memset(aUsed, 0, sizeof(aUsed));
3652 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3653 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3654 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
3655 if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
3656 aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
3661 for(i=0; aUsed[i]==0xFFFFFFFF; i++);
3662 mask = aUsed[i];
3663 for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
3664 iSegid += 1 + i*32;
3666 #ifdef SQLITE_DEBUG
3667 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3668 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3669 assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
3672 assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
3675 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
3676 if( p->rc==SQLITE_OK ){
3677 u8 aBlob[2] = {0xff, 0xff};
3678 sqlite3_bind_int(pIdxSelect, 1, iSegid);
3679 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
3680 assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
3681 p->rc = sqlite3_reset(pIdxSelect);
3682 sqlite3_bind_null(pIdxSelect, 2);
3685 #endif
3689 return iSegid;
3693 ** Discard all data currently cached in the hash-tables.
3695 static void fts5IndexDiscardData(Fts5Index *p){
3696 assert( p->pHash || p->nPendingData==0 );
3697 if( p->pHash ){
3698 sqlite3Fts5HashClear(p->pHash);
3699 p->nPendingData = 0;
3704 ** Return the size of the prefix, in bytes, that buffer
3705 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3707 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3708 ** than buffer (pOld/nOld).
3710 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
3711 int i;
3712 for(i=0; i<nOld; i++){
3713 if( pOld[i]!=pNew[i] ) break;
3715 return i;
3718 static void fts5WriteDlidxClear(
3719 Fts5Index *p,
3720 Fts5SegWriter *pWriter,
3721 int bFlush /* If true, write dlidx to disk */
3723 int i;
3724 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
3725 for(i=0; i<pWriter->nDlidx; i++){
3726 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3727 if( pDlidx->buf.n==0 ) break;
3728 if( bFlush ){
3729 assert( pDlidx->pgno!=0 );
3730 fts5DataWrite(p,
3731 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3732 pDlidx->buf.p, pDlidx->buf.n
3735 sqlite3Fts5BufferZero(&pDlidx->buf);
3736 pDlidx->bPrevValid = 0;
3741 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
3742 ** Any new array elements are zeroed before returning.
3744 static int fts5WriteDlidxGrow(
3745 Fts5Index *p,
3746 Fts5SegWriter *pWriter,
3747 int nLvl
3749 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
3750 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
3751 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
3753 if( aDlidx==0 ){
3754 p->rc = SQLITE_NOMEM;
3755 }else{
3756 size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
3757 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
3758 pWriter->aDlidx = aDlidx;
3759 pWriter->nDlidx = nLvl;
3762 return p->rc;
3766 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3767 ** enough, flush it to disk and return 1. Otherwise discard it and return
3768 ** zero.
3770 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
3771 int bFlag = 0;
3773 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
3774 ** to the database, also write the doclist-index to disk. */
3775 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
3776 bFlag = 1;
3778 fts5WriteDlidxClear(p, pWriter, bFlag);
3779 pWriter->nEmpty = 0;
3780 return bFlag;
3784 ** This function is called whenever processing of the doclist for the
3785 ** last term on leaf page (pWriter->iBtPage) is completed.
3787 ** The doclist-index for that term is currently stored in-memory within the
3788 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3789 ** writes it out to disk. Or, if it is too small to bother with, discards
3790 ** it.
3792 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3794 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
3795 int bFlag;
3797 assert( pWriter->iBtPage || pWriter->nEmpty==0 );
3798 if( pWriter->iBtPage==0 ) return;
3799 bFlag = fts5WriteFlushDlidx(p, pWriter);
3801 if( p->rc==SQLITE_OK ){
3802 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
3803 /* The following was already done in fts5WriteInit(): */
3804 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
3805 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
3806 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
3807 sqlite3_step(p->pIdxWriter);
3808 p->rc = sqlite3_reset(p->pIdxWriter);
3809 sqlite3_bind_null(p->pIdxWriter, 2);
3811 pWriter->iBtPage = 0;
3815 ** This is called once for each leaf page except the first that contains
3816 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
3817 ** is larger than all terms written to earlier leaves, and equal to or
3818 ** smaller than the first term on the new leaf.
3820 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
3821 ** has already occurred when this function is called, it is a no-op.
3823 static void fts5WriteBtreeTerm(
3824 Fts5Index *p, /* FTS5 backend object */
3825 Fts5SegWriter *pWriter, /* Writer object */
3826 int nTerm, const u8 *pTerm /* First term on new page */
3828 fts5WriteFlushBtree(p, pWriter);
3829 if( p->rc==SQLITE_OK ){
3830 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
3831 pWriter->iBtPage = pWriter->writer.pgno;
3836 ** This function is called when flushing a leaf page that contains no
3837 ** terms at all to disk.
3839 static void fts5WriteBtreeNoTerm(
3840 Fts5Index *p, /* FTS5 backend object */
3841 Fts5SegWriter *pWriter /* Writer object */
3843 /* If there were no rowids on the leaf page either and the doclist-index
3844 ** has already been started, append an 0x00 byte to it. */
3845 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
3846 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
3847 assert( pDlidx->bPrevValid );
3848 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
3851 /* Increment the "number of sequential leaves without a term" counter. */
3852 pWriter->nEmpty++;
3855 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
3856 i64 iRowid;
3857 int iOff;
3859 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
3860 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
3861 return iRowid;
3865 ** Rowid iRowid has just been appended to the current leaf page. It is the
3866 ** first on the page. This function appends an appropriate entry to the current
3867 ** doclist-index.
3869 static void fts5WriteDlidxAppend(
3870 Fts5Index *p,
3871 Fts5SegWriter *pWriter,
3872 i64 iRowid
3874 int i;
3875 int bDone = 0;
3877 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
3878 i64 iVal;
3879 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3881 if( pDlidx->buf.n>=p->pConfig->pgsz ){
3882 /* The current doclist-index page is full. Write it to disk and push
3883 ** a copy of iRowid (which will become the first rowid on the next
3884 ** doclist-index leaf page) up into the next level of the b-tree
3885 ** hierarchy. If the node being flushed is currently the root node,
3886 ** also push its first rowid upwards. */
3887 pDlidx->buf.p[0] = 0x01; /* Not the root node */
3888 fts5DataWrite(p,
3889 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3890 pDlidx->buf.p, pDlidx->buf.n
3892 fts5WriteDlidxGrow(p, pWriter, i+2);
3893 pDlidx = &pWriter->aDlidx[i];
3894 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
3895 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
3897 /* This was the root node. Push its first rowid up to the new root. */
3898 pDlidx[1].pgno = pDlidx->pgno;
3899 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
3900 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
3901 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
3902 pDlidx[1].bPrevValid = 1;
3903 pDlidx[1].iPrev = iFirst;
3906 sqlite3Fts5BufferZero(&pDlidx->buf);
3907 pDlidx->bPrevValid = 0;
3908 pDlidx->pgno++;
3909 }else{
3910 bDone = 1;
3913 if( pDlidx->bPrevValid ){
3914 iVal = iRowid - pDlidx->iPrev;
3915 }else{
3916 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
3917 assert( pDlidx->buf.n==0 );
3918 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
3919 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
3920 iVal = iRowid;
3923 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
3924 pDlidx->bPrevValid = 1;
3925 pDlidx->iPrev = iRowid;
3929 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
3930 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
3931 Fts5PageWriter *pPage = &pWriter->writer;
3932 i64 iRowid;
3934 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
3936 /* Set the szLeaf header field. */
3937 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
3938 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
3940 if( pWriter->bFirstTermInPage ){
3941 /* No term was written to this page. */
3942 assert( pPage->pgidx.n==0 );
3943 fts5WriteBtreeNoTerm(p, pWriter);
3944 }else{
3945 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
3946 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
3949 /* Write the page out to disk */
3950 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
3951 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
3953 /* Initialize the next page. */
3954 fts5BufferZero(&pPage->buf);
3955 fts5BufferZero(&pPage->pgidx);
3956 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
3957 pPage->iPrevPgidx = 0;
3958 pPage->pgno++;
3960 /* Increase the leaves written counter */
3961 pWriter->nLeafWritten++;
3963 /* The new leaf holds no terms or rowids */
3964 pWriter->bFirstTermInPage = 1;
3965 pWriter->bFirstRowidInPage = 1;
3969 ** Append term pTerm/nTerm to the segment being written by the writer passed
3970 ** as the second argument.
3972 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3973 ** already occurred, this function is a no-op.
3975 static void fts5WriteAppendTerm(
3976 Fts5Index *p,
3977 Fts5SegWriter *pWriter,
3978 int nTerm, const u8 *pTerm
3980 int nPrefix; /* Bytes of prefix compression for term */
3981 Fts5PageWriter *pPage = &pWriter->writer;
3982 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
3983 int nMin = MIN(pPage->term.n, nTerm);
3985 assert( p->rc==SQLITE_OK );
3986 assert( pPage->buf.n>=4 );
3987 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
3989 /* If the current leaf page is full, flush it to disk. */
3990 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
3991 if( pPage->buf.n>4 ){
3992 fts5WriteFlushLeaf(p, pWriter);
3993 if( p->rc!=SQLITE_OK ) return;
3995 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
3998 /* TODO1: Updating pgidx here. */
3999 pPgidx->n += sqlite3Fts5PutVarint(
4000 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
4002 pPage->iPrevPgidx = pPage->buf.n;
4003 #if 0
4004 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
4005 pPgidx->n += 2;
4006 #endif
4008 if( pWriter->bFirstTermInPage ){
4009 nPrefix = 0;
4010 if( pPage->pgno!=1 ){
4011 /* This is the first term on a leaf that is not the leftmost leaf in
4012 ** the segment b-tree. In this case it is necessary to add a term to
4013 ** the b-tree hierarchy that is (a) larger than the largest term
4014 ** already written to the segment and (b) smaller than or equal to
4015 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
4016 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
4017 ** previous term.
4019 ** Usually, the previous term is available in pPage->term. The exception
4020 ** is if this is the first term written in an incremental-merge step.
4021 ** In this case the previous term is not available, so just write a
4022 ** copy of (pTerm/nTerm) into the parent node. This is slightly
4023 ** inefficient, but still correct. */
4024 int n = nTerm;
4025 if( pPage->term.n ){
4026 n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4028 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
4029 if( p->rc!=SQLITE_OK ) return;
4030 pPage = &pWriter->writer;
4032 }else{
4033 nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4034 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
4037 /* Append the number of bytes of new data, then the term data itself
4038 ** to the page. */
4039 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
4040 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
4042 /* Update the Fts5PageWriter.term field. */
4043 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
4044 pWriter->bFirstTermInPage = 0;
4046 pWriter->bFirstRowidInPage = 0;
4047 pWriter->bFirstRowidInDoclist = 1;
4049 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
4050 pWriter->aDlidx[0].pgno = pPage->pgno;
4054 ** Append a rowid and position-list size field to the writers output.
4056 static void fts5WriteAppendRowid(
4057 Fts5Index *p,
4058 Fts5SegWriter *pWriter,
4059 i64 iRowid
4061 if( p->rc==SQLITE_OK ){
4062 Fts5PageWriter *pPage = &pWriter->writer;
4064 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
4065 fts5WriteFlushLeaf(p, pWriter);
4068 /* If this is to be the first rowid written to the page, set the
4069 ** rowid-pointer in the page-header. Also append a value to the dlidx
4070 ** buffer, in case a doclist-index is required. */
4071 if( pWriter->bFirstRowidInPage ){
4072 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
4073 fts5WriteDlidxAppend(p, pWriter, iRowid);
4076 /* Write the rowid. */
4077 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
4078 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
4079 }else{
4080 assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
4081 fts5BufferAppendVarint(&p->rc, &pPage->buf,
4082 (u64)iRowid - (u64)pWriter->iPrevRowid
4085 pWriter->iPrevRowid = iRowid;
4086 pWriter->bFirstRowidInDoclist = 0;
4087 pWriter->bFirstRowidInPage = 0;
4091 static void fts5WriteAppendPoslistData(
4092 Fts5Index *p,
4093 Fts5SegWriter *pWriter,
4094 const u8 *aData,
4095 int nData
4097 Fts5PageWriter *pPage = &pWriter->writer;
4098 const u8 *a = aData;
4099 int n = nData;
4101 assert( p->pConfig->pgsz>0 );
4102 while( p->rc==SQLITE_OK
4103 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
4105 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
4106 int nCopy = 0;
4107 while( nCopy<nReq ){
4108 i64 dummy;
4109 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
4111 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
4112 a += nCopy;
4113 n -= nCopy;
4114 fts5WriteFlushLeaf(p, pWriter);
4116 if( n>0 ){
4117 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
4122 ** Flush any data cached by the writer object to the database. Free any
4123 ** allocations associated with the writer.
4125 static void fts5WriteFinish(
4126 Fts5Index *p,
4127 Fts5SegWriter *pWriter, /* Writer object */
4128 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
4130 int i;
4131 Fts5PageWriter *pLeaf = &pWriter->writer;
4132 if( p->rc==SQLITE_OK ){
4133 assert( pLeaf->pgno>=1 );
4134 if( pLeaf->buf.n>4 ){
4135 fts5WriteFlushLeaf(p, pWriter);
4137 *pnLeaf = pLeaf->pgno-1;
4138 if( pLeaf->pgno>1 ){
4139 fts5WriteFlushBtree(p, pWriter);
4142 fts5BufferFree(&pLeaf->term);
4143 fts5BufferFree(&pLeaf->buf);
4144 fts5BufferFree(&pLeaf->pgidx);
4145 fts5BufferFree(&pWriter->btterm);
4147 for(i=0; i<pWriter->nDlidx; i++){
4148 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4150 sqlite3_free(pWriter->aDlidx);
4153 static void fts5WriteInit(
4154 Fts5Index *p,
4155 Fts5SegWriter *pWriter,
4156 int iSegid
4158 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
4160 memset(pWriter, 0, sizeof(Fts5SegWriter));
4161 pWriter->iSegid = iSegid;
4163 fts5WriteDlidxGrow(p, pWriter, 1);
4164 pWriter->writer.pgno = 1;
4165 pWriter->bFirstTermInPage = 1;
4166 pWriter->iBtPage = 1;
4168 assert( pWriter->writer.buf.n==0 );
4169 assert( pWriter->writer.pgidx.n==0 );
4171 /* Grow the two buffers to pgsz + padding bytes in size. */
4172 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
4173 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
4175 if( p->pIdxWriter==0 ){
4176 Fts5Config *pConfig = p->pConfig;
4177 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
4178 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
4179 pConfig->zDb, pConfig->zName
4183 if( p->rc==SQLITE_OK ){
4184 /* Initialize the 4-byte leaf-page header to 0x00. */
4185 memset(pWriter->writer.buf.p, 0, 4);
4186 pWriter->writer.buf.n = 4;
4188 /* Bind the current output segment id to the index-writer. This is an
4189 ** optimization over binding the same value over and over as rows are
4190 ** inserted into %_idx by the current writer. */
4191 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
4196 ** Iterator pIter was used to iterate through the input segments of on an
4197 ** incremental merge operation. This function is called if the incremental
4198 ** merge step has finished but the input has not been completely exhausted.
4200 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
4201 int i;
4202 Fts5Buffer buf;
4203 memset(&buf, 0, sizeof(Fts5Buffer));
4204 for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
4205 Fts5SegIter *pSeg = &pIter->aSeg[i];
4206 if( pSeg->pSeg==0 ){
4207 /* no-op */
4208 }else if( pSeg->pLeaf==0 ){
4209 /* All keys from this input segment have been transfered to the output.
4210 ** Set both the first and last page-numbers to 0 to indicate that the
4211 ** segment is now empty. */
4212 pSeg->pSeg->pgnoLast = 0;
4213 pSeg->pSeg->pgnoFirst = 0;
4214 }else{
4215 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
4216 i64 iLeafRowid;
4217 Fts5Data *pData;
4218 int iId = pSeg->pSeg->iSegid;
4219 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
4221 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
4222 pData = fts5LeafRead(p, iLeafRowid);
4223 if( pData ){
4224 if( iOff>pData->szLeaf ){
4225 /* This can occur if the pages that the segments occupy overlap - if
4226 ** a single page has been assigned to more than one segment. In
4227 ** this case a prior iteration of this loop may have corrupted the
4228 ** segment currently being trimmed. */
4229 p->rc = FTS5_CORRUPT;
4230 }else{
4231 fts5BufferZero(&buf);
4232 fts5BufferGrow(&p->rc, &buf, pData->nn);
4233 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
4234 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
4235 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
4236 fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]);
4237 if( p->rc==SQLITE_OK ){
4238 /* Set the szLeaf field */
4239 fts5PutU16(&buf.p[2], (u16)buf.n);
4242 /* Set up the new page-index array */
4243 fts5BufferAppendVarint(&p->rc, &buf, 4);
4244 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
4245 && pSeg->iEndofDoclist<pData->szLeaf
4246 && pSeg->iPgidxOff<=pData->nn
4248 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
4249 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
4250 fts5BufferAppendBlob(&p->rc, &buf,
4251 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
4255 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
4256 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
4257 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
4259 fts5DataRelease(pData);
4263 fts5BufferFree(&buf);
4266 static void fts5MergeChunkCallback(
4267 Fts5Index *p,
4268 void *pCtx,
4269 const u8 *pChunk, int nChunk
4271 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
4272 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
4278 static void fts5IndexMergeLevel(
4279 Fts5Index *p, /* FTS5 backend object */
4280 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
4281 int iLvl, /* Level to read input from */
4282 int *pnRem /* Write up to this many output leaves */
4284 Fts5Structure *pStruct = *ppStruct;
4285 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4286 Fts5StructureLevel *pLvlOut;
4287 Fts5Iter *pIter = 0; /* Iterator to read input data */
4288 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
4289 int nInput; /* Number of input segments */
4290 Fts5SegWriter writer; /* Writer object */
4291 Fts5StructureSegment *pSeg; /* Output segment */
4292 Fts5Buffer term;
4293 int bOldest; /* True if the output segment is the oldest */
4294 int eDetail = p->pConfig->eDetail;
4295 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
4296 int bTermWritten = 0; /* True if current term already output */
4298 assert( iLvl<pStruct->nLevel );
4299 assert( pLvl->nMerge<=pLvl->nSeg );
4301 memset(&writer, 0, sizeof(Fts5SegWriter));
4302 memset(&term, 0, sizeof(Fts5Buffer));
4303 if( pLvl->nMerge ){
4304 pLvlOut = &pStruct->aLevel[iLvl+1];
4305 assert( pLvlOut->nSeg>0 );
4306 nInput = pLvl->nMerge;
4307 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
4309 fts5WriteInit(p, &writer, pSeg->iSegid);
4310 writer.writer.pgno = pSeg->pgnoLast+1;
4311 writer.iBtPage = 0;
4312 }else{
4313 int iSegid = fts5AllocateSegid(p, pStruct);
4315 /* Extend the Fts5Structure object as required to ensure the output
4316 ** segment exists. */
4317 if( iLvl==pStruct->nLevel-1 ){
4318 fts5StructureAddLevel(&p->rc, ppStruct);
4319 pStruct = *ppStruct;
4321 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
4322 if( p->rc ) return;
4323 pLvl = &pStruct->aLevel[iLvl];
4324 pLvlOut = &pStruct->aLevel[iLvl+1];
4326 fts5WriteInit(p, &writer, iSegid);
4328 /* Add the new segment to the output level */
4329 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
4330 pLvlOut->nSeg++;
4331 pSeg->pgnoFirst = 1;
4332 pSeg->iSegid = iSegid;
4333 pStruct->nSegment++;
4335 /* Read input from all segments in the input level */
4336 nInput = pLvl->nSeg;
4338 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
4340 assert( iLvl>=0 );
4341 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
4342 fts5MultiIterEof(p, pIter)==0;
4343 fts5MultiIterNext(p, pIter, 0, 0)
4345 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4346 int nPos; /* position-list size field value */
4347 int nTerm;
4348 const u8 *pTerm;
4350 pTerm = fts5MultiIterTerm(pIter, &nTerm);
4351 if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
4352 if( pnRem && writer.nLeafWritten>nRem ){
4353 break;
4355 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
4356 bTermWritten =0;
4359 /* Check for key annihilation. */
4360 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
4362 if( p->rc==SQLITE_OK && bTermWritten==0 ){
4363 /* This is a new term. Append a term to the output segment. */
4364 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
4365 bTermWritten = 1;
4368 /* Append the rowid to the output */
4369 /* WRITEPOSLISTSIZE */
4370 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
4372 if( eDetail==FTS5_DETAIL_NONE ){
4373 if( pSegIter->bDel ){
4374 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4375 if( pSegIter->nPos>0 ){
4376 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4379 }else{
4380 /* Append the position-list data to the output */
4381 nPos = pSegIter->nPos*2 + pSegIter->bDel;
4382 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4383 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4387 /* Flush the last leaf page to disk. Set the output segment b-tree height
4388 ** and last leaf page number at the same time. */
4389 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4391 assert( pIter!=0 || p->rc!=SQLITE_OK );
4392 if( fts5MultiIterEof(p, pIter) ){
4393 int i;
4395 /* Remove the redundant segments from the %_data table */
4396 for(i=0; i<nInput; i++){
4397 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
4400 /* Remove the redundant segments from the input level */
4401 if( pLvl->nSeg!=nInput ){
4402 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
4403 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
4405 pStruct->nSegment -= nInput;
4406 pLvl->nSeg -= nInput;
4407 pLvl->nMerge = 0;
4408 if( pSeg->pgnoLast==0 ){
4409 pLvlOut->nSeg--;
4410 pStruct->nSegment--;
4412 }else{
4413 assert( pSeg->pgnoLast>0 );
4414 fts5TrimSegments(p, pIter);
4415 pLvl->nMerge = nInput;
4418 fts5MultiIterFree(pIter);
4419 fts5BufferFree(&term);
4420 if( pnRem ) *pnRem -= writer.nLeafWritten;
4424 ** Do up to nPg pages of automerge work on the index.
4426 ** Return true if any changes were actually made, or false otherwise.
4428 static int fts5IndexMerge(
4429 Fts5Index *p, /* FTS5 backend object */
4430 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4431 int nPg, /* Pages of work to do */
4432 int nMin /* Minimum number of segments to merge */
4434 int nRem = nPg;
4435 int bRet = 0;
4436 Fts5Structure *pStruct = *ppStruct;
4437 while( nRem>0 && p->rc==SQLITE_OK ){
4438 int iLvl; /* To iterate through levels */
4439 int iBestLvl = 0; /* Level offering the most input segments */
4440 int nBest = 0; /* Number of input segments on best level */
4442 /* Set iBestLvl to the level to read input segments from. */
4443 assert( pStruct->nLevel>0 );
4444 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4445 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4446 if( pLvl->nMerge ){
4447 if( pLvl->nMerge>nBest ){
4448 iBestLvl = iLvl;
4449 nBest = pLvl->nMerge;
4451 break;
4453 if( pLvl->nSeg>nBest ){
4454 nBest = pLvl->nSeg;
4455 iBestLvl = iLvl;
4459 /* If nBest is still 0, then the index must be empty. */
4460 #ifdef SQLITE_DEBUG
4461 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
4462 assert( pStruct->aLevel[iLvl].nSeg==0 );
4464 #endif
4466 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
4467 break;
4469 bRet = 1;
4470 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4471 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4472 fts5StructurePromote(p, iBestLvl+1, pStruct);
4475 *ppStruct = pStruct;
4476 return bRet;
4480 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4481 ** segment. This function updates the write-counter accordingly and, if
4482 ** necessary, performs incremental merge work.
4484 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4485 ** already occurred, this function is a no-op.
4487 static void fts5IndexAutomerge(
4488 Fts5Index *p, /* FTS5 backend object */
4489 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4490 int nLeaf /* Number of output leaves just written */
4492 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
4493 Fts5Structure *pStruct = *ppStruct;
4494 u64 nWrite; /* Initial value of write-counter */
4495 int nWork; /* Number of work-quanta to perform */
4496 int nRem; /* Number of leaf pages left to write */
4498 /* Update the write-counter. While doing so, set nWork. */
4499 nWrite = pStruct->nWriteCounter;
4500 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4501 pStruct->nWriteCounter += nLeaf;
4502 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4504 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4508 static void fts5IndexCrisismerge(
4509 Fts5Index *p, /* FTS5 backend object */
4510 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
4512 const int nCrisis = p->pConfig->nCrisisMerge;
4513 Fts5Structure *pStruct = *ppStruct;
4514 if( pStruct && pStruct->nLevel>0 ){
4515 int iLvl = 0;
4516 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
4517 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
4518 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
4519 fts5StructurePromote(p, iLvl+1, pStruct);
4520 iLvl++;
4522 *ppStruct = pStruct;
4526 static int fts5IndexReturn(Fts5Index *p){
4527 int rc = p->rc;
4528 p->rc = SQLITE_OK;
4529 return rc;
4532 typedef struct Fts5FlushCtx Fts5FlushCtx;
4533 struct Fts5FlushCtx {
4534 Fts5Index *pIdx;
4535 Fts5SegWriter writer;
4539 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4540 ** in a 32-bit integer. Return the size of the largest prefix of this
4541 ** list nMax bytes or less in size.
4543 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
4544 int ret;
4545 u32 dummy;
4546 ret = fts5GetVarint32(aBuf, dummy);
4547 if( ret<nMax ){
4548 while( 1 ){
4549 int i = fts5GetVarint32(&aBuf[ret], dummy);
4550 if( (ret + i) > nMax ) break;
4551 ret += i;
4554 return ret;
4558 ** Execute the SQL statement:
4560 ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
4562 ** This is used when a secure-delete operation removes the last term
4563 ** from a segment leaf page. In that case the %_idx entry is removed
4564 ** too. This is done to ensure that if all instances of a token are
4565 ** removed from an fts5 database in secure-delete mode, no trace of
4566 ** the token itself remains in the database.
4568 static void fts5SecureDeleteIdxEntry(
4569 Fts5Index *p, /* FTS5 backend object */
4570 int iSegid, /* Id of segment to delete entry for */
4571 int iPgno /* Page number within segment */
4573 if( iPgno!=1 ){
4574 assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );
4575 if( p->pDeleteFromIdx==0 ){
4576 fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
4577 "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
4578 p->pConfig->zDb, p->pConfig->zName
4581 if( p->rc==SQLITE_OK ){
4582 sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
4583 sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
4584 sqlite3_step(p->pDeleteFromIdx);
4585 p->rc = sqlite3_reset(p->pDeleteFromIdx);
4591 ** This is called when a secure-delete operation removes a position-list
4592 ** that overflows onto segment page iPgno of segment pSeg. This function
4593 ** rewrites node iPgno, and possibly one or more of its right-hand peers,
4594 ** to remove this portion of the position list.
4596 ** Output variable (*pbLastInDoclist) is set to true if the position-list
4597 ** removed is followed by a new term or the end-of-segment, or false if
4598 ** it is followed by another rowid/position list.
4600 static void fts5SecureDeleteOverflow(
4601 Fts5Index *p,
4602 Fts5StructureSegment *pSeg,
4603 int iPgno,
4604 int *pbLastInDoclist
4606 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
4607 int pgno;
4608 Fts5Data *pLeaf = 0;
4609 assert( iPgno!=1 );
4611 *pbLastInDoclist = 1;
4612 for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){
4613 i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
4614 int iNext = 0;
4615 u8 *aPg = 0;
4617 pLeaf = fts5DataRead(p, iRowid);
4618 if( pLeaf==0 ) break;
4619 aPg = pLeaf->p;
4621 iNext = fts5GetU16(&aPg[0]);
4622 if( iNext!=0 ){
4623 *pbLastInDoclist = 0;
4625 if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){
4626 fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext);
4629 if( iNext==0 ){
4630 /* The page contains no terms or rowids. Replace it with an empty
4631 ** page and move on to the right-hand peer. */
4632 const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
4633 assert_nc( bDetailNone==0 || pLeaf->nn==4 );
4634 if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
4635 fts5DataRelease(pLeaf);
4636 pLeaf = 0;
4637 }else if( bDetailNone ){
4638 break;
4639 }else if( iNext>=pLeaf->szLeaf || iNext<4 ){
4640 p->rc = FTS5_CORRUPT;
4641 break;
4642 }else{
4643 int nShift = iNext - 4;
4644 int nPg;
4646 int nIdx = 0;
4647 u8 *aIdx = 0;
4649 /* Unless the current page footer is 0 bytes in size (in which case
4650 ** the new page footer will be as well), allocate and populate a
4651 ** buffer containing the new page footer. Set stack variables aIdx
4652 ** and nIdx accordingly. */
4653 if( pLeaf->nn>pLeaf->szLeaf ){
4654 int iFirst = 0;
4655 int i1 = pLeaf->szLeaf;
4656 int i2 = 0;
4658 aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2);
4659 if( aIdx==0 ) break;
4660 i1 += fts5GetVarint32(&aPg[i1], iFirst);
4661 i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift);
4662 if( i1<pLeaf->nn ){
4663 memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1);
4664 i2 += (pLeaf->nn-i1);
4666 nIdx = i2;
4669 /* Modify the contents of buffer aPg[]. Set nPg to the new size
4670 ** in bytes. The new page is always smaller than the old. */
4671 nPg = pLeaf->szLeaf - nShift;
4672 memmove(&aPg[4], &aPg[4+nShift], nPg-4);
4673 fts5PutU16(&aPg[2], nPg);
4674 if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4);
4675 if( nIdx>0 ){
4676 memcpy(&aPg[nPg], aIdx, nIdx);
4677 nPg += nIdx;
4679 sqlite3_free(aIdx);
4681 /* Write the new page to disk and exit the loop */
4682 assert( nPg>4 || fts5GetU16(aPg)==0 );
4683 fts5DataWrite(p, iRowid, aPg, nPg);
4684 break;
4687 fts5DataRelease(pLeaf);
4691 ** Completely remove the entry that pSeg currently points to from
4692 ** the database.
4694 static void fts5DoSecureDelete(
4695 Fts5Index *p,
4696 Fts5SegIter *pSeg
4698 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
4699 int iSegid = pSeg->pSeg->iSegid;
4700 u8 *aPg = pSeg->pLeaf->p;
4701 int nPg = pSeg->pLeaf->nn;
4702 int iPgIdx = pSeg->pLeaf->szLeaf;
4704 u64 iDelta = 0;
4705 u64 iNextDelta = 0;
4706 int iNextOff = 0;
4707 int iOff = 0;
4708 int nIdx = 0;
4709 u8 *aIdx = 0;
4710 int bLastInDoclist = 0;
4711 int iIdx = 0;
4712 int iStart = 0;
4713 int iKeyOff = 0;
4714 int iPrevKeyOff = 0;
4715 int iDelKeyOff = 0; /* Offset of deleted key, if any */
4717 nIdx = nPg-iPgIdx;
4718 aIdx = sqlite3Fts5MallocZero(&p->rc, nIdx+16);
4719 if( p->rc ) return;
4720 memcpy(aIdx, &aPg[iPgIdx], nIdx);
4722 /* At this point segment iterator pSeg points to the entry
4723 ** this function should remove from the b-tree segment.
4725 ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
4726 ** offset of the first byte in the position-list for the entry to
4727 ** remove. Immediately before this comes two varints that will also
4728 ** need to be removed:
4730 ** + the rowid or delta rowid value for the entry, and
4731 ** + the size of the position list in bytes.
4733 ** Or, in detail=none mode, there is a single varint prior to
4734 ** pSeg->iLeafOffset - the rowid or delta rowid value.
4736 ** This block sets the following variables:
4738 ** iStart:
4739 ** iDelta:
4742 int iSOP;
4743 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){
4744 iStart = pSeg->iTermLeafOffset;
4745 }else{
4746 iStart = fts5GetU16(&aPg[0]);
4749 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
4750 assert_nc( iSOP<=pSeg->iLeafOffset );
4752 if( bDetailNone ){
4753 while( iSOP<pSeg->iLeafOffset ){
4754 if( aPg[iSOP]==0x00 ) iSOP++;
4755 if( aPg[iSOP]==0x00 ) iSOP++;
4756 iStart = iSOP;
4757 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
4760 iNextOff = iSOP;
4761 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
4762 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
4764 }else{
4765 int nPos = 0;
4766 iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
4767 while( iSOP<pSeg->iLeafOffset ){
4768 iStart = iSOP + (nPos/2);
4769 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
4770 iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
4772 assert_nc( iSOP==pSeg->iLeafOffset );
4773 iNextOff = pSeg->iLeafOffset + pSeg->nPos;
4777 iOff = iStart;
4778 if( iNextOff>=iPgIdx ){
4779 int pgno = pSeg->iLeafPgno+1;
4780 fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist);
4781 iNextOff = iPgIdx;
4782 }else{
4783 /* Set bLastInDoclist to true if the entry being removed is the last
4784 ** in its doclist. */
4785 for(iIdx=0, iKeyOff=0; iIdx<nIdx; /* no-op */){
4786 u32 iVal = 0;
4787 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
4788 iKeyOff += iVal;
4789 if( iKeyOff==iNextOff ){
4790 bLastInDoclist = 1;
4795 if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist||iNextOff==iPgIdx) ){
4796 fts5PutU16(&aPg[0], 0);
4799 if( bLastInDoclist==0 ){
4800 if( iNextOff!=iPgIdx ){
4801 iNextOff += fts5GetVarint(&aPg[iNextOff], &iNextDelta);
4802 iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta);
4804 }else if(
4805 iStart==pSeg->iTermLeafOffset && pSeg->iLeafPgno==pSeg->iTermLeafPgno
4807 /* The entry being removed was the only position list in its
4808 ** doclist. Therefore the term needs to be removed as well. */
4809 int iKey = 0;
4810 for(iIdx=0, iKeyOff=0; iIdx<nIdx; iKey++){
4811 u32 iVal = 0;
4812 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
4813 if( (iKeyOff+iVal)>(u32)iStart ) break;
4814 iKeyOff += iVal;
4817 iDelKeyOff = iOff = iKeyOff;
4818 if( iNextOff!=iPgIdx ){
4819 int nPrefix = 0;
4820 int nSuffix = 0;
4821 int nPrefix2 = 0;
4822 int nSuffix2 = 0;
4824 iDelKeyOff = iNextOff;
4825 iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2);
4826 iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2);
4828 if( iKey!=1 ){
4829 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix);
4831 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix);
4833 nPrefix = MIN(nPrefix, nPrefix2);
4834 nSuffix = (nPrefix2 + nSuffix2) - nPrefix;
4836 if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){
4837 p->rc = FTS5_CORRUPT;
4838 }else{
4839 if( iKey!=1 ){
4840 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix);
4842 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix);
4843 if( nPrefix2>nPrefix ){
4844 memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix);
4845 iOff += (nPrefix2-nPrefix);
4847 memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2);
4848 iOff += nSuffix2;
4849 iNextOff += nSuffix2;
4852 }else if( iStart==4 ){
4853 int iPgno;
4855 assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno );
4856 /* The entry being removed may be the only position list in
4857 ** its doclist. */
4858 for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
4859 Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno));
4860 int bEmpty = (pPg && pPg->nn==4);
4861 fts5DataRelease(pPg);
4862 if( bEmpty==0 ) break;
4865 if( iPgno==pSeg->iTermLeafPgno ){
4866 i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno);
4867 Fts5Data *pTerm = fts5DataRead(p, iId);
4868 if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){
4869 u8 *aTermIdx = &pTerm->p[pTerm->szLeaf];
4870 int nTermIdx = pTerm->nn - pTerm->szLeaf;
4871 int iTermIdx = 0;
4872 int iTermOff = 0;
4874 while( 1 ){
4875 u32 iVal = 0;
4876 int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal);
4877 iTermOff += iVal;
4878 if( (iTermIdx+nByte)>=nTermIdx ) break;
4879 iTermIdx += nByte;
4881 nTermIdx = iTermIdx;
4883 memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx);
4884 fts5PutU16(&pTerm->p[2], iTermOff);
4886 fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx);
4887 if( nTermIdx==0 ){
4888 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno);
4891 fts5DataRelease(pTerm);
4895 if( p->rc==SQLITE_OK ){
4896 const int nMove = nPg - iNextOff;
4897 int nShift = 0;
4899 memmove(&aPg[iOff], &aPg[iNextOff], nMove);
4900 iPgIdx -= (iNextOff - iOff);
4901 nPg = iPgIdx;
4902 fts5PutU16(&aPg[2], iPgIdx);
4904 nShift = iNextOff - iOff;
4905 for(iIdx=0, iKeyOff=0, iPrevKeyOff=0; iIdx<nIdx; /* no-op */){
4906 u32 iVal = 0;
4907 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
4908 iKeyOff += iVal;
4909 if( iKeyOff!=iDelKeyOff ){
4910 if( iKeyOff>iOff ){
4911 iKeyOff -= nShift;
4912 nShift = 0;
4914 nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOff - iPrevKeyOff);
4915 iPrevKeyOff = iKeyOff;
4919 if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){
4920 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno);
4923 assert_nc( nPg>4 || fts5GetU16(aPg)==0 );
4924 fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno), aPg,nPg);
4926 sqlite3_free(aIdx);
4930 ** This is called as part of flushing a delete to disk in 'secure-delete'
4931 ** mode. It edits the segments within the database described by argument
4932 ** pStruct to remove the entries for term zTerm, rowid iRowid.
4934 static void fts5FlushSecureDelete(
4935 Fts5Index *p,
4936 Fts5Structure *pStruct,
4937 const char *zTerm,
4938 i64 iRowid
4940 const int f = FTS5INDEX_QUERY_SKIPHASH;
4941 int nTerm = (int)strlen(zTerm);
4942 Fts5Iter *pIter = 0; /* Used to find term instance */
4944 fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter);
4945 if( fts5MultiIterEof(p, pIter)==0 ){
4946 i64 iThis = fts5MultiIterRowid(pIter);
4947 if( iThis<iRowid ){
4948 fts5MultiIterNextFrom(p, pIter, iRowid);
4951 if( p->rc==SQLITE_OK
4952 && fts5MultiIterEof(p, pIter)==0
4953 && iRowid==fts5MultiIterRowid(pIter)
4955 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
4956 fts5DoSecureDelete(p, pSeg);
4960 fts5MultiIterFree(pIter);
4965 ** Flush the contents of in-memory hash table iHash to a new level-0
4966 ** segment on disk. Also update the corresponding structure record.
4968 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4969 ** already occurred, this function is a no-op.
4971 static void fts5FlushOneHash(Fts5Index *p){
4972 Fts5Hash *pHash = p->pHash;
4973 Fts5Structure *pStruct;
4974 int iSegid;
4975 int pgnoLast = 0; /* Last leaf page number in segment */
4977 /* Obtain a reference to the index structure and allocate a new segment-id
4978 ** for the new level-0 segment. */
4979 pStruct = fts5StructureRead(p);
4980 iSegid = fts5AllocateSegid(p, pStruct);
4981 fts5StructureInvalidate(p);
4983 if( iSegid ){
4984 const int pgsz = p->pConfig->pgsz;
4985 int eDetail = p->pConfig->eDetail;
4986 int bSecureDelete = p->pConfig->bSecureDelete;
4987 Fts5StructureSegment *pSeg; /* New segment within pStruct */
4988 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
4989 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
4991 Fts5SegWriter writer;
4992 fts5WriteInit(p, &writer, iSegid);
4994 pBuf = &writer.writer.buf;
4995 pPgidx = &writer.writer.pgidx;
4997 /* fts5WriteInit() should have initialized the buffers to (most likely)
4998 ** the maximum space required. */
4999 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
5000 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
5002 /* Begin scanning through hash table entries. This loop runs once for each
5003 ** term/doclist currently stored within the hash table. */
5004 if( p->rc==SQLITE_OK ){
5005 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
5007 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
5008 const char *zTerm; /* Buffer containing term */
5009 int nTerm; /* Size of zTerm in bytes */
5010 const u8 *pDoclist; /* Pointer to doclist for this term */
5011 int nDoclist; /* Size of doclist in bytes */
5013 /* Get the term and doclist for this entry. */
5014 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
5015 nTerm = (int)strlen(zTerm);
5016 if( bSecureDelete==0 ){
5017 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
5018 if( p->rc!=SQLITE_OK ) break;
5019 assert( writer.bFirstRowidInPage==0 );
5022 if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
5023 /* The entire doclist will fit on the current leaf. */
5024 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
5025 }else{
5026 int bTermWritten = !bSecureDelete;
5027 i64 iRowid = 0;
5028 i64 iPrev = 0;
5029 int iOff = 0;
5031 /* The entire doclist will not fit on this leaf. The following
5032 ** loop iterates through the poslists that make up the current
5033 ** doclist. */
5034 while( p->rc==SQLITE_OK && iOff<nDoclist ){
5035 u64 iDelta = 0;
5036 iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
5037 iRowid += iDelta;
5039 /* If in secure delete mode, and if this entry in the poslist is
5040 ** in fact a delete, then edit the existing segments directly
5041 ** using fts5FlushSecureDelete(). */
5042 if( bSecureDelete ){
5043 if( eDetail==FTS5_DETAIL_NONE ){
5044 if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
5045 fts5FlushSecureDelete(p, pStruct, zTerm, iRowid);
5046 iOff++;
5047 if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
5048 iOff++;
5049 nDoclist = 0;
5050 }else{
5051 continue;
5054 }else if( (pDoclist[iOff] & 0x01) ){
5055 fts5FlushSecureDelete(p, pStruct, zTerm, iRowid);
5056 if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){
5057 iOff++;
5058 continue;
5063 if( p->rc==SQLITE_OK && bTermWritten==0 ){
5064 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
5065 bTermWritten = 1;
5066 assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 );
5069 if( writer.bFirstRowidInPage ){
5070 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
5071 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
5072 writer.bFirstRowidInPage = 0;
5073 fts5WriteDlidxAppend(p, &writer, iRowid);
5074 }else{
5075 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid-iPrev);
5077 if( p->rc!=SQLITE_OK ) break;
5078 assert( pBuf->n<=pBuf->nSpace );
5079 iPrev = iRowid;
5081 if( eDetail==FTS5_DETAIL_NONE ){
5082 if( iOff<nDoclist && pDoclist[iOff]==0 ){
5083 pBuf->p[pBuf->n++] = 0;
5084 iOff++;
5085 if( iOff<nDoclist && pDoclist[iOff]==0 ){
5086 pBuf->p[pBuf->n++] = 0;
5087 iOff++;
5090 if( (pBuf->n + pPgidx->n)>=pgsz ){
5091 fts5WriteFlushLeaf(p, &writer);
5093 }else{
5094 int bDummy;
5095 int nPos;
5096 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
5097 nCopy += nPos;
5098 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
5099 /* The entire poslist will fit on the current leaf. So copy
5100 ** it in one go. */
5101 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
5102 }else{
5103 /* The entire poslist will not fit on this leaf. So it needs
5104 ** to be broken into sections. The only qualification being
5105 ** that each varint must be stored contiguously. */
5106 const u8 *pPoslist = &pDoclist[iOff];
5107 int iPos = 0;
5108 while( p->rc==SQLITE_OK ){
5109 int nSpace = pgsz - pBuf->n - pPgidx->n;
5110 int n = 0;
5111 if( (nCopy - iPos)<=nSpace ){
5112 n = nCopy - iPos;
5113 }else{
5114 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
5116 assert( n>0 );
5117 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
5118 iPos += n;
5119 if( (pBuf->n + pPgidx->n)>=pgsz ){
5120 fts5WriteFlushLeaf(p, &writer);
5122 if( iPos>=nCopy ) break;
5125 iOff += nCopy;
5130 /* TODO2: Doclist terminator written here. */
5131 /* pBuf->p[pBuf->n++] = '\0'; */
5132 assert( pBuf->n<=pBuf->nSpace );
5133 if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
5135 sqlite3Fts5HashClear(pHash);
5136 fts5WriteFinish(p, &writer, &pgnoLast);
5138 assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 );
5139 if( pgnoLast>0 ){
5140 /* Update the Fts5Structure. It is written back to the database by the
5141 ** fts5StructureRelease() call below. */
5142 if( pStruct->nLevel==0 ){
5143 fts5StructureAddLevel(&p->rc, &pStruct);
5145 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
5146 if( p->rc==SQLITE_OK ){
5147 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
5148 pSeg->iSegid = iSegid;
5149 pSeg->pgnoFirst = 1;
5150 pSeg->pgnoLast = pgnoLast;
5151 pStruct->nSegment++;
5153 fts5StructurePromote(p, 0, pStruct);
5157 fts5IndexAutomerge(p, &pStruct, pgnoLast);
5158 fts5IndexCrisismerge(p, &pStruct);
5159 fts5StructureWrite(p, pStruct);
5160 fts5StructureRelease(pStruct);
5164 ** Flush any data stored in the in-memory hash tables to the database.
5166 static void fts5IndexFlush(Fts5Index *p){
5167 /* Unless it is empty, flush the hash table to disk */
5168 if( p->nPendingData ){
5169 assert( p->pHash );
5170 p->nPendingData = 0;
5171 fts5FlushOneHash(p);
5175 static Fts5Structure *fts5IndexOptimizeStruct(
5176 Fts5Index *p,
5177 Fts5Structure *pStruct
5179 Fts5Structure *pNew = 0;
5180 sqlite3_int64 nByte = sizeof(Fts5Structure);
5181 int nSeg = pStruct->nSegment;
5182 int i;
5184 /* Figure out if this structure requires optimization. A structure does
5185 ** not require optimization if either:
5187 ** + it consists of fewer than two segments, or
5188 ** + all segments are on the same level, or
5189 ** + all segments except one are currently inputs to a merge operation.
5191 ** In the first case, return NULL. In the second, increment the ref-count
5192 ** on *pStruct and return a copy of the pointer to it.
5194 if( nSeg<2 ) return 0;
5195 for(i=0; i<pStruct->nLevel; i++){
5196 int nThis = pStruct->aLevel[i].nSeg;
5197 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
5198 fts5StructureRef(pStruct);
5199 return pStruct;
5201 assert( pStruct->aLevel[i].nMerge<=nThis );
5204 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
5205 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
5207 if( pNew ){
5208 Fts5StructureLevel *pLvl;
5209 nByte = nSeg * sizeof(Fts5StructureSegment);
5210 pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL);
5211 pNew->nRef = 1;
5212 pNew->nWriteCounter = pStruct->nWriteCounter;
5213 pLvl = &pNew->aLevel[pNew->nLevel-1];
5214 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
5215 if( pLvl->aSeg ){
5216 int iLvl, iSeg;
5217 int iSegOut = 0;
5218 /* Iterate through all segments, from oldest to newest. Add them to
5219 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
5220 ** segment in the data structure. */
5221 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
5222 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
5223 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
5224 iSegOut++;
5227 pNew->nSegment = pLvl->nSeg = nSeg;
5228 }else{
5229 sqlite3_free(pNew);
5230 pNew = 0;
5234 return pNew;
5237 int sqlite3Fts5IndexOptimize(Fts5Index *p){
5238 Fts5Structure *pStruct;
5239 Fts5Structure *pNew = 0;
5241 assert( p->rc==SQLITE_OK );
5242 fts5IndexFlush(p);
5243 pStruct = fts5StructureRead(p);
5244 fts5StructureInvalidate(p);
5246 if( pStruct ){
5247 pNew = fts5IndexOptimizeStruct(p, pStruct);
5249 fts5StructureRelease(pStruct);
5251 assert( pNew==0 || pNew->nSegment>0 );
5252 if( pNew ){
5253 int iLvl;
5254 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
5255 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
5256 int nRem = FTS5_OPT_WORK_UNIT;
5257 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
5260 fts5StructureWrite(p, pNew);
5261 fts5StructureRelease(pNew);
5264 return fts5IndexReturn(p);
5268 ** This is called to implement the special "VALUES('merge', $nMerge)"
5269 ** INSERT command.
5271 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
5272 Fts5Structure *pStruct = fts5StructureRead(p);
5273 if( pStruct ){
5274 int nMin = p->pConfig->nUsermerge;
5275 fts5StructureInvalidate(p);
5276 if( nMerge<0 ){
5277 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
5278 fts5StructureRelease(pStruct);
5279 pStruct = pNew;
5280 nMin = 2;
5281 nMerge = nMerge*-1;
5283 if( pStruct && pStruct->nLevel ){
5284 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
5285 fts5StructureWrite(p, pStruct);
5288 fts5StructureRelease(pStruct);
5290 return fts5IndexReturn(p);
5293 static void fts5AppendRowid(
5294 Fts5Index *p,
5295 u64 iDelta,
5296 Fts5Iter *pUnused,
5297 Fts5Buffer *pBuf
5299 UNUSED_PARAM(pUnused);
5300 fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
5303 static void fts5AppendPoslist(
5304 Fts5Index *p,
5305 u64 iDelta,
5306 Fts5Iter *pMulti,
5307 Fts5Buffer *pBuf
5309 int nData = pMulti->base.nData;
5310 int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
5311 assert( nData>0 );
5312 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
5313 fts5BufferSafeAppendVarint(pBuf, iDelta);
5314 fts5BufferSafeAppendVarint(pBuf, nData*2);
5315 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
5316 memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
5321 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
5322 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
5324 assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
5325 if( p>=pIter->aEof ){
5326 pIter->aPoslist = 0;
5327 }else{
5328 i64 iDelta;
5330 p += fts5GetVarint(p, (u64*)&iDelta);
5331 pIter->iRowid += iDelta;
5333 /* Read position list size */
5334 if( p[0] & 0x80 ){
5335 int nPos;
5336 pIter->nSize = fts5GetVarint32(p, nPos);
5337 pIter->nPoslist = (nPos>>1);
5338 }else{
5339 pIter->nPoslist = ((int)(p[0])) >> 1;
5340 pIter->nSize = 1;
5343 pIter->aPoslist = p;
5344 if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
5345 pIter->aPoslist = 0;
5350 static void fts5DoclistIterInit(
5351 Fts5Buffer *pBuf,
5352 Fts5DoclistIter *pIter
5354 memset(pIter, 0, sizeof(*pIter));
5355 if( pBuf->n>0 ){
5356 pIter->aPoslist = pBuf->p;
5357 pIter->aEof = &pBuf->p[pBuf->n];
5358 fts5DoclistIterNext(pIter);
5362 #if 0
5364 ** Append a doclist to buffer pBuf.
5366 ** This function assumes that space within the buffer has already been
5367 ** allocated.
5369 static void fts5MergeAppendDocid(
5370 Fts5Buffer *pBuf, /* Buffer to write to */
5371 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
5372 i64 iRowid /* Rowid to append */
5374 assert( pBuf->n!=0 || (*piLastRowid)==0 );
5375 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
5376 *piLastRowid = iRowid;
5378 #endif
5380 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
5381 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
5382 fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
5383 (iLastRowid) = (iRowid); \
5387 ** Swap the contents of buffer *p1 with that of *p2.
5389 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
5390 Fts5Buffer tmp = *p1;
5391 *p1 = *p2;
5392 *p2 = tmp;
5395 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
5396 int i = *piOff;
5397 if( i>=pBuf->n ){
5398 *piOff = -1;
5399 }else{
5400 u64 iVal;
5401 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
5402 *piRowid += iVal;
5407 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
5408 ** In this case the buffers consist of a delta-encoded list of rowids only.
5410 static void fts5MergeRowidLists(
5411 Fts5Index *p, /* FTS5 backend object */
5412 Fts5Buffer *p1, /* First list to merge */
5413 int nBuf, /* Number of entries in apBuf[] */
5414 Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
5416 int i1 = 0;
5417 int i2 = 0;
5418 i64 iRowid1 = 0;
5419 i64 iRowid2 = 0;
5420 i64 iOut = 0;
5421 Fts5Buffer *p2 = &aBuf[0];
5422 Fts5Buffer out;
5424 (void)nBuf;
5425 memset(&out, 0, sizeof(out));
5426 assert( nBuf==1 );
5427 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
5428 if( p->rc ) return;
5430 fts5NextRowid(p1, &i1, &iRowid1);
5431 fts5NextRowid(p2, &i2, &iRowid2);
5432 while( i1>=0 || i2>=0 ){
5433 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
5434 assert( iOut==0 || iRowid1>iOut );
5435 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
5436 iOut = iRowid1;
5437 fts5NextRowid(p1, &i1, &iRowid1);
5438 }else{
5439 assert( iOut==0 || iRowid2>iOut );
5440 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
5441 iOut = iRowid2;
5442 if( i1>=0 && iRowid1==iRowid2 ){
5443 fts5NextRowid(p1, &i1, &iRowid1);
5445 fts5NextRowid(p2, &i2, &iRowid2);
5449 fts5BufferSwap(&out, p1);
5450 fts5BufferFree(&out);
5453 typedef struct PrefixMerger PrefixMerger;
5454 struct PrefixMerger {
5455 Fts5DoclistIter iter; /* Doclist iterator */
5456 i64 iPos; /* For iterating through a position list */
5457 int iOff;
5458 u8 *aPos;
5459 PrefixMerger *pNext; /* Next in docid/poslist order */
5462 static void fts5PrefixMergerInsertByRowid(
5463 PrefixMerger **ppHead,
5464 PrefixMerger *p
5466 if( p->iter.aPoslist ){
5467 PrefixMerger **pp = ppHead;
5468 while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
5469 pp = &(*pp)->pNext;
5471 p->pNext = *pp;
5472 *pp = p;
5476 static void fts5PrefixMergerInsertByPosition(
5477 PrefixMerger **ppHead,
5478 PrefixMerger *p
5480 if( p->iPos>=0 ){
5481 PrefixMerger **pp = ppHead;
5482 while( *pp && p->iPos>(*pp)->iPos ){
5483 pp = &(*pp)->pNext;
5485 p->pNext = *pp;
5486 *pp = p;
5492 ** Array aBuf[] contains nBuf doclists. These are all merged in with the
5493 ** doclist in buffer p1.
5495 static void fts5MergePrefixLists(
5496 Fts5Index *p, /* FTS5 backend object */
5497 Fts5Buffer *p1, /* First list to merge */
5498 int nBuf, /* Number of buffers in array aBuf[] */
5499 Fts5Buffer *aBuf /* Other lists to merge in */
5501 #define fts5PrefixMergerNextPosition(p) \
5502 sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
5503 #define FTS5_MERGE_NLIST 16
5504 PrefixMerger aMerger[FTS5_MERGE_NLIST];
5505 PrefixMerger *pHead = 0;
5506 int i;
5507 int nOut = 0;
5508 Fts5Buffer out = {0, 0, 0};
5509 Fts5Buffer tmp = {0, 0, 0};
5510 i64 iLastRowid = 0;
5512 /* Initialize a doclist-iterator for each input buffer. Arrange them in
5513 ** a linked-list starting at pHead in ascending order of rowid. Avoid
5514 ** linking any iterators already at EOF into the linked list at all. */
5515 assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
5516 memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
5517 pHead = &aMerger[nBuf];
5518 fts5DoclistIterInit(p1, &pHead->iter);
5519 for(i=0; i<nBuf; i++){
5520 fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
5521 fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
5522 nOut += aBuf[i].n;
5524 if( nOut==0 ) return;
5525 nOut += p1->n + 9 + 10*nBuf;
5527 /* The maximum size of the output is equal to the sum of the
5528 ** input sizes + 1 varint (9 bytes). The extra varint is because if the
5529 ** first rowid in one input is a large negative number, and the first in
5530 ** the other a non-negative number, the delta for the non-negative
5531 ** number will be larger on disk than the literal integer value
5532 ** was.
5534 ** Or, if the input position-lists are corrupt, then the output might
5535 ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
5536 ** (the value PoslistNext64() uses for EOF) as a position and appending
5537 ** it to the output. This can happen at most once for each input
5538 ** position-list, hence (nBuf+1) 10 byte paddings. */
5539 if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;
5541 while( pHead ){
5542 fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);
5544 if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
5545 /* Merge data from two or more poslists */
5546 i64 iPrev = 0;
5547 int nTmp = FTS5_DATA_ZERO_PADDING;
5548 int nMerge = 0;
5549 PrefixMerger *pSave = pHead;
5550 PrefixMerger *pThis = 0;
5551 int nTail = 0;
5553 pHead = 0;
5554 while( pSave && pSave->iter.iRowid==iLastRowid ){
5555 PrefixMerger *pNext = pSave->pNext;
5556 pSave->iOff = 0;
5557 pSave->iPos = 0;
5558 pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
5559 fts5PrefixMergerNextPosition(pSave);
5560 nTmp += pSave->iter.nPoslist + 10;
5561 nMerge++;
5562 fts5PrefixMergerInsertByPosition(&pHead, pSave);
5563 pSave = pNext;
5566 if( pHead==0 || pHead->pNext==0 ){
5567 p->rc = FTS5_CORRUPT;
5568 break;
5571 /* See the earlier comment in this function for an explanation of why
5572 ** corrupt input position lists might cause the output to consume
5573 ** at most nMerge*10 bytes of unexpected space. */
5574 if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
5575 break;
5577 fts5BufferZero(&tmp);
5579 pThis = pHead;
5580 pHead = pThis->pNext;
5581 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5582 fts5PrefixMergerNextPosition(pThis);
5583 fts5PrefixMergerInsertByPosition(&pHead, pThis);
5585 while( pHead->pNext ){
5586 pThis = pHead;
5587 if( pThis->iPos!=iPrev ){
5588 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5590 fts5PrefixMergerNextPosition(pThis);
5591 pHead = pThis->pNext;
5592 fts5PrefixMergerInsertByPosition(&pHead, pThis);
5595 if( pHead->iPos!=iPrev ){
5596 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
5598 nTail = pHead->iter.nPoslist - pHead->iOff;
5600 /* WRITEPOSLISTSIZE */
5601 assert_nc( tmp.n+nTail<=nTmp );
5602 assert( tmp.n+nTail<=nTmp+nMerge*10 );
5603 if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
5604 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
5605 break;
5607 fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
5608 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
5609 if( nTail>0 ){
5610 fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
5613 pHead = pSave;
5614 for(i=0; i<nBuf+1; i++){
5615 PrefixMerger *pX = &aMerger[i];
5616 if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
5617 fts5DoclistIterNext(&pX->iter);
5618 fts5PrefixMergerInsertByRowid(&pHead, pX);
5622 }else{
5623 /* Copy poslist from pHead to output */
5624 PrefixMerger *pThis = pHead;
5625 Fts5DoclistIter *pI = &pThis->iter;
5626 fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
5627 fts5DoclistIterNext(pI);
5628 pHead = pThis->pNext;
5629 fts5PrefixMergerInsertByRowid(&pHead, pThis);
5633 fts5BufferFree(p1);
5634 fts5BufferFree(&tmp);
5635 memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
5636 *p1 = out;
5639 static void fts5SetupPrefixIter(
5640 Fts5Index *p, /* Index to read from */
5641 int bDesc, /* True for "ORDER BY rowid DESC" */
5642 int iIdx, /* Index to scan for data */
5643 u8 *pToken, /* Buffer containing prefix to match */
5644 int nToken, /* Size of buffer pToken in bytes */
5645 Fts5Colset *pColset, /* Restrict matches to these columns */
5646 Fts5Iter **ppIter /* OUT: New iterator */
5648 Fts5Structure *pStruct;
5649 Fts5Buffer *aBuf;
5650 int nBuf = 32;
5651 int nMerge = 1;
5653 void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
5654 void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
5655 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
5656 xMerge = fts5MergeRowidLists;
5657 xAppend = fts5AppendRowid;
5658 }else{
5659 nMerge = FTS5_MERGE_NLIST-1;
5660 nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
5661 xMerge = fts5MergePrefixLists;
5662 xAppend = fts5AppendPoslist;
5665 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
5666 pStruct = fts5StructureRead(p);
5668 if( aBuf && pStruct ){
5669 const int flags = FTS5INDEX_QUERY_SCAN
5670 | FTS5INDEX_QUERY_SKIPEMPTY
5671 | FTS5INDEX_QUERY_NOOUTPUT;
5672 int i;
5673 i64 iLastRowid = 0;
5674 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
5675 Fts5Data *pData;
5676 Fts5Buffer doclist;
5677 int bNewTerm = 1;
5679 memset(&doclist, 0, sizeof(doclist));
5680 if( iIdx!=0 ){
5681 int dummy = 0;
5682 const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
5683 pToken[0] = FTS5_MAIN_PREFIX;
5684 fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
5685 fts5IterSetOutputCb(&p->rc, p1);
5686 for(;
5687 fts5MultiIterEof(p, p1)==0;
5688 fts5MultiIterNext2(p, p1, &dummy)
5690 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
5691 p1->xSetOutputs(p1, pSeg);
5692 if( p1->base.nData ){
5693 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
5694 iLastRowid = p1->base.iRowid;
5697 fts5MultiIterFree(p1);
5700 pToken[0] = FTS5_MAIN_PREFIX + iIdx;
5701 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
5702 fts5IterSetOutputCb(&p->rc, p1);
5703 for( /* no-op */ ;
5704 fts5MultiIterEof(p, p1)==0;
5705 fts5MultiIterNext2(p, p1, &bNewTerm)
5707 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
5708 int nTerm = pSeg->term.n;
5709 const u8 *pTerm = pSeg->term.p;
5710 p1->xSetOutputs(p1, pSeg);
5712 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
5713 if( bNewTerm ){
5714 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
5717 if( p1->base.nData==0 ) continue;
5719 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
5720 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
5721 int i1 = i*nMerge;
5722 int iStore;
5723 assert( i1+nMerge<=nBuf );
5724 for(iStore=i1; iStore<i1+nMerge; iStore++){
5725 if( aBuf[iStore].n==0 ){
5726 fts5BufferSwap(&doclist, &aBuf[iStore]);
5727 fts5BufferZero(&doclist);
5728 break;
5731 if( iStore==i1+nMerge ){
5732 xMerge(p, &doclist, nMerge, &aBuf[i1]);
5733 for(iStore=i1; iStore<i1+nMerge; iStore++){
5734 fts5BufferZero(&aBuf[iStore]);
5738 iLastRowid = 0;
5741 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
5742 iLastRowid = p1->base.iRowid;
5745 assert( (nBuf%nMerge)==0 );
5746 for(i=0; i<nBuf; i+=nMerge){
5747 int iFree;
5748 if( p->rc==SQLITE_OK ){
5749 xMerge(p, &doclist, nMerge, &aBuf[i]);
5751 for(iFree=i; iFree<i+nMerge; iFree++){
5752 fts5BufferFree(&aBuf[iFree]);
5755 fts5MultiIterFree(p1);
5757 pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
5758 if( pData ){
5759 pData->p = (u8*)&pData[1];
5760 pData->nn = pData->szLeaf = doclist.n;
5761 if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
5762 fts5MultiIterNew2(p, pData, bDesc, ppIter);
5764 fts5BufferFree(&doclist);
5767 fts5StructureRelease(pStruct);
5768 sqlite3_free(aBuf);
5773 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
5774 ** to the document with rowid iRowid.
5776 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
5777 assert( p->rc==SQLITE_OK );
5779 /* Allocate the hash table if it has not already been allocated */
5780 if( p->pHash==0 ){
5781 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
5784 /* Flush the hash table to disk if required */
5785 if( iRowid<p->iWriteRowid
5786 || (iRowid==p->iWriteRowid && p->bDelete==0)
5787 || (p->nPendingData > p->pConfig->nHashSize)
5789 fts5IndexFlush(p);
5792 p->iWriteRowid = iRowid;
5793 p->bDelete = bDelete;
5794 return fts5IndexReturn(p);
5798 ** Commit data to disk.
5800 int sqlite3Fts5IndexSync(Fts5Index *p){
5801 assert( p->rc==SQLITE_OK );
5802 fts5IndexFlush(p);
5803 sqlite3Fts5IndexCloseReader(p);
5804 return fts5IndexReturn(p);
5808 ** Discard any data stored in the in-memory hash tables. Do not write it
5809 ** to the database. Additionally, assume that the contents of the %_data
5810 ** table may have changed on disk. So any in-memory caches of %_data
5811 ** records must be invalidated.
5813 int sqlite3Fts5IndexRollback(Fts5Index *p){
5814 sqlite3Fts5IndexCloseReader(p);
5815 fts5IndexDiscardData(p);
5816 fts5StructureInvalidate(p);
5817 /* assert( p->rc==SQLITE_OK ); */
5818 return SQLITE_OK;
5822 ** The %_data table is completely empty when this function is called. This
5823 ** function populates it with the initial structure objects for each index,
5824 ** and the initial version of the "averages" record (a zero-byte blob).
5826 int sqlite3Fts5IndexReinit(Fts5Index *p){
5827 Fts5Structure s;
5828 fts5StructureInvalidate(p);
5829 fts5IndexDiscardData(p);
5830 memset(&s, 0, sizeof(Fts5Structure));
5831 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
5832 fts5StructureWrite(p, &s);
5833 return fts5IndexReturn(p);
5837 ** Open a new Fts5Index handle. If the bCreate argument is true, create
5838 ** and initialize the underlying %_data table.
5840 ** If successful, set *pp to point to the new object and return SQLITE_OK.
5841 ** Otherwise, set *pp to NULL and return an SQLite error code.
5843 int sqlite3Fts5IndexOpen(
5844 Fts5Config *pConfig,
5845 int bCreate,
5846 Fts5Index **pp,
5847 char **pzErr
5849 int rc = SQLITE_OK;
5850 Fts5Index *p; /* New object */
5852 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
5853 if( rc==SQLITE_OK ){
5854 p->pConfig = pConfig;
5855 p->nWorkUnit = FTS5_WORK_UNIT;
5856 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
5857 if( p->zDataTbl && bCreate ){
5858 rc = sqlite3Fts5CreateTable(
5859 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
5861 if( rc==SQLITE_OK ){
5862 rc = sqlite3Fts5CreateTable(pConfig, "idx",
5863 "segid, term, pgno, PRIMARY KEY(segid, term)",
5864 1, pzErr
5867 if( rc==SQLITE_OK ){
5868 rc = sqlite3Fts5IndexReinit(p);
5873 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
5874 if( rc ){
5875 sqlite3Fts5IndexClose(p);
5876 *pp = 0;
5878 return rc;
5882 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
5884 int sqlite3Fts5IndexClose(Fts5Index *p){
5885 int rc = SQLITE_OK;
5886 if( p ){
5887 assert( p->pReader==0 );
5888 fts5StructureInvalidate(p);
5889 sqlite3_finalize(p->pWriter);
5890 sqlite3_finalize(p->pDeleter);
5891 sqlite3_finalize(p->pIdxWriter);
5892 sqlite3_finalize(p->pIdxDeleter);
5893 sqlite3_finalize(p->pIdxSelect);
5894 sqlite3_finalize(p->pDataVersion);
5895 sqlite3_finalize(p->pDeleteFromIdx);
5896 sqlite3Fts5HashFree(p->pHash);
5897 sqlite3_free(p->zDataTbl);
5898 sqlite3_free(p);
5900 return rc;
5904 ** Argument p points to a buffer containing utf-8 text that is n bytes in
5905 ** size. Return the number of bytes in the nChar character prefix of the
5906 ** buffer, or 0 if there are less than nChar characters in total.
5908 int sqlite3Fts5IndexCharlenToBytelen(
5909 const char *p,
5910 int nByte,
5911 int nChar
5913 int n = 0;
5914 int i;
5915 for(i=0; i<nChar; i++){
5916 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */
5917 if( (unsigned char)p[n++]>=0xc0 ){
5918 if( n>=nByte ) return 0;
5919 while( (p[n] & 0xc0)==0x80 ){
5920 n++;
5921 if( n>=nByte ){
5922 if( i+1==nChar ) break;
5923 return 0;
5928 return n;
5932 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
5933 ** unicode characters in the string.
5935 static int fts5IndexCharlen(const char *pIn, int nIn){
5936 int nChar = 0;
5937 int i = 0;
5938 while( i<nIn ){
5939 if( (unsigned char)pIn[i++]>=0xc0 ){
5940 while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
5942 nChar++;
5944 return nChar;
5948 ** Insert or remove data to or from the index. Each time a document is
5949 ** added to or removed from the index, this function is called one or more
5950 ** times.
5952 ** For an insert, it must be called once for each token in the new document.
5953 ** If the operation is a delete, it must be called (at least) once for each
5954 ** unique token in the document with an iCol value less than zero. The iPos
5955 ** argument is ignored for a delete.
5957 int sqlite3Fts5IndexWrite(
5958 Fts5Index *p, /* Index to write to */
5959 int iCol, /* Column token appears in (-ve -> delete) */
5960 int iPos, /* Position of token within column */
5961 const char *pToken, int nToken /* Token to add or remove to or from index */
5963 int i; /* Used to iterate through indexes */
5964 int rc = SQLITE_OK; /* Return code */
5965 Fts5Config *pConfig = p->pConfig;
5967 assert( p->rc==SQLITE_OK );
5968 assert( (iCol<0)==p->bDelete );
5970 /* Add the entry to the main terms index. */
5971 rc = sqlite3Fts5HashWrite(
5972 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
5975 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
5976 const int nChar = pConfig->aPrefix[i];
5977 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
5978 if( nByte ){
5979 rc = sqlite3Fts5HashWrite(p->pHash,
5980 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
5981 nByte
5986 return rc;
5990 ** Open a new iterator to iterate though all rowid that match the
5991 ** specified token or token prefix.
5993 int sqlite3Fts5IndexQuery(
5994 Fts5Index *p, /* FTS index to query */
5995 const char *pToken, int nToken, /* Token (or prefix) to query for */
5996 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
5997 Fts5Colset *pColset, /* Match these columns only */
5998 Fts5IndexIter **ppIter /* OUT: New iterator object */
6000 Fts5Config *pConfig = p->pConfig;
6001 Fts5Iter *pRet = 0;
6002 Fts5Buffer buf = {0, 0, 0};
6004 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
6005 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
6007 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
6008 int iIdx = 0; /* Index to search */
6009 int iPrefixIdx = 0; /* +1 prefix index */
6010 if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
6012 /* Figure out which index to search and set iIdx accordingly. If this
6013 ** is a prefix query for which there is no prefix index, set iIdx to
6014 ** greater than pConfig->nPrefix to indicate that the query will be
6015 ** satisfied by scanning multiple terms in the main index.
6017 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
6018 ** prefix-query. Instead of using a prefix-index (if one exists),
6019 ** evaluate the prefix query using the main FTS index. This is used
6020 ** for internal sanity checking by the integrity-check in debug
6021 ** mode only. */
6022 #ifdef SQLITE_DEBUG
6023 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
6024 assert( flags & FTS5INDEX_QUERY_PREFIX );
6025 iIdx = 1+pConfig->nPrefix;
6026 }else
6027 #endif
6028 if( flags & FTS5INDEX_QUERY_PREFIX ){
6029 int nChar = fts5IndexCharlen(pToken, nToken);
6030 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
6031 int nIdxChar = pConfig->aPrefix[iIdx-1];
6032 if( nIdxChar==nChar ) break;
6033 if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
6037 if( iIdx<=pConfig->nPrefix ){
6038 /* Straight index lookup */
6039 Fts5Structure *pStruct = fts5StructureRead(p);
6040 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
6041 if( pStruct ){
6042 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
6043 pColset, buf.p, nToken+1, -1, 0, &pRet
6045 fts5StructureRelease(pStruct);
6047 }else{
6048 /* Scan multiple terms in the main index */
6049 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
6050 fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
6051 if( pRet==0 ){
6052 assert( p->rc!=SQLITE_OK );
6053 }else{
6054 assert( pRet->pColset==0 );
6055 fts5IterSetOutputCb(&p->rc, pRet);
6056 if( p->rc==SQLITE_OK ){
6057 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
6058 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
6063 if( p->rc ){
6064 sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
6065 pRet = 0;
6066 sqlite3Fts5IndexCloseReader(p);
6069 *ppIter = (Fts5IndexIter*)pRet;
6070 sqlite3Fts5BufferFree(&buf);
6072 return fts5IndexReturn(p);
6076 ** Return true if the iterator passed as the only argument is at EOF.
6079 ** Move to the next matching rowid.
6081 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
6082 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6083 assert( pIter->pIndex->rc==SQLITE_OK );
6084 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
6085 return fts5IndexReturn(pIter->pIndex);
6089 ** Move to the next matching term/rowid. Used by the fts5vocab module.
6091 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
6092 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6093 Fts5Index *p = pIter->pIndex;
6095 assert( pIter->pIndex->rc==SQLITE_OK );
6097 fts5MultiIterNext(p, pIter, 0, 0);
6098 if( p->rc==SQLITE_OK ){
6099 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
6100 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
6101 fts5DataRelease(pSeg->pLeaf);
6102 pSeg->pLeaf = 0;
6103 pIter->base.bEof = 1;
6107 return fts5IndexReturn(pIter->pIndex);
6111 ** Move to the next matching rowid that occurs at or after iMatch. The
6112 ** definition of "at or after" depends on whether this iterator iterates
6113 ** in ascending or descending rowid order.
6115 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
6116 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6117 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
6118 return fts5IndexReturn(pIter->pIndex);
6122 ** Return the current term.
6124 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
6125 int n;
6126 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
6127 assert_nc( z || n<=1 );
6128 *pn = n-1;
6129 return (z ? &z[1] : 0);
6133 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
6135 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
6136 if( pIndexIter ){
6137 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6138 Fts5Index *pIndex = pIter->pIndex;
6139 fts5MultiIterFree(pIter);
6140 sqlite3Fts5IndexCloseReader(pIndex);
6145 ** Read and decode the "averages" record from the database.
6147 ** Parameter anSize must point to an array of size nCol, where nCol is
6148 ** the number of user defined columns in the FTS table.
6150 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
6151 int nCol = p->pConfig->nCol;
6152 Fts5Data *pData;
6154 *pnRow = 0;
6155 memset(anSize, 0, sizeof(i64) * nCol);
6156 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
6157 if( p->rc==SQLITE_OK && pData->nn ){
6158 int i = 0;
6159 int iCol;
6160 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
6161 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
6162 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
6166 fts5DataRelease(pData);
6167 return fts5IndexReturn(p);
6171 ** Replace the current "averages" record with the contents of the buffer
6172 ** supplied as the second argument.
6174 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
6175 assert( p->rc==SQLITE_OK );
6176 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
6177 return fts5IndexReturn(p);
6181 ** Return the total number of blocks this module has read from the %_data
6182 ** table since it was created.
6184 int sqlite3Fts5IndexReads(Fts5Index *p){
6185 return p->nRead;
6189 ** Set the 32-bit cookie value stored at the start of all structure
6190 ** records to the value passed as the second argument.
6192 ** Return SQLITE_OK if successful, or an SQLite error code if an error
6193 ** occurs.
6195 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
6196 int rc; /* Return code */
6197 Fts5Config *pConfig = p->pConfig; /* Configuration object */
6198 u8 aCookie[4]; /* Binary representation of iNew */
6199 sqlite3_blob *pBlob = 0;
6201 assert( p->rc==SQLITE_OK );
6202 sqlite3Fts5Put32(aCookie, iNew);
6204 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
6205 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
6207 if( rc==SQLITE_OK ){
6208 sqlite3_blob_write(pBlob, aCookie, 4, 0);
6209 rc = sqlite3_blob_close(pBlob);
6212 return rc;
6215 int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
6216 Fts5Structure *pStruct;
6217 pStruct = fts5StructureRead(p);
6218 fts5StructureRelease(pStruct);
6219 return fts5IndexReturn(p);
6223 /*************************************************************************
6224 **************************************************************************
6225 ** Below this point is the implementation of the integrity-check
6226 ** functionality.
6230 ** Return a simple checksum value based on the arguments.
6232 u64 sqlite3Fts5IndexEntryCksum(
6233 i64 iRowid,
6234 int iCol,
6235 int iPos,
6236 int iIdx,
6237 const char *pTerm,
6238 int nTerm
6240 int i;
6241 u64 ret = iRowid;
6242 ret += (ret<<3) + iCol;
6243 ret += (ret<<3) + iPos;
6244 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
6245 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
6246 return ret;
6249 #ifdef SQLITE_DEBUG
6251 ** This function is purely an internal test. It does not contribute to
6252 ** FTS functionality, or even the integrity-check, in any way.
6254 ** Instead, it tests that the same set of pgno/rowid combinations are
6255 ** visited regardless of whether the doclist-index identified by parameters
6256 ** iSegid/iLeaf is iterated in forwards or reverse order.
6258 static void fts5TestDlidxReverse(
6259 Fts5Index *p,
6260 int iSegid, /* Segment id to load from */
6261 int iLeaf /* Load doclist-index for this leaf */
6263 Fts5DlidxIter *pDlidx = 0;
6264 u64 cksum1 = 13;
6265 u64 cksum2 = 13;
6267 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
6268 fts5DlidxIterEof(p, pDlidx)==0;
6269 fts5DlidxIterNext(p, pDlidx)
6271 i64 iRowid = fts5DlidxIterRowid(pDlidx);
6272 int pgno = fts5DlidxIterPgno(pDlidx);
6273 assert( pgno>iLeaf );
6274 cksum1 += iRowid + ((i64)pgno<<32);
6276 fts5DlidxIterFree(pDlidx);
6277 pDlidx = 0;
6279 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
6280 fts5DlidxIterEof(p, pDlidx)==0;
6281 fts5DlidxIterPrev(p, pDlidx)
6283 i64 iRowid = fts5DlidxIterRowid(pDlidx);
6284 int pgno = fts5DlidxIterPgno(pDlidx);
6285 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
6286 cksum2 += iRowid + ((i64)pgno<<32);
6288 fts5DlidxIterFree(pDlidx);
6289 pDlidx = 0;
6291 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
6294 static int fts5QueryCksum(
6295 Fts5Index *p, /* Fts5 index object */
6296 int iIdx,
6297 const char *z, /* Index key to query for */
6298 int n, /* Size of index key in bytes */
6299 int flags, /* Flags for Fts5IndexQuery */
6300 u64 *pCksum /* IN/OUT: Checksum value */
6302 int eDetail = p->pConfig->eDetail;
6303 u64 cksum = *pCksum;
6304 Fts5IndexIter *pIter = 0;
6305 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
6307 while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
6308 i64 rowid = pIter->iRowid;
6310 if( eDetail==FTS5_DETAIL_NONE ){
6311 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
6312 }else{
6313 Fts5PoslistReader sReader;
6314 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
6315 sReader.bEof==0;
6316 sqlite3Fts5PoslistReaderNext(&sReader)
6318 int iCol = FTS5_POS2COLUMN(sReader.iPos);
6319 int iOff = FTS5_POS2OFFSET(sReader.iPos);
6320 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
6323 if( rc==SQLITE_OK ){
6324 rc = sqlite3Fts5IterNext(pIter);
6327 sqlite3Fts5IterClose(pIter);
6329 *pCksum = cksum;
6330 return rc;
6334 ** Check if buffer z[], size n bytes, contains as series of valid utf-8
6335 ** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
6336 ** contain valid utf-8, return non-zero.
6338 static int fts5TestUtf8(const char *z, int n){
6339 int i = 0;
6340 assert_nc( n>0 );
6341 while( i<n ){
6342 if( (z[i] & 0x80)==0x00 ){
6343 i++;
6344 }else
6345 if( (z[i] & 0xE0)==0xC0 ){
6346 if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
6347 i += 2;
6348 }else
6349 if( (z[i] & 0xF0)==0xE0 ){
6350 if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
6351 i += 3;
6352 }else
6353 if( (z[i] & 0xF8)==0xF0 ){
6354 if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
6355 if( (z[i+2] & 0xC0)!=0x80 ) return 1;
6356 i += 3;
6357 }else{
6358 return 1;
6362 return 0;
6366 ** This function is also purely an internal test. It does not contribute to
6367 ** FTS functionality, or even the integrity-check, in any way.
6369 static void fts5TestTerm(
6370 Fts5Index *p,
6371 Fts5Buffer *pPrev, /* Previous term */
6372 const char *z, int n, /* Possibly new term to test */
6373 u64 expected,
6374 u64 *pCksum
6376 int rc = p->rc;
6377 if( pPrev->n==0 ){
6378 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
6379 }else
6380 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
6381 u64 cksum3 = *pCksum;
6382 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
6383 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
6384 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
6385 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
6386 u64 ck1 = 0;
6387 u64 ck2 = 0;
6389 /* Check that the results returned for ASC and DESC queries are
6390 ** the same. If not, call this corruption. */
6391 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
6392 if( rc==SQLITE_OK ){
6393 int f = flags|FTS5INDEX_QUERY_DESC;
6394 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
6396 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
6398 /* If this is a prefix query, check that the results returned if the
6399 ** the index is disabled are the same. In both ASC and DESC order.
6401 ** This check may only be performed if the hash table is empty. This
6402 ** is because the hash table only supports a single scan query at
6403 ** a time, and the multi-iter loop from which this function is called
6404 ** is already performing such a scan.
6406 ** Also only do this if buffer zTerm contains nTerm bytes of valid
6407 ** utf-8. Otherwise, the last part of the buffer contents might contain
6408 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
6409 ** character stored in the main fts index, which will cause the
6410 ** test to fail. */
6411 if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
6412 if( iIdx>0 && rc==SQLITE_OK ){
6413 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
6414 ck2 = 0;
6415 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
6416 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
6418 if( iIdx>0 && rc==SQLITE_OK ){
6419 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
6420 ck2 = 0;
6421 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
6422 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
6426 cksum3 ^= ck1;
6427 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
6429 if( rc==SQLITE_OK && cksum3!=expected ){
6430 rc = FTS5_CORRUPT;
6432 *pCksum = cksum3;
6434 p->rc = rc;
6437 #else
6438 # define fts5TestDlidxReverse(x,y,z)
6439 # define fts5TestTerm(u,v,w,x,y,z)
6440 #endif
6443 ** Check that:
6445 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
6446 ** contain zero terms.
6447 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
6448 ** contain zero rowids.
6450 static void fts5IndexIntegrityCheckEmpty(
6451 Fts5Index *p,
6452 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
6453 int iFirst,
6454 int iNoRowid,
6455 int iLast
6457 int i;
6459 /* Now check that the iter.nEmpty leaves following the current leaf
6460 ** (a) exist and (b) contain no terms. */
6461 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
6462 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
6463 if( pLeaf ){
6464 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
6465 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
6467 fts5DataRelease(pLeaf);
6471 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
6472 int iTermOff = 0;
6473 int ii;
6475 Fts5Buffer buf1 = {0,0,0};
6476 Fts5Buffer buf2 = {0,0,0};
6478 ii = pLeaf->szLeaf;
6479 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
6480 int res;
6481 int iOff;
6482 int nIncr;
6484 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
6485 iTermOff += nIncr;
6486 iOff = iTermOff;
6488 if( iOff>=pLeaf->szLeaf ){
6489 p->rc = FTS5_CORRUPT;
6490 }else if( iTermOff==nIncr ){
6491 int nByte;
6492 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
6493 if( (iOff+nByte)>pLeaf->szLeaf ){
6494 p->rc = FTS5_CORRUPT;
6495 }else{
6496 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
6498 }else{
6499 int nKeep, nByte;
6500 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
6501 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
6502 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
6503 p->rc = FTS5_CORRUPT;
6504 }else{
6505 buf1.n = nKeep;
6506 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
6509 if( p->rc==SQLITE_OK ){
6510 res = fts5BufferCompare(&buf1, &buf2);
6511 if( res<=0 ) p->rc = FTS5_CORRUPT;
6514 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
6517 fts5BufferFree(&buf1);
6518 fts5BufferFree(&buf2);
6521 static void fts5IndexIntegrityCheckSegment(
6522 Fts5Index *p, /* FTS5 backend object */
6523 Fts5StructureSegment *pSeg /* Segment to check internal consistency */
6525 Fts5Config *pConfig = p->pConfig;
6526 int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE);
6527 sqlite3_stmt *pStmt = 0;
6528 int rc2;
6529 int iIdxPrevLeaf = pSeg->pgnoFirst-1;
6530 int iDlidxPrevLeaf = pSeg->pgnoLast;
6532 if( pSeg->pgnoFirst==0 ) return;
6534 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
6535 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
6536 "ORDER BY 1, 2",
6537 pConfig->zDb, pConfig->zName, pSeg->iSegid
6540 /* Iterate through the b-tree hierarchy. */
6541 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
6542 i64 iRow; /* Rowid for this leaf */
6543 Fts5Data *pLeaf; /* Data for this leaf */
6545 const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
6546 int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
6547 int iIdxLeaf = sqlite3_column_int(pStmt, 2);
6548 int bIdxDlidx = sqlite3_column_int(pStmt, 3);
6550 /* If the leaf in question has already been trimmed from the segment,
6551 ** ignore this b-tree entry. Otherwise, load it into memory. */
6552 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
6553 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
6554 pLeaf = fts5LeafRead(p, iRow);
6555 if( pLeaf==0 ) break;
6557 /* Check that the leaf contains at least one term, and that it is equal
6558 ** to or larger than the split-key in zIdxTerm. Also check that if there
6559 ** is also a rowid pointer within the leaf page header, it points to a
6560 ** location before the term. */
6561 if( pLeaf->nn<=pLeaf->szLeaf ){
6563 if( nIdxTerm==0
6564 && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE
6565 && pLeaf->nn==pLeaf->szLeaf
6566 && pLeaf->nn==4
6568 /* special case - the very first page in a segment keeps its %_idx
6569 ** entry even if all the terms are removed from it by secure-delete
6570 ** operations. */
6571 }else{
6572 p->rc = FTS5_CORRUPT;
6575 }else{
6576 int iOff; /* Offset of first term on leaf */
6577 int iRowidOff; /* Offset of first rowid on leaf */
6578 int nTerm; /* Size of term on leaf in bytes */
6579 int res; /* Comparison of term and split-key */
6581 iOff = fts5LeafFirstTermOff(pLeaf);
6582 iRowidOff = fts5LeafFirstRowidOff(pLeaf);
6583 if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
6584 p->rc = FTS5_CORRUPT;
6585 }else{
6586 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
6587 res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
6588 if( res==0 ) res = nTerm - nIdxTerm;
6589 if( res<0 ) p->rc = FTS5_CORRUPT;
6592 fts5IntegrityCheckPgidx(p, pLeaf);
6594 fts5DataRelease(pLeaf);
6595 if( p->rc ) break;
6597 /* Now check that the iter.nEmpty leaves following the current leaf
6598 ** (a) exist and (b) contain no terms. */
6599 fts5IndexIntegrityCheckEmpty(
6600 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
6602 if( p->rc ) break;
6604 /* If there is a doclist-index, check that it looks right. */
6605 if( bIdxDlidx ){
6606 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
6607 int iPrevLeaf = iIdxLeaf;
6608 int iSegid = pSeg->iSegid;
6609 int iPg = 0;
6610 i64 iKey;
6612 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
6613 fts5DlidxIterEof(p, pDlidx)==0;
6614 fts5DlidxIterNext(p, pDlidx)
6617 /* Check any rowid-less pages that occur before the current leaf. */
6618 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
6619 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
6620 pLeaf = fts5DataRead(p, iKey);
6621 if( pLeaf ){
6622 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
6623 fts5DataRelease(pLeaf);
6626 iPrevLeaf = fts5DlidxIterPgno(pDlidx);
6628 /* Check that the leaf page indicated by the iterator really does
6629 ** contain the rowid suggested by the same. */
6630 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
6631 pLeaf = fts5DataRead(p, iKey);
6632 if( pLeaf ){
6633 i64 iRowid;
6634 int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
6635 ASSERT_SZLEAF_OK(pLeaf);
6636 if( iRowidOff>=pLeaf->szLeaf ){
6637 p->rc = FTS5_CORRUPT;
6638 }else if( bSecureDelete==0 || iRowidOff>0 ){
6639 i64 iDlRowid = fts5DlidxIterRowid(pDlidx);
6640 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
6641 if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){
6642 p->rc = FTS5_CORRUPT;
6645 fts5DataRelease(pLeaf);
6649 iDlidxPrevLeaf = iPg;
6650 fts5DlidxIterFree(pDlidx);
6651 fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
6652 }else{
6653 iDlidxPrevLeaf = pSeg->pgnoLast;
6654 /* TODO: Check there is no doclist index */
6657 iIdxPrevLeaf = iIdxLeaf;
6660 rc2 = sqlite3_finalize(pStmt);
6661 if( p->rc==SQLITE_OK ) p->rc = rc2;
6663 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
6664 #if 0
6665 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
6666 p->rc = FTS5_CORRUPT;
6668 #endif
6673 ** Run internal checks to ensure that the FTS index (a) is internally
6674 ** consistent and (b) contains entries for which the XOR of the checksums
6675 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
6677 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
6678 ** checksum does not match. Return SQLITE_OK if all checks pass without
6679 ** error, or some other SQLite error code if another error (e.g. OOM)
6680 ** occurs.
6682 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
6683 int eDetail = p->pConfig->eDetail;
6684 u64 cksum2 = 0; /* Checksum based on contents of indexes */
6685 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
6686 Fts5Iter *pIter; /* Used to iterate through entire index */
6687 Fts5Structure *pStruct; /* Index structure */
6688 int iLvl, iSeg;
6690 #ifdef SQLITE_DEBUG
6691 /* Used by extra internal tests only run if NDEBUG is not defined */
6692 u64 cksum3 = 0; /* Checksum based on contents of indexes */
6693 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
6694 #endif
6695 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
6697 /* Load the FTS index structure */
6698 pStruct = fts5StructureRead(p);
6699 if( pStruct==0 ){
6700 assert( p->rc!=SQLITE_OK );
6701 return fts5IndexReturn(p);
6704 /* Check that the internal nodes of each segment match the leaves */
6705 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
6706 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
6707 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
6708 fts5IndexIntegrityCheckSegment(p, pSeg);
6712 /* The cksum argument passed to this function is a checksum calculated
6713 ** based on all expected entries in the FTS index (including prefix index
6714 ** entries). This block checks that a checksum calculated based on the
6715 ** actual contents of FTS index is identical.
6717 ** Two versions of the same checksum are calculated. The first (stack
6718 ** variable cksum2) based on entries extracted from the full-text index
6719 ** while doing a linear scan of each individual index in turn.
6721 ** As each term visited by the linear scans, a separate query for the
6722 ** same term is performed. cksum3 is calculated based on the entries
6723 ** extracted by these queries.
6725 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
6726 fts5MultiIterEof(p, pIter)==0;
6727 fts5MultiIterNext(p, pIter, 0, 0)
6729 int n; /* Size of term in bytes */
6730 i64 iPos = 0; /* Position read from poslist */
6731 int iOff = 0; /* Offset within poslist */
6732 i64 iRowid = fts5MultiIterRowid(pIter);
6733 char *z = (char*)fts5MultiIterTerm(pIter, &n);
6735 /* If this is a new term, query for it. Update cksum3 with the results. */
6736 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
6737 if( p->rc ) break;
6739 if( eDetail==FTS5_DETAIL_NONE ){
6740 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
6741 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
6743 }else{
6744 poslist.n = 0;
6745 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
6746 fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
6747 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
6748 int iCol = FTS5_POS2COLUMN(iPos);
6749 int iTokOff = FTS5_POS2OFFSET(iPos);
6750 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
6754 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
6756 fts5MultiIterFree(pIter);
6757 if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
6759 fts5StructureRelease(pStruct);
6760 #ifdef SQLITE_DEBUG
6761 fts5BufferFree(&term);
6762 #endif
6763 fts5BufferFree(&poslist);
6764 return fts5IndexReturn(p);
6767 /*************************************************************************
6768 **************************************************************************
6769 ** Below this point is the implementation of the fts5_decode() scalar
6770 ** function only.
6773 #ifdef SQLITE_TEST
6775 ** Decode a segment-data rowid from the %_data table. This function is
6776 ** the opposite of macro FTS5_SEGMENT_ROWID().
6778 static void fts5DecodeRowid(
6779 i64 iRowid, /* Rowid from %_data table */
6780 int *piSegid, /* OUT: Segment id */
6781 int *pbDlidx, /* OUT: Dlidx flag */
6782 int *piHeight, /* OUT: Height */
6783 int *piPgno /* OUT: Page number */
6785 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
6786 iRowid >>= FTS5_DATA_PAGE_B;
6788 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
6789 iRowid >>= FTS5_DATA_HEIGHT_B;
6791 *pbDlidx = (int)(iRowid & 0x0001);
6792 iRowid >>= FTS5_DATA_DLI_B;
6794 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
6796 #endif /* SQLITE_TEST */
6798 #ifdef SQLITE_TEST
6799 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
6800 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */
6801 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
6803 if( iSegid==0 ){
6804 if( iKey==FTS5_AVERAGES_ROWID ){
6805 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
6806 }else{
6807 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
6810 else{
6811 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
6812 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
6816 #endif /* SQLITE_TEST */
6818 #ifdef SQLITE_TEST
6819 static void fts5DebugStructure(
6820 int *pRc, /* IN/OUT: error code */
6821 Fts5Buffer *pBuf,
6822 Fts5Structure *p
6824 int iLvl, iSeg; /* Iterate through levels, segments */
6826 for(iLvl=0; iLvl<p->nLevel; iLvl++){
6827 Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
6828 sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
6829 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
6831 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
6832 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
6833 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
6834 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
6837 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
6840 #endif /* SQLITE_TEST */
6842 #ifdef SQLITE_TEST
6844 ** This is part of the fts5_decode() debugging aid.
6846 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
6847 ** function appends a human-readable representation of the same object
6848 ** to the buffer passed as the second argument.
6850 static void fts5DecodeStructure(
6851 int *pRc, /* IN/OUT: error code */
6852 Fts5Buffer *pBuf,
6853 const u8 *pBlob, int nBlob
6855 int rc; /* Return code */
6856 Fts5Structure *p = 0; /* Decoded structure object */
6858 rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
6859 if( rc!=SQLITE_OK ){
6860 *pRc = rc;
6861 return;
6864 fts5DebugStructure(pRc, pBuf, p);
6865 fts5StructureRelease(p);
6867 #endif /* SQLITE_TEST */
6869 #ifdef SQLITE_TEST
6871 ** This is part of the fts5_decode() debugging aid.
6873 ** Arguments pBlob/nBlob contain an "averages" record. This function
6874 ** appends a human-readable representation of record to the buffer passed
6875 ** as the second argument.
6877 static void fts5DecodeAverages(
6878 int *pRc, /* IN/OUT: error code */
6879 Fts5Buffer *pBuf,
6880 const u8 *pBlob, int nBlob
6882 int i = 0;
6883 const char *zSpace = "";
6885 while( i<nBlob ){
6886 u64 iVal;
6887 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
6888 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
6889 zSpace = " ";
6892 #endif /* SQLITE_TEST */
6894 #ifdef SQLITE_TEST
6896 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
6897 ** each varint and append its string representation to buffer pBuf. Return
6898 ** after either the input buffer is exhausted or a 0 value is read.
6900 ** The return value is the number of bytes read from the input buffer.
6902 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6903 int iOff = 0;
6904 while( iOff<n ){
6905 int iVal;
6906 iOff += fts5GetVarint32(&a[iOff], iVal);
6907 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
6909 return iOff;
6911 #endif /* SQLITE_TEST */
6913 #ifdef SQLITE_TEST
6915 ** The start of buffer (a/n) contains the start of a doclist. The doclist
6916 ** may or may not finish within the buffer. This function appends a text
6917 ** representation of the part of the doclist that is present to buffer
6918 ** pBuf.
6920 ** The return value is the number of bytes read from the input buffer.
6922 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6923 i64 iDocid = 0;
6924 int iOff = 0;
6926 if( n>0 ){
6927 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
6928 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6930 while( iOff<n ){
6931 int nPos;
6932 int bDel;
6933 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
6934 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
6935 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
6936 if( iOff<n ){
6937 i64 iDelta;
6938 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
6939 iDocid += iDelta;
6940 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6944 return iOff;
6946 #endif /* SQLITE_TEST */
6948 #ifdef SQLITE_TEST
6950 ** This function is part of the fts5_decode() debugging function. It is
6951 ** only ever used with detail=none tables.
6953 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6954 ** tables. This function appends a human-readable version of that list to
6955 ** buffer pBuf.
6957 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6958 ** no-op. If an OOM or other error occurs within this function, *pRc is
6959 ** set to an SQLite error code before returning. The final state of buffer
6960 ** pBuf is undefined in this case.
6962 static void fts5DecodeRowidList(
6963 int *pRc, /* IN/OUT: Error code */
6964 Fts5Buffer *pBuf, /* Buffer to append text to */
6965 const u8 *pData, int nData /* Data to decode list-of-rowids from */
6967 int i = 0;
6968 i64 iRowid = 0;
6970 while( i<nData ){
6971 const char *zApp = "";
6972 u64 iVal;
6973 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
6974 iRowid += iVal;
6976 if( i<nData && pData[i]==0x00 ){
6977 i++;
6978 if( i<nData && pData[i]==0x00 ){
6979 i++;
6980 zApp = "+";
6981 }else{
6982 zApp = "*";
6986 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
6989 #endif /* SQLITE_TEST */
6991 #ifdef SQLITE_TEST
6993 ** The implementation of user-defined scalar function fts5_decode().
6995 static void fts5DecodeFunction(
6996 sqlite3_context *pCtx, /* Function call context */
6997 int nArg, /* Number of args (always 2) */
6998 sqlite3_value **apVal /* Function arguments */
7000 i64 iRowid; /* Rowid for record being decoded */
7001 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
7002 const u8 *aBlob; int n; /* Record to decode */
7003 u8 *a = 0;
7004 Fts5Buffer s; /* Build up text to return here */
7005 int rc = SQLITE_OK; /* Return code */
7006 sqlite3_int64 nSpace = 0;
7007 int eDetailNone = (sqlite3_user_data(pCtx)!=0);
7009 assert( nArg==2 );
7010 UNUSED_PARAM(nArg);
7011 memset(&s, 0, sizeof(Fts5Buffer));
7012 iRowid = sqlite3_value_int64(apVal[0]);
7014 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
7015 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
7016 ** buffer overreads even if the record is corrupt. */
7017 n = sqlite3_value_bytes(apVal[1]);
7018 aBlob = sqlite3_value_blob(apVal[1]);
7019 nSpace = n + FTS5_DATA_ZERO_PADDING;
7020 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
7021 if( a==0 ) goto decode_out;
7022 if( n>0 ) memcpy(a, aBlob, n);
7024 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
7026 fts5DebugRowid(&rc, &s, iRowid);
7027 if( bDlidx ){
7028 Fts5Data dlidx;
7029 Fts5DlidxLvl lvl;
7031 dlidx.p = a;
7032 dlidx.nn = n;
7034 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
7035 lvl.pData = &dlidx;
7036 lvl.iLeafPgno = iPgno;
7038 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
7039 sqlite3Fts5BufferAppendPrintf(&rc, &s,
7040 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
7043 }else if( iSegid==0 ){
7044 if( iRowid==FTS5_AVERAGES_ROWID ){
7045 fts5DecodeAverages(&rc, &s, a, n);
7046 }else{
7047 fts5DecodeStructure(&rc, &s, a, n);
7049 }else if( eDetailNone ){
7050 Fts5Buffer term; /* Current term read from page */
7051 int szLeaf;
7052 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
7053 int iTermOff;
7054 int nKeep = 0;
7055 int iOff;
7057 memset(&term, 0, sizeof(Fts5Buffer));
7059 /* Decode any entries that occur before the first term. */
7060 if( szLeaf<n ){
7061 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
7062 }else{
7063 iTermOff = szLeaf;
7065 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
7067 iOff = iTermOff;
7068 while( iOff<szLeaf ){
7069 int nAppend;
7071 /* Read the term data for the next term*/
7072 iOff += fts5GetVarint32(&a[iOff], nAppend);
7073 term.n = nKeep;
7074 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
7075 sqlite3Fts5BufferAppendPrintf(
7076 &rc, &s, " term=%.*s", term.n, (const char*)term.p
7078 iOff += nAppend;
7080 /* Figure out where the doclist for this term ends */
7081 if( iPgidxOff<n ){
7082 int nIncr;
7083 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
7084 iTermOff += nIncr;
7085 }else{
7086 iTermOff = szLeaf;
7089 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
7090 iOff = iTermOff;
7091 if( iOff<szLeaf ){
7092 iOff += fts5GetVarint32(&a[iOff], nKeep);
7096 fts5BufferFree(&term);
7097 }else{
7098 Fts5Buffer term; /* Current term read from page */
7099 int szLeaf; /* Offset of pgidx in a[] */
7100 int iPgidxOff;
7101 int iPgidxPrev = 0; /* Previous value read from pgidx */
7102 int iTermOff = 0;
7103 int iRowidOff = 0;
7104 int iOff;
7105 int nDoclist;
7107 memset(&term, 0, sizeof(Fts5Buffer));
7109 if( n<4 ){
7110 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
7111 goto decode_out;
7112 }else{
7113 iRowidOff = fts5GetU16(&a[0]);
7114 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
7115 if( iPgidxOff<n ){
7116 fts5GetVarint32(&a[iPgidxOff], iTermOff);
7117 }else if( iPgidxOff>n ){
7118 rc = FTS5_CORRUPT;
7119 goto decode_out;
7123 /* Decode the position list tail at the start of the page */
7124 if( iRowidOff!=0 ){
7125 iOff = iRowidOff;
7126 }else if( iTermOff!=0 ){
7127 iOff = iTermOff;
7128 }else{
7129 iOff = szLeaf;
7131 if( iOff>n ){
7132 rc = FTS5_CORRUPT;
7133 goto decode_out;
7135 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
7137 /* Decode any more doclist data that appears on the page before the
7138 ** first term. */
7139 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
7140 if( nDoclist+iOff>n ){
7141 rc = FTS5_CORRUPT;
7142 goto decode_out;
7144 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
7146 while( iPgidxOff<n && rc==SQLITE_OK ){
7147 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
7148 int nByte; /* Bytes of data */
7149 int iEnd;
7151 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
7152 iPgidxPrev += nByte;
7153 iOff = iPgidxPrev;
7155 if( iPgidxOff<n ){
7156 fts5GetVarint32(&a[iPgidxOff], nByte);
7157 iEnd = iPgidxPrev + nByte;
7158 }else{
7159 iEnd = szLeaf;
7161 if( iEnd>szLeaf ){
7162 rc = FTS5_CORRUPT;
7163 break;
7166 if( bFirst==0 ){
7167 iOff += fts5GetVarint32(&a[iOff], nByte);
7168 if( nByte>term.n ){
7169 rc = FTS5_CORRUPT;
7170 break;
7172 term.n = nByte;
7174 iOff += fts5GetVarint32(&a[iOff], nByte);
7175 if( iOff+nByte>n ){
7176 rc = FTS5_CORRUPT;
7177 break;
7179 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
7180 iOff += nByte;
7182 sqlite3Fts5BufferAppendPrintf(
7183 &rc, &s, " term=%.*s", term.n, (const char*)term.p
7185 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
7188 fts5BufferFree(&term);
7191 decode_out:
7192 sqlite3_free(a);
7193 if( rc==SQLITE_OK ){
7194 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
7195 }else{
7196 sqlite3_result_error_code(pCtx, rc);
7198 fts5BufferFree(&s);
7200 #endif /* SQLITE_TEST */
7202 #ifdef SQLITE_TEST
7204 ** The implementation of user-defined scalar function fts5_rowid().
7206 static void fts5RowidFunction(
7207 sqlite3_context *pCtx, /* Function call context */
7208 int nArg, /* Number of args (always 2) */
7209 sqlite3_value **apVal /* Function arguments */
7211 const char *zArg;
7212 if( nArg==0 ){
7213 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
7214 }else{
7215 zArg = (const char*)sqlite3_value_text(apVal[0]);
7216 if( 0==sqlite3_stricmp(zArg, "segment") ){
7217 i64 iRowid;
7218 int segid, pgno;
7219 if( nArg!=3 ){
7220 sqlite3_result_error(pCtx,
7221 "should be: fts5_rowid('segment', segid, pgno))", -1
7223 }else{
7224 segid = sqlite3_value_int(apVal[1]);
7225 pgno = sqlite3_value_int(apVal[2]);
7226 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
7227 sqlite3_result_int64(pCtx, iRowid);
7229 }else{
7230 sqlite3_result_error(pCtx,
7231 "first arg to fts5_rowid() must be 'segment'" , -1
7236 #endif /* SQLITE_TEST */
7239 ** This is called as part of registering the FTS5 module with database
7240 ** connection db. It registers several user-defined scalar functions useful
7241 ** with FTS5.
7243 ** If successful, SQLITE_OK is returned. If an error occurs, some other
7244 ** SQLite error code is returned instead.
7246 int sqlite3Fts5IndexInit(sqlite3 *db){
7247 #ifdef SQLITE_TEST
7248 int rc = sqlite3_create_function(
7249 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
7252 if( rc==SQLITE_OK ){
7253 rc = sqlite3_create_function(
7254 db, "fts5_decode_none", 2,
7255 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
7259 if( rc==SQLITE_OK ){
7260 rc = sqlite3_create_function(
7261 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
7264 return rc;
7265 #else
7266 return SQLITE_OK;
7267 UNUSED_PARAM(db);
7268 #endif
7272 int sqlite3Fts5IndexReset(Fts5Index *p){
7273 assert( p->pStruct==0 || p->iStructVersion!=0 );
7274 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
7275 fts5StructureInvalidate(p);
7277 return fts5IndexReturn(p);