4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
46 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
47 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
49 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
51 #define FTS5_MAIN_PREFIX '0'
53 #if FTS5_MAX_PREFIX_INDEXES > 31
54 # error "FTS5_MAX_PREFIX_INDEXES is too large"
57 #define FTS5_MAX_LEVEL 64
62 ** The %_data table managed by this module,
64 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
66 ** , contains the following 5 types of records. See the comments surrounding
67 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
68 ** assigned to each fo them.
70 ** 1. Structure Records:
72 ** The set of segments that make up an index - the index structure - are
73 ** recorded in a single record within the %_data table. The record consists
74 ** of a single 32-bit configuration cookie value followed by a list of
75 ** SQLite varints. If the FTS table features more than one index (because
76 ** there are one or more prefix indexes), it is guaranteed that all share
77 ** the same cookie value.
79 ** Immediately following the configuration cookie, the record begins with
82 ** + number of levels,
83 ** + total number of segments on all levels,
84 ** + value of write counter.
86 ** Then, for each level from 0 to nMax:
88 ** + number of input segments in ongoing merge.
89 ** + total number of segments in level.
90 ** + for each segment from oldest to newest:
91 ** + segment id (always > 0)
92 ** + first leaf page number (often 1, always greater than 0)
93 ** + final leaf page number
95 ** 2. The Averages Record:
97 ** A single record within the %_data table. The data is a list of varints.
98 ** The first value is the number of rows in the index. Then, for each column
99 ** from left to right, the total number of tokens in the column for all
100 ** rows of the table.
102 ** 3. Segment leaves:
104 ** TERM/DOCLIST FORMAT:
106 ** Most of each segment leaf is taken up by term/doclist data. The
107 ** general format of term/doclist, starting with the first term
108 ** on the leaf page, is:
110 ** varint : size of first term
111 ** blob: first term data
112 ** doclist: first doclist
114 ** varint: number of bytes in common with previous term
115 ** varint: number of bytes of new term data (nNew)
116 ** blob: nNew bytes of new term data
117 ** doclist: next doclist
122 ** varint: first rowid
123 ** poslist: first poslist
125 ** varint: rowid delta (always > 0)
126 ** poslist: next poslist
131 ** varint: size of poslist in bytes multiplied by 2, not including
132 ** this field. Plus 1 if this entry carries the "delete" flag.
133 ** collist: collist for column 0
136 ** varint: column number (I)
137 ** collist: collist for column I
142 ** varint: first offset + 2
144 ** varint: offset delta + 2
149 ** Each leaf page begins with a 4-byte header containing 2 16-bit
150 ** unsigned integer fields in big-endian format. They are:
152 ** * The byte offset of the first rowid on the page, if it exists
153 ** and occurs before the first term (otherwise 0).
155 ** * The byte offset of the start of the page footer. If the page
156 ** footer is 0 bytes in size, then this field is the same as the
157 ** size of the leaf page in bytes.
159 ** The page footer consists of a single varint for each term located
160 ** on the page. Each varint is the byte offset of the current term
161 ** within the page, delta-compressed against the previous value. In
162 ** other words, the first varint in the footer is the byte offset of
163 ** the first term, the second is the byte offset of the second less that
164 ** of the first, and so on.
166 ** The term/doclist format described above is accurate if the entire
167 ** term/doclist data fits on a single leaf page. If this is not the case,
168 ** the format is changed in two ways:
170 ** + if the first rowid on a page occurs before the first term, it
171 ** is stored as a literal value:
173 ** varint: first rowid
175 ** + the first term on each page is stored in the same way as the
176 ** very first term of the segment:
178 ** varint : size of first term
179 ** blob: first term data
181 ** 5. Segment doclist indexes:
183 ** Doclist indexes are themselves b-trees, however they usually consist of
184 ** a single leaf record only. The format of each doclist index leaf page
187 ** * Flags byte. Bits are:
188 ** 0x01: Clear if leaf is also the root page, otherwise set.
190 ** * Page number of fts index leaf page. As a varint.
192 ** * First rowid on page indicated by previous field. As a varint.
194 ** * A list of varints, one for each subsequent termless page. A
195 ** positive delta if the termless page contains at least one rowid,
196 ** or an 0x00 byte otherwise.
198 ** Internal doclist index nodes are:
200 ** * Flags byte. Bits are:
201 ** 0x01: Clear for root page, otherwise set.
203 ** * Page number of first child page. As a varint.
205 ** * Copy of first rowid on page indicated by previous field. As a varint.
207 ** * A list of delta-encoded varints - the first rowid on each subsequent
213 ** Rowids for the averages and structure records in the %_data table.
215 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
216 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */
219 ** Macros determining the rowids used by segment leaves and dlidx leaves
220 ** and nodes. All nodes and leaves are stored in the %_data table with large
223 ** Each segment has a unique non-zero 16-bit id.
225 ** The rowid for each segment leaf is found by passing the segment id and
226 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
227 ** sequentially starting from 1.
229 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
230 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
231 #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
232 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */
234 #define fts5_dri(segid, dlidx, height, pgno) ( \
235 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
236 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
237 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \
241 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
242 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
245 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB
; }
250 ** Each time a blob is read from the %_data table, it is padded with this
251 ** many zero bytes. This makes it easier to decode the various record formats
252 ** without overreading if the records are corrupt.
254 #define FTS5_DATA_ZERO_PADDING 8
255 #define FTS5_DATA_PADDING 20
257 typedef struct Fts5Data Fts5Data
;
258 typedef struct Fts5DlidxIter Fts5DlidxIter
;
259 typedef struct Fts5DlidxLvl Fts5DlidxLvl
;
260 typedef struct Fts5DlidxWriter Fts5DlidxWriter
;
261 typedef struct Fts5Iter Fts5Iter
;
262 typedef struct Fts5PageWriter Fts5PageWriter
;
263 typedef struct Fts5SegIter Fts5SegIter
;
264 typedef struct Fts5DoclistIter Fts5DoclistIter
;
265 typedef struct Fts5SegWriter Fts5SegWriter
;
266 typedef struct Fts5Structure Fts5Structure
;
267 typedef struct Fts5StructureLevel Fts5StructureLevel
;
268 typedef struct Fts5StructureSegment Fts5StructureSegment
;
271 u8
*p
; /* Pointer to buffer containing record */
272 int nn
; /* Size of record in bytes */
273 int szLeaf
; /* Size of leaf without page-index */
277 ** One object per %_data table.
280 Fts5Config
*pConfig
; /* Virtual table configuration */
281 char *zDataTbl
; /* Name of %_data table */
282 int nWorkUnit
; /* Leaf pages in a "unit" of work */
285 ** Variables related to the accumulation of tokens and doclists within the
286 ** in-memory hash tables before they are flushed to disk.
288 Fts5Hash
*pHash
; /* Hash table for in-memory data */
289 int nPendingData
; /* Current bytes of pending data */
290 i64 iWriteRowid
; /* Rowid for current doc being written */
291 int bDelete
; /* Current write is a delete */
294 int rc
; /* Current error code */
296 /* State used by the fts5DataXXX() functions. */
297 sqlite3_blob
*pReader
; /* RO incr-blob open on %_data table */
298 sqlite3_stmt
*pWriter
; /* "INSERT ... %_data VALUES(?,?)" */
299 sqlite3_stmt
*pDeleter
; /* "DELETE FROM %_data ... id>=? AND id<=?" */
300 sqlite3_stmt
*pIdxWriter
; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
301 sqlite3_stmt
*pIdxDeleter
; /* "DELETE FROM %_idx WHERE segid=?" */
302 sqlite3_stmt
*pIdxSelect
;
303 int nRead
; /* Total number of blocks read */
305 sqlite3_stmt
*pDeleteFromIdx
;
307 sqlite3_stmt
*pDataVersion
;
308 i64 iStructVersion
; /* data_version when pStruct read */
309 Fts5Structure
*pStruct
; /* Current db structure (or NULL) */
312 struct Fts5DoclistIter
{
313 u8
*aEof
; /* Pointer to 1 byte past end of doclist */
315 /* Output variables. aPoslist==0 at EOF */
323 ** The contents of the "structure" record for each index are represented
324 ** using an Fts5Structure record in memory. Which uses instances of the
325 ** other Fts5StructureXXX types as components.
327 struct Fts5StructureSegment
{
328 int iSegid
; /* Segment id */
329 int pgnoFirst
; /* First leaf page number in segment */
330 int pgnoLast
; /* Last leaf page number in segment */
332 struct Fts5StructureLevel
{
333 int nMerge
; /* Number of segments in incr-merge */
334 int nSeg
; /* Total number of segments on level */
335 Fts5StructureSegment
*aSeg
; /* Array of segments. aSeg[0] is oldest. */
337 struct Fts5Structure
{
338 int nRef
; /* Object reference count */
339 u64 nWriteCounter
; /* Total leaves written to level 0 */
340 int nSegment
; /* Total segments in this structure */
341 int nLevel
; /* Number of levels in this index */
342 Fts5StructureLevel aLevel
[1]; /* Array of nLevel level objects */
346 ** An object of type Fts5SegWriter is used to write to segments.
348 struct Fts5PageWriter
{
349 int pgno
; /* Page number for this page */
350 int iPrevPgidx
; /* Previous value written into pgidx */
351 Fts5Buffer buf
; /* Buffer containing leaf data */
352 Fts5Buffer pgidx
; /* Buffer containing page-index */
353 Fts5Buffer term
; /* Buffer containing previous term on page */
355 struct Fts5DlidxWriter
{
356 int pgno
; /* Page number for this page */
357 int bPrevValid
; /* True if iPrev is valid */
358 i64 iPrev
; /* Previous rowid value written to page */
359 Fts5Buffer buf
; /* Buffer containing page data */
361 struct Fts5SegWriter
{
362 int iSegid
; /* Segid to write to */
363 Fts5PageWriter writer
; /* PageWriter object */
364 i64 iPrevRowid
; /* Previous rowid written to current leaf */
365 u8 bFirstRowidInDoclist
; /* True if next rowid is first in doclist */
366 u8 bFirstRowidInPage
; /* True if next rowid is first in page */
367 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
368 u8 bFirstTermInPage
; /* True if next term will be first in leaf */
369 int nLeafWritten
; /* Number of leaf pages written */
370 int nEmpty
; /* Number of contiguous term-less nodes */
372 int nDlidx
; /* Allocated size of aDlidx[] array */
373 Fts5DlidxWriter
*aDlidx
; /* Array of Fts5DlidxWriter objects */
375 /* Values to insert into the %_idx table */
376 Fts5Buffer btterm
; /* Next term to insert into %_idx table */
377 int iBtPage
; /* Page number corresponding to btterm */
380 typedef struct Fts5CResult Fts5CResult
;
382 u16 iFirst
; /* aSeg[] index of firstest iterator */
383 u8 bTermEq
; /* True if the terms are equal */
387 ** Object for iterating through a single segment, visiting each term/rowid
388 ** pair in the segment.
391 ** The segment to iterate through.
394 ** Current leaf page number within segment.
397 ** Byte offset within the current leaf that is the first byte of the
398 ** position list data (one byte passed the position-list size field).
401 ** Buffer containing current leaf page data. Set to NULL at EOF.
403 ** iTermLeafPgno, iTermLeafOffset:
404 ** Leaf page number containing the last term read from the segment. And
405 ** the offset immediately following the term data.
408 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
410 ** FTS5_SEGITER_ONETERM:
411 ** If set, set the iterator to point to EOF after the current doclist
412 ** has been exhausted. Do not proceed to the next term in the segment.
414 ** FTS5_SEGITER_REVERSE:
415 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
416 ** it is set, iterate through rowid in descending order instead of the
417 ** default ascending order.
419 ** iRowidOffset/nRowidOffset/aRowidOffset:
420 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
422 ** For each rowid on the page corresponding to the current term, the
423 ** corresponding aRowidOffset[] entry is set to the byte offset of the
424 ** start of the "position-list-size" field within the page.
427 ** Index of current term on iTermLeafPgno.
430 Fts5StructureSegment
*pSeg
; /* Segment to iterate through */
431 int flags
; /* Mask of configuration flags */
432 int iLeafPgno
; /* Current leaf page number */
433 Fts5Data
*pLeaf
; /* Current leaf data */
434 Fts5Data
*pNextLeaf
; /* Leaf page (iLeafPgno+1) */
435 i64 iLeafOffset
; /* Byte offset within current leaf */
438 void (*xNext
)(Fts5Index
*, Fts5SegIter
*, int*);
440 /* The page and offset from which the current term was read. The offset
441 ** is the offset of the first rowid in the current doclist. */
445 int iPgidxOff
; /* Next offset in pgidx */
448 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
449 int iRowidOffset
; /* Current entry in aRowidOffset[] */
450 int nRowidOffset
; /* Allocated size of aRowidOffset[] array */
451 int *aRowidOffset
; /* Array of offset to rowid fields */
453 Fts5DlidxIter
*pDlidx
; /* If there is a doclist-index */
455 /* Variables populated based on current entry. */
456 Fts5Buffer term
; /* Current term */
457 i64 iRowid
; /* Current rowid */
458 int nPos
; /* Number of bytes in current position list */
459 u8 bDel
; /* True if the delete flag is set */
463 ** Argument is a pointer to an Fts5Data structure that contains a
466 #define ASSERT_SZLEAF_OK(x) assert( \
467 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
470 #define FTS5_SEGITER_ONETERM 0x01
471 #define FTS5_SEGITER_REVERSE 0x02
474 ** Argument is a pointer to an Fts5Data structure that contains a leaf
475 ** page. This macro evaluates to true if the leaf contains no terms, or
476 ** false if it contains at least one term.
478 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
480 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
482 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
485 ** Object for iterating through the merged results of one or more segments,
486 ** visiting each term/rowid pair in the merged data.
488 ** nSeg is always a power of two greater than or equal to the number of
489 ** segments that this object is merging data from. Both the aSeg[] and
490 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
491 ** with zeroed objects - these are handled as if they were iterators opened
492 ** on empty segments.
494 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
495 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
496 ** comparison in this context is the index of the iterator that currently
497 ** points to the smaller term/rowid combination. Iterators at EOF are
498 ** considered to be greater than all other iterators.
500 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
501 ** the smallest key overall. aFirst[0] is unused.
504 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
505 ** There is no way to tell if this is populated or not.
508 Fts5IndexIter base
; /* Base class containing output vars */
510 Fts5Index
*pIndex
; /* Index that owns this iterator */
511 Fts5Buffer poslist
; /* Buffer containing current poslist */
512 Fts5Colset
*pColset
; /* Restrict matches to these columns */
514 /* Invoked to set output variables. */
515 void (*xSetOutputs
)(Fts5Iter
*, Fts5SegIter
*);
517 int nSeg
; /* Size of aSeg[] array */
518 int bRev
; /* True to iterate in reverse order */
519 u8 bSkipEmpty
; /* True to skip deleted entries */
521 i64 iSwitchRowid
; /* Firstest rowid of other than aFirst[1] */
522 Fts5CResult
*aFirst
; /* Current merge state (see above) */
523 Fts5SegIter aSeg
[1]; /* Array of segment iterators */
528 ** An instance of the following type is used to iterate through the contents
529 ** of a doclist-index record.
532 ** Record containing the doclist-index data.
535 ** Set to true once iterator has reached EOF.
538 ** Set to the current offset within record pData.
540 struct Fts5DlidxLvl
{
541 Fts5Data
*pData
; /* Data for current page of this level */
542 int iOff
; /* Current offset into pData */
543 int bEof
; /* At EOF already */
544 int iFirstOff
; /* Used by reverse iterators */
546 /* Output variables */
547 int iLeafPgno
; /* Page number of current leaf page */
548 i64 iRowid
; /* First rowid on leaf iLeafPgno */
550 struct Fts5DlidxIter
{
553 Fts5DlidxLvl aLvl
[1];
556 static void fts5PutU16(u8
*aOut
, u16 iVal
){
558 aOut
[1] = (iVal
&0xFF);
561 static u16
fts5GetU16(const u8
*aIn
){
562 return ((u16
)aIn
[0] << 8) + aIn
[1];
566 ** Allocate and return a buffer at least nByte bytes in size.
568 ** If an OOM error is encountered, return NULL and set the error code in
569 ** the Fts5Index handle passed as the first argument.
571 static void *fts5IdxMalloc(Fts5Index
*p
, sqlite3_int64 nByte
){
572 return sqlite3Fts5MallocZero(&p
->rc
, nByte
);
576 ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
578 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
579 ** +ve if pRight is smaller than pLeft. In other words:
581 ** res = *pLeft - *pRight
584 static int fts5BufferCompareBlob(
585 Fts5Buffer
*pLeft
, /* Left hand side of comparison */
586 const u8
*pRight
, int nRight
/* Right hand side of comparison */
588 int nCmp
= MIN(pLeft
->n
, nRight
);
589 int res
= memcmp(pLeft
->p
, pRight
, nCmp
);
590 return (res
==0 ? (pLeft
->n
- nRight
) : res
);
595 ** Compare the contents of the two buffers using memcmp(). If one buffer
596 ** is a prefix of the other, it is considered the lesser.
598 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
599 ** +ve if pRight is smaller than pLeft. In other words:
601 ** res = *pLeft - *pRight
603 static int fts5BufferCompare(Fts5Buffer
*pLeft
, Fts5Buffer
*pRight
){
605 nCmp
= MIN(pLeft
->n
, pRight
->n
);
606 assert( nCmp
<=0 || pLeft
->p
!=0 );
607 assert( nCmp
<=0 || pRight
->p
!=0 );
608 res
= fts5Memcmp(pLeft
->p
, pRight
->p
, nCmp
);
609 return (res
==0 ? (pLeft
->n
- pRight
->n
) : res
);
612 static int fts5LeafFirstTermOff(Fts5Data
*pLeaf
){
614 fts5GetVarint32(&pLeaf
->p
[pLeaf
->szLeaf
], ret
);
619 ** Close the read-only blob handle, if it is open.
621 void sqlite3Fts5IndexCloseReader(Fts5Index
*p
){
623 sqlite3_blob
*pReader
= p
->pReader
;
625 sqlite3_blob_close(pReader
);
630 ** Retrieve a record from the %_data table.
632 ** If an error occurs, NULL is returned and an error left in the
635 static Fts5Data
*fts5DataRead(Fts5Index
*p
, i64 iRowid
){
637 if( p
->rc
==SQLITE_OK
){
641 /* This call may return SQLITE_ABORT if there has been a savepoint
642 ** rollback since it was last used. In this case a new blob handle
644 sqlite3_blob
*pBlob
= p
->pReader
;
646 rc
= sqlite3_blob_reopen(pBlob
, iRowid
);
647 assert( p
->pReader
==0 );
650 sqlite3Fts5IndexCloseReader(p
);
652 if( rc
==SQLITE_ABORT
) rc
= SQLITE_OK
;
655 /* If the blob handle is not open at this point, open it and seek
656 ** to the requested entry. */
657 if( p
->pReader
==0 && rc
==SQLITE_OK
){
658 Fts5Config
*pConfig
= p
->pConfig
;
659 rc
= sqlite3_blob_open(pConfig
->db
,
660 pConfig
->zDb
, p
->zDataTbl
, "block", iRowid
, 0, &p
->pReader
664 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
665 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
666 ** All the reasons those functions might return SQLITE_ERROR - missing
667 ** table, missing row, non-blob/text in block column - indicate
668 ** backing store corruption. */
669 if( rc
==SQLITE_ERROR
) rc
= FTS5_CORRUPT
;
672 u8
*aOut
= 0; /* Read blob data into this buffer */
673 int nByte
= sqlite3_blob_bytes(p
->pReader
);
674 sqlite3_int64 nAlloc
= sizeof(Fts5Data
) + nByte
+ FTS5_DATA_PADDING
;
675 pRet
= (Fts5Data
*)sqlite3_malloc64(nAlloc
);
678 aOut
= pRet
->p
= (u8
*)&pRet
[1];
684 rc
= sqlite3_blob_read(p
->pReader
, aOut
, nByte
, 0);
690 /* TODO1: Fix this */
691 pRet
->p
[nByte
] = 0x00;
692 pRet
->p
[nByte
+1] = 0x00;
693 pRet
->szLeaf
= fts5GetU16(&pRet
->p
[2]);
700 assert( (pRet
==0)==(p
->rc
!=SQLITE_OK
) );
706 ** Release a reference to data record returned by an earlier call to
709 static void fts5DataRelease(Fts5Data
*pData
){
713 static Fts5Data
*fts5LeafRead(Fts5Index
*p
, i64 iRowid
){
714 Fts5Data
*pRet
= fts5DataRead(p
, iRowid
);
716 if( pRet
->nn
<4 || pRet
->szLeaf
>pRet
->nn
){
717 p
->rc
= FTS5_CORRUPT
;
718 fts5DataRelease(pRet
);
725 static int fts5IndexPrepareStmt(
727 sqlite3_stmt
**ppStmt
,
730 if( p
->rc
==SQLITE_OK
){
732 p
->rc
= sqlite3_prepare_v3(p
->pConfig
->db
, zSql
, -1,
733 SQLITE_PREPARE_PERSISTENT
|SQLITE_PREPARE_NO_VTAB
,
736 p
->rc
= SQLITE_NOMEM
;
745 ** INSERT OR REPLACE a record into the %_data table.
747 static void fts5DataWrite(Fts5Index
*p
, i64 iRowid
, const u8
*pData
, int nData
){
748 if( p
->rc
!=SQLITE_OK
) return;
751 Fts5Config
*pConfig
= p
->pConfig
;
752 fts5IndexPrepareStmt(p
, &p
->pWriter
, sqlite3_mprintf(
753 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
754 pConfig
->zDb
, pConfig
->zName
759 sqlite3_bind_int64(p
->pWriter
, 1, iRowid
);
760 sqlite3_bind_blob(p
->pWriter
, 2, pData
, nData
, SQLITE_STATIC
);
761 sqlite3_step(p
->pWriter
);
762 p
->rc
= sqlite3_reset(p
->pWriter
);
763 sqlite3_bind_null(p
->pWriter
, 2);
767 ** Execute the following SQL:
769 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
771 static void fts5DataDelete(Fts5Index
*p
, i64 iFirst
, i64 iLast
){
772 if( p
->rc
!=SQLITE_OK
) return;
774 if( p
->pDeleter
==0 ){
775 Fts5Config
*pConfig
= p
->pConfig
;
776 char *zSql
= sqlite3_mprintf(
777 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
778 pConfig
->zDb
, pConfig
->zName
780 if( fts5IndexPrepareStmt(p
, &p
->pDeleter
, zSql
) ) return;
783 sqlite3_bind_int64(p
->pDeleter
, 1, iFirst
);
784 sqlite3_bind_int64(p
->pDeleter
, 2, iLast
);
785 sqlite3_step(p
->pDeleter
);
786 p
->rc
= sqlite3_reset(p
->pDeleter
);
790 ** Remove all records associated with segment iSegid.
792 static void fts5DataRemoveSegment(Fts5Index
*p
, int iSegid
){
793 i64 iFirst
= FTS5_SEGMENT_ROWID(iSegid
, 0);
794 i64 iLast
= FTS5_SEGMENT_ROWID(iSegid
+1, 0)-1;
795 fts5DataDelete(p
, iFirst
, iLast
);
796 if( p
->pIdxDeleter
==0 ){
797 Fts5Config
*pConfig
= p
->pConfig
;
798 fts5IndexPrepareStmt(p
, &p
->pIdxDeleter
, sqlite3_mprintf(
799 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
800 pConfig
->zDb
, pConfig
->zName
803 if( p
->rc
==SQLITE_OK
){
804 sqlite3_bind_int(p
->pIdxDeleter
, 1, iSegid
);
805 sqlite3_step(p
->pIdxDeleter
);
806 p
->rc
= sqlite3_reset(p
->pIdxDeleter
);
811 ** Release a reference to an Fts5Structure object returned by an earlier
812 ** call to fts5StructureRead() or fts5StructureDecode().
814 static void fts5StructureRelease(Fts5Structure
*pStruct
){
815 if( pStruct
&& 0>=(--pStruct
->nRef
) ){
817 assert( pStruct
->nRef
==0 );
818 for(i
=0; i
<pStruct
->nLevel
; i
++){
819 sqlite3_free(pStruct
->aLevel
[i
].aSeg
);
821 sqlite3_free(pStruct
);
825 static void fts5StructureRef(Fts5Structure
*pStruct
){
829 void *sqlite3Fts5StructureRef(Fts5Index
*p
){
830 fts5StructureRef(p
->pStruct
);
831 return (void*)p
->pStruct
;
833 void sqlite3Fts5StructureRelease(void *p
){
835 fts5StructureRelease((Fts5Structure
*)p
);
838 int sqlite3Fts5StructureTest(Fts5Index
*p
, void *pStruct
){
839 if( p
->pStruct
!=(Fts5Structure
*)pStruct
){
846 ** Ensure that structure object (*pp) is writable.
848 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
849 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
851 static void fts5StructureMakeWritable(int *pRc
, Fts5Structure
**pp
){
852 Fts5Structure
*p
= *pp
;
853 if( *pRc
==SQLITE_OK
&& p
->nRef
>1 ){
854 i64 nByte
= sizeof(Fts5Structure
)+(p
->nLevel
-1)*sizeof(Fts5StructureLevel
);
856 pNew
= (Fts5Structure
*)sqlite3Fts5MallocZero(pRc
, nByte
);
859 memcpy(pNew
, p
, nByte
);
860 for(i
=0; i
<p
->nLevel
; i
++) pNew
->aLevel
[i
].aSeg
= 0;
861 for(i
=0; i
<p
->nLevel
; i
++){
862 Fts5StructureLevel
*pLvl
= &pNew
->aLevel
[i
];
863 nByte
= sizeof(Fts5StructureSegment
) * pNew
->aLevel
[i
].nSeg
;
864 pLvl
->aSeg
= (Fts5StructureSegment
*)sqlite3Fts5MallocZero(pRc
, nByte
);
866 for(i
=0; i
<p
->nLevel
; i
++){
867 sqlite3_free(pNew
->aLevel
[i
].aSeg
);
872 memcpy(pLvl
->aSeg
, p
->aLevel
[i
].aSeg
, nByte
);
882 ** Deserialize and return the structure record currently stored in serialized
883 ** form within buffer pData/nData.
885 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
886 ** are over-allocated by one slot. This allows the structure contents
887 ** to be more easily edited.
889 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
890 ** returned. Otherwise, *ppOut is set to point to the new object and
891 ** SQLITE_OK returned.
893 static int fts5StructureDecode(
894 const u8
*pData
, /* Buffer containing serialized structure */
895 int nData
, /* Size of buffer pData in bytes */
896 int *piCookie
, /* Configuration cookie value */
897 Fts5Structure
**ppOut
/* OUT: Deserialized object */
904 sqlite3_int64 nByte
; /* Bytes of space to allocate at pRet */
905 Fts5Structure
*pRet
= 0; /* Structure object to return */
907 /* Grab the cookie value */
908 if( piCookie
) *piCookie
= sqlite3Fts5Get32(pData
);
911 /* Read the total number of levels and segments from the start of the
912 ** structure record. */
913 i
+= fts5GetVarint32(&pData
[i
], nLevel
);
914 i
+= fts5GetVarint32(&pData
[i
], nSegment
);
915 if( nLevel
>FTS5_MAX_SEGMENT
|| nLevel
<0
916 || nSegment
>FTS5_MAX_SEGMENT
|| nSegment
<0
921 sizeof(Fts5Structure
) + /* Main structure */
922 sizeof(Fts5StructureLevel
) * (nLevel
-1) /* aLevel[] array */
924 pRet
= (Fts5Structure
*)sqlite3Fts5MallocZero(&rc
, nByte
);
928 pRet
->nLevel
= nLevel
;
929 pRet
->nSegment
= nSegment
;
930 i
+= sqlite3Fts5GetVarint(&pData
[i
], &pRet
->nWriteCounter
);
932 for(iLvl
=0; rc
==SQLITE_OK
&& iLvl
<nLevel
; iLvl
++){
933 Fts5StructureLevel
*pLvl
= &pRet
->aLevel
[iLvl
];
940 i
+= fts5GetVarint32(&pData
[i
], pLvl
->nMerge
);
941 i
+= fts5GetVarint32(&pData
[i
], nTotal
);
942 if( nTotal
<pLvl
->nMerge
) rc
= FTS5_CORRUPT
;
943 pLvl
->aSeg
= (Fts5StructureSegment
*)sqlite3Fts5MallocZero(&rc
,
944 nTotal
* sizeof(Fts5StructureSegment
)
951 for(iSeg
=0; iSeg
<nTotal
; iSeg
++){
952 Fts5StructureSegment
*pSeg
= &pLvl
->aSeg
[iSeg
];
958 i
+= fts5GetVarint32(&pData
[i
], pSeg
->iSegid
);
959 i
+= fts5GetVarint32(&pData
[i
], pSeg
->pgnoFirst
);
960 i
+= fts5GetVarint32(&pData
[i
], pSeg
->pgnoLast
);
961 if( pSeg
->pgnoLast
<pSeg
->pgnoFirst
){
966 if( iLvl
>0 && pLvl
[-1].nMerge
&& nTotal
==0 ) rc
= FTS5_CORRUPT
;
967 if( iLvl
==nLevel
-1 && pLvl
->nMerge
) rc
= FTS5_CORRUPT
;
970 if( nSegment
!=0 && rc
==SQLITE_OK
) rc
= FTS5_CORRUPT
;
973 fts5StructureRelease(pRet
);
983 ** Add a level to the Fts5Structure.aLevel[] array of structure object
986 static void fts5StructureAddLevel(int *pRc
, Fts5Structure
**ppStruct
){
987 fts5StructureMakeWritable(pRc
, ppStruct
);
988 assert( (ppStruct
!=0 && (*ppStruct
)!=0) || (*pRc
)!=SQLITE_OK
);
989 if( *pRc
==SQLITE_OK
){
990 Fts5Structure
*pStruct
= *ppStruct
;
991 int nLevel
= pStruct
->nLevel
;
992 sqlite3_int64 nByte
= (
993 sizeof(Fts5Structure
) + /* Main structure */
994 sizeof(Fts5StructureLevel
) * (nLevel
+1) /* aLevel[] array */
997 pStruct
= sqlite3_realloc64(pStruct
, nByte
);
999 memset(&pStruct
->aLevel
[nLevel
], 0, sizeof(Fts5StructureLevel
));
1001 *ppStruct
= pStruct
;
1003 *pRc
= SQLITE_NOMEM
;
1009 ** Extend level iLvl so that there is room for at least nExtra more
1012 static void fts5StructureExtendLevel(
1014 Fts5Structure
*pStruct
,
1019 if( *pRc
==SQLITE_OK
){
1020 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
1021 Fts5StructureSegment
*aNew
;
1022 sqlite3_int64 nByte
;
1024 nByte
= (pLvl
->nSeg
+ nExtra
) * sizeof(Fts5StructureSegment
);
1025 aNew
= sqlite3_realloc64(pLvl
->aSeg
, nByte
);
1028 memset(&aNew
[pLvl
->nSeg
], 0, sizeof(Fts5StructureSegment
) * nExtra
);
1030 int nMove
= pLvl
->nSeg
* sizeof(Fts5StructureSegment
);
1031 memmove(&aNew
[nExtra
], aNew
, nMove
);
1032 memset(aNew
, 0, sizeof(Fts5StructureSegment
) * nExtra
);
1036 *pRc
= SQLITE_NOMEM
;
1041 static Fts5Structure
*fts5StructureReadUncached(Fts5Index
*p
){
1042 Fts5Structure
*pRet
= 0;
1043 Fts5Config
*pConfig
= p
->pConfig
;
1044 int iCookie
; /* Configuration cookie */
1047 pData
= fts5DataRead(p
, FTS5_STRUCTURE_ROWID
);
1048 if( p
->rc
==SQLITE_OK
){
1049 /* TODO: Do we need this if the leaf-index is appended? Probably... */
1050 memset(&pData
->p
[pData
->nn
], 0, FTS5_DATA_PADDING
);
1051 p
->rc
= fts5StructureDecode(pData
->p
, pData
->nn
, &iCookie
, &pRet
);
1052 if( p
->rc
==SQLITE_OK
&& (pConfig
->pgsz
==0 || pConfig
->iCookie
!=iCookie
) ){
1053 p
->rc
= sqlite3Fts5ConfigLoad(pConfig
, iCookie
);
1055 fts5DataRelease(pData
);
1056 if( p
->rc
!=SQLITE_OK
){
1057 fts5StructureRelease(pRet
);
1065 static i64
fts5IndexDataVersion(Fts5Index
*p
){
1068 if( p
->rc
==SQLITE_OK
){
1069 if( p
->pDataVersion
==0 ){
1070 p
->rc
= fts5IndexPrepareStmt(p
, &p
->pDataVersion
,
1071 sqlite3_mprintf("PRAGMA %Q.data_version", p
->pConfig
->zDb
)
1073 if( p
->rc
) return 0;
1076 if( SQLITE_ROW
==sqlite3_step(p
->pDataVersion
) ){
1077 iVersion
= sqlite3_column_int64(p
->pDataVersion
, 0);
1079 p
->rc
= sqlite3_reset(p
->pDataVersion
);
1086 ** Read, deserialize and return the structure record.
1088 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1089 ** are over-allocated as described for function fts5StructureDecode()
1092 ** If an error occurs, NULL is returned and an error code left in the
1093 ** Fts5Index handle. If an error has already occurred when this function
1094 ** is called, it is a no-op.
1096 static Fts5Structure
*fts5StructureRead(Fts5Index
*p
){
1098 if( p
->pStruct
==0 ){
1099 p
->iStructVersion
= fts5IndexDataVersion(p
);
1100 if( p
->rc
==SQLITE_OK
){
1101 p
->pStruct
= fts5StructureReadUncached(p
);
1107 Fts5Structure
*pTest
= fts5StructureReadUncached(p
);
1110 assert_nc( p
->pStruct
->nSegment
==pTest
->nSegment
);
1111 assert_nc( p
->pStruct
->nLevel
==pTest
->nLevel
);
1112 for(i
=0; i
<pTest
->nLevel
; i
++){
1113 assert_nc( p
->pStruct
->aLevel
[i
].nMerge
==pTest
->aLevel
[i
].nMerge
);
1114 assert_nc( p
->pStruct
->aLevel
[i
].nSeg
==pTest
->aLevel
[i
].nSeg
);
1115 for(j
=0; j
<pTest
->aLevel
[i
].nSeg
; j
++){
1116 Fts5StructureSegment
*p1
= &pTest
->aLevel
[i
].aSeg
[j
];
1117 Fts5StructureSegment
*p2
= &p
->pStruct
->aLevel
[i
].aSeg
[j
];
1118 assert_nc( p1
->iSegid
==p2
->iSegid
);
1119 assert_nc( p1
->pgnoFirst
==p2
->pgnoFirst
);
1120 assert_nc( p1
->pgnoLast
==p2
->pgnoLast
);
1123 fts5StructureRelease(pTest
);
1128 if( p
->rc
!=SQLITE_OK
) return 0;
1129 assert( p
->iStructVersion
!=0 );
1130 assert( p
->pStruct
!=0 );
1131 fts5StructureRef(p
->pStruct
);
1135 static void fts5StructureInvalidate(Fts5Index
*p
){
1137 fts5StructureRelease(p
->pStruct
);
1143 ** Return the total number of segments in index structure pStruct. This
1144 ** function is only ever used as part of assert() conditions.
1147 static int fts5StructureCountSegments(Fts5Structure
*pStruct
){
1148 int nSegment
= 0; /* Total number of segments */
1150 int iLvl
; /* Used to iterate through levels */
1151 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
1152 nSegment
+= pStruct
->aLevel
[iLvl
].nSeg
;
1160 #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
1161 assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \
1162 memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \
1163 (pBuf)->n += nBlob; \
1166 #define fts5BufferSafeAppendVarint(pBuf, iVal) { \
1167 (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
1168 assert( (pBuf)->nSpace>=(pBuf)->n ); \
1173 ** Serialize and store the "structure" record.
1175 ** If an error occurs, leave an error code in the Fts5Index object. If an
1176 ** error has already occurred, this function is a no-op.
1178 static void fts5StructureWrite(Fts5Index
*p
, Fts5Structure
*pStruct
){
1179 if( p
->rc
==SQLITE_OK
){
1180 Fts5Buffer buf
; /* Buffer to serialize record into */
1181 int iLvl
; /* Used to iterate through levels */
1182 int iCookie
; /* Cookie value to store */
1184 assert( pStruct
->nSegment
==fts5StructureCountSegments(pStruct
) );
1185 memset(&buf
, 0, sizeof(Fts5Buffer
));
1187 /* Append the current configuration cookie */
1188 iCookie
= p
->pConfig
->iCookie
;
1189 if( iCookie
<0 ) iCookie
= 0;
1191 if( 0==sqlite3Fts5BufferSize(&p
->rc
, &buf
, 4+9+9+9) ){
1192 sqlite3Fts5Put32(buf
.p
, iCookie
);
1194 fts5BufferSafeAppendVarint(&buf
, pStruct
->nLevel
);
1195 fts5BufferSafeAppendVarint(&buf
, pStruct
->nSegment
);
1196 fts5BufferSafeAppendVarint(&buf
, (i64
)pStruct
->nWriteCounter
);
1199 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
1200 int iSeg
; /* Used to iterate through segments */
1201 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
1202 fts5BufferAppendVarint(&p
->rc
, &buf
, pLvl
->nMerge
);
1203 fts5BufferAppendVarint(&p
->rc
, &buf
, pLvl
->nSeg
);
1204 assert( pLvl
->nMerge
<=pLvl
->nSeg
);
1206 for(iSeg
=0; iSeg
<pLvl
->nSeg
; iSeg
++){
1207 fts5BufferAppendVarint(&p
->rc
, &buf
, pLvl
->aSeg
[iSeg
].iSegid
);
1208 fts5BufferAppendVarint(&p
->rc
, &buf
, pLvl
->aSeg
[iSeg
].pgnoFirst
);
1209 fts5BufferAppendVarint(&p
->rc
, &buf
, pLvl
->aSeg
[iSeg
].pgnoLast
);
1213 fts5DataWrite(p
, FTS5_STRUCTURE_ROWID
, buf
.p
, buf
.n
);
1214 fts5BufferFree(&buf
);
1219 static void fts5DebugStructure(int*,Fts5Buffer
*,Fts5Structure
*);
1220 static void fts5PrintStructure(const char *zCaption
, Fts5Structure
*pStruct
){
1223 memset(&buf
, 0, sizeof(buf
));
1224 fts5DebugStructure(&rc
, &buf
, pStruct
);
1225 fprintf(stdout
, "%s: %s\n", zCaption
, buf
.p
);
1227 fts5BufferFree(&buf
);
1230 # define fts5PrintStructure(x,y)
1233 static int fts5SegmentSize(Fts5StructureSegment
*pSeg
){
1234 return 1 + pSeg
->pgnoLast
- pSeg
->pgnoFirst
;
1238 ** Return a copy of index structure pStruct. Except, promote as many
1239 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1242 static void fts5StructurePromoteTo(
1246 Fts5Structure
*pStruct
1249 Fts5StructureLevel
*pOut
= &pStruct
->aLevel
[iPromote
];
1251 if( pOut
->nMerge
==0 ){
1252 for(il
=iPromote
+1; il
<pStruct
->nLevel
; il
++){
1253 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[il
];
1254 if( pLvl
->nMerge
) return;
1255 for(is
=pLvl
->nSeg
-1; is
>=0; is
--){
1256 int sz
= fts5SegmentSize(&pLvl
->aSeg
[is
]);
1257 if( sz
>szPromote
) return;
1258 fts5StructureExtendLevel(&p
->rc
, pStruct
, iPromote
, 1, 1);
1260 memcpy(pOut
->aSeg
, &pLvl
->aSeg
[is
], sizeof(Fts5StructureSegment
));
1269 ** A new segment has just been written to level iLvl of index structure
1270 ** pStruct. This function determines if any segments should be promoted
1271 ** as a result. Segments are promoted in two scenarios:
1273 ** a) If the segment just written is smaller than one or more segments
1274 ** within the previous populated level, it is promoted to the previous
1277 ** b) If the segment just written is larger than the newest segment on
1278 ** the next populated level, then that segment, and any other adjacent
1279 ** segments that are also smaller than the one just written, are
1282 ** If one or more segments are promoted, the structure object is updated
1285 static void fts5StructurePromote(
1286 Fts5Index
*p
, /* FTS5 backend object */
1287 int iLvl
, /* Index level just updated */
1288 Fts5Structure
*pStruct
/* Index structure */
1290 if( p
->rc
==SQLITE_OK
){
1293 int szPromote
= 0; /* Promote anything this size or smaller */
1294 Fts5StructureSegment
*pSeg
; /* Segment just written */
1295 int szSeg
; /* Size of segment just written */
1296 int nSeg
= pStruct
->aLevel
[iLvl
].nSeg
;
1298 if( nSeg
==0 ) return;
1299 pSeg
= &pStruct
->aLevel
[iLvl
].aSeg
[pStruct
->aLevel
[iLvl
].nSeg
-1];
1300 szSeg
= (1 + pSeg
->pgnoLast
- pSeg
->pgnoFirst
);
1302 /* Check for condition (a) */
1303 for(iTst
=iLvl
-1; iTst
>=0 && pStruct
->aLevel
[iTst
].nSeg
==0; iTst
--);
1307 Fts5StructureLevel
*pTst
= &pStruct
->aLevel
[iTst
];
1308 assert( pTst
->nMerge
==0 );
1309 for(i
=0; i
<pTst
->nSeg
; i
++){
1310 int sz
= pTst
->aSeg
[i
].pgnoLast
- pTst
->aSeg
[i
].pgnoFirst
+ 1;
1311 if( sz
>szMax
) szMax
= sz
;
1314 /* Condition (a) is true. Promote the newest segment on level
1315 ** iLvl to level iTst. */
1321 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1322 ** is a no-op if it is not. */
1327 fts5StructurePromoteTo(p
, iPromote
, szPromote
, pStruct
);
1333 ** Advance the iterator passed as the only argument. If the end of the
1334 ** doclist-index page is reached, return non-zero.
1336 static int fts5DlidxLvlNext(Fts5DlidxLvl
*pLvl
){
1337 Fts5Data
*pData
= pLvl
->pData
;
1339 if( pLvl
->iOff
==0 ){
1340 assert( pLvl
->bEof
==0 );
1342 pLvl
->iOff
+= fts5GetVarint32(&pData
->p
[1], pLvl
->iLeafPgno
);
1343 pLvl
->iOff
+= fts5GetVarint(&pData
->p
[pLvl
->iOff
], (u64
*)&pLvl
->iRowid
);
1344 pLvl
->iFirstOff
= pLvl
->iOff
;
1347 for(iOff
=pLvl
->iOff
; iOff
<pData
->nn
; iOff
++){
1348 if( pData
->p
[iOff
] ) break;
1351 if( iOff
<pData
->nn
){
1353 pLvl
->iLeafPgno
+= (iOff
- pLvl
->iOff
) + 1;
1354 iOff
+= fts5GetVarint(&pData
->p
[iOff
], (u64
*)&iVal
);
1355 pLvl
->iRowid
+= iVal
;
1366 ** Advance the iterator passed as the only argument.
1368 static int fts5DlidxIterNextR(Fts5Index
*p
, Fts5DlidxIter
*pIter
, int iLvl
){
1369 Fts5DlidxLvl
*pLvl
= &pIter
->aLvl
[iLvl
];
1371 assert( iLvl
<pIter
->nLvl
);
1372 if( fts5DlidxLvlNext(pLvl
) ){
1373 if( (iLvl
+1) < pIter
->nLvl
){
1374 fts5DlidxIterNextR(p
, pIter
, iLvl
+1);
1375 if( pLvl
[1].bEof
==0 ){
1376 fts5DataRelease(pLvl
->pData
);
1377 memset(pLvl
, 0, sizeof(Fts5DlidxLvl
));
1378 pLvl
->pData
= fts5DataRead(p
,
1379 FTS5_DLIDX_ROWID(pIter
->iSegid
, iLvl
, pLvl
[1].iLeafPgno
)
1381 if( pLvl
->pData
) fts5DlidxLvlNext(pLvl
);
1386 return pIter
->aLvl
[0].bEof
;
1388 static int fts5DlidxIterNext(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1389 return fts5DlidxIterNextR(p
, pIter
, 0);
1393 ** The iterator passed as the first argument has the following fields set
1394 ** as follows. This function sets up the rest of the iterator so that it
1395 ** points to the first rowid in the doclist-index.
1398 ** pointer to doclist-index record,
1400 ** When this function is called pIter->iLeafPgno is the page number the
1401 ** doclist is associated with (the one featuring the term).
1403 static int fts5DlidxIterFirst(Fts5DlidxIter
*pIter
){
1405 for(i
=0; i
<pIter
->nLvl
; i
++){
1406 fts5DlidxLvlNext(&pIter
->aLvl
[i
]);
1408 return pIter
->aLvl
[0].bEof
;
1412 static int fts5DlidxIterEof(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1413 return p
->rc
!=SQLITE_OK
|| pIter
->aLvl
[0].bEof
;
1416 static void fts5DlidxIterLast(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1419 /* Advance each level to the last entry on the last page */
1420 for(i
=pIter
->nLvl
-1; p
->rc
==SQLITE_OK
&& i
>=0; i
--){
1421 Fts5DlidxLvl
*pLvl
= &pIter
->aLvl
[i
];
1422 while( fts5DlidxLvlNext(pLvl
)==0 );
1426 Fts5DlidxLvl
*pChild
= &pLvl
[-1];
1427 fts5DataRelease(pChild
->pData
);
1428 memset(pChild
, 0, sizeof(Fts5DlidxLvl
));
1429 pChild
->pData
= fts5DataRead(p
,
1430 FTS5_DLIDX_ROWID(pIter
->iSegid
, i
-1, pLvl
->iLeafPgno
)
1437 ** Move the iterator passed as the only argument to the previous entry.
1439 static int fts5DlidxLvlPrev(Fts5DlidxLvl
*pLvl
){
1440 int iOff
= pLvl
->iOff
;
1442 assert( pLvl
->bEof
==0 );
1443 if( iOff
<=pLvl
->iFirstOff
){
1446 u8
*a
= pLvl
->pData
->p
;
1449 fts5DlidxLvlNext(pLvl
);
1452 int ii
= pLvl
->iOff
;
1459 ii
+= sqlite3Fts5GetVarint(&a
[ii
], &delta
);
1461 if( ii
>=iOff
) break;
1462 pLvl
->iLeafPgno
+= nZero
+1;
1463 pLvl
->iRowid
+= delta
;
1471 static int fts5DlidxIterPrevR(Fts5Index
*p
, Fts5DlidxIter
*pIter
, int iLvl
){
1472 Fts5DlidxLvl
*pLvl
= &pIter
->aLvl
[iLvl
];
1474 assert( iLvl
<pIter
->nLvl
);
1475 if( fts5DlidxLvlPrev(pLvl
) ){
1476 if( (iLvl
+1) < pIter
->nLvl
){
1477 fts5DlidxIterPrevR(p
, pIter
, iLvl
+1);
1478 if( pLvl
[1].bEof
==0 ){
1479 fts5DataRelease(pLvl
->pData
);
1480 memset(pLvl
, 0, sizeof(Fts5DlidxLvl
));
1481 pLvl
->pData
= fts5DataRead(p
,
1482 FTS5_DLIDX_ROWID(pIter
->iSegid
, iLvl
, pLvl
[1].iLeafPgno
)
1485 while( fts5DlidxLvlNext(pLvl
)==0 );
1492 return pIter
->aLvl
[0].bEof
;
1494 static int fts5DlidxIterPrev(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1495 return fts5DlidxIterPrevR(p
, pIter
, 0);
1499 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1501 static void fts5DlidxIterFree(Fts5DlidxIter
*pIter
){
1504 for(i
=0; i
<pIter
->nLvl
; i
++){
1505 fts5DataRelease(pIter
->aLvl
[i
].pData
);
1507 sqlite3_free(pIter
);
1511 static Fts5DlidxIter
*fts5DlidxIterInit(
1512 Fts5Index
*p
, /* Fts5 Backend to iterate within */
1513 int bRev
, /* True for ORDER BY ASC */
1514 int iSegid
, /* Segment id */
1515 int iLeafPg
/* Leaf page number to load dlidx for */
1517 Fts5DlidxIter
*pIter
= 0;
1521 for(i
=0; p
->rc
==SQLITE_OK
&& bDone
==0; i
++){
1522 sqlite3_int64 nByte
= sizeof(Fts5DlidxIter
) + i
* sizeof(Fts5DlidxLvl
);
1523 Fts5DlidxIter
*pNew
;
1525 pNew
= (Fts5DlidxIter
*)sqlite3_realloc64(pIter
, nByte
);
1527 p
->rc
= SQLITE_NOMEM
;
1529 i64 iRowid
= FTS5_DLIDX_ROWID(iSegid
, i
, iLeafPg
);
1530 Fts5DlidxLvl
*pLvl
= &pNew
->aLvl
[i
];
1532 memset(pLvl
, 0, sizeof(Fts5DlidxLvl
));
1533 pLvl
->pData
= fts5DataRead(p
, iRowid
);
1534 if( pLvl
->pData
&& (pLvl
->pData
->p
[0] & 0x0001)==0 ){
1541 if( p
->rc
==SQLITE_OK
){
1542 pIter
->iSegid
= iSegid
;
1544 fts5DlidxIterFirst(pIter
);
1546 fts5DlidxIterLast(p
, pIter
);
1550 if( p
->rc
!=SQLITE_OK
){
1551 fts5DlidxIterFree(pIter
);
1558 static i64
fts5DlidxIterRowid(Fts5DlidxIter
*pIter
){
1559 return pIter
->aLvl
[0].iRowid
;
1561 static int fts5DlidxIterPgno(Fts5DlidxIter
*pIter
){
1562 return pIter
->aLvl
[0].iLeafPgno
;
1566 ** Load the next leaf page into the segment iterator.
1568 static void fts5SegIterNextPage(
1569 Fts5Index
*p
, /* FTS5 backend object */
1570 Fts5SegIter
*pIter
/* Iterator to advance to next page */
1573 Fts5StructureSegment
*pSeg
= pIter
->pSeg
;
1574 fts5DataRelease(pIter
->pLeaf
);
1576 if( pIter
->pNextLeaf
){
1577 pIter
->pLeaf
= pIter
->pNextLeaf
;
1578 pIter
->pNextLeaf
= 0;
1579 }else if( pIter
->iLeafPgno
<=pSeg
->pgnoLast
){
1580 pIter
->pLeaf
= fts5LeafRead(p
,
1581 FTS5_SEGMENT_ROWID(pSeg
->iSegid
, pIter
->iLeafPgno
)
1586 pLeaf
= pIter
->pLeaf
;
1589 pIter
->iPgidxOff
= pLeaf
->szLeaf
;
1590 if( fts5LeafIsTermless(pLeaf
) ){
1591 pIter
->iEndofDoclist
= pLeaf
->nn
+1;
1593 pIter
->iPgidxOff
+= fts5GetVarint32(&pLeaf
->p
[pIter
->iPgidxOff
],
1594 pIter
->iEndofDoclist
1601 ** Argument p points to a buffer containing a varint to be interpreted as a
1602 ** position list size field. Read the varint and return the number of bytes
1603 ** read. Before returning, set *pnSz to the number of bytes in the position
1604 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1606 static int fts5GetPoslistSize(const u8
*p
, int *pnSz
, int *pbDel
){
1609 fts5FastGetVarint32(p
, n
, nSz
);
1610 assert_nc( nSz
>=0 );
1612 *pbDel
= nSz
& 0x0001;
1617 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1618 ** position-list size field. Read the value of the field and store it
1619 ** in the following variables:
1624 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1625 ** position list content (if any).
1627 static void fts5SegIterLoadNPos(Fts5Index
*p
, Fts5SegIter
*pIter
){
1628 if( p
->rc
==SQLITE_OK
){
1629 int iOff
= pIter
->iLeafOffset
; /* Offset to read at */
1630 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
1631 if( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
){
1632 int iEod
= MIN(pIter
->iEndofDoclist
, pIter
->pLeaf
->szLeaf
);
1635 if( iOff
<iEod
&& pIter
->pLeaf
->p
[iOff
]==0 ){
1638 if( iOff
<iEod
&& pIter
->pLeaf
->p
[iOff
]==0 ){
1647 fts5FastGetVarint32(pIter
->pLeaf
->p
, iOff
, nSz
);
1648 pIter
->bDel
= (nSz
& 0x0001);
1649 pIter
->nPos
= nSz
>>1;
1650 assert_nc( pIter
->nPos
>=0 );
1652 pIter
->iLeafOffset
= iOff
;
1656 static void fts5SegIterLoadRowid(Fts5Index
*p
, Fts5SegIter
*pIter
){
1657 u8
*a
= pIter
->pLeaf
->p
; /* Buffer to read data from */
1658 i64 iOff
= pIter
->iLeafOffset
;
1660 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
1661 while( iOff
>=pIter
->pLeaf
->szLeaf
){
1662 fts5SegIterNextPage(p
, pIter
);
1663 if( pIter
->pLeaf
==0 ){
1664 if( p
->rc
==SQLITE_OK
) p
->rc
= FTS5_CORRUPT
;
1668 a
= pIter
->pLeaf
->p
;
1670 iOff
+= sqlite3Fts5GetVarint(&a
[iOff
], (u64
*)&pIter
->iRowid
);
1671 pIter
->iLeafOffset
= iOff
;
1675 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1676 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1677 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1678 ** the first term in the segment).
1680 ** This function populates:
1683 ** Fts5SegIter.rowid
1685 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1686 ** the first position list. The position list belonging to document
1687 ** (Fts5SegIter.iRowid).
1689 static void fts5SegIterLoadTerm(Fts5Index
*p
, Fts5SegIter
*pIter
, int nKeep
){
1690 u8
*a
= pIter
->pLeaf
->p
; /* Buffer to read data from */
1691 i64 iOff
= pIter
->iLeafOffset
; /* Offset to read at */
1692 int nNew
; /* Bytes of new data */
1694 iOff
+= fts5GetVarint32(&a
[iOff
], nNew
);
1695 if( iOff
+nNew
>pIter
->pLeaf
->szLeaf
|| nKeep
>pIter
->term
.n
|| nNew
==0 ){
1696 p
->rc
= FTS5_CORRUPT
;
1699 pIter
->term
.n
= nKeep
;
1700 fts5BufferAppendBlob(&p
->rc
, &pIter
->term
, nNew
, &a
[iOff
]);
1701 assert( pIter
->term
.n
<=pIter
->term
.nSpace
);
1703 pIter
->iTermLeafOffset
= iOff
;
1704 pIter
->iTermLeafPgno
= pIter
->iLeafPgno
;
1705 pIter
->iLeafOffset
= iOff
;
1707 if( pIter
->iPgidxOff
>=pIter
->pLeaf
->nn
){
1708 pIter
->iEndofDoclist
= pIter
->pLeaf
->nn
+1;
1711 pIter
->iPgidxOff
+= fts5GetVarint32(&a
[pIter
->iPgidxOff
], nExtra
);
1712 pIter
->iEndofDoclist
+= nExtra
;
1715 fts5SegIterLoadRowid(p
, pIter
);
1718 static void fts5SegIterNext(Fts5Index
*, Fts5SegIter
*, int*);
1719 static void fts5SegIterNext_Reverse(Fts5Index
*, Fts5SegIter
*, int*);
1720 static void fts5SegIterNext_None(Fts5Index
*, Fts5SegIter
*, int*);
1722 static void fts5SegIterSetNext(Fts5Index
*p
, Fts5SegIter
*pIter
){
1723 if( pIter
->flags
& FTS5_SEGITER_REVERSE
){
1724 pIter
->xNext
= fts5SegIterNext_Reverse
;
1725 }else if( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
){
1726 pIter
->xNext
= fts5SegIterNext_None
;
1728 pIter
->xNext
= fts5SegIterNext
;
1733 ** Initialize the iterator object pIter to iterate through the entries in
1734 ** segment pSeg. The iterator is left pointing to the first entry when
1735 ** this function returns.
1737 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1738 ** an error has already occurred when this function is called, it is a no-op.
1740 static void fts5SegIterInit(
1741 Fts5Index
*p
, /* FTS index object */
1742 Fts5StructureSegment
*pSeg
, /* Description of segment */
1743 Fts5SegIter
*pIter
/* Object to populate */
1745 if( pSeg
->pgnoFirst
==0 ){
1746 /* This happens if the segment is being used as an input to an incremental
1747 ** merge and all data has already been "trimmed". See function
1748 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1749 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1750 ** at EOF already. */
1751 assert( pIter
->pLeaf
==0 );
1755 if( p
->rc
==SQLITE_OK
){
1756 memset(pIter
, 0, sizeof(*pIter
));
1757 fts5SegIterSetNext(p
, pIter
);
1759 pIter
->iLeafPgno
= pSeg
->pgnoFirst
-1;
1761 fts5SegIterNextPage(p
, pIter
);
1762 }while( p
->rc
==SQLITE_OK
&& pIter
->pLeaf
&& pIter
->pLeaf
->nn
==4 );
1765 if( p
->rc
==SQLITE_OK
&& pIter
->pLeaf
){
1766 pIter
->iLeafOffset
= 4;
1767 assert( pIter
->pLeaf
!=0 );
1768 assert_nc( pIter
->pLeaf
->nn
>4 );
1769 assert_nc( fts5LeafFirstTermOff(pIter
->pLeaf
)==4 );
1770 pIter
->iPgidxOff
= pIter
->pLeaf
->szLeaf
+1;
1771 fts5SegIterLoadTerm(p
, pIter
, 0);
1772 fts5SegIterLoadNPos(p
, pIter
);
1777 ** This function is only ever called on iterators created by calls to
1778 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1780 ** The iterator is in an unusual state when this function is called: the
1781 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1782 ** the position-list size field for the first relevant rowid on the page.
1783 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1785 ** This function advances the iterator so that it points to the last
1786 ** relevant rowid on the page and, if necessary, initializes the
1787 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1788 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1789 ** byte of the position list content associated with said rowid.
1791 static void fts5SegIterReverseInitPage(Fts5Index
*p
, Fts5SegIter
*pIter
){
1792 int eDetail
= p
->pConfig
->eDetail
;
1793 int n
= pIter
->pLeaf
->szLeaf
;
1794 int i
= pIter
->iLeafOffset
;
1795 u8
*a
= pIter
->pLeaf
->p
;
1796 int iRowidOffset
= 0;
1798 if( n
>pIter
->iEndofDoclist
){
1799 n
= pIter
->iEndofDoclist
;
1802 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
1806 if( eDetail
==FTS5_DETAIL_NONE
){
1808 if( i
<n
&& a
[i
]==0 ){
1810 if( i
<n
&& a
[i
]==0 ) i
++;
1815 i
+= fts5GetPoslistSize(&a
[i
], &nPos
, &bDummy
);
1819 i
+= fts5GetVarint(&a
[i
], &iDelta
);
1820 pIter
->iRowid
+= iDelta
;
1822 /* If necessary, grow the pIter->aRowidOffset[] array. */
1823 if( iRowidOffset
>=pIter
->nRowidOffset
){
1824 int nNew
= pIter
->nRowidOffset
+ 8;
1825 int *aNew
= (int*)sqlite3_realloc64(pIter
->aRowidOffset
,nNew
*sizeof(int));
1827 p
->rc
= SQLITE_NOMEM
;
1830 pIter
->aRowidOffset
= aNew
;
1831 pIter
->nRowidOffset
= nNew
;
1834 pIter
->aRowidOffset
[iRowidOffset
++] = pIter
->iLeafOffset
;
1835 pIter
->iLeafOffset
= i
;
1837 pIter
->iRowidOffset
= iRowidOffset
;
1838 fts5SegIterLoadNPos(p
, pIter
);
1844 static void fts5SegIterReverseNewPage(Fts5Index
*p
, Fts5SegIter
*pIter
){
1845 assert( pIter
->flags
& FTS5_SEGITER_REVERSE
);
1846 assert( pIter
->flags
& FTS5_SEGITER_ONETERM
);
1848 fts5DataRelease(pIter
->pLeaf
);
1850 while( p
->rc
==SQLITE_OK
&& pIter
->iLeafPgno
>pIter
->iTermLeafPgno
){
1853 pNew
= fts5DataRead(p
, FTS5_SEGMENT_ROWID(
1854 pIter
->pSeg
->iSegid
, pIter
->iLeafPgno
1857 /* iTermLeafOffset may be equal to szLeaf if the term is the last
1858 ** thing on the page - i.e. the first rowid is on the following page.
1859 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1860 if( pIter
->iLeafPgno
==pIter
->iTermLeafPgno
){
1861 assert( pIter
->pLeaf
==0 );
1862 if( pIter
->iTermLeafOffset
<pNew
->szLeaf
){
1863 pIter
->pLeaf
= pNew
;
1864 pIter
->iLeafOffset
= pIter
->iTermLeafOffset
;
1868 iRowidOff
= fts5LeafFirstRowidOff(pNew
);
1870 if( iRowidOff
>=pNew
->szLeaf
){
1871 p
->rc
= FTS5_CORRUPT
;
1873 pIter
->pLeaf
= pNew
;
1874 pIter
->iLeafOffset
= iRowidOff
;
1880 u8
*a
= &pIter
->pLeaf
->p
[pIter
->iLeafOffset
];
1881 pIter
->iLeafOffset
+= fts5GetVarint(a
, (u64
*)&pIter
->iRowid
);
1884 fts5DataRelease(pNew
);
1890 pIter
->iEndofDoclist
= pIter
->pLeaf
->nn
+1;
1891 fts5SegIterReverseInitPage(p
, pIter
);
1896 ** Return true if the iterator passed as the second argument currently
1897 ** points to a delete marker. A delete marker is an entry with a 0 byte
1900 static int fts5MultiIterIsEmpty(Fts5Index
*p
, Fts5Iter
*pIter
){
1901 Fts5SegIter
*pSeg
= &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
];
1902 return (p
->rc
==SQLITE_OK
&& pSeg
->pLeaf
&& pSeg
->nPos
==0);
1906 ** Advance iterator pIter to the next entry.
1908 ** This version of fts5SegIterNext() is only used by reverse iterators.
1910 static void fts5SegIterNext_Reverse(
1911 Fts5Index
*p
, /* FTS5 backend object */
1912 Fts5SegIter
*pIter
, /* Iterator to advance */
1913 int *pbUnused
/* Unused */
1915 assert( pIter
->flags
& FTS5_SEGITER_REVERSE
);
1916 assert( pIter
->pNextLeaf
==0 );
1917 UNUSED_PARAM(pbUnused
);
1919 if( pIter
->iRowidOffset
>0 ){
1920 u8
*a
= pIter
->pLeaf
->p
;
1924 pIter
->iRowidOffset
--;
1925 pIter
->iLeafOffset
= pIter
->aRowidOffset
[pIter
->iRowidOffset
];
1926 fts5SegIterLoadNPos(p
, pIter
);
1927 iOff
= pIter
->iLeafOffset
;
1928 if( p
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
){
1929 iOff
+= pIter
->nPos
;
1931 fts5GetVarint(&a
[iOff
], &iDelta
);
1932 pIter
->iRowid
-= iDelta
;
1934 fts5SegIterReverseNewPage(p
, pIter
);
1939 ** Advance iterator pIter to the next entry.
1941 ** This version of fts5SegIterNext() is only used if detail=none and the
1942 ** iterator is not a reverse direction iterator.
1944 static void fts5SegIterNext_None(
1945 Fts5Index
*p
, /* FTS5 backend object */
1946 Fts5SegIter
*pIter
, /* Iterator to advance */
1947 int *pbNewTerm
/* OUT: Set for new term */
1951 assert( p
->rc
==SQLITE_OK
);
1952 assert( (pIter
->flags
& FTS5_SEGITER_REVERSE
)==0 );
1953 assert( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
1955 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
1956 iOff
= pIter
->iLeafOffset
;
1958 /* Next entry is on the next page */
1959 while( pIter
->pSeg
&& iOff
>=pIter
->pLeaf
->szLeaf
){
1960 fts5SegIterNextPage(p
, pIter
);
1961 if( p
->rc
|| pIter
->pLeaf
==0 ) return;
1966 if( iOff
<pIter
->iEndofDoclist
){
1967 /* Next entry is on the current page */
1969 iOff
+= sqlite3Fts5GetVarint(&pIter
->pLeaf
->p
[iOff
], (u64
*)&iDelta
);
1970 pIter
->iLeafOffset
= iOff
;
1971 pIter
->iRowid
+= iDelta
;
1972 }else if( (pIter
->flags
& FTS5_SEGITER_ONETERM
)==0 ){
1975 if( iOff
!=fts5LeafFirstTermOff(pIter
->pLeaf
) ){
1976 iOff
+= fts5GetVarint32(&pIter
->pLeaf
->p
[iOff
], nKeep
);
1978 pIter
->iLeafOffset
= iOff
;
1979 fts5SegIterLoadTerm(p
, pIter
, nKeep
);
1981 const u8
*pList
= 0;
1982 const char *zTerm
= 0;
1984 sqlite3Fts5HashScanNext(p
->pHash
);
1985 sqlite3Fts5HashScanEntry(p
->pHash
, &zTerm
, &pList
, &nList
);
1986 if( pList
==0 ) goto next_none_eof
;
1987 pIter
->pLeaf
->p
= (u8
*)pList
;
1988 pIter
->pLeaf
->nn
= nList
;
1989 pIter
->pLeaf
->szLeaf
= nList
;
1990 pIter
->iEndofDoclist
= nList
;
1991 sqlite3Fts5BufferSet(&p
->rc
,&pIter
->term
, (int)strlen(zTerm
), (u8
*)zTerm
);
1992 pIter
->iLeafOffset
= fts5GetVarint(pList
, (u64
*)&pIter
->iRowid
);
1995 if( pbNewTerm
) *pbNewTerm
= 1;
2000 fts5SegIterLoadNPos(p
, pIter
);
2004 fts5DataRelease(pIter
->pLeaf
);
2010 ** Advance iterator pIter to the next entry.
2012 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
2013 ** is not considered an error if the iterator reaches EOF. If an error has
2014 ** already occurred when this function is called, it is a no-op.
2016 static void fts5SegIterNext(
2017 Fts5Index
*p
, /* FTS5 backend object */
2018 Fts5SegIter
*pIter
, /* Iterator to advance */
2019 int *pbNewTerm
/* OUT: Set for new term */
2021 Fts5Data
*pLeaf
= pIter
->pLeaf
;
2028 assert( pbNewTerm
==0 || *pbNewTerm
==0 );
2029 assert( p
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
);
2031 /* Search for the end of the position list within the current page. */
2035 ASSERT_SZLEAF_OK(pLeaf
);
2036 iOff
= pIter
->iLeafOffset
+ pIter
->nPos
;
2039 /* The next entry is on the current page. */
2040 assert_nc( iOff
<=pIter
->iEndofDoclist
);
2041 if( iOff
>=pIter
->iEndofDoclist
){
2043 if( iOff
!=fts5LeafFirstTermOff(pLeaf
) ){
2044 iOff
+= fts5GetVarint32(&a
[iOff
], nKeep
);
2048 iOff
+= sqlite3Fts5GetVarint(&a
[iOff
], &iDelta
);
2049 pIter
->iRowid
+= iDelta
;
2050 assert_nc( iDelta
>0 );
2052 pIter
->iLeafOffset
= iOff
;
2054 }else if( pIter
->pSeg
==0 ){
2055 const u8
*pList
= 0;
2056 const char *zTerm
= 0;
2058 assert( (pIter
->flags
& FTS5_SEGITER_ONETERM
) || pbNewTerm
);
2059 if( 0==(pIter
->flags
& FTS5_SEGITER_ONETERM
) ){
2060 sqlite3Fts5HashScanNext(p
->pHash
);
2061 sqlite3Fts5HashScanEntry(p
->pHash
, &zTerm
, &pList
, &nList
);
2064 fts5DataRelease(pIter
->pLeaf
);
2067 pIter
->pLeaf
->p
= (u8
*)pList
;
2068 pIter
->pLeaf
->nn
= nList
;
2069 pIter
->pLeaf
->szLeaf
= nList
;
2070 pIter
->iEndofDoclist
= nList
+1;
2071 sqlite3Fts5BufferSet(&p
->rc
, &pIter
->term
, (int)strlen(zTerm
),
2073 pIter
->iLeafOffset
= fts5GetVarint(pList
, (u64
*)&pIter
->iRowid
);
2078 /* Next entry is not on the current page */
2080 fts5SegIterNextPage(p
, pIter
);
2081 pLeaf
= pIter
->pLeaf
;
2082 if( pLeaf
==0 ) break;
2083 ASSERT_SZLEAF_OK(pLeaf
);
2084 if( (iOff
= fts5LeafFirstRowidOff(pLeaf
)) && iOff
<pLeaf
->szLeaf
){
2085 iOff
+= sqlite3Fts5GetVarint(&pLeaf
->p
[iOff
], (u64
*)&pIter
->iRowid
);
2086 pIter
->iLeafOffset
= iOff
;
2088 if( pLeaf
->nn
>pLeaf
->szLeaf
){
2089 pIter
->iPgidxOff
= pLeaf
->szLeaf
+ fts5GetVarint32(
2090 &pLeaf
->p
[pLeaf
->szLeaf
], pIter
->iEndofDoclist
2094 else if( pLeaf
->nn
>pLeaf
->szLeaf
){
2095 pIter
->iPgidxOff
= pLeaf
->szLeaf
+ fts5GetVarint32(
2096 &pLeaf
->p
[pLeaf
->szLeaf
], iOff
2098 pIter
->iLeafOffset
= iOff
;
2099 pIter
->iEndofDoclist
= iOff
;
2102 assert_nc( iOff
<pLeaf
->szLeaf
);
2103 if( iOff
>pLeaf
->szLeaf
){
2104 p
->rc
= FTS5_CORRUPT
;
2110 /* Check if the iterator is now at EOF. If so, return early. */
2113 if( pIter
->flags
& FTS5_SEGITER_ONETERM
){
2114 fts5DataRelease(pIter
->pLeaf
);
2117 fts5SegIterLoadTerm(p
, pIter
, nKeep
);
2118 fts5SegIterLoadNPos(p
, pIter
);
2119 if( pbNewTerm
) *pbNewTerm
= 1;
2122 /* The following could be done by calling fts5SegIterLoadNPos(). But
2123 ** this block is particularly performance critical, so equivalent
2124 ** code is inlined. */
2126 assert_nc( pIter
->iLeafOffset
<=pIter
->pLeaf
->nn
);
2127 fts5FastGetVarint32(pIter
->pLeaf
->p
, pIter
->iLeafOffset
, nSz
);
2128 pIter
->bDel
= (nSz
& 0x0001);
2129 pIter
->nPos
= nSz
>>1;
2130 assert_nc( pIter
->nPos
>=0 );
2135 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
2137 #define fts5IndexSkipVarint(a, iOff) { \
2138 int iEnd = iOff+9; \
2139 while( (a[iOff++] & 0x80) && iOff<iEnd ); \
2143 ** Iterator pIter currently points to the first rowid in a doclist. This
2144 ** function sets the iterator up so that iterates in reverse order through
2147 static void fts5SegIterReverse(Fts5Index
*p
, Fts5SegIter
*pIter
){
2148 Fts5DlidxIter
*pDlidx
= pIter
->pDlidx
;
2149 Fts5Data
*pLast
= 0;
2152 if( pDlidx
&& p
->pConfig
->iVersion
==FTS5_CURRENT_VERSION
){
2153 int iSegid
= pIter
->pSeg
->iSegid
;
2154 pgnoLast
= fts5DlidxIterPgno(pDlidx
);
2155 pLast
= fts5LeafRead(p
, FTS5_SEGMENT_ROWID(iSegid
, pgnoLast
));
2157 Fts5Data
*pLeaf
= pIter
->pLeaf
; /* Current leaf data */
2159 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2160 ** position-list content for the current rowid. Back it up so that it
2161 ** points to the start of the position-list size field. */
2163 if( pIter
->iTermLeafPgno
==pIter
->iLeafPgno
){
2164 iPoslist
= pIter
->iTermLeafOffset
;
2168 fts5IndexSkipVarint(pLeaf
->p
, iPoslist
);
2169 pIter
->iLeafOffset
= iPoslist
;
2171 /* If this condition is true then the largest rowid for the current
2172 ** term may not be stored on the current page. So search forward to
2173 ** see where said rowid really is. */
2174 if( pIter
->iEndofDoclist
>=pLeaf
->szLeaf
){
2176 Fts5StructureSegment
*pSeg
= pIter
->pSeg
;
2178 /* The last rowid in the doclist may not be on the current page. Search
2179 ** forward to find the page containing the last rowid. */
2180 for(pgno
=pIter
->iLeafPgno
+1; !p
->rc
&& pgno
<=pSeg
->pgnoLast
; pgno
++){
2181 i64 iAbs
= FTS5_SEGMENT_ROWID(pSeg
->iSegid
, pgno
);
2182 Fts5Data
*pNew
= fts5LeafRead(p
, iAbs
);
2184 int iRowid
, bTermless
;
2185 iRowid
= fts5LeafFirstRowidOff(pNew
);
2186 bTermless
= fts5LeafIsTermless(pNew
);
2188 SWAPVAL(Fts5Data
*, pNew
, pLast
);
2191 fts5DataRelease(pNew
);
2192 if( bTermless
==0 ) break;
2198 /* If pLast is NULL at this point, then the last rowid for this doclist
2199 ** lies on the page currently indicated by the iterator. In this case
2200 ** pIter->iLeafOffset is already set to point to the position-list size
2201 ** field associated with the first relevant rowid on the page.
2203 ** Or, if pLast is non-NULL, then it is the page that contains the last
2204 ** rowid. In this case configure the iterator so that it points to the
2205 ** first rowid on this page.
2209 fts5DataRelease(pIter
->pLeaf
);
2210 pIter
->pLeaf
= pLast
;
2211 pIter
->iLeafPgno
= pgnoLast
;
2212 iOff
= fts5LeafFirstRowidOff(pLast
);
2213 if( iOff
>pLast
->szLeaf
){
2214 p
->rc
= FTS5_CORRUPT
;
2217 iOff
+= fts5GetVarint(&pLast
->p
[iOff
], (u64
*)&pIter
->iRowid
);
2218 pIter
->iLeafOffset
= iOff
;
2220 if( fts5LeafIsTermless(pLast
) ){
2221 pIter
->iEndofDoclist
= pLast
->nn
+1;
2223 pIter
->iEndofDoclist
= fts5LeafFirstTermOff(pLast
);
2227 fts5SegIterReverseInitPage(p
, pIter
);
2231 ** Iterator pIter currently points to the first rowid of a doclist.
2232 ** There is a doclist-index associated with the final term on the current
2233 ** page. If the current term is the last term on the page, load the
2234 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2236 static void fts5SegIterLoadDlidx(Fts5Index
*p
, Fts5SegIter
*pIter
){
2237 int iSeg
= pIter
->pSeg
->iSegid
;
2238 int bRev
= (pIter
->flags
& FTS5_SEGITER_REVERSE
);
2239 Fts5Data
*pLeaf
= pIter
->pLeaf
; /* Current leaf data */
2241 assert( pIter
->flags
& FTS5_SEGITER_ONETERM
);
2242 assert( pIter
->pDlidx
==0 );
2244 /* Check if the current doclist ends on this page. If it does, return
2245 ** early without loading the doclist-index (as it belongs to a different
2247 if( pIter
->iTermLeafPgno
==pIter
->iLeafPgno
2248 && pIter
->iEndofDoclist
<pLeaf
->szLeaf
2253 pIter
->pDlidx
= fts5DlidxIterInit(p
, bRev
, iSeg
, pIter
->iTermLeafPgno
);
2257 ** The iterator object passed as the second argument currently contains
2258 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2259 ** function searches the leaf page for a term matching (pTerm/nTerm).
2261 ** If the specified term is found on the page, then the iterator is left
2262 ** pointing to it. If argument bGe is zero and the term is not found,
2263 ** the iterator is left pointing at EOF.
2265 ** If bGe is non-zero and the specified term is not found, then the
2266 ** iterator is left pointing to the smallest term in the segment that
2267 ** is larger than the specified term, even if this term is not on the
2270 static void fts5LeafSeek(
2271 Fts5Index
*p
, /* Leave any error code here */
2272 int bGe
, /* True for a >= search */
2273 Fts5SegIter
*pIter
, /* Iterator to seek */
2274 const u8
*pTerm
, int nTerm
/* Term to search for */
2277 const u8
*a
= pIter
->pLeaf
->p
;
2278 u32 n
= (u32
)pIter
->pLeaf
->nn
;
2284 u32 iPgidx
; /* Current offset in pgidx */
2287 assert( p
->rc
==SQLITE_OK
);
2289 iPgidx
= (u32
)pIter
->pLeaf
->szLeaf
;
2290 iPgidx
+= fts5GetVarint32(&a
[iPgidx
], iTermOff
);
2293 p
->rc
= FTS5_CORRUPT
;
2299 /* Figure out how many new bytes are in this term */
2300 fts5FastGetVarint32(a
, iOff
, nNew
);
2305 assert( nKeep
>=nMatch
);
2306 if( nKeep
==nMatch
){
2309 nCmp
= (u32
)MIN(nNew
, nTerm
-nMatch
);
2310 for(i
=0; i
<nCmp
; i
++){
2311 if( a
[iOff
+i
]!=pTerm
[nMatch
+i
] ) break;
2315 if( (u32
)nTerm
==nMatch
){
2317 goto search_success
;
2321 }else if( i
<nNew
&& a
[iOff
+i
]>pTerm
[nMatch
] ){
2331 iPgidx
+= fts5GetVarint32(&a
[iPgidx
], nKeep
);
2336 p
->rc
= FTS5_CORRUPT
;
2340 /* Read the nKeep field of the next term. */
2341 fts5FastGetVarint32(a
, iOff
, nKeep
);
2346 fts5DataRelease(pIter
->pLeaf
);
2349 }else if( bEndOfPage
){
2351 fts5SegIterNextPage(p
, pIter
);
2352 if( pIter
->pLeaf
==0 ) return;
2353 a
= pIter
->pLeaf
->p
;
2354 if( fts5LeafIsTermless(pIter
->pLeaf
)==0 ){
2355 iPgidx
= (u32
)pIter
->pLeaf
->szLeaf
;
2356 iPgidx
+= fts5GetVarint32(&pIter
->pLeaf
->p
[iPgidx
], iOff
);
2357 if( iOff
<4 || (i64
)iOff
>=pIter
->pLeaf
->szLeaf
){
2358 p
->rc
= FTS5_CORRUPT
;
2363 n
= (u32
)pIter
->pLeaf
->nn
;
2364 iOff
+= fts5GetVarint32(&a
[iOff
], nNew
);
2372 if( (i64
)iOff
+nNew
>n
|| nNew
<1 ){
2373 p
->rc
= FTS5_CORRUPT
;
2376 pIter
->iLeafOffset
= iOff
+ nNew
;
2377 pIter
->iTermLeafOffset
= pIter
->iLeafOffset
;
2378 pIter
->iTermLeafPgno
= pIter
->iLeafPgno
;
2380 fts5BufferSet(&p
->rc
, &pIter
->term
, nKeep
, pTerm
);
2381 fts5BufferAppendBlob(&p
->rc
, &pIter
->term
, nNew
, &a
[iOff
]);
2384 pIter
->iEndofDoclist
= pIter
->pLeaf
->nn
+1;
2387 iPgidx
+= fts5GetVarint32(&a
[iPgidx
], nExtra
);
2388 pIter
->iEndofDoclist
= iTermOff
+ nExtra
;
2390 pIter
->iPgidxOff
= iPgidx
;
2392 fts5SegIterLoadRowid(p
, pIter
);
2393 fts5SegIterLoadNPos(p
, pIter
);
2396 static sqlite3_stmt
*fts5IdxSelectStmt(Fts5Index
*p
){
2397 if( p
->pIdxSelect
==0 ){
2398 Fts5Config
*pConfig
= p
->pConfig
;
2399 fts5IndexPrepareStmt(p
, &p
->pIdxSelect
, sqlite3_mprintf(
2400 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2401 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2402 pConfig
->zDb
, pConfig
->zName
2405 return p
->pIdxSelect
;
2409 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2410 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2412 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2413 ** an error has already occurred when this function is called, it is a no-op.
2415 static void fts5SegIterSeekInit(
2416 Fts5Index
*p
, /* FTS5 backend */
2417 const u8
*pTerm
, int nTerm
, /* Term to seek to */
2418 int flags
, /* Mask of FTS5INDEX_XXX flags */
2419 Fts5StructureSegment
*pSeg
, /* Description of segment */
2420 Fts5SegIter
*pIter
/* Object to populate */
2423 int bGe
= (flags
& FTS5INDEX_QUERY_SCAN
);
2424 int bDlidx
= 0; /* True if there is a doclist-index */
2425 sqlite3_stmt
*pIdxSelect
= 0;
2427 assert( bGe
==0 || (flags
& FTS5INDEX_QUERY_DESC
)==0 );
2428 assert( pTerm
&& nTerm
);
2429 memset(pIter
, 0, sizeof(*pIter
));
2432 /* This block sets stack variable iPg to the leaf page number that may
2433 ** contain term (pTerm/nTerm), if it is present in the segment. */
2434 pIdxSelect
= fts5IdxSelectStmt(p
);
2436 sqlite3_bind_int(pIdxSelect
, 1, pSeg
->iSegid
);
2437 sqlite3_bind_blob(pIdxSelect
, 2, pTerm
, nTerm
, SQLITE_STATIC
);
2438 if( SQLITE_ROW
==sqlite3_step(pIdxSelect
) ){
2439 i64 val
= sqlite3_column_int(pIdxSelect
, 0);
2440 iPg
= (int)(val
>>1);
2441 bDlidx
= (val
& 0x0001);
2443 p
->rc
= sqlite3_reset(pIdxSelect
);
2444 sqlite3_bind_null(pIdxSelect
, 2);
2446 if( iPg
<pSeg
->pgnoFirst
){
2447 iPg
= pSeg
->pgnoFirst
;
2451 pIter
->iLeafPgno
= iPg
- 1;
2452 fts5SegIterNextPage(p
, pIter
);
2455 fts5LeafSeek(p
, bGe
, pIter
, pTerm
, nTerm
);
2458 if( p
->rc
==SQLITE_OK
&& bGe
==0 ){
2459 pIter
->flags
|= FTS5_SEGITER_ONETERM
;
2461 if( flags
& FTS5INDEX_QUERY_DESC
){
2462 pIter
->flags
|= FTS5_SEGITER_REVERSE
;
2465 fts5SegIterLoadDlidx(p
, pIter
);
2467 if( flags
& FTS5INDEX_QUERY_DESC
){
2468 fts5SegIterReverse(p
, pIter
);
2473 fts5SegIterSetNext(p
, pIter
);
2477 ** 1) an error has occurred, or
2478 ** 2) the iterator points to EOF, or
2479 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2480 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2481 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2483 assert_nc( p
->rc
!=SQLITE_OK
/* 1 */
2484 || pIter
->pLeaf
==0 /* 2 */
2485 || fts5BufferCompareBlob(&pIter
->term
, pTerm
, nTerm
)==0 /* 3 */
2486 || (bGe
&& fts5BufferCompareBlob(&pIter
->term
, pTerm
, nTerm
)>0) /* 4 */
2491 ** Initialize the object pIter to point to term pTerm/nTerm within the
2492 ** in-memory hash table. If there is no such term in the hash-table, the
2493 ** iterator is set to EOF.
2495 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2496 ** an error has already occurred when this function is called, it is a no-op.
2498 static void fts5SegIterHashInit(
2499 Fts5Index
*p
, /* FTS5 backend */
2500 const u8
*pTerm
, int nTerm
, /* Term to seek to */
2501 int flags
, /* Mask of FTS5INDEX_XXX flags */
2502 Fts5SegIter
*pIter
/* Object to populate */
2507 Fts5Data
*pLeaf
= 0;
2510 assert( p
->rc
==SQLITE_OK
);
2512 if( pTerm
==0 || (flags
& FTS5INDEX_QUERY_SCAN
) ){
2513 const u8
*pList
= 0;
2515 p
->rc
= sqlite3Fts5HashScanInit(p
->pHash
, (const char*)pTerm
, nTerm
);
2516 sqlite3Fts5HashScanEntry(p
->pHash
, (const char**)&z
, &pList
, &nList
);
2517 n
= (z
? (int)strlen((const char*)z
) : 0);
2519 pLeaf
= fts5IdxMalloc(p
, sizeof(Fts5Data
));
2521 pLeaf
->p
= (u8
*)pList
;
2525 p
->rc
= sqlite3Fts5HashQuery(p
->pHash
, sizeof(Fts5Data
),
2526 (const char*)pTerm
, nTerm
, (void**)&pLeaf
, &nList
2529 pLeaf
->p
= (u8
*)&pLeaf
[1];
2533 pIter
->flags
|= FTS5_SEGITER_ONETERM
;
2537 sqlite3Fts5BufferSet(&p
->rc
, &pIter
->term
, n
, z
);
2538 pLeaf
->nn
= pLeaf
->szLeaf
= nList
;
2539 pIter
->pLeaf
= pLeaf
;
2540 pIter
->iLeafOffset
= fts5GetVarint(pLeaf
->p
, (u64
*)&pIter
->iRowid
);
2541 pIter
->iEndofDoclist
= pLeaf
->nn
;
2543 if( flags
& FTS5INDEX_QUERY_DESC
){
2544 pIter
->flags
|= FTS5_SEGITER_REVERSE
;
2545 fts5SegIterReverseInitPage(p
, pIter
);
2547 fts5SegIterLoadNPos(p
, pIter
);
2551 fts5SegIterSetNext(p
, pIter
);
2555 ** Zero the iterator passed as the only argument.
2557 static void fts5SegIterClear(Fts5SegIter
*pIter
){
2558 fts5BufferFree(&pIter
->term
);
2559 fts5DataRelease(pIter
->pLeaf
);
2560 fts5DataRelease(pIter
->pNextLeaf
);
2561 fts5DlidxIterFree(pIter
->pDlidx
);
2562 sqlite3_free(pIter
->aRowidOffset
);
2563 memset(pIter
, 0, sizeof(Fts5SegIter
));
2569 ** This function is used as part of the big assert() procedure implemented by
2570 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2571 ** in *pRes is the correct result of comparing the current positions of the
2574 static void fts5AssertComparisonResult(
2580 int i1
= p1
- pIter
->aSeg
;
2581 int i2
= p2
- pIter
->aSeg
;
2583 if( p1
->pLeaf
|| p2
->pLeaf
){
2585 assert( pRes
->iFirst
==i2
);
2586 }else if( p2
->pLeaf
==0 ){
2587 assert( pRes
->iFirst
==i1
);
2589 int nMin
= MIN(p1
->term
.n
, p2
->term
.n
);
2590 int res
= fts5Memcmp(p1
->term
.p
, p2
->term
.p
, nMin
);
2591 if( res
==0 ) res
= p1
->term
.n
- p2
->term
.n
;
2594 assert( pRes
->bTermEq
==1 );
2595 assert( p1
->iRowid
!=p2
->iRowid
);
2596 res
= ((p1
->iRowid
> p2
->iRowid
)==pIter
->bRev
) ? -1 : 1;
2598 assert( pRes
->bTermEq
==0 );
2602 assert( pRes
->iFirst
==i1
);
2604 assert( pRes
->iFirst
==i2
);
2611 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2612 ** is compiled. In that case, this function is essentially an assert()
2613 ** statement used to verify that the contents of the pIter->aFirst[] array
2616 static void fts5AssertMultiIterSetup(Fts5Index
*p
, Fts5Iter
*pIter
){
2617 if( p
->rc
==SQLITE_OK
){
2618 Fts5SegIter
*pFirst
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
2621 assert( (pFirst
->pLeaf
==0)==pIter
->base
.bEof
);
2623 /* Check that pIter->iSwitchRowid is set correctly. */
2624 for(i
=0; i
<pIter
->nSeg
; i
++){
2625 Fts5SegIter
*p1
= &pIter
->aSeg
[i
];
2628 || fts5BufferCompare(&pFirst
->term
, &p1
->term
)
2629 || p1
->iRowid
==pIter
->iSwitchRowid
2630 || (p1
->iRowid
<pIter
->iSwitchRowid
)==pIter
->bRev
2634 for(i
=0; i
<pIter
->nSeg
; i
+=2){
2635 Fts5SegIter
*p1
= &pIter
->aSeg
[i
];
2636 Fts5SegIter
*p2
= &pIter
->aSeg
[i
+1];
2637 Fts5CResult
*pRes
= &pIter
->aFirst
[(pIter
->nSeg
+ i
) / 2];
2638 fts5AssertComparisonResult(pIter
, p1
, p2
, pRes
);
2641 for(i
=1; i
<(pIter
->nSeg
/ 2); i
+=2){
2642 Fts5SegIter
*p1
= &pIter
->aSeg
[ pIter
->aFirst
[i
*2].iFirst
];
2643 Fts5SegIter
*p2
= &pIter
->aSeg
[ pIter
->aFirst
[i
*2+1].iFirst
];
2644 Fts5CResult
*pRes
= &pIter
->aFirst
[i
];
2645 fts5AssertComparisonResult(pIter
, p1
, p2
, pRes
);
2650 # define fts5AssertMultiIterSetup(x,y)
2654 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2656 ** If the returned value is non-zero, then it is the index of an entry
2657 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2658 ** to a key that is a duplicate of another, higher priority,
2659 ** segment-iterator in the pSeg->aSeg[] array.
2661 static int fts5MultiIterDoCompare(Fts5Iter
*pIter
, int iOut
){
2662 int i1
; /* Index of left-hand Fts5SegIter */
2663 int i2
; /* Index of right-hand Fts5SegIter */
2665 Fts5SegIter
*p1
; /* Left-hand Fts5SegIter */
2666 Fts5SegIter
*p2
; /* Right-hand Fts5SegIter */
2667 Fts5CResult
*pRes
= &pIter
->aFirst
[iOut
];
2669 assert( iOut
<pIter
->nSeg
&& iOut
>0 );
2670 assert( pIter
->bRev
==0 || pIter
->bRev
==1 );
2672 if( iOut
>=(pIter
->nSeg
/2) ){
2673 i1
= (iOut
- pIter
->nSeg
/2) * 2;
2676 i1
= pIter
->aFirst
[iOut
*2].iFirst
;
2677 i2
= pIter
->aFirst
[iOut
*2+1].iFirst
;
2679 p1
= &pIter
->aSeg
[i1
];
2680 p2
= &pIter
->aSeg
[i2
];
2683 if( p1
->pLeaf
==0 ){ /* If p1 is at EOF */
2685 }else if( p2
->pLeaf
==0 ){ /* If p2 is at EOF */
2688 int res
= fts5BufferCompare(&p1
->term
, &p2
->term
);
2693 if( p1
->iRowid
==p2
->iRowid
){
2694 p1
->bDel
= p2
->bDel
;
2697 res
= ((p1
->iRowid
> p2
->iRowid
)==pIter
->bRev
) ? -1 : +1;
2707 pRes
->iFirst
= (u16
)iRes
;
2712 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2713 ** It is an error if leaf iLeafPgno does not exist. Unless the db is
2714 ** a 'secure-delete' db, if it contains no rowids then this is also an error.
2716 static void fts5SegIterGotoPage(
2717 Fts5Index
*p
, /* FTS5 backend object */
2718 Fts5SegIter
*pIter
, /* Iterator to advance */
2721 assert( iLeafPgno
>pIter
->iLeafPgno
);
2723 if( iLeafPgno
>pIter
->pSeg
->pgnoLast
){
2724 p
->rc
= FTS5_CORRUPT
;
2726 fts5DataRelease(pIter
->pNextLeaf
);
2727 pIter
->pNextLeaf
= 0;
2728 pIter
->iLeafPgno
= iLeafPgno
-1;
2730 while( p
->rc
==SQLITE_OK
){
2732 fts5SegIterNextPage(p
, pIter
);
2733 if( pIter
->pLeaf
==0 ) break;
2734 iOff
= fts5LeafFirstRowidOff(pIter
->pLeaf
);
2736 u8
*a
= pIter
->pLeaf
->p
;
2737 int n
= pIter
->pLeaf
->szLeaf
;
2738 if( iOff
<4 || iOff
>=n
){
2739 p
->rc
= FTS5_CORRUPT
;
2741 iOff
+= fts5GetVarint(&a
[iOff
], (u64
*)&pIter
->iRowid
);
2742 pIter
->iLeafOffset
= iOff
;
2743 fts5SegIterLoadNPos(p
, pIter
);
2752 ** Advance the iterator passed as the second argument until it is at or
2753 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2754 ** always advanced at least once.
2756 static void fts5SegIterNextFrom(
2757 Fts5Index
*p
, /* FTS5 backend object */
2758 Fts5SegIter
*pIter
, /* Iterator to advance */
2759 i64 iMatch
/* Advance iterator at least this far */
2761 int bRev
= (pIter
->flags
& FTS5_SEGITER_REVERSE
);
2762 Fts5DlidxIter
*pDlidx
= pIter
->pDlidx
;
2763 int iLeafPgno
= pIter
->iLeafPgno
;
2766 assert( pIter
->flags
& FTS5_SEGITER_ONETERM
);
2767 assert( pIter
->pDlidx
);
2768 assert( pIter
->pLeaf
);
2771 while( !fts5DlidxIterEof(p
, pDlidx
) && iMatch
>fts5DlidxIterRowid(pDlidx
) ){
2772 iLeafPgno
= fts5DlidxIterPgno(pDlidx
);
2773 fts5DlidxIterNext(p
, pDlidx
);
2775 assert_nc( iLeafPgno
>=pIter
->iLeafPgno
|| p
->rc
);
2776 if( iLeafPgno
>pIter
->iLeafPgno
){
2777 fts5SegIterGotoPage(p
, pIter
, iLeafPgno
);
2781 assert( pIter
->pNextLeaf
==0 );
2782 assert( iMatch
<pIter
->iRowid
);
2783 while( !fts5DlidxIterEof(p
, pDlidx
) && iMatch
<fts5DlidxIterRowid(pDlidx
) ){
2784 fts5DlidxIterPrev(p
, pDlidx
);
2786 iLeafPgno
= fts5DlidxIterPgno(pDlidx
);
2788 assert( fts5DlidxIterEof(p
, pDlidx
) || iLeafPgno
<=pIter
->iLeafPgno
);
2790 if( iLeafPgno
<pIter
->iLeafPgno
){
2791 pIter
->iLeafPgno
= iLeafPgno
+1;
2792 fts5SegIterReverseNewPage(p
, pIter
);
2798 if( bMove
&& p
->rc
==SQLITE_OK
) pIter
->xNext(p
, pIter
, 0);
2799 if( pIter
->pLeaf
==0 ) break;
2800 if( bRev
==0 && pIter
->iRowid
>=iMatch
) break;
2801 if( bRev
!=0 && pIter
->iRowid
<=iMatch
) break;
2803 }while( p
->rc
==SQLITE_OK
);
2808 ** Free the iterator object passed as the second argument.
2810 static void fts5MultiIterFree(Fts5Iter
*pIter
){
2813 for(i
=0; i
<pIter
->nSeg
; i
++){
2814 fts5SegIterClear(&pIter
->aSeg
[i
]);
2816 fts5BufferFree(&pIter
->poslist
);
2817 sqlite3_free(pIter
);
2821 static void fts5MultiIterAdvanced(
2822 Fts5Index
*p
, /* FTS5 backend to iterate within */
2823 Fts5Iter
*pIter
, /* Iterator to update aFirst[] array for */
2824 int iChanged
, /* Index of sub-iterator just advanced */
2825 int iMinset
/* Minimum entry in aFirst[] to set */
2828 for(i
=(pIter
->nSeg
+iChanged
)/2; i
>=iMinset
&& p
->rc
==SQLITE_OK
; i
=i
/2){
2830 if( (iEq
= fts5MultiIterDoCompare(pIter
, i
)) ){
2831 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iEq
];
2832 assert( p
->rc
==SQLITE_OK
);
2833 pSeg
->xNext(p
, pSeg
, 0);
2834 i
= pIter
->nSeg
+ iEq
;
2840 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2841 ** points to the same term though - just a different rowid. This function
2842 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2843 ** If it does so successfully, 0 is returned. Otherwise 1.
2845 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2846 ** on the iterator instead. That function does the same as this one, except
2847 ** that it deals with more complicated cases as well.
2849 static int fts5MultiIterAdvanceRowid(
2850 Fts5Iter
*pIter
, /* Iterator to update aFirst[] array for */
2851 int iChanged
, /* Index of sub-iterator just advanced */
2852 Fts5SegIter
**ppFirst
2854 Fts5SegIter
*pNew
= &pIter
->aSeg
[iChanged
];
2856 if( pNew
->iRowid
==pIter
->iSwitchRowid
2857 || (pNew
->iRowid
<pIter
->iSwitchRowid
)==pIter
->bRev
2860 Fts5SegIter
*pOther
= &pIter
->aSeg
[iChanged
^ 0x0001];
2861 pIter
->iSwitchRowid
= pIter
->bRev
? SMALLEST_INT64
: LARGEST_INT64
;
2862 for(i
=(pIter
->nSeg
+iChanged
)/2; 1; i
=i
/2){
2863 Fts5CResult
*pRes
= &pIter
->aFirst
[i
];
2865 assert( pNew
->pLeaf
);
2866 assert( pRes
->bTermEq
==0 || pOther
->pLeaf
);
2868 if( pRes
->bTermEq
){
2869 if( pNew
->iRowid
==pOther
->iRowid
){
2871 }else if( (pOther
->iRowid
>pNew
->iRowid
)==pIter
->bRev
){
2872 pIter
->iSwitchRowid
= pOther
->iRowid
;
2874 }else if( (pOther
->iRowid
>pIter
->iSwitchRowid
)==pIter
->bRev
){
2875 pIter
->iSwitchRowid
= pOther
->iRowid
;
2878 pRes
->iFirst
= (u16
)(pNew
- pIter
->aSeg
);
2881 pOther
= &pIter
->aSeg
[ pIter
->aFirst
[i
^ 0x0001].iFirst
];
2890 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2892 static void fts5MultiIterSetEof(Fts5Iter
*pIter
){
2893 Fts5SegIter
*pSeg
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
2894 pIter
->base
.bEof
= pSeg
->pLeaf
==0;
2895 pIter
->iSwitchRowid
= pSeg
->iRowid
;
2899 ** Move the iterator to the next entry.
2901 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2902 ** considered an error if the iterator reaches EOF, or if it is already at
2903 ** EOF when this function is called.
2905 static void fts5MultiIterNext(
2908 int bFrom
, /* True if argument iFrom is valid */
2909 i64 iFrom
/* Advance at least as far as this */
2911 int bUseFrom
= bFrom
;
2912 assert( pIter
->base
.bEof
==0 );
2913 while( p
->rc
==SQLITE_OK
){
2914 int iFirst
= pIter
->aFirst
[1].iFirst
;
2916 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iFirst
];
2917 assert( p
->rc
==SQLITE_OK
);
2918 if( bUseFrom
&& pSeg
->pDlidx
){
2919 fts5SegIterNextFrom(p
, pSeg
, iFrom
);
2921 pSeg
->xNext(p
, pSeg
, &bNewTerm
);
2924 if( pSeg
->pLeaf
==0 || bNewTerm
2925 || fts5MultiIterAdvanceRowid(pIter
, iFirst
, &pSeg
)
2927 fts5MultiIterAdvanced(p
, pIter
, iFirst
, 1);
2928 fts5MultiIterSetEof(pIter
);
2929 pSeg
= &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
];
2930 if( pSeg
->pLeaf
==0 ) return;
2933 fts5AssertMultiIterSetup(p
, pIter
);
2934 assert( pSeg
==&pIter
->aSeg
[pIter
->aFirst
[1].iFirst
] && pSeg
->pLeaf
);
2935 if( pIter
->bSkipEmpty
==0 || pSeg
->nPos
){
2936 pIter
->xSetOutputs(pIter
, pSeg
);
2943 static void fts5MultiIterNext2(
2946 int *pbNewTerm
/* OUT: True if *might* be new term */
2948 assert( pIter
->bSkipEmpty
);
2949 if( p
->rc
==SQLITE_OK
){
2952 int iFirst
= pIter
->aFirst
[1].iFirst
;
2953 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iFirst
];
2956 assert( p
->rc
==SQLITE_OK
);
2957 pSeg
->xNext(p
, pSeg
, &bNewTerm
);
2958 if( pSeg
->pLeaf
==0 || bNewTerm
2959 || fts5MultiIterAdvanceRowid(pIter
, iFirst
, &pSeg
)
2961 fts5MultiIterAdvanced(p
, pIter
, iFirst
, 1);
2962 fts5MultiIterSetEof(pIter
);
2965 fts5AssertMultiIterSetup(p
, pIter
);
2967 }while( fts5MultiIterIsEmpty(p
, pIter
) );
2971 static void fts5IterSetOutputs_Noop(Fts5Iter
*pUnused1
, Fts5SegIter
*pUnused2
){
2972 UNUSED_PARAM2(pUnused1
, pUnused2
);
2975 static Fts5Iter
*fts5MultiIterAlloc(
2976 Fts5Index
*p
, /* FTS5 backend to iterate within */
2980 int nSlot
; /* Power of two >= nSeg */
2982 for(nSlot
=2; nSlot
<nSeg
; nSlot
=nSlot
*2);
2983 pNew
= fts5IdxMalloc(p
,
2984 sizeof(Fts5Iter
) + /* pNew */
2985 sizeof(Fts5SegIter
) * (nSlot
-1) + /* pNew->aSeg[] */
2986 sizeof(Fts5CResult
) * nSlot
/* pNew->aFirst[] */
2990 pNew
->aFirst
= (Fts5CResult
*)&pNew
->aSeg
[nSlot
];
2992 pNew
->xSetOutputs
= fts5IterSetOutputs_Noop
;
2997 static void fts5PoslistCallback(
3000 const u8
*pChunk
, int nChunk
3002 UNUSED_PARAM(pUnused
);
3003 assert_nc( nChunk
>=0 );
3005 fts5BufferSafeAppendBlob((Fts5Buffer
*)pContext
, pChunk
, nChunk
);
3009 typedef struct PoslistCallbackCtx PoslistCallbackCtx
;
3010 struct PoslistCallbackCtx
{
3011 Fts5Buffer
*pBuf
; /* Append to this buffer */
3012 Fts5Colset
*pColset
; /* Restrict matches to this column */
3013 int eState
; /* See above */
3016 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx
;
3017 struct PoslistOffsetsCtx
{
3018 Fts5Buffer
*pBuf
; /* Append to this buffer */
3019 Fts5Colset
*pColset
; /* Restrict matches to this column */
3025 ** TODO: Make this more efficient!
3027 static int fts5IndexColsetTest(Fts5Colset
*pColset
, int iCol
){
3029 for(i
=0; i
<pColset
->nCol
; i
++){
3030 if( pColset
->aiCol
[i
]==iCol
) return 1;
3035 static void fts5PoslistOffsetsCallback(
3038 const u8
*pChunk
, int nChunk
3040 PoslistOffsetsCtx
*pCtx
= (PoslistOffsetsCtx
*)pContext
;
3041 UNUSED_PARAM(pUnused
);
3042 assert_nc( nChunk
>=0 );
3047 i
+= fts5GetVarint32(&pChunk
[i
], iVal
);
3048 iVal
+= pCtx
->iRead
- 2;
3050 if( fts5IndexColsetTest(pCtx
->pColset
, iVal
) ){
3051 fts5BufferSafeAppendVarint(pCtx
->pBuf
, iVal
+ 2 - pCtx
->iWrite
);
3052 pCtx
->iWrite
= iVal
;
3058 static void fts5PoslistFilterCallback(
3061 const u8
*pChunk
, int nChunk
3063 PoslistCallbackCtx
*pCtx
= (PoslistCallbackCtx
*)pContext
;
3064 UNUSED_PARAM(pUnused
);
3065 assert_nc( nChunk
>=0 );
3067 /* Search through to find the first varint with value 1. This is the
3068 ** start of the next columns hits. */
3072 if( pCtx
->eState
==2 ){
3074 fts5FastGetVarint32(pChunk
, i
, iCol
);
3075 if( fts5IndexColsetTest(pCtx
->pColset
, iCol
) ){
3077 fts5BufferSafeAppendVarint(pCtx
->pBuf
, 1);
3084 while( i
<nChunk
&& pChunk
[i
]!=0x01 ){
3085 while( pChunk
[i
] & 0x80 ) i
++;
3089 fts5BufferSafeAppendBlob(pCtx
->pBuf
, &pChunk
[iStart
], i
-iStart
);
3098 fts5FastGetVarint32(pChunk
, i
, iCol
);
3099 pCtx
->eState
= fts5IndexColsetTest(pCtx
->pColset
, iCol
);
3101 fts5BufferSafeAppendBlob(pCtx
->pBuf
, &pChunk
[iStart
], i
-iStart
);
3110 static void fts5ChunkIterate(
3111 Fts5Index
*p
, /* Index object */
3112 Fts5SegIter
*pSeg
, /* Poslist of this iterator */
3113 void *pCtx
, /* Context pointer for xChunk callback */
3114 void (*xChunk
)(Fts5Index
*, void*, const u8
*, int)
3116 int nRem
= pSeg
->nPos
; /* Number of bytes still to come */
3117 Fts5Data
*pData
= 0;
3118 u8
*pChunk
= &pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3119 int nChunk
= MIN(nRem
, pSeg
->pLeaf
->szLeaf
- pSeg
->iLeafOffset
);
3120 int pgno
= pSeg
->iLeafPgno
;
3123 /* This function does not work with detail=none databases. */
3124 assert( p
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
);
3126 if( (pSeg
->flags
& FTS5_SEGITER_REVERSE
)==0 ){
3131 xChunk(p
, pCtx
, pChunk
, nChunk
);
3133 fts5DataRelease(pData
);
3136 }else if( pSeg
->pSeg
==0 ){
3137 p
->rc
= FTS5_CORRUPT
;
3141 pData
= fts5LeafRead(p
, FTS5_SEGMENT_ROWID(pSeg
->pSeg
->iSegid
, pgno
));
3142 if( pData
==0 ) break;
3143 pChunk
= &pData
->p
[4];
3144 nChunk
= MIN(nRem
, pData
->szLeaf
- 4);
3145 if( pgno
==pgnoSave
){
3146 assert( pSeg
->pNextLeaf
==0 );
3147 pSeg
->pNextLeaf
= pData
;
3155 ** Iterator pIter currently points to a valid entry (not EOF). This
3156 ** function appends the position list data for the current entry to
3157 ** buffer pBuf. It does not make a copy of the position-list size
3160 static void fts5SegiterPoslist(
3163 Fts5Colset
*pColset
,
3168 if( 0==fts5BufferGrow(&p
->rc
, pBuf
, pSeg
->nPos
+FTS5_DATA_ZERO_PADDING
) ){
3169 assert( pBuf
->p
!=0 );
3170 assert( pBuf
->nSpace
>= pBuf
->n
+pSeg
->nPos
+FTS5_DATA_ZERO_PADDING
);
3171 memset(&pBuf
->p
[pBuf
->n
+pSeg
->nPos
], 0, FTS5_DATA_ZERO_PADDING
);
3173 fts5ChunkIterate(p
, pSeg
, (void*)pBuf
, fts5PoslistCallback
);
3175 if( p
->pConfig
->eDetail
==FTS5_DETAIL_FULL
){
3176 PoslistCallbackCtx sCtx
;
3178 sCtx
.pColset
= pColset
;
3179 sCtx
.eState
= fts5IndexColsetTest(pColset
, 0);
3180 assert( sCtx
.eState
==0 || sCtx
.eState
==1 );
3181 fts5ChunkIterate(p
, pSeg
, (void*)&sCtx
, fts5PoslistFilterCallback
);
3183 PoslistOffsetsCtx sCtx
;
3184 memset(&sCtx
, 0, sizeof(sCtx
));
3186 sCtx
.pColset
= pColset
;
3187 fts5ChunkIterate(p
, pSeg
, (void*)&sCtx
, fts5PoslistOffsetsCallback
);
3194 ** Parameter pPos points to a buffer containing a position list, size nPos.
3195 ** This function filters it according to pColset (which must be non-NULL)
3196 ** and sets pIter->base.pData/nData to point to the new position list.
3197 ** If memory is required for the new position list, use buffer pIter->poslist.
3198 ** Or, if the new position list is a contiguous subset of the input, set
3199 ** pIter->base.pData/nData to point directly to it.
3201 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
3202 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
3203 ** before returning.
3205 static void fts5IndexExtractColset(
3207 Fts5Colset
*pColset
, /* Colset to filter on */
3208 const u8
*pPos
, int nPos
, /* Position list */
3211 if( *pRc
==SQLITE_OK
){
3213 const u8
*aCopy
= p
;
3214 const u8
*pEnd
= &p
[nPos
]; /* One byte past end of position list */
3218 if( pColset
->nCol
>1 && sqlite3Fts5BufferSize(pRc
, &pIter
->poslist
, nPos
) ){
3223 while( pColset
->aiCol
[i
]<iCurrent
){
3225 if( i
==pColset
->nCol
){
3226 pIter
->base
.pData
= pIter
->poslist
.p
;
3227 pIter
->base
.nData
= pIter
->poslist
.n
;
3232 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3233 ** not part of a varint */
3234 while( p
<pEnd
&& *p
!=0x01 ){
3235 while( *p
++ & 0x80 );
3238 if( pColset
->aiCol
[i
]==iCurrent
){
3239 if( pColset
->nCol
==1 ){
3240 pIter
->base
.pData
= aCopy
;
3241 pIter
->base
.nData
= p
-aCopy
;
3244 fts5BufferSafeAppendBlob(&pIter
->poslist
, aCopy
, p
-aCopy
);
3247 pIter
->base
.pData
= pIter
->poslist
.p
;
3248 pIter
->base
.nData
= pIter
->poslist
.n
;
3253 if( iCurrent
& 0x80 ){
3255 p
+= fts5GetVarint32(p
, iCurrent
);
3263 ** xSetOutputs callback used by detail=none tables.
3265 static void fts5IterSetOutputs_None(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3266 assert( pIter
->pIndex
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
3267 pIter
->base
.iRowid
= pSeg
->iRowid
;
3268 pIter
->base
.nData
= pSeg
->nPos
;
3272 ** xSetOutputs callback used by detail=full and detail=col tables when no
3273 ** column filters are specified.
3275 static void fts5IterSetOutputs_Nocolset(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3276 pIter
->base
.iRowid
= pSeg
->iRowid
;
3277 pIter
->base
.nData
= pSeg
->nPos
;
3279 assert( pIter
->pIndex
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
);
3280 assert( pIter
->pColset
==0 );
3282 if( pSeg
->iLeafOffset
+pSeg
->nPos
<=pSeg
->pLeaf
->szLeaf
){
3283 /* All data is stored on the current page. Populate the output
3284 ** variables to point into the body of the page object. */
3285 pIter
->base
.pData
= &pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3287 /* The data is distributed over two or more pages. Copy it into the
3288 ** Fts5Iter.poslist buffer and then set the output pointer to point
3289 ** to this buffer. */
3290 fts5BufferZero(&pIter
->poslist
);
3291 fts5SegiterPoslist(pIter
->pIndex
, pSeg
, 0, &pIter
->poslist
);
3292 pIter
->base
.pData
= pIter
->poslist
.p
;
3297 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3298 ** against no columns at all).
3300 static void fts5IterSetOutputs_ZeroColset(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3302 pIter
->base
.nData
= 0;
3306 ** xSetOutputs callback used by detail=col when there is a column filter
3307 ** and there are 100 or more columns. Also called as a fallback from
3308 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3310 static void fts5IterSetOutputs_Col(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3311 fts5BufferZero(&pIter
->poslist
);
3312 fts5SegiterPoslist(pIter
->pIndex
, pSeg
, pIter
->pColset
, &pIter
->poslist
);
3313 pIter
->base
.iRowid
= pSeg
->iRowid
;
3314 pIter
->base
.pData
= pIter
->poslist
.p
;
3315 pIter
->base
.nData
= pIter
->poslist
.n
;
3319 ** xSetOutputs callback used when:
3322 ** * there is a column filter, and
3323 ** * the table contains 100 or fewer columns.
3325 ** The last point is to ensure all column numbers are stored as
3326 ** single-byte varints.
3328 static void fts5IterSetOutputs_Col100(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3330 assert( pIter
->pIndex
->pConfig
->eDetail
==FTS5_DETAIL_COLUMNS
);
3331 assert( pIter
->pColset
);
3333 if( pSeg
->iLeafOffset
+pSeg
->nPos
>pSeg
->pLeaf
->szLeaf
){
3334 fts5IterSetOutputs_Col(pIter
, pSeg
);
3336 u8
*a
= (u8
*)&pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3337 u8
*pEnd
= (u8
*)&a
[pSeg
->nPos
];
3339 int *aiCol
= pIter
->pColset
->aiCol
;
3340 int *aiColEnd
= &aiCol
[pIter
->pColset
->nCol
];
3342 u8
*aOut
= pIter
->poslist
.p
;
3345 pIter
->base
.iRowid
= pSeg
->iRowid
;
3348 iPrev
+= (int)a
++[0] - 2;
3349 while( *aiCol
<iPrev
){
3351 if( aiCol
==aiColEnd
) goto setoutputs_col_out
;
3353 if( *aiCol
==iPrev
){
3354 *aOut
++ = (u8
)((iPrev
- iPrevOut
) + 2);
3360 pIter
->base
.pData
= pIter
->poslist
.p
;
3361 pIter
->base
.nData
= aOut
- pIter
->poslist
.p
;
3366 ** xSetOutputs callback used by detail=full when there is a column filter.
3368 static void fts5IterSetOutputs_Full(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3369 Fts5Colset
*pColset
= pIter
->pColset
;
3370 pIter
->base
.iRowid
= pSeg
->iRowid
;
3372 assert( pIter
->pIndex
->pConfig
->eDetail
==FTS5_DETAIL_FULL
);
3375 if( pSeg
->iLeafOffset
+pSeg
->nPos
<=pSeg
->pLeaf
->szLeaf
){
3376 /* All data is stored on the current page. Populate the output
3377 ** variables to point into the body of the page object. */
3378 const u8
*a
= &pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3379 int *pRc
= &pIter
->pIndex
->rc
;
3380 fts5BufferZero(&pIter
->poslist
);
3381 fts5IndexExtractColset(pRc
, pColset
, a
, pSeg
->nPos
, pIter
);
3383 /* The data is distributed over two or more pages. Copy it into the
3384 ** Fts5Iter.poslist buffer and then set the output pointer to point
3385 ** to this buffer. */
3386 fts5BufferZero(&pIter
->poslist
);
3387 fts5SegiterPoslist(pIter
->pIndex
, pSeg
, pColset
, &pIter
->poslist
);
3388 pIter
->base
.pData
= pIter
->poslist
.p
;
3389 pIter
->base
.nData
= pIter
->poslist
.n
;
3393 static void fts5IterSetOutputCb(int *pRc
, Fts5Iter
*pIter
){
3394 assert( pIter
!=0 || (*pRc
)!=SQLITE_OK
);
3395 if( *pRc
==SQLITE_OK
){
3396 Fts5Config
*pConfig
= pIter
->pIndex
->pConfig
;
3397 if( pConfig
->eDetail
==FTS5_DETAIL_NONE
){
3398 pIter
->xSetOutputs
= fts5IterSetOutputs_None
;
3401 else if( pIter
->pColset
==0 ){
3402 pIter
->xSetOutputs
= fts5IterSetOutputs_Nocolset
;
3405 else if( pIter
->pColset
->nCol
==0 ){
3406 pIter
->xSetOutputs
= fts5IterSetOutputs_ZeroColset
;
3409 else if( pConfig
->eDetail
==FTS5_DETAIL_FULL
){
3410 pIter
->xSetOutputs
= fts5IterSetOutputs_Full
;
3414 assert( pConfig
->eDetail
==FTS5_DETAIL_COLUMNS
);
3415 if( pConfig
->nCol
<=100 ){
3416 pIter
->xSetOutputs
= fts5IterSetOutputs_Col100
;
3417 sqlite3Fts5BufferSize(pRc
, &pIter
->poslist
, pConfig
->nCol
);
3419 pIter
->xSetOutputs
= fts5IterSetOutputs_Col
;
3427 ** Allocate a new Fts5Iter object.
3429 ** The new object will be used to iterate through data in structure pStruct.
3430 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3431 ** is zero or greater, data from the first nSegment segments on level iLevel
3434 ** The iterator initially points to the first term/rowid entry in the
3437 static void fts5MultiIterNew(
3438 Fts5Index
*p
, /* FTS5 backend to iterate within */
3439 Fts5Structure
*pStruct
, /* Structure of specific index */
3440 int flags
, /* FTS5INDEX_QUERY_XXX flags */
3441 Fts5Colset
*pColset
, /* Colset to filter on (or NULL) */
3442 const u8
*pTerm
, int nTerm
, /* Term to seek to (or NULL/0) */
3443 int iLevel
, /* Level to iterate (-1 for all) */
3444 int nSegment
, /* Number of segments to merge (iLevel>=0) */
3445 Fts5Iter
**ppOut
/* New object */
3447 int nSeg
= 0; /* Number of segment-iters in use */
3448 int iIter
= 0; /* */
3449 int iSeg
; /* Used to iterate through segments */
3450 Fts5StructureLevel
*pLvl
;
3453 assert( (pTerm
==0 && nTerm
==0) || iLevel
<0 );
3455 /* Allocate space for the new multi-seg-iterator. */
3456 if( p
->rc
==SQLITE_OK
){
3458 assert( pStruct
->nSegment
==fts5StructureCountSegments(pStruct
) );
3459 nSeg
= pStruct
->nSegment
;
3460 nSeg
+= (p
->pHash
&& 0==(flags
& FTS5INDEX_QUERY_SKIPHASH
));
3462 nSeg
= MIN(pStruct
->aLevel
[iLevel
].nSeg
, nSegment
);
3465 *ppOut
= pNew
= fts5MultiIterAlloc(p
, nSeg
);
3467 assert( p
->rc
!=SQLITE_OK
);
3468 goto fts5MultiIterNew_post_check
;
3470 pNew
->bRev
= (0!=(flags
& FTS5INDEX_QUERY_DESC
));
3471 pNew
->bSkipEmpty
= (0!=(flags
& FTS5INDEX_QUERY_SKIPEMPTY
));
3472 pNew
->pColset
= pColset
;
3473 if( (flags
& FTS5INDEX_QUERY_NOOUTPUT
)==0 ){
3474 fts5IterSetOutputCb(&p
->rc
, pNew
);
3477 /* Initialize each of the component segment iterators. */
3478 if( p
->rc
==SQLITE_OK
){
3480 Fts5StructureLevel
*pEnd
= &pStruct
->aLevel
[pStruct
->nLevel
];
3481 if( p
->pHash
&& 0==(flags
& FTS5INDEX_QUERY_SKIPHASH
) ){
3482 /* Add a segment iterator for the current contents of the hash table. */
3483 Fts5SegIter
*pIter
= &pNew
->aSeg
[iIter
++];
3484 fts5SegIterHashInit(p
, pTerm
, nTerm
, flags
, pIter
);
3486 for(pLvl
=&pStruct
->aLevel
[0]; pLvl
<pEnd
; pLvl
++){
3487 for(iSeg
=pLvl
->nSeg
-1; iSeg
>=0; iSeg
--){
3488 Fts5StructureSegment
*pSeg
= &pLvl
->aSeg
[iSeg
];
3489 Fts5SegIter
*pIter
= &pNew
->aSeg
[iIter
++];
3491 fts5SegIterInit(p
, pSeg
, pIter
);
3493 fts5SegIterSeekInit(p
, pTerm
, nTerm
, flags
, pSeg
, pIter
);
3498 pLvl
= &pStruct
->aLevel
[iLevel
];
3499 for(iSeg
=nSeg
-1; iSeg
>=0; iSeg
--){
3500 fts5SegIterInit(p
, &pLvl
->aSeg
[iSeg
], &pNew
->aSeg
[iIter
++]);
3503 assert( iIter
==nSeg
);
3506 /* If the above was successful, each component iterators now points
3507 ** to the first entry in its segment. In this case initialize the
3508 ** aFirst[] array. Or, if an error has occurred, free the iterator
3509 ** object and set the output variable to NULL. */
3510 if( p
->rc
==SQLITE_OK
){
3511 for(iIter
=pNew
->nSeg
-1; iIter
>0; iIter
--){
3513 if( (iEq
= fts5MultiIterDoCompare(pNew
, iIter
)) ){
3514 Fts5SegIter
*pSeg
= &pNew
->aSeg
[iEq
];
3515 if( p
->rc
==SQLITE_OK
) pSeg
->xNext(p
, pSeg
, 0);
3516 fts5MultiIterAdvanced(p
, pNew
, iEq
, iIter
);
3519 fts5MultiIterSetEof(pNew
);
3520 fts5AssertMultiIterSetup(p
, pNew
);
3522 if( pNew
->bSkipEmpty
&& fts5MultiIterIsEmpty(p
, pNew
) ){
3523 fts5MultiIterNext(p
, pNew
, 0, 0);
3524 }else if( pNew
->base
.bEof
==0 ){
3525 Fts5SegIter
*pSeg
= &pNew
->aSeg
[pNew
->aFirst
[1].iFirst
];
3526 pNew
->xSetOutputs(pNew
, pSeg
);
3530 fts5MultiIterFree(pNew
);
3534 fts5MultiIterNew_post_check
:
3535 assert( (*ppOut
)!=0 || p
->rc
!=SQLITE_OK
);
3540 ** Create an Fts5Iter that iterates through the doclist provided
3541 ** as the second argument.
3543 static void fts5MultiIterNew2(
3544 Fts5Index
*p
, /* FTS5 backend to iterate within */
3545 Fts5Data
*pData
, /* Doclist to iterate through */
3546 int bDesc
, /* True for descending rowid order */
3547 Fts5Iter
**ppOut
/* New object */
3550 pNew
= fts5MultiIterAlloc(p
, 2);
3552 Fts5SegIter
*pIter
= &pNew
->aSeg
[1];
3554 pIter
->flags
= FTS5_SEGITER_ONETERM
;
3555 if( pData
->szLeaf
>0 ){
3556 pIter
->pLeaf
= pData
;
3557 pIter
->iLeafOffset
= fts5GetVarint(pData
->p
, (u64
*)&pIter
->iRowid
);
3558 pIter
->iEndofDoclist
= pData
->nn
;
3559 pNew
->aFirst
[1].iFirst
= 1;
3562 pIter
->flags
|= FTS5_SEGITER_REVERSE
;
3563 fts5SegIterReverseInitPage(p
, pIter
);
3565 fts5SegIterLoadNPos(p
, pIter
);
3569 pNew
->base
.bEof
= 1;
3571 fts5SegIterSetNext(p
, pIter
);
3576 fts5DataRelease(pData
);
3580 ** Return true if the iterator is at EOF or if an error has occurred.
3583 static int fts5MultiIterEof(Fts5Index
*p
, Fts5Iter
*pIter
){
3584 assert( pIter
!=0 || p
->rc
!=SQLITE_OK
);
3585 assert( p
->rc
!=SQLITE_OK
3586 || (pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
].pLeaf
==0)==pIter
->base
.bEof
3588 return (p
->rc
|| pIter
->base
.bEof
);
3592 ** Return the rowid of the entry that the iterator currently points
3593 ** to. If the iterator points to EOF when this function is called the
3594 ** results are undefined.
3596 static i64
fts5MultiIterRowid(Fts5Iter
*pIter
){
3597 assert( pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
].pLeaf
);
3598 return pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
].iRowid
;
3602 ** Move the iterator to the next entry at or following iMatch.
3604 static void fts5MultiIterNextFrom(
3611 fts5MultiIterNext(p
, pIter
, 1, iMatch
);
3612 if( fts5MultiIterEof(p
, pIter
) ) break;
3613 iRowid
= fts5MultiIterRowid(pIter
);
3614 if( pIter
->bRev
==0 && iRowid
>=iMatch
) break;
3615 if( pIter
->bRev
!=0 && iRowid
<=iMatch
) break;
3620 ** Return a pointer to a buffer containing the term associated with the
3621 ** entry that the iterator currently points to.
3623 static const u8
*fts5MultiIterTerm(Fts5Iter
*pIter
, int *pn
){
3624 Fts5SegIter
*p
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
3630 ** Allocate a new segment-id for the structure pStruct. The new segment
3631 ** id must be between 1 and 65335 inclusive, and must not be used by
3632 ** any currently existing segment. If a free segment id cannot be found,
3633 ** SQLITE_FULL is returned.
3635 ** If an error has already occurred, this function is a no-op. 0 is
3636 ** returned in this case.
3638 static int fts5AllocateSegid(Fts5Index
*p
, Fts5Structure
*pStruct
){
3641 if( p
->rc
==SQLITE_OK
){
3642 if( pStruct
->nSegment
>=FTS5_MAX_SEGMENT
){
3643 p
->rc
= SQLITE_FULL
;
3645 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
3646 ** array is 63 elements, or 252 bytes, in size. */
3647 u32 aUsed
[(FTS5_MAX_SEGMENT
+31) / 32];
3651 memset(aUsed
, 0, sizeof(aUsed
));
3652 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
3653 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
3654 int iId
= pStruct
->aLevel
[iLvl
].aSeg
[iSeg
].iSegid
;
3655 if( iId
<=FTS5_MAX_SEGMENT
&& iId
>0 ){
3656 aUsed
[(iId
-1) / 32] |= (u32
)1 << ((iId
-1) % 32);
3661 for(i
=0; aUsed
[i
]==0xFFFFFFFF; i
++);
3663 for(iSegid
=0; mask
& ((u32
)1 << iSegid
); iSegid
++);
3667 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
3668 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
3669 assert_nc( iSegid
!=pStruct
->aLevel
[iLvl
].aSeg
[iSeg
].iSegid
);
3672 assert_nc( iSegid
>0 && iSegid
<=FTS5_MAX_SEGMENT
);
3675 sqlite3_stmt
*pIdxSelect
= fts5IdxSelectStmt(p
);
3676 if( p
->rc
==SQLITE_OK
){
3677 u8 aBlob
[2] = {0xff, 0xff};
3678 sqlite3_bind_int(pIdxSelect
, 1, iSegid
);
3679 sqlite3_bind_blob(pIdxSelect
, 2, aBlob
, 2, SQLITE_STATIC
);
3680 assert_nc( sqlite3_step(pIdxSelect
)!=SQLITE_ROW
);
3681 p
->rc
= sqlite3_reset(pIdxSelect
);
3682 sqlite3_bind_null(pIdxSelect
, 2);
3693 ** Discard all data currently cached in the hash-tables.
3695 static void fts5IndexDiscardData(Fts5Index
*p
){
3696 assert( p
->pHash
|| p
->nPendingData
==0 );
3698 sqlite3Fts5HashClear(p
->pHash
);
3699 p
->nPendingData
= 0;
3704 ** Return the size of the prefix, in bytes, that buffer
3705 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3707 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3708 ** than buffer (pOld/nOld).
3710 static int fts5PrefixCompress(int nOld
, const u8
*pOld
, const u8
*pNew
){
3712 for(i
=0; i
<nOld
; i
++){
3713 if( pOld
[i
]!=pNew
[i
] ) break;
3718 static void fts5WriteDlidxClear(
3720 Fts5SegWriter
*pWriter
,
3721 int bFlush
/* If true, write dlidx to disk */
3724 assert( bFlush
==0 || (pWriter
->nDlidx
>0 && pWriter
->aDlidx
[0].buf
.n
>0) );
3725 for(i
=0; i
<pWriter
->nDlidx
; i
++){
3726 Fts5DlidxWriter
*pDlidx
= &pWriter
->aDlidx
[i
];
3727 if( pDlidx
->buf
.n
==0 ) break;
3729 assert( pDlidx
->pgno
!=0 );
3731 FTS5_DLIDX_ROWID(pWriter
->iSegid
, i
, pDlidx
->pgno
),
3732 pDlidx
->buf
.p
, pDlidx
->buf
.n
3735 sqlite3Fts5BufferZero(&pDlidx
->buf
);
3736 pDlidx
->bPrevValid
= 0;
3741 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
3742 ** Any new array elements are zeroed before returning.
3744 static int fts5WriteDlidxGrow(
3746 Fts5SegWriter
*pWriter
,
3749 if( p
->rc
==SQLITE_OK
&& nLvl
>=pWriter
->nDlidx
){
3750 Fts5DlidxWriter
*aDlidx
= (Fts5DlidxWriter
*)sqlite3_realloc64(
3751 pWriter
->aDlidx
, sizeof(Fts5DlidxWriter
) * nLvl
3754 p
->rc
= SQLITE_NOMEM
;
3756 size_t nByte
= sizeof(Fts5DlidxWriter
) * (nLvl
- pWriter
->nDlidx
);
3757 memset(&aDlidx
[pWriter
->nDlidx
], 0, nByte
);
3758 pWriter
->aDlidx
= aDlidx
;
3759 pWriter
->nDlidx
= nLvl
;
3766 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3767 ** enough, flush it to disk and return 1. Otherwise discard it and return
3770 static int fts5WriteFlushDlidx(Fts5Index
*p
, Fts5SegWriter
*pWriter
){
3773 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
3774 ** to the database, also write the doclist-index to disk. */
3775 if( pWriter
->aDlidx
[0].buf
.n
>0 && pWriter
->nEmpty
>=FTS5_MIN_DLIDX_SIZE
){
3778 fts5WriteDlidxClear(p
, pWriter
, bFlag
);
3779 pWriter
->nEmpty
= 0;
3784 ** This function is called whenever processing of the doclist for the
3785 ** last term on leaf page (pWriter->iBtPage) is completed.
3787 ** The doclist-index for that term is currently stored in-memory within the
3788 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3789 ** writes it out to disk. Or, if it is too small to bother with, discards
3792 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3794 static void fts5WriteFlushBtree(Fts5Index
*p
, Fts5SegWriter
*pWriter
){
3797 assert( pWriter
->iBtPage
|| pWriter
->nEmpty
==0 );
3798 if( pWriter
->iBtPage
==0 ) return;
3799 bFlag
= fts5WriteFlushDlidx(p
, pWriter
);
3801 if( p
->rc
==SQLITE_OK
){
3802 const char *z
= (pWriter
->btterm
.n
>0?(const char*)pWriter
->btterm
.p
:"");
3803 /* The following was already done in fts5WriteInit(): */
3804 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
3805 sqlite3_bind_blob(p
->pIdxWriter
, 2, z
, pWriter
->btterm
.n
, SQLITE_STATIC
);
3806 sqlite3_bind_int64(p
->pIdxWriter
, 3, bFlag
+ ((i64
)pWriter
->iBtPage
<<1));
3807 sqlite3_step(p
->pIdxWriter
);
3808 p
->rc
= sqlite3_reset(p
->pIdxWriter
);
3809 sqlite3_bind_null(p
->pIdxWriter
, 2);
3811 pWriter
->iBtPage
= 0;
3815 ** This is called once for each leaf page except the first that contains
3816 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
3817 ** is larger than all terms written to earlier leaves, and equal to or
3818 ** smaller than the first term on the new leaf.
3820 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
3821 ** has already occurred when this function is called, it is a no-op.
3823 static void fts5WriteBtreeTerm(
3824 Fts5Index
*p
, /* FTS5 backend object */
3825 Fts5SegWriter
*pWriter
, /* Writer object */
3826 int nTerm
, const u8
*pTerm
/* First term on new page */
3828 fts5WriteFlushBtree(p
, pWriter
);
3829 if( p
->rc
==SQLITE_OK
){
3830 fts5BufferSet(&p
->rc
, &pWriter
->btterm
, nTerm
, pTerm
);
3831 pWriter
->iBtPage
= pWriter
->writer
.pgno
;
3836 ** This function is called when flushing a leaf page that contains no
3837 ** terms at all to disk.
3839 static void fts5WriteBtreeNoTerm(
3840 Fts5Index
*p
, /* FTS5 backend object */
3841 Fts5SegWriter
*pWriter
/* Writer object */
3843 /* If there were no rowids on the leaf page either and the doclist-index
3844 ** has already been started, append an 0x00 byte to it. */
3845 if( pWriter
->bFirstRowidInPage
&& pWriter
->aDlidx
[0].buf
.n
>0 ){
3846 Fts5DlidxWriter
*pDlidx
= &pWriter
->aDlidx
[0];
3847 assert( pDlidx
->bPrevValid
);
3848 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, 0);
3851 /* Increment the "number of sequential leaves without a term" counter. */
3855 static i64
fts5DlidxExtractFirstRowid(Fts5Buffer
*pBuf
){
3859 iOff
= 1 + fts5GetVarint(&pBuf
->p
[1], (u64
*)&iRowid
);
3860 fts5GetVarint(&pBuf
->p
[iOff
], (u64
*)&iRowid
);
3865 ** Rowid iRowid has just been appended to the current leaf page. It is the
3866 ** first on the page. This function appends an appropriate entry to the current
3869 static void fts5WriteDlidxAppend(
3871 Fts5SegWriter
*pWriter
,
3877 for(i
=0; p
->rc
==SQLITE_OK
&& bDone
==0; i
++){
3879 Fts5DlidxWriter
*pDlidx
= &pWriter
->aDlidx
[i
];
3881 if( pDlidx
->buf
.n
>=p
->pConfig
->pgsz
){
3882 /* The current doclist-index page is full. Write it to disk and push
3883 ** a copy of iRowid (which will become the first rowid on the next
3884 ** doclist-index leaf page) up into the next level of the b-tree
3885 ** hierarchy. If the node being flushed is currently the root node,
3886 ** also push its first rowid upwards. */
3887 pDlidx
->buf
.p
[0] = 0x01; /* Not the root node */
3889 FTS5_DLIDX_ROWID(pWriter
->iSegid
, i
, pDlidx
->pgno
),
3890 pDlidx
->buf
.p
, pDlidx
->buf
.n
3892 fts5WriteDlidxGrow(p
, pWriter
, i
+2);
3893 pDlidx
= &pWriter
->aDlidx
[i
];
3894 if( p
->rc
==SQLITE_OK
&& pDlidx
[1].buf
.n
==0 ){
3895 i64 iFirst
= fts5DlidxExtractFirstRowid(&pDlidx
->buf
);
3897 /* This was the root node. Push its first rowid up to the new root. */
3898 pDlidx
[1].pgno
= pDlidx
->pgno
;
3899 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
[1].buf
, 0);
3900 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
[1].buf
, pDlidx
->pgno
);
3901 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
[1].buf
, iFirst
);
3902 pDlidx
[1].bPrevValid
= 1;
3903 pDlidx
[1].iPrev
= iFirst
;
3906 sqlite3Fts5BufferZero(&pDlidx
->buf
);
3907 pDlidx
->bPrevValid
= 0;
3913 if( pDlidx
->bPrevValid
){
3914 iVal
= iRowid
- pDlidx
->iPrev
;
3916 i64 iPgno
= (i
==0 ? pWriter
->writer
.pgno
: pDlidx
[-1].pgno
);
3917 assert( pDlidx
->buf
.n
==0 );
3918 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, !bDone
);
3919 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, iPgno
);
3923 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, iVal
);
3924 pDlidx
->bPrevValid
= 1;
3925 pDlidx
->iPrev
= iRowid
;
3929 static void fts5WriteFlushLeaf(Fts5Index
*p
, Fts5SegWriter
*pWriter
){
3930 static const u8 zero
[] = { 0x00, 0x00, 0x00, 0x00 };
3931 Fts5PageWriter
*pPage
= &pWriter
->writer
;
3934 assert( (pPage
->pgidx
.n
==0)==(pWriter
->bFirstTermInPage
) );
3936 /* Set the szLeaf header field. */
3937 assert( 0==fts5GetU16(&pPage
->buf
.p
[2]) );
3938 fts5PutU16(&pPage
->buf
.p
[2], (u16
)pPage
->buf
.n
);
3940 if( pWriter
->bFirstTermInPage
){
3941 /* No term was written to this page. */
3942 assert( pPage
->pgidx
.n
==0 );
3943 fts5WriteBtreeNoTerm(p
, pWriter
);
3945 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
3946 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, pPage
->pgidx
.n
, pPage
->pgidx
.p
);
3949 /* Write the page out to disk */
3950 iRowid
= FTS5_SEGMENT_ROWID(pWriter
->iSegid
, pPage
->pgno
);
3951 fts5DataWrite(p
, iRowid
, pPage
->buf
.p
, pPage
->buf
.n
);
3953 /* Initialize the next page. */
3954 fts5BufferZero(&pPage
->buf
);
3955 fts5BufferZero(&pPage
->pgidx
);
3956 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, 4, zero
);
3957 pPage
->iPrevPgidx
= 0;
3960 /* Increase the leaves written counter */
3961 pWriter
->nLeafWritten
++;
3963 /* The new leaf holds no terms or rowids */
3964 pWriter
->bFirstTermInPage
= 1;
3965 pWriter
->bFirstRowidInPage
= 1;
3969 ** Append term pTerm/nTerm to the segment being written by the writer passed
3970 ** as the second argument.
3972 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3973 ** already occurred, this function is a no-op.
3975 static void fts5WriteAppendTerm(
3977 Fts5SegWriter
*pWriter
,
3978 int nTerm
, const u8
*pTerm
3980 int nPrefix
; /* Bytes of prefix compression for term */
3981 Fts5PageWriter
*pPage
= &pWriter
->writer
;
3982 Fts5Buffer
*pPgidx
= &pWriter
->writer
.pgidx
;
3983 int nMin
= MIN(pPage
->term
.n
, nTerm
);
3985 assert( p
->rc
==SQLITE_OK
);
3986 assert( pPage
->buf
.n
>=4 );
3987 assert( pPage
->buf
.n
>4 || pWriter
->bFirstTermInPage
);
3989 /* If the current leaf page is full, flush it to disk. */
3990 if( (pPage
->buf
.n
+ pPgidx
->n
+ nTerm
+ 2)>=p
->pConfig
->pgsz
){
3991 if( pPage
->buf
.n
>4 ){
3992 fts5WriteFlushLeaf(p
, pWriter
);
3993 if( p
->rc
!=SQLITE_OK
) return;
3995 fts5BufferGrow(&p
->rc
, &pPage
->buf
, nTerm
+FTS5_DATA_PADDING
);
3998 /* TODO1: Updating pgidx here. */
3999 pPgidx
->n
+= sqlite3Fts5PutVarint(
4000 &pPgidx
->p
[pPgidx
->n
], pPage
->buf
.n
- pPage
->iPrevPgidx
4002 pPage
->iPrevPgidx
= pPage
->buf
.n
;
4004 fts5PutU16(&pPgidx
->p
[pPgidx
->n
], pPage
->buf
.n
);
4008 if( pWriter
->bFirstTermInPage
){
4010 if( pPage
->pgno
!=1 ){
4011 /* This is the first term on a leaf that is not the leftmost leaf in
4012 ** the segment b-tree. In this case it is necessary to add a term to
4013 ** the b-tree hierarchy that is (a) larger than the largest term
4014 ** already written to the segment and (b) smaller than or equal to
4015 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
4016 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
4019 ** Usually, the previous term is available in pPage->term. The exception
4020 ** is if this is the first term written in an incremental-merge step.
4021 ** In this case the previous term is not available, so just write a
4022 ** copy of (pTerm/nTerm) into the parent node. This is slightly
4023 ** inefficient, but still correct. */
4025 if( pPage
->term
.n
){
4026 n
= 1 + fts5PrefixCompress(nMin
, pPage
->term
.p
, pTerm
);
4028 fts5WriteBtreeTerm(p
, pWriter
, n
, pTerm
);
4029 if( p
->rc
!=SQLITE_OK
) return;
4030 pPage
= &pWriter
->writer
;
4033 nPrefix
= fts5PrefixCompress(nMin
, pPage
->term
.p
, pTerm
);
4034 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
, nPrefix
);
4037 /* Append the number of bytes of new data, then the term data itself
4039 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
, nTerm
- nPrefix
);
4040 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, nTerm
- nPrefix
, &pTerm
[nPrefix
]);
4042 /* Update the Fts5PageWriter.term field. */
4043 fts5BufferSet(&p
->rc
, &pPage
->term
, nTerm
, pTerm
);
4044 pWriter
->bFirstTermInPage
= 0;
4046 pWriter
->bFirstRowidInPage
= 0;
4047 pWriter
->bFirstRowidInDoclist
= 1;
4049 assert( p
->rc
|| (pWriter
->nDlidx
>0 && pWriter
->aDlidx
[0].buf
.n
==0) );
4050 pWriter
->aDlidx
[0].pgno
= pPage
->pgno
;
4054 ** Append a rowid and position-list size field to the writers output.
4056 static void fts5WriteAppendRowid(
4058 Fts5SegWriter
*pWriter
,
4061 if( p
->rc
==SQLITE_OK
){
4062 Fts5PageWriter
*pPage
= &pWriter
->writer
;
4064 if( (pPage
->buf
.n
+ pPage
->pgidx
.n
)>=p
->pConfig
->pgsz
){
4065 fts5WriteFlushLeaf(p
, pWriter
);
4068 /* If this is to be the first rowid written to the page, set the
4069 ** rowid-pointer in the page-header. Also append a value to the dlidx
4070 ** buffer, in case a doclist-index is required. */
4071 if( pWriter
->bFirstRowidInPage
){
4072 fts5PutU16(pPage
->buf
.p
, (u16
)pPage
->buf
.n
);
4073 fts5WriteDlidxAppend(p
, pWriter
, iRowid
);
4076 /* Write the rowid. */
4077 if( pWriter
->bFirstRowidInDoclist
|| pWriter
->bFirstRowidInPage
){
4078 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
, iRowid
);
4080 assert_nc( p
->rc
|| iRowid
>pWriter
->iPrevRowid
);
4081 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
,
4082 (u64
)iRowid
- (u64
)pWriter
->iPrevRowid
4085 pWriter
->iPrevRowid
= iRowid
;
4086 pWriter
->bFirstRowidInDoclist
= 0;
4087 pWriter
->bFirstRowidInPage
= 0;
4091 static void fts5WriteAppendPoslistData(
4093 Fts5SegWriter
*pWriter
,
4097 Fts5PageWriter
*pPage
= &pWriter
->writer
;
4098 const u8
*a
= aData
;
4101 assert( p
->pConfig
->pgsz
>0 );
4102 while( p
->rc
==SQLITE_OK
4103 && (pPage
->buf
.n
+ pPage
->pgidx
.n
+ n
)>=p
->pConfig
->pgsz
4105 int nReq
= p
->pConfig
->pgsz
- pPage
->buf
.n
- pPage
->pgidx
.n
;
4107 while( nCopy
<nReq
){
4109 nCopy
+= fts5GetVarint(&a
[nCopy
], (u64
*)&dummy
);
4111 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, nCopy
, a
);
4114 fts5WriteFlushLeaf(p
, pWriter
);
4117 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, n
, a
);
4122 ** Flush any data cached by the writer object to the database. Free any
4123 ** allocations associated with the writer.
4125 static void fts5WriteFinish(
4127 Fts5SegWriter
*pWriter
, /* Writer object */
4128 int *pnLeaf
/* OUT: Number of leaf pages in b-tree */
4131 Fts5PageWriter
*pLeaf
= &pWriter
->writer
;
4132 if( p
->rc
==SQLITE_OK
){
4133 assert( pLeaf
->pgno
>=1 );
4134 if( pLeaf
->buf
.n
>4 ){
4135 fts5WriteFlushLeaf(p
, pWriter
);
4137 *pnLeaf
= pLeaf
->pgno
-1;
4138 if( pLeaf
->pgno
>1 ){
4139 fts5WriteFlushBtree(p
, pWriter
);
4142 fts5BufferFree(&pLeaf
->term
);
4143 fts5BufferFree(&pLeaf
->buf
);
4144 fts5BufferFree(&pLeaf
->pgidx
);
4145 fts5BufferFree(&pWriter
->btterm
);
4147 for(i
=0; i
<pWriter
->nDlidx
; i
++){
4148 sqlite3Fts5BufferFree(&pWriter
->aDlidx
[i
].buf
);
4150 sqlite3_free(pWriter
->aDlidx
);
4153 static void fts5WriteInit(
4155 Fts5SegWriter
*pWriter
,
4158 const int nBuffer
= p
->pConfig
->pgsz
+ FTS5_DATA_PADDING
;
4160 memset(pWriter
, 0, sizeof(Fts5SegWriter
));
4161 pWriter
->iSegid
= iSegid
;
4163 fts5WriteDlidxGrow(p
, pWriter
, 1);
4164 pWriter
->writer
.pgno
= 1;
4165 pWriter
->bFirstTermInPage
= 1;
4166 pWriter
->iBtPage
= 1;
4168 assert( pWriter
->writer
.buf
.n
==0 );
4169 assert( pWriter
->writer
.pgidx
.n
==0 );
4171 /* Grow the two buffers to pgsz + padding bytes in size. */
4172 sqlite3Fts5BufferSize(&p
->rc
, &pWriter
->writer
.pgidx
, nBuffer
);
4173 sqlite3Fts5BufferSize(&p
->rc
, &pWriter
->writer
.buf
, nBuffer
);
4175 if( p
->pIdxWriter
==0 ){
4176 Fts5Config
*pConfig
= p
->pConfig
;
4177 fts5IndexPrepareStmt(p
, &p
->pIdxWriter
, sqlite3_mprintf(
4178 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
4179 pConfig
->zDb
, pConfig
->zName
4183 if( p
->rc
==SQLITE_OK
){
4184 /* Initialize the 4-byte leaf-page header to 0x00. */
4185 memset(pWriter
->writer
.buf
.p
, 0, 4);
4186 pWriter
->writer
.buf
.n
= 4;
4188 /* Bind the current output segment id to the index-writer. This is an
4189 ** optimization over binding the same value over and over as rows are
4190 ** inserted into %_idx by the current writer. */
4191 sqlite3_bind_int(p
->pIdxWriter
, 1, pWriter
->iSegid
);
4196 ** Iterator pIter was used to iterate through the input segments of on an
4197 ** incremental merge operation. This function is called if the incremental
4198 ** merge step has finished but the input has not been completely exhausted.
4200 static void fts5TrimSegments(Fts5Index
*p
, Fts5Iter
*pIter
){
4203 memset(&buf
, 0, sizeof(Fts5Buffer
));
4204 for(i
=0; i
<pIter
->nSeg
&& p
->rc
==SQLITE_OK
; i
++){
4205 Fts5SegIter
*pSeg
= &pIter
->aSeg
[i
];
4206 if( pSeg
->pSeg
==0 ){
4208 }else if( pSeg
->pLeaf
==0 ){
4209 /* All keys from this input segment have been transfered to the output.
4210 ** Set both the first and last page-numbers to 0 to indicate that the
4211 ** segment is now empty. */
4212 pSeg
->pSeg
->pgnoLast
= 0;
4213 pSeg
->pSeg
->pgnoFirst
= 0;
4215 int iOff
= pSeg
->iTermLeafOffset
; /* Offset on new first leaf page */
4218 int iId
= pSeg
->pSeg
->iSegid
;
4219 u8 aHdr
[4] = {0x00, 0x00, 0x00, 0x00};
4221 iLeafRowid
= FTS5_SEGMENT_ROWID(iId
, pSeg
->iTermLeafPgno
);
4222 pData
= fts5LeafRead(p
, iLeafRowid
);
4224 if( iOff
>pData
->szLeaf
){
4225 /* This can occur if the pages that the segments occupy overlap - if
4226 ** a single page has been assigned to more than one segment. In
4227 ** this case a prior iteration of this loop may have corrupted the
4228 ** segment currently being trimmed. */
4229 p
->rc
= FTS5_CORRUPT
;
4231 fts5BufferZero(&buf
);
4232 fts5BufferGrow(&p
->rc
, &buf
, pData
->nn
);
4233 fts5BufferAppendBlob(&p
->rc
, &buf
, sizeof(aHdr
), aHdr
);
4234 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->term
.n
);
4235 fts5BufferAppendBlob(&p
->rc
, &buf
, pSeg
->term
.n
, pSeg
->term
.p
);
4236 fts5BufferAppendBlob(&p
->rc
, &buf
,pData
->szLeaf
-iOff
,&pData
->p
[iOff
]);
4237 if( p
->rc
==SQLITE_OK
){
4238 /* Set the szLeaf field */
4239 fts5PutU16(&buf
.p
[2], (u16
)buf
.n
);
4242 /* Set up the new page-index array */
4243 fts5BufferAppendVarint(&p
->rc
, &buf
, 4);
4244 if( pSeg
->iLeafPgno
==pSeg
->iTermLeafPgno
4245 && pSeg
->iEndofDoclist
<pData
->szLeaf
4246 && pSeg
->iPgidxOff
<=pData
->nn
4248 int nDiff
= pData
->szLeaf
- pSeg
->iEndofDoclist
;
4249 fts5BufferAppendVarint(&p
->rc
, &buf
, buf
.n
- 1 - nDiff
- 4);
4250 fts5BufferAppendBlob(&p
->rc
, &buf
,
4251 pData
->nn
- pSeg
->iPgidxOff
, &pData
->p
[pSeg
->iPgidxOff
]
4255 pSeg
->pSeg
->pgnoFirst
= pSeg
->iTermLeafPgno
;
4256 fts5DataDelete(p
, FTS5_SEGMENT_ROWID(iId
, 1), iLeafRowid
);
4257 fts5DataWrite(p
, iLeafRowid
, buf
.p
, buf
.n
);
4259 fts5DataRelease(pData
);
4263 fts5BufferFree(&buf
);
4266 static void fts5MergeChunkCallback(
4269 const u8
*pChunk
, int nChunk
4271 Fts5SegWriter
*pWriter
= (Fts5SegWriter
*)pCtx
;
4272 fts5WriteAppendPoslistData(p
, pWriter
, pChunk
, nChunk
);
4278 static void fts5IndexMergeLevel(
4279 Fts5Index
*p
, /* FTS5 backend object */
4280 Fts5Structure
**ppStruct
, /* IN/OUT: Stucture of index */
4281 int iLvl
, /* Level to read input from */
4282 int *pnRem
/* Write up to this many output leaves */
4284 Fts5Structure
*pStruct
= *ppStruct
;
4285 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
4286 Fts5StructureLevel
*pLvlOut
;
4287 Fts5Iter
*pIter
= 0; /* Iterator to read input data */
4288 int nRem
= pnRem
? *pnRem
: 0; /* Output leaf pages left to write */
4289 int nInput
; /* Number of input segments */
4290 Fts5SegWriter writer
; /* Writer object */
4291 Fts5StructureSegment
*pSeg
; /* Output segment */
4293 int bOldest
; /* True if the output segment is the oldest */
4294 int eDetail
= p
->pConfig
->eDetail
;
4295 const int flags
= FTS5INDEX_QUERY_NOOUTPUT
;
4296 int bTermWritten
= 0; /* True if current term already output */
4298 assert( iLvl
<pStruct
->nLevel
);
4299 assert( pLvl
->nMerge
<=pLvl
->nSeg
);
4301 memset(&writer
, 0, sizeof(Fts5SegWriter
));
4302 memset(&term
, 0, sizeof(Fts5Buffer
));
4304 pLvlOut
= &pStruct
->aLevel
[iLvl
+1];
4305 assert( pLvlOut
->nSeg
>0 );
4306 nInput
= pLvl
->nMerge
;
4307 pSeg
= &pLvlOut
->aSeg
[pLvlOut
->nSeg
-1];
4309 fts5WriteInit(p
, &writer
, pSeg
->iSegid
);
4310 writer
.writer
.pgno
= pSeg
->pgnoLast
+1;
4313 int iSegid
= fts5AllocateSegid(p
, pStruct
);
4315 /* Extend the Fts5Structure object as required to ensure the output
4316 ** segment exists. */
4317 if( iLvl
==pStruct
->nLevel
-1 ){
4318 fts5StructureAddLevel(&p
->rc
, ppStruct
);
4319 pStruct
= *ppStruct
;
4321 fts5StructureExtendLevel(&p
->rc
, pStruct
, iLvl
+1, 1, 0);
4323 pLvl
= &pStruct
->aLevel
[iLvl
];
4324 pLvlOut
= &pStruct
->aLevel
[iLvl
+1];
4326 fts5WriteInit(p
, &writer
, iSegid
);
4328 /* Add the new segment to the output level */
4329 pSeg
= &pLvlOut
->aSeg
[pLvlOut
->nSeg
];
4331 pSeg
->pgnoFirst
= 1;
4332 pSeg
->iSegid
= iSegid
;
4333 pStruct
->nSegment
++;
4335 /* Read input from all segments in the input level */
4336 nInput
= pLvl
->nSeg
;
4338 bOldest
= (pLvlOut
->nSeg
==1 && pStruct
->nLevel
==iLvl
+2);
4341 for(fts5MultiIterNew(p
, pStruct
, flags
, 0, 0, 0, iLvl
, nInput
, &pIter
);
4342 fts5MultiIterEof(p
, pIter
)==0;
4343 fts5MultiIterNext(p
, pIter
, 0, 0)
4345 Fts5SegIter
*pSegIter
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
4346 int nPos
; /* position-list size field value */
4350 pTerm
= fts5MultiIterTerm(pIter
, &nTerm
);
4351 if( nTerm
!=term
.n
|| fts5Memcmp(pTerm
, term
.p
, nTerm
) ){
4352 if( pnRem
&& writer
.nLeafWritten
>nRem
){
4355 fts5BufferSet(&p
->rc
, &term
, nTerm
, pTerm
);
4359 /* Check for key annihilation. */
4360 if( pSegIter
->nPos
==0 && (bOldest
|| pSegIter
->bDel
==0) ) continue;
4362 if( p
->rc
==SQLITE_OK
&& bTermWritten
==0 ){
4363 /* This is a new term. Append a term to the output segment. */
4364 fts5WriteAppendTerm(p
, &writer
, nTerm
, pTerm
);
4368 /* Append the rowid to the output */
4369 /* WRITEPOSLISTSIZE */
4370 fts5WriteAppendRowid(p
, &writer
, fts5MultiIterRowid(pIter
));
4372 if( eDetail
==FTS5_DETAIL_NONE
){
4373 if( pSegIter
->bDel
){
4374 fts5BufferAppendVarint(&p
->rc
, &writer
.writer
.buf
, 0);
4375 if( pSegIter
->nPos
>0 ){
4376 fts5BufferAppendVarint(&p
->rc
, &writer
.writer
.buf
, 0);
4380 /* Append the position-list data to the output */
4381 nPos
= pSegIter
->nPos
*2 + pSegIter
->bDel
;
4382 fts5BufferAppendVarint(&p
->rc
, &writer
.writer
.buf
, nPos
);
4383 fts5ChunkIterate(p
, pSegIter
, (void*)&writer
, fts5MergeChunkCallback
);
4387 /* Flush the last leaf page to disk. Set the output segment b-tree height
4388 ** and last leaf page number at the same time. */
4389 fts5WriteFinish(p
, &writer
, &pSeg
->pgnoLast
);
4391 assert( pIter
!=0 || p
->rc
!=SQLITE_OK
);
4392 if( fts5MultiIterEof(p
, pIter
) ){
4395 /* Remove the redundant segments from the %_data table */
4396 for(i
=0; i
<nInput
; i
++){
4397 fts5DataRemoveSegment(p
, pLvl
->aSeg
[i
].iSegid
);
4400 /* Remove the redundant segments from the input level */
4401 if( pLvl
->nSeg
!=nInput
){
4402 int nMove
= (pLvl
->nSeg
- nInput
) * sizeof(Fts5StructureSegment
);
4403 memmove(pLvl
->aSeg
, &pLvl
->aSeg
[nInput
], nMove
);
4405 pStruct
->nSegment
-= nInput
;
4406 pLvl
->nSeg
-= nInput
;
4408 if( pSeg
->pgnoLast
==0 ){
4410 pStruct
->nSegment
--;
4413 assert( pSeg
->pgnoLast
>0 );
4414 fts5TrimSegments(p
, pIter
);
4415 pLvl
->nMerge
= nInput
;
4418 fts5MultiIterFree(pIter
);
4419 fts5BufferFree(&term
);
4420 if( pnRem
) *pnRem
-= writer
.nLeafWritten
;
4424 ** Do up to nPg pages of automerge work on the index.
4426 ** Return true if any changes were actually made, or false otherwise.
4428 static int fts5IndexMerge(
4429 Fts5Index
*p
, /* FTS5 backend object */
4430 Fts5Structure
**ppStruct
, /* IN/OUT: Current structure of index */
4431 int nPg
, /* Pages of work to do */
4432 int nMin
/* Minimum number of segments to merge */
4436 Fts5Structure
*pStruct
= *ppStruct
;
4437 while( nRem
>0 && p
->rc
==SQLITE_OK
){
4438 int iLvl
; /* To iterate through levels */
4439 int iBestLvl
= 0; /* Level offering the most input segments */
4440 int nBest
= 0; /* Number of input segments on best level */
4442 /* Set iBestLvl to the level to read input segments from. */
4443 assert( pStruct
->nLevel
>0 );
4444 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
4445 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
4447 if( pLvl
->nMerge
>nBest
){
4449 nBest
= pLvl
->nMerge
;
4453 if( pLvl
->nSeg
>nBest
){
4459 /* If nBest is still 0, then the index must be empty. */
4461 for(iLvl
=0; nBest
==0 && iLvl
<pStruct
->nLevel
; iLvl
++){
4462 assert( pStruct
->aLevel
[iLvl
].nSeg
==0 );
4466 if( nBest
<nMin
&& pStruct
->aLevel
[iBestLvl
].nMerge
==0 ){
4470 fts5IndexMergeLevel(p
, &pStruct
, iBestLvl
, &nRem
);
4471 if( p
->rc
==SQLITE_OK
&& pStruct
->aLevel
[iBestLvl
].nMerge
==0 ){
4472 fts5StructurePromote(p
, iBestLvl
+1, pStruct
);
4475 *ppStruct
= pStruct
;
4480 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4481 ** segment. This function updates the write-counter accordingly and, if
4482 ** necessary, performs incremental merge work.
4484 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4485 ** already occurred, this function is a no-op.
4487 static void fts5IndexAutomerge(
4488 Fts5Index
*p
, /* FTS5 backend object */
4489 Fts5Structure
**ppStruct
, /* IN/OUT: Current structure of index */
4490 int nLeaf
/* Number of output leaves just written */
4492 if( p
->rc
==SQLITE_OK
&& p
->pConfig
->nAutomerge
>0 && ALWAYS((*ppStruct
)!=0) ){
4493 Fts5Structure
*pStruct
= *ppStruct
;
4494 u64 nWrite
; /* Initial value of write-counter */
4495 int nWork
; /* Number of work-quanta to perform */
4496 int nRem
; /* Number of leaf pages left to write */
4498 /* Update the write-counter. While doing so, set nWork. */
4499 nWrite
= pStruct
->nWriteCounter
;
4500 nWork
= (int)(((nWrite
+ nLeaf
) / p
->nWorkUnit
) - (nWrite
/ p
->nWorkUnit
));
4501 pStruct
->nWriteCounter
+= nLeaf
;
4502 nRem
= (int)(p
->nWorkUnit
* nWork
* pStruct
->nLevel
);
4504 fts5IndexMerge(p
, ppStruct
, nRem
, p
->pConfig
->nAutomerge
);
4508 static void fts5IndexCrisismerge(
4509 Fts5Index
*p
, /* FTS5 backend object */
4510 Fts5Structure
**ppStruct
/* IN/OUT: Current structure of index */
4512 const int nCrisis
= p
->pConfig
->nCrisisMerge
;
4513 Fts5Structure
*pStruct
= *ppStruct
;
4514 if( pStruct
&& pStruct
->nLevel
>0 ){
4516 while( p
->rc
==SQLITE_OK
&& pStruct
->aLevel
[iLvl
].nSeg
>=nCrisis
){
4517 fts5IndexMergeLevel(p
, &pStruct
, iLvl
, 0);
4518 assert( p
->rc
!=SQLITE_OK
|| pStruct
->nLevel
>(iLvl
+1) );
4519 fts5StructurePromote(p
, iLvl
+1, pStruct
);
4522 *ppStruct
= pStruct
;
4526 static int fts5IndexReturn(Fts5Index
*p
){
4532 typedef struct Fts5FlushCtx Fts5FlushCtx
;
4533 struct Fts5FlushCtx
{
4535 Fts5SegWriter writer
;
4539 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4540 ** in a 32-bit integer. Return the size of the largest prefix of this
4541 ** list nMax bytes or less in size.
4543 static int fts5PoslistPrefix(const u8
*aBuf
, int nMax
){
4546 ret
= fts5GetVarint32(aBuf
, dummy
);
4549 int i
= fts5GetVarint32(&aBuf
[ret
], dummy
);
4550 if( (ret
+ i
) > nMax
) break;
4558 ** Execute the SQL statement:
4560 ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
4562 ** This is used when a secure-delete operation removes the last term
4563 ** from a segment leaf page. In that case the %_idx entry is removed
4564 ** too. This is done to ensure that if all instances of a token are
4565 ** removed from an fts5 database in secure-delete mode, no trace of
4566 ** the token itself remains in the database.
4568 static void fts5SecureDeleteIdxEntry(
4569 Fts5Index
*p
, /* FTS5 backend object */
4570 int iSegid
, /* Id of segment to delete entry for */
4571 int iPgno
/* Page number within segment */
4574 assert( p
->pConfig
->iVersion
==FTS5_CURRENT_VERSION_SECUREDELETE
);
4575 if( p
->pDeleteFromIdx
==0 ){
4576 fts5IndexPrepareStmt(p
, &p
->pDeleteFromIdx
, sqlite3_mprintf(
4577 "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
4578 p
->pConfig
->zDb
, p
->pConfig
->zName
4581 if( p
->rc
==SQLITE_OK
){
4582 sqlite3_bind_int(p
->pDeleteFromIdx
, 1, iSegid
);
4583 sqlite3_bind_int(p
->pDeleteFromIdx
, 2, iPgno
);
4584 sqlite3_step(p
->pDeleteFromIdx
);
4585 p
->rc
= sqlite3_reset(p
->pDeleteFromIdx
);
4591 ** This is called when a secure-delete operation removes a position-list
4592 ** that overflows onto segment page iPgno of segment pSeg. This function
4593 ** rewrites node iPgno, and possibly one or more of its right-hand peers,
4594 ** to remove this portion of the position list.
4596 ** Output variable (*pbLastInDoclist) is set to true if the position-list
4597 ** removed is followed by a new term or the end-of-segment, or false if
4598 ** it is followed by another rowid/position list.
4600 static void fts5SecureDeleteOverflow(
4602 Fts5StructureSegment
*pSeg
,
4604 int *pbLastInDoclist
4606 const int bDetailNone
= (p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
4608 Fts5Data
*pLeaf
= 0;
4611 *pbLastInDoclist
= 1;
4612 for(pgno
=iPgno
; p
->rc
==SQLITE_OK
&& pgno
<=pSeg
->pgnoLast
; pgno
++){
4613 i64 iRowid
= FTS5_SEGMENT_ROWID(pSeg
->iSegid
, pgno
);
4617 pLeaf
= fts5DataRead(p
, iRowid
);
4618 if( pLeaf
==0 ) break;
4621 iNext
= fts5GetU16(&aPg
[0]);
4623 *pbLastInDoclist
= 0;
4625 if( iNext
==0 && pLeaf
->szLeaf
!=pLeaf
->nn
){
4626 fts5GetVarint32(&aPg
[pLeaf
->szLeaf
], iNext
);
4630 /* The page contains no terms or rowids. Replace it with an empty
4631 ** page and move on to the right-hand peer. */
4632 const u8 aEmpty
[] = {0x00, 0x00, 0x00, 0x04};
4633 assert_nc( bDetailNone
==0 || pLeaf
->nn
==4 );
4634 if( bDetailNone
==0 ) fts5DataWrite(p
, iRowid
, aEmpty
, sizeof(aEmpty
));
4635 fts5DataRelease(pLeaf
);
4637 }else if( bDetailNone
){
4639 }else if( iNext
>=pLeaf
->szLeaf
|| iNext
<4 ){
4640 p
->rc
= FTS5_CORRUPT
;
4643 int nShift
= iNext
- 4;
4649 /* Unless the current page footer is 0 bytes in size (in which case
4650 ** the new page footer will be as well), allocate and populate a
4651 ** buffer containing the new page footer. Set stack variables aIdx
4652 ** and nIdx accordingly. */
4653 if( pLeaf
->nn
>pLeaf
->szLeaf
){
4655 int i1
= pLeaf
->szLeaf
;
4658 aIdx
= sqlite3Fts5MallocZero(&p
->rc
, (pLeaf
->nn
-pLeaf
->szLeaf
)+2);
4659 if( aIdx
==0 ) break;
4660 i1
+= fts5GetVarint32(&aPg
[i1
], iFirst
);
4661 i2
= sqlite3Fts5PutVarint(aIdx
, iFirst
-nShift
);
4663 memcpy(&aIdx
[i2
], &aPg
[i1
], pLeaf
->nn
-i1
);
4664 i2
+= (pLeaf
->nn
-i1
);
4669 /* Modify the contents of buffer aPg[]. Set nPg to the new size
4670 ** in bytes. The new page is always smaller than the old. */
4671 nPg
= pLeaf
->szLeaf
- nShift
;
4672 memmove(&aPg
[4], &aPg
[4+nShift
], nPg
-4);
4673 fts5PutU16(&aPg
[2], nPg
);
4674 if( fts5GetU16(&aPg
[0]) ) fts5PutU16(&aPg
[0], 4);
4676 memcpy(&aPg
[nPg
], aIdx
, nIdx
);
4681 /* Write the new page to disk and exit the loop */
4682 assert( nPg
>4 || fts5GetU16(aPg
)==0 );
4683 fts5DataWrite(p
, iRowid
, aPg
, nPg
);
4687 fts5DataRelease(pLeaf
);
4691 ** Completely remove the entry that pSeg currently points to from
4694 static void fts5DoSecureDelete(
4698 const int bDetailNone
= (p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
4699 int iSegid
= pSeg
->pSeg
->iSegid
;
4700 u8
*aPg
= pSeg
->pLeaf
->p
;
4701 int nPg
= pSeg
->pLeaf
->nn
;
4702 int iPgIdx
= pSeg
->pLeaf
->szLeaf
;
4710 int bLastInDoclist
= 0;
4714 int iPrevKeyOff
= 0;
4715 int iDelKeyOff
= 0; /* Offset of deleted key, if any */
4718 aIdx
= sqlite3Fts5MallocZero(&p
->rc
, nIdx
+16);
4720 memcpy(aIdx
, &aPg
[iPgIdx
], nIdx
);
4722 /* At this point segment iterator pSeg points to the entry
4723 ** this function should remove from the b-tree segment.
4725 ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
4726 ** offset of the first byte in the position-list for the entry to
4727 ** remove. Immediately before this comes two varints that will also
4728 ** need to be removed:
4730 ** + the rowid or delta rowid value for the entry, and
4731 ** + the size of the position list in bytes.
4733 ** Or, in detail=none mode, there is a single varint prior to
4734 ** pSeg->iLeafOffset - the rowid or delta rowid value.
4736 ** This block sets the following variables:
4743 if( pSeg
->iLeafPgno
==pSeg
->iTermLeafPgno
){
4744 iStart
= pSeg
->iTermLeafOffset
;
4746 iStart
= fts5GetU16(&aPg
[0]);
4749 iSOP
= iStart
+ fts5GetVarint(&aPg
[iStart
], &iDelta
);
4750 assert_nc( iSOP
<=pSeg
->iLeafOffset
);
4753 while( iSOP
<pSeg
->iLeafOffset
){
4754 if( aPg
[iSOP
]==0x00 ) iSOP
++;
4755 if( aPg
[iSOP
]==0x00 ) iSOP
++;
4757 iSOP
= iStart
+ fts5GetVarint(&aPg
[iStart
], &iDelta
);
4761 if( iNextOff
<pSeg
->iEndofDoclist
&& aPg
[iNextOff
]==0x00 ) iNextOff
++;
4762 if( iNextOff
<pSeg
->iEndofDoclist
&& aPg
[iNextOff
]==0x00 ) iNextOff
++;
4766 iSOP
+= fts5GetVarint32(&aPg
[iSOP
], nPos
);
4767 while( iSOP
<pSeg
->iLeafOffset
){
4768 iStart
= iSOP
+ (nPos
/2);
4769 iSOP
= iStart
+ fts5GetVarint(&aPg
[iStart
], &iDelta
);
4770 iSOP
+= fts5GetVarint32(&aPg
[iSOP
], nPos
);
4772 assert_nc( iSOP
==pSeg
->iLeafOffset
);
4773 iNextOff
= pSeg
->iLeafOffset
+ pSeg
->nPos
;
4778 if( iNextOff
>=iPgIdx
){
4779 int pgno
= pSeg
->iLeafPgno
+1;
4780 fts5SecureDeleteOverflow(p
, pSeg
->pSeg
, pgno
, &bLastInDoclist
);
4783 /* Set bLastInDoclist to true if the entry being removed is the last
4784 ** in its doclist. */
4785 for(iIdx
=0, iKeyOff
=0; iIdx
<nIdx
; /* no-op */){
4787 iIdx
+= fts5GetVarint32(&aIdx
[iIdx
], iVal
);
4789 if( iKeyOff
==iNextOff
){
4795 if( fts5GetU16(&aPg
[0])==iStart
&& (bLastInDoclist
||iNextOff
==iPgIdx
) ){
4796 fts5PutU16(&aPg
[0], 0);
4799 if( bLastInDoclist
==0 ){
4800 if( iNextOff
!=iPgIdx
){
4801 iNextOff
+= fts5GetVarint(&aPg
[iNextOff
], &iNextDelta
);
4802 iOff
+= sqlite3Fts5PutVarint(&aPg
[iOff
], iDelta
+ iNextDelta
);
4805 iStart
==pSeg
->iTermLeafOffset
&& pSeg
->iLeafPgno
==pSeg
->iTermLeafPgno
4807 /* The entry being removed was the only position list in its
4808 ** doclist. Therefore the term needs to be removed as well. */
4810 for(iIdx
=0, iKeyOff
=0; iIdx
<nIdx
; iKey
++){
4812 iIdx
+= fts5GetVarint32(&aIdx
[iIdx
], iVal
);
4813 if( (iKeyOff
+iVal
)>(u32
)iStart
) break;
4817 iDelKeyOff
= iOff
= iKeyOff
;
4818 if( iNextOff
!=iPgIdx
){
4824 iDelKeyOff
= iNextOff
;
4825 iNextOff
+= fts5GetVarint32(&aPg
[iNextOff
], nPrefix2
);
4826 iNextOff
+= fts5GetVarint32(&aPg
[iNextOff
], nSuffix2
);
4829 iKeyOff
+= fts5GetVarint32(&aPg
[iKeyOff
], nPrefix
);
4831 iKeyOff
+= fts5GetVarint32(&aPg
[iKeyOff
], nSuffix
);
4833 nPrefix
= MIN(nPrefix
, nPrefix2
);
4834 nSuffix
= (nPrefix2
+ nSuffix2
) - nPrefix
;
4836 if( (iKeyOff
+nSuffix
)>iPgIdx
|| (iNextOff
+nSuffix2
)>iPgIdx
){
4837 p
->rc
= FTS5_CORRUPT
;
4840 iOff
+= sqlite3Fts5PutVarint(&aPg
[iOff
], nPrefix
);
4842 iOff
+= sqlite3Fts5PutVarint(&aPg
[iOff
], nSuffix
);
4843 if( nPrefix2
>nPrefix
){
4844 memcpy(&aPg
[iOff
], &pSeg
->term
.p
[nPrefix
], nPrefix2
-nPrefix
);
4845 iOff
+= (nPrefix2
-nPrefix
);
4847 memmove(&aPg
[iOff
], &aPg
[iNextOff
], nSuffix2
);
4849 iNextOff
+= nSuffix2
;
4852 }else if( iStart
==4 ){
4855 assert_nc( pSeg
->iLeafPgno
>pSeg
->iTermLeafPgno
);
4856 /* The entry being removed may be the only position list in
4858 for(iPgno
=pSeg
->iLeafPgno
-1; iPgno
>pSeg
->iTermLeafPgno
; iPgno
-- ){
4859 Fts5Data
*pPg
= fts5DataRead(p
, FTS5_SEGMENT_ROWID(iSegid
, iPgno
));
4860 int bEmpty
= (pPg
&& pPg
->nn
==4);
4861 fts5DataRelease(pPg
);
4862 if( bEmpty
==0 ) break;
4865 if( iPgno
==pSeg
->iTermLeafPgno
){
4866 i64 iId
= FTS5_SEGMENT_ROWID(iSegid
, pSeg
->iTermLeafPgno
);
4867 Fts5Data
*pTerm
= fts5DataRead(p
, iId
);
4868 if( pTerm
&& pTerm
->szLeaf
==pSeg
->iTermLeafOffset
){
4869 u8
*aTermIdx
= &pTerm
->p
[pTerm
->szLeaf
];
4870 int nTermIdx
= pTerm
->nn
- pTerm
->szLeaf
;
4876 int nByte
= fts5GetVarint32(&aTermIdx
[iTermIdx
], iVal
);
4878 if( (iTermIdx
+nByte
)>=nTermIdx
) break;
4881 nTermIdx
= iTermIdx
;
4883 memmove(&pTerm
->p
[iTermOff
], &pTerm
->p
[pTerm
->szLeaf
], nTermIdx
);
4884 fts5PutU16(&pTerm
->p
[2], iTermOff
);
4886 fts5DataWrite(p
, iId
, pTerm
->p
, iTermOff
+nTermIdx
);
4888 fts5SecureDeleteIdxEntry(p
, iSegid
, pSeg
->iTermLeafPgno
);
4891 fts5DataRelease(pTerm
);
4895 if( p
->rc
==SQLITE_OK
){
4896 const int nMove
= nPg
- iNextOff
;
4899 memmove(&aPg
[iOff
], &aPg
[iNextOff
], nMove
);
4900 iPgIdx
-= (iNextOff
- iOff
);
4902 fts5PutU16(&aPg
[2], iPgIdx
);
4904 nShift
= iNextOff
- iOff
;
4905 for(iIdx
=0, iKeyOff
=0, iPrevKeyOff
=0; iIdx
<nIdx
; /* no-op */){
4907 iIdx
+= fts5GetVarint32(&aIdx
[iIdx
], iVal
);
4909 if( iKeyOff
!=iDelKeyOff
){
4914 nPg
+= sqlite3Fts5PutVarint(&aPg
[nPg
], iKeyOff
- iPrevKeyOff
);
4915 iPrevKeyOff
= iKeyOff
;
4919 if( iPgIdx
==nPg
&& nIdx
>0 && pSeg
->iLeafPgno
!=1 ){
4920 fts5SecureDeleteIdxEntry(p
, iSegid
, pSeg
->iLeafPgno
);
4923 assert_nc( nPg
>4 || fts5GetU16(aPg
)==0 );
4924 fts5DataWrite(p
, FTS5_SEGMENT_ROWID(iSegid
,pSeg
->iLeafPgno
), aPg
,nPg
);
4930 ** This is called as part of flushing a delete to disk in 'secure-delete'
4931 ** mode. It edits the segments within the database described by argument
4932 ** pStruct to remove the entries for term zTerm, rowid iRowid.
4934 static void fts5FlushSecureDelete(
4936 Fts5Structure
*pStruct
,
4940 const int f
= FTS5INDEX_QUERY_SKIPHASH
;
4941 int nTerm
= (int)strlen(zTerm
);
4942 Fts5Iter
*pIter
= 0; /* Used to find term instance */
4944 fts5MultiIterNew(p
, pStruct
, f
, 0, (const u8
*)zTerm
, nTerm
, -1, 0, &pIter
);
4945 if( fts5MultiIterEof(p
, pIter
)==0 ){
4946 i64 iThis
= fts5MultiIterRowid(pIter
);
4948 fts5MultiIterNextFrom(p
, pIter
, iRowid
);
4951 if( p
->rc
==SQLITE_OK
4952 && fts5MultiIterEof(p
, pIter
)==0
4953 && iRowid
==fts5MultiIterRowid(pIter
)
4955 Fts5SegIter
*pSeg
= &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
];
4956 fts5DoSecureDelete(p
, pSeg
);
4960 fts5MultiIterFree(pIter
);
4965 ** Flush the contents of in-memory hash table iHash to a new level-0
4966 ** segment on disk. Also update the corresponding structure record.
4968 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4969 ** already occurred, this function is a no-op.
4971 static void fts5FlushOneHash(Fts5Index
*p
){
4972 Fts5Hash
*pHash
= p
->pHash
;
4973 Fts5Structure
*pStruct
;
4975 int pgnoLast
= 0; /* Last leaf page number in segment */
4977 /* Obtain a reference to the index structure and allocate a new segment-id
4978 ** for the new level-0 segment. */
4979 pStruct
= fts5StructureRead(p
);
4980 iSegid
= fts5AllocateSegid(p
, pStruct
);
4981 fts5StructureInvalidate(p
);
4984 const int pgsz
= p
->pConfig
->pgsz
;
4985 int eDetail
= p
->pConfig
->eDetail
;
4986 int bSecureDelete
= p
->pConfig
->bSecureDelete
;
4987 Fts5StructureSegment
*pSeg
; /* New segment within pStruct */
4988 Fts5Buffer
*pBuf
; /* Buffer in which to assemble leaf page */
4989 Fts5Buffer
*pPgidx
; /* Buffer in which to assemble pgidx */
4991 Fts5SegWriter writer
;
4992 fts5WriteInit(p
, &writer
, iSegid
);
4994 pBuf
= &writer
.writer
.buf
;
4995 pPgidx
= &writer
.writer
.pgidx
;
4997 /* fts5WriteInit() should have initialized the buffers to (most likely)
4998 ** the maximum space required. */
4999 assert( p
->rc
|| pBuf
->nSpace
>=(pgsz
+ FTS5_DATA_PADDING
) );
5000 assert( p
->rc
|| pPgidx
->nSpace
>=(pgsz
+ FTS5_DATA_PADDING
) );
5002 /* Begin scanning through hash table entries. This loop runs once for each
5003 ** term/doclist currently stored within the hash table. */
5004 if( p
->rc
==SQLITE_OK
){
5005 p
->rc
= sqlite3Fts5HashScanInit(pHash
, 0, 0);
5007 while( p
->rc
==SQLITE_OK
&& 0==sqlite3Fts5HashScanEof(pHash
) ){
5008 const char *zTerm
; /* Buffer containing term */
5009 int nTerm
; /* Size of zTerm in bytes */
5010 const u8
*pDoclist
; /* Pointer to doclist for this term */
5011 int nDoclist
; /* Size of doclist in bytes */
5013 /* Get the term and doclist for this entry. */
5014 sqlite3Fts5HashScanEntry(pHash
, &zTerm
, &pDoclist
, &nDoclist
);
5015 nTerm
= (int)strlen(zTerm
);
5016 if( bSecureDelete
==0 ){
5017 fts5WriteAppendTerm(p
, &writer
, nTerm
, (const u8
*)zTerm
);
5018 if( p
->rc
!=SQLITE_OK
) break;
5019 assert( writer
.bFirstRowidInPage
==0 );
5022 if( !bSecureDelete
&& pgsz
>=(pBuf
->n
+ pPgidx
->n
+ nDoclist
+ 1) ){
5023 /* The entire doclist will fit on the current leaf. */
5024 fts5BufferSafeAppendBlob(pBuf
, pDoclist
, nDoclist
);
5026 int bTermWritten
= !bSecureDelete
;
5031 /* The entire doclist will not fit on this leaf. The following
5032 ** loop iterates through the poslists that make up the current
5034 while( p
->rc
==SQLITE_OK
&& iOff
<nDoclist
){
5036 iOff
+= fts5GetVarint(&pDoclist
[iOff
], &iDelta
);
5039 /* If in secure delete mode, and if this entry in the poslist is
5040 ** in fact a delete, then edit the existing segments directly
5041 ** using fts5FlushSecureDelete(). */
5042 if( bSecureDelete
){
5043 if( eDetail
==FTS5_DETAIL_NONE
){
5044 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0x00 ){
5045 fts5FlushSecureDelete(p
, pStruct
, zTerm
, iRowid
);
5047 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0x00 ){
5054 }else if( (pDoclist
[iOff
] & 0x01) ){
5055 fts5FlushSecureDelete(p
, pStruct
, zTerm
, iRowid
);
5056 if( p
->rc
!=SQLITE_OK
|| pDoclist
[iOff
]==0x01 ){
5063 if( p
->rc
==SQLITE_OK
&& bTermWritten
==0 ){
5064 fts5WriteAppendTerm(p
, &writer
, nTerm
, (const u8
*)zTerm
);
5066 assert( p
->rc
!=SQLITE_OK
|| writer
.bFirstRowidInPage
==0 );
5069 if( writer
.bFirstRowidInPage
){
5070 fts5PutU16(&pBuf
->p
[0], (u16
)pBuf
->n
); /* first rowid on page */
5071 pBuf
->n
+= sqlite3Fts5PutVarint(&pBuf
->p
[pBuf
->n
], iRowid
);
5072 writer
.bFirstRowidInPage
= 0;
5073 fts5WriteDlidxAppend(p
, &writer
, iRowid
);
5075 pBuf
->n
+= sqlite3Fts5PutVarint(&pBuf
->p
[pBuf
->n
], iRowid
-iPrev
);
5077 if( p
->rc
!=SQLITE_OK
) break;
5078 assert( pBuf
->n
<=pBuf
->nSpace
);
5081 if( eDetail
==FTS5_DETAIL_NONE
){
5082 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0 ){
5083 pBuf
->p
[pBuf
->n
++] = 0;
5085 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0 ){
5086 pBuf
->p
[pBuf
->n
++] = 0;
5090 if( (pBuf
->n
+ pPgidx
->n
)>=pgsz
){
5091 fts5WriteFlushLeaf(p
, &writer
);
5096 int nCopy
= fts5GetPoslistSize(&pDoclist
[iOff
], &nPos
, &bDummy
);
5098 if( (pBuf
->n
+ pPgidx
->n
+ nCopy
) <= pgsz
){
5099 /* The entire poslist will fit on the current leaf. So copy
5101 fts5BufferSafeAppendBlob(pBuf
, &pDoclist
[iOff
], nCopy
);
5103 /* The entire poslist will not fit on this leaf. So it needs
5104 ** to be broken into sections. The only qualification being
5105 ** that each varint must be stored contiguously. */
5106 const u8
*pPoslist
= &pDoclist
[iOff
];
5108 while( p
->rc
==SQLITE_OK
){
5109 int nSpace
= pgsz
- pBuf
->n
- pPgidx
->n
;
5111 if( (nCopy
- iPos
)<=nSpace
){
5114 n
= fts5PoslistPrefix(&pPoslist
[iPos
], nSpace
);
5117 fts5BufferSafeAppendBlob(pBuf
, &pPoslist
[iPos
], n
);
5119 if( (pBuf
->n
+ pPgidx
->n
)>=pgsz
){
5120 fts5WriteFlushLeaf(p
, &writer
);
5122 if( iPos
>=nCopy
) break;
5130 /* TODO2: Doclist terminator written here. */
5131 /* pBuf->p[pBuf->n++] = '\0'; */
5132 assert( pBuf
->n
<=pBuf
->nSpace
);
5133 if( p
->rc
==SQLITE_OK
) sqlite3Fts5HashScanNext(pHash
);
5135 sqlite3Fts5HashClear(pHash
);
5136 fts5WriteFinish(p
, &writer
, &pgnoLast
);
5138 assert( p
->rc
!=SQLITE_OK
|| bSecureDelete
|| pgnoLast
>0 );
5140 /* Update the Fts5Structure. It is written back to the database by the
5141 ** fts5StructureRelease() call below. */
5142 if( pStruct
->nLevel
==0 ){
5143 fts5StructureAddLevel(&p
->rc
, &pStruct
);
5145 fts5StructureExtendLevel(&p
->rc
, pStruct
, 0, 1, 0);
5146 if( p
->rc
==SQLITE_OK
){
5147 pSeg
= &pStruct
->aLevel
[0].aSeg
[ pStruct
->aLevel
[0].nSeg
++ ];
5148 pSeg
->iSegid
= iSegid
;
5149 pSeg
->pgnoFirst
= 1;
5150 pSeg
->pgnoLast
= pgnoLast
;
5151 pStruct
->nSegment
++;
5153 fts5StructurePromote(p
, 0, pStruct
);
5157 fts5IndexAutomerge(p
, &pStruct
, pgnoLast
);
5158 fts5IndexCrisismerge(p
, &pStruct
);
5159 fts5StructureWrite(p
, pStruct
);
5160 fts5StructureRelease(pStruct
);
5164 ** Flush any data stored in the in-memory hash tables to the database.
5166 static void fts5IndexFlush(Fts5Index
*p
){
5167 /* Unless it is empty, flush the hash table to disk */
5168 if( p
->nPendingData
){
5170 p
->nPendingData
= 0;
5171 fts5FlushOneHash(p
);
5175 static Fts5Structure
*fts5IndexOptimizeStruct(
5177 Fts5Structure
*pStruct
5179 Fts5Structure
*pNew
= 0;
5180 sqlite3_int64 nByte
= sizeof(Fts5Structure
);
5181 int nSeg
= pStruct
->nSegment
;
5184 /* Figure out if this structure requires optimization. A structure does
5185 ** not require optimization if either:
5187 ** + it consists of fewer than two segments, or
5188 ** + all segments are on the same level, or
5189 ** + all segments except one are currently inputs to a merge operation.
5191 ** In the first case, return NULL. In the second, increment the ref-count
5192 ** on *pStruct and return a copy of the pointer to it.
5194 if( nSeg
<2 ) return 0;
5195 for(i
=0; i
<pStruct
->nLevel
; i
++){
5196 int nThis
= pStruct
->aLevel
[i
].nSeg
;
5197 if( nThis
==nSeg
|| (nThis
==nSeg
-1 && pStruct
->aLevel
[i
].nMerge
==nThis
) ){
5198 fts5StructureRef(pStruct
);
5201 assert( pStruct
->aLevel
[i
].nMerge
<=nThis
);
5204 nByte
+= (pStruct
->nLevel
+1) * sizeof(Fts5StructureLevel
);
5205 pNew
= (Fts5Structure
*)sqlite3Fts5MallocZero(&p
->rc
, nByte
);
5208 Fts5StructureLevel
*pLvl
;
5209 nByte
= nSeg
* sizeof(Fts5StructureSegment
);
5210 pNew
->nLevel
= MIN(pStruct
->nLevel
+1, FTS5_MAX_LEVEL
);
5212 pNew
->nWriteCounter
= pStruct
->nWriteCounter
;
5213 pLvl
= &pNew
->aLevel
[pNew
->nLevel
-1];
5214 pLvl
->aSeg
= (Fts5StructureSegment
*)sqlite3Fts5MallocZero(&p
->rc
, nByte
);
5218 /* Iterate through all segments, from oldest to newest. Add them to
5219 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
5220 ** segment in the data structure. */
5221 for(iLvl
=pStruct
->nLevel
-1; iLvl
>=0; iLvl
--){
5222 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
5223 pLvl
->aSeg
[iSegOut
] = pStruct
->aLevel
[iLvl
].aSeg
[iSeg
];
5227 pNew
->nSegment
= pLvl
->nSeg
= nSeg
;
5237 int sqlite3Fts5IndexOptimize(Fts5Index
*p
){
5238 Fts5Structure
*pStruct
;
5239 Fts5Structure
*pNew
= 0;
5241 assert( p
->rc
==SQLITE_OK
);
5243 pStruct
= fts5StructureRead(p
);
5244 fts5StructureInvalidate(p
);
5247 pNew
= fts5IndexOptimizeStruct(p
, pStruct
);
5249 fts5StructureRelease(pStruct
);
5251 assert( pNew
==0 || pNew
->nSegment
>0 );
5254 for(iLvl
=0; pNew
->aLevel
[iLvl
].nSeg
==0; iLvl
++){}
5255 while( p
->rc
==SQLITE_OK
&& pNew
->aLevel
[iLvl
].nSeg
>0 ){
5256 int nRem
= FTS5_OPT_WORK_UNIT
;
5257 fts5IndexMergeLevel(p
, &pNew
, iLvl
, &nRem
);
5260 fts5StructureWrite(p
, pNew
);
5261 fts5StructureRelease(pNew
);
5264 return fts5IndexReturn(p
);
5268 ** This is called to implement the special "VALUES('merge', $nMerge)"
5271 int sqlite3Fts5IndexMerge(Fts5Index
*p
, int nMerge
){
5272 Fts5Structure
*pStruct
= fts5StructureRead(p
);
5274 int nMin
= p
->pConfig
->nUsermerge
;
5275 fts5StructureInvalidate(p
);
5277 Fts5Structure
*pNew
= fts5IndexOptimizeStruct(p
, pStruct
);
5278 fts5StructureRelease(pStruct
);
5283 if( pStruct
&& pStruct
->nLevel
){
5284 if( fts5IndexMerge(p
, &pStruct
, nMerge
, nMin
) ){
5285 fts5StructureWrite(p
, pStruct
);
5288 fts5StructureRelease(pStruct
);
5290 return fts5IndexReturn(p
);
5293 static void fts5AppendRowid(
5299 UNUSED_PARAM(pUnused
);
5300 fts5BufferAppendVarint(&p
->rc
, pBuf
, iDelta
);
5303 static void fts5AppendPoslist(
5309 int nData
= pMulti
->base
.nData
;
5310 int nByte
= nData
+ 9 + 9 + FTS5_DATA_ZERO_PADDING
;
5312 if( p
->rc
==SQLITE_OK
&& 0==fts5BufferGrow(&p
->rc
, pBuf
, nByte
) ){
5313 fts5BufferSafeAppendVarint(pBuf
, iDelta
);
5314 fts5BufferSafeAppendVarint(pBuf
, nData
*2);
5315 fts5BufferSafeAppendBlob(pBuf
, pMulti
->base
.pData
, nData
);
5316 memset(&pBuf
->p
[pBuf
->n
], 0, FTS5_DATA_ZERO_PADDING
);
5321 static void fts5DoclistIterNext(Fts5DoclistIter
*pIter
){
5322 u8
*p
= pIter
->aPoslist
+ pIter
->nSize
+ pIter
->nPoslist
;
5324 assert( pIter
->aPoslist
|| (p
==0 && pIter
->aPoslist
==0) );
5325 if( p
>=pIter
->aEof
){
5326 pIter
->aPoslist
= 0;
5330 p
+= fts5GetVarint(p
, (u64
*)&iDelta
);
5331 pIter
->iRowid
+= iDelta
;
5333 /* Read position list size */
5336 pIter
->nSize
= fts5GetVarint32(p
, nPos
);
5337 pIter
->nPoslist
= (nPos
>>1);
5339 pIter
->nPoslist
= ((int)(p
[0])) >> 1;
5343 pIter
->aPoslist
= p
;
5344 if( &pIter
->aPoslist
[pIter
->nPoslist
]>pIter
->aEof
){
5345 pIter
->aPoslist
= 0;
5350 static void fts5DoclistIterInit(
5352 Fts5DoclistIter
*pIter
5354 memset(pIter
, 0, sizeof(*pIter
));
5356 pIter
->aPoslist
= pBuf
->p
;
5357 pIter
->aEof
= &pBuf
->p
[pBuf
->n
];
5358 fts5DoclistIterNext(pIter
);
5364 ** Append a doclist to buffer pBuf.
5366 ** This function assumes that space within the buffer has already been
5369 static void fts5MergeAppendDocid(
5370 Fts5Buffer
*pBuf
, /* Buffer to write to */
5371 i64
*piLastRowid
, /* IN/OUT: Previous rowid written (if any) */
5372 i64 iRowid
/* Rowid to append */
5374 assert( pBuf
->n
!=0 || (*piLastRowid
)==0 );
5375 fts5BufferSafeAppendVarint(pBuf
, iRowid
- *piLastRowid
);
5376 *piLastRowid
= iRowid
;
5380 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
5381 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
5382 fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
5383 (iLastRowid) = (iRowid); \
5387 ** Swap the contents of buffer *p1 with that of *p2.
5389 static void fts5BufferSwap(Fts5Buffer
*p1
, Fts5Buffer
*p2
){
5390 Fts5Buffer tmp
= *p1
;
5395 static void fts5NextRowid(Fts5Buffer
*pBuf
, int *piOff
, i64
*piRowid
){
5401 *piOff
= i
+ sqlite3Fts5GetVarint(&pBuf
->p
[i
], &iVal
);
5407 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
5408 ** In this case the buffers consist of a delta-encoded list of rowids only.
5410 static void fts5MergeRowidLists(
5411 Fts5Index
*p
, /* FTS5 backend object */
5412 Fts5Buffer
*p1
, /* First list to merge */
5413 int nBuf
, /* Number of entries in apBuf[] */
5414 Fts5Buffer
*aBuf
/* Array of other lists to merge into p1 */
5421 Fts5Buffer
*p2
= &aBuf
[0];
5425 memset(&out
, 0, sizeof(out
));
5427 sqlite3Fts5BufferSize(&p
->rc
, &out
, p1
->n
+ p2
->n
);
5430 fts5NextRowid(p1
, &i1
, &iRowid1
);
5431 fts5NextRowid(p2
, &i2
, &iRowid2
);
5432 while( i1
>=0 || i2
>=0 ){
5433 if( i1
>=0 && (i2
<0 || iRowid1
<iRowid2
) ){
5434 assert( iOut
==0 || iRowid1
>iOut
);
5435 fts5BufferSafeAppendVarint(&out
, iRowid1
- iOut
);
5437 fts5NextRowid(p1
, &i1
, &iRowid1
);
5439 assert( iOut
==0 || iRowid2
>iOut
);
5440 fts5BufferSafeAppendVarint(&out
, iRowid2
- iOut
);
5442 if( i1
>=0 && iRowid1
==iRowid2
){
5443 fts5NextRowid(p1
, &i1
, &iRowid1
);
5445 fts5NextRowid(p2
, &i2
, &iRowid2
);
5449 fts5BufferSwap(&out
, p1
);
5450 fts5BufferFree(&out
);
5453 typedef struct PrefixMerger PrefixMerger
;
5454 struct PrefixMerger
{
5455 Fts5DoclistIter iter
; /* Doclist iterator */
5456 i64 iPos
; /* For iterating through a position list */
5459 PrefixMerger
*pNext
; /* Next in docid/poslist order */
5462 static void fts5PrefixMergerInsertByRowid(
5463 PrefixMerger
**ppHead
,
5466 if( p
->iter
.aPoslist
){
5467 PrefixMerger
**pp
= ppHead
;
5468 while( *pp
&& p
->iter
.iRowid
>(*pp
)->iter
.iRowid
){
5476 static void fts5PrefixMergerInsertByPosition(
5477 PrefixMerger
**ppHead
,
5481 PrefixMerger
**pp
= ppHead
;
5482 while( *pp
&& p
->iPos
>(*pp
)->iPos
){
5492 ** Array aBuf[] contains nBuf doclists. These are all merged in with the
5493 ** doclist in buffer p1.
5495 static void fts5MergePrefixLists(
5496 Fts5Index
*p
, /* FTS5 backend object */
5497 Fts5Buffer
*p1
, /* First list to merge */
5498 int nBuf
, /* Number of buffers in array aBuf[] */
5499 Fts5Buffer
*aBuf
/* Other lists to merge in */
5501 #define fts5PrefixMergerNextPosition(p) \
5502 sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
5503 #define FTS5_MERGE_NLIST 16
5504 PrefixMerger aMerger
[FTS5_MERGE_NLIST
];
5505 PrefixMerger
*pHead
= 0;
5508 Fts5Buffer out
= {0, 0, 0};
5509 Fts5Buffer tmp
= {0, 0, 0};
5512 /* Initialize a doclist-iterator for each input buffer. Arrange them in
5513 ** a linked-list starting at pHead in ascending order of rowid. Avoid
5514 ** linking any iterators already at EOF into the linked list at all. */
5515 assert( nBuf
+1<=(int)(sizeof(aMerger
)/sizeof(aMerger
[0])) );
5516 memset(aMerger
, 0, sizeof(PrefixMerger
)*(nBuf
+1));
5517 pHead
= &aMerger
[nBuf
];
5518 fts5DoclistIterInit(p1
, &pHead
->iter
);
5519 for(i
=0; i
<nBuf
; i
++){
5520 fts5DoclistIterInit(&aBuf
[i
], &aMerger
[i
].iter
);
5521 fts5PrefixMergerInsertByRowid(&pHead
, &aMerger
[i
]);
5524 if( nOut
==0 ) return;
5525 nOut
+= p1
->n
+ 9 + 10*nBuf
;
5527 /* The maximum size of the output is equal to the sum of the
5528 ** input sizes + 1 varint (9 bytes). The extra varint is because if the
5529 ** first rowid in one input is a large negative number, and the first in
5530 ** the other a non-negative number, the delta for the non-negative
5531 ** number will be larger on disk than the literal integer value
5534 ** Or, if the input position-lists are corrupt, then the output might
5535 ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
5536 ** (the value PoslistNext64() uses for EOF) as a position and appending
5537 ** it to the output. This can happen at most once for each input
5538 ** position-list, hence (nBuf+1) 10 byte paddings. */
5539 if( sqlite3Fts5BufferSize(&p
->rc
, &out
, nOut
) ) return;
5542 fts5MergeAppendDocid(&out
, iLastRowid
, pHead
->iter
.iRowid
);
5544 if( pHead
->pNext
&& iLastRowid
==pHead
->pNext
->iter
.iRowid
){
5545 /* Merge data from two or more poslists */
5547 int nTmp
= FTS5_DATA_ZERO_PADDING
;
5549 PrefixMerger
*pSave
= pHead
;
5550 PrefixMerger
*pThis
= 0;
5554 while( pSave
&& pSave
->iter
.iRowid
==iLastRowid
){
5555 PrefixMerger
*pNext
= pSave
->pNext
;
5558 pSave
->aPos
= &pSave
->iter
.aPoslist
[pSave
->iter
.nSize
];
5559 fts5PrefixMergerNextPosition(pSave
);
5560 nTmp
+= pSave
->iter
.nPoslist
+ 10;
5562 fts5PrefixMergerInsertByPosition(&pHead
, pSave
);
5566 if( pHead
==0 || pHead
->pNext
==0 ){
5567 p
->rc
= FTS5_CORRUPT
;
5571 /* See the earlier comment in this function for an explanation of why
5572 ** corrupt input position lists might cause the output to consume
5573 ** at most nMerge*10 bytes of unexpected space. */
5574 if( sqlite3Fts5BufferSize(&p
->rc
, &tmp
, nTmp
+nMerge
*10) ){
5577 fts5BufferZero(&tmp
);
5580 pHead
= pThis
->pNext
;
5581 sqlite3Fts5PoslistSafeAppend(&tmp
, &iPrev
, pThis
->iPos
);
5582 fts5PrefixMergerNextPosition(pThis
);
5583 fts5PrefixMergerInsertByPosition(&pHead
, pThis
);
5585 while( pHead
->pNext
){
5587 if( pThis
->iPos
!=iPrev
){
5588 sqlite3Fts5PoslistSafeAppend(&tmp
, &iPrev
, pThis
->iPos
);
5590 fts5PrefixMergerNextPosition(pThis
);
5591 pHead
= pThis
->pNext
;
5592 fts5PrefixMergerInsertByPosition(&pHead
, pThis
);
5595 if( pHead
->iPos
!=iPrev
){
5596 sqlite3Fts5PoslistSafeAppend(&tmp
, &iPrev
, pHead
->iPos
);
5598 nTail
= pHead
->iter
.nPoslist
- pHead
->iOff
;
5600 /* WRITEPOSLISTSIZE */
5601 assert_nc( tmp
.n
+nTail
<=nTmp
);
5602 assert( tmp
.n
+nTail
<=nTmp
+nMerge
*10 );
5603 if( tmp
.n
+nTail
>nTmp
-FTS5_DATA_ZERO_PADDING
){
5604 if( p
->rc
==SQLITE_OK
) p
->rc
= FTS5_CORRUPT
;
5607 fts5BufferSafeAppendVarint(&out
, (tmp
.n
+nTail
) * 2);
5608 fts5BufferSafeAppendBlob(&out
, tmp
.p
, tmp
.n
);
5610 fts5BufferSafeAppendBlob(&out
, &pHead
->aPos
[pHead
->iOff
], nTail
);
5614 for(i
=0; i
<nBuf
+1; i
++){
5615 PrefixMerger
*pX
= &aMerger
[i
];
5616 if( pX
->iter
.aPoslist
&& pX
->iter
.iRowid
==iLastRowid
){
5617 fts5DoclistIterNext(&pX
->iter
);
5618 fts5PrefixMergerInsertByRowid(&pHead
, pX
);
5623 /* Copy poslist from pHead to output */
5624 PrefixMerger
*pThis
= pHead
;
5625 Fts5DoclistIter
*pI
= &pThis
->iter
;
5626 fts5BufferSafeAppendBlob(&out
, pI
->aPoslist
, pI
->nPoslist
+pI
->nSize
);
5627 fts5DoclistIterNext(pI
);
5628 pHead
= pThis
->pNext
;
5629 fts5PrefixMergerInsertByRowid(&pHead
, pThis
);
5634 fts5BufferFree(&tmp
);
5635 memset(&out
.p
[out
.n
], 0, FTS5_DATA_ZERO_PADDING
);
5639 static void fts5SetupPrefixIter(
5640 Fts5Index
*p
, /* Index to read from */
5641 int bDesc
, /* True for "ORDER BY rowid DESC" */
5642 int iIdx
, /* Index to scan for data */
5643 u8
*pToken
, /* Buffer containing prefix to match */
5644 int nToken
, /* Size of buffer pToken in bytes */
5645 Fts5Colset
*pColset
, /* Restrict matches to these columns */
5646 Fts5Iter
**ppIter
/* OUT: New iterator */
5648 Fts5Structure
*pStruct
;
5653 void (*xMerge
)(Fts5Index
*, Fts5Buffer
*, int, Fts5Buffer
*);
5654 void (*xAppend
)(Fts5Index
*, u64
, Fts5Iter
*, Fts5Buffer
*);
5655 if( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
){
5656 xMerge
= fts5MergeRowidLists
;
5657 xAppend
= fts5AppendRowid
;
5659 nMerge
= FTS5_MERGE_NLIST
-1;
5660 nBuf
= nMerge
*8; /* Sufficient to merge (16^8)==(2^32) lists */
5661 xMerge
= fts5MergePrefixLists
;
5662 xAppend
= fts5AppendPoslist
;
5665 aBuf
= (Fts5Buffer
*)fts5IdxMalloc(p
, sizeof(Fts5Buffer
)*nBuf
);
5666 pStruct
= fts5StructureRead(p
);
5668 if( aBuf
&& pStruct
){
5669 const int flags
= FTS5INDEX_QUERY_SCAN
5670 | FTS5INDEX_QUERY_SKIPEMPTY
5671 | FTS5INDEX_QUERY_NOOUTPUT
;
5674 Fts5Iter
*p1
= 0; /* Iterator used to gather data from index */
5679 memset(&doclist
, 0, sizeof(doclist
));
5682 const int f2
= FTS5INDEX_QUERY_SKIPEMPTY
|FTS5INDEX_QUERY_NOOUTPUT
;
5683 pToken
[0] = FTS5_MAIN_PREFIX
;
5684 fts5MultiIterNew(p
, pStruct
, f2
, pColset
, pToken
, nToken
, -1, 0, &p1
);
5685 fts5IterSetOutputCb(&p
->rc
, p1
);
5687 fts5MultiIterEof(p
, p1
)==0;
5688 fts5MultiIterNext2(p
, p1
, &dummy
)
5690 Fts5SegIter
*pSeg
= &p1
->aSeg
[ p1
->aFirst
[1].iFirst
];
5691 p1
->xSetOutputs(p1
, pSeg
);
5692 if( p1
->base
.nData
){
5693 xAppend(p
, (u64
)p1
->base
.iRowid
-(u64
)iLastRowid
, p1
, &doclist
);
5694 iLastRowid
= p1
->base
.iRowid
;
5697 fts5MultiIterFree(p1
);
5700 pToken
[0] = FTS5_MAIN_PREFIX
+ iIdx
;
5701 fts5MultiIterNew(p
, pStruct
, flags
, pColset
, pToken
, nToken
, -1, 0, &p1
);
5702 fts5IterSetOutputCb(&p
->rc
, p1
);
5704 fts5MultiIterEof(p
, p1
)==0;
5705 fts5MultiIterNext2(p
, p1
, &bNewTerm
)
5707 Fts5SegIter
*pSeg
= &p1
->aSeg
[ p1
->aFirst
[1].iFirst
];
5708 int nTerm
= pSeg
->term
.n
;
5709 const u8
*pTerm
= pSeg
->term
.p
;
5710 p1
->xSetOutputs(p1
, pSeg
);
5712 assert_nc( memcmp(pToken
, pTerm
, MIN(nToken
, nTerm
))<=0 );
5714 if( nTerm
<nToken
|| memcmp(pToken
, pTerm
, nToken
) ) break;
5717 if( p1
->base
.nData
==0 ) continue;
5719 if( p1
->base
.iRowid
<=iLastRowid
&& doclist
.n
>0 ){
5720 for(i
=0; p
->rc
==SQLITE_OK
&& doclist
.n
; i
++){
5723 assert( i1
+nMerge
<=nBuf
);
5724 for(iStore
=i1
; iStore
<i1
+nMerge
; iStore
++){
5725 if( aBuf
[iStore
].n
==0 ){
5726 fts5BufferSwap(&doclist
, &aBuf
[iStore
]);
5727 fts5BufferZero(&doclist
);
5731 if( iStore
==i1
+nMerge
){
5732 xMerge(p
, &doclist
, nMerge
, &aBuf
[i1
]);
5733 for(iStore
=i1
; iStore
<i1
+nMerge
; iStore
++){
5734 fts5BufferZero(&aBuf
[iStore
]);
5741 xAppend(p
, (u64
)p1
->base
.iRowid
-(u64
)iLastRowid
, p1
, &doclist
);
5742 iLastRowid
= p1
->base
.iRowid
;
5745 assert( (nBuf
%nMerge
)==0 );
5746 for(i
=0; i
<nBuf
; i
+=nMerge
){
5748 if( p
->rc
==SQLITE_OK
){
5749 xMerge(p
, &doclist
, nMerge
, &aBuf
[i
]);
5751 for(iFree
=i
; iFree
<i
+nMerge
; iFree
++){
5752 fts5BufferFree(&aBuf
[iFree
]);
5755 fts5MultiIterFree(p1
);
5757 pData
= fts5IdxMalloc(p
, sizeof(Fts5Data
)+doclist
.n
+FTS5_DATA_ZERO_PADDING
);
5759 pData
->p
= (u8
*)&pData
[1];
5760 pData
->nn
= pData
->szLeaf
= doclist
.n
;
5761 if( doclist
.n
) memcpy(pData
->p
, doclist
.p
, doclist
.n
);
5762 fts5MultiIterNew2(p
, pData
, bDesc
, ppIter
);
5764 fts5BufferFree(&doclist
);
5767 fts5StructureRelease(pStruct
);
5773 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
5774 ** to the document with rowid iRowid.
5776 int sqlite3Fts5IndexBeginWrite(Fts5Index
*p
, int bDelete
, i64 iRowid
){
5777 assert( p
->rc
==SQLITE_OK
);
5779 /* Allocate the hash table if it has not already been allocated */
5781 p
->rc
= sqlite3Fts5HashNew(p
->pConfig
, &p
->pHash
, &p
->nPendingData
);
5784 /* Flush the hash table to disk if required */
5785 if( iRowid
<p
->iWriteRowid
5786 || (iRowid
==p
->iWriteRowid
&& p
->bDelete
==0)
5787 || (p
->nPendingData
> p
->pConfig
->nHashSize
)
5792 p
->iWriteRowid
= iRowid
;
5793 p
->bDelete
= bDelete
;
5794 return fts5IndexReturn(p
);
5798 ** Commit data to disk.
5800 int sqlite3Fts5IndexSync(Fts5Index
*p
){
5801 assert( p
->rc
==SQLITE_OK
);
5803 sqlite3Fts5IndexCloseReader(p
);
5804 return fts5IndexReturn(p
);
5808 ** Discard any data stored in the in-memory hash tables. Do not write it
5809 ** to the database. Additionally, assume that the contents of the %_data
5810 ** table may have changed on disk. So any in-memory caches of %_data
5811 ** records must be invalidated.
5813 int sqlite3Fts5IndexRollback(Fts5Index
*p
){
5814 sqlite3Fts5IndexCloseReader(p
);
5815 fts5IndexDiscardData(p
);
5816 fts5StructureInvalidate(p
);
5817 /* assert( p->rc==SQLITE_OK ); */
5822 ** The %_data table is completely empty when this function is called. This
5823 ** function populates it with the initial structure objects for each index,
5824 ** and the initial version of the "averages" record (a zero-byte blob).
5826 int sqlite3Fts5IndexReinit(Fts5Index
*p
){
5828 fts5StructureInvalidate(p
);
5829 fts5IndexDiscardData(p
);
5830 memset(&s
, 0, sizeof(Fts5Structure
));
5831 fts5DataWrite(p
, FTS5_AVERAGES_ROWID
, (const u8
*)"", 0);
5832 fts5StructureWrite(p
, &s
);
5833 return fts5IndexReturn(p
);
5837 ** Open a new Fts5Index handle. If the bCreate argument is true, create
5838 ** and initialize the underlying %_data table.
5840 ** If successful, set *pp to point to the new object and return SQLITE_OK.
5841 ** Otherwise, set *pp to NULL and return an SQLite error code.
5843 int sqlite3Fts5IndexOpen(
5844 Fts5Config
*pConfig
,
5850 Fts5Index
*p
; /* New object */
5852 *pp
= p
= (Fts5Index
*)sqlite3Fts5MallocZero(&rc
, sizeof(Fts5Index
));
5853 if( rc
==SQLITE_OK
){
5854 p
->pConfig
= pConfig
;
5855 p
->nWorkUnit
= FTS5_WORK_UNIT
;
5856 p
->zDataTbl
= sqlite3Fts5Mprintf(&rc
, "%s_data", pConfig
->zName
);
5857 if( p
->zDataTbl
&& bCreate
){
5858 rc
= sqlite3Fts5CreateTable(
5859 pConfig
, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
5861 if( rc
==SQLITE_OK
){
5862 rc
= sqlite3Fts5CreateTable(pConfig
, "idx",
5863 "segid, term, pgno, PRIMARY KEY(segid, term)",
5867 if( rc
==SQLITE_OK
){
5868 rc
= sqlite3Fts5IndexReinit(p
);
5873 assert( rc
!=SQLITE_OK
|| p
->rc
==SQLITE_OK
);
5875 sqlite3Fts5IndexClose(p
);
5882 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
5884 int sqlite3Fts5IndexClose(Fts5Index
*p
){
5887 assert( p
->pReader
==0 );
5888 fts5StructureInvalidate(p
);
5889 sqlite3_finalize(p
->pWriter
);
5890 sqlite3_finalize(p
->pDeleter
);
5891 sqlite3_finalize(p
->pIdxWriter
);
5892 sqlite3_finalize(p
->pIdxDeleter
);
5893 sqlite3_finalize(p
->pIdxSelect
);
5894 sqlite3_finalize(p
->pDataVersion
);
5895 sqlite3_finalize(p
->pDeleteFromIdx
);
5896 sqlite3Fts5HashFree(p
->pHash
);
5897 sqlite3_free(p
->zDataTbl
);
5904 ** Argument p points to a buffer containing utf-8 text that is n bytes in
5905 ** size. Return the number of bytes in the nChar character prefix of the
5906 ** buffer, or 0 if there are less than nChar characters in total.
5908 int sqlite3Fts5IndexCharlenToBytelen(
5915 for(i
=0; i
<nChar
; i
++){
5916 if( n
>=nByte
) return 0; /* Input contains fewer than nChar chars */
5917 if( (unsigned char)p
[n
++]>=0xc0 ){
5918 if( n
>=nByte
) return 0;
5919 while( (p
[n
] & 0xc0)==0x80 ){
5922 if( i
+1==nChar
) break;
5932 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
5933 ** unicode characters in the string.
5935 static int fts5IndexCharlen(const char *pIn
, int nIn
){
5939 if( (unsigned char)pIn
[i
++]>=0xc0 ){
5940 while( i
<nIn
&& (pIn
[i
] & 0xc0)==0x80 ) i
++;
5948 ** Insert or remove data to or from the index. Each time a document is
5949 ** added to or removed from the index, this function is called one or more
5952 ** For an insert, it must be called once for each token in the new document.
5953 ** If the operation is a delete, it must be called (at least) once for each
5954 ** unique token in the document with an iCol value less than zero. The iPos
5955 ** argument is ignored for a delete.
5957 int sqlite3Fts5IndexWrite(
5958 Fts5Index
*p
, /* Index to write to */
5959 int iCol
, /* Column token appears in (-ve -> delete) */
5960 int iPos
, /* Position of token within column */
5961 const char *pToken
, int nToken
/* Token to add or remove to or from index */
5963 int i
; /* Used to iterate through indexes */
5964 int rc
= SQLITE_OK
; /* Return code */
5965 Fts5Config
*pConfig
= p
->pConfig
;
5967 assert( p
->rc
==SQLITE_OK
);
5968 assert( (iCol
<0)==p
->bDelete
);
5970 /* Add the entry to the main terms index. */
5971 rc
= sqlite3Fts5HashWrite(
5972 p
->pHash
, p
->iWriteRowid
, iCol
, iPos
, FTS5_MAIN_PREFIX
, pToken
, nToken
5975 for(i
=0; i
<pConfig
->nPrefix
&& rc
==SQLITE_OK
; i
++){
5976 const int nChar
= pConfig
->aPrefix
[i
];
5977 int nByte
= sqlite3Fts5IndexCharlenToBytelen(pToken
, nToken
, nChar
);
5979 rc
= sqlite3Fts5HashWrite(p
->pHash
,
5980 p
->iWriteRowid
, iCol
, iPos
, (char)(FTS5_MAIN_PREFIX
+i
+1), pToken
,
5990 ** Open a new iterator to iterate though all rowid that match the
5991 ** specified token or token prefix.
5993 int sqlite3Fts5IndexQuery(
5994 Fts5Index
*p
, /* FTS index to query */
5995 const char *pToken
, int nToken
, /* Token (or prefix) to query for */
5996 int flags
, /* Mask of FTS5INDEX_QUERY_X flags */
5997 Fts5Colset
*pColset
, /* Match these columns only */
5998 Fts5IndexIter
**ppIter
/* OUT: New iterator object */
6000 Fts5Config
*pConfig
= p
->pConfig
;
6002 Fts5Buffer buf
= {0, 0, 0};
6004 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
6005 assert( (flags
& FTS5INDEX_QUERY_SCAN
)==0 || flags
==FTS5INDEX_QUERY_SCAN
);
6007 if( sqlite3Fts5BufferSize(&p
->rc
, &buf
, nToken
+1)==0 ){
6008 int iIdx
= 0; /* Index to search */
6009 int iPrefixIdx
= 0; /* +1 prefix index */
6010 if( nToken
>0 ) memcpy(&buf
.p
[1], pToken
, nToken
);
6012 /* Figure out which index to search and set iIdx accordingly. If this
6013 ** is a prefix query for which there is no prefix index, set iIdx to
6014 ** greater than pConfig->nPrefix to indicate that the query will be
6015 ** satisfied by scanning multiple terms in the main index.
6017 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
6018 ** prefix-query. Instead of using a prefix-index (if one exists),
6019 ** evaluate the prefix query using the main FTS index. This is used
6020 ** for internal sanity checking by the integrity-check in debug
6023 if( pConfig
->bPrefixIndex
==0 || (flags
& FTS5INDEX_QUERY_TEST_NOIDX
) ){
6024 assert( flags
& FTS5INDEX_QUERY_PREFIX
);
6025 iIdx
= 1+pConfig
->nPrefix
;
6028 if( flags
& FTS5INDEX_QUERY_PREFIX
){
6029 int nChar
= fts5IndexCharlen(pToken
, nToken
);
6030 for(iIdx
=1; iIdx
<=pConfig
->nPrefix
; iIdx
++){
6031 int nIdxChar
= pConfig
->aPrefix
[iIdx
-1];
6032 if( nIdxChar
==nChar
) break;
6033 if( nIdxChar
==nChar
+1 ) iPrefixIdx
= iIdx
;
6037 if( iIdx
<=pConfig
->nPrefix
){
6038 /* Straight index lookup */
6039 Fts5Structure
*pStruct
= fts5StructureRead(p
);
6040 buf
.p
[0] = (u8
)(FTS5_MAIN_PREFIX
+ iIdx
);
6042 fts5MultiIterNew(p
, pStruct
, flags
| FTS5INDEX_QUERY_SKIPEMPTY
,
6043 pColset
, buf
.p
, nToken
+1, -1, 0, &pRet
6045 fts5StructureRelease(pStruct
);
6048 /* Scan multiple terms in the main index */
6049 int bDesc
= (flags
& FTS5INDEX_QUERY_DESC
)!=0;
6050 fts5SetupPrefixIter(p
, bDesc
, iPrefixIdx
, buf
.p
, nToken
+1, pColset
,&pRet
);
6052 assert( p
->rc
!=SQLITE_OK
);
6054 assert( pRet
->pColset
==0 );
6055 fts5IterSetOutputCb(&p
->rc
, pRet
);
6056 if( p
->rc
==SQLITE_OK
){
6057 Fts5SegIter
*pSeg
= &pRet
->aSeg
[pRet
->aFirst
[1].iFirst
];
6058 if( pSeg
->pLeaf
) pRet
->xSetOutputs(pRet
, pSeg
);
6064 sqlite3Fts5IterClose((Fts5IndexIter
*)pRet
);
6066 sqlite3Fts5IndexCloseReader(p
);
6069 *ppIter
= (Fts5IndexIter
*)pRet
;
6070 sqlite3Fts5BufferFree(&buf
);
6072 return fts5IndexReturn(p
);
6076 ** Return true if the iterator passed as the only argument is at EOF.
6079 ** Move to the next matching rowid.
6081 int sqlite3Fts5IterNext(Fts5IndexIter
*pIndexIter
){
6082 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
6083 assert( pIter
->pIndex
->rc
==SQLITE_OK
);
6084 fts5MultiIterNext(pIter
->pIndex
, pIter
, 0, 0);
6085 return fts5IndexReturn(pIter
->pIndex
);
6089 ** Move to the next matching term/rowid. Used by the fts5vocab module.
6091 int sqlite3Fts5IterNextScan(Fts5IndexIter
*pIndexIter
){
6092 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
6093 Fts5Index
*p
= pIter
->pIndex
;
6095 assert( pIter
->pIndex
->rc
==SQLITE_OK
);
6097 fts5MultiIterNext(p
, pIter
, 0, 0);
6098 if( p
->rc
==SQLITE_OK
){
6099 Fts5SegIter
*pSeg
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
6100 if( pSeg
->pLeaf
&& pSeg
->term
.p
[0]!=FTS5_MAIN_PREFIX
){
6101 fts5DataRelease(pSeg
->pLeaf
);
6103 pIter
->base
.bEof
= 1;
6107 return fts5IndexReturn(pIter
->pIndex
);
6111 ** Move to the next matching rowid that occurs at or after iMatch. The
6112 ** definition of "at or after" depends on whether this iterator iterates
6113 ** in ascending or descending rowid order.
6115 int sqlite3Fts5IterNextFrom(Fts5IndexIter
*pIndexIter
, i64 iMatch
){
6116 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
6117 fts5MultiIterNextFrom(pIter
->pIndex
, pIter
, iMatch
);
6118 return fts5IndexReturn(pIter
->pIndex
);
6122 ** Return the current term.
6124 const char *sqlite3Fts5IterTerm(Fts5IndexIter
*pIndexIter
, int *pn
){
6126 const char *z
= (const char*)fts5MultiIterTerm((Fts5Iter
*)pIndexIter
, &n
);
6127 assert_nc( z
|| n
<=1 );
6129 return (z
? &z
[1] : 0);
6133 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
6135 void sqlite3Fts5IterClose(Fts5IndexIter
*pIndexIter
){
6137 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
6138 Fts5Index
*pIndex
= pIter
->pIndex
;
6139 fts5MultiIterFree(pIter
);
6140 sqlite3Fts5IndexCloseReader(pIndex
);
6145 ** Read and decode the "averages" record from the database.
6147 ** Parameter anSize must point to an array of size nCol, where nCol is
6148 ** the number of user defined columns in the FTS table.
6150 int sqlite3Fts5IndexGetAverages(Fts5Index
*p
, i64
*pnRow
, i64
*anSize
){
6151 int nCol
= p
->pConfig
->nCol
;
6155 memset(anSize
, 0, sizeof(i64
) * nCol
);
6156 pData
= fts5DataRead(p
, FTS5_AVERAGES_ROWID
);
6157 if( p
->rc
==SQLITE_OK
&& pData
->nn
){
6160 i
+= fts5GetVarint(&pData
->p
[i
], (u64
*)pnRow
);
6161 for(iCol
=0; i
<pData
->nn
&& iCol
<nCol
; iCol
++){
6162 i
+= fts5GetVarint(&pData
->p
[i
], (u64
*)&anSize
[iCol
]);
6166 fts5DataRelease(pData
);
6167 return fts5IndexReturn(p
);
6171 ** Replace the current "averages" record with the contents of the buffer
6172 ** supplied as the second argument.
6174 int sqlite3Fts5IndexSetAverages(Fts5Index
*p
, const u8
*pData
, int nData
){
6175 assert( p
->rc
==SQLITE_OK
);
6176 fts5DataWrite(p
, FTS5_AVERAGES_ROWID
, pData
, nData
);
6177 return fts5IndexReturn(p
);
6181 ** Return the total number of blocks this module has read from the %_data
6182 ** table since it was created.
6184 int sqlite3Fts5IndexReads(Fts5Index
*p
){
6189 ** Set the 32-bit cookie value stored at the start of all structure
6190 ** records to the value passed as the second argument.
6192 ** Return SQLITE_OK if successful, or an SQLite error code if an error
6195 int sqlite3Fts5IndexSetCookie(Fts5Index
*p
, int iNew
){
6196 int rc
; /* Return code */
6197 Fts5Config
*pConfig
= p
->pConfig
; /* Configuration object */
6198 u8 aCookie
[4]; /* Binary representation of iNew */
6199 sqlite3_blob
*pBlob
= 0;
6201 assert( p
->rc
==SQLITE_OK
);
6202 sqlite3Fts5Put32(aCookie
, iNew
);
6204 rc
= sqlite3_blob_open(pConfig
->db
, pConfig
->zDb
, p
->zDataTbl
,
6205 "block", FTS5_STRUCTURE_ROWID
, 1, &pBlob
6207 if( rc
==SQLITE_OK
){
6208 sqlite3_blob_write(pBlob
, aCookie
, 4, 0);
6209 rc
= sqlite3_blob_close(pBlob
);
6215 int sqlite3Fts5IndexLoadConfig(Fts5Index
*p
){
6216 Fts5Structure
*pStruct
;
6217 pStruct
= fts5StructureRead(p
);
6218 fts5StructureRelease(pStruct
);
6219 return fts5IndexReturn(p
);
6223 /*************************************************************************
6224 **************************************************************************
6225 ** Below this point is the implementation of the integrity-check
6230 ** Return a simple checksum value based on the arguments.
6232 u64
sqlite3Fts5IndexEntryCksum(
6242 ret
+= (ret
<<3) + iCol
;
6243 ret
+= (ret
<<3) + iPos
;
6244 if( iIdx
>=0 ) ret
+= (ret
<<3) + (FTS5_MAIN_PREFIX
+ iIdx
);
6245 for(i
=0; i
<nTerm
; i
++) ret
+= (ret
<<3) + pTerm
[i
];
6251 ** This function is purely an internal test. It does not contribute to
6252 ** FTS functionality, or even the integrity-check, in any way.
6254 ** Instead, it tests that the same set of pgno/rowid combinations are
6255 ** visited regardless of whether the doclist-index identified by parameters
6256 ** iSegid/iLeaf is iterated in forwards or reverse order.
6258 static void fts5TestDlidxReverse(
6260 int iSegid
, /* Segment id to load from */
6261 int iLeaf
/* Load doclist-index for this leaf */
6263 Fts5DlidxIter
*pDlidx
= 0;
6267 for(pDlidx
=fts5DlidxIterInit(p
, 0, iSegid
, iLeaf
);
6268 fts5DlidxIterEof(p
, pDlidx
)==0;
6269 fts5DlidxIterNext(p
, pDlidx
)
6271 i64 iRowid
= fts5DlidxIterRowid(pDlidx
);
6272 int pgno
= fts5DlidxIterPgno(pDlidx
);
6273 assert( pgno
>iLeaf
);
6274 cksum1
+= iRowid
+ ((i64
)pgno
<<32);
6276 fts5DlidxIterFree(pDlidx
);
6279 for(pDlidx
=fts5DlidxIterInit(p
, 1, iSegid
, iLeaf
);
6280 fts5DlidxIterEof(p
, pDlidx
)==0;
6281 fts5DlidxIterPrev(p
, pDlidx
)
6283 i64 iRowid
= fts5DlidxIterRowid(pDlidx
);
6284 int pgno
= fts5DlidxIterPgno(pDlidx
);
6285 assert( fts5DlidxIterPgno(pDlidx
)>iLeaf
);
6286 cksum2
+= iRowid
+ ((i64
)pgno
<<32);
6288 fts5DlidxIterFree(pDlidx
);
6291 if( p
->rc
==SQLITE_OK
&& cksum1
!=cksum2
) p
->rc
= FTS5_CORRUPT
;
6294 static int fts5QueryCksum(
6295 Fts5Index
*p
, /* Fts5 index object */
6297 const char *z
, /* Index key to query for */
6298 int n
, /* Size of index key in bytes */
6299 int flags
, /* Flags for Fts5IndexQuery */
6300 u64
*pCksum
/* IN/OUT: Checksum value */
6302 int eDetail
= p
->pConfig
->eDetail
;
6303 u64 cksum
= *pCksum
;
6304 Fts5IndexIter
*pIter
= 0;
6305 int rc
= sqlite3Fts5IndexQuery(p
, z
, n
, flags
, 0, &pIter
);
6307 while( rc
==SQLITE_OK
&& ALWAYS(pIter
!=0) && 0==sqlite3Fts5IterEof(pIter
) ){
6308 i64 rowid
= pIter
->iRowid
;
6310 if( eDetail
==FTS5_DETAIL_NONE
){
6311 cksum
^= sqlite3Fts5IndexEntryCksum(rowid
, 0, 0, iIdx
, z
, n
);
6313 Fts5PoslistReader sReader
;
6314 for(sqlite3Fts5PoslistReaderInit(pIter
->pData
, pIter
->nData
, &sReader
);
6316 sqlite3Fts5PoslistReaderNext(&sReader
)
6318 int iCol
= FTS5_POS2COLUMN(sReader
.iPos
);
6319 int iOff
= FTS5_POS2OFFSET(sReader
.iPos
);
6320 cksum
^= sqlite3Fts5IndexEntryCksum(rowid
, iCol
, iOff
, iIdx
, z
, n
);
6323 if( rc
==SQLITE_OK
){
6324 rc
= sqlite3Fts5IterNext(pIter
);
6327 sqlite3Fts5IterClose(pIter
);
6334 ** Check if buffer z[], size n bytes, contains as series of valid utf-8
6335 ** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
6336 ** contain valid utf-8, return non-zero.
6338 static int fts5TestUtf8(const char *z
, int n
){
6342 if( (z
[i
] & 0x80)==0x00 ){
6345 if( (z
[i
] & 0xE0)==0xC0 ){
6346 if( i
+1>=n
|| (z
[i
+1] & 0xC0)!=0x80 ) return 1;
6349 if( (z
[i
] & 0xF0)==0xE0 ){
6350 if( i
+2>=n
|| (z
[i
+1] & 0xC0)!=0x80 || (z
[i
+2] & 0xC0)!=0x80 ) return 1;
6353 if( (z
[i
] & 0xF8)==0xF0 ){
6354 if( i
+3>=n
|| (z
[i
+1] & 0xC0)!=0x80 || (z
[i
+2] & 0xC0)!=0x80 ) return 1;
6355 if( (z
[i
+2] & 0xC0)!=0x80 ) return 1;
6366 ** This function is also purely an internal test. It does not contribute to
6367 ** FTS functionality, or even the integrity-check, in any way.
6369 static void fts5TestTerm(
6371 Fts5Buffer
*pPrev
, /* Previous term */
6372 const char *z
, int n
, /* Possibly new term to test */
6378 fts5BufferSet(&rc
, pPrev
, n
, (const u8
*)z
);
6380 if( rc
==SQLITE_OK
&& (pPrev
->n
!=n
|| memcmp(pPrev
->p
, z
, n
)) ){
6381 u64 cksum3
= *pCksum
;
6382 const char *zTerm
= (const char*)&pPrev
->p
[1]; /* term sans prefix-byte */
6383 int nTerm
= pPrev
->n
-1; /* Size of zTerm in bytes */
6384 int iIdx
= (pPrev
->p
[0] - FTS5_MAIN_PREFIX
);
6385 int flags
= (iIdx
==0 ? 0 : FTS5INDEX_QUERY_PREFIX
);
6389 /* Check that the results returned for ASC and DESC queries are
6390 ** the same. If not, call this corruption. */
6391 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, flags
, &ck1
);
6392 if( rc
==SQLITE_OK
){
6393 int f
= flags
|FTS5INDEX_QUERY_DESC
;
6394 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, f
, &ck2
);
6396 if( rc
==SQLITE_OK
&& ck1
!=ck2
) rc
= FTS5_CORRUPT
;
6398 /* If this is a prefix query, check that the results returned if the
6399 ** the index is disabled are the same. In both ASC and DESC order.
6401 ** This check may only be performed if the hash table is empty. This
6402 ** is because the hash table only supports a single scan query at
6403 ** a time, and the multi-iter loop from which this function is called
6404 ** is already performing such a scan.
6406 ** Also only do this if buffer zTerm contains nTerm bytes of valid
6407 ** utf-8. Otherwise, the last part of the buffer contents might contain
6408 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
6409 ** character stored in the main fts index, which will cause the
6411 if( p
->nPendingData
==0 && 0==fts5TestUtf8(zTerm
, nTerm
) ){
6412 if( iIdx
>0 && rc
==SQLITE_OK
){
6413 int f
= flags
|FTS5INDEX_QUERY_TEST_NOIDX
;
6415 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, f
, &ck2
);
6416 if( rc
==SQLITE_OK
&& ck1
!=ck2
) rc
= FTS5_CORRUPT
;
6418 if( iIdx
>0 && rc
==SQLITE_OK
){
6419 int f
= flags
|FTS5INDEX_QUERY_TEST_NOIDX
|FTS5INDEX_QUERY_DESC
;
6421 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, f
, &ck2
);
6422 if( rc
==SQLITE_OK
&& ck1
!=ck2
) rc
= FTS5_CORRUPT
;
6427 fts5BufferSet(&rc
, pPrev
, n
, (const u8
*)z
);
6429 if( rc
==SQLITE_OK
&& cksum3
!=expected
){
6438 # define fts5TestDlidxReverse(x,y,z)
6439 # define fts5TestTerm(u,v,w,x,y,z)
6445 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
6446 ** contain zero terms.
6447 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
6448 ** contain zero rowids.
6450 static void fts5IndexIntegrityCheckEmpty(
6452 Fts5StructureSegment
*pSeg
, /* Segment to check internal consistency */
6459 /* Now check that the iter.nEmpty leaves following the current leaf
6460 ** (a) exist and (b) contain no terms. */
6461 for(i
=iFirst
; p
->rc
==SQLITE_OK
&& i
<=iLast
; i
++){
6462 Fts5Data
*pLeaf
= fts5DataRead(p
, FTS5_SEGMENT_ROWID(pSeg
->iSegid
, i
));
6464 if( !fts5LeafIsTermless(pLeaf
) ) p
->rc
= FTS5_CORRUPT
;
6465 if( i
>=iNoRowid
&& 0!=fts5LeafFirstRowidOff(pLeaf
) ) p
->rc
= FTS5_CORRUPT
;
6467 fts5DataRelease(pLeaf
);
6471 static void fts5IntegrityCheckPgidx(Fts5Index
*p
, Fts5Data
*pLeaf
){
6475 Fts5Buffer buf1
= {0,0,0};
6476 Fts5Buffer buf2
= {0,0,0};
6479 while( ii
<pLeaf
->nn
&& p
->rc
==SQLITE_OK
){
6484 ii
+= fts5GetVarint32(&pLeaf
->p
[ii
], nIncr
);
6488 if( iOff
>=pLeaf
->szLeaf
){
6489 p
->rc
= FTS5_CORRUPT
;
6490 }else if( iTermOff
==nIncr
){
6492 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nByte
);
6493 if( (iOff
+nByte
)>pLeaf
->szLeaf
){
6494 p
->rc
= FTS5_CORRUPT
;
6496 fts5BufferSet(&p
->rc
, &buf1
, nByte
, &pLeaf
->p
[iOff
]);
6500 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nKeep
);
6501 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nByte
);
6502 if( nKeep
>buf1
.n
|| (iOff
+nByte
)>pLeaf
->szLeaf
){
6503 p
->rc
= FTS5_CORRUPT
;
6506 fts5BufferAppendBlob(&p
->rc
, &buf1
, nByte
, &pLeaf
->p
[iOff
]);
6509 if( p
->rc
==SQLITE_OK
){
6510 res
= fts5BufferCompare(&buf1
, &buf2
);
6511 if( res
<=0 ) p
->rc
= FTS5_CORRUPT
;
6514 fts5BufferSet(&p
->rc
, &buf2
, buf1
.n
, buf1
.p
);
6517 fts5BufferFree(&buf1
);
6518 fts5BufferFree(&buf2
);
6521 static void fts5IndexIntegrityCheckSegment(
6522 Fts5Index
*p
, /* FTS5 backend object */
6523 Fts5StructureSegment
*pSeg
/* Segment to check internal consistency */
6525 Fts5Config
*pConfig
= p
->pConfig
;
6526 int bSecureDelete
= (pConfig
->iVersion
==FTS5_CURRENT_VERSION_SECUREDELETE
);
6527 sqlite3_stmt
*pStmt
= 0;
6529 int iIdxPrevLeaf
= pSeg
->pgnoFirst
-1;
6530 int iDlidxPrevLeaf
= pSeg
->pgnoLast
;
6532 if( pSeg
->pgnoFirst
==0 ) return;
6534 fts5IndexPrepareStmt(p
, &pStmt
, sqlite3_mprintf(
6535 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
6537 pConfig
->zDb
, pConfig
->zName
, pSeg
->iSegid
6540 /* Iterate through the b-tree hierarchy. */
6541 while( p
->rc
==SQLITE_OK
&& SQLITE_ROW
==sqlite3_step(pStmt
) ){
6542 i64 iRow
; /* Rowid for this leaf */
6543 Fts5Data
*pLeaf
; /* Data for this leaf */
6545 const char *zIdxTerm
= (const char*)sqlite3_column_blob(pStmt
, 1);
6546 int nIdxTerm
= sqlite3_column_bytes(pStmt
, 1);
6547 int iIdxLeaf
= sqlite3_column_int(pStmt
, 2);
6548 int bIdxDlidx
= sqlite3_column_int(pStmt
, 3);
6550 /* If the leaf in question has already been trimmed from the segment,
6551 ** ignore this b-tree entry. Otherwise, load it into memory. */
6552 if( iIdxLeaf
<pSeg
->pgnoFirst
) continue;
6553 iRow
= FTS5_SEGMENT_ROWID(pSeg
->iSegid
, iIdxLeaf
);
6554 pLeaf
= fts5LeafRead(p
, iRow
);
6555 if( pLeaf
==0 ) break;
6557 /* Check that the leaf contains at least one term, and that it is equal
6558 ** to or larger than the split-key in zIdxTerm. Also check that if there
6559 ** is also a rowid pointer within the leaf page header, it points to a
6560 ** location before the term. */
6561 if( pLeaf
->nn
<=pLeaf
->szLeaf
){
6564 && pConfig
->iVersion
==FTS5_CURRENT_VERSION_SECUREDELETE
6565 && pLeaf
->nn
==pLeaf
->szLeaf
6568 /* special case - the very first page in a segment keeps its %_idx
6569 ** entry even if all the terms are removed from it by secure-delete
6572 p
->rc
= FTS5_CORRUPT
;
6576 int iOff
; /* Offset of first term on leaf */
6577 int iRowidOff
; /* Offset of first rowid on leaf */
6578 int nTerm
; /* Size of term on leaf in bytes */
6579 int res
; /* Comparison of term and split-key */
6581 iOff
= fts5LeafFirstTermOff(pLeaf
);
6582 iRowidOff
= fts5LeafFirstRowidOff(pLeaf
);
6583 if( iRowidOff
>=iOff
|| iOff
>=pLeaf
->szLeaf
){
6584 p
->rc
= FTS5_CORRUPT
;
6586 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nTerm
);
6587 res
= fts5Memcmp(&pLeaf
->p
[iOff
], zIdxTerm
, MIN(nTerm
, nIdxTerm
));
6588 if( res
==0 ) res
= nTerm
- nIdxTerm
;
6589 if( res
<0 ) p
->rc
= FTS5_CORRUPT
;
6592 fts5IntegrityCheckPgidx(p
, pLeaf
);
6594 fts5DataRelease(pLeaf
);
6597 /* Now check that the iter.nEmpty leaves following the current leaf
6598 ** (a) exist and (b) contain no terms. */
6599 fts5IndexIntegrityCheckEmpty(
6600 p
, pSeg
, iIdxPrevLeaf
+1, iDlidxPrevLeaf
+1, iIdxLeaf
-1
6604 /* If there is a doclist-index, check that it looks right. */
6606 Fts5DlidxIter
*pDlidx
= 0; /* For iterating through doclist index */
6607 int iPrevLeaf
= iIdxLeaf
;
6608 int iSegid
= pSeg
->iSegid
;
6612 for(pDlidx
=fts5DlidxIterInit(p
, 0, iSegid
, iIdxLeaf
);
6613 fts5DlidxIterEof(p
, pDlidx
)==0;
6614 fts5DlidxIterNext(p
, pDlidx
)
6617 /* Check any rowid-less pages that occur before the current leaf. */
6618 for(iPg
=iPrevLeaf
+1; iPg
<fts5DlidxIterPgno(pDlidx
); iPg
++){
6619 iKey
= FTS5_SEGMENT_ROWID(iSegid
, iPg
);
6620 pLeaf
= fts5DataRead(p
, iKey
);
6622 if( fts5LeafFirstRowidOff(pLeaf
)!=0 ) p
->rc
= FTS5_CORRUPT
;
6623 fts5DataRelease(pLeaf
);
6626 iPrevLeaf
= fts5DlidxIterPgno(pDlidx
);
6628 /* Check that the leaf page indicated by the iterator really does
6629 ** contain the rowid suggested by the same. */
6630 iKey
= FTS5_SEGMENT_ROWID(iSegid
, iPrevLeaf
);
6631 pLeaf
= fts5DataRead(p
, iKey
);
6634 int iRowidOff
= fts5LeafFirstRowidOff(pLeaf
);
6635 ASSERT_SZLEAF_OK(pLeaf
);
6636 if( iRowidOff
>=pLeaf
->szLeaf
){
6637 p
->rc
= FTS5_CORRUPT
;
6638 }else if( bSecureDelete
==0 || iRowidOff
>0 ){
6639 i64 iDlRowid
= fts5DlidxIterRowid(pDlidx
);
6640 fts5GetVarint(&pLeaf
->p
[iRowidOff
], (u64
*)&iRowid
);
6641 if( iRowid
<iDlRowid
|| (bSecureDelete
==0 && iRowid
!=iDlRowid
) ){
6642 p
->rc
= FTS5_CORRUPT
;
6645 fts5DataRelease(pLeaf
);
6649 iDlidxPrevLeaf
= iPg
;
6650 fts5DlidxIterFree(pDlidx
);
6651 fts5TestDlidxReverse(p
, iSegid
, iIdxLeaf
);
6653 iDlidxPrevLeaf
= pSeg
->pgnoLast
;
6654 /* TODO: Check there is no doclist index */
6657 iIdxPrevLeaf
= iIdxLeaf
;
6660 rc2
= sqlite3_finalize(pStmt
);
6661 if( p
->rc
==SQLITE_OK
) p
->rc
= rc2
;
6663 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
6665 if( p
->rc
==SQLITE_OK
&& iter
.iLeaf
!=pSeg
->pgnoLast
){
6666 p
->rc
= FTS5_CORRUPT
;
6673 ** Run internal checks to ensure that the FTS index (a) is internally
6674 ** consistent and (b) contains entries for which the XOR of the checksums
6675 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
6677 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
6678 ** checksum does not match. Return SQLITE_OK if all checks pass without
6679 ** error, or some other SQLite error code if another error (e.g. OOM)
6682 int sqlite3Fts5IndexIntegrityCheck(Fts5Index
*p
, u64 cksum
, int bUseCksum
){
6683 int eDetail
= p
->pConfig
->eDetail
;
6684 u64 cksum2
= 0; /* Checksum based on contents of indexes */
6685 Fts5Buffer poslist
= {0,0,0}; /* Buffer used to hold a poslist */
6686 Fts5Iter
*pIter
; /* Used to iterate through entire index */
6687 Fts5Structure
*pStruct
; /* Index structure */
6691 /* Used by extra internal tests only run if NDEBUG is not defined */
6692 u64 cksum3
= 0; /* Checksum based on contents of indexes */
6693 Fts5Buffer term
= {0,0,0}; /* Buffer used to hold most recent term */
6695 const int flags
= FTS5INDEX_QUERY_NOOUTPUT
;
6697 /* Load the FTS index structure */
6698 pStruct
= fts5StructureRead(p
);
6700 assert( p
->rc
!=SQLITE_OK
);
6701 return fts5IndexReturn(p
);
6704 /* Check that the internal nodes of each segment match the leaves */
6705 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
6706 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
6707 Fts5StructureSegment
*pSeg
= &pStruct
->aLevel
[iLvl
].aSeg
[iSeg
];
6708 fts5IndexIntegrityCheckSegment(p
, pSeg
);
6712 /* The cksum argument passed to this function is a checksum calculated
6713 ** based on all expected entries in the FTS index (including prefix index
6714 ** entries). This block checks that a checksum calculated based on the
6715 ** actual contents of FTS index is identical.
6717 ** Two versions of the same checksum are calculated. The first (stack
6718 ** variable cksum2) based on entries extracted from the full-text index
6719 ** while doing a linear scan of each individual index in turn.
6721 ** As each term visited by the linear scans, a separate query for the
6722 ** same term is performed. cksum3 is calculated based on the entries
6723 ** extracted by these queries.
6725 for(fts5MultiIterNew(p
, pStruct
, flags
, 0, 0, 0, -1, 0, &pIter
);
6726 fts5MultiIterEof(p
, pIter
)==0;
6727 fts5MultiIterNext(p
, pIter
, 0, 0)
6729 int n
; /* Size of term in bytes */
6730 i64 iPos
= 0; /* Position read from poslist */
6731 int iOff
= 0; /* Offset within poslist */
6732 i64 iRowid
= fts5MultiIterRowid(pIter
);
6733 char *z
= (char*)fts5MultiIterTerm(pIter
, &n
);
6735 /* If this is a new term, query for it. Update cksum3 with the results. */
6736 fts5TestTerm(p
, &term
, z
, n
, cksum2
, &cksum3
);
6739 if( eDetail
==FTS5_DETAIL_NONE
){
6740 if( 0==fts5MultiIterIsEmpty(p
, pIter
) ){
6741 cksum2
^= sqlite3Fts5IndexEntryCksum(iRowid
, 0, 0, -1, z
, n
);
6745 fts5SegiterPoslist(p
, &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
], 0, &poslist
);
6746 fts5BufferAppendBlob(&p
->rc
, &poslist
, 4, (const u8
*)"\0\0\0\0");
6747 while( 0==sqlite3Fts5PoslistNext64(poslist
.p
, poslist
.n
, &iOff
, &iPos
) ){
6748 int iCol
= FTS5_POS2COLUMN(iPos
);
6749 int iTokOff
= FTS5_POS2OFFSET(iPos
);
6750 cksum2
^= sqlite3Fts5IndexEntryCksum(iRowid
, iCol
, iTokOff
, -1, z
, n
);
6754 fts5TestTerm(p
, &term
, 0, 0, cksum2
, &cksum3
);
6756 fts5MultiIterFree(pIter
);
6757 if( p
->rc
==SQLITE_OK
&& bUseCksum
&& cksum
!=cksum2
) p
->rc
= FTS5_CORRUPT
;
6759 fts5StructureRelease(pStruct
);
6761 fts5BufferFree(&term
);
6763 fts5BufferFree(&poslist
);
6764 return fts5IndexReturn(p
);
6767 /*************************************************************************
6768 **************************************************************************
6769 ** Below this point is the implementation of the fts5_decode() scalar
6775 ** Decode a segment-data rowid from the %_data table. This function is
6776 ** the opposite of macro FTS5_SEGMENT_ROWID().
6778 static void fts5DecodeRowid(
6779 i64 iRowid
, /* Rowid from %_data table */
6780 int *piSegid
, /* OUT: Segment id */
6781 int *pbDlidx
, /* OUT: Dlidx flag */
6782 int *piHeight
, /* OUT: Height */
6783 int *piPgno
/* OUT: Page number */
6785 *piPgno
= (int)(iRowid
& (((i64
)1 << FTS5_DATA_PAGE_B
) - 1));
6786 iRowid
>>= FTS5_DATA_PAGE_B
;
6788 *piHeight
= (int)(iRowid
& (((i64
)1 << FTS5_DATA_HEIGHT_B
) - 1));
6789 iRowid
>>= FTS5_DATA_HEIGHT_B
;
6791 *pbDlidx
= (int)(iRowid
& 0x0001);
6792 iRowid
>>= FTS5_DATA_DLI_B
;
6794 *piSegid
= (int)(iRowid
& (((i64
)1 << FTS5_DATA_ID_B
) - 1));
6796 #endif /* SQLITE_TEST */
6799 static void fts5DebugRowid(int *pRc
, Fts5Buffer
*pBuf
, i64 iKey
){
6800 int iSegid
, iHeight
, iPgno
, bDlidx
; /* Rowid compenents */
6801 fts5DecodeRowid(iKey
, &iSegid
, &bDlidx
, &iHeight
, &iPgno
);
6804 if( iKey
==FTS5_AVERAGES_ROWID
){
6805 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "{averages} ");
6807 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "{structure}");
6811 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "{%ssegid=%d h=%d pgno=%d}",
6812 bDlidx
? "dlidx " : "", iSegid
, iHeight
, iPgno
6816 #endif /* SQLITE_TEST */
6819 static void fts5DebugStructure(
6820 int *pRc
, /* IN/OUT: error code */
6824 int iLvl
, iSeg
; /* Iterate through levels, segments */
6826 for(iLvl
=0; iLvl
<p
->nLevel
; iLvl
++){
6827 Fts5StructureLevel
*pLvl
= &p
->aLevel
[iLvl
];
6828 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
,
6829 " {lvl=%d nMerge=%d nSeg=%d", iLvl
, pLvl
->nMerge
, pLvl
->nSeg
6831 for(iSeg
=0; iSeg
<pLvl
->nSeg
; iSeg
++){
6832 Fts5StructureSegment
*pSeg
= &pLvl
->aSeg
[iSeg
];
6833 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " {id=%d leaves=%d..%d}",
6834 pSeg
->iSegid
, pSeg
->pgnoFirst
, pSeg
->pgnoLast
6837 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "}");
6840 #endif /* SQLITE_TEST */
6844 ** This is part of the fts5_decode() debugging aid.
6846 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
6847 ** function appends a human-readable representation of the same object
6848 ** to the buffer passed as the second argument.
6850 static void fts5DecodeStructure(
6851 int *pRc
, /* IN/OUT: error code */
6853 const u8
*pBlob
, int nBlob
6855 int rc
; /* Return code */
6856 Fts5Structure
*p
= 0; /* Decoded structure object */
6858 rc
= fts5StructureDecode(pBlob
, nBlob
, 0, &p
);
6859 if( rc
!=SQLITE_OK
){
6864 fts5DebugStructure(pRc
, pBuf
, p
);
6865 fts5StructureRelease(p
);
6867 #endif /* SQLITE_TEST */
6871 ** This is part of the fts5_decode() debugging aid.
6873 ** Arguments pBlob/nBlob contain an "averages" record. This function
6874 ** appends a human-readable representation of record to the buffer passed
6875 ** as the second argument.
6877 static void fts5DecodeAverages(
6878 int *pRc
, /* IN/OUT: error code */
6880 const u8
*pBlob
, int nBlob
6883 const char *zSpace
= "";
6887 i
+= sqlite3Fts5GetVarint(&pBlob
[i
], &iVal
);
6888 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "%s%d", zSpace
, (int)iVal
);
6892 #endif /* SQLITE_TEST */
6896 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
6897 ** each varint and append its string representation to buffer pBuf. Return
6898 ** after either the input buffer is exhausted or a 0 value is read.
6900 ** The return value is the number of bytes read from the input buffer.
6902 static int fts5DecodePoslist(int *pRc
, Fts5Buffer
*pBuf
, const u8
*a
, int n
){
6906 iOff
+= fts5GetVarint32(&a
[iOff
], iVal
);
6907 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " %d", iVal
);
6911 #endif /* SQLITE_TEST */
6915 ** The start of buffer (a/n) contains the start of a doclist. The doclist
6916 ** may or may not finish within the buffer. This function appends a text
6917 ** representation of the part of the doclist that is present to buffer
6920 ** The return value is the number of bytes read from the input buffer.
6922 static int fts5DecodeDoclist(int *pRc
, Fts5Buffer
*pBuf
, const u8
*a
, int n
){
6927 iOff
= sqlite3Fts5GetVarint(a
, (u64
*)&iDocid
);
6928 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " id=%lld", iDocid
);
6933 iOff
+= fts5GetPoslistSize(&a
[iOff
], &nPos
, &bDel
);
6934 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " nPos=%d%s", nPos
, bDel
?"*":"");
6935 iOff
+= fts5DecodePoslist(pRc
, pBuf
, &a
[iOff
], MIN(n
-iOff
, nPos
));
6938 iOff
+= sqlite3Fts5GetVarint(&a
[iOff
], (u64
*)&iDelta
);
6940 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " id=%lld", iDocid
);
6946 #endif /* SQLITE_TEST */
6950 ** This function is part of the fts5_decode() debugging function. It is
6951 ** only ever used with detail=none tables.
6953 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6954 ** tables. This function appends a human-readable version of that list to
6957 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6958 ** no-op. If an OOM or other error occurs within this function, *pRc is
6959 ** set to an SQLite error code before returning. The final state of buffer
6960 ** pBuf is undefined in this case.
6962 static void fts5DecodeRowidList(
6963 int *pRc
, /* IN/OUT: Error code */
6964 Fts5Buffer
*pBuf
, /* Buffer to append text to */
6965 const u8
*pData
, int nData
/* Data to decode list-of-rowids from */
6971 const char *zApp
= "";
6973 i
+= sqlite3Fts5GetVarint(&pData
[i
], &iVal
);
6976 if( i
<nData
&& pData
[i
]==0x00 ){
6978 if( i
<nData
&& pData
[i
]==0x00 ){
6986 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " %lld%s", iRowid
, zApp
);
6989 #endif /* SQLITE_TEST */
6993 ** The implementation of user-defined scalar function fts5_decode().
6995 static void fts5DecodeFunction(
6996 sqlite3_context
*pCtx
, /* Function call context */
6997 int nArg
, /* Number of args (always 2) */
6998 sqlite3_value
**apVal
/* Function arguments */
7000 i64 iRowid
; /* Rowid for record being decoded */
7001 int iSegid
,iHeight
,iPgno
,bDlidx
;/* Rowid components */
7002 const u8
*aBlob
; int n
; /* Record to decode */
7004 Fts5Buffer s
; /* Build up text to return here */
7005 int rc
= SQLITE_OK
; /* Return code */
7006 sqlite3_int64 nSpace
= 0;
7007 int eDetailNone
= (sqlite3_user_data(pCtx
)!=0);
7011 memset(&s
, 0, sizeof(Fts5Buffer
));
7012 iRowid
= sqlite3_value_int64(apVal
[0]);
7014 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
7015 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
7016 ** buffer overreads even if the record is corrupt. */
7017 n
= sqlite3_value_bytes(apVal
[1]);
7018 aBlob
= sqlite3_value_blob(apVal
[1]);
7019 nSpace
= n
+ FTS5_DATA_ZERO_PADDING
;
7020 a
= (u8
*)sqlite3Fts5MallocZero(&rc
, nSpace
);
7021 if( a
==0 ) goto decode_out
;
7022 if( n
>0 ) memcpy(a
, aBlob
, n
);
7024 fts5DecodeRowid(iRowid
, &iSegid
, &bDlidx
, &iHeight
, &iPgno
);
7026 fts5DebugRowid(&rc
, &s
, iRowid
);
7034 memset(&lvl
, 0, sizeof(Fts5DlidxLvl
));
7036 lvl
.iLeafPgno
= iPgno
;
7038 for(fts5DlidxLvlNext(&lvl
); lvl
.bEof
==0; fts5DlidxLvlNext(&lvl
)){
7039 sqlite3Fts5BufferAppendPrintf(&rc
, &s
,
7040 " %d(%lld)", lvl
.iLeafPgno
, lvl
.iRowid
7043 }else if( iSegid
==0 ){
7044 if( iRowid
==FTS5_AVERAGES_ROWID
){
7045 fts5DecodeAverages(&rc
, &s
, a
, n
);
7047 fts5DecodeStructure(&rc
, &s
, a
, n
);
7049 }else if( eDetailNone
){
7050 Fts5Buffer term
; /* Current term read from page */
7052 int iPgidxOff
= szLeaf
= fts5GetU16(&a
[2]);
7057 memset(&term
, 0, sizeof(Fts5Buffer
));
7059 /* Decode any entries that occur before the first term. */
7061 iPgidxOff
+= fts5GetVarint32(&a
[iPgidxOff
], iTermOff
);
7065 fts5DecodeRowidList(&rc
, &s
, &a
[4], iTermOff
-4);
7068 while( iOff
<szLeaf
){
7071 /* Read the term data for the next term*/
7072 iOff
+= fts5GetVarint32(&a
[iOff
], nAppend
);
7074 fts5BufferAppendBlob(&rc
, &term
, nAppend
, &a
[iOff
]);
7075 sqlite3Fts5BufferAppendPrintf(
7076 &rc
, &s
, " term=%.*s", term
.n
, (const char*)term
.p
7080 /* Figure out where the doclist for this term ends */
7083 iPgidxOff
+= fts5GetVarint32(&a
[iPgidxOff
], nIncr
);
7089 fts5DecodeRowidList(&rc
, &s
, &a
[iOff
], iTermOff
-iOff
);
7092 iOff
+= fts5GetVarint32(&a
[iOff
], nKeep
);
7096 fts5BufferFree(&term
);
7098 Fts5Buffer term
; /* Current term read from page */
7099 int szLeaf
; /* Offset of pgidx in a[] */
7101 int iPgidxPrev
= 0; /* Previous value read from pgidx */
7107 memset(&term
, 0, sizeof(Fts5Buffer
));
7110 sqlite3Fts5BufferSet(&rc
, &s
, 7, (const u8
*)"corrupt");
7113 iRowidOff
= fts5GetU16(&a
[0]);
7114 iPgidxOff
= szLeaf
= fts5GetU16(&a
[2]);
7116 fts5GetVarint32(&a
[iPgidxOff
], iTermOff
);
7117 }else if( iPgidxOff
>n
){
7123 /* Decode the position list tail at the start of the page */
7126 }else if( iTermOff
!=0 ){
7135 fts5DecodePoslist(&rc
, &s
, &a
[4], iOff
-4);
7137 /* Decode any more doclist data that appears on the page before the
7139 nDoclist
= (iTermOff
? iTermOff
: szLeaf
) - iOff
;
7140 if( nDoclist
+iOff
>n
){
7144 fts5DecodeDoclist(&rc
, &s
, &a
[iOff
], nDoclist
);
7146 while( iPgidxOff
<n
&& rc
==SQLITE_OK
){
7147 int bFirst
= (iPgidxOff
==szLeaf
); /* True for first term on page */
7148 int nByte
; /* Bytes of data */
7151 iPgidxOff
+= fts5GetVarint32(&a
[iPgidxOff
], nByte
);
7152 iPgidxPrev
+= nByte
;
7156 fts5GetVarint32(&a
[iPgidxOff
], nByte
);
7157 iEnd
= iPgidxPrev
+ nByte
;
7167 iOff
+= fts5GetVarint32(&a
[iOff
], nByte
);
7174 iOff
+= fts5GetVarint32(&a
[iOff
], nByte
);
7179 fts5BufferAppendBlob(&rc
, &term
, nByte
, &a
[iOff
]);
7182 sqlite3Fts5BufferAppendPrintf(
7183 &rc
, &s
, " term=%.*s", term
.n
, (const char*)term
.p
7185 iOff
+= fts5DecodeDoclist(&rc
, &s
, &a
[iOff
], iEnd
-iOff
);
7188 fts5BufferFree(&term
);
7193 if( rc
==SQLITE_OK
){
7194 sqlite3_result_text(pCtx
, (const char*)s
.p
, s
.n
, SQLITE_TRANSIENT
);
7196 sqlite3_result_error_code(pCtx
, rc
);
7200 #endif /* SQLITE_TEST */
7204 ** The implementation of user-defined scalar function fts5_rowid().
7206 static void fts5RowidFunction(
7207 sqlite3_context
*pCtx
, /* Function call context */
7208 int nArg
, /* Number of args (always 2) */
7209 sqlite3_value
**apVal
/* Function arguments */
7213 sqlite3_result_error(pCtx
, "should be: fts5_rowid(subject, ....)", -1);
7215 zArg
= (const char*)sqlite3_value_text(apVal
[0]);
7216 if( 0==sqlite3_stricmp(zArg
, "segment") ){
7220 sqlite3_result_error(pCtx
,
7221 "should be: fts5_rowid('segment', segid, pgno))", -1
7224 segid
= sqlite3_value_int(apVal
[1]);
7225 pgno
= sqlite3_value_int(apVal
[2]);
7226 iRowid
= FTS5_SEGMENT_ROWID(segid
, pgno
);
7227 sqlite3_result_int64(pCtx
, iRowid
);
7230 sqlite3_result_error(pCtx
,
7231 "first arg to fts5_rowid() must be 'segment'" , -1
7236 #endif /* SQLITE_TEST */
7239 ** This is called as part of registering the FTS5 module with database
7240 ** connection db. It registers several user-defined scalar functions useful
7243 ** If successful, SQLITE_OK is returned. If an error occurs, some other
7244 ** SQLite error code is returned instead.
7246 int sqlite3Fts5IndexInit(sqlite3
*db
){
7248 int rc
= sqlite3_create_function(
7249 db
, "fts5_decode", 2, SQLITE_UTF8
, 0, fts5DecodeFunction
, 0, 0
7252 if( rc
==SQLITE_OK
){
7253 rc
= sqlite3_create_function(
7254 db
, "fts5_decode_none", 2,
7255 SQLITE_UTF8
, (void*)db
, fts5DecodeFunction
, 0, 0
7259 if( rc
==SQLITE_OK
){
7260 rc
= sqlite3_create_function(
7261 db
, "fts5_rowid", -1, SQLITE_UTF8
, 0, fts5RowidFunction
, 0, 0
7272 int sqlite3Fts5IndexReset(Fts5Index
*p
){
7273 assert( p
->pStruct
==0 || p
->iStructVersion
!=0 );
7274 if( fts5IndexDataVersion(p
)!=p
->iStructVersion
){
7275 fts5StructureInvalidate(p
);
7277 return fts5IndexReturn(p
);