Fixes default log output to console for macOS
[sqlcipher.git] / ext / fts5 / fts5_index.c
blob333fefa2d3fa614e0fb85eee2f720bd96ec96904
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
20 #include "fts5Int.h"
23 ** Overview:
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
46 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
47 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
49 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
51 #define FTS5_MAIN_PREFIX '0'
53 #if FTS5_MAX_PREFIX_INDEXES > 31
54 # error "FTS5_MAX_PREFIX_INDEXES is too large"
55 #endif
57 #define FTS5_MAX_LEVEL 64
60 ** There are two versions of the format used for the structure record:
62 ** 1. the legacy format, that may be read by all fts5 versions, and
64 ** 2. the V2 format, which is used by contentless_delete=1 databases.
66 ** Both begin with a 4-byte "configuration cookie" value. Then, a legacy
67 ** format structure record contains a varint - the number of levels in
68 ** the structure. Whereas a V2 structure record contains the constant
69 ** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a
70 ** varint has to be at least 16256 to begin with "0xFF". And the default
71 ** maximum number of levels is 64.
73 ** See below for more on structure record formats.
75 #define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"
78 ** Details:
80 ** The %_data table managed by this module,
82 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
84 ** , contains the following 6 types of records. See the comments surrounding
85 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
86 ** assigned to each of them.
88 ** 1. Structure Records:
90 ** The set of segments that make up an index - the index structure - are
91 ** recorded in a single record within the %_data table. The record consists
92 ** of a single 32-bit configuration cookie value followed by a list of
93 ** SQLite varints.
95 ** If the structure record is a V2 record, the configuration cookie is
96 ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01].
98 ** Next, the record continues with three varints:
100 ** + number of levels,
101 ** + total number of segments on all levels,
102 ** + value of write counter.
104 ** Then, for each level from 0 to nMax:
106 ** + number of input segments in ongoing merge.
107 ** + total number of segments in level.
108 ** + for each segment from oldest to newest:
109 ** + segment id (always > 0)
110 ** + first leaf page number (often 1, always greater than 0)
111 ** + final leaf page number
113 ** Then, for V2 structures only:
115 ** + lower origin counter value,
116 ** + upper origin counter value,
117 ** + the number of tombstone hash pages.
119 ** 2. The Averages Record:
121 ** A single record within the %_data table. The data is a list of varints.
122 ** The first value is the number of rows in the index. Then, for each column
123 ** from left to right, the total number of tokens in the column for all
124 ** rows of the table.
126 ** 3. Segment leaves:
128 ** TERM/DOCLIST FORMAT:
130 ** Most of each segment leaf is taken up by term/doclist data. The
131 ** general format of term/doclist, starting with the first term
132 ** on the leaf page, is:
134 ** varint : size of first term
135 ** blob: first term data
136 ** doclist: first doclist
137 ** zero-or-more {
138 ** varint: number of bytes in common with previous term
139 ** varint: number of bytes of new term data (nNew)
140 ** blob: nNew bytes of new term data
141 ** doclist: next doclist
142 ** }
144 ** doclist format:
146 ** varint: first rowid
147 ** poslist: first poslist
148 ** zero-or-more {
149 ** varint: rowid delta (always > 0)
150 ** poslist: next poslist
151 ** }
153 ** poslist format:
155 ** varint: size of poslist in bytes multiplied by 2, not including
156 ** this field. Plus 1 if this entry carries the "delete" flag.
157 ** collist: collist for column 0
158 ** zero-or-more {
159 ** 0x01 byte
160 ** varint: column number (I)
161 ** collist: collist for column I
162 ** }
164 ** collist format:
166 ** varint: first offset + 2
167 ** zero-or-more {
168 ** varint: offset delta + 2
169 ** }
171 ** PAGE FORMAT
173 ** Each leaf page begins with a 4-byte header containing 2 16-bit
174 ** unsigned integer fields in big-endian format. They are:
176 ** * The byte offset of the first rowid on the page, if it exists
177 ** and occurs before the first term (otherwise 0).
179 ** * The byte offset of the start of the page footer. If the page
180 ** footer is 0 bytes in size, then this field is the same as the
181 ** size of the leaf page in bytes.
183 ** The page footer consists of a single varint for each term located
184 ** on the page. Each varint is the byte offset of the current term
185 ** within the page, delta-compressed against the previous value. In
186 ** other words, the first varint in the footer is the byte offset of
187 ** the first term, the second is the byte offset of the second less that
188 ** of the first, and so on.
190 ** The term/doclist format described above is accurate if the entire
191 ** term/doclist data fits on a single leaf page. If this is not the case,
192 ** the format is changed in two ways:
194 ** + if the first rowid on a page occurs before the first term, it
195 ** is stored as a literal value:
197 ** varint: first rowid
199 ** + the first term on each page is stored in the same way as the
200 ** very first term of the segment:
202 ** varint : size of first term
203 ** blob: first term data
205 ** 5. Segment doclist indexes:
207 ** Doclist indexes are themselves b-trees, however they usually consist of
208 ** a single leaf record only. The format of each doclist index leaf page
209 ** is:
211 ** * Flags byte. Bits are:
212 ** 0x01: Clear if leaf is also the root page, otherwise set.
214 ** * Page number of fts index leaf page. As a varint.
216 ** * First rowid on page indicated by previous field. As a varint.
218 ** * A list of varints, one for each subsequent termless page. A
219 ** positive delta if the termless page contains at least one rowid,
220 ** or an 0x00 byte otherwise.
222 ** Internal doclist index nodes are:
224 ** * Flags byte. Bits are:
225 ** 0x01: Clear for root page, otherwise set.
227 ** * Page number of first child page. As a varint.
229 ** * Copy of first rowid on page indicated by previous field. As a varint.
231 ** * A list of delta-encoded varints - the first rowid on each subsequent
232 ** child page.
234 ** 6. Tombstone Hash Page
236 ** These records are only ever present in contentless_delete=1 tables.
237 ** There are zero or more of these associated with each segment. They
238 ** are used to store the tombstone rowids for rows contained in the
239 ** associated segments.
241 ** The set of nHashPg tombstone hash pages associated with a single
242 ** segment together form a single hash table containing tombstone rowids.
243 ** To find the page of the hash on which a key might be stored:
245 ** iPg = (rowid % nHashPg)
247 ** Then, within page iPg, which has nSlot slots:
249 ** iSlot = (rowid / nHashPg) % nSlot
251 ** Each tombstone hash page begins with an 8 byte header:
253 ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8.
254 ** 1-byte: rowid-0-tombstone flag. This flag is only valid on the
255 ** first tombstone hash page for each segment (iPg=0). If set,
256 ** the hash table contains rowid 0. If clear, it does not.
257 ** Rowid 0 is handled specially.
258 ** 2-bytes: unused.
259 ** 4-bytes: Big-endian integer containing number of entries on page.
261 ** Following this are nSlot 4 or 8 byte slots (depending on the key-size
262 ** in the first byte of the page header). The number of slots may be
263 ** determined based on the size of the page record and the key-size:
265 ** nSlot = (nByte - 8) / key-size
269 ** Rowids for the averages and structure records in the %_data table.
271 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
272 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** A rowid is assembled by fts5_dri() from four fields, most significant
** first: segment id, doclist-index flag, dlidx tree height, page number.
** FTS5_TOMBSTONE_ROWID() uses the same layout with (1<<16) added to the
** segment id.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)

/* (segid) is parenthesized so that an expression argument, for example
** (x & 0xFFFF), is offset as a whole instead of binding to the "+". */
#define FTS5_TOMBSTONE_ROWID(segid,ipg)  fts5_dri((segid)+(1<<16), 0, 0, ipg)
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. This function is only compiled when
** SQLITE_DEBUG is defined. It takes no arguments; the parameter list is
** spelled (void) so that the definition is a full prototype.
*/
int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
307 ** Each time a blob is read from the %_data table, it is padded with this
308 ** many zero bytes. This makes it easier to decode the various record formats
309 ** without overreading if the records are corrupt.
311 #define FTS5_DATA_ZERO_PADDING 8
312 #define FTS5_DATA_PADDING 20
314 typedef struct Fts5Data Fts5Data;
315 typedef struct Fts5DlidxIter Fts5DlidxIter;
316 typedef struct Fts5DlidxLvl Fts5DlidxLvl;
317 typedef struct Fts5DlidxWriter Fts5DlidxWriter;
318 typedef struct Fts5Iter Fts5Iter;
319 typedef struct Fts5PageWriter Fts5PageWriter;
320 typedef struct Fts5SegIter Fts5SegIter;
321 typedef struct Fts5DoclistIter Fts5DoclistIter;
322 typedef struct Fts5SegWriter Fts5SegWriter;
323 typedef struct Fts5Structure Fts5Structure;
324 typedef struct Fts5StructureLevel Fts5StructureLevel;
325 typedef struct Fts5StructureSegment Fts5StructureSegment;
326 typedef struct Fts5TokenDataIter Fts5TokenDataIter;
327 typedef struct Fts5TokenDataMap Fts5TokenDataMap;
328 typedef struct Fts5TombstoneArray Fts5TombstoneArray;
330 struct Fts5Data {
331 u8 *p; /* Pointer to buffer containing record */
332 int nn; /* Size of record in bytes */
333 int szLeaf; /* Size of leaf without page-index */
337 ** One object per %_data table.
339 ** nContentlessDelete:
340 ** The number of contentless delete operations since the most recent
341 ** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked
342 ** so that extra auto-merge work can be done by fts5IndexFlush() to
343 ** account for the delete operations.
345 struct Fts5Index {
346 Fts5Config *pConfig; /* Virtual table configuration */
347 char *zDataTbl; /* Name of %_data table */
348 int nWorkUnit; /* Leaf pages in a "unit" of work */
351 ** Variables related to the accumulation of tokens and doclists within the
352 ** in-memory hash tables before they are flushed to disk.
354 Fts5Hash *pHash; /* Hash table for in-memory data */
355 int nPendingData; /* Current bytes of pending data */
356 i64 iWriteRowid; /* Rowid for current doc being written */
357 int bDelete; /* Current write is a delete */
358 int nContentlessDelete; /* Number of contentless delete ops */
359 int nPendingRow; /* Number of INSERT in hash table */
361 /* Error state. */
362 int rc; /* Current error code */
363 int flushRc;
365 /* State used by the fts5DataXXX() functions. */
366 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
367 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
368 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
369 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
370 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
371 sqlite3_stmt *pIdxSelect;
372 sqlite3_stmt *pIdxNextSelect;
373 int nRead; /* Total number of blocks read */
375 sqlite3_stmt *pDeleteFromIdx;
377 sqlite3_stmt *pDataVersion;
378 i64 iStructVersion; /* data_version when pStruct read */
379 Fts5Structure *pStruct; /* Current db structure (or NULL) */
382 struct Fts5DoclistIter {
383 u8 *aEof; /* Pointer to 1 byte past end of doclist */
385 /* Output variables. aPoslist==0 at EOF */
386 i64 iRowid;
387 u8 *aPoslist;
388 int nPoslist;
389 int nSize;
393 ** The contents of the "structure" record for each index are represented
394 ** using an Fts5Structure record in memory. Which uses instances of the
395 ** other Fts5StructureXXX types as components.
397 ** nOriginCntr:
398 ** This value is set to non-zero for structure records created for
399 ** contentlessdelete=1 tables only. In that case it represents the
400 ** origin value to apply to the next top-level segment created.
402 struct Fts5StructureSegment {
403 int iSegid; /* Segment id */
404 int pgnoFirst; /* First leaf page number in segment */
405 int pgnoLast; /* Last leaf page number in segment */
407 /* contentlessdelete=1 tables only: */
408 u64 iOrigin1;
409 u64 iOrigin2;
410 int nPgTombstone; /* Number of tombstone hash table pages */
411 u64 nEntryTombstone; /* Number of tombstone entries that "count" */
412 u64 nEntry; /* Number of rows in this segment */
414 struct Fts5StructureLevel {
415 int nMerge; /* Number of segments in incr-merge */
416 int nSeg; /* Total number of segments on level */
417 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
419 struct Fts5Structure {
420 int nRef; /* Object reference count */
421 u64 nWriteCounter; /* Total leaves written to level 0 */
422 u64 nOriginCntr; /* Origin value for next top-level segment */
423 int nSegment; /* Total segments in this structure */
424 int nLevel; /* Number of levels in this index */
425 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
429 ** An object of type Fts5SegWriter is used to write to segments.
431 struct Fts5PageWriter {
432 int pgno; /* Page number for this page */
433 int iPrevPgidx; /* Previous value written into pgidx */
434 Fts5Buffer buf; /* Buffer containing leaf data */
435 Fts5Buffer pgidx; /* Buffer containing page-index */
436 Fts5Buffer term; /* Buffer containing previous term on page */
438 struct Fts5DlidxWriter {
439 int pgno; /* Page number for this page */
440 int bPrevValid; /* True if iPrev is valid */
441 i64 iPrev; /* Previous rowid value written to page */
442 Fts5Buffer buf; /* Buffer containing page data */
444 struct Fts5SegWriter {
445 int iSegid; /* Segid to write to */
446 Fts5PageWriter writer; /* PageWriter object */
447 i64 iPrevRowid; /* Previous rowid written to current leaf */
448 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
449 u8 bFirstRowidInPage; /* True if next rowid is first in page */
450 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
451 u8 bFirstTermInPage; /* True if next term will be first in leaf */
452 int nLeafWritten; /* Number of leaf pages written */
453 int nEmpty; /* Number of contiguous term-less nodes */
455 int nDlidx; /* Allocated size of aDlidx[] array */
456 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
458 /* Values to insert into the %_idx table */
459 Fts5Buffer btterm; /* Next term to insert into %_idx table */
460 int iBtPage; /* Page number corresponding to btterm */
463 typedef struct Fts5CResult Fts5CResult;
464 struct Fts5CResult {
465 u16 iFirst; /* aSeg[] index of firstest iterator */
466 u8 bTermEq; /* True if the terms are equal */
470 ** Object for iterating through a single segment, visiting each term/rowid
471 ** pair in the segment.
473 ** pSeg:
474 ** The segment to iterate through.
476 ** iLeafPgno:
477 ** Current leaf page number within segment.
479 ** iLeafOffset:
480 ** Byte offset within the current leaf that is the first byte of the
481 ** position list data (one byte passed the position-list size field).
483 ** pLeaf:
484 ** Buffer containing current leaf page data. Set to NULL at EOF.
486 ** iTermLeafPgno, iTermLeafOffset:
487 ** Leaf page number containing the last term read from the segment. And
488 ** the offset immediately following the term data.
490 ** flags:
491 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
493 ** FTS5_SEGITER_ONETERM:
494 ** If set, set the iterator to point to EOF after the current doclist
495 ** has been exhausted. Do not proceed to the next term in the segment.
497 ** FTS5_SEGITER_REVERSE:
498 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
499 ** it is set, iterate through rowid in descending order instead of the
500 ** default ascending order.
502 ** iRowidOffset/nRowidOffset/aRowidOffset:
503 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
505 ** For each rowid on the page corresponding to the current term, the
506 ** corresponding aRowidOffset[] entry is set to the byte offset of the
507 ** start of the "position-list-size" field within the page.
509 ** iTermIdx:
510 ** Index of current term on iTermLeafPgno.
512 ** apTombstone/nTombstone:
513 ** These are used for contentless_delete=1 tables only. When the cursor
514 ** is first allocated, the apTombstone[] array is allocated so that it
515 ** is large enough for all tombstones hash pages associated with the
516 ** segment. The pages themselves are loaded lazily from the database as
517 ** they are required.
519 struct Fts5SegIter {
520 Fts5StructureSegment *pSeg; /* Segment to iterate through */
521 int flags; /* Mask of configuration flags */
522 int iLeafPgno; /* Current leaf page number */
523 Fts5Data *pLeaf; /* Current leaf data */
524 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
525 i64 iLeafOffset; /* Byte offset within current leaf */
526 Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */
528 /* Next method */
529 void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
531 /* The page and offset from which the current term was read. The offset
532 ** is the offset of the first rowid in the current doclist. */
533 int iTermLeafPgno;
534 int iTermLeafOffset;
536 int iPgidxOff; /* Next offset in pgidx */
537 int iEndofDoclist;
539 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
540 int iRowidOffset; /* Current entry in aRowidOffset[] */
541 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
542 int *aRowidOffset; /* Array of offset to rowid fields */
544 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
546 /* Variables populated based on current entry. */
547 Fts5Buffer term; /* Current term */
548 i64 iRowid; /* Current rowid */
549 int nPos; /* Number of bytes in current position list */
550 u8 bDel; /* True if the delete flag is set */
554 ** Array of tombstone pages. Reference counted.
556 struct Fts5TombstoneArray {
557 int nRef; /* Number of pointers to this object */
558 int nTombstone;
559 Fts5Data *apTombstone[1]; /* Array of tombstone pages */
/*
** The argument (x) points to an Fts5Data structure holding a leaf page.
** Assert that its szLeaf field either equals the record size or matches
** the 16-bit value stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Values for the Fts5SegIter.flags mask */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** The argument (x) points to an Fts5Data structure holding a leaf page.
** Evaluates to true when the leaf has no terms, and to false when it has
** one or more.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* 16-bit value read at offset (szLeaf + i*2) of leaf page (x) */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* First 16-bit header field of leaf page (x): offset of the first rowid */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
585 ** Object for iterating through the merged results of one or more segments,
586 ** visiting each term/rowid pair in the merged data.
588 ** nSeg is always a power of two greater than or equal to the number of
589 ** segments that this object is merging data from. Both the aSeg[] and
590 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
591 ** with zeroed objects - these are handled as if they were iterators opened
592 ** on empty segments.
594 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
595 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
596 ** comparison in this context is the index of the iterator that currently
597 ** points to the smaller term/rowid combination. Iterators at EOF are
598 ** considered to be greater than all other iterators.
600 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
601 ** the smallest key overall. aFirst[0] is unused.
603 ** poslist:
604 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
605 ** There is no way to tell if this is populated or not.
607 ** pColset:
608 ** If not NULL, points to an object containing a set of column indices.
609 ** Only matches that occur in one of these columns will be returned.
610 ** The Fts5Iter does not own the Fts5Colset object, and so it is not
611 ** freed when the iterator is closed - it is owned by the upper layer.
613 struct Fts5Iter {
614 Fts5IndexIter base; /* Base class containing output vars */
615 Fts5TokenDataIter *pTokenDataIter;
617 Fts5Index *pIndex; /* Index that owns this iterator */
618 Fts5Buffer poslist; /* Buffer containing current poslist */
619 Fts5Colset *pColset; /* Restrict matches to these columns */
621 /* Invoked to set output variables. */
622 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
624 int nSeg; /* Size of aSeg[] array */
625 int bRev; /* True to iterate in reverse order */
626 u8 bSkipEmpty; /* True to skip deleted entries */
628 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
629 Fts5CResult *aFirst; /* Current merge state (see above) */
630 Fts5SegIter aSeg[1]; /* Array of segment iterators */
634 ** An instance of the following type is used to iterate through the contents
635 ** of a doclist-index record.
637 ** pData:
638 ** Record containing the doclist-index data.
640 ** bEof:
641 ** Set to true once iterator has reached EOF.
643 ** iOff:
644 ** Set to the current offset within record pData.
646 struct Fts5DlidxLvl {
647 Fts5Data *pData; /* Data for current page of this level */
648 int iOff; /* Current offset into pData */
649 int bEof; /* At EOF already */
650 int iFirstOff; /* Used by reverse iterators */
652 /* Output variables */
653 int iLeafPgno; /* Page number of current leaf page */
654 i64 iRowid; /* First rowid on leaf iLeafPgno */
656 struct Fts5DlidxIter {
657 int nLvl;
658 int iSegid;
659 Fts5DlidxLvl aLvl[1];
662 static void fts5PutU16(u8 *aOut, u16 iVal){
663 aOut[0] = (iVal>>8);
664 aOut[1] = (iVal&0xFF);
667 static u16 fts5GetU16(const u8 *aIn){
668 return ((u16)aIn[0] << 8) + aIn[1];
672 ** The only argument points to a buffer at least 8 bytes in size. This
673 ** function interprets the first 8 bytes of the buffer as a 64-bit big-endian
674 ** unsigned integer and returns the result.
676 static u64 fts5GetU64(u8 *a){
677 return ((u64)a[0] << 56)
678 + ((u64)a[1] << 48)
679 + ((u64)a[2] << 40)
680 + ((u64)a[3] << 32)
681 + ((u64)a[4] << 24)
682 + ((u64)a[5] << 16)
683 + ((u64)a[6] << 8)
684 + ((u64)a[7] << 0);
688 ** The only argument points to a buffer at least 4 bytes in size. This
689 ** function interprets the first 4 bytes of the buffer as a 32-bit big-endian
690 ** unsigned integer and returns the result.
692 static u32 fts5GetU32(const u8 *a){
693 return ((u32)a[0] << 24)
694 + ((u32)a[1] << 16)
695 + ((u32)a[2] << 8)
696 + ((u32)a[3] << 0);
700 ** Write iVal, formated as a 64-bit big-endian unsigned integer, to the
701 ** buffer indicated by the first argument.
703 static void fts5PutU64(u8 *a, u64 iVal){
704 a[0] = ((iVal >> 56) & 0xFF);
705 a[1] = ((iVal >> 48) & 0xFF);
706 a[2] = ((iVal >> 40) & 0xFF);
707 a[3] = ((iVal >> 32) & 0xFF);
708 a[4] = ((iVal >> 24) & 0xFF);
709 a[5] = ((iVal >> 16) & 0xFF);
710 a[6] = ((iVal >> 8) & 0xFF);
711 a[7] = ((iVal >> 0) & 0xFF);
715 ** Write iVal, formated as a 32-bit big-endian unsigned integer, to the
716 ** buffer indicated by the first argument.
718 static void fts5PutU32(u8 *a, u32 iVal){
719 a[0] = ((iVal >> 24) & 0xFF);
720 a[1] = ((iVal >> 16) & 0xFF);
721 a[2] = ((iVal >> 8) & 0xFF);
722 a[3] = ((iVal >> 0) & 0xFF);
726 ** Allocate and return a buffer at least nByte bytes in size.
728 ** If an OOM error is encountered, return NULL and set the error code in
729 ** the Fts5Index handle passed as the first argument.
731 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
732 return sqlite3Fts5MallocZero(&p->rc, nByte);
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** The comparison goes through fts5Memcmp(), as fts5BufferCompare() does,
** rather than calling memcmp() directly: passing a NULL pointer to
** memcmp() is undefined even when the length is zero, and an empty
** buffer may have a NULL data pointer. NOTE(review): this relies on
** fts5Memcmp() (fts5Int.h) skipping the memcmp() call when the length
** is <= 0 - confirm.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res;
  assert( nCmp<=0 || pLeft->p!=0 );
  assert( nCmp<=0 || pRight!=0 );
  res = fts5Memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
755 ** Compare the contents of the two buffers using memcmp(). If one buffer
756 ** is a prefix of the other, it is considered the lesser.
758 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
759 ** +ve if pRight is smaller than pLeft. In other words:
761 ** res = *pLeft - *pRight
763 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
764 int nCmp, res;
765 nCmp = MIN(pLeft->n, pRight->n);
766 assert( nCmp<=0 || pLeft->p!=0 );
767 assert( nCmp<=0 || pRight->p!=0 );
768 res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
769 return (res==0 ? (pLeft->n - pRight->n) : res);
772 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
773 int ret;
774 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
775 return ret;
779 ** Close the read-only blob handle, if it is open.
781 void sqlite3Fts5IndexCloseReader(Fts5Index *p){
782 if( p->pReader ){
783 sqlite3_blob *pReader = p->pReader;
784 p->pReader = 0;
785 sqlite3_blob_close(pReader);
790 ** Retrieve a record from the %_data table.
792 ** If an error occurs, NULL is returned and an error left in the
793 ** Fts5Index object.
795 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
796 Fts5Data *pRet = 0;
797 if( p->rc==SQLITE_OK ){
798 int rc = SQLITE_OK;
800 if( p->pReader ){
801 /* This call may return SQLITE_ABORT if there has been a savepoint
802 ** rollback since it was last used. In this case a new blob handle
803 ** is required. */
804 sqlite3_blob *pBlob = p->pReader;
805 p->pReader = 0;
806 rc = sqlite3_blob_reopen(pBlob, iRowid);
807 assert( p->pReader==0 );
808 p->pReader = pBlob;
809 if( rc!=SQLITE_OK ){
810 sqlite3Fts5IndexCloseReader(p);
812 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
815 /* If the blob handle is not open at this point, open it and seek
816 ** to the requested entry. */
817 if( p->pReader==0 && rc==SQLITE_OK ){
818 Fts5Config *pConfig = p->pConfig;
819 rc = sqlite3_blob_open(pConfig->db,
820 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
824 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
825 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
826 ** All the reasons those functions might return SQLITE_ERROR - missing
827 ** table, missing row, non-blob/text in block column - indicate
828 ** backing store corruption. */
829 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
831 if( rc==SQLITE_OK ){
832 u8 *aOut = 0; /* Read blob data into this buffer */
833 int nByte = sqlite3_blob_bytes(p->pReader);
834 sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
835 pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
836 if( pRet ){
837 pRet->nn = nByte;
838 aOut = pRet->p = (u8*)&pRet[1];
839 }else{
840 rc = SQLITE_NOMEM;
843 if( rc==SQLITE_OK ){
844 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
846 if( rc!=SQLITE_OK ){
847 sqlite3_free(pRet);
848 pRet = 0;
849 }else{
850 /* TODO1: Fix this */
851 pRet->p[nByte] = 0x00;
852 pRet->p[nByte+1] = 0x00;
853 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
856 p->rc = rc;
857 p->nRead++;
860 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
861 return pRet;
866 ** Release a reference to data record returned by an earlier call to
867 ** fts5DataRead().
869 static void fts5DataRelease(Fts5Data *pData){
870 sqlite3_free(pData);
873 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
874 Fts5Data *pRet = fts5DataRead(p, iRowid);
875 if( pRet ){
876 if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
877 p->rc = FTS5_CORRUPT;
878 fts5DataRelease(pRet);
879 pRet = 0;
882 return pRet;
885 static int fts5IndexPrepareStmt(
886 Fts5Index *p,
887 sqlite3_stmt **ppStmt,
888 char *zSql
890 if( p->rc==SQLITE_OK ){
891 if( zSql ){
892 p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
893 SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
894 ppStmt, 0);
895 }else{
896 p->rc = SQLITE_NOMEM;
899 sqlite3_free(zSql);
900 return p->rc;
905 ** INSERT OR REPLACE a record into the %_data table.
907 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
908 if( p->rc!=SQLITE_OK ) return;
910 if( p->pWriter==0 ){
911 Fts5Config *pConfig = p->pConfig;
912 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
913 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
914 pConfig->zDb, pConfig->zName
916 if( p->rc ) return;
919 sqlite3_bind_int64(p->pWriter, 1, iRowid);
920 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
921 sqlite3_step(p->pWriter);
922 p->rc = sqlite3_reset(p->pWriter);
923 sqlite3_bind_null(p->pWriter, 2);
927 ** Execute the following SQL:
929 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
931 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
932 if( p->rc!=SQLITE_OK ) return;
934 if( p->pDeleter==0 ){
935 Fts5Config *pConfig = p->pConfig;
936 char *zSql = sqlite3_mprintf(
937 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
938 pConfig->zDb, pConfig->zName
940 if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
943 sqlite3_bind_int64(p->pDeleter, 1, iFirst);
944 sqlite3_bind_int64(p->pDeleter, 2, iLast);
945 sqlite3_step(p->pDeleter);
946 p->rc = sqlite3_reset(p->pDeleter);
950 ** Remove all records associated with segment iSegid.
952 static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){
953 int iSegid = pSeg->iSegid;
954 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
955 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
956 fts5DataDelete(p, iFirst, iLast);
958 if( pSeg->nPgTombstone ){
959 i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0);
960 i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1);
961 fts5DataDelete(p, iTomb1, iTomb2);
963 if( p->pIdxDeleter==0 ){
964 Fts5Config *pConfig = p->pConfig;
965 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
966 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
967 pConfig->zDb, pConfig->zName
970 if( p->rc==SQLITE_OK ){
971 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
972 sqlite3_step(p->pIdxDeleter);
973 p->rc = sqlite3_reset(p->pIdxDeleter);
978 ** Release a reference to an Fts5Structure object returned by an earlier
979 ** call to fts5StructureRead() or fts5StructureDecode().
981 static void fts5StructureRelease(Fts5Structure *pStruct){
982 if( pStruct && 0>=(--pStruct->nRef) ){
983 int i;
984 assert( pStruct->nRef==0 );
985 for(i=0; i<pStruct->nLevel; i++){
986 sqlite3_free(pStruct->aLevel[i].aSeg);
988 sqlite3_free(pStruct);
992 static void fts5StructureRef(Fts5Structure *pStruct){
993 pStruct->nRef++;
996 void *sqlite3Fts5StructureRef(Fts5Index *p){
997 fts5StructureRef(p->pStruct);
998 return (void*)p->pStruct;
1000 void sqlite3Fts5StructureRelease(void *p){
1001 if( p ){
1002 fts5StructureRelease((Fts5Structure*)p);
1005 int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
1006 if( p->pStruct!=(Fts5Structure*)pStruct ){
1007 return SQLITE_ABORT;
1009 return SQLITE_OK;
1013 ** Ensure that structure object (*pp) is writable.
1015 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
1016 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
1018 static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
1019 Fts5Structure *p = *pp;
1020 if( *pRc==SQLITE_OK && p->nRef>1 ){
1021 i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
1022 Fts5Structure *pNew;
1023 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
1024 if( pNew ){
1025 int i;
1026 memcpy(pNew, p, nByte);
1027 for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
1028 for(i=0; i<p->nLevel; i++){
1029 Fts5StructureLevel *pLvl = &pNew->aLevel[i];
1030 nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
1031 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
1032 if( pLvl->aSeg==0 ){
1033 for(i=0; i<p->nLevel; i++){
1034 sqlite3_free(pNew->aLevel[i].aSeg);
1036 sqlite3_free(pNew);
1037 return;
1039 memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
1041 p->nRef--;
1042 pNew->nRef = 1;
1044 *pp = pNew;
1049 ** Deserialize and return the structure record currently stored in serialized
1050 ** form within buffer pData/nData.
1052 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1053 ** are over-allocated by one slot. This allows the structure contents
1054 ** to be more easily edited.
1056 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
1057 ** returned. Otherwise, *ppOut is set to point to the new object and
1058 ** SQLITE_OK returned.
1060 static int fts5StructureDecode(
1061 const u8 *pData, /* Buffer containing serialized structure */
1062 int nData, /* Size of buffer pData in bytes */
1063 int *piCookie, /* Configuration cookie value */
1064 Fts5Structure **ppOut /* OUT: Deserialized object */
1066 int rc = SQLITE_OK;
1067 int i = 0;
1068 int iLvl;
1069 int nLevel = 0;
1070 int nSegment = 0;
1071 sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
1072 Fts5Structure *pRet = 0; /* Structure object to return */
1073 int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */
1074 u64 nOriginCntr = 0; /* Largest origin value seen so far */
1076 /* Grab the cookie value */
1077 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
1078 i = 4;
1080 /* Check if this is a V2 structure record. Set bStructureV2 if it is. */
1081 if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2, 4) ){
1082 i += 4;
1083 bStructureV2 = 1;
1086 /* Read the total number of levels and segments from the start of the
1087 ** structure record. */
1088 i += fts5GetVarint32(&pData[i], nLevel);
1089 i += fts5GetVarint32(&pData[i], nSegment);
1090 if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
1091 || nSegment>FTS5_MAX_SEGMENT || nSegment<0
1093 return FTS5_CORRUPT;
1095 nByte = (
1096 sizeof(Fts5Structure) + /* Main structure */
1097 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
1099 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
1101 if( pRet ){
1102 pRet->nRef = 1;
1103 pRet->nLevel = nLevel;
1104 pRet->nSegment = nSegment;
1105 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
1107 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
1108 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
1109 int nTotal = 0;
1110 int iSeg;
1112 if( i>=nData ){
1113 rc = FTS5_CORRUPT;
1114 }else{
1115 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
1116 i += fts5GetVarint32(&pData[i], nTotal);
1117 if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
1118 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
1119 nTotal * sizeof(Fts5StructureSegment)
1121 nSegment -= nTotal;
1124 if( rc==SQLITE_OK ){
1125 pLvl->nSeg = nTotal;
1126 for(iSeg=0; iSeg<nTotal; iSeg++){
1127 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
1128 if( i>=nData ){
1129 rc = FTS5_CORRUPT;
1130 break;
1132 assert( pSeg!=0 );
1133 i += fts5GetVarint32(&pData[i], pSeg->iSegid);
1134 i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
1135 i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
1136 if( bStructureV2 ){
1137 i += fts5GetVarint(&pData[i], &pSeg->iOrigin1);
1138 i += fts5GetVarint(&pData[i], &pSeg->iOrigin2);
1139 i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone);
1140 i += fts5GetVarint(&pData[i], &pSeg->nEntryTombstone);
1141 i += fts5GetVarint(&pData[i], &pSeg->nEntry);
1142 nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2);
1144 if( pSeg->pgnoLast<pSeg->pgnoFirst ){
1145 rc = FTS5_CORRUPT;
1146 break;
1149 if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
1150 if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
1153 if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
1154 if( bStructureV2 ){
1155 pRet->nOriginCntr = nOriginCntr+1;
1158 if( rc!=SQLITE_OK ){
1159 fts5StructureRelease(pRet);
1160 pRet = 0;
1164 *ppOut = pRet;
1165 return rc;
1169 ** Add a level to the Fts5Structure.aLevel[] array of structure object
1170 ** (*ppStruct).
1172 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
1173 fts5StructureMakeWritable(pRc, ppStruct);
1174 assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK );
1175 if( *pRc==SQLITE_OK ){
1176 Fts5Structure *pStruct = *ppStruct;
1177 int nLevel = pStruct->nLevel;
1178 sqlite3_int64 nByte = (
1179 sizeof(Fts5Structure) + /* Main structure */
1180 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
1183 pStruct = sqlite3_realloc64(pStruct, nByte);
1184 if( pStruct ){
1185 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
1186 pStruct->nLevel++;
1187 *ppStruct = pStruct;
1188 }else{
1189 *pRc = SQLITE_NOMEM;
1195 ** Extend level iLvl so that there is room for at least nExtra more
1196 ** segments.
1198 static void fts5StructureExtendLevel(
1199 int *pRc,
1200 Fts5Structure *pStruct,
1201 int iLvl,
1202 int nExtra,
1203 int bInsert
1205 if( *pRc==SQLITE_OK ){
1206 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1207 Fts5StructureSegment *aNew;
1208 sqlite3_int64 nByte;
1210 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
1211 aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
1212 if( aNew ){
1213 if( bInsert==0 ){
1214 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
1215 }else{
1216 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
1217 memmove(&aNew[nExtra], aNew, nMove);
1218 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
1220 pLvl->aSeg = aNew;
1221 }else{
1222 *pRc = SQLITE_NOMEM;
1227 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
1228 Fts5Structure *pRet = 0;
1229 Fts5Config *pConfig = p->pConfig;
1230 int iCookie; /* Configuration cookie */
1231 Fts5Data *pData;
1233 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
1234 if( p->rc==SQLITE_OK ){
1235 /* TODO: Do we need this if the leaf-index is appended? Probably... */
1236 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
1237 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
1238 if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
1239 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
1241 fts5DataRelease(pData);
1242 if( p->rc!=SQLITE_OK ){
1243 fts5StructureRelease(pRet);
1244 pRet = 0;
1248 return pRet;
1251 static i64 fts5IndexDataVersion(Fts5Index *p){
1252 i64 iVersion = 0;
1254 if( p->rc==SQLITE_OK ){
1255 if( p->pDataVersion==0 ){
1256 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1257 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1259 if( p->rc ) return 0;
1262 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1263 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1265 p->rc = sqlite3_reset(p->pDataVersion);
1268 return iVersion;
1272 ** Read, deserialize and return the structure record.
1274 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1275 ** are over-allocated as described for function fts5StructureDecode()
1276 ** above.
1278 ** If an error occurs, NULL is returned and an error code left in the
1279 ** Fts5Index handle. If an error has already occurred when this function
1280 ** is called, it is a no-op.
1282 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1284 if( p->pStruct==0 ){
1285 p->iStructVersion = fts5IndexDataVersion(p);
1286 if( p->rc==SQLITE_OK ){
1287 p->pStruct = fts5StructureReadUncached(p);
1291 #if 0
1292 else{
1293 Fts5Structure *pTest = fts5StructureReadUncached(p);
1294 if( pTest ){
1295 int i, j;
1296 assert_nc( p->pStruct->nSegment==pTest->nSegment );
1297 assert_nc( p->pStruct->nLevel==pTest->nLevel );
1298 for(i=0; i<pTest->nLevel; i++){
1299 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1300 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1301 for(j=0; j<pTest->aLevel[i].nSeg; j++){
1302 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1303 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1304 assert_nc( p1->iSegid==p2->iSegid );
1305 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1306 assert_nc( p1->pgnoLast==p2->pgnoLast );
1309 fts5StructureRelease(pTest);
1312 #endif
1314 if( p->rc!=SQLITE_OK ) return 0;
1315 assert( p->iStructVersion!=0 );
1316 assert( p->pStruct!=0 );
1317 fts5StructureRef(p->pStruct);
1318 return p->pStruct;
1321 static void fts5StructureInvalidate(Fts5Index *p){
1322 if( p->pStruct ){
1323 fts5StructureRelease(p->pStruct);
1324 p->pStruct = 0;
/*
** Return the total number of segments in index structure pStruct. This
** function is only ever used as part of assert() conditions.
**
** The result is the sum of nSeg over all levels, or 0 if pStruct is NULL.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int iLvl;                       /* Used to iterate through levels */
  if( pStruct==0 ) return 0;
  for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
    nTotal += pStruct->aLevel[iLvl].nSeg;
  }
  return nTotal;
}
#endif
/*
** Append the nBlob bytes at pBlob to buffer pBuf without growing it. The
** caller must already have ensured that pBuf has at least nBlob bytes of
** free space; this is only checked by an assert().
**
** All macro arguments are parenthesized so that an expression argument
** (e.g. "1 << 1") is evaluated as a unit. nBlob is evaluated more than
** once and so must not have side effects.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );           \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));         \
  (pBuf)->n += (nBlob);                                    \
}

/*
** Append iVal, encoded as a varint by sqlite3Fts5PutVarint(), to buffer
** pBuf without growing it. As above, sufficient space must already be
** available; the assert() only checks after the fact.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                     \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                               \
}
1359 ** Serialize and store the "structure" record.
1361 ** If an error occurs, leave an error code in the Fts5Index object. If an
1362 ** error has already occurred, this function is a no-op.
1364 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1365 if( p->rc==SQLITE_OK ){
1366 Fts5Buffer buf; /* Buffer to serialize record into */
1367 int iLvl; /* Used to iterate through levels */
1368 int iCookie; /* Cookie value to store */
1369 int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9));
1371 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1372 memset(&buf, 0, sizeof(Fts5Buffer));
1374 /* Append the current configuration cookie */
1375 iCookie = p->pConfig->iCookie;
1376 if( iCookie<0 ) iCookie = 0;
1378 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){
1379 sqlite3Fts5Put32(buf.p, iCookie);
1380 buf.n = 4;
1381 if( pStruct->nOriginCntr>0 ){
1382 fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4);
1384 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1385 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1386 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1389 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1390 int iSeg; /* Used to iterate through segments */
1391 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1392 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1393 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1394 assert( pLvl->nMerge<=pLvl->nSeg );
1396 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1397 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
1398 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid);
1399 fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst);
1400 fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast);
1401 if( pStruct->nOriginCntr>0 ){
1402 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1);
1403 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2);
1404 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone);
1405 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone);
1406 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry);
1411 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1412 fts5BufferFree(&buf);
/*
** Debugging aid, normally compiled out. When enabled,
** fts5PrintStructure() writes "<caption>: <text>" to stdout, where the
** text is produced by fts5DebugStructure(). When disabled, calls to
** fts5PrintStructure() expand to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rcDummy = SQLITE_OK;
  Fts5Buffer text;
  memset(&text, 0, sizeof(text));
  fts5DebugStructure(&rcDummy, &text, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, text.p);
  fflush(stdout);
  fts5BufferFree(&text);
}
#else
# define fts5PrintStructure(x,y)
#endif
1431 static int fts5SegmentSize(Fts5StructureSegment *pSeg){
1432 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
1436 ** Return a copy of index structure pStruct. Except, promote as many
1437 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1438 ** returned.
1440 static void fts5StructurePromoteTo(
1441 Fts5Index *p,
1442 int iPromote,
1443 int szPromote,
1444 Fts5Structure *pStruct
1446 int il, is;
1447 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1449 if( pOut->nMerge==0 ){
1450 for(il=iPromote+1; il<pStruct->nLevel; il++){
1451 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1452 if( pLvl->nMerge ) return;
1453 for(is=pLvl->nSeg-1; is>=0; is--){
1454 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1455 if( sz>szPromote ) return;
1456 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1457 if( p->rc ) return;
1458 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1459 pOut->nSeg++;
1460 pLvl->nSeg--;
1467 ** A new segment has just been written to level iLvl of index structure
1468 ** pStruct. This function determines if any segments should be promoted
1469 ** as a result. Segments are promoted in two scenarios:
1471 ** a) If the segment just written is smaller than one or more segments
1472 ** within the previous populated level, it is promoted to the previous
1473 ** populated level.
1475 ** b) If the segment just written is larger than the newest segment on
1476 ** the next populated level, then that segment, and any other adjacent
1477 ** segments that are also smaller than the one just written, are
1478 ** promoted.
1480 ** If one or more segments are promoted, the structure object is updated
1481 ** to reflect this.
1483 static void fts5StructurePromote(
1484 Fts5Index *p, /* FTS5 backend object */
1485 int iLvl, /* Index level just updated */
1486 Fts5Structure *pStruct /* Index structure */
1488 if( p->rc==SQLITE_OK ){
1489 int iTst;
1490 int iPromote = -1;
1491 int szPromote = 0; /* Promote anything this size or smaller */
1492 Fts5StructureSegment *pSeg; /* Segment just written */
1493 int szSeg; /* Size of segment just written */
1494 int nSeg = pStruct->aLevel[iLvl].nSeg;
1496 if( nSeg==0 ) return;
1497 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1498 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1500 /* Check for condition (a) */
1501 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1502 if( iTst>=0 ){
1503 int i;
1504 int szMax = 0;
1505 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1506 assert( pTst->nMerge==0 );
1507 for(i=0; i<pTst->nSeg; i++){
1508 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1509 if( sz>szMax ) szMax = sz;
1511 if( szMax>=szSeg ){
1512 /* Condition (a) is true. Promote the newest segment on level
1513 ** iLvl to level iTst. */
1514 iPromote = iTst;
1515 szPromote = szMax;
1519 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1520 ** is a no-op if it is not. */
1521 if( iPromote<0 ){
1522 iPromote = iLvl;
1523 szPromote = szSeg;
1525 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1531 ** Advance the iterator passed as the only argument. If the end of the
1532 ** doclist-index page is reached, return non-zero.
1534 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1535 Fts5Data *pData = pLvl->pData;
1537 if( pLvl->iOff==0 ){
1538 assert( pLvl->bEof==0 );
1539 pLvl->iOff = 1;
1540 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1541 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1542 pLvl->iFirstOff = pLvl->iOff;
1543 }else{
1544 int iOff;
1545 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1546 if( pData->p[iOff] ) break;
1549 if( iOff<pData->nn ){
1550 u64 iVal;
1551 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1552 iOff += fts5GetVarint(&pData->p[iOff], &iVal);
1553 pLvl->iRowid += iVal;
1554 pLvl->iOff = iOff;
1555 }else{
1556 pLvl->bEof = 1;
1560 return pLvl->bEof;
1564 ** Advance the iterator passed as the only argument.
1566 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1567 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1569 assert( iLvl<pIter->nLvl );
1570 if( fts5DlidxLvlNext(pLvl) ){
1571 if( (iLvl+1) < pIter->nLvl ){
1572 fts5DlidxIterNextR(p, pIter, iLvl+1);
1573 if( pLvl[1].bEof==0 ){
1574 fts5DataRelease(pLvl->pData);
1575 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1576 pLvl->pData = fts5DataRead(p,
1577 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1579 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1584 return pIter->aLvl[0].bEof;
1586 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
1587 return fts5DlidxIterNextR(p, pIter, 0);
1591 ** The iterator passed as the first argument has the following fields set
1592 ** as follows. This function sets up the rest of the iterator so that it
1593 ** points to the first rowid in the doclist-index.
1595 ** pData:
1596 ** pointer to doclist-index record,
1598 ** When this function is called pIter->iLeafPgno is the page number the
1599 ** doclist is associated with (the one featuring the term).
1601 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1602 int i;
1603 for(i=0; i<pIter->nLvl; i++){
1604 fts5DlidxLvlNext(&pIter->aLvl[i]);
1606 return pIter->aLvl[0].bEof;
1610 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
1611 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
1614 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
1615 int i;
1617 /* Advance each level to the last entry on the last page */
1618 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
1619 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
1620 while( fts5DlidxLvlNext(pLvl)==0 );
1621 pLvl->bEof = 0;
1623 if( i>0 ){
1624 Fts5DlidxLvl *pChild = &pLvl[-1];
1625 fts5DataRelease(pChild->pData);
1626 memset(pChild, 0, sizeof(Fts5DlidxLvl));
1627 pChild->pData = fts5DataRead(p,
1628 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
1635 ** Move the iterator passed as the only argument to the previous entry.
1637 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
1638 int iOff = pLvl->iOff;
1640 assert( pLvl->bEof==0 );
1641 if( iOff<=pLvl->iFirstOff ){
1642 pLvl->bEof = 1;
1643 }else{
1644 u8 *a = pLvl->pData->p;
1646 pLvl->iOff = 0;
1647 fts5DlidxLvlNext(pLvl);
1648 while( 1 ){
1649 int nZero = 0;
1650 int ii = pLvl->iOff;
1651 u64 delta = 0;
1653 while( a[ii]==0 ){
1654 nZero++;
1655 ii++;
1657 ii += sqlite3Fts5GetVarint(&a[ii], &delta);
1659 if( ii>=iOff ) break;
1660 pLvl->iLeafPgno += nZero+1;
1661 pLvl->iRowid += delta;
1662 pLvl->iOff = ii;
1666 return pLvl->bEof;
1669 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1670 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1672 assert( iLvl<pIter->nLvl );
1673 if( fts5DlidxLvlPrev(pLvl) ){
1674 if( (iLvl+1) < pIter->nLvl ){
1675 fts5DlidxIterPrevR(p, pIter, iLvl+1);
1676 if( pLvl[1].bEof==0 ){
1677 fts5DataRelease(pLvl->pData);
1678 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1679 pLvl->pData = fts5DataRead(p,
1680 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1682 if( pLvl->pData ){
1683 while( fts5DlidxLvlNext(pLvl)==0 );
1684 pLvl->bEof = 0;
1690 return pIter->aLvl[0].bEof;
1692 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
1693 return fts5DlidxIterPrevR(p, pIter, 0);
1697 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1699 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1700 if( pIter ){
1701 int i;
1702 for(i=0; i<pIter->nLvl; i++){
1703 fts5DataRelease(pIter->aLvl[i].pData);
1705 sqlite3_free(pIter);
1709 static Fts5DlidxIter *fts5DlidxIterInit(
1710 Fts5Index *p, /* Fts5 Backend to iterate within */
1711 int bRev, /* True for ORDER BY ASC */
1712 int iSegid, /* Segment id */
1713 int iLeafPg /* Leaf page number to load dlidx for */
1715 Fts5DlidxIter *pIter = 0;
1716 int i;
1717 int bDone = 0;
1719 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1720 sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1721 Fts5DlidxIter *pNew;
1723 pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
1724 if( pNew==0 ){
1725 p->rc = SQLITE_NOMEM;
1726 }else{
1727 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1728 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1729 pIter = pNew;
1730 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1731 pLvl->pData = fts5DataRead(p, iRowid);
1732 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1733 bDone = 1;
1735 pIter->nLvl = i+1;
1739 if( p->rc==SQLITE_OK ){
1740 pIter->iSegid = iSegid;
1741 if( bRev==0 ){
1742 fts5DlidxIterFirst(pIter);
1743 }else{
1744 fts5DlidxIterLast(p, pIter);
1748 if( p->rc!=SQLITE_OK ){
1749 fts5DlidxIterFree(pIter);
1750 pIter = 0;
1753 return pIter;
1756 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1757 return pIter->aLvl[0].iRowid;
1759 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
1760 return pIter->aLvl[0].iLeafPgno;
1764 ** Load the next leaf page into the segment iterator.
1766 static void fts5SegIterNextPage(
1767 Fts5Index *p, /* FTS5 backend object */
1768 Fts5SegIter *pIter /* Iterator to advance to next page */
1770 Fts5Data *pLeaf;
1771 Fts5StructureSegment *pSeg = pIter->pSeg;
1772 fts5DataRelease(pIter->pLeaf);
1773 pIter->iLeafPgno++;
1774 if( pIter->pNextLeaf ){
1775 pIter->pLeaf = pIter->pNextLeaf;
1776 pIter->pNextLeaf = 0;
1777 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1778 pIter->pLeaf = fts5LeafRead(p,
1779 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1781 }else{
1782 pIter->pLeaf = 0;
1784 pLeaf = pIter->pLeaf;
1786 if( pLeaf ){
1787 pIter->iPgidxOff = pLeaf->szLeaf;
1788 if( fts5LeafIsTermless(pLeaf) ){
1789 pIter->iEndofDoclist = pLeaf->nn+1;
1790 }else{
1791 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1792 pIter->iEndofDoclist
1799 ** Argument p points to a buffer containing a varint to be interpreted as a
1800 ** position list size field. Read the varint and return the number of bytes
1801 ** read. Before returning, set *pnSz to the number of bytes in the position
1802 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1804 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1805 int nSz;
1806 int n = 0;
1807 fts5FastGetVarint32(p, n, nSz);
1808 assert_nc( nSz>=0 );
1809 *pnSz = nSz/2;
1810 *pbDel = nSz & 0x0001;
1811 return n;
1815 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1816 ** position-list size field. Read the value of the field and store it
1817 ** in the following variables:
1819 ** Fts5SegIter.nPos
1820 ** Fts5SegIter.bDel
1822 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1823 ** position list content (if any).
1825 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1826 if( p->rc==SQLITE_OK ){
1827 int iOff = pIter->iLeafOffset; /* Offset to read at */
1828 ASSERT_SZLEAF_OK(pIter->pLeaf);
1829 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1830 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1831 pIter->bDel = 0;
1832 pIter->nPos = 1;
1833 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1834 pIter->bDel = 1;
1835 iOff++;
1836 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1837 pIter->nPos = 1;
1838 iOff++;
1839 }else{
1840 pIter->nPos = 0;
1843 }else{
1844 int nSz;
1845 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1846 pIter->bDel = (nSz & 0x0001);
1847 pIter->nPos = nSz>>1;
1848 assert_nc( pIter->nPos>=0 );
1850 pIter->iLeafOffset = iOff;
1854 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
1855 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1856 i64 iOff = pIter->iLeafOffset;
1858 ASSERT_SZLEAF_OK(pIter->pLeaf);
1859 while( iOff>=pIter->pLeaf->szLeaf ){
1860 fts5SegIterNextPage(p, pIter);
1861 if( pIter->pLeaf==0 ){
1862 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
1863 return;
1865 iOff = 4;
1866 a = pIter->pLeaf->p;
1868 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
1869 pIter->iLeafOffset = iOff;
1873 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1874 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1875 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1876 ** the first term in the segment).
1878 ** This function populates:
1880 ** Fts5SegIter.term
1881 ** Fts5SegIter.rowid
1883 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1884 ** the first position list. The position list belonging to document
1885 ** (Fts5SegIter.iRowid).
1887 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1888 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1889 i64 iOff = pIter->iLeafOffset; /* Offset to read at */
1890 int nNew; /* Bytes of new data */
1892 iOff += fts5GetVarint32(&a[iOff], nNew);
1893 if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
1894 p->rc = FTS5_CORRUPT;
1895 return;
1897 pIter->term.n = nKeep;
1898 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1899 assert( pIter->term.n<=pIter->term.nSpace );
1900 iOff += nNew;
1901 pIter->iTermLeafOffset = iOff;
1902 pIter->iTermLeafPgno = pIter->iLeafPgno;
1903 pIter->iLeafOffset = iOff;
1905 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1906 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1907 }else{
1908 int nExtra;
1909 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1910 pIter->iEndofDoclist += nExtra;
1913 fts5SegIterLoadRowid(p, pIter);
1916 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1917 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1918 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1920 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
1921 if( pIter->flags & FTS5_SEGITER_REVERSE ){
1922 pIter->xNext = fts5SegIterNext_Reverse;
1923 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1924 pIter->xNext = fts5SegIterNext_None;
1925 }else{
1926 pIter->xNext = fts5SegIterNext;
1931 ** Allocate a tombstone hash page array object (pIter->pTombArray) for
1932 ** the iterator passed as the second argument. If an OOM error occurs,
1933 ** leave an error in the Fts5Index object.
1935 static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){
1936 const int nTomb = pIter->pSeg->nPgTombstone;
1937 if( nTomb>0 ){
1938 int nByte = nTomb * sizeof(Fts5Data*) + sizeof(Fts5TombstoneArray);
1939 Fts5TombstoneArray *pNew;
1940 pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte);
1941 if( pNew ){
1942 pNew->nTombstone = nTomb;
1943 pNew->nRef = 1;
1944 pIter->pTombArray = pNew;
1950 ** Initialize the iterator object pIter to iterate through the entries in
1951 ** segment pSeg. The iterator is left pointing to the first entry when
1952 ** this function returns.
1954 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1955 ** an error has already occurred when this function is called, it is a no-op.
1957 static void fts5SegIterInit(
1958 Fts5Index *p, /* FTS index object */
1959 Fts5StructureSegment *pSeg, /* Description of segment */
1960 Fts5SegIter *pIter /* Object to populate */
1962 if( pSeg->pgnoFirst==0 ){
1963 /* This happens if the segment is being used as an input to an incremental
1964 ** merge and all data has already been "trimmed". See function
1965 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1966 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1967 ** at EOF already. */
1968 assert( pIter->pLeaf==0 );
1969 return;
1972 if( p->rc==SQLITE_OK ){
1973 memset(pIter, 0, sizeof(*pIter));
1974 fts5SegIterSetNext(p, pIter);
1975 pIter->pSeg = pSeg;
1976 pIter->iLeafPgno = pSeg->pgnoFirst-1;
1977 do {
1978 fts5SegIterNextPage(p, pIter);
1979 }while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 );
1982 if( p->rc==SQLITE_OK && pIter->pLeaf ){
1983 pIter->iLeafOffset = 4;
1984 assert( pIter->pLeaf!=0 );
1985 assert_nc( pIter->pLeaf->nn>4 );
1986 assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1987 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1988 fts5SegIterLoadTerm(p, pIter, 0);
1989 fts5SegIterLoadNPos(p, pIter);
1990 fts5SegIterAllocTombstone(p, pIter);
1995 ** This function is only ever called on iterators created by calls to
1996 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1998 ** The iterator is in an unusual state when this function is called: the
1999 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
2000 ** the position-list size field for the first relevant rowid on the page.
2001 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
2003 ** This function advances the iterator so that it points to the last
2004 ** relevant rowid on the page and, if necessary, initializes the
2005 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
2006 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
2007 ** byte of the position list content associated with said rowid.
2009 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
2010 int eDetail = p->pConfig->eDetail;
2011 int n = pIter->pLeaf->szLeaf;
2012 int i = pIter->iLeafOffset;
2013 u8 *a = pIter->pLeaf->p;
2014 int iRowidOffset = 0;
2016 if( n>pIter->iEndofDoclist ){
2017 n = pIter->iEndofDoclist;
2020 ASSERT_SZLEAF_OK(pIter->pLeaf);
2021 while( 1 ){
2022 u64 iDelta = 0;
2024 if( eDetail==FTS5_DETAIL_NONE ){
2025 /* todo */
2026 if( i<n && a[i]==0 ){
2027 i++;
2028 if( i<n && a[i]==0 ) i++;
2030 }else{
2031 int nPos;
2032 int bDummy;
2033 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
2034 i += nPos;
2036 if( i>=n ) break;
2037 i += fts5GetVarint(&a[i], &iDelta);
2038 pIter->iRowid += iDelta;
2040 /* If necessary, grow the pIter->aRowidOffset[] array. */
2041 if( iRowidOffset>=pIter->nRowidOffset ){
2042 int nNew = pIter->nRowidOffset + 8;
2043 int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
2044 if( aNew==0 ){
2045 p->rc = SQLITE_NOMEM;
2046 break;
2048 pIter->aRowidOffset = aNew;
2049 pIter->nRowidOffset = nNew;
2052 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
2053 pIter->iLeafOffset = i;
2055 pIter->iRowidOffset = iRowidOffset;
2056 fts5SegIterLoadNPos(p, pIter);
2062 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
2063 assert( pIter->flags & FTS5_SEGITER_REVERSE );
2064 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2066 fts5DataRelease(pIter->pLeaf);
2067 pIter->pLeaf = 0;
2068 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
2069 Fts5Data *pNew;
2070 pIter->iLeafPgno--;
2071 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
2072 pIter->pSeg->iSegid, pIter->iLeafPgno
2074 if( pNew ){
2075 /* iTermLeafOffset may be equal to szLeaf if the term is the last
2076 ** thing on the page - i.e. the first rowid is on the following page.
2077 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
2078 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
2079 assert( pIter->pLeaf==0 );
2080 if( pIter->iTermLeafOffset<pNew->szLeaf ){
2081 pIter->pLeaf = pNew;
2082 pIter->iLeafOffset = pIter->iTermLeafOffset;
2084 }else{
2085 int iRowidOff;
2086 iRowidOff = fts5LeafFirstRowidOff(pNew);
2087 if( iRowidOff ){
2088 if( iRowidOff>=pNew->szLeaf ){
2089 p->rc = FTS5_CORRUPT;
2090 }else{
2091 pIter->pLeaf = pNew;
2092 pIter->iLeafOffset = iRowidOff;
2097 if( pIter->pLeaf ){
2098 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
2099 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
2100 break;
2101 }else{
2102 fts5DataRelease(pNew);
2107 if( pIter->pLeaf ){
2108 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2109 fts5SegIterReverseInitPage(p, pIter);
2114 ** Return true if the iterator passed as the second argument currently
2115 ** points to a delete marker. A delete marker is an entry with a 0 byte
2116 ** position-list.
2118 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
2119 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2120 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
2124 ** Advance iterator pIter to the next entry.
2126 ** This version of fts5SegIterNext() is only used by reverse iterators.
2128 static void fts5SegIterNext_Reverse(
2129 Fts5Index *p, /* FTS5 backend object */
2130 Fts5SegIter *pIter, /* Iterator to advance */
2131 int *pbUnused /* Unused */
2133 assert( pIter->flags & FTS5_SEGITER_REVERSE );
2134 assert( pIter->pNextLeaf==0 );
2135 UNUSED_PARAM(pbUnused);
2137 if( pIter->iRowidOffset>0 ){
2138 u8 *a = pIter->pLeaf->p;
2139 int iOff;
2140 u64 iDelta;
2142 pIter->iRowidOffset--;
2143 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
2144 fts5SegIterLoadNPos(p, pIter);
2145 iOff = pIter->iLeafOffset;
2146 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
2147 iOff += pIter->nPos;
2149 fts5GetVarint(&a[iOff], &iDelta);
2150 pIter->iRowid -= iDelta;
2151 }else{
2152 fts5SegIterReverseNewPage(p, pIter);
2157 ** Advance iterator pIter to the next entry.
2159 ** This version of fts5SegIterNext() is only used if detail=none and the
2160 ** iterator is not a reverse direction iterator.
2162 static void fts5SegIterNext_None(
2163 Fts5Index *p, /* FTS5 backend object */
2164 Fts5SegIter *pIter, /* Iterator to advance */
2165 int *pbNewTerm /* OUT: Set for new term */
2167 int iOff;
2169 assert( p->rc==SQLITE_OK );
2170 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
2171 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
2173 ASSERT_SZLEAF_OK(pIter->pLeaf);
2174 iOff = pIter->iLeafOffset;
2176 /* Next entry is on the next page */
2177 while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
2178 fts5SegIterNextPage(p, pIter);
2179 if( p->rc || pIter->pLeaf==0 ) return;
2180 pIter->iRowid = 0;
2181 iOff = 4;
2184 if( iOff<pIter->iEndofDoclist ){
2185 /* Next entry is on the current page */
2186 i64 iDelta;
2187 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
2188 pIter->iLeafOffset = iOff;
2189 pIter->iRowid += iDelta;
2190 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
2191 if( pIter->pSeg ){
2192 int nKeep = 0;
2193 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
2194 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
2196 pIter->iLeafOffset = iOff;
2197 fts5SegIterLoadTerm(p, pIter, nKeep);
2198 }else{
2199 const u8 *pList = 0;
2200 const char *zTerm = 0;
2201 int nTerm = 0;
2202 int nList;
2203 sqlite3Fts5HashScanNext(p->pHash);
2204 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
2205 if( pList==0 ) goto next_none_eof;
2206 pIter->pLeaf->p = (u8*)pList;
2207 pIter->pLeaf->nn = nList;
2208 pIter->pLeaf->szLeaf = nList;
2209 pIter->iEndofDoclist = nList;
2210 sqlite3Fts5BufferSet(&p->rc,&pIter->term, nTerm, (u8*)zTerm);
2211 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2214 if( pbNewTerm ) *pbNewTerm = 1;
2215 }else{
2216 goto next_none_eof;
2219 fts5SegIterLoadNPos(p, pIter);
2221 return;
2222 next_none_eof:
2223 fts5DataRelease(pIter->pLeaf);
2224 pIter->pLeaf = 0;
2229 ** Advance iterator pIter to the next entry.
2231 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
2232 ** is not considered an error if the iterator reaches EOF. If an error has
2233 ** already occurred when this function is called, it is a no-op.
2235 static void fts5SegIterNext(
2236 Fts5Index *p, /* FTS5 backend object */
2237 Fts5SegIter *pIter, /* Iterator to advance */
2238 int *pbNewTerm /* OUT: Set for new term */
2240 Fts5Data *pLeaf = pIter->pLeaf;
2241 int iOff;
2242 int bNewTerm = 0;
2243 int nKeep = 0;
2244 u8 *a;
2245 int n;
2247 assert( pbNewTerm==0 || *pbNewTerm==0 );
2248 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
2250 /* Search for the end of the position list within the current page. */
2251 a = pLeaf->p;
2252 n = pLeaf->szLeaf;
2254 ASSERT_SZLEAF_OK(pLeaf);
2255 iOff = pIter->iLeafOffset + pIter->nPos;
2257 if( iOff<n ){
2258 /* The next entry is on the current page. */
2259 assert_nc( iOff<=pIter->iEndofDoclist );
2260 if( iOff>=pIter->iEndofDoclist ){
2261 bNewTerm = 1;
2262 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
2263 iOff += fts5GetVarint32(&a[iOff], nKeep);
2265 }else{
2266 u64 iDelta;
2267 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
2268 pIter->iRowid += iDelta;
2269 assert_nc( iDelta>0 );
2271 pIter->iLeafOffset = iOff;
2273 }else if( pIter->pSeg==0 ){
2274 const u8 *pList = 0;
2275 const char *zTerm = 0;
2276 int nTerm = 0;
2277 int nList = 0;
2278 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
2279 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
2280 sqlite3Fts5HashScanNext(p->pHash);
2281 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &nTerm, &pList, &nList);
2283 if( pList==0 ){
2284 fts5DataRelease(pIter->pLeaf);
2285 pIter->pLeaf = 0;
2286 }else{
2287 pIter->pLeaf->p = (u8*)pList;
2288 pIter->pLeaf->nn = nList;
2289 pIter->pLeaf->szLeaf = nList;
2290 pIter->iEndofDoclist = nList+1;
2291 sqlite3Fts5BufferSet(&p->rc, &pIter->term, nTerm, (u8*)zTerm);
2292 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2293 *pbNewTerm = 1;
2295 }else{
2296 iOff = 0;
2297 /* Next entry is not on the current page */
2298 while( iOff==0 ){
2299 fts5SegIterNextPage(p, pIter);
2300 pLeaf = pIter->pLeaf;
2301 if( pLeaf==0 ) break;
2302 ASSERT_SZLEAF_OK(pLeaf);
2303 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
2304 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
2305 pIter->iLeafOffset = iOff;
2307 if( pLeaf->nn>pLeaf->szLeaf ){
2308 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2309 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
2313 else if( pLeaf->nn>pLeaf->szLeaf ){
2314 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2315 &pLeaf->p[pLeaf->szLeaf], iOff
2317 pIter->iLeafOffset = iOff;
2318 pIter->iEndofDoclist = iOff;
2319 bNewTerm = 1;
2321 assert_nc( iOff<pLeaf->szLeaf );
2322 if( iOff>pLeaf->szLeaf ){
2323 p->rc = FTS5_CORRUPT;
2324 return;
2329 /* Check if the iterator is now at EOF. If so, return early. */
2330 if( pIter->pLeaf ){
2331 if( bNewTerm ){
2332 if( pIter->flags & FTS5_SEGITER_ONETERM ){
2333 fts5DataRelease(pIter->pLeaf);
2334 pIter->pLeaf = 0;
2335 }else{
2336 fts5SegIterLoadTerm(p, pIter, nKeep);
2337 fts5SegIterLoadNPos(p, pIter);
2338 if( pbNewTerm ) *pbNewTerm = 1;
2340 }else{
2341 /* The following could be done by calling fts5SegIterLoadNPos(). But
2342 ** this block is particularly performance critical, so equivalent
2343 ** code is inlined. */
2344 int nSz;
2345 assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
2346 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
2347 pIter->bDel = (nSz & 0x0001);
2348 pIter->nPos = nSz>>1;
2349 assert_nc( pIter->nPos>=0 );
/*
** SWAPVAL(T, a, b): exchange the values of lvalues a and b, both of type T.
** Expands to a brace-enclosed block, so it must be used as a statement.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=(a); (a)=(b); (b)=tmp; }

/*
** fts5IndexSkipVarint(a, iOff): advance integer lvalue iOff past the
** varint stored at a[iOff]. Bytes are consumed while their 0x80 bit is
** set, but never more than 9 bytes in total. Expands to a brace-enclosed
** block, so it must be used as a statement.
*/
#define fts5IndexSkipVarint(a, iOff) {                 \
  int iEnd = (iOff)+9;                                 \
  while( ((a)[(iOff)++] & 0x80) && (iOff)<iEnd );      \
}
2362 ** Iterator pIter currently points to the first rowid in a doclist. This
2363 ** function sets the iterator up so that iterates in reverse order through
2364 ** the doclist.
2366 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2367 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2368 Fts5Data *pLast = 0;
2369 int pgnoLast = 0;
2371 if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){
2372 int iSegid = pIter->pSeg->iSegid;
2373 pgnoLast = fts5DlidxIterPgno(pDlidx);
2374 pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2375 }else{
2376 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2378 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2379 ** position-list content for the current rowid. Back it up so that it
2380 ** points to the start of the position-list size field. */
2381 int iPoslist;
2382 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2383 iPoslist = pIter->iTermLeafOffset;
2384 }else{
2385 iPoslist = 4;
2387 fts5IndexSkipVarint(pLeaf->p, iPoslist);
2388 pIter->iLeafOffset = iPoslist;
2390 /* If this condition is true then the largest rowid for the current
2391 ** term may not be stored on the current page. So search forward to
2392 ** see where said rowid really is. */
2393 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2394 int pgno;
2395 Fts5StructureSegment *pSeg = pIter->pSeg;
2397 /* The last rowid in the doclist may not be on the current page. Search
2398 ** forward to find the page containing the last rowid. */
2399 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2400 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2401 Fts5Data *pNew = fts5LeafRead(p, iAbs);
2402 if( pNew ){
2403 int iRowid, bTermless;
2404 iRowid = fts5LeafFirstRowidOff(pNew);
2405 bTermless = fts5LeafIsTermless(pNew);
2406 if( iRowid ){
2407 SWAPVAL(Fts5Data*, pNew, pLast);
2408 pgnoLast = pgno;
2410 fts5DataRelease(pNew);
2411 if( bTermless==0 ) break;
2417 /* If pLast is NULL at this point, then the last rowid for this doclist
2418 ** lies on the page currently indicated by the iterator. In this case
2419 ** pIter->iLeafOffset is already set to point to the position-list size
2420 ** field associated with the first relevant rowid on the page.
2422 ** Or, if pLast is non-NULL, then it is the page that contains the last
2423 ** rowid. In this case configure the iterator so that it points to the
2424 ** first rowid on this page.
2426 if( pLast ){
2427 int iOff;
2428 fts5DataRelease(pIter->pLeaf);
2429 pIter->pLeaf = pLast;
2430 pIter->iLeafPgno = pgnoLast;
2431 iOff = fts5LeafFirstRowidOff(pLast);
2432 if( iOff>pLast->szLeaf ){
2433 p->rc = FTS5_CORRUPT;
2434 return;
2436 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2437 pIter->iLeafOffset = iOff;
2439 if( fts5LeafIsTermless(pLast) ){
2440 pIter->iEndofDoclist = pLast->nn+1;
2441 }else{
2442 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2446 fts5SegIterReverseInitPage(p, pIter);
2450 ** Iterator pIter currently points to the first rowid of a doclist.
2451 ** There is a doclist-index associated with the final term on the current
2452 ** page. If the current term is the last term on the page, load the
2453 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2455 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2456 int iSeg = pIter->pSeg->iSegid;
2457 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2458 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2460 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2461 assert( pIter->pDlidx==0 );
2463 /* Check if the current doclist ends on this page. If it does, return
2464 ** early without loading the doclist-index (as it belongs to a different
2465 ** term. */
2466 if( pIter->iTermLeafPgno==pIter->iLeafPgno
2467 && pIter->iEndofDoclist<pLeaf->szLeaf
2469 return;
2472 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2476 ** The iterator object passed as the second argument currently contains
2477 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2478 ** function searches the leaf page for a term matching (pTerm/nTerm).
2480 ** If the specified term is found on the page, then the iterator is left
2481 ** pointing to it. If argument bGe is zero and the term is not found,
2482 ** the iterator is left pointing at EOF.
2484 ** If bGe is non-zero and the specified term is not found, then the
2485 ** iterator is left pointing to the smallest term in the segment that
2486 ** is larger than the specified term, even if this term is not on the
2487 ** current page.
2489 static void fts5LeafSeek(
2490 Fts5Index *p, /* Leave any error code here */
2491 int bGe, /* True for a >= search */
2492 Fts5SegIter *pIter, /* Iterator to seek */
2493 const u8 *pTerm, int nTerm /* Term to search for */
2495 u32 iOff;
2496 const u8 *a = pIter->pLeaf->p;
2497 u32 n = (u32)pIter->pLeaf->nn;
2499 u32 nMatch = 0;
2500 u32 nKeep = 0;
2501 u32 nNew = 0;
2502 u32 iTermOff;
2503 u32 iPgidx; /* Current offset in pgidx */
2504 int bEndOfPage = 0;
2506 assert( p->rc==SQLITE_OK );
2508 iPgidx = (u32)pIter->pLeaf->szLeaf;
2509 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2510 iOff = iTermOff;
2511 if( iOff>n ){
2512 p->rc = FTS5_CORRUPT;
2513 return;
2516 while( 1 ){
2518 /* Figure out how many new bytes are in this term */
2519 fts5FastGetVarint32(a, iOff, nNew);
2520 if( nKeep<nMatch ){
2521 goto search_failed;
2524 assert( nKeep>=nMatch );
2525 if( nKeep==nMatch ){
2526 u32 nCmp;
2527 u32 i;
2528 nCmp = (u32)MIN(nNew, nTerm-nMatch);
2529 for(i=0; i<nCmp; i++){
2530 if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2532 nMatch += i;
2534 if( (u32)nTerm==nMatch ){
2535 if( i==nNew ){
2536 goto search_success;
2537 }else{
2538 goto search_failed;
2540 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2541 goto search_failed;
2545 if( iPgidx>=n ){
2546 bEndOfPage = 1;
2547 break;
2550 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2551 iTermOff += nKeep;
2552 iOff = iTermOff;
2554 if( iOff>=n ){
2555 p->rc = FTS5_CORRUPT;
2556 return;
2559 /* Read the nKeep field of the next term. */
2560 fts5FastGetVarint32(a, iOff, nKeep);
2563 search_failed:
2564 if( bGe==0 ){
2565 fts5DataRelease(pIter->pLeaf);
2566 pIter->pLeaf = 0;
2567 return;
2568 }else if( bEndOfPage ){
2569 do {
2570 fts5SegIterNextPage(p, pIter);
2571 if( pIter->pLeaf==0 ) return;
2572 a = pIter->pLeaf->p;
2573 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2574 iPgidx = (u32)pIter->pLeaf->szLeaf;
2575 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2576 if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
2577 p->rc = FTS5_CORRUPT;
2578 return;
2579 }else{
2580 nKeep = 0;
2581 iTermOff = iOff;
2582 n = (u32)pIter->pLeaf->nn;
2583 iOff += fts5GetVarint32(&a[iOff], nNew);
2584 break;
2587 }while( 1 );
2590 search_success:
2591 if( (i64)iOff+nNew>n || nNew<1 ){
2592 p->rc = FTS5_CORRUPT;
2593 return;
2595 pIter->iLeafOffset = iOff + nNew;
2596 pIter->iTermLeafOffset = pIter->iLeafOffset;
2597 pIter->iTermLeafPgno = pIter->iLeafPgno;
2599 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2600 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2602 if( iPgidx>=n ){
2603 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2604 }else{
2605 int nExtra;
2606 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2607 pIter->iEndofDoclist = iTermOff + nExtra;
2609 pIter->iPgidxOff = iPgidx;
2611 fts5SegIterLoadRowid(p, pIter);
2612 fts5SegIterLoadNPos(p, pIter);
2615 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2616 if( p->pIdxSelect==0 ){
2617 Fts5Config *pConfig = p->pConfig;
2618 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2619 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2620 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2621 pConfig->zDb, pConfig->zName
2624 return p->pIdxSelect;
2628 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2629 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2631 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2632 ** an error has already occurred when this function is called, it is a no-op.
2634 static void fts5SegIterSeekInit(
2635 Fts5Index *p, /* FTS5 backend */
2636 const u8 *pTerm, int nTerm, /* Term to seek to */
2637 int flags, /* Mask of FTS5INDEX_XXX flags */
2638 Fts5StructureSegment *pSeg, /* Description of segment */
2639 Fts5SegIter *pIter /* Object to populate */
2641 int iPg = 1;
2642 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2643 int bDlidx = 0; /* True if there is a doclist-index */
2644 sqlite3_stmt *pIdxSelect = 0;
2646 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2647 assert( pTerm && nTerm );
2648 memset(pIter, 0, sizeof(*pIter));
2649 pIter->pSeg = pSeg;
2651 /* This block sets stack variable iPg to the leaf page number that may
2652 ** contain term (pTerm/nTerm), if it is present in the segment. */
2653 pIdxSelect = fts5IdxSelectStmt(p);
2654 if( p->rc ) return;
2655 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2656 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2657 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2658 i64 val = sqlite3_column_int(pIdxSelect, 0);
2659 iPg = (int)(val>>1);
2660 bDlidx = (val & 0x0001);
2662 p->rc = sqlite3_reset(pIdxSelect);
2663 sqlite3_bind_null(pIdxSelect, 2);
2665 if( iPg<pSeg->pgnoFirst ){
2666 iPg = pSeg->pgnoFirst;
2667 bDlidx = 0;
2670 pIter->iLeafPgno = iPg - 1;
2671 fts5SegIterNextPage(p, pIter);
2673 if( pIter->pLeaf ){
2674 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2677 if( p->rc==SQLITE_OK && (bGe==0 || (flags & FTS5INDEX_QUERY_SCANONETERM)) ){
2678 pIter->flags |= FTS5_SEGITER_ONETERM;
2679 if( pIter->pLeaf ){
2680 if( flags & FTS5INDEX_QUERY_DESC ){
2681 pIter->flags |= FTS5_SEGITER_REVERSE;
2683 if( bDlidx ){
2684 fts5SegIterLoadDlidx(p, pIter);
2686 if( flags & FTS5INDEX_QUERY_DESC ){
2687 fts5SegIterReverse(p, pIter);
2692 fts5SegIterSetNext(p, pIter);
2693 if( 0==(flags & FTS5INDEX_QUERY_SCANONETERM) ){
2694 fts5SegIterAllocTombstone(p, pIter);
2697 /* Either:
2699 ** 1) an error has occurred, or
2700 ** 2) the iterator points to EOF, or
2701 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2702 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2703 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2705 assert_nc( p->rc!=SQLITE_OK /* 1 */
2706 || pIter->pLeaf==0 /* 2 */
2707 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
2708 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
2714 ** SQL used by fts5SegIterNextInit() to find the page to open.
2716 static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
2717 if( p->pIdxNextSelect==0 ){
2718 Fts5Config *pConfig = p->pConfig;
2719 fts5IndexPrepareStmt(p, &p->pIdxNextSelect, sqlite3_mprintf(
2720 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2721 "segid=? AND term>? ORDER BY term ASC LIMIT 1",
2722 pConfig->zDb, pConfig->zName
2726 return p->pIdxNextSelect;
2730 ** This is similar to fts5SegIterSeekInit(), except that it initializes
2731 ** the segment iterator to point to the first term following the page
2732 ** with pToken/nToken on it.
2734 static void fts5SegIterNextInit(
2735 Fts5Index *p,
2736 const char *pTerm, int nTerm,
2737 Fts5StructureSegment *pSeg, /* Description of segment */
2738 Fts5SegIter *pIter /* Object to populate */
2740 int iPg = -1; /* Page of segment to open */
2741 int bDlidx = 0;
2742 sqlite3_stmt *pSel = 0; /* SELECT to find iPg */
2744 pSel = fts5IdxNextStmt(p);
2745 if( pSel ){
2746 assert( p->rc==SQLITE_OK );
2747 sqlite3_bind_int(pSel, 1, pSeg->iSegid);
2748 sqlite3_bind_blob(pSel, 2, pTerm, nTerm, SQLITE_STATIC);
2750 if( sqlite3_step(pSel)==SQLITE_ROW ){
2751 i64 val = sqlite3_column_int64(pSel, 0);
2752 iPg = (int)(val>>1);
2753 bDlidx = (val & 0x0001);
2755 p->rc = sqlite3_reset(pSel);
2756 sqlite3_bind_null(pSel, 2);
2757 if( p->rc ) return;
2760 memset(pIter, 0, sizeof(*pIter));
2761 pIter->pSeg = pSeg;
2762 pIter->flags |= FTS5_SEGITER_ONETERM;
2763 if( iPg>=0 ){
2764 pIter->iLeafPgno = iPg - 1;
2765 fts5SegIterNextPage(p, pIter);
2766 fts5SegIterSetNext(p, pIter);
2768 if( pIter->pLeaf ){
2769 const u8 *a = pIter->pLeaf->p;
2770 int iTermOff = 0;
2772 pIter->iPgidxOff = pIter->pLeaf->szLeaf;
2773 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], iTermOff);
2774 pIter->iLeafOffset = iTermOff;
2775 fts5SegIterLoadTerm(p, pIter, 0);
2776 fts5SegIterLoadNPos(p, pIter);
2777 if( bDlidx ) fts5SegIterLoadDlidx(p, pIter);
2779 assert( p->rc!=SQLITE_OK ||
2780 fts5BufferCompareBlob(&pIter->term, (const u8*)pTerm, nTerm)>0
2786 ** Initialize the object pIter to point to term pTerm/nTerm within the
2787 ** in-memory hash table. If there is no such term in the hash-table, the
2788 ** iterator is set to EOF.
2790 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2791 ** an error has already occurred when this function is called, it is a no-op.
2793 static void fts5SegIterHashInit(
2794 Fts5Index *p, /* FTS5 backend */
2795 const u8 *pTerm, int nTerm, /* Term to seek to */
2796 int flags, /* Mask of FTS5INDEX_XXX flags */
2797 Fts5SegIter *pIter /* Object to populate */
2799 int nList = 0;
2800 const u8 *z = 0;
2801 int n = 0;
2802 Fts5Data *pLeaf = 0;
2804 assert( p->pHash );
2805 assert( p->rc==SQLITE_OK );
2807 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2808 const u8 *pList = 0;
2810 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2811 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &n, &pList, &nList);
2812 if( pList ){
2813 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2814 if( pLeaf ){
2815 pLeaf->p = (u8*)pList;
2819 /* The call to sqlite3Fts5HashScanInit() causes the hash table to
2820 ** fill the size field of all existing position lists. This means they
2821 ** can no longer be appended to. Since the only scenario in which they
2822 ** can be appended to is if the previous operation on this table was
2823 ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this
2824 ** possibility altogether. */
2825 p->bDelete = 0;
2826 }else{
2827 p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
2828 (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
2830 if( pLeaf ){
2831 pLeaf->p = (u8*)&pLeaf[1];
2833 z = pTerm;
2834 n = nTerm;
2835 pIter->flags |= FTS5_SEGITER_ONETERM;
2838 if( pLeaf ){
2839 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2840 pLeaf->nn = pLeaf->szLeaf = nList;
2841 pIter->pLeaf = pLeaf;
2842 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2843 pIter->iEndofDoclist = pLeaf->nn;
2845 if( flags & FTS5INDEX_QUERY_DESC ){
2846 pIter->flags |= FTS5_SEGITER_REVERSE;
2847 fts5SegIterReverseInitPage(p, pIter);
2848 }else{
2849 fts5SegIterLoadNPos(p, pIter);
2853 fts5SegIterSetNext(p, pIter);
2857 ** Array ap[] contains n elements. Release each of these elements using
2858 ** fts5DataRelease(). Then free the array itself using sqlite3_free().
2860 static void fts5IndexFreeArray(Fts5Data **ap, int n){
2861 if( ap ){
2862 int ii;
2863 for(ii=0; ii<n; ii++){
2864 fts5DataRelease(ap[ii]);
2866 sqlite3_free(ap);
2871 ** Decrement the ref-count of the object passed as the only argument. If it
2872 ** reaches 0, free it and its contents.
2874 static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){
2875 if( p ){
2876 p->nRef--;
2877 if( p->nRef<=0 ){
2878 int ii;
2879 for(ii=0; ii<p->nTombstone; ii++){
2880 fts5DataRelease(p->apTombstone[ii]);
2882 sqlite3_free(p);
2888 ** Zero the iterator passed as the only argument.
2890 static void fts5SegIterClear(Fts5SegIter *pIter){
2891 fts5BufferFree(&pIter->term);
2892 fts5DataRelease(pIter->pLeaf);
2893 fts5DataRelease(pIter->pNextLeaf);
2894 fts5TombstoneArrayDelete(pIter->pTombArray);
2895 fts5DlidxIterFree(pIter->pDlidx);
2896 sqlite3_free(pIter->aRowidOffset);
2897 memset(pIter, 0, sizeof(Fts5SegIter));
#ifdef SQLITE_DEBUG

/*
** This function is used as part of the big assert() procedure implemented by
** fts5AssertMultiIterSetup(). It ensures that the result currently stored
** in *pRes is the correct result of comparing the current positions of the
** two iterators.
**
** Nothing is checked if both iterators are at EOF (pLeaf==0).
*/
static void fts5AssertComparisonResult(
  Fts5Iter *pIter,
  Fts5SegIter *p1,
  Fts5SegIter *p2,
  Fts5CResult *pRes
){
  int i1 = p1 - pIter->aSeg;      /* Index of p1 within pIter->aSeg[] */
  int i2 = p2 - pIter->aSeg;      /* Index of p2 within pIter->aSeg[] */

  if( p1->pLeaf || p2->pLeaf ){
    if( p1->pLeaf==0 ){
      assert( pRes->iFirst==i2 );
    }else if( p2->pLeaf==0 ){
      assert( pRes->iFirst==i1 );
    }else{
      /* Compare terms first; on a tie, the shorter term sorts first. */
      int nMin = MIN(p1->term.n, p2->term.n);
      int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
      if( res==0 ) res = p1->term.n - p2->term.n;

      if( res==0 ){
        /* Same term: order by rowid, reversed if pIter->bRev is set. */
        assert( pRes->bTermEq==1 );
        assert( p1->iRowid!=p2->iRowid );
        res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
      }else{
        assert( pRes->bTermEq==0 );
      }

      if( res<0 ){
        assert( pRes->iFirst==i1 );
      }else{
        assert( pRes->iFirst==i2 );
      }
    }
  }
}

/*
** This function is a no-op unless SQLITE_DEBUG is defined when this module
** is compiled. In that case, this function is essentially an assert()
** statement used to verify that the contents of the pIter->aFirst[] array
** are correct.
**
** Nothing is checked if p->rc is not SQLITE_OK.
*/
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int i;

    assert( (pFirst->pLeaf==0)==pIter->base.bEof );

    /* Check that pIter->iSwitchRowid is set correctly. */
    for(i=0; i<pIter->nSeg; i++){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      assert( p1==pFirst
           || p1->pLeaf==0
           || fts5BufferCompare(&pFirst->term, &p1->term)
           || p1->iRowid==pIter->iSwitchRowid
           || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
      );
    }

    /* Check the aFirst[] entries that compare adjacent pairs of aSeg[]. */
    for(i=0; i<pIter->nSeg; i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      Fts5SegIter *p2 = &pIter->aSeg[i+1];
      Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }

    /* Check aFirst[] entries that compare the results of two other
    ** aFirst[] entries. */
    for(i=1; i<(pIter->nSeg / 2); i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
      Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
      Fts5CResult *pRes = &pIter->aFirst[i];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }
  }
}
#else
# define fts5AssertMultiIterSetup(x,y)
#endif
2988 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2990 ** If the returned value is non-zero, then it is the index of an entry
2991 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2992 ** to a key that is a duplicate of another, higher priority,
2993 ** segment-iterator in the pSeg->aSeg[] array.
2995 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2996 int i1; /* Index of left-hand Fts5SegIter */
2997 int i2; /* Index of right-hand Fts5SegIter */
2998 int iRes;
2999 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
3000 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
3001 Fts5CResult *pRes = &pIter->aFirst[iOut];
3003 assert( iOut<pIter->nSeg && iOut>0 );
3004 assert( pIter->bRev==0 || pIter->bRev==1 );
3006 if( iOut>=(pIter->nSeg/2) ){
3007 i1 = (iOut - pIter->nSeg/2) * 2;
3008 i2 = i1 + 1;
3009 }else{
3010 i1 = pIter->aFirst[iOut*2].iFirst;
3011 i2 = pIter->aFirst[iOut*2+1].iFirst;
3013 p1 = &pIter->aSeg[i1];
3014 p2 = &pIter->aSeg[i2];
3016 pRes->bTermEq = 0;
3017 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
3018 iRes = i2;
3019 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
3020 iRes = i1;
3021 }else{
3022 int res = fts5BufferCompare(&p1->term, &p2->term);
3023 if( res==0 ){
3024 assert_nc( i2>i1 );
3025 assert_nc( i2!=0 );
3026 pRes->bTermEq = 1;
3027 if( p1->iRowid==p2->iRowid ){
3028 return i2;
3030 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
3032 assert( res!=0 );
3033 if( res<0 ){
3034 iRes = i1;
3035 }else{
3036 iRes = i2;
3040 pRes->iFirst = (u16)iRes;
3041 return 0;
3045 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
3046 ** It is an error if leaf iLeafPgno does not exist. Unless the db is
3047 ** a 'secure-delete' db, if it contains no rowids then this is also an error.
3049 static void fts5SegIterGotoPage(
3050 Fts5Index *p, /* FTS5 backend object */
3051 Fts5SegIter *pIter, /* Iterator to advance */
3052 int iLeafPgno
3054 assert( iLeafPgno>pIter->iLeafPgno );
3056 if( iLeafPgno>pIter->pSeg->pgnoLast ){
3057 p->rc = FTS5_CORRUPT;
3058 }else{
3059 fts5DataRelease(pIter->pNextLeaf);
3060 pIter->pNextLeaf = 0;
3061 pIter->iLeafPgno = iLeafPgno-1;
3063 while( p->rc==SQLITE_OK ){
3064 int iOff;
3065 fts5SegIterNextPage(p, pIter);
3066 if( pIter->pLeaf==0 ) break;
3067 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
3068 if( iOff>0 ){
3069 u8 *a = pIter->pLeaf->p;
3070 int n = pIter->pLeaf->szLeaf;
3071 if( iOff<4 || iOff>=n ){
3072 p->rc = FTS5_CORRUPT;
3073 }else{
3074 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
3075 pIter->iLeafOffset = iOff;
3076 fts5SegIterLoadNPos(p, pIter);
3078 break;
3085 ** Advance the iterator passed as the second argument until it is at or
3086 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
3087 ** always advanced at least once.
3089 static void fts5SegIterNextFrom(
3090 Fts5Index *p, /* FTS5 backend object */
3091 Fts5SegIter *pIter, /* Iterator to advance */
3092 i64 iMatch /* Advance iterator at least this far */
3094 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
3095 Fts5DlidxIter *pDlidx = pIter->pDlidx;
3096 int iLeafPgno = pIter->iLeafPgno;
3097 int bMove = 1;
3099 assert( pIter->flags & FTS5_SEGITER_ONETERM );
3100 assert( pIter->pDlidx );
3101 assert( pIter->pLeaf );
3103 if( bRev==0 ){
3104 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
3105 iLeafPgno = fts5DlidxIterPgno(pDlidx);
3106 fts5DlidxIterNext(p, pDlidx);
3108 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
3109 if( iLeafPgno>pIter->iLeafPgno ){
3110 fts5SegIterGotoPage(p, pIter, iLeafPgno);
3111 bMove = 0;
3113 }else{
3114 assert( pIter->pNextLeaf==0 );
3115 assert( iMatch<pIter->iRowid );
3116 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
3117 fts5DlidxIterPrev(p, pDlidx);
3119 iLeafPgno = fts5DlidxIterPgno(pDlidx);
3121 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
3123 if( iLeafPgno<pIter->iLeafPgno ){
3124 pIter->iLeafPgno = iLeafPgno+1;
3125 fts5SegIterReverseNewPage(p, pIter);
3126 bMove = 0;
3131 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
3132 if( pIter->pLeaf==0 ) break;
3133 if( bRev==0 && pIter->iRowid>=iMatch ) break;
3134 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
3135 bMove = 1;
3136 }while( p->rc==SQLITE_OK );
3140 ** Free the iterator object passed as the second argument.
3142 static void fts5MultiIterFree(Fts5Iter *pIter){
3143 if( pIter ){
3144 int i;
3145 for(i=0; i<pIter->nSeg; i++){
3146 fts5SegIterClear(&pIter->aSeg[i]);
3148 fts5BufferFree(&pIter->poslist);
3149 sqlite3_free(pIter);
3153 static void fts5MultiIterAdvanced(
3154 Fts5Index *p, /* FTS5 backend to iterate within */
3155 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
3156 int iChanged, /* Index of sub-iterator just advanced */
3157 int iMinset /* Minimum entry in aFirst[] to set */
3159 int i;
3160 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
3161 int iEq;
3162 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
3163 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
3164 assert( p->rc==SQLITE_OK );
3165 pSeg->xNext(p, pSeg, 0);
3166 i = pIter->nSeg + iEq;
3172 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
3173 ** points to the same term though - just a different rowid. This function
3174 ** attempts to update the contents of the pIter->aFirst[] accordingly.
3175 ** If it does so successfully, 0 is returned. Otherwise 1.
3177 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
3178 ** on the iterator instead. That function does the same as this one, except
3179 ** that it deals with more complicated cases as well.
3181 static int fts5MultiIterAdvanceRowid(
3182 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
3183 int iChanged, /* Index of sub-iterator just advanced */
3184 Fts5SegIter **ppFirst
3186 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
3188 if( pNew->iRowid==pIter->iSwitchRowid
3189 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
3191 int i;
3192 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
3193 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
3194 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
3195 Fts5CResult *pRes = &pIter->aFirst[i];
3197 assert( pNew->pLeaf );
3198 assert( pRes->bTermEq==0 || pOther->pLeaf );
3200 if( pRes->bTermEq ){
3201 if( pNew->iRowid==pOther->iRowid ){
3202 return 1;
3203 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
3204 pIter->iSwitchRowid = pOther->iRowid;
3205 pNew = pOther;
3206 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
3207 pIter->iSwitchRowid = pOther->iRowid;
3210 pRes->iFirst = (u16)(pNew - pIter->aSeg);
3211 if( i==1 ) break;
3213 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
3217 *ppFirst = pNew;
3218 return 0;
3222 ** Set the pIter->bEof variable based on the state of the sub-iterators.
3224 static void fts5MultiIterSetEof(Fts5Iter *pIter){
3225 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3226 pIter->base.bEof = pSeg->pLeaf==0;
3227 pIter->iSwitchRowid = pSeg->iRowid;
/*
** The argument to these macros must be a pointer to an Fts5Data structure
** containing a tombstone hash page.
**
** TOMBSTONE_KEYSIZE() returns the key-size of the hash-page: 4 if the
** first byte of the page is 4, or 8 otherwise.
**
** TOMBSTONE_NSLOT() returns the number of key slots on the page. For a
** page of more than 16 bytes this is the page size less the 8-byte header,
** divided by the key size. Smaller pages are treated as having one slot.
**
** The macro argument is parenthesized so that any pointer expression
** (e.g. "&sData") may be passed. It is evaluated more than once, so it
** must not have side effects.
*/
#define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)

#define TOMBSTONE_NSLOT(pPg)   \
  (((pPg)->nn > 16) ? (((pPg)->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
3240 ** Query a single tombstone hash table for rowid iRowid. Return true if
3241 ** it is found or false otherwise. The tombstone hash table is one of
3242 ** nHashTable tables.
3244 static int fts5IndexTombstoneQuery(
3245 Fts5Data *pHash, /* Hash table page to query */
3246 int nHashTable, /* Number of pages attached to segment */
3247 u64 iRowid /* Rowid to query hash for */
3249 const int szKey = TOMBSTONE_KEYSIZE(pHash);
3250 const int nSlot = TOMBSTONE_NSLOT(pHash);
3251 int iSlot = (iRowid / nHashTable) % nSlot;
3252 int nCollide = nSlot;
3254 if( iRowid==0 ){
3255 return pHash->p[1];
3256 }else if( szKey==4 ){
3257 u32 *aSlot = (u32*)&pHash->p[8];
3258 while( aSlot[iSlot] ){
3259 if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1;
3260 if( nCollide--==0 ) break;
3261 iSlot = (iSlot+1)%nSlot;
3263 }else{
3264 u64 *aSlot = (u64*)&pHash->p[8];
3265 while( aSlot[iSlot] ){
3266 if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1;
3267 if( nCollide--==0 ) break;
3268 iSlot = (iSlot+1)%nSlot;
3272 return 0;
3276 ** Return true if the iterator passed as the only argument points
3277 ** to an segment entry for which there is a tombstone. Return false
3278 ** if there is no tombstone or if the iterator is already at EOF.
3280 static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
3281 int iFirst = pIter->aFirst[1].iFirst;
3282 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
3283 Fts5TombstoneArray *pArray = pSeg->pTombArray;
3285 if( pSeg->pLeaf && pArray ){
3286 /* Figure out which page the rowid might be present on. */
3287 int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone;
3288 assert( iPg>=0 );
3290 /* If tombstone hash page iPg has not yet been loaded from the
3291 ** database, load it now. */
3292 if( pArray->apTombstone[iPg]==0 ){
3293 pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
3294 FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)
3296 if( pArray->apTombstone[iPg]==0 ) return 0;
3299 return fts5IndexTombstoneQuery(
3300 pArray->apTombstone[iPg],
3301 pArray->nTombstone,
3302 pSeg->iRowid
3306 return 0;
3310 ** Move the iterator to the next entry.
3312 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
3313 ** considered an error if the iterator reaches EOF, or if it is already at
3314 ** EOF when this function is called.
3316 static void fts5MultiIterNext(
3317 Fts5Index *p,
3318 Fts5Iter *pIter,
3319 int bFrom, /* True if argument iFrom is valid */
3320 i64 iFrom /* Advance at least as far as this */
3322 int bUseFrom = bFrom;
3323 assert( pIter->base.bEof==0 );
3324 while( p->rc==SQLITE_OK ){
3325 int iFirst = pIter->aFirst[1].iFirst;
3326 int bNewTerm = 0;
3327 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
3328 assert( p->rc==SQLITE_OK );
3329 if( bUseFrom && pSeg->pDlidx ){
3330 fts5SegIterNextFrom(p, pSeg, iFrom);
3331 }else{
3332 pSeg->xNext(p, pSeg, &bNewTerm);
3335 if( pSeg->pLeaf==0 || bNewTerm
3336 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
3338 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
3339 fts5MultiIterSetEof(pIter);
3340 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
3341 if( pSeg->pLeaf==0 ) return;
3344 fts5AssertMultiIterSetup(p, pIter);
3345 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
3346 if( (pIter->bSkipEmpty==0 || pSeg->nPos)
3347 && 0==fts5MultiIterIsDeleted(pIter)
3349 pIter->xSetOutputs(pIter, pSeg);
3350 return;
3352 bUseFrom = 0;
3356 static void fts5MultiIterNext2(
3357 Fts5Index *p,
3358 Fts5Iter *pIter,
3359 int *pbNewTerm /* OUT: True if *might* be new term */
3361 assert( pIter->bSkipEmpty );
3362 if( p->rc==SQLITE_OK ){
3363 *pbNewTerm = 0;
3365 int iFirst = pIter->aFirst[1].iFirst;
3366 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
3367 int bNewTerm = 0;
3369 assert( p->rc==SQLITE_OK );
3370 pSeg->xNext(p, pSeg, &bNewTerm);
3371 if( pSeg->pLeaf==0 || bNewTerm
3372 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
3374 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
3375 fts5MultiIterSetEof(pIter);
3376 *pbNewTerm = 1;
3378 fts5AssertMultiIterSetup(p, pIter);
3380 }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter))
3381 && (p->rc==SQLITE_OK)
3386 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
3387 UNUSED_PARAM2(pUnused1, pUnused2);
3390 static Fts5Iter *fts5MultiIterAlloc(
3391 Fts5Index *p, /* FTS5 backend to iterate within */
3392 int nSeg
3394 Fts5Iter *pNew;
3395 i64 nSlot; /* Power of two >= nSeg */
3397 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
3398 pNew = fts5IdxMalloc(p,
3399 sizeof(Fts5Iter) + /* pNew */
3400 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
3401 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
3403 if( pNew ){
3404 pNew->nSeg = nSlot;
3405 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
3406 pNew->pIndex = p;
3407 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
3409 return pNew;
3412 static void fts5PoslistCallback(
3413 Fts5Index *pUnused,
3414 void *pContext,
3415 const u8 *pChunk, int nChunk
3417 UNUSED_PARAM(pUnused);
3418 assert_nc( nChunk>=0 );
3419 if( nChunk>0 ){
3420 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
3424 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
3425 struct PoslistCallbackCtx {
3426 Fts5Buffer *pBuf; /* Append to this buffer */
3427 Fts5Colset *pColset; /* Restrict matches to this column */
3428 int eState; /* See above */
3431 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
3432 struct PoslistOffsetsCtx {
3433 Fts5Buffer *pBuf; /* Append to this buffer */
3434 Fts5Colset *pColset; /* Restrict matches to this column */
3435 int iRead;
3436 int iWrite;
3440 ** TODO: Make this more efficient!
3442 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
3443 int i;
3444 for(i=0; i<pColset->nCol; i++){
3445 if( pColset->aiCol[i]==iCol ) return 1;
3447 return 0;
3450 static void fts5PoslistOffsetsCallback(
3451 Fts5Index *pUnused,
3452 void *pContext,
3453 const u8 *pChunk, int nChunk
3455 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
3456 UNUSED_PARAM(pUnused);
3457 assert_nc( nChunk>=0 );
3458 if( nChunk>0 ){
3459 int i = 0;
3460 while( i<nChunk ){
3461 int iVal;
3462 i += fts5GetVarint32(&pChunk[i], iVal);
3463 iVal += pCtx->iRead - 2;
3464 pCtx->iRead = iVal;
3465 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
3466 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
3467 pCtx->iWrite = iVal;
3473 static void fts5PoslistFilterCallback(
3474 Fts5Index *pUnused,
3475 void *pContext,
3476 const u8 *pChunk, int nChunk
3478 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
3479 UNUSED_PARAM(pUnused);
3480 assert_nc( nChunk>=0 );
3481 if( nChunk>0 ){
3482 /* Search through to find the first varint with value 1. This is the
3483 ** start of the next columns hits. */
3484 int i = 0;
3485 int iStart = 0;
3487 if( pCtx->eState==2 ){
3488 int iCol;
3489 fts5FastGetVarint32(pChunk, i, iCol);
3490 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
3491 pCtx->eState = 1;
3492 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
3493 }else{
3494 pCtx->eState = 0;
3498 do {
3499 while( i<nChunk && pChunk[i]!=0x01 ){
3500 while( pChunk[i] & 0x80 ) i++;
3501 i++;
3503 if( pCtx->eState ){
3504 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3506 if( i<nChunk ){
3507 int iCol;
3508 iStart = i;
3509 i++;
3510 if( i>=nChunk ){
3511 pCtx->eState = 2;
3512 }else{
3513 fts5FastGetVarint32(pChunk, i, iCol);
3514 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
3515 if( pCtx->eState ){
3516 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3517 iStart = i;
3521 }while( i<nChunk );
3525 static void fts5ChunkIterate(
3526 Fts5Index *p, /* Index object */
3527 Fts5SegIter *pSeg, /* Poslist of this iterator */
3528 void *pCtx, /* Context pointer for xChunk callback */
3529 void (*xChunk)(Fts5Index*, void*, const u8*, int)
3531 int nRem = pSeg->nPos; /* Number of bytes still to come */
3532 Fts5Data *pData = 0;
3533 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3534 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
3535 int pgno = pSeg->iLeafPgno;
3536 int pgnoSave = 0;
3538 /* This function does not work with detail=none databases. */
3539 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
3541 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
3542 pgnoSave = pgno+1;
3545 while( 1 ){
3546 xChunk(p, pCtx, pChunk, nChunk);
3547 nRem -= nChunk;
3548 fts5DataRelease(pData);
3549 if( nRem<=0 ){
3550 break;
3551 }else if( pSeg->pSeg==0 ){
3552 p->rc = FTS5_CORRUPT;
3553 return;
3554 }else{
3555 pgno++;
3556 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
3557 if( pData==0 ) break;
3558 pChunk = &pData->p[4];
3559 nChunk = MIN(nRem, pData->szLeaf - 4);
3560 if( pgno==pgnoSave ){
3561 assert( pSeg->pNextLeaf==0 );
3562 pSeg->pNextLeaf = pData;
3563 pData = 0;
3570 ** Iterator pIter currently points to a valid entry (not EOF). This
3571 ** function appends the position list data for the current entry to
3572 ** buffer pBuf. It does not make a copy of the position-list size
3573 ** field.
3575 static void fts5SegiterPoslist(
3576 Fts5Index *p,
3577 Fts5SegIter *pSeg,
3578 Fts5Colset *pColset,
3579 Fts5Buffer *pBuf
3581 assert( pBuf!=0 );
3582 assert( pSeg!=0 );
3583 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
3584 assert( pBuf->p!=0 );
3585 assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
3586 memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
3587 if( pColset==0 ){
3588 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3589 }else{
3590 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3591 PoslistCallbackCtx sCtx;
3592 sCtx.pBuf = pBuf;
3593 sCtx.pColset = pColset;
3594 sCtx.eState = fts5IndexColsetTest(pColset, 0);
3595 assert( sCtx.eState==0 || sCtx.eState==1 );
3596 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3597 }else{
3598 PoslistOffsetsCtx sCtx;
3599 memset(&sCtx, 0, sizeof(sCtx));
3600 sCtx.pBuf = pBuf;
3601 sCtx.pColset = pColset;
3602 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3609 ** Parameter pPos points to a buffer containing a position list, size nPos.
3610 ** This function filters it according to pColset (which must be non-NULL)
3611 ** and sets pIter->base.pData/nData to point to the new position list.
3612 ** If memory is required for the new position list, use buffer pIter->poslist.
3613 ** Or, if the new position list is a contiguous subset of the input, set
3614 ** pIter->base.pData/nData to point directly to it.
3616 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
3617 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
3618 ** before returning.
3620 static void fts5IndexExtractColset(
3621 int *pRc,
3622 Fts5Colset *pColset, /* Colset to filter on */
3623 const u8 *pPos, int nPos, /* Position list */
3624 Fts5Iter *pIter
3626 if( *pRc==SQLITE_OK ){
3627 const u8 *p = pPos;
3628 const u8 *aCopy = p;
3629 const u8 *pEnd = &p[nPos]; /* One byte past end of position list */
3630 int i = 0;
3631 int iCurrent = 0;
3633 if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
3634 return;
3637 while( 1 ){
3638 while( pColset->aiCol[i]<iCurrent ){
3639 i++;
3640 if( i==pColset->nCol ){
3641 pIter->base.pData = pIter->poslist.p;
3642 pIter->base.nData = pIter->poslist.n;
3643 return;
3647 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3648 ** not part of a varint */
3649 while( p<pEnd && *p!=0x01 ){
3650 while( *p++ & 0x80 );
3653 if( pColset->aiCol[i]==iCurrent ){
3654 if( pColset->nCol==1 ){
3655 pIter->base.pData = aCopy;
3656 pIter->base.nData = p-aCopy;
3657 return;
3659 fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
3661 if( p>=pEnd ){
3662 pIter->base.pData = pIter->poslist.p;
3663 pIter->base.nData = pIter->poslist.n;
3664 return;
3666 aCopy = p++;
3667 iCurrent = *p++;
3668 if( iCurrent & 0x80 ){
3669 p--;
3670 p += fts5GetVarint32(p, iCurrent);
3678 ** xSetOutputs callback used by detail=none tables.
3680 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3681 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3682 pIter->base.iRowid = pSeg->iRowid;
3683 pIter->base.nData = pSeg->nPos;
3687 ** xSetOutputs callback used by detail=full and detail=col tables when no
3688 ** column filters are specified.
3690 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3691 pIter->base.iRowid = pSeg->iRowid;
3692 pIter->base.nData = pSeg->nPos;
3694 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3695 assert( pIter->pColset==0 );
3697 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3698 /* All data is stored on the current page. Populate the output
3699 ** variables to point into the body of the page object. */
3700 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3701 }else{
3702 /* The data is distributed over two or more pages. Copy it into the
3703 ** Fts5Iter.poslist buffer and then set the output pointer to point
3704 ** to this buffer. */
3705 fts5BufferZero(&pIter->poslist);
3706 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3707 pIter->base.pData = pIter->poslist.p;
3712 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3713 ** against no columns at all).
3715 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3716 UNUSED_PARAM(pSeg);
3717 pIter->base.nData = 0;
3721 ** xSetOutputs callback used by detail=col when there is a column filter
3722 ** and there are 100 or more columns. Also called as a fallback from
3723 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3725 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3726 fts5BufferZero(&pIter->poslist);
3727 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3728 pIter->base.iRowid = pSeg->iRowid;
3729 pIter->base.pData = pIter->poslist.p;
3730 pIter->base.nData = pIter->poslist.n;
3734 ** xSetOutputs callback used when:
3736 ** * detail=col,
3737 ** * there is a column filter, and
3738 ** * the table contains 100 or fewer columns.
3740 ** The last point is to ensure all column numbers are stored as
3741 ** single-byte varints.
3743 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3745 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3746 assert( pIter->pColset );
3748 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3749 fts5IterSetOutputs_Col(pIter, pSeg);
3750 }else{
3751 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3752 u8 *pEnd = (u8*)&a[pSeg->nPos];
3753 int iPrev = 0;
3754 int *aiCol = pIter->pColset->aiCol;
3755 int *aiColEnd = &aiCol[pIter->pColset->nCol];
3757 u8 *aOut = pIter->poslist.p;
3758 int iPrevOut = 0;
3760 pIter->base.iRowid = pSeg->iRowid;
3762 while( a<pEnd ){
3763 iPrev += (int)a++[0] - 2;
3764 while( *aiCol<iPrev ){
3765 aiCol++;
3766 if( aiCol==aiColEnd ) goto setoutputs_col_out;
3768 if( *aiCol==iPrev ){
3769 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3770 iPrevOut = iPrev;
3774 setoutputs_col_out:
3775 pIter->base.pData = pIter->poslist.p;
3776 pIter->base.nData = aOut - pIter->poslist.p;
3781 ** xSetOutputs callback used by detail=full when there is a column filter.
3783 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3784 Fts5Colset *pColset = pIter->pColset;
3785 pIter->base.iRowid = pSeg->iRowid;
3787 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3788 assert( pColset );
3790 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3791 /* All data is stored on the current page. Populate the output
3792 ** variables to point into the body of the page object. */
3793 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3794 int *pRc = &pIter->pIndex->rc;
3795 fts5BufferZero(&pIter->poslist);
3796 fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
3797 }else{
3798 /* The data is distributed over two or more pages. Copy it into the
3799 ** Fts5Iter.poslist buffer and then set the output pointer to point
3800 ** to this buffer. */
3801 fts5BufferZero(&pIter->poslist);
3802 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3803 pIter->base.pData = pIter->poslist.p;
3804 pIter->base.nData = pIter->poslist.n;
3808 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
3809 assert( pIter!=0 || (*pRc)!=SQLITE_OK );
3810 if( *pRc==SQLITE_OK ){
3811 Fts5Config *pConfig = pIter->pIndex->pConfig;
3812 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
3813 pIter->xSetOutputs = fts5IterSetOutputs_None;
3816 else if( pIter->pColset==0 ){
3817 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
3820 else if( pIter->pColset->nCol==0 ){
3821 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
3824 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
3825 pIter->xSetOutputs = fts5IterSetOutputs_Full;
3828 else{
3829 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3830 if( pConfig->nCol<=100 ){
3831 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
3832 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
3833 }else{
3834 pIter->xSetOutputs = fts5IterSetOutputs_Col;
3841 ** All the component segment-iterators of pIter have been set up. This
3842 ** functions finishes setup for iterator pIter itself.
3844 static void fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){
3845 int iIter;
3846 for(iIter=pIter->nSeg-1; iIter>0; iIter--){
3847 int iEq;
3848 if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){
3849 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
3850 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
3851 fts5MultiIterAdvanced(p, pIter, iEq, iIter);
3854 fts5MultiIterSetEof(pIter);
3855 fts5AssertMultiIterSetup(p, pIter);
3857 if( (pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter))
3858 || fts5MultiIterIsDeleted(pIter)
3860 fts5MultiIterNext(p, pIter, 0, 0);
3861 }else if( pIter->base.bEof==0 ){
3862 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
3863 pIter->xSetOutputs(pIter, pSeg);
3868 ** Allocate a new Fts5Iter object.
3870 ** The new object will be used to iterate through data in structure pStruct.
3871 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3872 ** is zero or greater, data from the first nSegment segments on level iLevel
3873 ** is merged.
3875 ** The iterator initially points to the first term/rowid entry in the
3876 ** iterated data.
3878 static void fts5MultiIterNew(
3879 Fts5Index *p, /* FTS5 backend to iterate within */
3880 Fts5Structure *pStruct, /* Structure of specific index */
3881 int flags, /* FTS5INDEX_QUERY_XXX flags */
3882 Fts5Colset *pColset, /* Colset to filter on (or NULL) */
3883 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
3884 int iLevel, /* Level to iterate (-1 for all) */
3885 int nSegment, /* Number of segments to merge (iLevel>=0) */
3886 Fts5Iter **ppOut /* New object */
3888 int nSeg = 0; /* Number of segment-iters in use */
3889 int iIter = 0; /* */
3890 int iSeg; /* Used to iterate through segments */
3891 Fts5StructureLevel *pLvl;
3892 Fts5Iter *pNew;
3894 assert( (pTerm==0 && nTerm==0) || iLevel<0 );
3896 /* Allocate space for the new multi-seg-iterator. */
3897 if( p->rc==SQLITE_OK ){
3898 if( iLevel<0 ){
3899 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3900 nSeg = pStruct->nSegment;
3901 nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH));
3902 }else{
3903 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
3906 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
3907 if( pNew==0 ){
3908 assert( p->rc!=SQLITE_OK );
3909 goto fts5MultiIterNew_post_check;
3911 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
3912 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
3913 pNew->pColset = pColset;
3914 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
3915 fts5IterSetOutputCb(&p->rc, pNew);
3918 /* Initialize each of the component segment iterators. */
3919 if( p->rc==SQLITE_OK ){
3920 if( iLevel<0 ){
3921 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
3922 if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH) ){
3923 /* Add a segment iterator for the current contents of the hash table. */
3924 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3925 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
3927 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
3928 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
3929 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
3930 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3931 if( pTerm==0 ){
3932 fts5SegIterInit(p, pSeg, pIter);
3933 }else{
3934 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
3938 }else{
3939 pLvl = &pStruct->aLevel[iLevel];
3940 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
3941 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
3944 assert( iIter==nSeg );
3947 /* If the above was successful, each component iterator now points
3948 ** to the first entry in its segment. In this case initialize the
3949 ** aFirst[] array. Or, if an error has occurred, free the iterator
3950 ** object and set the output variable to NULL. */
3951 if( p->rc==SQLITE_OK ){
3952 fts5MultiIterFinishSetup(p, pNew);
3953 }else{
3954 fts5MultiIterFree(pNew);
3955 *ppOut = 0;
3958 fts5MultiIterNew_post_check:
3959 assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
3960 return;
3964 ** Create an Fts5Iter that iterates through the doclist provided
3965 ** as the second argument.
3967 static void fts5MultiIterNew2(
3968 Fts5Index *p, /* FTS5 backend to iterate within */
3969 Fts5Data *pData, /* Doclist to iterate through */
3970 int bDesc, /* True for descending rowid order */
3971 Fts5Iter **ppOut /* New object */
3973 Fts5Iter *pNew;
3974 pNew = fts5MultiIterAlloc(p, 2);
3975 if( pNew ){
3976 Fts5SegIter *pIter = &pNew->aSeg[1];
3977 pIter->flags = FTS5_SEGITER_ONETERM;
3978 if( pData->szLeaf>0 ){
3979 pIter->pLeaf = pData;
3980 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
3981 pIter->iEndofDoclist = pData->nn;
3982 pNew->aFirst[1].iFirst = 1;
3983 if( bDesc ){
3984 pNew->bRev = 1;
3985 pIter->flags |= FTS5_SEGITER_REVERSE;
3986 fts5SegIterReverseInitPage(p, pIter);
3987 }else{
3988 fts5SegIterLoadNPos(p, pIter);
3990 pData = 0;
3991 }else{
3992 pNew->base.bEof = 1;
3994 fts5SegIterSetNext(p, pIter);
3996 *ppOut = pNew;
3999 fts5DataRelease(pData);
4003 ** Return true if the iterator is at EOF or if an error has occurred.
4004 ** False otherwise.
4006 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
4007 assert( pIter!=0 || p->rc!=SQLITE_OK );
4008 assert( p->rc!=SQLITE_OK
4009 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
4011 return (p->rc || pIter->base.bEof);
4015 ** Return the rowid of the entry that the iterator currently points
4016 ** to. If the iterator points to EOF when this function is called the
4017 ** results are undefined.
4019 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
4020 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
4021 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
4025 ** Move the iterator to the next entry at or following iMatch.
4027 static void fts5MultiIterNextFrom(
4028 Fts5Index *p,
4029 Fts5Iter *pIter,
4030 i64 iMatch
4032 while( 1 ){
4033 i64 iRowid;
4034 fts5MultiIterNext(p, pIter, 1, iMatch);
4035 if( fts5MultiIterEof(p, pIter) ) break;
4036 iRowid = fts5MultiIterRowid(pIter);
4037 if( pIter->bRev==0 && iRowid>=iMatch ) break;
4038 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
4043 ** Return a pointer to a buffer containing the term associated with the
4044 ** entry that the iterator currently points to.
4046 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
4047 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4048 *pn = p->term.n;
4049 return p->term.p;
4053 ** Allocate a new segment-id for the structure pStruct. The new segment
4054 ** id must be between 1 and 65335 inclusive, and must not be used by
4055 ** any currently existing segment. If a free segment id cannot be found,
4056 ** SQLITE_FULL is returned.
4058 ** If an error has already occurred, this function is a no-op. 0 is
4059 ** returned in this case.
4061 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
4062 int iSegid = 0;
4064 if( p->rc==SQLITE_OK ){
4065 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
4066 p->rc = SQLITE_FULL;
4067 }else{
4068 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
4069 ** array is 63 elements, or 252 bytes, in size. */
4070 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
4071 int iLvl, iSeg;
4072 int i;
4073 u32 mask;
4074 memset(aUsed, 0, sizeof(aUsed));
4075 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4076 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
4077 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
4078 if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
4079 aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
4084 for(i=0; aUsed[i]==0xFFFFFFFF; i++);
4085 mask = aUsed[i];
4086 for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
4087 iSegid += 1 + i*32;
4089 #ifdef SQLITE_DEBUG
4090 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4091 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
4092 assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
4095 assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
4098 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
4099 if( p->rc==SQLITE_OK ){
4100 u8 aBlob[2] = {0xff, 0xff};
4101 sqlite3_bind_int(pIdxSelect, 1, iSegid);
4102 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
4103 assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
4104 p->rc = sqlite3_reset(pIdxSelect);
4105 sqlite3_bind_null(pIdxSelect, 2);
4108 #endif
4112 return iSegid;
4116 ** Discard all data currently cached in the hash-tables.
4118 static void fts5IndexDiscardData(Fts5Index *p){
4119 assert( p->pHash || p->nPendingData==0 );
4120 if( p->pHash ){
4121 sqlite3Fts5HashClear(p->pHash);
4122 p->nPendingData = 0;
4123 p->nPendingRow = 0;
4124 p->flushRc = SQLITE_OK;
4126 p->nContentlessDelete = 0;
4130 ** Return the size of the prefix, in bytes, that buffer
4131 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
4133 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
4134 ** than buffer (pOld/nOld).
4136 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
4137 int i;
4138 for(i=0; i<nOld; i++){
4139 if( pOld[i]!=pNew[i] ) break;
4141 return i;
4144 static void fts5WriteDlidxClear(
4145 Fts5Index *p,
4146 Fts5SegWriter *pWriter,
4147 int bFlush /* If true, write dlidx to disk */
4149 int i;
4150 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
4151 for(i=0; i<pWriter->nDlidx; i++){
4152 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
4153 if( pDlidx->buf.n==0 ) break;
4154 if( bFlush ){
4155 assert( pDlidx->pgno!=0 );
4156 fts5DataWrite(p,
4157 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
4158 pDlidx->buf.p, pDlidx->buf.n
4161 sqlite3Fts5BufferZero(&pDlidx->buf);
4162 pDlidx->bPrevValid = 0;
4167 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
4168 ** Any new array elements are zeroed before returning.
4170 static int fts5WriteDlidxGrow(
4171 Fts5Index *p,
4172 Fts5SegWriter *pWriter,
4173 int nLvl
4175 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
4176 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
4177 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
4179 if( aDlidx==0 ){
4180 p->rc = SQLITE_NOMEM;
4181 }else{
4182 size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
4183 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
4184 pWriter->aDlidx = aDlidx;
4185 pWriter->nDlidx = nLvl;
4188 return p->rc;
4192 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
4193 ** enough, flush it to disk and return 1. Otherwise discard it and return
4194 ** zero.
4196 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
4197 int bFlag = 0;
4199 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
4200 ** to the database, also write the doclist-index to disk. */
4201 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
4202 bFlag = 1;
4204 fts5WriteDlidxClear(p, pWriter, bFlag);
4205 pWriter->nEmpty = 0;
4206 return bFlag;
4210 ** This function is called whenever processing of the doclist for the
4211 ** last term on leaf page (pWriter->iBtPage) is completed.
4213 ** The doclist-index for that term is currently stored in-memory within the
4214 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
4215 ** writes it out to disk. Or, if it is too small to bother with, discards
4216 ** it.
4218 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
4220 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
4221 int bFlag;
4223 assert( pWriter->iBtPage || pWriter->nEmpty==0 );
4224 if( pWriter->iBtPage==0 ) return;
4225 bFlag = fts5WriteFlushDlidx(p, pWriter);
4227 if( p->rc==SQLITE_OK ){
4228 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
4229 /* The following was already done in fts5WriteInit(): */
4230 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
4231 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
4232 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
4233 sqlite3_step(p->pIdxWriter);
4234 p->rc = sqlite3_reset(p->pIdxWriter);
4235 sqlite3_bind_null(p->pIdxWriter, 2);
4237 pWriter->iBtPage = 0;
4241 ** This is called once for each leaf page except the first that contains
4242 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
4243 ** is larger than all terms written to earlier leaves, and equal to or
4244 ** smaller than the first term on the new leaf.
4246 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
4247 ** has already occurred when this function is called, it is a no-op.
4249 static void fts5WriteBtreeTerm(
4250 Fts5Index *p, /* FTS5 backend object */
4251 Fts5SegWriter *pWriter, /* Writer object */
4252 int nTerm, const u8 *pTerm /* First term on new page */
4254 fts5WriteFlushBtree(p, pWriter);
4255 if( p->rc==SQLITE_OK ){
4256 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
4257 pWriter->iBtPage = pWriter->writer.pgno;
4262 ** This function is called when flushing a leaf page that contains no
4263 ** terms at all to disk.
4265 static void fts5WriteBtreeNoTerm(
4266 Fts5Index *p, /* FTS5 backend object */
4267 Fts5SegWriter *pWriter /* Writer object */
4269 /* If there were no rowids on the leaf page either and the doclist-index
4270 ** has already been started, append an 0x00 byte to it. */
4271 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
4272 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
4273 assert( pDlidx->bPrevValid );
4274 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
4277 /* Increment the "number of sequential leaves without a term" counter. */
4278 pWriter->nEmpty++;
4281 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
4282 i64 iRowid;
4283 int iOff;
4285 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
4286 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
4287 return iRowid;
4291 ** Rowid iRowid has just been appended to the current leaf page. It is the
4292 ** first on the page. This function appends an appropriate entry to the current
4293 ** doclist-index.
4295 static void fts5WriteDlidxAppend(
4296 Fts5Index *p,
4297 Fts5SegWriter *pWriter,
4298 i64 iRowid
4300 int i;
4301 int bDone = 0;
4303 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
4304 i64 iVal;
4305 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
4307 if( pDlidx->buf.n>=p->pConfig->pgsz ){
4308 /* The current doclist-index page is full. Write it to disk and push
4309 ** a copy of iRowid (which will become the first rowid on the next
4310 ** doclist-index leaf page) up into the next level of the b-tree
4311 ** hierarchy. If the node being flushed is currently the root node,
4312 ** also push its first rowid upwards. */
4313 pDlidx->buf.p[0] = 0x01; /* Not the root node */
4314 fts5DataWrite(p,
4315 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
4316 pDlidx->buf.p, pDlidx->buf.n
4318 fts5WriteDlidxGrow(p, pWriter, i+2);
4319 pDlidx = &pWriter->aDlidx[i];
4320 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
4321 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
4323 /* This was the root node. Push its first rowid up to the new root. */
4324 pDlidx[1].pgno = pDlidx->pgno;
4325 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
4326 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
4327 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
4328 pDlidx[1].bPrevValid = 1;
4329 pDlidx[1].iPrev = iFirst;
4332 sqlite3Fts5BufferZero(&pDlidx->buf);
4333 pDlidx->bPrevValid = 0;
4334 pDlidx->pgno++;
4335 }else{
4336 bDone = 1;
4339 if( pDlidx->bPrevValid ){
4340 iVal = (u64)iRowid - (u64)pDlidx->iPrev;
4341 }else{
4342 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
4343 assert( pDlidx->buf.n==0 );
4344 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
4345 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
4346 iVal = iRowid;
4349 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
4350 pDlidx->bPrevValid = 1;
4351 pDlidx->iPrev = iRowid;
4355 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
4356 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
4357 Fts5PageWriter *pPage = &pWriter->writer;
4358 i64 iRowid;
4360 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
4362 /* Set the szLeaf header field. */
4363 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
4364 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
4366 if( pWriter->bFirstTermInPage ){
4367 /* No term was written to this page. */
4368 assert( pPage->pgidx.n==0 );
4369 fts5WriteBtreeNoTerm(p, pWriter);
4370 }else{
4371 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
4372 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
4375 /* Write the page out to disk */
4376 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
4377 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
4379 /* Initialize the next page. */
4380 fts5BufferZero(&pPage->buf);
4381 fts5BufferZero(&pPage->pgidx);
4382 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
4383 pPage->iPrevPgidx = 0;
4384 pPage->pgno++;
4386 /* Increase the leaves written counter */
4387 pWriter->nLeafWritten++;
4389 /* The new leaf holds no terms or rowids */
4390 pWriter->bFirstTermInPage = 1;
4391 pWriter->bFirstRowidInPage = 1;
4395 ** Append term pTerm/nTerm to the segment being written by the writer passed
4396 ** as the second argument.
4398 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4399 ** already occurred, this function is a no-op.
4401 static void fts5WriteAppendTerm(
4402 Fts5Index *p,
4403 Fts5SegWriter *pWriter,
4404 int nTerm, const u8 *pTerm
4406 int nPrefix; /* Bytes of prefix compression for term */
4407 Fts5PageWriter *pPage = &pWriter->writer;
4408 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
4409 int nMin = MIN(pPage->term.n, nTerm);
4411 assert( p->rc==SQLITE_OK );
4412 assert( pPage->buf.n>=4 );
4413 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
4415 /* If the current leaf page is full, flush it to disk. */
4416 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
4417 if( pPage->buf.n>4 ){
4418 fts5WriteFlushLeaf(p, pWriter);
4419 if( p->rc!=SQLITE_OK ) return;
4421 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
4424 /* TODO1: Updating pgidx here. */
4425 pPgidx->n += sqlite3Fts5PutVarint(
4426 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
4428 pPage->iPrevPgidx = pPage->buf.n;
4429 #if 0
4430 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
4431 pPgidx->n += 2;
4432 #endif
4434 if( pWriter->bFirstTermInPage ){
4435 nPrefix = 0;
4436 if( pPage->pgno!=1 ){
4437 /* This is the first term on a leaf that is not the leftmost leaf in
4438 ** the segment b-tree. In this case it is necessary to add a term to
4439 ** the b-tree hierarchy that is (a) larger than the largest term
4440 ** already written to the segment and (b) smaller than or equal to
4441 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
4442 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
4443 ** previous term.
4445 ** Usually, the previous term is available in pPage->term. The exception
4446 ** is if this is the first term written in an incremental-merge step.
4447 ** In this case the previous term is not available, so just write a
4448 ** copy of (pTerm/nTerm) into the parent node. This is slightly
4449 ** inefficient, but still correct. */
4450 int n = nTerm;
4451 if( pPage->term.n ){
4452 n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4454 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
4455 if( p->rc!=SQLITE_OK ) return;
4456 pPage = &pWriter->writer;
4458 }else{
4459 nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4460 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
4463 /* Append the number of bytes of new data, then the term data itself
4464 ** to the page. */
4465 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
4466 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
4468 /* Update the Fts5PageWriter.term field. */
4469 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
4470 pWriter->bFirstTermInPage = 0;
4472 pWriter->bFirstRowidInPage = 0;
4473 pWriter->bFirstRowidInDoclist = 1;
4475 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
4476 pWriter->aDlidx[0].pgno = pPage->pgno;
4480 ** Append a rowid and position-list size field to the writers output.
4482 static void fts5WriteAppendRowid(
4483 Fts5Index *p,
4484 Fts5SegWriter *pWriter,
4485 i64 iRowid
4487 if( p->rc==SQLITE_OK ){
4488 Fts5PageWriter *pPage = &pWriter->writer;
4490 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
4491 fts5WriteFlushLeaf(p, pWriter);
4494 /* If this is to be the first rowid written to the page, set the
4495 ** rowid-pointer in the page-header. Also append a value to the dlidx
4496 ** buffer, in case a doclist-index is required. */
4497 if( pWriter->bFirstRowidInPage ){
4498 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
4499 fts5WriteDlidxAppend(p, pWriter, iRowid);
4502 /* Write the rowid. */
4503 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
4504 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
4505 }else{
4506 assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
4507 fts5BufferAppendVarint(&p->rc, &pPage->buf,
4508 (u64)iRowid - (u64)pWriter->iPrevRowid
4511 pWriter->iPrevRowid = iRowid;
4512 pWriter->bFirstRowidInDoclist = 0;
4513 pWriter->bFirstRowidInPage = 0;
4517 static void fts5WriteAppendPoslistData(
4518 Fts5Index *p,
4519 Fts5SegWriter *pWriter,
4520 const u8 *aData,
4521 int nData
4523 Fts5PageWriter *pPage = &pWriter->writer;
4524 const u8 *a = aData;
4525 int n = nData;
4527 assert( p->pConfig->pgsz>0 || p->rc!=SQLITE_OK );
4528 while( p->rc==SQLITE_OK
4529 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
4531 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
4532 int nCopy = 0;
4533 while( nCopy<nReq ){
4534 i64 dummy;
4535 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
4537 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
4538 a += nCopy;
4539 n -= nCopy;
4540 fts5WriteFlushLeaf(p, pWriter);
4542 if( n>0 ){
4543 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
4548 ** Flush any data cached by the writer object to the database. Free any
4549 ** allocations associated with the writer.
4551 static void fts5WriteFinish(
4552 Fts5Index *p,
4553 Fts5SegWriter *pWriter, /* Writer object */
4554 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
4556 int i;
4557 Fts5PageWriter *pLeaf = &pWriter->writer;
4558 if( p->rc==SQLITE_OK ){
4559 assert( pLeaf->pgno>=1 );
4560 if( pLeaf->buf.n>4 ){
4561 fts5WriteFlushLeaf(p, pWriter);
4563 *pnLeaf = pLeaf->pgno-1;
4564 if( pLeaf->pgno>1 ){
4565 fts5WriteFlushBtree(p, pWriter);
4568 fts5BufferFree(&pLeaf->term);
4569 fts5BufferFree(&pLeaf->buf);
4570 fts5BufferFree(&pLeaf->pgidx);
4571 fts5BufferFree(&pWriter->btterm);
4573 for(i=0; i<pWriter->nDlidx; i++){
4574 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4576 sqlite3_free(pWriter->aDlidx);
4579 static void fts5WriteInit(
4580 Fts5Index *p,
4581 Fts5SegWriter *pWriter,
4582 int iSegid
4584 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
4586 memset(pWriter, 0, sizeof(Fts5SegWriter));
4587 pWriter->iSegid = iSegid;
4589 fts5WriteDlidxGrow(p, pWriter, 1);
4590 pWriter->writer.pgno = 1;
4591 pWriter->bFirstTermInPage = 1;
4592 pWriter->iBtPage = 1;
4594 assert( pWriter->writer.buf.n==0 );
4595 assert( pWriter->writer.pgidx.n==0 );
4597 /* Grow the two buffers to pgsz + padding bytes in size. */
4598 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
4599 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
4601 if( p->pIdxWriter==0 ){
4602 Fts5Config *pConfig = p->pConfig;
4603 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
4604 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
4605 pConfig->zDb, pConfig->zName
4609 if( p->rc==SQLITE_OK ){
4610 /* Initialize the 4-byte leaf-page header to 0x00. */
4611 memset(pWriter->writer.buf.p, 0, 4);
4612 pWriter->writer.buf.n = 4;
4614 /* Bind the current output segment id to the index-writer. This is an
4615 ** optimization over binding the same value over and over as rows are
4616 ** inserted into %_idx by the current writer. */
4617 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
4622 ** Iterator pIter was used to iterate through the input segments of on an
4623 ** incremental merge operation. This function is called if the incremental
4624 ** merge step has finished but the input has not been completely exhausted.
4626 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
4627 int i;
4628 Fts5Buffer buf;
4629 memset(&buf, 0, sizeof(Fts5Buffer));
4630 for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
4631 Fts5SegIter *pSeg = &pIter->aSeg[i];
4632 if( pSeg->pSeg==0 ){
4633 /* no-op */
4634 }else if( pSeg->pLeaf==0 ){
4635 /* All keys from this input segment have been transfered to the output.
4636 ** Set both the first and last page-numbers to 0 to indicate that the
4637 ** segment is now empty. */
4638 pSeg->pSeg->pgnoLast = 0;
4639 pSeg->pSeg->pgnoFirst = 0;
4640 }else{
4641 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
4642 i64 iLeafRowid;
4643 Fts5Data *pData;
4644 int iId = pSeg->pSeg->iSegid;
4645 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
4647 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
4648 pData = fts5LeafRead(p, iLeafRowid);
4649 if( pData ){
4650 if( iOff>pData->szLeaf ){
4651 /* This can occur if the pages that the segments occupy overlap - if
4652 ** a single page has been assigned to more than one segment. In
4653 ** this case a prior iteration of this loop may have corrupted the
4654 ** segment currently being trimmed. */
4655 p->rc = FTS5_CORRUPT;
4656 }else{
4657 fts5BufferZero(&buf);
4658 fts5BufferGrow(&p->rc, &buf, pData->nn);
4659 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
4660 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
4661 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
4662 fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]);
4663 if( p->rc==SQLITE_OK ){
4664 /* Set the szLeaf field */
4665 fts5PutU16(&buf.p[2], (u16)buf.n);
4668 /* Set up the new page-index array */
4669 fts5BufferAppendVarint(&p->rc, &buf, 4);
4670 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
4671 && pSeg->iEndofDoclist<pData->szLeaf
4672 && pSeg->iPgidxOff<=pData->nn
4674 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
4675 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
4676 fts5BufferAppendBlob(&p->rc, &buf,
4677 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
4681 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
4682 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
4683 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
4685 fts5DataRelease(pData);
4689 fts5BufferFree(&buf);
4692 static void fts5MergeChunkCallback(
4693 Fts5Index *p,
4694 void *pCtx,
4695 const u8 *pChunk, int nChunk
4697 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
4698 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
4704 static void fts5IndexMergeLevel(
4705 Fts5Index *p, /* FTS5 backend object */
4706 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
4707 int iLvl, /* Level to read input from */
4708 int *pnRem /* Write up to this many output leaves */
4710 Fts5Structure *pStruct = *ppStruct;
4711 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4712 Fts5StructureLevel *pLvlOut;
4713 Fts5Iter *pIter = 0; /* Iterator to read input data */
4714 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
4715 int nInput; /* Number of input segments */
4716 Fts5SegWriter writer; /* Writer object */
4717 Fts5StructureSegment *pSeg; /* Output segment */
4718 Fts5Buffer term;
4719 int bOldest; /* True if the output segment is the oldest */
4720 int eDetail = p->pConfig->eDetail;
4721 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
4722 int bTermWritten = 0; /* True if current term already output */
4724 assert( iLvl<pStruct->nLevel );
4725 assert( pLvl->nMerge<=pLvl->nSeg );
4727 memset(&writer, 0, sizeof(Fts5SegWriter));
4728 memset(&term, 0, sizeof(Fts5Buffer));
4729 if( pLvl->nMerge ){
4730 pLvlOut = &pStruct->aLevel[iLvl+1];
4731 assert( pLvlOut->nSeg>0 );
4732 nInput = pLvl->nMerge;
4733 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
4735 fts5WriteInit(p, &writer, pSeg->iSegid);
4736 writer.writer.pgno = pSeg->pgnoLast+1;
4737 writer.iBtPage = 0;
4738 }else{
4739 int iSegid = fts5AllocateSegid(p, pStruct);
4741 /* Extend the Fts5Structure object as required to ensure the output
4742 ** segment exists. */
4743 if( iLvl==pStruct->nLevel-1 ){
4744 fts5StructureAddLevel(&p->rc, ppStruct);
4745 pStruct = *ppStruct;
4747 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
4748 if( p->rc ) return;
4749 pLvl = &pStruct->aLevel[iLvl];
4750 pLvlOut = &pStruct->aLevel[iLvl+1];
4752 fts5WriteInit(p, &writer, iSegid);
4754 /* Add the new segment to the output level */
4755 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
4756 pLvlOut->nSeg++;
4757 pSeg->pgnoFirst = 1;
4758 pSeg->iSegid = iSegid;
4759 pStruct->nSegment++;
4761 /* Read input from all segments in the input level */
4762 nInput = pLvl->nSeg;
4764 /* Set the range of origins that will go into the output segment. */
4765 if( pStruct->nOriginCntr>0 ){
4766 pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1;
4767 pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2;
4770 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
4772 assert( iLvl>=0 );
4773 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
4774 fts5MultiIterEof(p, pIter)==0;
4775 fts5MultiIterNext(p, pIter, 0, 0)
4777 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4778 int nPos; /* position-list size field value */
4779 int nTerm;
4780 const u8 *pTerm;
4782 pTerm = fts5MultiIterTerm(pIter, &nTerm);
4783 if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
4784 if( pnRem && writer.nLeafWritten>nRem ){
4785 break;
4787 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
4788 bTermWritten =0;
4791 /* Check for key annihilation. */
4792 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
4794 if( p->rc==SQLITE_OK && bTermWritten==0 ){
4795 /* This is a new term. Append a term to the output segment. */
4796 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
4797 bTermWritten = 1;
4800 /* Append the rowid to the output */
4801 /* WRITEPOSLISTSIZE */
4802 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
4804 if( eDetail==FTS5_DETAIL_NONE ){
4805 if( pSegIter->bDel ){
4806 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4807 if( pSegIter->nPos>0 ){
4808 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4811 }else{
4812 /* Append the position-list data to the output */
4813 nPos = pSegIter->nPos*2 + pSegIter->bDel;
4814 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4815 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4819 /* Flush the last leaf page to disk. Set the output segment b-tree height
4820 ** and last leaf page number at the same time. */
4821 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4823 assert( pIter!=0 || p->rc!=SQLITE_OK );
4824 if( fts5MultiIterEof(p, pIter) ){
4825 int i;
4827 /* Remove the redundant segments from the %_data table */
4828 assert( pSeg->nEntry==0 );
4829 for(i=0; i<nInput; i++){
4830 Fts5StructureSegment *pOld = &pLvl->aSeg[i];
4831 pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone);
4832 fts5DataRemoveSegment(p, pOld);
4835 /* Remove the redundant segments from the input level */
4836 if( pLvl->nSeg!=nInput ){
4837 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
4838 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
4840 pStruct->nSegment -= nInput;
4841 pLvl->nSeg -= nInput;
4842 pLvl->nMerge = 0;
4843 if( pSeg->pgnoLast==0 ){
4844 pLvlOut->nSeg--;
4845 pStruct->nSegment--;
4847 }else{
4848 assert( pSeg->pgnoLast>0 );
4849 fts5TrimSegments(p, pIter);
4850 pLvl->nMerge = nInput;
4853 fts5MultiIterFree(pIter);
4854 fts5BufferFree(&term);
4855 if( pnRem ) *pnRem -= writer.nLeafWritten;
4859 ** If this is not a contentless_delete=1 table, or if the 'deletemerge'
4860 ** configuration option is set to 0, then this function always returns -1.
4861 ** Otherwise, it searches the structure object passed as the second argument
4862 ** for a level suitable for merging due to having a large number of
4863 ** tombstones in the tombstone hash. If one is found, its index is returned.
4864 ** Otherwise, if there is no suitable level, -1.
4866 static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){
4867 Fts5Config *pConfig = p->pConfig;
4868 int iRet = -1;
4869 if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){
4870 int ii;
4871 int nBest = 0;
4873 for(ii=0; ii<pStruct->nLevel; ii++){
4874 Fts5StructureLevel *pLvl = &pStruct->aLevel[ii];
4875 i64 nEntry = 0;
4876 i64 nTomb = 0;
4877 int iSeg;
4878 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
4879 nEntry += pLvl->aSeg[iSeg].nEntry;
4880 nTomb += pLvl->aSeg[iSeg].nEntryTombstone;
4882 assert_nc( nEntry>0 || pLvl->nSeg==0 );
4883 if( nEntry>0 ){
4884 int nPercent = (nTomb * 100) / nEntry;
4885 if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){
4886 iRet = ii;
4887 nBest = nPercent;
4892 return iRet;
4896 ** Do up to nPg pages of automerge work on the index.
4898 ** Return true if any changes were actually made, or false otherwise.
4900 static int fts5IndexMerge(
4901 Fts5Index *p, /* FTS5 backend object */
4902 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4903 int nPg, /* Pages of work to do */
4904 int nMin /* Minimum number of segments to merge */
4906 int nRem = nPg;
4907 int bRet = 0;
4908 Fts5Structure *pStruct = *ppStruct;
4909 while( nRem>0 && p->rc==SQLITE_OK ){
4910 int iLvl; /* To iterate through levels */
4911 int iBestLvl = 0; /* Level offering the most input segments */
4912 int nBest = 0; /* Number of input segments on best level */
4914 /* Set iBestLvl to the level to read input segments from. Or to -1 if
4915 ** there is no level suitable to merge segments from. */
4916 assert( pStruct->nLevel>0 );
4917 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4918 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4919 if( pLvl->nMerge ){
4920 if( pLvl->nMerge>nBest ){
4921 iBestLvl = iLvl;
4922 nBest = nMin;
4924 break;
4926 if( pLvl->nSeg>nBest ){
4927 nBest = pLvl->nSeg;
4928 iBestLvl = iLvl;
4931 if( nBest<nMin ){
4932 iBestLvl = fts5IndexFindDeleteMerge(p, pStruct);
4935 if( iBestLvl<0 ) break;
4936 bRet = 1;
4937 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4938 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4939 fts5StructurePromote(p, iBestLvl+1, pStruct);
4942 if( nMin==1 ) nMin = 2;
4944 *ppStruct = pStruct;
4945 return bRet;
4949 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4950 ** segment. This function updates the write-counter accordingly and, if
4951 ** necessary, performs incremental merge work.
4953 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4954 ** already occurred, this function is a no-op.
4956 static void fts5IndexAutomerge(
4957 Fts5Index *p, /* FTS5 backend object */
4958 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4959 int nLeaf /* Number of output leaves just written */
4961 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
4962 Fts5Structure *pStruct = *ppStruct;
4963 u64 nWrite; /* Initial value of write-counter */
4964 int nWork; /* Number of work-quanta to perform */
4965 int nRem; /* Number of leaf pages left to write */
4967 /* Update the write-counter. While doing so, set nWork. */
4968 nWrite = pStruct->nWriteCounter;
4969 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4970 pStruct->nWriteCounter += nLeaf;
4971 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4973 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4977 static void fts5IndexCrisismerge(
4978 Fts5Index *p, /* FTS5 backend object */
4979 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
4981 const int nCrisis = p->pConfig->nCrisisMerge;
4982 Fts5Structure *pStruct = *ppStruct;
4983 if( pStruct && pStruct->nLevel>0 ){
4984 int iLvl = 0;
4985 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
4986 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
4987 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
4988 fts5StructurePromote(p, iLvl+1, pStruct);
4989 iLvl++;
4991 *ppStruct = pStruct;
4995 static int fts5IndexReturn(Fts5Index *p){
4996 int rc = p->rc;
4997 p->rc = SQLITE_OK;
4998 return rc;
5001 typedef struct Fts5FlushCtx Fts5FlushCtx;
5002 struct Fts5FlushCtx {
5003 Fts5Index *pIdx;
5004 Fts5SegWriter writer;
5008 ** Buffer aBuf[] contains a list of varints, all small enough to fit
5009 ** in a 32-bit integer. Return the size of the largest prefix of this
5010 ** list nMax bytes or less in size.
5012 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
5013 int ret;
5014 u32 dummy;
5015 ret = fts5GetVarint32(aBuf, dummy);
5016 if( ret<nMax ){
5017 while( 1 ){
5018 int i = fts5GetVarint32(&aBuf[ret], dummy);
5019 if( (ret + i) > nMax ) break;
5020 ret += i;
5023 return ret;
5027 ** Execute the SQL statement:
5029 ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
5031 ** This is used when a secure-delete operation removes the last term
5032 ** from a segment leaf page. In that case the %_idx entry is removed
5033 ** too. This is done to ensure that if all instances of a token are
5034 ** removed from an fts5 database in secure-delete mode, no trace of
5035 ** the token itself remains in the database.
5037 static void fts5SecureDeleteIdxEntry(
5038 Fts5Index *p, /* FTS5 backend object */
5039 int iSegid, /* Id of segment to delete entry for */
5040 int iPgno /* Page number within segment */
5042 if( iPgno!=1 ){
5043 assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );
5044 if( p->pDeleteFromIdx==0 ){
5045 fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
5046 "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
5047 p->pConfig->zDb, p->pConfig->zName
5050 if( p->rc==SQLITE_OK ){
5051 sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
5052 sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
5053 sqlite3_step(p->pDeleteFromIdx);
5054 p->rc = sqlite3_reset(p->pDeleteFromIdx);
5060 ** This is called when a secure-delete operation removes a position-list
5061 ** that overflows onto segment page iPgno of segment pSeg. This function
5062 ** rewrites node iPgno, and possibly one or more of its right-hand peers,
5063 ** to remove this portion of the position list.
5065 ** Output variable (*pbLastInDoclist) is set to true if the position-list
5066 ** removed is followed by a new term or the end-of-segment, or false if
5067 ** it is followed by another rowid/position list.
5069 static void fts5SecureDeleteOverflow(
5070 Fts5Index *p,
5071 Fts5StructureSegment *pSeg,
5072 int iPgno,
5073 int *pbLastInDoclist
5075 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
5076 int pgno;
5077 Fts5Data *pLeaf = 0;
5078 assert( iPgno!=1 );
5080 *pbLastInDoclist = 1;
5081 for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){
5082 i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
5083 int iNext = 0;
5084 u8 *aPg = 0;
5086 pLeaf = fts5DataRead(p, iRowid);
5087 if( pLeaf==0 ) break;
5088 aPg = pLeaf->p;
5090 iNext = fts5GetU16(&aPg[0]);
5091 if( iNext!=0 ){
5092 *pbLastInDoclist = 0;
5094 if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){
5095 fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext);
5098 if( iNext==0 ){
5099 /* The page contains no terms or rowids. Replace it with an empty
5100 ** page and move on to the right-hand peer. */
5101 const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
5102 assert_nc( bDetailNone==0 || pLeaf->nn==4 );
5103 if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
5104 fts5DataRelease(pLeaf);
5105 pLeaf = 0;
5106 }else if( bDetailNone ){
5107 break;
5108 }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){
5109 p->rc = FTS5_CORRUPT;
5110 break;
5111 }else{
5112 int nShift = iNext - 4;
5113 int nPg;
5115 int nIdx = 0;
5116 u8 *aIdx = 0;
5118 /* Unless the current page footer is 0 bytes in size (in which case
5119 ** the new page footer will be as well), allocate and populate a
5120 ** buffer containing the new page footer. Set stack variables aIdx
5121 ** and nIdx accordingly. */
5122 if( pLeaf->nn>pLeaf->szLeaf ){
5123 int iFirst = 0;
5124 int i1 = pLeaf->szLeaf;
5125 int i2 = 0;
5127 i1 += fts5GetVarint32(&aPg[i1], iFirst);
5128 if( iFirst<iNext ){
5129 p->rc = FTS5_CORRUPT;
5130 break;
5132 aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2);
5133 if( aIdx==0 ) break;
5134 i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift);
5135 if( i1<pLeaf->nn ){
5136 memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1);
5137 i2 += (pLeaf->nn-i1);
5139 nIdx = i2;
5142 /* Modify the contents of buffer aPg[]. Set nPg to the new size
5143 ** in bytes. The new page is always smaller than the old. */
5144 nPg = pLeaf->szLeaf - nShift;
5145 memmove(&aPg[4], &aPg[4+nShift], nPg-4);
5146 fts5PutU16(&aPg[2], nPg);
5147 if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4);
5148 if( nIdx>0 ){
5149 memcpy(&aPg[nPg], aIdx, nIdx);
5150 nPg += nIdx;
5152 sqlite3_free(aIdx);
5154 /* Write the new page to disk and exit the loop */
5155 assert( nPg>4 || fts5GetU16(aPg)==0 );
5156 fts5DataWrite(p, iRowid, aPg, nPg);
5157 break;
5160 fts5DataRelease(pLeaf);
5164 ** Completely remove the entry that pSeg currently points to from
5165 ** the database.
5167 static void fts5DoSecureDelete(
5168 Fts5Index *p,
5169 Fts5SegIter *pSeg
5171 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
5172 int iSegid = pSeg->pSeg->iSegid;
5173 u8 *aPg = pSeg->pLeaf->p;
5174 int nPg = pSeg->pLeaf->nn;
5175 int iPgIdx = pSeg->pLeaf->szLeaf;
5177 u64 iDelta = 0;
5178 int iNextOff = 0;
5179 int iOff = 0;
5180 int nIdx = 0;
5181 u8 *aIdx = 0;
5182 int bLastInDoclist = 0;
5183 int iIdx = 0;
5184 int iStart = 0;
5185 int iDelKeyOff = 0; /* Offset of deleted key, if any */
5187 nIdx = nPg-iPgIdx;
5188 aIdx = sqlite3Fts5MallocZero(&p->rc, nIdx+16);
5189 if( p->rc ) return;
5190 memcpy(aIdx, &aPg[iPgIdx], nIdx);
5192 /* At this point segment iterator pSeg points to the entry
5193 ** this function should remove from the b-tree segment.
5195 ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
5196 ** offset of the first byte in the position-list for the entry to
5197 ** remove. Immediately before this comes two varints that will also
5198 ** need to be removed:
5200 ** + the rowid or delta rowid value for the entry, and
5201 ** + the size of the position list in bytes.
5203 ** Or, in detail=none mode, there is a single varint prior to
5204 ** pSeg->iLeafOffset - the rowid or delta rowid value.
5206 ** This block sets the following variables:
5208 ** iStart:
5209 ** The offset of the first byte of the rowid or delta-rowid
5210 ** value for the doclist entry being removed.
5212 ** iDelta:
5213 ** The value of the rowid or delta-rowid value for the doclist
5214 ** entry being removed.
5216 ** iNextOff:
5217 ** The offset of the next entry following the position list
5218 ** for the one being removed. If the position list for this
5219 ** entry overflows onto the next leaf page, this value will be
5220 ** greater than pLeaf->szLeaf.
5223 int iSOP; /* Start-Of-Position-list */
5224 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){
5225 iStart = pSeg->iTermLeafOffset;
5226 }else{
5227 iStart = fts5GetU16(&aPg[0]);
5230 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
5231 assert_nc( iSOP<=pSeg->iLeafOffset );
5233 if( bDetailNone ){
5234 while( iSOP<pSeg->iLeafOffset ){
5235 if( aPg[iSOP]==0x00 ) iSOP++;
5236 if( aPg[iSOP]==0x00 ) iSOP++;
5237 iStart = iSOP;
5238 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
5241 iNextOff = iSOP;
5242 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
5243 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
5245 }else{
5246 int nPos = 0;
5247 iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
5248 while( iSOP<pSeg->iLeafOffset ){
5249 iStart = iSOP + (nPos/2);
5250 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
5251 iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
5253 assert_nc( iSOP==pSeg->iLeafOffset );
5254 iNextOff = pSeg->iLeafOffset + pSeg->nPos;
5258 iOff = iStart;
5260 /* If the position-list for the entry being removed flows over past
5261 ** the end of this page, delete the portion of the position-list on the
5262 ** next page and beyond.
5264 ** Set variable bLastInDoclist to true if this entry happens
5265 ** to be the last rowid in the doclist for its term. */
5266 if( iNextOff>=iPgIdx ){
5267 int pgno = pSeg->iLeafPgno+1;
5268 fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist);
5269 iNextOff = iPgIdx;
5272 if( pSeg->bDel==0 ){
5273 if( iNextOff!=iPgIdx ){
5274 /* Loop through the page-footer. If iNextOff (offset of the
5275 ** entry following the one we are removing) is equal to the
5276 ** offset of a key on this page, then the entry is the last
5277 ** in its doclist. */
5278 int iKeyOff = 0;
5279 for(iIdx=0; iIdx<nIdx; /* no-op */){
5280 u32 iVal = 0;
5281 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
5282 iKeyOff += iVal;
5283 if( iKeyOff==iNextOff ){
5284 bLastInDoclist = 1;
5289 /* If this is (a) the first rowid on a page and (b) is not followed by
5290 ** another position list on the same page, set the "first-rowid" field
5291 ** of the header to 0. */
5292 if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){
5293 fts5PutU16(&aPg[0], 0);
5297 if( pSeg->bDel ){
5298 iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta);
5299 aPg[iOff++] = 0x01;
5300 }else if( bLastInDoclist==0 ){
5301 if( iNextOff!=iPgIdx ){
5302 u64 iNextDelta = 0;
5303 iNextOff += fts5GetVarint(&aPg[iNextOff], &iNextDelta);
5304 iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta);
5306 }else if(
5307 pSeg->iLeafPgno==pSeg->iTermLeafPgno
5308 && iStart==pSeg->iTermLeafOffset
5310 /* The entry being removed was the only position list in its
5311 ** doclist. Therefore the term needs to be removed as well. */
5312 int iKey = 0;
5313 int iKeyOff = 0;
5315 /* Set iKeyOff to the offset of the term that will be removed - the
5316 ** last offset in the footer that is not greater than iStart. */
5317 for(iIdx=0; iIdx<nIdx; iKey++){
5318 u32 iVal = 0;
5319 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
5320 if( (iKeyOff+iVal)>(u32)iStart ) break;
5321 iKeyOff += iVal;
5323 assert_nc( iKey>=1 );
5325 /* Set iDelKeyOff to the value of the footer entry to remove from
5326 ** the page. */
5327 iDelKeyOff = iOff = iKeyOff;
5329 if( iNextOff!=iPgIdx ){
5330 /* This is the only position-list associated with the term, and there
5331 ** is another term following it on this page. So the subsequent term
5332 ** needs to be moved to replace the term associated with the entry
5333 ** being removed. */
5334 int nPrefix = 0;
5335 int nSuffix = 0;
5336 int nPrefix2 = 0;
5337 int nSuffix2 = 0;
5339 iDelKeyOff = iNextOff;
5340 iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2);
5341 iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2);
5343 if( iKey!=1 ){
5344 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix);
5346 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix);
5348 nPrefix = MIN(nPrefix, nPrefix2);
5349 nSuffix = (nPrefix2 + nSuffix2) - nPrefix;
5351 if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){
5352 p->rc = FTS5_CORRUPT;
5353 }else{
5354 if( iKey!=1 ){
5355 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix);
5357 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix);
5358 if( nPrefix2>pSeg->term.n ){
5359 p->rc = FTS5_CORRUPT;
5360 }else if( nPrefix2>nPrefix ){
5361 memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix);
5362 iOff += (nPrefix2-nPrefix);
5364 memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2);
5365 iOff += nSuffix2;
5366 iNextOff += nSuffix2;
5369 }else if( iStart==4 ){
5370 int iPgno;
5372 assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno );
5373 /* The entry being removed may be the only position list in
5374 ** its doclist. */
5375 for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
5376 Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno));
5377 int bEmpty = (pPg && pPg->nn==4);
5378 fts5DataRelease(pPg);
5379 if( bEmpty==0 ) break;
5382 if( iPgno==pSeg->iTermLeafPgno ){
5383 i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno);
5384 Fts5Data *pTerm = fts5DataRead(p, iId);
5385 if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){
5386 u8 *aTermIdx = &pTerm->p[pTerm->szLeaf];
5387 int nTermIdx = pTerm->nn - pTerm->szLeaf;
5388 int iTermIdx = 0;
5389 int iTermOff = 0;
5391 while( 1 ){
5392 u32 iVal = 0;
5393 int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal);
5394 iTermOff += iVal;
5395 if( (iTermIdx+nByte)>=nTermIdx ) break;
5396 iTermIdx += nByte;
5398 nTermIdx = iTermIdx;
5400 memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx);
5401 fts5PutU16(&pTerm->p[2], iTermOff);
5403 fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx);
5404 if( nTermIdx==0 ){
5405 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno);
5408 fts5DataRelease(pTerm);
5412 /* Assuming no error has occurred, this block does final edits to the
5413 ** leaf page before writing it back to disk. Input variables are:
5415 ** nPg: Total initial size of leaf page.
5416 ** iPgIdx: Initial offset of page footer.
5418 ** iOff: Offset to move data to
5419 ** iNextOff: Offset to move data from
5421 if( p->rc==SQLITE_OK ){
5422 const int nMove = nPg - iNextOff; /* Number of bytes to move */
5423 int nShift = iNextOff - iOff; /* Distance to move them */
5425 int iPrevKeyOut = 0;
5426 int iKeyIn = 0;
5428 memmove(&aPg[iOff], &aPg[iNextOff], nMove);
5429 iPgIdx -= nShift;
5430 nPg = iPgIdx;
5431 fts5PutU16(&aPg[2], iPgIdx);
5433 for(iIdx=0; iIdx<nIdx; /* no-op */){
5434 u32 iVal = 0;
5435 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
5436 iKeyIn += iVal;
5437 if( iKeyIn!=iDelKeyOff ){
5438 int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0));
5439 nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut);
5440 iPrevKeyOut = iKeyOut;
5444 if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){
5445 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno);
5448 assert_nc( nPg>4 || fts5GetU16(aPg)==0 );
5449 fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno), aPg, nPg);
5451 sqlite3_free(aIdx);
5455 ** This is called as part of flushing a delete to disk in 'secure-delete'
5456 ** mode. It edits the segments within the database described by argument
5457 ** pStruct to remove the entries for term zTerm, rowid iRowid.
5459 static void fts5FlushSecureDelete(
5460 Fts5Index *p,
5461 Fts5Structure *pStruct,
5462 const char *zTerm,
5463 int nTerm,
5464 i64 iRowid
5466 const int f = FTS5INDEX_QUERY_SKIPHASH;
5467 Fts5Iter *pIter = 0; /* Used to find term instance */
5469 fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter);
5470 if( fts5MultiIterEof(p, pIter)==0 ){
5471 i64 iThis = fts5MultiIterRowid(pIter);
5472 if( iThis<iRowid ){
5473 fts5MultiIterNextFrom(p, pIter, iRowid);
5476 if( p->rc==SQLITE_OK
5477 && fts5MultiIterEof(p, pIter)==0
5478 && iRowid==fts5MultiIterRowid(pIter)
5480 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
5481 fts5DoSecureDelete(p, pSeg);
5485 fts5MultiIterFree(pIter);
5490 ** Flush the contents of in-memory hash table iHash to a new level-0
5491 ** segment on disk. Also update the corresponding structure record.
5493 ** If an error occurs, set the Fts5Index.rc error code. If an error has
5494 ** already occurred, this function is a no-op.
5496 static void fts5FlushOneHash(Fts5Index *p){
5497 Fts5Hash *pHash = p->pHash;
5498 Fts5Structure *pStruct;
5499 int iSegid;
5500 int pgnoLast = 0; /* Last leaf page number in segment */
5502 /* Obtain a reference to the index structure and allocate a new segment-id
5503 ** for the new level-0 segment. */
5504 pStruct = fts5StructureRead(p);
5505 fts5StructureInvalidate(p);
5507 if( sqlite3Fts5HashIsEmpty(pHash)==0 ){
5508 iSegid = fts5AllocateSegid(p, pStruct);
5509 if( iSegid ){
5510 const int pgsz = p->pConfig->pgsz;
5511 int eDetail = p->pConfig->eDetail;
5512 int bSecureDelete = p->pConfig->bSecureDelete;
5513 Fts5StructureSegment *pSeg; /* New segment within pStruct */
5514 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
5515 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
5517 Fts5SegWriter writer;
5518 fts5WriteInit(p, &writer, iSegid);
5520 pBuf = &writer.writer.buf;
5521 pPgidx = &writer.writer.pgidx;
5523 /* fts5WriteInit() should have initialized the buffers to (most likely)
5524 ** the maximum space required. */
5525 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
5526 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
5528 /* Begin scanning through hash table entries. This loop runs once for each
5529 ** term/doclist currently stored within the hash table. */
5530 if( p->rc==SQLITE_OK ){
5531 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
5533 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
5534 const char *zTerm; /* Buffer containing term */
5535 int nTerm; /* Size of zTerm in bytes */
5536 const u8 *pDoclist; /* Pointer to doclist for this term */
5537 int nDoclist; /* Size of doclist in bytes */
5539 /* Get the term and doclist for this entry. */
5540 sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist);
5541 if( bSecureDelete==0 ){
5542 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
5543 if( p->rc!=SQLITE_OK ) break;
5544 assert( writer.bFirstRowidInPage==0 );
5547 if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
5548 /* The entire doclist will fit on the current leaf. */
5549 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
5550 }else{
5551 int bTermWritten = !bSecureDelete;
5552 i64 iRowid = 0;
5553 i64 iPrev = 0;
5554 int iOff = 0;
5556 /* The entire doclist will not fit on this leaf. The following
5557 ** loop iterates through the poslists that make up the current
5558 ** doclist. */
5559 while( p->rc==SQLITE_OK && iOff<nDoclist ){
5560 u64 iDelta = 0;
5561 iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
5562 iRowid += iDelta;
5564 /* If in secure delete mode, and if this entry in the poslist is
5565 ** in fact a delete, then edit the existing segments directly
5566 ** using fts5FlushSecureDelete(). */
5567 if( bSecureDelete ){
5568 if( eDetail==FTS5_DETAIL_NONE ){
5569 if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
5570 fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid);
5571 iOff++;
5572 if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
5573 iOff++;
5574 nDoclist = 0;
5575 }else{
5576 continue;
5579 }else if( (pDoclist[iOff] & 0x01) ){
5580 fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid);
5581 if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){
5582 iOff++;
5583 continue;
5588 if( p->rc==SQLITE_OK && bTermWritten==0 ){
5589 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
5590 bTermWritten = 1;
5591 assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 );
5594 if( writer.bFirstRowidInPage ){
5595 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
5596 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
5597 writer.bFirstRowidInPage = 0;
5598 fts5WriteDlidxAppend(p, &writer, iRowid);
5599 }else{
5600 u64 iRowidDelta = (u64)iRowid - (u64)iPrev;
5601 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta);
5603 if( p->rc!=SQLITE_OK ) break;
5604 assert( pBuf->n<=pBuf->nSpace );
5605 iPrev = iRowid;
5607 if( eDetail==FTS5_DETAIL_NONE ){
5608 if( iOff<nDoclist && pDoclist[iOff]==0 ){
5609 pBuf->p[pBuf->n++] = 0;
5610 iOff++;
5611 if( iOff<nDoclist && pDoclist[iOff]==0 ){
5612 pBuf->p[pBuf->n++] = 0;
5613 iOff++;
5616 if( (pBuf->n + pPgidx->n)>=pgsz ){
5617 fts5WriteFlushLeaf(p, &writer);
5619 }else{
5620 int bDel = 0;
5621 int nPos = 0;
5622 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDel);
5623 if( bDel && bSecureDelete ){
5624 fts5BufferAppendVarint(&p->rc, pBuf, nPos*2);
5625 iOff += nCopy;
5626 nCopy = nPos;
5627 }else{
5628 nCopy += nPos;
5630 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
5631 /* The entire poslist will fit on the current leaf. So copy
5632 ** it in one go. */
5633 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
5634 }else{
5635 /* The entire poslist will not fit on this leaf. So it needs
5636 ** to be broken into sections. The only qualification being
5637 ** that each varint must be stored contiguously. */
5638 const u8 *pPoslist = &pDoclist[iOff];
5639 int iPos = 0;
5640 while( p->rc==SQLITE_OK ){
5641 int nSpace = pgsz - pBuf->n - pPgidx->n;
5642 int n = 0;
5643 if( (nCopy - iPos)<=nSpace ){
5644 n = nCopy - iPos;
5645 }else{
5646 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
5648 assert( n>0 );
5649 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
5650 iPos += n;
5651 if( (pBuf->n + pPgidx->n)>=pgsz ){
5652 fts5WriteFlushLeaf(p, &writer);
5654 if( iPos>=nCopy ) break;
5657 iOff += nCopy;
5662 /* TODO2: Doclist terminator written here. */
5663 /* pBuf->p[pBuf->n++] = '\0'; */
5664 assert( pBuf->n<=pBuf->nSpace );
5665 if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
5667 fts5WriteFinish(p, &writer, &pgnoLast);
5669 assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 );
5670 if( pgnoLast>0 ){
5671 /* Update the Fts5Structure. It is written back to the database by the
5672 ** fts5StructureRelease() call below. */
5673 if( pStruct->nLevel==0 ){
5674 fts5StructureAddLevel(&p->rc, &pStruct);
5676 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
5677 if( p->rc==SQLITE_OK ){
5678 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
5679 pSeg->iSegid = iSegid;
5680 pSeg->pgnoFirst = 1;
5681 pSeg->pgnoLast = pgnoLast;
5682 if( pStruct->nOriginCntr>0 ){
5683 pSeg->iOrigin1 = pStruct->nOriginCntr;
5684 pSeg->iOrigin2 = pStruct->nOriginCntr;
5685 pSeg->nEntry = p->nPendingRow;
5686 pStruct->nOriginCntr++;
5688 pStruct->nSegment++;
5690 fts5StructurePromote(p, 0, pStruct);
5695 fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete);
5696 fts5IndexCrisismerge(p, &pStruct);
5697 fts5StructureWrite(p, pStruct);
5698 fts5StructureRelease(pStruct);
5702 ** Flush any data stored in the in-memory hash tables to the database.
5704 static void fts5IndexFlush(Fts5Index *p){
5705 /* Unless it is empty, flush the hash table to disk */
5706 if( p->flushRc ){
5707 p->rc = p->flushRc;
5708 return;
5710 if( p->nPendingData || p->nContentlessDelete ){
5711 assert( p->pHash );
5712 fts5FlushOneHash(p);
5713 if( p->rc==SQLITE_OK ){
5714 sqlite3Fts5HashClear(p->pHash);
5715 p->nPendingData = 0;
5716 p->nPendingRow = 0;
5717 p->nContentlessDelete = 0;
5718 }else if( p->nPendingData || p->nContentlessDelete ){
5719 p->flushRc = p->rc;
5724 static Fts5Structure *fts5IndexOptimizeStruct(
5725 Fts5Index *p,
5726 Fts5Structure *pStruct
5728 Fts5Structure *pNew = 0;
5729 sqlite3_int64 nByte = sizeof(Fts5Structure);
5730 int nSeg = pStruct->nSegment;
5731 int i;
5733 /* Figure out if this structure requires optimization. A structure does
5734 ** not require optimization if either:
5736 ** 1. it consists of fewer than two segments, or
5737 ** 2. all segments are on the same level, or
5738 ** 3. all segments except one are currently inputs to a merge operation.
5740 ** In the first case, if there are no tombstone hash pages, return NULL. In
5741 ** the second, increment the ref-count on *pStruct and return a copy of the
5742 ** pointer to it.
5744 if( nSeg==0 ) return 0;
5745 for(i=0; i<pStruct->nLevel; i++){
5746 int nThis = pStruct->aLevel[i].nSeg;
5747 int nMerge = pStruct->aLevel[i].nMerge;
5748 if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){
5749 if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){
5750 return 0;
5752 fts5StructureRef(pStruct);
5753 return pStruct;
5755 assert( pStruct->aLevel[i].nMerge<=nThis );
5758 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
5759 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
5761 if( pNew ){
5762 Fts5StructureLevel *pLvl;
5763 nByte = nSeg * sizeof(Fts5StructureSegment);
5764 pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL);
5765 pNew->nRef = 1;
5766 pNew->nWriteCounter = pStruct->nWriteCounter;
5767 pNew->nOriginCntr = pStruct->nOriginCntr;
5768 pLvl = &pNew->aLevel[pNew->nLevel-1];
5769 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
5770 if( pLvl->aSeg ){
5771 int iLvl, iSeg;
5772 int iSegOut = 0;
5773 /* Iterate through all segments, from oldest to newest. Add them to
5774 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
5775 ** segment in the data structure. */
5776 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
5777 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
5778 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
5779 iSegOut++;
5782 pNew->nSegment = pLvl->nSeg = nSeg;
5783 }else{
5784 sqlite3_free(pNew);
5785 pNew = 0;
5789 return pNew;
5792 int sqlite3Fts5IndexOptimize(Fts5Index *p){
5793 Fts5Structure *pStruct;
5794 Fts5Structure *pNew = 0;
5796 assert( p->rc==SQLITE_OK );
5797 fts5IndexFlush(p);
5798 assert( p->rc!=SQLITE_OK || p->nContentlessDelete==0 );
5799 pStruct = fts5StructureRead(p);
5800 assert( p->rc!=SQLITE_OK || pStruct!=0 );
5801 fts5StructureInvalidate(p);
5803 if( pStruct ){
5804 pNew = fts5IndexOptimizeStruct(p, pStruct);
5806 fts5StructureRelease(pStruct);
5808 assert( pNew==0 || pNew->nSegment>0 );
5809 if( pNew ){
5810 int iLvl;
5811 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
5812 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
5813 int nRem = FTS5_OPT_WORK_UNIT;
5814 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
5817 fts5StructureWrite(p, pNew);
5818 fts5StructureRelease(pNew);
5821 return fts5IndexReturn(p);
5825 ** This is called to implement the special "VALUES('merge', $nMerge)"
5826 ** INSERT command.
5828 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
5829 Fts5Structure *pStruct = 0;
5831 fts5IndexFlush(p);
5832 pStruct = fts5StructureRead(p);
5833 if( pStruct ){
5834 int nMin = p->pConfig->nUsermerge;
5835 fts5StructureInvalidate(p);
5836 if( nMerge<0 ){
5837 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
5838 fts5StructureRelease(pStruct);
5839 pStruct = pNew;
5840 nMin = 1;
5841 nMerge = nMerge*-1;
5843 if( pStruct && pStruct->nLevel ){
5844 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
5845 fts5StructureWrite(p, pStruct);
5848 fts5StructureRelease(pStruct);
5850 return fts5IndexReturn(p);
5853 static void fts5AppendRowid(
5854 Fts5Index *p,
5855 u64 iDelta,
5856 Fts5Iter *pUnused,
5857 Fts5Buffer *pBuf
5859 UNUSED_PARAM(pUnused);
5860 fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
5863 static void fts5AppendPoslist(
5864 Fts5Index *p,
5865 u64 iDelta,
5866 Fts5Iter *pMulti,
5867 Fts5Buffer *pBuf
5869 int nData = pMulti->base.nData;
5870 int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
5871 assert( nData>0 );
5872 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
5873 fts5BufferSafeAppendVarint(pBuf, iDelta);
5874 fts5BufferSafeAppendVarint(pBuf, nData*2);
5875 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
5876 memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
5881 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
5882 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
5884 assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
5885 if( p>=pIter->aEof ){
5886 pIter->aPoslist = 0;
5887 }else{
5888 i64 iDelta;
5890 p += fts5GetVarint(p, (u64*)&iDelta);
5891 pIter->iRowid += iDelta;
5893 /* Read position list size */
5894 if( p[0] & 0x80 ){
5895 int nPos;
5896 pIter->nSize = fts5GetVarint32(p, nPos);
5897 pIter->nPoslist = (nPos>>1);
5898 }else{
5899 pIter->nPoslist = ((int)(p[0])) >> 1;
5900 pIter->nSize = 1;
5903 pIter->aPoslist = p;
5904 if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
5905 pIter->aPoslist = 0;
5910 static void fts5DoclistIterInit(
5911 Fts5Buffer *pBuf,
5912 Fts5DoclistIter *pIter
5914 memset(pIter, 0, sizeof(*pIter));
5915 if( pBuf->n>0 ){
5916 pIter->aPoslist = pBuf->p;
5917 pIter->aEof = &pBuf->p[pBuf->n];
5918 fts5DoclistIterNext(pIter);
#if 0
/*
** Append a doclist to buffer pBuf.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Macro form of the function above. Appends the delta between iRowid and
** iLastRowid to pBuf as a varint, then sets iLastRowid (which must be an
** lvalue) to iRowid. The delta is computed with unsigned arithmetic so
** that it cannot overflow. Space in pBuf must already be allocated.
**
** Wrapped in do{...}while(0) so that "fts5MergeAppendDocid(...);" expands
** to exactly one statement. Note that iRowid is evaluated twice.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) do {                 \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                               \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid));   \
  (iLastRowid) = (iRowid);                                                 \
} while( 0 )
5947 ** Swap the contents of buffer *p1 with that of *p2.
5949 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
5950 Fts5Buffer tmp = *p1;
5951 *p1 = *p2;
5952 *p2 = tmp;
5955 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
5956 int i = *piOff;
5957 if( i>=pBuf->n ){
5958 *piOff = -1;
5959 }else{
5960 u64 iVal;
5961 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
5962 *piRowid += iVal;
5967 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
5968 ** In this case the buffers consist of a delta-encoded list of rowids only.
5970 static void fts5MergeRowidLists(
5971 Fts5Index *p, /* FTS5 backend object */
5972 Fts5Buffer *p1, /* First list to merge */
5973 int nBuf, /* Number of entries in apBuf[] */
5974 Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
5976 int i1 = 0;
5977 int i2 = 0;
5978 i64 iRowid1 = 0;
5979 i64 iRowid2 = 0;
5980 i64 iOut = 0;
5981 Fts5Buffer *p2 = &aBuf[0];
5982 Fts5Buffer out;
5984 (void)nBuf;
5985 memset(&out, 0, sizeof(out));
5986 assert( nBuf==1 );
5987 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
5988 if( p->rc ) return;
5990 fts5NextRowid(p1, &i1, &iRowid1);
5991 fts5NextRowid(p2, &i2, &iRowid2);
5992 while( i1>=0 || i2>=0 ){
5993 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
5994 assert( iOut==0 || iRowid1>iOut );
5995 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
5996 iOut = iRowid1;
5997 fts5NextRowid(p1, &i1, &iRowid1);
5998 }else{
5999 assert( iOut==0 || iRowid2>iOut );
6000 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
6001 iOut = iRowid2;
6002 if( i1>=0 && iRowid1==iRowid2 ){
6003 fts5NextRowid(p1, &i1, &iRowid1);
6005 fts5NextRowid(p2, &i2, &iRowid2);
6009 fts5BufferSwap(&out, p1);
6010 fts5BufferFree(&out);
6013 typedef struct PrefixMerger PrefixMerger;
6014 struct PrefixMerger {
6015 Fts5DoclistIter iter; /* Doclist iterator */
6016 i64 iPos; /* For iterating through a position list */
6017 int iOff;
6018 u8 *aPos;
6019 PrefixMerger *pNext; /* Next in docid/poslist order */
6022 static void fts5PrefixMergerInsertByRowid(
6023 PrefixMerger **ppHead,
6024 PrefixMerger *p
6026 if( p->iter.aPoslist ){
6027 PrefixMerger **pp = ppHead;
6028 while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
6029 pp = &(*pp)->pNext;
6031 p->pNext = *pp;
6032 *pp = p;
6036 static void fts5PrefixMergerInsertByPosition(
6037 PrefixMerger **ppHead,
6038 PrefixMerger *p
6040 if( p->iPos>=0 ){
6041 PrefixMerger **pp = ppHead;
6042 while( *pp && p->iPos>(*pp)->iPos ){
6043 pp = &(*pp)->pNext;
6045 p->pNext = *pp;
6046 *pp = p;
6052 ** Array aBuf[] contains nBuf doclists. These are all merged in with the
6053 ** doclist in buffer p1.
6055 static void fts5MergePrefixLists(
6056 Fts5Index *p, /* FTS5 backend object */
6057 Fts5Buffer *p1, /* First list to merge */
6058 int nBuf, /* Number of buffers in array aBuf[] */
6059 Fts5Buffer *aBuf /* Other lists to merge in */
6061 #define fts5PrefixMergerNextPosition(p) \
6062 sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
6063 #define FTS5_MERGE_NLIST 16
6064 PrefixMerger aMerger[FTS5_MERGE_NLIST];
6065 PrefixMerger *pHead = 0;
6066 int i;
6067 int nOut = 0;
6068 Fts5Buffer out = {0, 0, 0};
6069 Fts5Buffer tmp = {0, 0, 0};
6070 i64 iLastRowid = 0;
6072 /* Initialize a doclist-iterator for each input buffer. Arrange them in
6073 ** a linked-list starting at pHead in ascending order of rowid. Avoid
6074 ** linking any iterators already at EOF into the linked list at all. */
6075 assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
6076 memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
6077 pHead = &aMerger[nBuf];
6078 fts5DoclistIterInit(p1, &pHead->iter);
6079 for(i=0; i<nBuf; i++){
6080 fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
6081 fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
6082 nOut += aBuf[i].n;
6084 if( nOut==0 ) return;
6085 nOut += p1->n + 9 + 10*nBuf;
6087 /* The maximum size of the output is equal to the sum of the
6088 ** input sizes + 1 varint (9 bytes). The extra varint is because if the
6089 ** first rowid in one input is a large negative number, and the first in
6090 ** the other a non-negative number, the delta for the non-negative
6091 ** number will be larger on disk than the literal integer value
6092 ** was.
6094 ** Or, if the input position-lists are corrupt, then the output might
6095 ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
6096 ** (the value PoslistNext64() uses for EOF) as a position and appending
6097 ** it to the output. This can happen at most once for each input
6098 ** position-list, hence (nBuf+1) 10 byte paddings. */
6099 if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;
6101 while( pHead ){
6102 fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);
6104 if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
6105 /* Merge data from two or more poslists */
6106 i64 iPrev = 0;
6107 int nTmp = FTS5_DATA_ZERO_PADDING;
6108 int nMerge = 0;
6109 PrefixMerger *pSave = pHead;
6110 PrefixMerger *pThis = 0;
6111 int nTail = 0;
6113 pHead = 0;
6114 while( pSave && pSave->iter.iRowid==iLastRowid ){
6115 PrefixMerger *pNext = pSave->pNext;
6116 pSave->iOff = 0;
6117 pSave->iPos = 0;
6118 pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
6119 fts5PrefixMergerNextPosition(pSave);
6120 nTmp += pSave->iter.nPoslist + 10;
6121 nMerge++;
6122 fts5PrefixMergerInsertByPosition(&pHead, pSave);
6123 pSave = pNext;
6126 if( pHead==0 || pHead->pNext==0 ){
6127 p->rc = FTS5_CORRUPT;
6128 break;
6131 /* See the earlier comment in this function for an explanation of why
6132 ** corrupt input position lists might cause the output to consume
6133 ** at most nMerge*10 bytes of unexpected space. */
6134 if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
6135 break;
6137 fts5BufferZero(&tmp);
6139 pThis = pHead;
6140 pHead = pThis->pNext;
6141 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
6142 fts5PrefixMergerNextPosition(pThis);
6143 fts5PrefixMergerInsertByPosition(&pHead, pThis);
6145 while( pHead->pNext ){
6146 pThis = pHead;
6147 if( pThis->iPos!=iPrev ){
6148 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
6150 fts5PrefixMergerNextPosition(pThis);
6151 pHead = pThis->pNext;
6152 fts5PrefixMergerInsertByPosition(&pHead, pThis);
6155 if( pHead->iPos!=iPrev ){
6156 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
6158 nTail = pHead->iter.nPoslist - pHead->iOff;
6160 /* WRITEPOSLISTSIZE */
6161 assert_nc( tmp.n+nTail<=nTmp );
6162 assert( tmp.n+nTail<=nTmp+nMerge*10 );
6163 if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
6164 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
6165 break;
6167 fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
6168 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
6169 if( nTail>0 ){
6170 fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
6173 pHead = pSave;
6174 for(i=0; i<nBuf+1; i++){
6175 PrefixMerger *pX = &aMerger[i];
6176 if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
6177 fts5DoclistIterNext(&pX->iter);
6178 fts5PrefixMergerInsertByRowid(&pHead, pX);
6182 }else{
6183 /* Copy poslist from pHead to output */
6184 PrefixMerger *pThis = pHead;
6185 Fts5DoclistIter *pI = &pThis->iter;
6186 fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
6187 fts5DoclistIterNext(pI);
6188 pHead = pThis->pNext;
6189 fts5PrefixMergerInsertByRowid(&pHead, pThis);
6193 fts5BufferFree(p1);
6194 fts5BufferFree(&tmp);
6195 memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
6196 *p1 = out;
6199 static void fts5SetupPrefixIter(
6200 Fts5Index *p, /* Index to read from */
6201 int bDesc, /* True for "ORDER BY rowid DESC" */
6202 int iIdx, /* Index to scan for data */
6203 u8 *pToken, /* Buffer containing prefix to match */
6204 int nToken, /* Size of buffer pToken in bytes */
6205 Fts5Colset *pColset, /* Restrict matches to these columns */
6206 Fts5Iter **ppIter /* OUT: New iterator */
6208 Fts5Structure *pStruct;
6209 Fts5Buffer *aBuf;
6210 int nBuf = 32;
6211 int nMerge = 1;
6213 void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
6214 void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
6215 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
6216 xMerge = fts5MergeRowidLists;
6217 xAppend = fts5AppendRowid;
6218 }else{
6219 nMerge = FTS5_MERGE_NLIST-1;
6220 nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
6221 xMerge = fts5MergePrefixLists;
6222 xAppend = fts5AppendPoslist;
6225 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
6226 pStruct = fts5StructureRead(p);
6227 assert( p->rc!=SQLITE_OK || (aBuf && pStruct) );
6229 if( p->rc==SQLITE_OK ){
6230 const int flags = FTS5INDEX_QUERY_SCAN
6231 | FTS5INDEX_QUERY_SKIPEMPTY
6232 | FTS5INDEX_QUERY_NOOUTPUT;
6233 int i;
6234 i64 iLastRowid = 0;
6235 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
6236 Fts5Data *pData;
6237 Fts5Buffer doclist;
6238 int bNewTerm = 1;
6240 memset(&doclist, 0, sizeof(doclist));
6242 /* If iIdx is non-zero, then it is the number of a prefix-index for
6243 ** prefixes 1 character longer than the prefix being queried for. That
6244 ** index contains all the doclists required, except for the one
6245 ** corresponding to the prefix itself. That one is extracted from the
6246 ** main term index here. */
6247 if( iIdx!=0 ){
6248 int dummy = 0;
6249 const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
6250 pToken[0] = FTS5_MAIN_PREFIX;
6251 fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
6252 fts5IterSetOutputCb(&p->rc, p1);
6253 for(;
6254 fts5MultiIterEof(p, p1)==0;
6255 fts5MultiIterNext2(p, p1, &dummy)
6257 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
6258 p1->xSetOutputs(p1, pSeg);
6259 if( p1->base.nData ){
6260 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
6261 iLastRowid = p1->base.iRowid;
6264 fts5MultiIterFree(p1);
6267 pToken[0] = FTS5_MAIN_PREFIX + iIdx;
6268 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
6269 fts5IterSetOutputCb(&p->rc, p1);
6271 for( /* no-op */ ;
6272 fts5MultiIterEof(p, p1)==0;
6273 fts5MultiIterNext2(p, p1, &bNewTerm)
6275 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
6276 int nTerm = pSeg->term.n;
6277 const u8 *pTerm = pSeg->term.p;
6278 p1->xSetOutputs(p1, pSeg);
6280 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
6281 if( bNewTerm ){
6282 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
6285 if( p1->base.nData==0 ) continue;
6286 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
6287 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
6288 int i1 = i*nMerge;
6289 int iStore;
6290 assert( i1+nMerge<=nBuf );
6291 for(iStore=i1; iStore<i1+nMerge; iStore++){
6292 if( aBuf[iStore].n==0 ){
6293 fts5BufferSwap(&doclist, &aBuf[iStore]);
6294 fts5BufferZero(&doclist);
6295 break;
6298 if( iStore==i1+nMerge ){
6299 xMerge(p, &doclist, nMerge, &aBuf[i1]);
6300 for(iStore=i1; iStore<i1+nMerge; iStore++){
6301 fts5BufferZero(&aBuf[iStore]);
6305 iLastRowid = 0;
6308 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
6309 iLastRowid = p1->base.iRowid;
6312 assert( (nBuf%nMerge)==0 );
6313 for(i=0; i<nBuf; i+=nMerge){
6314 int iFree;
6315 if( p->rc==SQLITE_OK ){
6316 xMerge(p, &doclist, nMerge, &aBuf[i]);
6318 for(iFree=i; iFree<i+nMerge; iFree++){
6319 fts5BufferFree(&aBuf[iFree]);
6322 fts5MultiIterFree(p1);
6324 pData = fts5IdxMalloc(p, sizeof(*pData)+doclist.n+FTS5_DATA_ZERO_PADDING);
6325 if( pData ){
6326 pData->p = (u8*)&pData[1];
6327 pData->nn = pData->szLeaf = doclist.n;
6328 if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
6329 fts5MultiIterNew2(p, pData, bDesc, ppIter);
6331 fts5BufferFree(&doclist);
6334 fts5StructureRelease(pStruct);
6335 sqlite3_free(aBuf);
6340 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
6341 ** to the document with rowid iRowid.
6343 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
6344 assert( p->rc==SQLITE_OK );
6346 /* Allocate the hash table if it has not already been allocated */
6347 if( p->pHash==0 ){
6348 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
6351 /* Flush the hash table to disk if required */
6352 if( iRowid<p->iWriteRowid
6353 || (iRowid==p->iWriteRowid && p->bDelete==0)
6354 || (p->nPendingData > p->pConfig->nHashSize)
6356 fts5IndexFlush(p);
6359 p->iWriteRowid = iRowid;
6360 p->bDelete = bDelete;
6361 if( bDelete==0 ){
6362 p->nPendingRow++;
6364 return fts5IndexReturn(p);
6368 ** Commit data to disk.
6370 int sqlite3Fts5IndexSync(Fts5Index *p){
6371 assert( p->rc==SQLITE_OK );
6372 fts5IndexFlush(p);
6373 sqlite3Fts5IndexCloseReader(p);
6374 return fts5IndexReturn(p);
6378 ** Discard any data stored in the in-memory hash tables. Do not write it
6379 ** to the database. Additionally, assume that the contents of the %_data
6380 ** table may have changed on disk. So any in-memory caches of %_data
6381 ** records must be invalidated.
6383 int sqlite3Fts5IndexRollback(Fts5Index *p){
6384 sqlite3Fts5IndexCloseReader(p);
6385 fts5IndexDiscardData(p);
6386 fts5StructureInvalidate(p);
6387 /* assert( p->rc==SQLITE_OK ); */
6388 return SQLITE_OK;
6392 ** The %_data table is completely empty when this function is called. This
6393 ** function populates it with the initial structure objects for each index,
6394 ** and the initial version of the "averages" record (a zero-byte blob).
6396 int sqlite3Fts5IndexReinit(Fts5Index *p){
6397 Fts5Structure s;
6398 fts5StructureInvalidate(p);
6399 fts5IndexDiscardData(p);
6400 memset(&s, 0, sizeof(Fts5Structure));
6401 if( p->pConfig->bContentlessDelete ){
6402 s.nOriginCntr = 1;
6404 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
6405 fts5StructureWrite(p, &s);
6406 return fts5IndexReturn(p);
6410 ** Open a new Fts5Index handle. If the bCreate argument is true, create
6411 ** and initialize the underlying %_data table.
6413 ** If successful, set *pp to point to the new object and return SQLITE_OK.
6414 ** Otherwise, set *pp to NULL and return an SQLite error code.
6416 int sqlite3Fts5IndexOpen(
6417 Fts5Config *pConfig,
6418 int bCreate,
6419 Fts5Index **pp,
6420 char **pzErr
6422 int rc = SQLITE_OK;
6423 Fts5Index *p; /* New object */
6425 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
6426 if( rc==SQLITE_OK ){
6427 p->pConfig = pConfig;
6428 p->nWorkUnit = FTS5_WORK_UNIT;
6429 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
6430 if( p->zDataTbl && bCreate ){
6431 rc = sqlite3Fts5CreateTable(
6432 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
6434 if( rc==SQLITE_OK ){
6435 rc = sqlite3Fts5CreateTable(pConfig, "idx",
6436 "segid, term, pgno, PRIMARY KEY(segid, term)",
6437 1, pzErr
6440 if( rc==SQLITE_OK ){
6441 rc = sqlite3Fts5IndexReinit(p);
6446 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
6447 if( rc ){
6448 sqlite3Fts5IndexClose(p);
6449 *pp = 0;
6451 return rc;
6455 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
6457 int sqlite3Fts5IndexClose(Fts5Index *p){
6458 int rc = SQLITE_OK;
6459 if( p ){
6460 assert( p->pReader==0 );
6461 fts5StructureInvalidate(p);
6462 sqlite3_finalize(p->pWriter);
6463 sqlite3_finalize(p->pDeleter);
6464 sqlite3_finalize(p->pIdxWriter);
6465 sqlite3_finalize(p->pIdxDeleter);
6466 sqlite3_finalize(p->pIdxSelect);
6467 sqlite3_finalize(p->pIdxNextSelect);
6468 sqlite3_finalize(p->pDataVersion);
6469 sqlite3_finalize(p->pDeleteFromIdx);
6470 sqlite3Fts5HashFree(p->pHash);
6471 sqlite3_free(p->zDataTbl);
6472 sqlite3_free(p);
6474 return rc;
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
**
** A byte >=0xC0 starts a multi-byte character; all following bytes of the
** form 10xxxxxx are counted as part of it. A lead byte that is the very
** last byte of the buffer causes 0 to be returned.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int n = 0;
  int i;
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      if( n>=nByte ) return 0;
      while( (p[n] & 0xc0)==0x80 ){
        n++;
        if( n>=nByte ){
          /* Ran off the end of the buffer inside a character. That is
          ** fine if this was the last character requested. */
          if( i+1==nChar ) break;
          return 0;
        }
      }
    }
  }
  return n;
}
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** Each byte >=0xC0 together with any immediately following 10xxxxxx bytes
** counts as one character; every other byte counts as one character.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;
  int i = 0;
  while( i<nIn ){
    if( (unsigned char)pIn[i++]>=0xc0 ){
      while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
    }
    nChar++;
  }
  return nChar;
}
6522 ** Insert or remove data to or from the index. Each time a document is
6523 ** added to or removed from the index, this function is called one or more
6524 ** times.
6526 ** For an insert, it must be called once for each token in the new document.
6527 ** If the operation is a delete, it must be called (at least) once for each
6528 ** unique token in the document with an iCol value less than zero. The iPos
6529 ** argument is ignored for a delete.
6531 int sqlite3Fts5IndexWrite(
6532 Fts5Index *p, /* Index to write to */
6533 int iCol, /* Column token appears in (-ve -> delete) */
6534 int iPos, /* Position of token within column */
6535 const char *pToken, int nToken /* Token to add or remove to or from index */
6537 int i; /* Used to iterate through indexes */
6538 int rc = SQLITE_OK; /* Return code */
6539 Fts5Config *pConfig = p->pConfig;
6541 assert( p->rc==SQLITE_OK );
6542 assert( (iCol<0)==p->bDelete );
6544 /* Add the entry to the main terms index. */
6545 rc = sqlite3Fts5HashWrite(
6546 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
6549 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
6550 const int nChar = pConfig->aPrefix[i];
6551 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
6552 if( nByte ){
6553 rc = sqlite3Fts5HashWrite(p->pHash,
6554 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
6555 nByte
6560 return rc;
6564 ** pToken points to a buffer of size nToken bytes containing a search
6565 ** term, including the index number at the start, used on a tokendata=1
6566 ** table. This function returns true if the term in buffer pBuf matches
6567 ** token pToken/nToken.
6569 static int fts5IsTokendataPrefix(
6570 Fts5Buffer *pBuf,
6571 const u8 *pToken,
6572 int nToken
6574 return (
6575 pBuf->n>=nToken
6576 && 0==memcmp(pBuf->p, pToken, nToken)
6577 && (pBuf->n==nToken || pBuf->p[nToken]==0x00)
6582 ** Ensure the segment-iterator passed as the only argument points to EOF.
6584 static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
6585 fts5DataRelease(pSeg->pLeaf);
6586 pSeg->pLeaf = 0;
6590 ** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
6591 ** array of these for each row it visits. Or, for an iterator used by an
6592 ** "ORDER BY rank" query, it accumulates an array of these for the entire
6593 ** query.
6595 ** Each instance in the array indicates the iterator (and therefore term)
6596 ** associated with position iPos of rowid iRowid. This is used by the
6597 ** xInstToken() API.
6599 struct Fts5TokenDataMap {
6600 i64 iRowid; /* Row this token is located in */
6601 i64 iPos; /* Position of token */
6602 int iIter; /* Iterator token was read from */
6606 ** An object used to supplement Fts5Iter for tokendata=1 iterators.
6608 struct Fts5TokenDataIter {
6609 int nIter;
6610 int nIterAlloc;
6612 int nMap;
6613 int nMapAlloc;
6614 Fts5TokenDataMap *aMap;
6616 Fts5PoslistReader *aPoslistReader;
6617 int *aPoslistToIter;
6618 Fts5Iter *apIter[1];
6622 ** This function appends iterator pAppend to Fts5TokenDataIter pIn and
6623 ** returns the result.
6625 static Fts5TokenDataIter *fts5AppendTokendataIter(
6626 Fts5Index *p, /* Index object (for error code) */
6627 Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */
6628 Fts5Iter *pAppend /* Append this iterator */
6630 Fts5TokenDataIter *pRet = pIn;
6632 if( p->rc==SQLITE_OK ){
6633 if( pIn==0 || pIn->nIter==pIn->nIterAlloc ){
6634 int nAlloc = pIn ? pIn->nIterAlloc*2 : 16;
6635 int nByte = nAlloc * sizeof(Fts5Iter*) + sizeof(Fts5TokenDataIter);
6636 Fts5TokenDataIter *pNew = (Fts5TokenDataIter*)sqlite3_realloc(pIn, nByte);
6638 if( pNew==0 ){
6639 p->rc = SQLITE_NOMEM;
6640 }else{
6641 if( pIn==0 ) memset(pNew, 0, nByte);
6642 pRet = pNew;
6643 pNew->nIterAlloc = nAlloc;
6647 if( p->rc ){
6648 sqlite3Fts5IterClose((Fts5IndexIter*)pAppend);
6649 }else{
6650 pRet->apIter[pRet->nIter++] = pAppend;
6652 assert( pRet==0 || pRet->nIter<=pRet->nIterAlloc );
6654 return pRet;
6658 ** Delete an Fts5TokenDataIter structure and its contents.
6660 static void fts5TokendataIterDelete(Fts5TokenDataIter *pSet){
6661 if( pSet ){
6662 int ii;
6663 for(ii=0; ii<pSet->nIter; ii++){
6664 fts5MultiIterFree(pSet->apIter[ii]);
6666 sqlite3_free(pSet->aPoslistReader);
6667 sqlite3_free(pSet->aMap);
6668 sqlite3_free(pSet);
6673 ** Append a mapping to the token-map belonging to object pT.
6675 static void fts5TokendataIterAppendMap(
6676 Fts5Index *p,
6677 Fts5TokenDataIter *pT,
6678 int iIter,
6679 i64 iRowid,
6680 i64 iPos
6682 if( p->rc==SQLITE_OK ){
6683 if( pT->nMap==pT->nMapAlloc ){
6684 int nNew = pT->nMapAlloc ? pT->nMapAlloc*2 : 64;
6685 int nByte = nNew * sizeof(Fts5TokenDataMap);
6686 Fts5TokenDataMap *aNew;
6688 aNew = (Fts5TokenDataMap*)sqlite3_realloc(pT->aMap, nByte);
6689 if( aNew==0 ){
6690 p->rc = SQLITE_NOMEM;
6691 return;
6694 pT->aMap = aNew;
6695 pT->nMapAlloc = nNew;
6698 pT->aMap[pT->nMap].iRowid = iRowid;
6699 pT->aMap[pT->nMap].iPos = iPos;
6700 pT->aMap[pT->nMap].iIter = iIter;
6701 pT->nMap++;
6706 ** The iterator passed as the only argument must be a tokendata=1 iterator
6707 ** (pIter->pTokenDataIter!=0). This function sets the iterator output
6708 ** variables (pIter->base.*) according to the contents of the current
6709 ** row.
6711 static void fts5IterSetOutputsTokendata(Fts5Iter *pIter){
6712 int ii;
6713 int nHit = 0;
6714 i64 iRowid = SMALLEST_INT64;
6715 int iMin = 0;
6717 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
6719 pIter->base.nData = 0;
6720 pIter->base.pData = 0;
6722 for(ii=0; ii<pT->nIter; ii++){
6723 Fts5Iter *p = pT->apIter[ii];
6724 if( p->base.bEof==0 ){
6725 if( nHit==0 || p->base.iRowid<iRowid ){
6726 iRowid = p->base.iRowid;
6727 nHit = 1;
6728 pIter->base.pData = p->base.pData;
6729 pIter->base.nData = p->base.nData;
6730 iMin = ii;
6731 }else if( p->base.iRowid==iRowid ){
6732 nHit++;
6737 if( nHit==0 ){
6738 pIter->base.bEof = 1;
6739 }else{
6740 int eDetail = pIter->pIndex->pConfig->eDetail;
6741 pIter->base.bEof = 0;
6742 pIter->base.iRowid = iRowid;
6744 if( nHit==1 && eDetail==FTS5_DETAIL_FULL ){
6745 fts5TokendataIterAppendMap(pIter->pIndex, pT, iMin, iRowid, -1);
6746 }else
6747 if( nHit>1 && eDetail!=FTS5_DETAIL_NONE ){
6748 int nReader = 0;
6749 int nByte = 0;
6750 i64 iPrev = 0;
6752 /* Allocate array of iterators if they are not already allocated. */
6753 if( pT->aPoslistReader==0 ){
6754 pT->aPoslistReader = (Fts5PoslistReader*)sqlite3Fts5MallocZero(
6755 &pIter->pIndex->rc,
6756 pT->nIter * (sizeof(Fts5PoslistReader) + sizeof(int))
6758 if( pT->aPoslistReader==0 ) return;
6759 pT->aPoslistToIter = (int*)&pT->aPoslistReader[pT->nIter];
6762 /* Populate an iterator for each poslist that will be merged */
6763 for(ii=0; ii<pT->nIter; ii++){
6764 Fts5Iter *p = pT->apIter[ii];
6765 if( iRowid==p->base.iRowid ){
6766 pT->aPoslistToIter[nReader] = ii;
6767 sqlite3Fts5PoslistReaderInit(
6768 p->base.pData, p->base.nData, &pT->aPoslistReader[nReader++]
6770 nByte += p->base.nData;
6774 /* Ensure the output buffer is large enough */
6775 if( fts5BufferGrow(&pIter->pIndex->rc, &pIter->poslist, nByte+nHit*10) ){
6776 return;
6779 /* Ensure the token-mapping is large enough */
6780 if( eDetail==FTS5_DETAIL_FULL && pT->nMapAlloc<(pT->nMap + nByte) ){
6781 int nNew = (pT->nMapAlloc + nByte) * 2;
6782 Fts5TokenDataMap *aNew = (Fts5TokenDataMap*)sqlite3_realloc(
6783 pT->aMap, nNew*sizeof(Fts5TokenDataMap)
6785 if( aNew==0 ){
6786 pIter->pIndex->rc = SQLITE_NOMEM;
6787 return;
6789 pT->aMap = aNew;
6790 pT->nMapAlloc = nNew;
6793 pIter->poslist.n = 0;
6795 while( 1 ){
6796 i64 iMinPos = LARGEST_INT64;
6798 /* Find smallest position */
6799 iMin = 0;
6800 for(ii=0; ii<nReader; ii++){
6801 Fts5PoslistReader *pReader = &pT->aPoslistReader[ii];
6802 if( pReader->bEof==0 ){
6803 if( pReader->iPos<iMinPos ){
6804 iMinPos = pReader->iPos;
6805 iMin = ii;
6810 /* If all readers were at EOF, break out of the loop. */
6811 if( iMinPos==LARGEST_INT64 ) break;
6813 sqlite3Fts5PoslistSafeAppend(&pIter->poslist, &iPrev, iMinPos);
6814 sqlite3Fts5PoslistReaderNext(&pT->aPoslistReader[iMin]);
6816 if( eDetail==FTS5_DETAIL_FULL ){
6817 pT->aMap[pT->nMap].iPos = iMinPos;
6818 pT->aMap[pT->nMap].iIter = pT->aPoslistToIter[iMin];
6819 pT->aMap[pT->nMap].iRowid = iRowid;
6820 pT->nMap++;
6824 pIter->base.pData = pIter->poslist.p;
6825 pIter->base.nData = pIter->poslist.n;
6831 ** The iterator passed as the only argument must be a tokendata=1 iterator
6832 ** (pIter->pTokenDataIter!=0). This function advances the iterator. If
6833 ** argument bFrom is false, then the iterator is advanced to the next
6834 ** entry. Or, if bFrom is true, it is advanced to the first entry with
6835 ** a rowid of iFrom or greater.
6837 static void fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){
6838 int ii;
6839 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
6840 Fts5Index *pIndex = pIter->pIndex;
6842 for(ii=0; ii<pT->nIter; ii++){
6843 Fts5Iter *p = pT->apIter[ii];
6844 if( p->base.bEof==0
6845 && (p->base.iRowid==pIter->base.iRowid || (bFrom && p->base.iRowid<iFrom))
6847 fts5MultiIterNext(pIndex, p, bFrom, iFrom);
6848 while( bFrom && p->base.bEof==0
6849 && p->base.iRowid<iFrom
6850 && pIndex->rc==SQLITE_OK
6852 fts5MultiIterNext(pIndex, p, 0, 0);
6857 if( pIndex->rc==SQLITE_OK ){
6858 fts5IterSetOutputsTokendata(pIter);
6863 ** If the segment-iterator passed as the first argument is at EOF, then
6864 ** set pIter->term to a copy of buffer pTerm.
6866 static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){
6867 if( pIter && pIter->aSeg[0].pLeaf==0 ){
6868 fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p);
6873 ** This function sets up an iterator to use for a non-prefix query on a
6874 ** tokendata=1 table.
6876 static Fts5Iter *fts5SetupTokendataIter(
6877 Fts5Index *p, /* FTS index to query */
6878 const u8 *pToken, /* Buffer containing query term */
6879 int nToken, /* Size of buffer pToken in bytes */
6880 Fts5Colset *pColset /* Colset to filter on */
6882 Fts5Iter *pRet = 0;
6883 Fts5TokenDataIter *pSet = 0;
6884 Fts5Structure *pStruct = 0;
6885 const int flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN;
6887 Fts5Buffer bSeek = {0, 0, 0};
6888 Fts5Buffer *pSmall = 0;
6890 fts5IndexFlush(p);
6891 pStruct = fts5StructureRead(p);
6893 while( p->rc==SQLITE_OK ){
6894 Fts5Iter *pPrev = pSet ? pSet->apIter[pSet->nIter-1] : 0;
6895 Fts5Iter *pNew = 0;
6896 Fts5SegIter *pNewIter = 0;
6897 Fts5SegIter *pPrevIter = 0;
6899 int iLvl, iSeg, ii;
6901 pNew = fts5MultiIterAlloc(p, pStruct->nSegment);
6902 if( pSmall ){
6903 fts5BufferSet(&p->rc, &bSeek, pSmall->n, pSmall->p);
6904 fts5BufferAppendBlob(&p->rc, &bSeek, 1, (const u8*)"\0");
6905 }else{
6906 fts5BufferSet(&p->rc, &bSeek, nToken, pToken);
6908 if( p->rc ){
6909 sqlite3Fts5IterClose((Fts5IndexIter*)pNew);
6910 break;
6913 pNewIter = &pNew->aSeg[0];
6914 pPrevIter = (pPrev ? &pPrev->aSeg[0] : 0);
6915 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
6916 for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
6917 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
6918 int bDone = 0;
6920 if( pPrevIter ){
6921 if( fts5BufferCompare(pSmall, &pPrevIter->term) ){
6922 memcpy(pNewIter, pPrevIter, sizeof(Fts5SegIter));
6923 memset(pPrevIter, 0, sizeof(Fts5SegIter));
6924 bDone = 1;
6925 }else if( pPrevIter->iEndofDoclist>pPrevIter->pLeaf->szLeaf ){
6926 fts5SegIterNextInit(p,(const char*)bSeek.p,bSeek.n-1,pSeg,pNewIter);
6927 bDone = 1;
6931 if( bDone==0 ){
6932 fts5SegIterSeekInit(p, bSeek.p, bSeek.n, flags, pSeg, pNewIter);
6935 if( pPrevIter ){
6936 if( pPrevIter->pTombArray ){
6937 pNewIter->pTombArray = pPrevIter->pTombArray;
6938 pNewIter->pTombArray->nRef++;
6940 }else{
6941 fts5SegIterAllocTombstone(p, pNewIter);
6944 pNewIter++;
6945 if( pPrevIter ) pPrevIter++;
6946 if( p->rc ) break;
6949 fts5TokendataSetTermIfEof(pPrev, pSmall);
6951 pNew->bSkipEmpty = 1;
6952 pNew->pColset = pColset;
6953 fts5IterSetOutputCb(&p->rc, pNew);
6955 /* Loop through all segments in the new iterator. Find the smallest
6956 ** term that any segment-iterator points to. Iterator pNew will be
6957 ** used for this term. Also, set any iterator that points to a term that
6958 ** does not match pToken/nToken to point to EOF */
6959 pSmall = 0;
6960 for(ii=0; ii<pNew->nSeg; ii++){
6961 Fts5SegIter *pII = &pNew->aSeg[ii];
6962 if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){
6963 fts5SegIterSetEOF(pII);
6965 if( pII->pLeaf && (!pSmall || fts5BufferCompare(pSmall, &pII->term)>0) ){
6966 pSmall = &pII->term;
6970 /* If pSmall is still NULL at this point, then the new iterator does
6971 ** not point to any terms that match the query. So delete it and break
6972 ** out of the loop - all required iterators have been collected. */
6973 if( pSmall==0 ){
6974 sqlite3Fts5IterClose((Fts5IndexIter*)pNew);
6975 break;
6978 /* Append this iterator to the set and continue. */
6979 pSet = fts5AppendTokendataIter(p, pSet, pNew);
6982 if( p->rc==SQLITE_OK && pSet ){
6983 int ii;
6984 for(ii=0; ii<pSet->nIter; ii++){
6985 Fts5Iter *pIter = pSet->apIter[ii];
6986 int iSeg;
6987 for(iSeg=0; iSeg<pIter->nSeg; iSeg++){
6988 pIter->aSeg[iSeg].flags |= FTS5_SEGITER_ONETERM;
6990 fts5MultiIterFinishSetup(p, pIter);
6994 if( p->rc==SQLITE_OK ){
6995 pRet = fts5MultiIterAlloc(p, 0);
6997 if( pRet ){
6998 pRet->pTokenDataIter = pSet;
6999 if( pSet ){
7000 fts5IterSetOutputsTokendata(pRet);
7001 }else{
7002 pRet->base.bEof = 1;
7004 }else{
7005 fts5TokendataIterDelete(pSet);
7008 fts5StructureRelease(pStruct);
7009 fts5BufferFree(&bSeek);
7010 return pRet;
7015 ** Open a new iterator to iterate though all rowid that match the
7016 ** specified token or token prefix.
7018 int sqlite3Fts5IndexQuery(
7019 Fts5Index *p, /* FTS index to query */
7020 const char *pToken, int nToken, /* Token (or prefix) to query for */
7021 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
7022 Fts5Colset *pColset, /* Match these columns only */
7023 Fts5IndexIter **ppIter /* OUT: New iterator object */
7025 Fts5Config *pConfig = p->pConfig;
7026 Fts5Iter *pRet = 0;
7027 Fts5Buffer buf = {0, 0, 0};
7029 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
7030 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
7032 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
7033 int iIdx = 0; /* Index to search */
7034 int iPrefixIdx = 0; /* +1 prefix index */
7035 int bTokendata = pConfig->bTokendata;
7036 if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
7038 if( flags & (FTS5INDEX_QUERY_NOTOKENDATA|FTS5INDEX_QUERY_SCAN) ){
7039 bTokendata = 0;
7042 /* Figure out which index to search and set iIdx accordingly. If this
7043 ** is a prefix query for which there is no prefix index, set iIdx to
7044 ** greater than pConfig->nPrefix to indicate that the query will be
7045 ** satisfied by scanning multiple terms in the main index.
7047 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
7048 ** prefix-query. Instead of using a prefix-index (if one exists),
7049 ** evaluate the prefix query using the main FTS index. This is used
7050 ** for internal sanity checking by the integrity-check in debug
7051 ** mode only. */
7052 #ifdef SQLITE_DEBUG
7053 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
7054 assert( flags & FTS5INDEX_QUERY_PREFIX );
7055 iIdx = 1+pConfig->nPrefix;
7056 }else
7057 #endif
7058 if( flags & FTS5INDEX_QUERY_PREFIX ){
7059 int nChar = fts5IndexCharlen(pToken, nToken);
7060 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
7061 int nIdxChar = pConfig->aPrefix[iIdx-1];
7062 if( nIdxChar==nChar ) break;
7063 if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
7067 if( bTokendata && iIdx==0 ){
7068 buf.p[0] = '0';
7069 pRet = fts5SetupTokendataIter(p, buf.p, nToken+1, pColset);
7070 }else if( iIdx<=pConfig->nPrefix ){
7071 /* Straight index lookup */
7072 Fts5Structure *pStruct = fts5StructureRead(p);
7073 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
7074 if( pStruct ){
7075 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
7076 pColset, buf.p, nToken+1, -1, 0, &pRet
7078 fts5StructureRelease(pStruct);
7080 }else{
7081 /* Scan multiple terms in the main index */
7082 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
7083 fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
7084 if( pRet==0 ){
7085 assert( p->rc!=SQLITE_OK );
7086 }else{
7087 assert( pRet->pColset==0 );
7088 fts5IterSetOutputCb(&p->rc, pRet);
7089 if( p->rc==SQLITE_OK ){
7090 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
7091 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
7096 if( p->rc ){
7097 sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
7098 pRet = 0;
7099 sqlite3Fts5IndexCloseReader(p);
7102 *ppIter = (Fts5IndexIter*)pRet;
7103 sqlite3Fts5BufferFree(&buf);
7105 return fts5IndexReturn(p);
7109 ** Return true if the iterator passed as the only argument is at EOF.
7112 ** Move to the next matching rowid.
7114 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
7115 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
7116 assert( pIter->pIndex->rc==SQLITE_OK );
7117 if( pIter->pTokenDataIter ){
7118 fts5TokendataIterNext(pIter, 0, 0);
7119 }else{
7120 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
7122 return fts5IndexReturn(pIter->pIndex);
7126 ** Move to the next matching term/rowid. Used by the fts5vocab module.
7128 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
7129 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
7130 Fts5Index *p = pIter->pIndex;
7132 assert( pIter->pIndex->rc==SQLITE_OK );
7134 fts5MultiIterNext(p, pIter, 0, 0);
7135 if( p->rc==SQLITE_OK ){
7136 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
7137 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
7138 fts5DataRelease(pSeg->pLeaf);
7139 pSeg->pLeaf = 0;
7140 pIter->base.bEof = 1;
7144 return fts5IndexReturn(pIter->pIndex);
7148 ** Move to the next matching rowid that occurs at or after iMatch. The
7149 ** definition of "at or after" depends on whether this iterator iterates
7150 ** in ascending or descending rowid order.
7152 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
7153 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
7154 if( pIter->pTokenDataIter ){
7155 fts5TokendataIterNext(pIter, 1, iMatch);
7156 }else{
7157 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
7159 return fts5IndexReturn(pIter->pIndex);
7163 ** Return the current term.
7165 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
7166 int n;
7167 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
7168 assert_nc( z || n<=1 );
7169 *pn = n-1;
7170 return (z ? &z[1] : 0);
7174 ** This is used by xInstToken() to access the token at offset iOff, column
7175 ** iCol of row iRowid. The token is returned via output variables *ppOut
7176 ** and *pnOut. The iterator passed as the first argument must be a tokendata=1
7177 ** iterator (pIter->pTokenDataIter!=0).
7179 int sqlite3Fts5IterToken(
7180 Fts5IndexIter *pIndexIter,
7181 i64 iRowid,
7182 int iCol,
7183 int iOff,
7184 const char **ppOut, int *pnOut
7186 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
7187 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
7188 Fts5TokenDataMap *aMap = pT->aMap;
7189 i64 iPos = (((i64)iCol)<<32) + iOff;
7191 int i1 = 0;
7192 int i2 = pT->nMap;
7193 int iTest = 0;
7195 while( i2>i1 ){
7196 iTest = (i1 + i2) / 2;
7198 if( aMap[iTest].iRowid<iRowid ){
7199 i1 = iTest+1;
7200 }else if( aMap[iTest].iRowid>iRowid ){
7201 i2 = iTest;
7202 }else{
7203 if( aMap[iTest].iPos<iPos ){
7204 if( aMap[iTest].iPos<0 ){
7205 break;
7207 i1 = iTest+1;
7208 }else if( aMap[iTest].iPos>iPos ){
7209 i2 = iTest;
7210 }else{
7211 break;
7216 if( i2>i1 ){
7217 Fts5Iter *pMap = pT->apIter[aMap[iTest].iIter];
7218 *ppOut = (const char*)pMap->aSeg[0].term.p+1;
7219 *pnOut = pMap->aSeg[0].term.n-1;
7222 return SQLITE_OK;
7226 ** Clear any existing entries from the token-map associated with the
7227 ** iterator passed as the only argument.
7229 void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter *pIndexIter){
7230 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
7231 if( pIter && pIter->pTokenDataIter ){
7232 pIter->pTokenDataIter->nMap = 0;
7237 ** Set a token-mapping for the iterator passed as the first argument. This
7238 ** is used in detail=column or detail=none mode when a token is requested
7239 ** using the xInstToken() API. In this case the caller tokenizers the
7240 ** current row and configures the token-mapping via multiple calls to this
7241 ** function.
7243 int sqlite3Fts5IndexIterWriteTokendata(
7244 Fts5IndexIter *pIndexIter,
7245 const char *pToken, int nToken,
7246 i64 iRowid, int iCol, int iOff
7248 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
7249 Fts5TokenDataIter *pT = pIter->pTokenDataIter;
7250 Fts5Index *p = pIter->pIndex;
7251 int ii;
7253 assert( p->pConfig->eDetail!=FTS5_DETAIL_FULL );
7254 assert( pIter->pTokenDataIter );
7256 for(ii=0; ii<pT->nIter; ii++){
7257 Fts5Buffer *pTerm = &pT->apIter[ii]->aSeg[0].term;
7258 if( nToken==pTerm->n-1 && memcmp(pToken, pTerm->p+1, nToken)==0 ) break;
7260 if( ii<pT->nIter ){
7261 fts5TokendataIterAppendMap(p, pT, ii, iRowid, (((i64)iCol)<<32) + iOff);
7263 return fts5IndexReturn(p);
7267 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
7269 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
7270 if( pIndexIter ){
7271 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
7272 Fts5Index *pIndex = pIter->pIndex;
7273 fts5TokendataIterDelete(pIter->pTokenDataIter);
7274 fts5MultiIterFree(pIter);
7275 sqlite3Fts5IndexCloseReader(pIndex);
7280 ** Read and decode the "averages" record from the database.
7282 ** Parameter anSize must point to an array of size nCol, where nCol is
7283 ** the number of user defined columns in the FTS table.
7285 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
7286 int nCol = p->pConfig->nCol;
7287 Fts5Data *pData;
7289 *pnRow = 0;
7290 memset(anSize, 0, sizeof(i64) * nCol);
7291 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
7292 if( p->rc==SQLITE_OK && pData->nn ){
7293 int i = 0;
7294 int iCol;
7295 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
7296 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
7297 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
7301 fts5DataRelease(pData);
7302 return fts5IndexReturn(p);
7306 ** Replace the current "averages" record with the contents of the buffer
7307 ** supplied as the second argument.
7309 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
7310 assert( p->rc==SQLITE_OK );
7311 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
7312 return fts5IndexReturn(p);
7316 ** Return the total number of blocks this module has read from the %_data
7317 ** table since it was created.
7319 int sqlite3Fts5IndexReads(Fts5Index *p){
7320 return p->nRead;
7324 ** Set the 32-bit cookie value stored at the start of all structure
7325 ** records to the value passed as the second argument.
7327 ** Return SQLITE_OK if successful, or an SQLite error code if an error
7328 ** occurs.
7330 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
7331 int rc; /* Return code */
7332 Fts5Config *pConfig = p->pConfig; /* Configuration object */
7333 u8 aCookie[4]; /* Binary representation of iNew */
7334 sqlite3_blob *pBlob = 0;
7336 assert( p->rc==SQLITE_OK );
7337 sqlite3Fts5Put32(aCookie, iNew);
7339 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
7340 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
7342 if( rc==SQLITE_OK ){
7343 sqlite3_blob_write(pBlob, aCookie, 4, 0);
7344 rc = sqlite3_blob_close(pBlob);
7347 return rc;
7350 int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
7351 Fts5Structure *pStruct;
7352 pStruct = fts5StructureRead(p);
7353 fts5StructureRelease(pStruct);
7354 return fts5IndexReturn(p);
7358 ** Retrieve the origin value that will be used for the segment currently
7359 ** being accumulated in the in-memory hash table when it is flushed to
7360 ** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to
7361 ** the queried value. Or, if an error occurs, an error code is returned
7362 ** and the final value of (*piOrigin) is undefined.
7364 int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){
7365 Fts5Structure *pStruct;
7366 pStruct = fts5StructureRead(p);
7367 if( pStruct ){
7368 *piOrigin = pStruct->nOriginCntr;
7369 fts5StructureRelease(pStruct);
7371 return fts5IndexReturn(p);
7375 ** Buffer pPg contains a page of a tombstone hash table - one of nPg pages
7376 ** associated with the same segment. This function adds rowid iRowid to
7377 ** the hash table. The caller is required to guarantee that there is at
7378 ** least one free slot on the page.
7380 ** If parameter bForce is false and the hash table is deemed to be full
7381 ** (more than half of the slots are occupied), then non-zero is returned
7382 ** and iRowid not inserted. Or, if bForce is true or if the hash table page
7383 ** is not full, iRowid is inserted and zero returned.
7385 static int fts5IndexTombstoneAddToPage(
7386 Fts5Data *pPg,
7387 int bForce,
7388 int nPg,
7389 u64 iRowid
7391 const int szKey = TOMBSTONE_KEYSIZE(pPg);
7392 const int nSlot = TOMBSTONE_NSLOT(pPg);
7393 const int nElem = fts5GetU32(&pPg->p[4]);
7394 int iSlot = (iRowid / nPg) % nSlot;
7395 int nCollide = nSlot;
7397 if( szKey==4 && iRowid>0xFFFFFFFF ) return 2;
7398 if( iRowid==0 ){
7399 pPg->p[1] = 0x01;
7400 return 0;
7403 if( bForce==0 && nElem>=(nSlot/2) ){
7404 return 1;
7407 fts5PutU32(&pPg->p[4], nElem+1);
7408 if( szKey==4 ){
7409 u32 *aSlot = (u32*)&pPg->p[8];
7410 while( aSlot[iSlot] ){
7411 iSlot = (iSlot + 1) % nSlot;
7412 if( nCollide--==0 ) return 0;
7414 fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid);
7415 }else{
7416 u64 *aSlot = (u64*)&pPg->p[8];
7417 while( aSlot[iSlot] ){
7418 iSlot = (iSlot + 1) % nSlot;
7419 if( nCollide--==0 ) return 0;
7421 fts5PutU64((u8*)&aSlot[iSlot], iRowid);
7424 return 0;
7428 ** This function attempts to build a new hash containing all the keys
7429 ** currently in the tombstone hash table for segment pSeg. The new
7430 ** hash will be stored in the nOut buffers passed in array apOut[].
7431 ** All pages of the new hash use key-size szKey (4 or 8).
7433 ** Return 0 if the hash is successfully rebuilt into the nOut pages.
7434 ** Or non-zero if it is not (because one page became overfull). In this
7435 ** case the caller should retry with a larger nOut parameter.
7437 ** Parameter pData1 is page iPg1 of the hash table being rebuilt.
7439 static int fts5IndexTombstoneRehash(
7440 Fts5Index *p,
7441 Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */
7442 Fts5Data *pData1, /* One page of current hash - or NULL */
7443 int iPg1, /* Which page of the current hash is pData1 */
7444 int szKey, /* 4 or 8, the keysize */
7445 int nOut, /* Number of output pages */
7446 Fts5Data **apOut /* Array of output hash pages */
7448 int ii;
7449 int res = 0;
7451 /* Initialize the headers of all the output pages */
7452 for(ii=0; ii<nOut; ii++){
7453 apOut[ii]->p[0] = szKey;
7454 fts5PutU32(&apOut[ii]->p[4], 0);
7457 /* Loop through the current pages of the hash table. */
7458 for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){
7459 Fts5Data *pData = 0; /* Page ii of the current hash table */
7460 Fts5Data *pFree = 0; /* Free this at the end of the loop */
7462 if( iPg1==ii ){
7463 pData = pData1;
7464 }else{
7465 pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii));
7468 if( pData ){
7469 int szKeyIn = TOMBSTONE_KEYSIZE(pData);
7470 int nSlotIn = (pData->nn - 8) / szKeyIn;
7471 int iIn;
7472 for(iIn=0; iIn<nSlotIn; iIn++){
7473 u64 iVal = 0;
7475 /* Read the value from slot iIn of the input page into iVal. */
7476 if( szKeyIn==4 ){
7477 u32 *aSlot = (u32*)&pData->p[8];
7478 if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]);
7479 }else{
7480 u64 *aSlot = (u64*)&pData->p[8];
7481 if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]);
7484 /* If iVal is not 0 at this point, insert it into the new hash table */
7485 if( iVal ){
7486 Fts5Data *pPg = apOut[(iVal % nOut)];
7487 res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal);
7488 if( res ) break;
7492 /* If this is page 0 of the old hash, copy the rowid-0-flag from the
7493 ** old hash to the new. */
7494 if( ii==0 ){
7495 apOut[0]->p[1] = pData->p[1];
7498 fts5DataRelease(pFree);
7501 return res;
7505 ** This is called to rebuild the hash table belonging to segment pSeg.
7506 ** If parameter pData1 is not NULL, then one page of the existing hash table
7507 ** has already been loaded - pData1, which is page iPg1. The key-size for
7508 ** the new hash table is szKey (4 or 8).
7510 ** If successful, the new hash table is not written to disk. Instead,
7511 ** output parameter (*pnOut) is set to the number of pages in the new
7512 ** hash table, and (*papOut) to point to an array of buffers containing
7513 ** the new page data.
7515 ** If an error occurs, an error code is left in the Fts5Index object and
7516 ** both output parameters set to 0 before returning.
7518 static void fts5IndexTombstoneRebuild(
7519 Fts5Index *p,
7520 Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */
7521 Fts5Data *pData1, /* One page of current hash - or NULL */
7522 int iPg1, /* Which page of the current hash is pData1 */
7523 int szKey, /* 4 or 8, the keysize */
7524 int *pnOut, /* OUT: Number of output pages */
7525 Fts5Data ***papOut /* OUT: Output hash pages */
7527 const int MINSLOT = 32;
7528 int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey);
7529 int nSlot = 0; /* Number of slots in each output page */
7530 int nOut = 0;
7532 /* Figure out how many output pages (nOut) and how many slots per
7533 ** page (nSlot). There are three possibilities:
7535 ** 1. The hash table does not yet exist. In this case the new hash
7536 ** table will consist of a single page with MINSLOT slots.
7538 ** 2. The hash table exists but is currently a single page. In this
7539 ** case an attempt is made to grow the page to accommodate the new
7540 ** entry. The page is allowed to grow up to nSlotPerPage (see above)
7541 ** slots.
7543 ** 3. The hash table already consists of more than one page, or of
7544 ** a single page already so large that it cannot be grown. In this
7545 ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage
7546 ** slots each, where nPg is the current number of pages in the
7547 ** hash table.
7549 if( pSeg->nPgTombstone==0 ){
7550 /* Case 1. */
7551 nOut = 1;
7552 nSlot = MINSLOT;
7553 }else if( pSeg->nPgTombstone==1 ){
7554 /* Case 2. */
7555 int nElem = (int)fts5GetU32(&pData1->p[4]);
7556 assert( pData1 && iPg1==0 );
7557 nOut = 1;
7558 nSlot = MAX(nElem*4, MINSLOT);
7559 if( nSlot>nSlotPerPage ) nOut = 0;
7561 if( nOut==0 ){
7562 /* Case 3. */
7563 nOut = (pSeg->nPgTombstone * 2 + 1);
7564 nSlot = nSlotPerPage;
7567 /* Allocate the required array and output pages */
7568 while( 1 ){
7569 int res = 0;
7570 int ii = 0;
7571 int szPage = 0;
7572 Fts5Data **apOut = 0;
7574 /* Allocate space for the new hash table */
7575 assert( nSlot>=MINSLOT );
7576 apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut);
7577 szPage = 8 + nSlot*szKey;
7578 for(ii=0; ii<nOut; ii++){
7579 Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc,
7580 sizeof(Fts5Data)+szPage
7582 if( pNew ){
7583 pNew->nn = szPage;
7584 pNew->p = (u8*)&pNew[1];
7585 apOut[ii] = pNew;
7589 /* Rebuild the hash table. */
7590 if( p->rc==SQLITE_OK ){
7591 res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut);
7593 if( res==0 ){
7594 if( p->rc ){
7595 fts5IndexFreeArray(apOut, nOut);
7596 apOut = 0;
7597 nOut = 0;
7599 *pnOut = nOut;
7600 *papOut = apOut;
7601 break;
7604 /* If control flows to here, it was not possible to rebuild the hash
7605 ** table. Free all buffers and then try again with more pages. */
7606 assert( p->rc==SQLITE_OK );
7607 fts5IndexFreeArray(apOut, nOut);
7608 nSlot = nSlotPerPage;
7609 nOut = nOut*2 + 1;
7615 ** Add a tombstone for rowid iRowid to segment pSeg.
7617 static void fts5IndexTombstoneAdd(
7618 Fts5Index *p,
7619 Fts5StructureSegment *pSeg,
7620 u64 iRowid
7622 Fts5Data *pPg = 0;
7623 int iPg = -1;
7624 int szKey = 0;
7625 int nHash = 0;
7626 Fts5Data **apHash = 0;
7628 p->nContentlessDelete++;
7630 if( pSeg->nPgTombstone>0 ){
7631 iPg = iRowid % pSeg->nPgTombstone;
7632 pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg));
7633 if( pPg==0 ){
7634 assert( p->rc!=SQLITE_OK );
7635 return;
7638 if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){
7639 fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg), pPg->p, pPg->nn);
7640 fts5DataRelease(pPg);
7641 return;
7645 /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */
7646 szKey = pPg ? TOMBSTONE_KEYSIZE(pPg) : 4;
7647 if( iRowid>0xFFFFFFFF ) szKey = 8;
7649 /* Rebuild the hash table */
7650 fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash);
7651 assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) );
7653 /* If all has succeeded, write the new rowid into one of the new hash
7654 ** table pages, then write them all out to disk. */
7655 if( nHash ){
7656 int ii = 0;
7657 fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid);
7658 for(ii=0; ii<nHash; ii++){
7659 i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii);
7660 fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn);
7662 pSeg->nPgTombstone = nHash;
7663 fts5StructureWrite(p, p->pStruct);
7666 fts5DataRelease(pPg);
7667 fts5IndexFreeArray(apHash, nHash);
7671 ** Add iRowid to the tombstone list of the segment or segments that contain
7672 ** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite
7673 ** error code otherwise.
7675 int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){
7676 Fts5Structure *pStruct;
7677 pStruct = fts5StructureRead(p);
7678 if( pStruct ){
7679 int bFound = 0; /* True after pSeg->nEntryTombstone incr. */
7680 int iLvl;
7681 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
7682 int iSeg;
7683 for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
7684 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
7685 if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){
7686 if( bFound==0 ){
7687 pSeg->nEntryTombstone++;
7688 bFound = 1;
7690 fts5IndexTombstoneAdd(p, pSeg, iRowid);
7694 fts5StructureRelease(pStruct);
7696 return fts5IndexReturn(p);
7699 /*************************************************************************
7700 **************************************************************************
7701 ** Below this point is the implementation of the integrity-check
7702 ** functionality.
7706 ** Return a simple checksum value based on the arguments.
7708 u64 sqlite3Fts5IndexEntryCksum(
7709 i64 iRowid,
7710 int iCol,
7711 int iPos,
7712 int iIdx,
7713 const char *pTerm,
7714 int nTerm
7716 int i;
7717 u64 ret = iRowid;
7718 ret += (ret<<3) + iCol;
7719 ret += (ret<<3) + iPos;
7720 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
7721 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
7722 return ret;
7725 #ifdef SQLITE_DEBUG
7727 ** This function is purely an internal test. It does not contribute to
7728 ** FTS functionality, or even the integrity-check, in any way.
7730 ** Instead, it tests that the same set of pgno/rowid combinations are
7731 ** visited regardless of whether the doclist-index identified by parameters
7732 ** iSegid/iLeaf is iterated in forwards or reverse order.
7734 static void fts5TestDlidxReverse(
7735 Fts5Index *p,
7736 int iSegid, /* Segment id to load from */
7737 int iLeaf /* Load doclist-index for this leaf */
7739 Fts5DlidxIter *pDlidx = 0;
7740 u64 cksum1 = 13;
7741 u64 cksum2 = 13;
7743 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
7744 fts5DlidxIterEof(p, pDlidx)==0;
7745 fts5DlidxIterNext(p, pDlidx)
7747 i64 iRowid = fts5DlidxIterRowid(pDlidx);
7748 int pgno = fts5DlidxIterPgno(pDlidx);
7749 assert( pgno>iLeaf );
7750 cksum1 += iRowid + ((i64)pgno<<32);
7752 fts5DlidxIterFree(pDlidx);
7753 pDlidx = 0;
7755 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
7756 fts5DlidxIterEof(p, pDlidx)==0;
7757 fts5DlidxIterPrev(p, pDlidx)
7759 i64 iRowid = fts5DlidxIterRowid(pDlidx);
7760 int pgno = fts5DlidxIterPgno(pDlidx);
7761 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
7762 cksum2 += iRowid + ((i64)pgno<<32);
7764 fts5DlidxIterFree(pDlidx);
7765 pDlidx = 0;
7767 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
7770 static int fts5QueryCksum(
7771 Fts5Index *p, /* Fts5 index object */
7772 int iIdx,
7773 const char *z, /* Index key to query for */
7774 int n, /* Size of index key in bytes */
7775 int flags, /* Flags for Fts5IndexQuery */
7776 u64 *pCksum /* IN/OUT: Checksum value */
7778 int eDetail = p->pConfig->eDetail;
7779 u64 cksum = *pCksum;
7780 Fts5IndexIter *pIter = 0;
7781 int rc = sqlite3Fts5IndexQuery(
7782 p, z, n, (flags | FTS5INDEX_QUERY_NOTOKENDATA), 0, &pIter
7785 while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
7786 i64 rowid = pIter->iRowid;
7788 if( eDetail==FTS5_DETAIL_NONE ){
7789 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
7790 }else{
7791 Fts5PoslistReader sReader;
7792 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
7793 sReader.bEof==0;
7794 sqlite3Fts5PoslistReaderNext(&sReader)
7796 int iCol = FTS5_POS2COLUMN(sReader.iPos);
7797 int iOff = FTS5_POS2OFFSET(sReader.iPos);
7798 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
7801 if( rc==SQLITE_OK ){
7802 rc = sqlite3Fts5IterNext(pIter);
7805 sqlite3Fts5IterClose(pIter);
7807 *pCksum = cksum;
7808 return rc;
/*
** Check if buffer z[], size n bytes, contains a series of well-formed
** utf-8 byte sequences. If so, return 0. Otherwise, if the buffer does
** not contain valid utf-8, return non-zero.
**
** Only the structure of each sequence is checked: a lead byte announcing
** a 1, 2, 3 or 4 byte sequence must be followed, within the buffer, by the
** matching number of continuation bytes (10xxxxxx). Overlong encodings
** and out-of-range codepoints are not detected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    int nCont;                    /* Continuation bytes required by z[i] */
    int j;

    if( (z[i] & 0x80)==0x00 ){
      nCont = 0;
    }else if( (z[i] & 0xE0)==0xC0 ){
      nCont = 1;
    }else if( (z[i] & 0xF0)==0xE0 ){
      nCont = 2;
    }else if( (z[i] & 0xF8)==0xF0 ){
      nCont = 3;
    }else{
      return 1;                   /* Stray continuation or invalid lead byte */
    }

    /* The whole sequence must lie inside the buffer... */
    if( i+nCont>=n ) return 1;

    /* ...and every trailing byte must be a continuation byte. */
    for(j=1; j<=nCont; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }

    i += nCont + 1;
  }

  return 0;
}
7844 ** This function is also purely an internal test. It does not contribute to
7845 ** FTS functionality, or even the integrity-check, in any way.
7847 static void fts5TestTerm(
7848 Fts5Index *p,
7849 Fts5Buffer *pPrev, /* Previous term */
7850 const char *z, int n, /* Possibly new term to test */
7851 u64 expected,
7852 u64 *pCksum
7854 int rc = p->rc;
7855 if( pPrev->n==0 ){
7856 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
7857 }else
7858 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
7859 u64 cksum3 = *pCksum;
7860 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
7861 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
7862 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
7863 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
7864 u64 ck1 = 0;
7865 u64 ck2 = 0;
7867 /* Check that the results returned for ASC and DESC queries are
7868 ** the same. If not, call this corruption. */
7869 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
7870 if( rc==SQLITE_OK ){
7871 int f = flags|FTS5INDEX_QUERY_DESC;
7872 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
7874 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
7876 /* If this is a prefix query, check that the results returned if the
7877 ** the index is disabled are the same. In both ASC and DESC order.
7879 ** This check may only be performed if the hash table is empty. This
7880 ** is because the hash table only supports a single scan query at
7881 ** a time, and the multi-iter loop from which this function is called
7882 ** is already performing such a scan.
7884 ** Also only do this if buffer zTerm contains nTerm bytes of valid
7885 ** utf-8. Otherwise, the last part of the buffer contents might contain
7886 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
7887 ** character stored in the main fts index, which will cause the
7888 ** test to fail. */
7889 if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
7890 if( iIdx>0 && rc==SQLITE_OK ){
7891 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
7892 ck2 = 0;
7893 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
7894 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
7896 if( iIdx>0 && rc==SQLITE_OK ){
7897 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
7898 ck2 = 0;
7899 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
7900 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
7904 cksum3 ^= ck1;
7905 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
7907 if( rc==SQLITE_OK && cksum3!=expected ){
7908 rc = FTS5_CORRUPT;
7910 *pCksum = cksum3;
7912 p->rc = rc;
7915 #else
/* SQLITE_DEBUG is not defined: the two internal self-test hooks expand to
** nothing, so calls to them elsewhere in this file compile away. */
7916 # define fts5TestDlidxReverse(x,y,z)
7917 # define fts5TestTerm(u,v,w,x,y,z)
7918 #endif
7921 ** Check that:
7923 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
7924 ** contain zero terms.
7925 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
7926 ** contain zero rowids.
7928 static void fts5IndexIntegrityCheckEmpty(
7929 Fts5Index *p,
7930 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
7931 int iFirst,
7932 int iNoRowid,
7933 int iLast
7935 int i;
7937 /* Now check that the iter.nEmpty leaves following the current leaf
7938 ** (a) exist and (b) contain no terms. */
7939 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
7940 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
7941 if( pLeaf ){
7942 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
7943 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
7945 fts5DataRelease(pLeaf);
7949 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
7950 i64 iTermOff = 0;
7951 int ii;
7953 Fts5Buffer buf1 = {0,0,0};
7954 Fts5Buffer buf2 = {0,0,0};
7956 ii = pLeaf->szLeaf;
7957 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
7958 int res;
7959 i64 iOff;
7960 int nIncr;
7962 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
7963 iTermOff += nIncr;
7964 iOff = iTermOff;
7966 if( iOff>=pLeaf->szLeaf ){
7967 p->rc = FTS5_CORRUPT;
7968 }else if( iTermOff==nIncr ){
7969 int nByte;
7970 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
7971 if( (iOff+nByte)>pLeaf->szLeaf ){
7972 p->rc = FTS5_CORRUPT;
7973 }else{
7974 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
7976 }else{
7977 int nKeep, nByte;
7978 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
7979 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
7980 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
7981 p->rc = FTS5_CORRUPT;
7982 }else{
7983 buf1.n = nKeep;
7984 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
7987 if( p->rc==SQLITE_OK ){
7988 res = fts5BufferCompare(&buf1, &buf2);
7989 if( res<=0 ) p->rc = FTS5_CORRUPT;
7992 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
7995 fts5BufferFree(&buf1);
7996 fts5BufferFree(&buf2);
7999 static void fts5IndexIntegrityCheckSegment(
8000 Fts5Index *p, /* FTS5 backend object */
8001 Fts5StructureSegment *pSeg /* Segment to check internal consistency */
8003 Fts5Config *pConfig = p->pConfig;
8004 int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE);
8005 sqlite3_stmt *pStmt = 0;
8006 int rc2;
8007 int iIdxPrevLeaf = pSeg->pgnoFirst-1;
8008 int iDlidxPrevLeaf = pSeg->pgnoLast;
8010 if( pSeg->pgnoFirst==0 ) return;
8012 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
8013 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
8014 "ORDER BY 1, 2",
8015 pConfig->zDb, pConfig->zName, pSeg->iSegid
8018 /* Iterate through the b-tree hierarchy. */
8019 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
8020 i64 iRow; /* Rowid for this leaf */
8021 Fts5Data *pLeaf; /* Data for this leaf */
8023 const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
8024 int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
8025 int iIdxLeaf = sqlite3_column_int(pStmt, 2);
8026 int bIdxDlidx = sqlite3_column_int(pStmt, 3);
8028 /* If the leaf in question has already been trimmed from the segment,
8029 ** ignore this b-tree entry. Otherwise, load it into memory. */
8030 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
8031 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
8032 pLeaf = fts5LeafRead(p, iRow);
8033 if( pLeaf==0 ) break;
8035 /* Check that the leaf contains at least one term, and that it is equal
8036 ** to or larger than the split-key in zIdxTerm. Also check that if there
8037 ** is also a rowid pointer within the leaf page header, it points to a
8038 ** location before the term. */
8039 if( pLeaf->nn<=pLeaf->szLeaf ){
8041 if( nIdxTerm==0
8042 && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE
8043 && pLeaf->nn==pLeaf->szLeaf
8044 && pLeaf->nn==4
8046 /* special case - the very first page in a segment keeps its %_idx
8047 ** entry even if all the terms are removed from it by secure-delete
8048 ** operations. */
8049 }else{
8050 p->rc = FTS5_CORRUPT;
8053 }else{
8054 int iOff; /* Offset of first term on leaf */
8055 int iRowidOff; /* Offset of first rowid on leaf */
8056 int nTerm; /* Size of term on leaf in bytes */
8057 int res; /* Comparison of term and split-key */
8059 iOff = fts5LeafFirstTermOff(pLeaf);
8060 iRowidOff = fts5LeafFirstRowidOff(pLeaf);
8061 if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
8062 p->rc = FTS5_CORRUPT;
8063 }else{
8064 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
8065 res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
8066 if( res==0 ) res = nTerm - nIdxTerm;
8067 if( res<0 ) p->rc = FTS5_CORRUPT;
8070 fts5IntegrityCheckPgidx(p, pLeaf);
8072 fts5DataRelease(pLeaf);
8073 if( p->rc ) break;
8075 /* Now check that the iter.nEmpty leaves following the current leaf
8076 ** (a) exist and (b) contain no terms. */
8077 fts5IndexIntegrityCheckEmpty(
8078 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
8080 if( p->rc ) break;
8082 /* If there is a doclist-index, check that it looks right. */
8083 if( bIdxDlidx ){
8084 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
8085 int iPrevLeaf = iIdxLeaf;
8086 int iSegid = pSeg->iSegid;
8087 int iPg = 0;
8088 i64 iKey;
8090 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
8091 fts5DlidxIterEof(p, pDlidx)==0;
8092 fts5DlidxIterNext(p, pDlidx)
8095 /* Check any rowid-less pages that occur before the current leaf. */
8096 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
8097 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
8098 pLeaf = fts5DataRead(p, iKey);
8099 if( pLeaf ){
8100 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
8101 fts5DataRelease(pLeaf);
8104 iPrevLeaf = fts5DlidxIterPgno(pDlidx);
8106 /* Check that the leaf page indicated by the iterator really does
8107 ** contain the rowid suggested by the same. */
8108 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
8109 pLeaf = fts5DataRead(p, iKey);
8110 if( pLeaf ){
8111 i64 iRowid;
8112 int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
8113 ASSERT_SZLEAF_OK(pLeaf);
8114 if( iRowidOff>=pLeaf->szLeaf ){
8115 p->rc = FTS5_CORRUPT;
8116 }else if( bSecureDelete==0 || iRowidOff>0 ){
8117 i64 iDlRowid = fts5DlidxIterRowid(pDlidx);
8118 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
8119 if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){
8120 p->rc = FTS5_CORRUPT;
8123 fts5DataRelease(pLeaf);
8127 iDlidxPrevLeaf = iPg;
8128 fts5DlidxIterFree(pDlidx);
8129 fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
8130 }else{
8131 iDlidxPrevLeaf = pSeg->pgnoLast;
8132 /* TODO: Check there is no doclist index */
8135 iIdxPrevLeaf = iIdxLeaf;
8138 rc2 = sqlite3_finalize(pStmt);
8139 if( p->rc==SQLITE_OK ) p->rc = rc2;
8141 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
8142 #if 0
8143 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
8144 p->rc = FTS5_CORRUPT;
8146 #endif
8151 ** Run internal checks to ensure that the FTS index (a) is internally
8152 ** consistent and (b) contains entries for which the XOR of the checksums
8153 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
8155 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
8156 ** checksum does not match. Return SQLITE_OK if all checks pass without
8157 ** error, or some other SQLite error code if another error (e.g. OOM)
8158 ** occurs.
8160 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
8161 int eDetail = p->pConfig->eDetail;
8162 u64 cksum2 = 0; /* Checksum based on contents of indexes */
8163 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
8164 Fts5Iter *pIter; /* Used to iterate through entire index */
8165 Fts5Structure *pStruct; /* Index structure */
8166 int iLvl, iSeg;
8168 #ifdef SQLITE_DEBUG
8169 /* Used by extra internal tests only run if NDEBUG is not defined */
8170 u64 cksum3 = 0; /* Checksum based on contents of indexes */
8171 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
8172 #endif
8173 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
8175 /* Load the FTS index structure */
8176 pStruct = fts5StructureRead(p);
8177 if( pStruct==0 ){
8178 assert( p->rc!=SQLITE_OK );
8179 return fts5IndexReturn(p);
8182 /* Check that the internal nodes of each segment match the leaves */
8183 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
8184 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
8185 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
8186 fts5IndexIntegrityCheckSegment(p, pSeg);
8190 /* The cksum argument passed to this function is a checksum calculated
8191 ** based on all expected entries in the FTS index (including prefix index
8192 ** entries). This block checks that a checksum calculated based on the
8193 ** actual contents of FTS index is identical.
8195 ** Two versions of the same checksum are calculated. The first (stack
8196 ** variable cksum2) based on entries extracted from the full-text index
8197 ** while doing a linear scan of each individual index in turn.
8199 ** As each term visited by the linear scans, a separate query for the
8200 ** same term is performed. cksum3 is calculated based on the entries
8201 ** extracted by these queries.
8203 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
8204 fts5MultiIterEof(p, pIter)==0;
8205 fts5MultiIterNext(p, pIter, 0, 0)
8207 int n; /* Size of term in bytes */
8208 i64 iPos = 0; /* Position read from poslist */
8209 int iOff = 0; /* Offset within poslist */
8210 i64 iRowid = fts5MultiIterRowid(pIter);
8211 char *z = (char*)fts5MultiIterTerm(pIter, &n);
8213 /* If this is a new term, query for it. Update cksum3 with the results. */
8214 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
8215 if( p->rc ) break;
8217 if( eDetail==FTS5_DETAIL_NONE ){
8218 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
8219 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
8221 }else{
8222 poslist.n = 0;
8223 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
8224 fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
8225 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
8226 int iCol = FTS5_POS2COLUMN(iPos);
8227 int iTokOff = FTS5_POS2OFFSET(iPos);
8228 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
8232 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
8234 fts5MultiIterFree(pIter);
8235 if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
8237 fts5StructureRelease(pStruct);
8238 #ifdef SQLITE_DEBUG
8239 fts5BufferFree(&term);
8240 #endif
8241 fts5BufferFree(&poslist);
8242 return fts5IndexReturn(p);
8245 /*************************************************************************
8246 **************************************************************************
8247 ** Below this point is the implementation of the fts5_decode() scalar
8248 ** function only.
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Decode a segment-data rowid from the %_data table. This function is
** the opposite of macro FTS5_SEGMENT_ROWID().
**
** Fields are extracted starting from the least significant bits of the
** rowid: page number, height, dlidx flag, segment id and finally the
** tombstone flag.
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *pbTombstone,               /* OUT: Tombstone hash flag */
  int *piSegid,                   /* OUT: Segment id */
  int *pbDlidx,                   /* OUT: Dlidx flag */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  const i64 mPgno = ((i64)1 << FTS5_DATA_PAGE_B) - 1;
  const i64 mHeight = ((i64)1 << FTS5_DATA_HEIGHT_B) - 1;
  const i64 mSegid = ((i64)1 << FTS5_DATA_ID_B) - 1;
  i64 iRest = iRowid;             /* Bits not yet consumed */

  *piPgno = (int)(iRest & mPgno);
  iRest >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRest & mHeight);
  iRest >>= FTS5_DATA_HEIGHT_B;

  *pbDlidx = (int)(iRest & 0x0001);
  iRest >>= FTS5_DATA_DLI_B;

  *piSegid = (int)(iRest & mSegid);
  iRest >>= FTS5_DATA_ID_B;

  *pbTombstone = (int)(iRest & 0x0001);
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append a human-readable description of %_data rowid iKey to buffer pBuf.
** A rowid with segment-id 0 is rendered as "{averages} " if it is
** FTS5_AVERAGES_ROWID, or as "{structure}" otherwise. Any other rowid is
** rendered with its decoded segment id, height and page number, prefixed
** by "dlidx " and/or "tombstone " if the corresponding flags are set.
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx, bTomb;     /* Rowid components */
  fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    const char *zDlidx = bDlidx ? "dlidx " : "";
    const char *zTomb = bTomb ? "tombstone " : "";
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}",
        zDlidx, zTomb, iSegid, iHeight, iPgno
    );
    return;
  }

  if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append a human-readable description of structure object p to buffer
** pBuf. Each level is rendered as " {lvl=.. nMerge=.. nSeg=.." followed
** by one " {id=.. leaves=..}" group per segment. The origin range of a
** segment is included only if its iOrigin1 is greater than zero.
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int iLvl;                       /* Iterate through levels */

  for(iLvl=0; iLvl<p->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    Fts5StructureSegment *pSeg = pLvl->aSeg;
    Fts5StructureSegment *pEnd = &pLvl->aSeg[pLvl->nSeg];

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    for( ; pSeg<pEnd; pSeg++){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d",
          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
      );
      if( pSeg->iOrigin1>0 ){
        sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld",
            pSeg->iOrigin1, pSeg->iOrigin2
        );
      }
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
** function appends a human-readable representation of the same object
** to the buffer passed as the second argument.
**
** If the blob cannot be decoded, the error code is stored in (*pRc) and
** nothing is appended.
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  Fts5Structure *pDecoded = 0;    /* Decoded structure object */
  int rc = fts5StructureDecode(pBlob, nBlob, 0, &pDecoded);

  if( rc==SQLITE_OK ){
    fts5DebugStructure(pRc, pBuf, pDecoded);
    fts5StructureRelease(pDecoded);
  }else{
    *pRc = rc;
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain an "averages" record. This function
** appends a human-readable representation of record to the buffer passed
** as the second argument: the decimal value of each varint in the record,
** separated by single spaces.
**
** Each value is formatted as a 64-bit integer so that values too large
** for an int are not truncated.
*/
static void fts5DecodeAverages(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  int i = 0;
  const char *zSpace = "";        /* Separator; empty before first value */

  while( i<nBlob ){
    u64 iVal;
    i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%lld", zSpace, (i64)iVal);
    zSpace = " ";
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint as a 32-bit value and append " <value>" to buffer pBuf,
** continuing until the read offset reaches or passes n.
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int nRead;
  for(nRead=0; nRead<n; ){
    int iVal;
    nRead += fts5GetVarint32(&a[nRead], iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
  }
  return nRead;
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** The start of buffer (a/n) contains the start of a doclist. The doclist
** may or may not finish within the buffer. This function appends a text
** representation of the part of the doclist that is present to buffer
** pBuf: " id=<rowid>" for each entry, followed by " nPos=<n>" (with a
** trailing "*" if the delete flag is set) and the decoded position list.
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  i64 iRowid = 0;                 /* Current rowid */
  int nRead = 0;                  /* Bytes of a[] consumed so far */

  /* The first rowid is stored as an absolute value */
  if( n>0 ){
    nRead = sqlite3Fts5GetVarint(a, (u64*)&iRowid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iRowid);
  }

  while( nRead<n ){
    int nPos;
    int bDel;
    int nAvail;

    nRead += fts5GetPoslistSize(&a[nRead], &nPos, &bDel);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");

    /* Decode no more of the position list than is present in the buffer */
    nAvail = MIN(n-nRead, nPos);
    nRead += fts5DecodePoslist(pRc, pBuf, &a[nRead], nAvail);

    /* Subsequent rowids are stored as deltas from the previous one */
    if( nRead<n ){
      i64 iDelta;
      nRead += sqlite3Fts5GetVarint(&a[nRead], (u64*)&iDelta);
      iRowid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iRowid);
    }
  }

  return nRead;
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8438 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8440 ** This function is part of the fts5_decode() debugging function. It is
8441 ** only ever used with detail=none tables.
8443 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
8444 ** tables. This function appends a human-readable version of that list to
8445 ** buffer pBuf.
8447 ** If *pRc is other than SQLITE_OK when this function is called, it is a
8448 ** no-op. If an OOM or other error occurs within this function, *pRc is
8449 ** set to an SQLite error code before returning. The final state of buffer
8450 ** pBuf is undefined in this case.
8452 static void fts5DecodeRowidList(
8453 int *pRc, /* IN/OUT: Error code */
8454 Fts5Buffer *pBuf, /* Buffer to append text to */
8455 const u8 *pData, int nData /* Data to decode list-of-rowids from */
8457 int i = 0;
8458 i64 iRowid = 0;
8460 while( i<nData ){
8461 const char *zApp = "";
8462 u64 iVal;
8463 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
8464 iRowid += iVal;
8466 if( i<nData && pData[i]==0x00 ){
8467 i++;
8468 if( i<nData && pData[i]==0x00 ){
8469 i++;
8470 zApp = "+";
8471 }else{
8472 zApp = "*";
8476 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
8479 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8481 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8482 static void fts5BufferAppendTerm(int *pRc, Fts5Buffer *pBuf, Fts5Buffer *pTerm){
8483 int ii;
8484 fts5BufferGrow(pRc, pBuf, pTerm->n*2 + 1);
8485 if( *pRc==SQLITE_OK ){
8486 for(ii=0; ii<pTerm->n; ii++){
8487 if( pTerm->p[ii]==0x00 ){
8488 pBuf->p[pBuf->n++] = '\\';
8489 pBuf->p[pBuf->n++] = '0';
8490 }else{
8491 pBuf->p[pBuf->n++] = pTerm->p[ii];
8494 pBuf->p[pBuf->n] = 0x00;
8497 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8499 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8501 ** The implementation of user-defined scalar function fts5_decode().
8503 static void fts5DecodeFunction(
8504 sqlite3_context *pCtx, /* Function call context */
8505 int nArg, /* Number of args (always 2) */
8506 sqlite3_value **apVal /* Function arguments */
8508 i64 iRowid; /* Rowid for record being decoded */
8509 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
8510 int bTomb;
8511 const u8 *aBlob; int n; /* Record to decode */
8512 u8 *a = 0;
8513 Fts5Buffer s; /* Build up text to return here */
8514 int rc = SQLITE_OK; /* Return code */
8515 sqlite3_int64 nSpace = 0;
8516 int eDetailNone = (sqlite3_user_data(pCtx)!=0);
8518 assert( nArg==2 );
8519 UNUSED_PARAM(nArg);
8520 memset(&s, 0, sizeof(Fts5Buffer));
8521 iRowid = sqlite3_value_int64(apVal[0]);
8523 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
8524 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
8525 ** buffer overreads even if the record is corrupt. */
8526 n = sqlite3_value_bytes(apVal[1]);
8527 aBlob = sqlite3_value_blob(apVal[1]);
8528 nSpace = n + FTS5_DATA_ZERO_PADDING;
8529 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
8530 if( a==0 ) goto decode_out;
8531 if( n>0 ) memcpy(a, aBlob, n);
8533 fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);
8535 fts5DebugRowid(&rc, &s, iRowid);
8536 if( bDlidx ){
8537 Fts5Data dlidx;
8538 Fts5DlidxLvl lvl;
8540 dlidx.p = a;
8541 dlidx.nn = n;
8543 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
8544 lvl.pData = &dlidx;
8545 lvl.iLeafPgno = iPgno;
8547 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
8548 sqlite3Fts5BufferAppendPrintf(&rc, &s,
8549 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
8552 }else if( bTomb ){
8553 u32 nElem = fts5GetU32(&a[4]);
8554 int szKey = (aBlob[0]==4 || aBlob[0]==8) ? aBlob[0] : 8;
8555 int nSlot = (n - 8) / szKey;
8556 int ii;
8557 sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem);
8558 if( aBlob[1] ){
8559 sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0");
8561 for(ii=0; ii<nSlot; ii++){
8562 u64 iVal = 0;
8563 if( szKey==4 ){
8564 u32 *aSlot = (u32*)&aBlob[8];
8565 if( aSlot[ii] ) iVal = fts5GetU32((u8*)&aSlot[ii]);
8566 }else{
8567 u64 *aSlot = (u64*)&aBlob[8];
8568 if( aSlot[ii] ) iVal = fts5GetU64((u8*)&aSlot[ii]);
8570 if( iVal!=0 ){
8571 sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal);
8574 }else if( iSegid==0 ){
8575 if( iRowid==FTS5_AVERAGES_ROWID ){
8576 fts5DecodeAverages(&rc, &s, a, n);
8577 }else{
8578 fts5DecodeStructure(&rc, &s, a, n);
8580 }else if( eDetailNone ){
8581 Fts5Buffer term; /* Current term read from page */
8582 int szLeaf;
8583 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
8584 int iTermOff;
8585 int nKeep = 0;
8586 int iOff;
8588 memset(&term, 0, sizeof(Fts5Buffer));
8590 /* Decode any entries that occur before the first term. */
8591 if( szLeaf<n ){
8592 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
8593 }else{
8594 iTermOff = szLeaf;
8596 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
8598 iOff = iTermOff;
8599 while( iOff<szLeaf && rc==SQLITE_OK ){
8600 int nAppend;
8602 /* Read the term data for the next term*/
8603 iOff += fts5GetVarint32(&a[iOff], nAppend);
8604 term.n = nKeep;
8605 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
8606 sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
8607 fts5BufferAppendTerm(&rc, &s, &term);
8608 iOff += nAppend;
8610 /* Figure out where the doclist for this term ends */
8611 if( iPgidxOff<n ){
8612 int nIncr;
8613 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
8614 iTermOff += nIncr;
8615 }else{
8616 iTermOff = szLeaf;
8618 if( iTermOff>szLeaf ){
8619 rc = FTS5_CORRUPT;
8620 }else{
8621 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
8623 iOff = iTermOff;
8624 if( iOff<szLeaf ){
8625 iOff += fts5GetVarint32(&a[iOff], nKeep);
8629 fts5BufferFree(&term);
8630 }else{
8631 Fts5Buffer term; /* Current term read from page */
8632 int szLeaf; /* Offset of pgidx in a[] */
8633 int iPgidxOff;
8634 int iPgidxPrev = 0; /* Previous value read from pgidx */
8635 int iTermOff = 0;
8636 int iRowidOff = 0;
8637 int iOff;
8638 int nDoclist;
8640 memset(&term, 0, sizeof(Fts5Buffer));
8642 if( n<4 ){
8643 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
8644 goto decode_out;
8645 }else{
8646 iRowidOff = fts5GetU16(&a[0]);
8647 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
8648 if( iPgidxOff<n ){
8649 fts5GetVarint32(&a[iPgidxOff], iTermOff);
8650 }else if( iPgidxOff>n ){
8651 rc = FTS5_CORRUPT;
8652 goto decode_out;
8656 /* Decode the position list tail at the start of the page */
8657 if( iRowidOff!=0 ){
8658 iOff = iRowidOff;
8659 }else if( iTermOff!=0 ){
8660 iOff = iTermOff;
8661 }else{
8662 iOff = szLeaf;
8664 if( iOff>n ){
8665 rc = FTS5_CORRUPT;
8666 goto decode_out;
8668 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
8670 /* Decode any more doclist data that appears on the page before the
8671 ** first term. */
8672 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
8673 if( nDoclist+iOff>n ){
8674 rc = FTS5_CORRUPT;
8675 goto decode_out;
8677 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
8679 while( iPgidxOff<n && rc==SQLITE_OK ){
8680 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
8681 int nByte; /* Bytes of data */
8682 int iEnd;
8684 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
8685 iPgidxPrev += nByte;
8686 iOff = iPgidxPrev;
8688 if( iPgidxOff<n ){
8689 fts5GetVarint32(&a[iPgidxOff], nByte);
8690 iEnd = iPgidxPrev + nByte;
8691 }else{
8692 iEnd = szLeaf;
8694 if( iEnd>szLeaf ){
8695 rc = FTS5_CORRUPT;
8696 break;
8699 if( bFirst==0 ){
8700 iOff += fts5GetVarint32(&a[iOff], nByte);
8701 if( nByte>term.n ){
8702 rc = FTS5_CORRUPT;
8703 break;
8705 term.n = nByte;
8707 iOff += fts5GetVarint32(&a[iOff], nByte);
8708 if( iOff+nByte>n ){
8709 rc = FTS5_CORRUPT;
8710 break;
8712 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
8713 iOff += nByte;
8715 sqlite3Fts5BufferAppendPrintf(&rc, &s, " term=");
8716 fts5BufferAppendTerm(&rc, &s, &term);
8717 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
8720 fts5BufferFree(&term);
8723 decode_out:
8724 sqlite3_free(a);
8725 if( rc==SQLITE_OK ){
8726 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
8727 }else{
8728 sqlite3_result_error_code(pCtx, rc);
8730 fts5BufferFree(&s);
8732 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8734 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8736 ** The implementation of user-defined scalar function fts5_rowid().
8738 static void fts5RowidFunction(
8739 sqlite3_context *pCtx, /* Function call context */
8740 int nArg, /* Number of args (always 2) */
8741 sqlite3_value **apVal /* Function arguments */
8743 const char *zArg;
8744 if( nArg==0 ){
8745 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
8746 }else{
8747 zArg = (const char*)sqlite3_value_text(apVal[0]);
8748 if( 0==sqlite3_stricmp(zArg, "segment") ){
8749 i64 iRowid;
8750 int segid, pgno;
8751 if( nArg!=3 ){
8752 sqlite3_result_error(pCtx,
8753 "should be: fts5_rowid('segment', segid, pgno))", -1
8755 }else{
8756 segid = sqlite3_value_int(apVal[1]);
8757 pgno = sqlite3_value_int(apVal[2]);
8758 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
8759 sqlite3_result_int64(pCtx, iRowid);
8761 }else{
8762 sqlite3_result_error(pCtx,
8763 "first arg to fts5_rowid() must be 'segment'" , -1
8768 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8770 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8772 typedef struct Fts5StructVtab Fts5StructVtab;
8773 struct Fts5StructVtab {
8774 sqlite3_vtab base;
8777 typedef struct Fts5StructVcsr Fts5StructVcsr;
8778 struct Fts5StructVcsr {
8779 sqlite3_vtab_cursor base;
8780 Fts5Structure *pStruct;
8781 int iLevel;
8782 int iSeg;
8783 int iRowid;
8787 ** Create a new fts5_structure() table-valued function.
8789 static int fts5structConnectMethod(
8790 sqlite3 *db,
8791 void *pAux,
8792 int argc, const char *const*argv,
8793 sqlite3_vtab **ppVtab,
8794 char **pzErr
8796 Fts5StructVtab *pNew = 0;
8797 int rc = SQLITE_OK;
8799 rc = sqlite3_declare_vtab(db,
8800 "CREATE TABLE xyz("
8801 "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, "
8802 "npgtombstone, nentrytombstone, nentry, struct HIDDEN);"
8804 if( rc==SQLITE_OK ){
8805 pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
8808 *ppVtab = (sqlite3_vtab*)pNew;
8809 return rc;
8813 ** We must have a single struct=? constraint that will be passed through
8814 ** into the xFilter method. If there is no valid stmt=? constraint,
8815 ** then return an SQLITE_CONSTRAINT error.
8817 static int fts5structBestIndexMethod(
8818 sqlite3_vtab *tab,
8819 sqlite3_index_info *pIdxInfo
8821 int i;
8822 int rc = SQLITE_CONSTRAINT;
8823 struct sqlite3_index_constraint *p;
8824 pIdxInfo->estimatedCost = (double)100;
8825 pIdxInfo->estimatedRows = 100;
8826 pIdxInfo->idxNum = 0;
8827 for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){
8828 if( p->usable==0 ) continue;
8829 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ && p->iColumn==11 ){
8830 rc = SQLITE_OK;
8831 pIdxInfo->aConstraintUsage[i].omit = 1;
8832 pIdxInfo->aConstraintUsage[i].argvIndex = 1;
8833 break;
8836 return rc;
8840 ** This method is the destructor for bytecodevtab objects.
8842 static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){
8843 Fts5StructVtab *p = (Fts5StructVtab*)pVtab;
8844 sqlite3_free(p);
8845 return SQLITE_OK;
8849 ** Constructor for a new bytecodevtab_cursor object.
8851 static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){
8852 int rc = SQLITE_OK;
8853 Fts5StructVcsr *pNew = 0;
8855 pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
8856 *ppCsr = (sqlite3_vtab_cursor*)pNew;
8858 return SQLITE_OK;
8862 ** Destructor for a bytecodevtab_cursor.
8864 static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){
8865 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8866 fts5StructureRelease(pCsr->pStruct);
8867 sqlite3_free(pCsr);
8868 return SQLITE_OK;
8873 ** Advance a bytecodevtab_cursor to its next row of output.
8875 static int fts5structNextMethod(sqlite3_vtab_cursor *cur){
8876 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8877 Fts5Structure *p = pCsr->pStruct;
8879 assert( pCsr->pStruct );
8880 pCsr->iSeg++;
8881 pCsr->iRowid++;
8882 while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){
8883 pCsr->iLevel++;
8884 pCsr->iSeg = 0;
8886 if( pCsr->iLevel>=p->nLevel ){
8887 fts5StructureRelease(pCsr->pStruct);
8888 pCsr->pStruct = 0;
8890 return SQLITE_OK;
8894 ** Return TRUE if the cursor has been moved off of the last
8895 ** row of output.
8897 static int fts5structEofMethod(sqlite3_vtab_cursor *cur){
8898 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8899 return pCsr->pStruct==0;
8902 static int fts5structRowidMethod(
8903 sqlite3_vtab_cursor *cur,
8904 sqlite_int64 *piRowid
8906 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8907 *piRowid = pCsr->iRowid;
8908 return SQLITE_OK;
8912 ** Return values of columns for the row at which the bytecodevtab_cursor
8913 ** is currently pointing.
8915 static int fts5structColumnMethod(
8916 sqlite3_vtab_cursor *cur, /* The cursor */
8917 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
8918 int i /* Which column to return */
8920 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8921 Fts5Structure *p = pCsr->pStruct;
8922 Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg];
8924 switch( i ){
8925 case 0: /* level */
8926 sqlite3_result_int(ctx, pCsr->iLevel);
8927 break;
8928 case 1: /* segment */
8929 sqlite3_result_int(ctx, pCsr->iSeg);
8930 break;
8931 case 2: /* merge */
8932 sqlite3_result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge);
8933 break;
8934 case 3: /* segid */
8935 sqlite3_result_int(ctx, pSeg->iSegid);
8936 break;
8937 case 4: /* leaf1 */
8938 sqlite3_result_int(ctx, pSeg->pgnoFirst);
8939 break;
8940 case 5: /* leaf2 */
8941 sqlite3_result_int(ctx, pSeg->pgnoLast);
8942 break;
8943 case 6: /* origin1 */
8944 sqlite3_result_int64(ctx, pSeg->iOrigin1);
8945 break;
8946 case 7: /* origin2 */
8947 sqlite3_result_int64(ctx, pSeg->iOrigin2);
8948 break;
8949 case 8: /* npgtombstone */
8950 sqlite3_result_int(ctx, pSeg->nPgTombstone);
8951 break;
8952 case 9: /* nentrytombstone */
8953 sqlite3_result_int64(ctx, pSeg->nEntryTombstone);
8954 break;
8955 case 10: /* nentry */
8956 sqlite3_result_int64(ctx, pSeg->nEntry);
8957 break;
8959 return SQLITE_OK;
8963 ** Initialize a cursor.
8965 ** idxNum==0 means show all subprograms
8966 ** idxNum==1 means show only the main bytecode and omit subprograms.
8968 static int fts5structFilterMethod(
8969 sqlite3_vtab_cursor *pVtabCursor,
8970 int idxNum, const char *idxStr,
8971 int argc, sqlite3_value **argv
8973 Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor;
8974 int rc = SQLITE_OK;
8976 const u8 *aBlob = 0;
8977 int nBlob = 0;
8979 assert( argc==1 );
8980 fts5StructureRelease(pCsr->pStruct);
8981 pCsr->pStruct = 0;
8983 nBlob = sqlite3_value_bytes(argv[0]);
8984 aBlob = (const u8*)sqlite3_value_blob(argv[0]);
8985 rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct);
8986 if( rc==SQLITE_OK ){
8987 pCsr->iLevel = 0;
8988 pCsr->iRowid = 0;
8989 pCsr->iSeg = -1;
8990 rc = fts5structNextMethod(pVtabCursor);
8993 return rc;
8996 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8999 ** This is called as part of registering the FTS5 module with database
9000 ** connection db. It registers several user-defined scalar functions useful
9001 ** with FTS5.
9003 ** If successful, SQLITE_OK is returned. If an error occurs, some other
9004 ** SQLite error code is returned instead.
9006 int sqlite3Fts5IndexInit(sqlite3 *db){
9007 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
9008 int rc = sqlite3_create_function(
9009 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
9012 if( rc==SQLITE_OK ){
9013 rc = sqlite3_create_function(
9014 db, "fts5_decode_none", 2,
9015 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
9019 if( rc==SQLITE_OK ){
9020 rc = sqlite3_create_function(
9021 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
9025 if( rc==SQLITE_OK ){
9026 static const sqlite3_module fts5structure_module = {
9027 0, /* iVersion */
9028 0, /* xCreate */
9029 fts5structConnectMethod, /* xConnect */
9030 fts5structBestIndexMethod, /* xBestIndex */
9031 fts5structDisconnectMethod, /* xDisconnect */
9032 0, /* xDestroy */
9033 fts5structOpenMethod, /* xOpen */
9034 fts5structCloseMethod, /* xClose */
9035 fts5structFilterMethod, /* xFilter */
9036 fts5structNextMethod, /* xNext */
9037 fts5structEofMethod, /* xEof */
9038 fts5structColumnMethod, /* xColumn */
9039 fts5structRowidMethod, /* xRowid */
9040 0, /* xUpdate */
9041 0, /* xBegin */
9042 0, /* xSync */
9043 0, /* xCommit */
9044 0, /* xRollback */
9045 0, /* xFindFunction */
9046 0, /* xRename */
9047 0, /* xSavepoint */
9048 0, /* xRelease */
9049 0, /* xRollbackTo */
9050 0, /* xShadowName */
9051 0 /* xIntegrity */
9053 rc = sqlite3_create_module(db, "fts5_structure", &fts5structure_module, 0);
9055 return rc;
9056 #else
9057 return SQLITE_OK;
9058 UNUSED_PARAM(db);
9059 #endif
9063 int sqlite3Fts5IndexReset(Fts5Index *p){
9064 assert( p->pStruct==0 || p->iStructVersion!=0 );
9065 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
9066 fts5StructureInvalidate(p);
9068 return fts5IndexReturn(p);