Snapshot of upstream SQLite 3.43.1
[sqlcipher.git] / ext / fts5 / fts5_index.c
blobe729b13f20ec18c079842915b73752fa4ec56670
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
20 #include "fts5Int.h"
23 ** Overview:
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/* NOTE(review): presumably the prefix byte that tags entries of the main
** term index (as opposed to the prefix indexes) - confirm against users. */
#define FTS5_MAIN_PREFIX '0'

/* The overview above allows for at most 31 prefix indexes. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

#define FTS5_MAX_LEVEL 64         /* Default maximum number of levels */
/*
** There are two versions of the format used for the structure record:
**
**   1. the legacy format, that may be read by all fts5 versions, and
**
**   2. the V2 format, which is used by contentless_delete=1 databases.
**
** Both begin with a 4-byte "configuration cookie" value. Then, a legacy
** format structure record contains a varint - the number of levels in
** the structure. Whereas a V2 structure record contains the constant
** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a
** varint has to be at least 16256 to begin with "0xFF". And the default
** maximum number of levels is 64.
**
** See below for more on structure record formats.
*/
#define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"
78 ** Details:
80 ** The %_data table managed by this module,
82 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
84 ** , contains the following 6 types of records. See the comments surrounding
85 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
86 ** assigned to each of them.
88 ** 1. Structure Records:
90 ** The set of segments that make up an index - the index structure - are
91 ** recorded in a single record within the %_data table. The record consists
92 ** of a single 32-bit configuration cookie value followed by a list of
93 ** SQLite varints.
95 ** If the structure record is a V2 record, the configuration cookie is
96 ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01].
98 ** Next, the record continues with three varints:
100 ** + number of levels,
101 ** + total number of segments on all levels,
102 ** + value of write counter.
104 ** Then, for each level from 0 to nMax:
106 ** + number of input segments in ongoing merge.
107 ** + total number of segments in level.
108 ** + for each segment from oldest to newest:
109 ** + segment id (always > 0)
110 ** + first leaf page number (often 1, always greater than 0)
111 ** + final leaf page number
113 ** Then, for V2 structures only:
115 ** + lower origin counter value,
116 ** + upper origin counter value,
117 ** + the number of tombstone hash pages.
119 ** 2. The Averages Record:
121 ** A single record within the %_data table. The data is a list of varints.
122 ** The first value is the number of rows in the index. Then, for each column
123 ** from left to right, the total number of tokens in the column for all
124 ** rows of the table.
126 ** 3. Segment leaves:
128 ** TERM/DOCLIST FORMAT:
130 ** Most of each segment leaf is taken up by term/doclist data. The
131 ** general format of term/doclist, starting with the first term
132 ** on the leaf page, is:
134 ** varint : size of first term
135 ** blob: first term data
136 ** doclist: first doclist
137 ** zero-or-more {
138 ** varint: number of bytes in common with previous term
139 ** varint: number of bytes of new term data (nNew)
140 ** blob: nNew bytes of new term data
141 ** doclist: next doclist
142 ** }
144 ** doclist format:
146 ** varint: first rowid
147 ** poslist: first poslist
148 ** zero-or-more {
149 ** varint: rowid delta (always > 0)
150 ** poslist: next poslist
151 ** }
153 ** poslist format:
155 ** varint: size of poslist in bytes multiplied by 2, not including
156 ** this field. Plus 1 if this entry carries the "delete" flag.
157 ** collist: collist for column 0
158 ** zero-or-more {
159 ** 0x01 byte
160 ** varint: column number (I)
161 ** collist: collist for column I
162 ** }
164 ** collist format:
166 ** varint: first offset + 2
167 ** zero-or-more {
168 ** varint: offset delta + 2
169 ** }
171 ** PAGE FORMAT
173 ** Each leaf page begins with a 4-byte header containing 2 16-bit
174 ** unsigned integer fields in big-endian format. They are:
176 ** * The byte offset of the first rowid on the page, if it exists
177 ** and occurs before the first term (otherwise 0).
179 ** * The byte offset of the start of the page footer. If the page
180 ** footer is 0 bytes in size, then this field is the same as the
181 ** size of the leaf page in bytes.
183 ** The page footer consists of a single varint for each term located
184 ** on the page. Each varint is the byte offset of the current term
185 ** within the page, delta-compressed against the previous value. In
186 ** other words, the first varint in the footer is the byte offset of
187 ** the first term, the second is the byte offset of the second less that
188 ** of the first, and so on.
190 ** The term/doclist format described above is accurate if the entire
191 ** term/doclist data fits on a single leaf page. If this is not the case,
192 ** the format is changed in two ways:
194 ** + if the first rowid on a page occurs before the first term, it
195 ** is stored as a literal value:
197 ** varint: first rowid
199 ** + the first term on each page is stored in the same way as the
200 ** very first term of the segment:
202 ** varint : size of first term
203 ** blob: first term data
205 ** 5. Segment doclist indexes:
207 ** Doclist indexes are themselves b-trees, however they usually consist of
208 ** a single leaf record only. The format of each doclist index leaf page
209 ** is:
211 ** * Flags byte. Bits are:
212 ** 0x01: Clear if leaf is also the root page, otherwise set.
214 ** * Page number of fts index leaf page. As a varint.
216 ** * First rowid on page indicated by previous field. As a varint.
218 ** * A list of varints, one for each subsequent termless page. A
219 ** positive delta if the termless page contains at least one rowid,
220 ** or an 0x00 byte otherwise.
222 ** Internal doclist index nodes are:
224 ** * Flags byte. Bits are:
225 ** 0x01: Clear for root page, otherwise set.
227 ** * Page number of first child page. As a varint.
229 ** * Copy of first rowid on page indicated by previous field. As a varint.
231 ** * A list of delta-encoded varints - the first rowid on each subsequent
232 ** child page.
234 ** 6. Tombstone Hash Page
236 ** These records are only ever present in contentless_delete=1 tables.
237 ** There are zero or more of these associated with each segment. They
238 ** are used to store the tombstone rowids for rows contained in the
239 ** associated segments.
241 ** The set of nHashPg tombstone hash pages associated with a single
242 ** segment together form a single hash table containing tombstone rowids.
243 ** To find the page of the hash on which a key might be stored:
245 ** iPg = (rowid % nHashPg)
247 ** Then, within page iPg, which has nSlot slots:
249 ** iSlot = (rowid / nHashPg) % nSlot
251 ** Each tombstone hash page begins with an 8 byte header:
253 ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8.
254 ** 1-byte: rowid-0-tombstone flag. This flag is only valid on the
255 ** first tombstone hash page for each segment (iPg=0). If set,
256 ** the hash table contains rowid 0. If clear, it does not.
257 ** Rowid 0 is handled specially.
258 ** 2-bytes: unused.
259 ** 4-bytes: Big-endian integer containing number of entries on page.
261 ** Following this are nSlot 4 or 8 byte slots (depending on the key-size
262 ** in the first byte of the page header). The number of slots may be
263 ** determined based on the size of the page record and the key-size:
265 ** nSlot = (nByte - 8) / key-size
/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID  1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID 10   /* The structure record */
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** Rowid layout produced by fts5_dri(), from most to least significant:
**
**     segid | dlidx flag (1 bit) | height (5 bits) | page number (31 bits)
*/
#define FTS5_DATA_ID_B     16     /* Segment id: 16 bits, max 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Dlidx tree height: 5 bits, 0..31 */
#define FTS5_DATA_PAGE_B   31     /* Page number: 31 bits, max 2147483647 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)

/* Tombstone pages reuse the segment-rowid layout with the segment id offset
** by (1<<16), i.e. one past the largest 16-bit segment id. The argument is
** parenthesized so that an expression argument is offset as a whole. */
#define FTS5_TOMBSTONE_ROWID(segid,ipg)   fts5_dri((segid)+(1<<16), 0, 0, ipg)
#ifdef SQLITE_DEBUG
/*
** Debug-build helper that returns SQLITE_CORRUPT_VTAB. Being a real function
** (rather than a constant) gives a single place to set a breakpoint when
** corruption is reported.
*/
int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
#endif
/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING  8
#define FTS5_DATA_PADDING       20   /* Extra bytes allocated after a record */
/* Forward declarations for the object types defined in this file. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
327 struct Fts5Data {
328 u8 *p; /* Pointer to buffer containing record */
329 int nn; /* Size of record in bytes */
330 int szLeaf; /* Size of leaf without page-index */
334 ** One object per %_data table.
336 ** nContentlessDelete:
337 ** The number of contentless delete operations since the most recent
338 ** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked
339 ** so that extra auto-merge work can be done by fts5IndexFlush() to
340 ** account for the delete operations.
342 struct Fts5Index {
343 Fts5Config *pConfig; /* Virtual table configuration */
344 char *zDataTbl; /* Name of %_data table */
345 int nWorkUnit; /* Leaf pages in a "unit" of work */
348 ** Variables related to the accumulation of tokens and doclists within the
349 ** in-memory hash tables before they are flushed to disk.
351 Fts5Hash *pHash; /* Hash table for in-memory data */
352 int nPendingData; /* Current bytes of pending data */
353 i64 iWriteRowid; /* Rowid for current doc being written */
354 int bDelete; /* Current write is a delete */
355 int nContentlessDelete; /* Number of contentless delete ops */
356 int nPendingRow; /* Number of INSERT in hash table */
358 /* Error state. */
359 int rc; /* Current error code */
361 /* State used by the fts5DataXXX() functions. */
362 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
363 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
364 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
365 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
366 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
367 sqlite3_stmt *pIdxSelect;
368 int nRead; /* Total number of blocks read */
370 sqlite3_stmt *pDeleteFromIdx;
372 sqlite3_stmt *pDataVersion;
373 i64 iStructVersion; /* data_version when pStruct read */
374 Fts5Structure *pStruct; /* Current db structure (or NULL) */
377 struct Fts5DoclistIter {
378 u8 *aEof; /* Pointer to 1 byte past end of doclist */
380 /* Output variables. aPoslist==0 at EOF */
381 i64 iRowid;
382 u8 *aPoslist;
383 int nPoslist;
384 int nSize;
388 ** The contents of the "structure" record for each index are represented
389 ** using an Fts5Structure record in memory. Which uses instances of the
390 ** other Fts5StructureXXX types as components.
392 ** nOriginCntr:
393 ** This value is set to non-zero for structure records created for
394 ** contentlessdelete=1 tables only. In that case it represents the
395 ** origin value to apply to the next top-level segment created.
397 struct Fts5StructureSegment {
398 int iSegid; /* Segment id */
399 int pgnoFirst; /* First leaf page number in segment */
400 int pgnoLast; /* Last leaf page number in segment */
402 /* contentlessdelete=1 tables only: */
403 u64 iOrigin1;
404 u64 iOrigin2;
405 int nPgTombstone; /* Number of tombstone hash table pages */
406 u64 nEntryTombstone; /* Number of tombstone entries that "count" */
407 u64 nEntry; /* Number of rows in this segment */
409 struct Fts5StructureLevel {
410 int nMerge; /* Number of segments in incr-merge */
411 int nSeg; /* Total number of segments on level */
412 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
414 struct Fts5Structure {
415 int nRef; /* Object reference count */
416 u64 nWriteCounter; /* Total leaves written to level 0 */
417 u64 nOriginCntr; /* Origin value for next top-level segment */
418 int nSegment; /* Total segments in this structure */
419 int nLevel; /* Number of levels in this index */
420 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
424 ** An object of type Fts5SegWriter is used to write to segments.
426 struct Fts5PageWriter {
427 int pgno; /* Page number for this page */
428 int iPrevPgidx; /* Previous value written into pgidx */
429 Fts5Buffer buf; /* Buffer containing leaf data */
430 Fts5Buffer pgidx; /* Buffer containing page-index */
431 Fts5Buffer term; /* Buffer containing previous term on page */
433 struct Fts5DlidxWriter {
434 int pgno; /* Page number for this page */
435 int bPrevValid; /* True if iPrev is valid */
436 i64 iPrev; /* Previous rowid value written to page */
437 Fts5Buffer buf; /* Buffer containing page data */
439 struct Fts5SegWriter {
440 int iSegid; /* Segid to write to */
441 Fts5PageWriter writer; /* PageWriter object */
442 i64 iPrevRowid; /* Previous rowid written to current leaf */
443 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
444 u8 bFirstRowidInPage; /* True if next rowid is first in page */
445 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
446 u8 bFirstTermInPage; /* True if next term will be first in leaf */
447 int nLeafWritten; /* Number of leaf pages written */
448 int nEmpty; /* Number of contiguous term-less nodes */
450 int nDlidx; /* Allocated size of aDlidx[] array */
451 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
453 /* Values to insert into the %_idx table */
454 Fts5Buffer btterm; /* Next term to insert into %_idx table */
455 int iBtPage; /* Page number corresponding to btterm */
458 typedef struct Fts5CResult Fts5CResult;
459 struct Fts5CResult {
460 u16 iFirst; /* aSeg[] index of firstest iterator */
461 u8 bTermEq; /* True if the terms are equal */
465 ** Object for iterating through a single segment, visiting each term/rowid
466 ** pair in the segment.
468 ** pSeg:
469 ** The segment to iterate through.
471 ** iLeafPgno:
472 ** Current leaf page number within segment.
474 ** iLeafOffset:
475 ** Byte offset within the current leaf that is the first byte of the
476 ** position list data (one byte passed the position-list size field).
478 ** pLeaf:
479 ** Buffer containing current leaf page data. Set to NULL at EOF.
481 ** iTermLeafPgno, iTermLeafOffset:
482 ** Leaf page number containing the last term read from the segment. And
483 ** the offset immediately following the term data.
485 ** flags:
486 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
488 ** FTS5_SEGITER_ONETERM:
489 ** If set, set the iterator to point to EOF after the current doclist
490 ** has been exhausted. Do not proceed to the next term in the segment.
492 ** FTS5_SEGITER_REVERSE:
493 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
494 ** it is set, iterate through rowid in descending order instead of the
495 ** default ascending order.
497 ** iRowidOffset/nRowidOffset/aRowidOffset:
498 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
500 ** For each rowid on the page corresponding to the current term, the
501 ** corresponding aRowidOffset[] entry is set to the byte offset of the
502 ** start of the "position-list-size" field within the page.
504 ** iTermIdx:
505 ** Index of current term on iTermLeafPgno.
507 ** apTombstone/nTombstone:
508 ** These are used for contentless_delete=1 tables only. When the cursor
509 ** is first allocated, the apTombstone[] array is allocated so that it
510 ** is large enough for all tombstones hash pages associated with the
511 ** segment. The pages themselves are loaded lazily from the database as
512 ** they are required.
514 struct Fts5SegIter {
515 Fts5StructureSegment *pSeg; /* Segment to iterate through */
516 int flags; /* Mask of configuration flags */
517 int iLeafPgno; /* Current leaf page number */
518 Fts5Data *pLeaf; /* Current leaf data */
519 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
520 i64 iLeafOffset; /* Byte offset within current leaf */
521 Fts5Data **apTombstone; /* Array of tombstone pages */
522 int nTombstone;
524 /* Next method */
525 void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
527 /* The page and offset from which the current term was read. The offset
528 ** is the offset of the first rowid in the current doclist. */
529 int iTermLeafPgno;
530 int iTermLeafOffset;
532 int iPgidxOff; /* Next offset in pgidx */
533 int iEndofDoclist;
535 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
536 int iRowidOffset; /* Current entry in aRowidOffset[] */
537 int nRowidOffset; /* Allocated size of aRowidOffset[] array */
538 int *aRowidOffset; /* Array of offset to rowid fields */
540 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
542 /* Variables populated based on current entry. */
543 Fts5Buffer term; /* Current term */
544 i64 iRowid; /* Current rowid */
545 int nPos; /* Number of bytes in current position list */
546 u8 bDel; /* True if the delete flag is set */
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page. Asserts that its szLeaf field is either the full record size
** or the footer offset stored in bytes 2..3 of the page header.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)
/* Bit values for Fts5SegIter.flags */
#define FTS5_SEGITER_ONETERM 0x01   /* Stop at the end of the current doclist */
#define FTS5_SEGITER_REVERSE 0x02   /* Visit rowids in descending order */
/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* Read the 16-bit value stored (i*2) bytes past offset szLeaf of leaf x. */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* First header field of leaf x: byte offset of the first rowid (or 0). */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
572 ** Object for iterating through the merged results of one or more segments,
573 ** visiting each term/rowid pair in the merged data.
575 ** nSeg is always a power of two greater than or equal to the number of
576 ** segments that this object is merging data from. Both the aSeg[] and
577 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
578 ** with zeroed objects - these are handled as if they were iterators opened
579 ** on empty segments.
581 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
582 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
583 ** comparison in this context is the index of the iterator that currently
584 ** points to the smaller term/rowid combination. Iterators at EOF are
585 ** considered to be greater than all other iterators.
587 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
588 ** the smallest key overall. aFirst[0] is unused.
590 ** poslist:
591 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
592 ** There is no way to tell if this is populated or not.
594 struct Fts5Iter {
595 Fts5IndexIter base; /* Base class containing output vars */
597 Fts5Index *pIndex; /* Index that owns this iterator */
598 Fts5Buffer poslist; /* Buffer containing current poslist */
599 Fts5Colset *pColset; /* Restrict matches to these columns */
601 /* Invoked to set output variables. */
602 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
604 int nSeg; /* Size of aSeg[] array */
605 int bRev; /* True to iterate in reverse order */
606 u8 bSkipEmpty; /* True to skip deleted entries */
608 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
609 Fts5CResult *aFirst; /* Current merge state (see above) */
610 Fts5SegIter aSeg[1]; /* Array of segment iterators */
615 ** An instance of the following type is used to iterate through the contents
616 ** of a doclist-index record.
618 ** pData:
619 ** Record containing the doclist-index data.
621 ** bEof:
622 ** Set to true once iterator has reached EOF.
624 ** iOff:
625 ** Set to the current offset within record pData.
627 struct Fts5DlidxLvl {
628 Fts5Data *pData; /* Data for current page of this level */
629 int iOff; /* Current offset into pData */
630 int bEof; /* At EOF already */
631 int iFirstOff; /* Used by reverse iterators */
633 /* Output variables */
634 int iLeafPgno; /* Page number of current leaf page */
635 i64 iRowid; /* First rowid on leaf iLeafPgno */
637 struct Fts5DlidxIter {
638 int nLvl;
639 int iSegid;
640 Fts5DlidxLvl aLvl[1];
643 static void fts5PutU16(u8 *aOut, u16 iVal){
644 aOut[0] = (iVal>>8);
645 aOut[1] = (iVal&0xFF);
648 static u16 fts5GetU16(const u8 *aIn){
649 return ((u16)aIn[0] << 8) + aIn[1];
653 ** The only argument points to a buffer at least 8 bytes in size. This
654 ** function interprets the first 8 bytes of the buffer as a 64-bit big-endian
655 ** unsigned integer and returns the result.
657 static u64 fts5GetU64(u8 *a){
658 return ((u64)a[0] << 56)
659 + ((u64)a[1] << 48)
660 + ((u64)a[2] << 40)
661 + ((u64)a[3] << 32)
662 + ((u64)a[4] << 24)
663 + ((u64)a[5] << 16)
664 + ((u64)a[6] << 8)
665 + ((u64)a[7] << 0);
669 ** The only argument points to a buffer at least 4 bytes in size. This
670 ** function interprets the first 4 bytes of the buffer as a 32-bit big-endian
671 ** unsigned integer and returns the result.
673 static u32 fts5GetU32(const u8 *a){
674 return ((u32)a[0] << 24)
675 + ((u32)a[1] << 16)
676 + ((u32)a[2] << 8)
677 + ((u32)a[3] << 0);
681 ** Write iVal, formated as a 64-bit big-endian unsigned integer, to the
682 ** buffer indicated by the first argument.
684 static void fts5PutU64(u8 *a, u64 iVal){
685 a[0] = ((iVal >> 56) & 0xFF);
686 a[1] = ((iVal >> 48) & 0xFF);
687 a[2] = ((iVal >> 40) & 0xFF);
688 a[3] = ((iVal >> 32) & 0xFF);
689 a[4] = ((iVal >> 24) & 0xFF);
690 a[5] = ((iVal >> 16) & 0xFF);
691 a[6] = ((iVal >> 8) & 0xFF);
692 a[7] = ((iVal >> 0) & 0xFF);
696 ** Write iVal, formated as a 32-bit big-endian unsigned integer, to the
697 ** buffer indicated by the first argument.
699 static void fts5PutU32(u8 *a, u32 iVal){
700 a[0] = ((iVal >> 24) & 0xFF);
701 a[1] = ((iVal >> 16) & 0xFF);
702 a[2] = ((iVal >> 8) & 0xFF);
703 a[3] = ((iVal >> 0) & 0xFF);
707 ** Allocate and return a buffer at least nByte bytes in size.
709 ** If an OOM error is encountered, return NULL and set the error code in
710 ** the Fts5Index handle passed as the first argument.
712 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
713 return sqlite3Fts5MallocZero(&p->rc, nByte);
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** If one side is a prefix of the other, the shorter one is the lesser.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  /* Use fts5Memcmp(), as fts5BufferCompare() does, rather than raw memcmp():
  ** an empty buffer may have a NULL data pointer, and passing NULL to
  ** memcmp() is undefined even for a zero length. */
  int res = fts5Memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
736 ** Compare the contents of the two buffers using memcmp(). If one buffer
737 ** is a prefix of the other, it is considered the lesser.
739 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
740 ** +ve if pRight is smaller than pLeft. In other words:
742 ** res = *pLeft - *pRight
744 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
745 int nCmp, res;
746 nCmp = MIN(pLeft->n, pRight->n);
747 assert( nCmp<=0 || pLeft->p!=0 );
748 assert( nCmp<=0 || pRight->p!=0 );
749 res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
750 return (res==0 ? (pLeft->n - pRight->n) : res);
753 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
754 int ret;
755 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
756 return ret;
760 ** Close the read-only blob handle, if it is open.
762 void sqlite3Fts5IndexCloseReader(Fts5Index *p){
763 if( p->pReader ){
764 sqlite3_blob *pReader = p->pReader;
765 p->pReader = 0;
766 sqlite3_blob_close(pReader);
771 ** Retrieve a record from the %_data table.
773 ** If an error occurs, NULL is returned and an error left in the
774 ** Fts5Index object.
776 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
777 Fts5Data *pRet = 0;
778 if( p->rc==SQLITE_OK ){
779 int rc = SQLITE_OK;
781 if( p->pReader ){
782 /* This call may return SQLITE_ABORT if there has been a savepoint
783 ** rollback since it was last used. In this case a new blob handle
784 ** is required. */
785 sqlite3_blob *pBlob = p->pReader;
786 p->pReader = 0;
787 rc = sqlite3_blob_reopen(pBlob, iRowid);
788 assert( p->pReader==0 );
789 p->pReader = pBlob;
790 if( rc!=SQLITE_OK ){
791 sqlite3Fts5IndexCloseReader(p);
793 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
796 /* If the blob handle is not open at this point, open it and seek
797 ** to the requested entry. */
798 if( p->pReader==0 && rc==SQLITE_OK ){
799 Fts5Config *pConfig = p->pConfig;
800 rc = sqlite3_blob_open(pConfig->db,
801 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
805 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
806 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
807 ** All the reasons those functions might return SQLITE_ERROR - missing
808 ** table, missing row, non-blob/text in block column - indicate
809 ** backing store corruption. */
810 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
812 if( rc==SQLITE_OK ){
813 u8 *aOut = 0; /* Read blob data into this buffer */
814 int nByte = sqlite3_blob_bytes(p->pReader);
815 sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
816 pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
817 if( pRet ){
818 pRet->nn = nByte;
819 aOut = pRet->p = (u8*)&pRet[1];
820 }else{
821 rc = SQLITE_NOMEM;
824 if( rc==SQLITE_OK ){
825 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
827 if( rc!=SQLITE_OK ){
828 sqlite3_free(pRet);
829 pRet = 0;
830 }else{
831 /* TODO1: Fix this */
832 pRet->p[nByte] = 0x00;
833 pRet->p[nByte+1] = 0x00;
834 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
837 p->rc = rc;
838 p->nRead++;
841 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
842 return pRet;
847 ** Release a reference to data record returned by an earlier call to
848 ** fts5DataRead().
850 static void fts5DataRelease(Fts5Data *pData){
851 sqlite3_free(pData);
854 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
855 Fts5Data *pRet = fts5DataRead(p, iRowid);
856 if( pRet ){
857 if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
858 p->rc = FTS5_CORRUPT;
859 fts5DataRelease(pRet);
860 pRet = 0;
863 return pRet;
866 static int fts5IndexPrepareStmt(
867 Fts5Index *p,
868 sqlite3_stmt **ppStmt,
869 char *zSql
871 if( p->rc==SQLITE_OK ){
872 if( zSql ){
873 p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
874 SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
875 ppStmt, 0);
876 }else{
877 p->rc = SQLITE_NOMEM;
880 sqlite3_free(zSql);
881 return p->rc;
886 ** INSERT OR REPLACE a record into the %_data table.
888 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
889 if( p->rc!=SQLITE_OK ) return;
891 if( p->pWriter==0 ){
892 Fts5Config *pConfig = p->pConfig;
893 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
894 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
895 pConfig->zDb, pConfig->zName
897 if( p->rc ) return;
900 sqlite3_bind_int64(p->pWriter, 1, iRowid);
901 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
902 sqlite3_step(p->pWriter);
903 p->rc = sqlite3_reset(p->pWriter);
904 sqlite3_bind_null(p->pWriter, 2);
908 ** Execute the following SQL:
910 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
912 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
913 if( p->rc!=SQLITE_OK ) return;
915 if( p->pDeleter==0 ){
916 Fts5Config *pConfig = p->pConfig;
917 char *zSql = sqlite3_mprintf(
918 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
919 pConfig->zDb, pConfig->zName
921 if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
924 sqlite3_bind_int64(p->pDeleter, 1, iFirst);
925 sqlite3_bind_int64(p->pDeleter, 2, iLast);
926 sqlite3_step(p->pDeleter);
927 p->rc = sqlite3_reset(p->pDeleter);
931 ** Remove all records associated with segment iSegid.
933 static void fts5DataRemoveSegment(Fts5Index *p, Fts5StructureSegment *pSeg){
934 int iSegid = pSeg->iSegid;
935 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
936 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
937 fts5DataDelete(p, iFirst, iLast);
939 if( pSeg->nPgTombstone ){
940 i64 iTomb1 = FTS5_TOMBSTONE_ROWID(iSegid, 0);
941 i64 iTomb2 = FTS5_TOMBSTONE_ROWID(iSegid, pSeg->nPgTombstone-1);
942 fts5DataDelete(p, iTomb1, iTomb2);
944 if( p->pIdxDeleter==0 ){
945 Fts5Config *pConfig = p->pConfig;
946 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
947 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
948 pConfig->zDb, pConfig->zName
951 if( p->rc==SQLITE_OK ){
952 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
953 sqlite3_step(p->pIdxDeleter);
954 p->rc = sqlite3_reset(p->pIdxDeleter);
959 ** Release a reference to an Fts5Structure object returned by an earlier
960 ** call to fts5StructureRead() or fts5StructureDecode().
962 static void fts5StructureRelease(Fts5Structure *pStruct){
963 if( pStruct && 0>=(--pStruct->nRef) ){
964 int i;
965 assert( pStruct->nRef==0 );
966 for(i=0; i<pStruct->nLevel; i++){
967 sqlite3_free(pStruct->aLevel[i].aSeg);
969 sqlite3_free(pStruct);
973 static void fts5StructureRef(Fts5Structure *pStruct){
974 pStruct->nRef++;
977 void *sqlite3Fts5StructureRef(Fts5Index *p){
978 fts5StructureRef(p->pStruct);
979 return (void*)p->pStruct;
981 void sqlite3Fts5StructureRelease(void *p){
982 if( p ){
983 fts5StructureRelease((Fts5Structure*)p);
986 int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
987 if( p->pStruct!=(Fts5Structure*)pStruct ){
988 return SQLITE_ABORT;
990 return SQLITE_OK;
994 ** Ensure that structure object (*pp) is writable.
996 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
997 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
999 static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
1000 Fts5Structure *p = *pp;
1001 if( *pRc==SQLITE_OK && p->nRef>1 ){
1002 i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
1003 Fts5Structure *pNew;
1004 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
1005 if( pNew ){
1006 int i;
1007 memcpy(pNew, p, nByte);
1008 for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
1009 for(i=0; i<p->nLevel; i++){
1010 Fts5StructureLevel *pLvl = &pNew->aLevel[i];
1011 nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
1012 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
1013 if( pLvl->aSeg==0 ){
1014 for(i=0; i<p->nLevel; i++){
1015 sqlite3_free(pNew->aLevel[i].aSeg);
1017 sqlite3_free(pNew);
1018 return;
1020 memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
1022 p->nRef--;
1023 pNew->nRef = 1;
1025 *pp = pNew;
1030 ** Deserialize and return the structure record currently stored in serialized
1031 ** form within buffer pData/nData.
1033 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1034 ** are over-allocated by one slot. This allows the structure contents
1035 ** to be more easily edited.
1037 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
1038 ** returned. Otherwise, *ppOut is set to point to the new object and
1039 ** SQLITE_OK returned.
1041 static int fts5StructureDecode(
1042 const u8 *pData, /* Buffer containing serialized structure */
1043 int nData, /* Size of buffer pData in bytes */
1044 int *piCookie, /* Configuration cookie value */
1045 Fts5Structure **ppOut /* OUT: Deserialized object */
1047 int rc = SQLITE_OK;
1048 int i = 0;
1049 int iLvl;
1050 int nLevel = 0;
1051 int nSegment = 0;
1052 sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
1053 Fts5Structure *pRet = 0; /* Structure object to return */
1054 int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */
1055 u64 nOriginCntr = 0; /* Largest origin value seen so far */
1057 /* Grab the cookie value */
1058 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
1059 i = 4;
1061 /* Check if this is a V2 structure record. Set bStructureV2 if it is. */
1062 if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2, 4) ){
1063 i += 4;
1064 bStructureV2 = 1;
1067 /* Read the total number of levels and segments from the start of the
1068 ** structure record. */
1069 i += fts5GetVarint32(&pData[i], nLevel);
1070 i += fts5GetVarint32(&pData[i], nSegment);
1071 if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
1072 || nSegment>FTS5_MAX_SEGMENT || nSegment<0
1074 return FTS5_CORRUPT;
1076 nByte = (
1077 sizeof(Fts5Structure) + /* Main structure */
1078 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
1080 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
1082 if( pRet ){
1083 pRet->nRef = 1;
1084 pRet->nLevel = nLevel;
1085 pRet->nSegment = nSegment;
1086 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
1088 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
1089 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
1090 int nTotal = 0;
1091 int iSeg;
1093 if( i>=nData ){
1094 rc = FTS5_CORRUPT;
1095 }else{
1096 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
1097 i += fts5GetVarint32(&pData[i], nTotal);
1098 if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
1099 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
1100 nTotal * sizeof(Fts5StructureSegment)
1102 nSegment -= nTotal;
1105 if( rc==SQLITE_OK ){
1106 pLvl->nSeg = nTotal;
1107 for(iSeg=0; iSeg<nTotal; iSeg++){
1108 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
1109 if( i>=nData ){
1110 rc = FTS5_CORRUPT;
1111 break;
1113 assert( pSeg!=0 );
1114 i += fts5GetVarint32(&pData[i], pSeg->iSegid);
1115 i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
1116 i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
1117 if( bStructureV2 ){
1118 i += fts5GetVarint(&pData[i], &pSeg->iOrigin1);
1119 i += fts5GetVarint(&pData[i], &pSeg->iOrigin2);
1120 i += fts5GetVarint32(&pData[i], pSeg->nPgTombstone);
1121 i += fts5GetVarint(&pData[i], &pSeg->nEntryTombstone);
1122 i += fts5GetVarint(&pData[i], &pSeg->nEntry);
1123 nOriginCntr = MAX(nOriginCntr, pSeg->iOrigin2);
1125 if( pSeg->pgnoLast<pSeg->pgnoFirst ){
1126 rc = FTS5_CORRUPT;
1127 break;
1130 if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
1131 if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
1134 if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
1135 if( bStructureV2 ){
1136 pRet->nOriginCntr = nOriginCntr+1;
1139 if( rc!=SQLITE_OK ){
1140 fts5StructureRelease(pRet);
1141 pRet = 0;
1145 *ppOut = pRet;
1146 return rc;
1150 ** Add a level to the Fts5Structure.aLevel[] array of structure object
1151 ** (*ppStruct).
1153 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
1154 fts5StructureMakeWritable(pRc, ppStruct);
1155 assert( (ppStruct!=0 && (*ppStruct)!=0) || (*pRc)!=SQLITE_OK );
1156 if( *pRc==SQLITE_OK ){
1157 Fts5Structure *pStruct = *ppStruct;
1158 int nLevel = pStruct->nLevel;
1159 sqlite3_int64 nByte = (
1160 sizeof(Fts5Structure) + /* Main structure */
1161 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
1164 pStruct = sqlite3_realloc64(pStruct, nByte);
1165 if( pStruct ){
1166 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
1167 pStruct->nLevel++;
1168 *ppStruct = pStruct;
1169 }else{
1170 *pRc = SQLITE_NOMEM;
1176 ** Extend level iLvl so that there is room for at least nExtra more
1177 ** segments.
1179 static void fts5StructureExtendLevel(
1180 int *pRc,
1181 Fts5Structure *pStruct,
1182 int iLvl,
1183 int nExtra,
1184 int bInsert
1186 if( *pRc==SQLITE_OK ){
1187 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1188 Fts5StructureSegment *aNew;
1189 sqlite3_int64 nByte;
1191 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
1192 aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
1193 if( aNew ){
1194 if( bInsert==0 ){
1195 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
1196 }else{
1197 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
1198 memmove(&aNew[nExtra], aNew, nMove);
1199 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
1201 pLvl->aSeg = aNew;
1202 }else{
1203 *pRc = SQLITE_NOMEM;
1208 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
1209 Fts5Structure *pRet = 0;
1210 Fts5Config *pConfig = p->pConfig;
1211 int iCookie; /* Configuration cookie */
1212 Fts5Data *pData;
1214 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
1215 if( p->rc==SQLITE_OK ){
1216 /* TODO: Do we need this if the leaf-index is appended? Probably... */
1217 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
1218 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
1219 if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
1220 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
1222 fts5DataRelease(pData);
1223 if( p->rc!=SQLITE_OK ){
1224 fts5StructureRelease(pRet);
1225 pRet = 0;
1229 return pRet;
1232 static i64 fts5IndexDataVersion(Fts5Index *p){
1233 i64 iVersion = 0;
1235 if( p->rc==SQLITE_OK ){
1236 if( p->pDataVersion==0 ){
1237 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1238 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1240 if( p->rc ) return 0;
1243 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1244 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1246 p->rc = sqlite3_reset(p->pDataVersion);
1249 return iVersion;
1253 ** Read, deserialize and return the structure record.
1255 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1256 ** are over-allocated as described for function fts5StructureDecode()
1257 ** above.
1259 ** If an error occurs, NULL is returned and an error code left in the
1260 ** Fts5Index handle. If an error has already occurred when this function
1261 ** is called, it is a no-op.
1263 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1265 if( p->pStruct==0 ){
1266 p->iStructVersion = fts5IndexDataVersion(p);
1267 if( p->rc==SQLITE_OK ){
1268 p->pStruct = fts5StructureReadUncached(p);
1272 #if 0
1273 else{
1274 Fts5Structure *pTest = fts5StructureReadUncached(p);
1275 if( pTest ){
1276 int i, j;
1277 assert_nc( p->pStruct->nSegment==pTest->nSegment );
1278 assert_nc( p->pStruct->nLevel==pTest->nLevel );
1279 for(i=0; i<pTest->nLevel; i++){
1280 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1281 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1282 for(j=0; j<pTest->aLevel[i].nSeg; j++){
1283 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1284 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1285 assert_nc( p1->iSegid==p2->iSegid );
1286 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1287 assert_nc( p1->pgnoLast==p2->pgnoLast );
1290 fts5StructureRelease(pTest);
1293 #endif
1295 if( p->rc!=SQLITE_OK ) return 0;
1296 assert( p->iStructVersion!=0 );
1297 assert( p->pStruct!=0 );
1298 fts5StructureRef(p->pStruct);
1299 return p->pStruct;
1302 static void fts5StructureInvalidate(Fts5Index *p){
1303 if( p->pStruct ){
1304 fts5StructureRelease(p->pStruct);
1305 p->pStruct = 0;
/*
** Return the total number of segments in index structure pStruct, computed
** by summing nSeg over every level. Returns 0 if pStruct is NULL. This
** function is only ever used as part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int iLvl = 0;                   /* Current level */

  if( pStruct==0 ) return nTotal;
  while( iLvl<pStruct->nLevel ){
    nTotal += pStruct->aLevel[iLvl].nSeg;
    iLvl++;
  }
  return nTotal;
}
#endif
/*
** Append nBlob bytes from pBlob to buffer pBuf without growing it. The
** caller must already have reserved the space; this is only verified by
** an assert().
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {       \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );             \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));           \
  (pBuf)->n += (nBlob);                                      \
}
/*
** Append iVal, encoded as a varint, to buffer pBuf without growing it. The
** caller must already have reserved the space; this is only verified,
** after the write, by an assert().
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                       \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));    \
  assert( (pBuf)->nSpace>=(pBuf)->n );                                 \
}
1340 ** Serialize and store the "structure" record.
1342 ** If an error occurs, leave an error code in the Fts5Index object. If an
1343 ** error has already occurred, this function is a no-op.
1345 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1346 if( p->rc==SQLITE_OK ){
1347 Fts5Buffer buf; /* Buffer to serialize record into */
1348 int iLvl; /* Used to iterate through levels */
1349 int iCookie; /* Cookie value to store */
1350 int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9));
1352 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1353 memset(&buf, 0, sizeof(Fts5Buffer));
1355 /* Append the current configuration cookie */
1356 iCookie = p->pConfig->iCookie;
1357 if( iCookie<0 ) iCookie = 0;
1359 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, nHdr) ){
1360 sqlite3Fts5Put32(buf.p, iCookie);
1361 buf.n = 4;
1362 if( pStruct->nOriginCntr>0 ){
1363 fts5BufferSafeAppendBlob(&buf, FTS5_STRUCTURE_V2, 4);
1365 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1366 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1367 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1370 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1371 int iSeg; /* Used to iterate through segments */
1372 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1373 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1374 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1375 assert( pLvl->nMerge<=pLvl->nSeg );
1377 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1378 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
1379 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iSegid);
1380 fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoFirst);
1381 fts5BufferAppendVarint(&p->rc, &buf, pSeg->pgnoLast);
1382 if( pStruct->nOriginCntr>0 ){
1383 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin1);
1384 fts5BufferAppendVarint(&p->rc, &buf, pSeg->iOrigin2);
1385 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nPgTombstone);
1386 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntryTombstone);
1387 fts5BufferAppendVarint(&p->rc, &buf, pSeg->nEntry);
1392 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1393 fts5BufferFree(&buf);
/*
** Debugging aid, normally compiled out: print a human-readable rendering
** of pStruct to stdout, prefixed by zCaption. In regular builds
** fts5PrintStructure() expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  Fts5Buffer text;
  int rcDebug = SQLITE_OK;

  memset(&text, 0, sizeof(text));
  fts5DebugStructure(&rcDebug, &text, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, text.p);
  fflush(stdout);
  fts5BufferFree(&text);
}
#else
# define fts5PrintStructure(x,y)
#endif
1412 static int fts5SegmentSize(Fts5StructureSegment *pSeg){
1413 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
1417 ** Return a copy of index structure pStruct. Except, promote as many
1418 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1419 ** returned.
1421 static void fts5StructurePromoteTo(
1422 Fts5Index *p,
1423 int iPromote,
1424 int szPromote,
1425 Fts5Structure *pStruct
1427 int il, is;
1428 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1430 if( pOut->nMerge==0 ){
1431 for(il=iPromote+1; il<pStruct->nLevel; il++){
1432 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1433 if( pLvl->nMerge ) return;
1434 for(is=pLvl->nSeg-1; is>=0; is--){
1435 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1436 if( sz>szPromote ) return;
1437 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1438 if( p->rc ) return;
1439 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1440 pOut->nSeg++;
1441 pLvl->nSeg--;
1448 ** A new segment has just been written to level iLvl of index structure
1449 ** pStruct. This function determines if any segments should be promoted
1450 ** as a result. Segments are promoted in two scenarios:
1452 ** a) If the segment just written is smaller than one or more segments
1453 ** within the previous populated level, it is promoted to the previous
1454 ** populated level.
1456 ** b) If the segment just written is larger than the newest segment on
1457 ** the next populated level, then that segment, and any other adjacent
1458 ** segments that are also smaller than the one just written, are
1459 ** promoted.
1461 ** If one or more segments are promoted, the structure object is updated
1462 ** to reflect this.
1464 static void fts5StructurePromote(
1465 Fts5Index *p, /* FTS5 backend object */
1466 int iLvl, /* Index level just updated */
1467 Fts5Structure *pStruct /* Index structure */
1469 if( p->rc==SQLITE_OK ){
1470 int iTst;
1471 int iPromote = -1;
1472 int szPromote = 0; /* Promote anything this size or smaller */
1473 Fts5StructureSegment *pSeg; /* Segment just written */
1474 int szSeg; /* Size of segment just written */
1475 int nSeg = pStruct->aLevel[iLvl].nSeg;
1477 if( nSeg==0 ) return;
1478 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1479 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1481 /* Check for condition (a) */
1482 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1483 if( iTst>=0 ){
1484 int i;
1485 int szMax = 0;
1486 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1487 assert( pTst->nMerge==0 );
1488 for(i=0; i<pTst->nSeg; i++){
1489 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1490 if( sz>szMax ) szMax = sz;
1492 if( szMax>=szSeg ){
1493 /* Condition (a) is true. Promote the newest segment on level
1494 ** iLvl to level iTst. */
1495 iPromote = iTst;
1496 szPromote = szMax;
1500 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1501 ** is a no-op if it is not. */
1502 if( iPromote<0 ){
1503 iPromote = iLvl;
1504 szPromote = szSeg;
1506 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1512 ** Advance the iterator passed as the only argument. If the end of the
1513 ** doclist-index page is reached, return non-zero.
1515 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1516 Fts5Data *pData = pLvl->pData;
1518 if( pLvl->iOff==0 ){
1519 assert( pLvl->bEof==0 );
1520 pLvl->iOff = 1;
1521 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1522 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1523 pLvl->iFirstOff = pLvl->iOff;
1524 }else{
1525 int iOff;
1526 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1527 if( pData->p[iOff] ) break;
1530 if( iOff<pData->nn ){
1531 i64 iVal;
1532 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1533 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
1534 pLvl->iRowid += iVal;
1535 pLvl->iOff = iOff;
1536 }else{
1537 pLvl->bEof = 1;
1541 return pLvl->bEof;
1545 ** Advance the iterator passed as the only argument.
1547 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1548 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1550 assert( iLvl<pIter->nLvl );
1551 if( fts5DlidxLvlNext(pLvl) ){
1552 if( (iLvl+1) < pIter->nLvl ){
1553 fts5DlidxIterNextR(p, pIter, iLvl+1);
1554 if( pLvl[1].bEof==0 ){
1555 fts5DataRelease(pLvl->pData);
1556 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1557 pLvl->pData = fts5DataRead(p,
1558 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1560 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1565 return pIter->aLvl[0].bEof;
1567 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
1568 return fts5DlidxIterNextR(p, pIter, 0);
1572 ** The iterator passed as the first argument has the following fields set
1573 ** as follows. This function sets up the rest of the iterator so that it
1574 ** points to the first rowid in the doclist-index.
1576 ** pData:
1577 ** pointer to doclist-index record,
1579 ** When this function is called pIter->iLeafPgno is the page number the
1580 ** doclist is associated with (the one featuring the term).
1582 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1583 int i;
1584 for(i=0; i<pIter->nLvl; i++){
1585 fts5DlidxLvlNext(&pIter->aLvl[i]);
1587 return pIter->aLvl[0].bEof;
1591 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
1592 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
1595 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
1596 int i;
1598 /* Advance each level to the last entry on the last page */
1599 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
1600 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
1601 while( fts5DlidxLvlNext(pLvl)==0 );
1602 pLvl->bEof = 0;
1604 if( i>0 ){
1605 Fts5DlidxLvl *pChild = &pLvl[-1];
1606 fts5DataRelease(pChild->pData);
1607 memset(pChild, 0, sizeof(Fts5DlidxLvl));
1608 pChild->pData = fts5DataRead(p,
1609 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
1616 ** Move the iterator passed as the only argument to the previous entry.
1618 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
1619 int iOff = pLvl->iOff;
1621 assert( pLvl->bEof==0 );
1622 if( iOff<=pLvl->iFirstOff ){
1623 pLvl->bEof = 1;
1624 }else{
1625 u8 *a = pLvl->pData->p;
1627 pLvl->iOff = 0;
1628 fts5DlidxLvlNext(pLvl);
1629 while( 1 ){
1630 int nZero = 0;
1631 int ii = pLvl->iOff;
1632 u64 delta = 0;
1634 while( a[ii]==0 ){
1635 nZero++;
1636 ii++;
1638 ii += sqlite3Fts5GetVarint(&a[ii], &delta);
1640 if( ii>=iOff ) break;
1641 pLvl->iLeafPgno += nZero+1;
1642 pLvl->iRowid += delta;
1643 pLvl->iOff = ii;
1647 return pLvl->bEof;
1650 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1651 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1653 assert( iLvl<pIter->nLvl );
1654 if( fts5DlidxLvlPrev(pLvl) ){
1655 if( (iLvl+1) < pIter->nLvl ){
1656 fts5DlidxIterPrevR(p, pIter, iLvl+1);
1657 if( pLvl[1].bEof==0 ){
1658 fts5DataRelease(pLvl->pData);
1659 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1660 pLvl->pData = fts5DataRead(p,
1661 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1663 if( pLvl->pData ){
1664 while( fts5DlidxLvlNext(pLvl)==0 );
1665 pLvl->bEof = 0;
1671 return pIter->aLvl[0].bEof;
1673 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
1674 return fts5DlidxIterPrevR(p, pIter, 0);
1678 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1680 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1681 if( pIter ){
1682 int i;
1683 for(i=0; i<pIter->nLvl; i++){
1684 fts5DataRelease(pIter->aLvl[i].pData);
1686 sqlite3_free(pIter);
1690 static Fts5DlidxIter *fts5DlidxIterInit(
1691 Fts5Index *p, /* Fts5 Backend to iterate within */
1692 int bRev, /* True for ORDER BY ASC */
1693 int iSegid, /* Segment id */
1694 int iLeafPg /* Leaf page number to load dlidx for */
1696 Fts5DlidxIter *pIter = 0;
1697 int i;
1698 int bDone = 0;
1700 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1701 sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1702 Fts5DlidxIter *pNew;
1704 pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
1705 if( pNew==0 ){
1706 p->rc = SQLITE_NOMEM;
1707 }else{
1708 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1709 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1710 pIter = pNew;
1711 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1712 pLvl->pData = fts5DataRead(p, iRowid);
1713 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1714 bDone = 1;
1716 pIter->nLvl = i+1;
1720 if( p->rc==SQLITE_OK ){
1721 pIter->iSegid = iSegid;
1722 if( bRev==0 ){
1723 fts5DlidxIterFirst(pIter);
1724 }else{
1725 fts5DlidxIterLast(p, pIter);
1729 if( p->rc!=SQLITE_OK ){
1730 fts5DlidxIterFree(pIter);
1731 pIter = 0;
1734 return pIter;
1737 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1738 return pIter->aLvl[0].iRowid;
1740 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
1741 return pIter->aLvl[0].iLeafPgno;
1745 ** Load the next leaf page into the segment iterator.
1747 static void fts5SegIterNextPage(
1748 Fts5Index *p, /* FTS5 backend object */
1749 Fts5SegIter *pIter /* Iterator to advance to next page */
1751 Fts5Data *pLeaf;
1752 Fts5StructureSegment *pSeg = pIter->pSeg;
1753 fts5DataRelease(pIter->pLeaf);
1754 pIter->iLeafPgno++;
1755 if( pIter->pNextLeaf ){
1756 pIter->pLeaf = pIter->pNextLeaf;
1757 pIter->pNextLeaf = 0;
1758 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1759 pIter->pLeaf = fts5LeafRead(p,
1760 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1762 }else{
1763 pIter->pLeaf = 0;
1765 pLeaf = pIter->pLeaf;
1767 if( pLeaf ){
1768 pIter->iPgidxOff = pLeaf->szLeaf;
1769 if( fts5LeafIsTermless(pLeaf) ){
1770 pIter->iEndofDoclist = pLeaf->nn+1;
1771 }else{
1772 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1773 pIter->iEndofDoclist
1780 ** Argument p points to a buffer containing a varint to be interpreted as a
1781 ** position list size field. Read the varint and return the number of bytes
1782 ** read. Before returning, set *pnSz to the number of bytes in the position
1783 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1785 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1786 int nSz;
1787 int n = 0;
1788 fts5FastGetVarint32(p, n, nSz);
1789 assert_nc( nSz>=0 );
1790 *pnSz = nSz/2;
1791 *pbDel = nSz & 0x0001;
1792 return n;
1796 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1797 ** position-list size field. Read the value of the field and store it
1798 ** in the following variables:
1800 ** Fts5SegIter.nPos
1801 ** Fts5SegIter.bDel
1803 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1804 ** position list content (if any).
1806 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1807 if( p->rc==SQLITE_OK ){
1808 int iOff = pIter->iLeafOffset; /* Offset to read at */
1809 ASSERT_SZLEAF_OK(pIter->pLeaf);
1810 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1811 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1812 pIter->bDel = 0;
1813 pIter->nPos = 1;
1814 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1815 pIter->bDel = 1;
1816 iOff++;
1817 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1818 pIter->nPos = 1;
1819 iOff++;
1820 }else{
1821 pIter->nPos = 0;
1824 }else{
1825 int nSz;
1826 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1827 pIter->bDel = (nSz & 0x0001);
1828 pIter->nPos = nSz>>1;
1829 assert_nc( pIter->nPos>=0 );
1831 pIter->iLeafOffset = iOff;
1835 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
1836 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1837 i64 iOff = pIter->iLeafOffset;
1839 ASSERT_SZLEAF_OK(pIter->pLeaf);
1840 while( iOff>=pIter->pLeaf->szLeaf ){
1841 fts5SegIterNextPage(p, pIter);
1842 if( pIter->pLeaf==0 ){
1843 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
1844 return;
1846 iOff = 4;
1847 a = pIter->pLeaf->p;
1849 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
1850 pIter->iLeafOffset = iOff;
1854 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1855 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1856 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1857 ** the first term in the segment).
1859 ** This function populates:
1861 ** Fts5SegIter.term
1862 ** Fts5SegIter.rowid
1864 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1865 ** the first position list. The position list belonging to document
1866 ** (Fts5SegIter.iRowid).
1868 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1869 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
1870 i64 iOff = pIter->iLeafOffset; /* Offset to read at */
1871 int nNew; /* Bytes of new data */
1873 iOff += fts5GetVarint32(&a[iOff], nNew);
1874 if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
1875 p->rc = FTS5_CORRUPT;
1876 return;
1878 pIter->term.n = nKeep;
1879 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1880 assert( pIter->term.n<=pIter->term.nSpace );
1881 iOff += nNew;
1882 pIter->iTermLeafOffset = iOff;
1883 pIter->iTermLeafPgno = pIter->iLeafPgno;
1884 pIter->iLeafOffset = iOff;
1886 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1887 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1888 }else{
1889 int nExtra;
1890 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1891 pIter->iEndofDoclist += nExtra;
1894 fts5SegIterLoadRowid(p, pIter);
1897 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1898 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1899 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1901 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
1902 if( pIter->flags & FTS5_SEGITER_REVERSE ){
1903 pIter->xNext = fts5SegIterNext_Reverse;
1904 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1905 pIter->xNext = fts5SegIterNext_None;
1906 }else{
1907 pIter->xNext = fts5SegIterNext;
1912 ** Allocate a tombstone hash page array (pIter->apTombstone) for the
1913 ** iterator passed as the second argument. If an OOM error occurs, leave
1914 ** an error in the Fts5Index object.
1916 static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){
1917 const int nTomb = pIter->pSeg->nPgTombstone;
1918 if( nTomb>0 ){
1919 Fts5Data **apTomb = 0;
1920 apTomb = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data)*nTomb);
1921 if( apTomb ){
1922 pIter->apTombstone = apTomb;
1923 pIter->nTombstone = nTomb;
1929 ** Initialize the iterator object pIter to iterate through the entries in
1930 ** segment pSeg. The iterator is left pointing to the first entry when
1931 ** this function returns.
1933 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1934 ** an error has already occurred when this function is called, it is a no-op.
1936 static void fts5SegIterInit(
1937 Fts5Index *p, /* FTS index object */
1938 Fts5StructureSegment *pSeg, /* Description of segment */
1939 Fts5SegIter *pIter /* Object to populate */
1941 if( pSeg->pgnoFirst==0 ){
1942 /* This happens if the segment is being used as an input to an incremental
1943 ** merge and all data has already been "trimmed". See function
1944 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1945 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1946 ** at EOF already. */
1947 assert( pIter->pLeaf==0 );
1948 return;
1951 if( p->rc==SQLITE_OK ){
1952 memset(pIter, 0, sizeof(*pIter));
1953 fts5SegIterSetNext(p, pIter);
1954 pIter->pSeg = pSeg;
1955 pIter->iLeafPgno = pSeg->pgnoFirst-1;
1956 do {
1957 fts5SegIterNextPage(p, pIter);
1958 }while( p->rc==SQLITE_OK && pIter->pLeaf && pIter->pLeaf->nn==4 );
1961 if( p->rc==SQLITE_OK && pIter->pLeaf ){
1962 pIter->iLeafOffset = 4;
1963 assert( pIter->pLeaf!=0 );
1964 assert_nc( pIter->pLeaf->nn>4 );
1965 assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1966 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1967 fts5SegIterLoadTerm(p, pIter, 0);
1968 fts5SegIterLoadNPos(p, pIter);
1969 fts5SegIterAllocTombstone(p, pIter);
1974 ** This function is only ever called on iterators created by calls to
1975 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1977 ** The iterator is in an unusual state when this function is called: the
1978 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1979 ** the position-list size field for the first relevant rowid on the page.
1980 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1982 ** This function advances the iterator so that it points to the last
1983 ** relevant rowid on the page and, if necessary, initializes the
1984 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1985 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1986 ** byte of the position list content associated with said rowid.
1988 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1989 int eDetail = p->pConfig->eDetail;
1990 int n = pIter->pLeaf->szLeaf;
1991 int i = pIter->iLeafOffset;
1992 u8 *a = pIter->pLeaf->p;
1993 int iRowidOffset = 0;
1995 if( n>pIter->iEndofDoclist ){
1996 n = pIter->iEndofDoclist;
1999 ASSERT_SZLEAF_OK(pIter->pLeaf);
2000 while( 1 ){
2001 u64 iDelta = 0;
2003 if( eDetail==FTS5_DETAIL_NONE ){
2004 /* todo */
2005 if( i<n && a[i]==0 ){
2006 i++;
2007 if( i<n && a[i]==0 ) i++;
2009 }else{
2010 int nPos;
2011 int bDummy;
2012 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
2013 i += nPos;
2015 if( i>=n ) break;
2016 i += fts5GetVarint(&a[i], &iDelta);
2017 pIter->iRowid += iDelta;
2019 /* If necessary, grow the pIter->aRowidOffset[] array. */
2020 if( iRowidOffset>=pIter->nRowidOffset ){
2021 int nNew = pIter->nRowidOffset + 8;
2022 int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
2023 if( aNew==0 ){
2024 p->rc = SQLITE_NOMEM;
2025 break;
2027 pIter->aRowidOffset = aNew;
2028 pIter->nRowidOffset = nNew;
2031 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
2032 pIter->iLeafOffset = i;
2034 pIter->iRowidOffset = iRowidOffset;
2035 fts5SegIterLoadNPos(p, pIter);
2041 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
2042 assert( pIter->flags & FTS5_SEGITER_REVERSE );
2043 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2045 fts5DataRelease(pIter->pLeaf);
2046 pIter->pLeaf = 0;
2047 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
2048 Fts5Data *pNew;
2049 pIter->iLeafPgno--;
2050 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
2051 pIter->pSeg->iSegid, pIter->iLeafPgno
2053 if( pNew ){
2054 /* iTermLeafOffset may be equal to szLeaf if the term is the last
2055 ** thing on the page - i.e. the first rowid is on the following page.
2056 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
2057 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
2058 assert( pIter->pLeaf==0 );
2059 if( pIter->iTermLeafOffset<pNew->szLeaf ){
2060 pIter->pLeaf = pNew;
2061 pIter->iLeafOffset = pIter->iTermLeafOffset;
2063 }else{
2064 int iRowidOff;
2065 iRowidOff = fts5LeafFirstRowidOff(pNew);
2066 if( iRowidOff ){
2067 if( iRowidOff>=pNew->szLeaf ){
2068 p->rc = FTS5_CORRUPT;
2069 }else{
2070 pIter->pLeaf = pNew;
2071 pIter->iLeafOffset = iRowidOff;
2076 if( pIter->pLeaf ){
2077 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
2078 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
2079 break;
2080 }else{
2081 fts5DataRelease(pNew);
2086 if( pIter->pLeaf ){
2087 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2088 fts5SegIterReverseInitPage(p, pIter);
2093 ** Return true if the iterator passed as the second argument currently
2094 ** points to a delete marker. A delete marker is an entry with a 0 byte
2095 ** position-list.
2097 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
2098 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2099 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
2103 ** Advance iterator pIter to the next entry.
2105 ** This version of fts5SegIterNext() is only used by reverse iterators.
2107 static void fts5SegIterNext_Reverse(
2108 Fts5Index *p, /* FTS5 backend object */
2109 Fts5SegIter *pIter, /* Iterator to advance */
2110 int *pbUnused /* Unused */
2112 assert( pIter->flags & FTS5_SEGITER_REVERSE );
2113 assert( pIter->pNextLeaf==0 );
2114 UNUSED_PARAM(pbUnused);
2116 if( pIter->iRowidOffset>0 ){
2117 u8 *a = pIter->pLeaf->p;
2118 int iOff;
2119 u64 iDelta;
2121 pIter->iRowidOffset--;
2122 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
2123 fts5SegIterLoadNPos(p, pIter);
2124 iOff = pIter->iLeafOffset;
2125 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
2126 iOff += pIter->nPos;
2128 fts5GetVarint(&a[iOff], &iDelta);
2129 pIter->iRowid -= iDelta;
2130 }else{
2131 fts5SegIterReverseNewPage(p, pIter);
2136 ** Advance iterator pIter to the next entry.
2138 ** This version of fts5SegIterNext() is only used if detail=none and the
2139 ** iterator is not a reverse direction iterator.
2141 static void fts5SegIterNext_None(
2142 Fts5Index *p, /* FTS5 backend object */
2143 Fts5SegIter *pIter, /* Iterator to advance */
2144 int *pbNewTerm /* OUT: Set for new term */
2146 int iOff;
2148 assert( p->rc==SQLITE_OK );
2149 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
2150 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
2152 ASSERT_SZLEAF_OK(pIter->pLeaf);
2153 iOff = pIter->iLeafOffset;
2155 /* Next entry is on the next page */
2156 while( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
2157 fts5SegIterNextPage(p, pIter);
2158 if( p->rc || pIter->pLeaf==0 ) return;
2159 pIter->iRowid = 0;
2160 iOff = 4;
2163 if( iOff<pIter->iEndofDoclist ){
2164 /* Next entry is on the current page */
2165 i64 iDelta;
2166 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
2167 pIter->iLeafOffset = iOff;
2168 pIter->iRowid += iDelta;
2169 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
2170 if( pIter->pSeg ){
2171 int nKeep = 0;
2172 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
2173 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
2175 pIter->iLeafOffset = iOff;
2176 fts5SegIterLoadTerm(p, pIter, nKeep);
2177 }else{
2178 const u8 *pList = 0;
2179 const char *zTerm = 0;
2180 int nList;
2181 sqlite3Fts5HashScanNext(p->pHash);
2182 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2183 if( pList==0 ) goto next_none_eof;
2184 pIter->pLeaf->p = (u8*)pList;
2185 pIter->pLeaf->nn = nList;
2186 pIter->pLeaf->szLeaf = nList;
2187 pIter->iEndofDoclist = nList;
2188 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
2189 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2192 if( pbNewTerm ) *pbNewTerm = 1;
2193 }else{
2194 goto next_none_eof;
2197 fts5SegIterLoadNPos(p, pIter);
2199 return;
2200 next_none_eof:
2201 fts5DataRelease(pIter->pLeaf);
2202 pIter->pLeaf = 0;
2207 ** Advance iterator pIter to the next entry.
2209 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
2210 ** is not considered an error if the iterator reaches EOF. If an error has
2211 ** already occurred when this function is called, it is a no-op.
2213 static void fts5SegIterNext(
2214 Fts5Index *p, /* FTS5 backend object */
2215 Fts5SegIter *pIter, /* Iterator to advance */
2216 int *pbNewTerm /* OUT: Set for new term */
2218 Fts5Data *pLeaf = pIter->pLeaf;
2219 int iOff;
2220 int bNewTerm = 0;
2221 int nKeep = 0;
2222 u8 *a;
2223 int n;
2225 assert( pbNewTerm==0 || *pbNewTerm==0 );
2226 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
2228 /* Search for the end of the position list within the current page. */
2229 a = pLeaf->p;
2230 n = pLeaf->szLeaf;
2232 ASSERT_SZLEAF_OK(pLeaf);
2233 iOff = pIter->iLeafOffset + pIter->nPos;
2235 if( iOff<n ){
2236 /* The next entry is on the current page. */
2237 assert_nc( iOff<=pIter->iEndofDoclist );
2238 if( iOff>=pIter->iEndofDoclist ){
2239 bNewTerm = 1;
2240 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
2241 iOff += fts5GetVarint32(&a[iOff], nKeep);
2243 }else{
2244 u64 iDelta;
2245 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
2246 pIter->iRowid += iDelta;
2247 assert_nc( iDelta>0 );
2249 pIter->iLeafOffset = iOff;
2251 }else if( pIter->pSeg==0 ){
2252 const u8 *pList = 0;
2253 const char *zTerm = 0;
2254 int nList = 0;
2255 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
2256 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
2257 sqlite3Fts5HashScanNext(p->pHash);
2258 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2260 if( pList==0 ){
2261 fts5DataRelease(pIter->pLeaf);
2262 pIter->pLeaf = 0;
2263 }else{
2264 pIter->pLeaf->p = (u8*)pList;
2265 pIter->pLeaf->nn = nList;
2266 pIter->pLeaf->szLeaf = nList;
2267 pIter->iEndofDoclist = nList+1;
2268 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
2269 (u8*)zTerm);
2270 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2271 *pbNewTerm = 1;
2273 }else{
2274 iOff = 0;
2275 /* Next entry is not on the current page */
2276 while( iOff==0 ){
2277 fts5SegIterNextPage(p, pIter);
2278 pLeaf = pIter->pLeaf;
2279 if( pLeaf==0 ) break;
2280 ASSERT_SZLEAF_OK(pLeaf);
2281 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
2282 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
2283 pIter->iLeafOffset = iOff;
2285 if( pLeaf->nn>pLeaf->szLeaf ){
2286 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2287 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
2291 else if( pLeaf->nn>pLeaf->szLeaf ){
2292 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2293 &pLeaf->p[pLeaf->szLeaf], iOff
2295 pIter->iLeafOffset = iOff;
2296 pIter->iEndofDoclist = iOff;
2297 bNewTerm = 1;
2299 assert_nc( iOff<pLeaf->szLeaf );
2300 if( iOff>pLeaf->szLeaf ){
2301 p->rc = FTS5_CORRUPT;
2302 return;
2307 /* Check if the iterator is now at EOF. If so, return early. */
2308 if( pIter->pLeaf ){
2309 if( bNewTerm ){
2310 if( pIter->flags & FTS5_SEGITER_ONETERM ){
2311 fts5DataRelease(pIter->pLeaf);
2312 pIter->pLeaf = 0;
2313 }else{
2314 fts5SegIterLoadTerm(p, pIter, nKeep);
2315 fts5SegIterLoadNPos(p, pIter);
2316 if( pbNewTerm ) *pbNewTerm = 1;
2318 }else{
2319 /* The following could be done by calling fts5SegIterLoadNPos(). But
2320 ** this block is particularly performance critical, so equivalent
2321 ** code is inlined. */
2322 int nSz;
2323 assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
2324 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
2325 pIter->bDel = (nSz & 0x0001);
2326 pIter->nPos = nSz>>1;
2327 assert_nc( pIter->nPos>=0 );
/*
** Exchange the values of lvalues a and b, both of type T. The expansion is
** a single statement (do/while(0)) so that "SWAPVAL(T,a,b);" may be used
** as the body of an if/else without braces. Each argument is evaluated
** more than once, so arguments must be free of side effects.
*/
#define SWAPVAL(T, a, b) \
  do { T swapval_tmp_ = (a); (a) = (b); (b) = swapval_tmp_; } while( 0 )
/*
** Advance integer lvalue iOff past the varint stored at a[iOff]. Bytes are
** consumed while the high bit (0x80) is set, up to a maximum of 9 bytes.
** iOff is evaluated more than once and must be a side-effect-free lvalue.
** The expansion is a single statement, to be followed by a semicolon.
*/
#define fts5IndexSkipVarint(a, iOff) do {                          \
  int skipvarint_end_ = (iOff)+9;                                  \
  while( ((a)[(iOff)++] & 0x80) && (iOff)<skipvarint_end_ ){}      \
} while( 0 )
2340 ** Iterator pIter currently points to the first rowid in a doclist. This
2341 ** function sets the iterator up so that iterates in reverse order through
2342 ** the doclist.
2344 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2345 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2346 Fts5Data *pLast = 0;
2347 int pgnoLast = 0;
2349 if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){
2350 int iSegid = pIter->pSeg->iSegid;
2351 pgnoLast = fts5DlidxIterPgno(pDlidx);
2352 pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2353 }else{
2354 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2356 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2357 ** position-list content for the current rowid. Back it up so that it
2358 ** points to the start of the position-list size field. */
2359 int iPoslist;
2360 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2361 iPoslist = pIter->iTermLeafOffset;
2362 }else{
2363 iPoslist = 4;
2365 fts5IndexSkipVarint(pLeaf->p, iPoslist);
2366 pIter->iLeafOffset = iPoslist;
2368 /* If this condition is true then the largest rowid for the current
2369 ** term may not be stored on the current page. So search forward to
2370 ** see where said rowid really is. */
2371 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2372 int pgno;
2373 Fts5StructureSegment *pSeg = pIter->pSeg;
2375 /* The last rowid in the doclist may not be on the current page. Search
2376 ** forward to find the page containing the last rowid. */
2377 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2378 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2379 Fts5Data *pNew = fts5LeafRead(p, iAbs);
2380 if( pNew ){
2381 int iRowid, bTermless;
2382 iRowid = fts5LeafFirstRowidOff(pNew);
2383 bTermless = fts5LeafIsTermless(pNew);
2384 if( iRowid ){
2385 SWAPVAL(Fts5Data*, pNew, pLast);
2386 pgnoLast = pgno;
2388 fts5DataRelease(pNew);
2389 if( bTermless==0 ) break;
2395 /* If pLast is NULL at this point, then the last rowid for this doclist
2396 ** lies on the page currently indicated by the iterator. In this case
2397 ** pIter->iLeafOffset is already set to point to the position-list size
2398 ** field associated with the first relevant rowid on the page.
2400 ** Or, if pLast is non-NULL, then it is the page that contains the last
2401 ** rowid. In this case configure the iterator so that it points to the
2402 ** first rowid on this page.
2404 if( pLast ){
2405 int iOff;
2406 fts5DataRelease(pIter->pLeaf);
2407 pIter->pLeaf = pLast;
2408 pIter->iLeafPgno = pgnoLast;
2409 iOff = fts5LeafFirstRowidOff(pLast);
2410 if( iOff>pLast->szLeaf ){
2411 p->rc = FTS5_CORRUPT;
2412 return;
2414 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2415 pIter->iLeafOffset = iOff;
2417 if( fts5LeafIsTermless(pLast) ){
2418 pIter->iEndofDoclist = pLast->nn+1;
2419 }else{
2420 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2424 fts5SegIterReverseInitPage(p, pIter);
2428 ** Iterator pIter currently points to the first rowid of a doclist.
2429 ** There is a doclist-index associated with the final term on the current
2430 ** page. If the current term is the last term on the page, load the
2431 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2433 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2434 int iSeg = pIter->pSeg->iSegid;
2435 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2436 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2438 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2439 assert( pIter->pDlidx==0 );
2441 /* Check if the current doclist ends on this page. If it does, return
2442 ** early without loading the doclist-index (as it belongs to a different
2443 ** term. */
2444 if( pIter->iTermLeafPgno==pIter->iLeafPgno
2445 && pIter->iEndofDoclist<pLeaf->szLeaf
2447 return;
2450 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2454 ** The iterator object passed as the second argument currently contains
2455 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2456 ** function searches the leaf page for a term matching (pTerm/nTerm).
2458 ** If the specified term is found on the page, then the iterator is left
2459 ** pointing to it. If argument bGe is zero and the term is not found,
2460 ** the iterator is left pointing at EOF.
2462 ** If bGe is non-zero and the specified term is not found, then the
2463 ** iterator is left pointing to the smallest term in the segment that
2464 ** is larger than the specified term, even if this term is not on the
2465 ** current page.
2467 static void fts5LeafSeek(
2468 Fts5Index *p, /* Leave any error code here */
2469 int bGe, /* True for a >= search */
2470 Fts5SegIter *pIter, /* Iterator to seek */
2471 const u8 *pTerm, int nTerm /* Term to search for */
2473 u32 iOff;
2474 const u8 *a = pIter->pLeaf->p;
2475 u32 n = (u32)pIter->pLeaf->nn;
2477 u32 nMatch = 0;
2478 u32 nKeep = 0;
2479 u32 nNew = 0;
2480 u32 iTermOff;
2481 u32 iPgidx; /* Current offset in pgidx */
2482 int bEndOfPage = 0;
2484 assert( p->rc==SQLITE_OK );
2486 iPgidx = (u32)pIter->pLeaf->szLeaf;
2487 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2488 iOff = iTermOff;
2489 if( iOff>n ){
2490 p->rc = FTS5_CORRUPT;
2491 return;
2494 while( 1 ){
2496 /* Figure out how many new bytes are in this term */
2497 fts5FastGetVarint32(a, iOff, nNew);
2498 if( nKeep<nMatch ){
2499 goto search_failed;
2502 assert( nKeep>=nMatch );
2503 if( nKeep==nMatch ){
2504 u32 nCmp;
2505 u32 i;
2506 nCmp = (u32)MIN(nNew, nTerm-nMatch);
2507 for(i=0; i<nCmp; i++){
2508 if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2510 nMatch += i;
2512 if( (u32)nTerm==nMatch ){
2513 if( i==nNew ){
2514 goto search_success;
2515 }else{
2516 goto search_failed;
2518 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2519 goto search_failed;
2523 if( iPgidx>=n ){
2524 bEndOfPage = 1;
2525 break;
2528 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2529 iTermOff += nKeep;
2530 iOff = iTermOff;
2532 if( iOff>=n ){
2533 p->rc = FTS5_CORRUPT;
2534 return;
2537 /* Read the nKeep field of the next term. */
2538 fts5FastGetVarint32(a, iOff, nKeep);
2541 search_failed:
2542 if( bGe==0 ){
2543 fts5DataRelease(pIter->pLeaf);
2544 pIter->pLeaf = 0;
2545 return;
2546 }else if( bEndOfPage ){
2547 do {
2548 fts5SegIterNextPage(p, pIter);
2549 if( pIter->pLeaf==0 ) return;
2550 a = pIter->pLeaf->p;
2551 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2552 iPgidx = (u32)pIter->pLeaf->szLeaf;
2553 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2554 if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
2555 p->rc = FTS5_CORRUPT;
2556 return;
2557 }else{
2558 nKeep = 0;
2559 iTermOff = iOff;
2560 n = (u32)pIter->pLeaf->nn;
2561 iOff += fts5GetVarint32(&a[iOff], nNew);
2562 break;
2565 }while( 1 );
2568 search_success:
2569 if( (i64)iOff+nNew>n || nNew<1 ){
2570 p->rc = FTS5_CORRUPT;
2571 return;
2573 pIter->iLeafOffset = iOff + nNew;
2574 pIter->iTermLeafOffset = pIter->iLeafOffset;
2575 pIter->iTermLeafPgno = pIter->iLeafPgno;
2577 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2578 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2580 if( iPgidx>=n ){
2581 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2582 }else{
2583 int nExtra;
2584 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2585 pIter->iEndofDoclist = iTermOff + nExtra;
2587 pIter->iPgidxOff = iPgidx;
2589 fts5SegIterLoadRowid(p, pIter);
2590 fts5SegIterLoadNPos(p, pIter);
2593 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2594 if( p->pIdxSelect==0 ){
2595 Fts5Config *pConfig = p->pConfig;
2596 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2597 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2598 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2599 pConfig->zDb, pConfig->zName
2602 return p->pIdxSelect;
2606 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2607 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2609 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2610 ** an error has already occurred when this function is called, it is a no-op.
2612 static void fts5SegIterSeekInit(
2613 Fts5Index *p, /* FTS5 backend */
2614 const u8 *pTerm, int nTerm, /* Term to seek to */
2615 int flags, /* Mask of FTS5INDEX_XXX flags */
2616 Fts5StructureSegment *pSeg, /* Description of segment */
2617 Fts5SegIter *pIter /* Object to populate */
2619 int iPg = 1;
2620 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2621 int bDlidx = 0; /* True if there is a doclist-index */
2622 sqlite3_stmt *pIdxSelect = 0;
2624 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2625 assert( pTerm && nTerm );
2626 memset(pIter, 0, sizeof(*pIter));
2627 pIter->pSeg = pSeg;
2629 /* This block sets stack variable iPg to the leaf page number that may
2630 ** contain term (pTerm/nTerm), if it is present in the segment. */
2631 pIdxSelect = fts5IdxSelectStmt(p);
2632 if( p->rc ) return;
2633 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2634 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2635 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2636 i64 val = sqlite3_column_int(pIdxSelect, 0);
2637 iPg = (int)(val>>1);
2638 bDlidx = (val & 0x0001);
2640 p->rc = sqlite3_reset(pIdxSelect);
2641 sqlite3_bind_null(pIdxSelect, 2);
2643 if( iPg<pSeg->pgnoFirst ){
2644 iPg = pSeg->pgnoFirst;
2645 bDlidx = 0;
2648 pIter->iLeafPgno = iPg - 1;
2649 fts5SegIterNextPage(p, pIter);
2651 if( pIter->pLeaf ){
2652 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2655 if( p->rc==SQLITE_OK && bGe==0 ){
2656 pIter->flags |= FTS5_SEGITER_ONETERM;
2657 if( pIter->pLeaf ){
2658 if( flags & FTS5INDEX_QUERY_DESC ){
2659 pIter->flags |= FTS5_SEGITER_REVERSE;
2661 if( bDlidx ){
2662 fts5SegIterLoadDlidx(p, pIter);
2664 if( flags & FTS5INDEX_QUERY_DESC ){
2665 fts5SegIterReverse(p, pIter);
2670 fts5SegIterSetNext(p, pIter);
2671 fts5SegIterAllocTombstone(p, pIter);
2673 /* Either:
2675 ** 1) an error has occurred, or
2676 ** 2) the iterator points to EOF, or
2677 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2678 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2679 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2681 assert_nc( p->rc!=SQLITE_OK /* 1 */
2682 || pIter->pLeaf==0 /* 2 */
2683 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
2684 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
2689 ** Initialize the object pIter to point to term pTerm/nTerm within the
2690 ** in-memory hash table. If there is no such term in the hash-table, the
2691 ** iterator is set to EOF.
2693 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2694 ** an error has already occurred when this function is called, it is a no-op.
2696 static void fts5SegIterHashInit(
2697 Fts5Index *p, /* FTS5 backend */
2698 const u8 *pTerm, int nTerm, /* Term to seek to */
2699 int flags, /* Mask of FTS5INDEX_XXX flags */
2700 Fts5SegIter *pIter /* Object to populate */
2702 int nList = 0;
2703 const u8 *z = 0;
2704 int n = 0;
2705 Fts5Data *pLeaf = 0;
2707 assert( p->pHash );
2708 assert( p->rc==SQLITE_OK );
2710 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
2711 const u8 *pList = 0;
2713 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
2714 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
2715 n = (z ? (int)strlen((const char*)z) : 0);
2716 if( pList ){
2717 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
2718 if( pLeaf ){
2719 pLeaf->p = (u8*)pList;
2722 }else{
2723 p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
2724 (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
2726 if( pLeaf ){
2727 pLeaf->p = (u8*)&pLeaf[1];
2729 z = pTerm;
2730 n = nTerm;
2731 pIter->flags |= FTS5_SEGITER_ONETERM;
2734 if( pLeaf ){
2735 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
2736 pLeaf->nn = pLeaf->szLeaf = nList;
2737 pIter->pLeaf = pLeaf;
2738 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
2739 pIter->iEndofDoclist = pLeaf->nn;
2741 if( flags & FTS5INDEX_QUERY_DESC ){
2742 pIter->flags |= FTS5_SEGITER_REVERSE;
2743 fts5SegIterReverseInitPage(p, pIter);
2744 }else{
2745 fts5SegIterLoadNPos(p, pIter);
2749 fts5SegIterSetNext(p, pIter);
2753 ** Array ap[] contains n elements. Release each of these elements using
2754 ** fts5DataRelease(). Then free the array itself using sqlite3_free().
2756 static void fts5IndexFreeArray(Fts5Data **ap, int n){
2757 if( ap ){
2758 int ii;
2759 for(ii=0; ii<n; ii++){
2760 fts5DataRelease(ap[ii]);
2762 sqlite3_free(ap);
2767 ** Zero the iterator passed as the only argument.
2769 static void fts5SegIterClear(Fts5SegIter *pIter){
2770 fts5BufferFree(&pIter->term);
2771 fts5DataRelease(pIter->pLeaf);
2772 fts5DataRelease(pIter->pNextLeaf);
2773 fts5IndexFreeArray(pIter->apTombstone, pIter->nTombstone);
2774 fts5DlidxIterFree(pIter->pDlidx);
2775 sqlite3_free(pIter->aRowidOffset);
2776 memset(pIter, 0, sizeof(Fts5SegIter));
#ifdef SQLITE_DEBUG

/*
** This function is used as part of the big assert() procedure implemented by
** fts5AssertMultiIterSetup(). It ensures that the result currently stored
** in *pRes is the correct result of comparing the current positions of the
** two iterators.
*/
static void fts5AssertComparisonResult(
  Fts5Iter *pIter,
  Fts5SegIter *p1,
  Fts5SegIter *p2,
  Fts5CResult *pRes
){
  /* Indexes of p1 and p2 within pIter->aSeg[]. */
  int i1 = (int)(p1 - pIter->aSeg);
  int i2 = (int)(p2 - pIter->aSeg);

  if( p1->pLeaf || p2->pLeaf ){
    if( p1->pLeaf==0 ){
      assert( pRes->iFirst==i2 );
    }else if( p2->pLeaf==0 ){
      assert( pRes->iFirst==i1 );
    }else{
      int nMin = MIN(p1->term.n, p2->term.n);
      int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
      if( res==0 ) res = p1->term.n - p2->term.n;

      if( res==0 ){
        /* Same term: order is decided by rowid and iteration direction. */
        assert( pRes->bTermEq==1 );
        assert( p1->iRowid!=p2->iRowid );
        res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
      }else{
        assert( pRes->bTermEq==0 );
      }

      if( res<0 ){
        assert( pRes->iFirst==i1 );
      }else{
        assert( pRes->iFirst==i2 );
      }
    }
  }
}

/*
** This function is a no-op unless SQLITE_DEBUG is defined when this module
** is compiled. In that case, this function is essentially an assert()
** statement used to verify that the contents of the pIter->aFirst[] array
** are correct.
*/
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int i;

    assert( (pFirst->pLeaf==0)==pIter->base.bEof );

    /* Check that pIter->iSwitchRowid is set correctly. */
    for(i=0; i<pIter->nSeg; i++){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      assert( p1==pFirst
           || p1->pLeaf==0
           || fts5BufferCompare(&pFirst->term, &p1->term)
           || p1->iRowid==pIter->iSwitchRowid
           || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
      );
    }

    /* Check the aFirst[] entries that compare adjacent aSeg[] pairs. */
    for(i=0; i<pIter->nSeg; i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      Fts5SegIter *p2 = &pIter->aSeg[i+1];
      Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }

    /* Check aFirst[] entries that compare the results of two other
    ** aFirst[] entries. */
    for(i=1; i<(pIter->nSeg / 2); i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
      Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
      Fts5CResult *pRes = &pIter->aFirst[i];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }
  }
}
#else
# define fts5AssertMultiIterSetup(x,y)
#endif
2867 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2869 ** If the returned value is non-zero, then it is the index of an entry
2870 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2871 ** to a key that is a duplicate of another, higher priority,
2872 ** segment-iterator in the pSeg->aSeg[] array.
2874 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2875 int i1; /* Index of left-hand Fts5SegIter */
2876 int i2; /* Index of right-hand Fts5SegIter */
2877 int iRes;
2878 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
2879 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
2880 Fts5CResult *pRes = &pIter->aFirst[iOut];
2882 assert( iOut<pIter->nSeg && iOut>0 );
2883 assert( pIter->bRev==0 || pIter->bRev==1 );
2885 if( iOut>=(pIter->nSeg/2) ){
2886 i1 = (iOut - pIter->nSeg/2) * 2;
2887 i2 = i1 + 1;
2888 }else{
2889 i1 = pIter->aFirst[iOut*2].iFirst;
2890 i2 = pIter->aFirst[iOut*2+1].iFirst;
2892 p1 = &pIter->aSeg[i1];
2893 p2 = &pIter->aSeg[i2];
2895 pRes->bTermEq = 0;
2896 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
2897 iRes = i2;
2898 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
2899 iRes = i1;
2900 }else{
2901 int res = fts5BufferCompare(&p1->term, &p2->term);
2902 if( res==0 ){
2903 assert_nc( i2>i1 );
2904 assert_nc( i2!=0 );
2905 pRes->bTermEq = 1;
2906 if( p1->iRowid==p2->iRowid ){
2907 p1->bDel = p2->bDel;
2908 return i2;
2910 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2912 assert( res!=0 );
2913 if( res<0 ){
2914 iRes = i1;
2915 }else{
2916 iRes = i2;
2920 pRes->iFirst = (u16)iRes;
2921 return 0;
2925 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2926 ** It is an error if leaf iLeafPgno does not exist. Unless the db is
2927 ** a 'secure-delete' db, if it contains no rowids then this is also an error.
2929 static void fts5SegIterGotoPage(
2930 Fts5Index *p, /* FTS5 backend object */
2931 Fts5SegIter *pIter, /* Iterator to advance */
2932 int iLeafPgno
2934 assert( iLeafPgno>pIter->iLeafPgno );
2936 if( iLeafPgno>pIter->pSeg->pgnoLast ){
2937 p->rc = FTS5_CORRUPT;
2938 }else{
2939 fts5DataRelease(pIter->pNextLeaf);
2940 pIter->pNextLeaf = 0;
2941 pIter->iLeafPgno = iLeafPgno-1;
2943 while( p->rc==SQLITE_OK ){
2944 int iOff;
2945 fts5SegIterNextPage(p, pIter);
2946 if( pIter->pLeaf==0 ) break;
2947 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2948 if( iOff>0 ){
2949 u8 *a = pIter->pLeaf->p;
2950 int n = pIter->pLeaf->szLeaf;
2951 if( iOff<4 || iOff>=n ){
2952 p->rc = FTS5_CORRUPT;
2953 }else{
2954 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2955 pIter->iLeafOffset = iOff;
2956 fts5SegIterLoadNPos(p, pIter);
2958 break;
2965 ** Advance the iterator passed as the second argument until it is at or
2966 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2967 ** always advanced at least once.
2969 static void fts5SegIterNextFrom(
2970 Fts5Index *p, /* FTS5 backend object */
2971 Fts5SegIter *pIter, /* Iterator to advance */
2972 i64 iMatch /* Advance iterator at least this far */
2974 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2975 Fts5DlidxIter *pDlidx = pIter->pDlidx;
2976 int iLeafPgno = pIter->iLeafPgno;
2977 int bMove = 1;
2979 assert( pIter->flags & FTS5_SEGITER_ONETERM );
2980 assert( pIter->pDlidx );
2981 assert( pIter->pLeaf );
2983 if( bRev==0 ){
2984 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
2985 iLeafPgno = fts5DlidxIterPgno(pDlidx);
2986 fts5DlidxIterNext(p, pDlidx);
2988 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
2989 if( iLeafPgno>pIter->iLeafPgno ){
2990 fts5SegIterGotoPage(p, pIter, iLeafPgno);
2991 bMove = 0;
2993 }else{
2994 assert( pIter->pNextLeaf==0 );
2995 assert( iMatch<pIter->iRowid );
2996 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
2997 fts5DlidxIterPrev(p, pDlidx);
2999 iLeafPgno = fts5DlidxIterPgno(pDlidx);
3001 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
3003 if( iLeafPgno<pIter->iLeafPgno ){
3004 pIter->iLeafPgno = iLeafPgno+1;
3005 fts5SegIterReverseNewPage(p, pIter);
3006 bMove = 0;
3011 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
3012 if( pIter->pLeaf==0 ) break;
3013 if( bRev==0 && pIter->iRowid>=iMatch ) break;
3014 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
3015 bMove = 1;
3016 }while( p->rc==SQLITE_OK );
3021 ** Free the iterator object passed as the second argument.
3023 static void fts5MultiIterFree(Fts5Iter *pIter){
3024 if( pIter ){
3025 int i;
3026 for(i=0; i<pIter->nSeg; i++){
3027 fts5SegIterClear(&pIter->aSeg[i]);
3029 fts5BufferFree(&pIter->poslist);
3030 sqlite3_free(pIter);
3034 static void fts5MultiIterAdvanced(
3035 Fts5Index *p, /* FTS5 backend to iterate within */
3036 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
3037 int iChanged, /* Index of sub-iterator just advanced */
3038 int iMinset /* Minimum entry in aFirst[] to set */
3040 int i;
3041 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
3042 int iEq;
3043 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
3044 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
3045 assert( p->rc==SQLITE_OK );
3046 pSeg->xNext(p, pSeg, 0);
3047 i = pIter->nSeg + iEq;
3053 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
3054 ** points to the same term though - just a different rowid. This function
3055 ** attempts to update the contents of the pIter->aFirst[] accordingly.
3056 ** If it does so successfully, 0 is returned. Otherwise 1.
3058 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
3059 ** on the iterator instead. That function does the same as this one, except
3060 ** that it deals with more complicated cases as well.
3062 static int fts5MultiIterAdvanceRowid(
3063 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
3064 int iChanged, /* Index of sub-iterator just advanced */
3065 Fts5SegIter **ppFirst
3067 Fts5SegIter *pNew = &pIter->aSeg[iChanged];
3069 if( pNew->iRowid==pIter->iSwitchRowid
3070 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
3072 int i;
3073 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
3074 pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
3075 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
3076 Fts5CResult *pRes = &pIter->aFirst[i];
3078 assert( pNew->pLeaf );
3079 assert( pRes->bTermEq==0 || pOther->pLeaf );
3081 if( pRes->bTermEq ){
3082 if( pNew->iRowid==pOther->iRowid ){
3083 return 1;
3084 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
3085 pIter->iSwitchRowid = pOther->iRowid;
3086 pNew = pOther;
3087 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
3088 pIter->iSwitchRowid = pOther->iRowid;
3091 pRes->iFirst = (u16)(pNew - pIter->aSeg);
3092 if( i==1 ) break;
3094 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
3098 *ppFirst = pNew;
3099 return 0;
3103 ** Set the pIter->bEof variable based on the state of the sub-iterators.
3105 static void fts5MultiIterSetEof(Fts5Iter *pIter){
3106 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3107 pIter->base.bEof = pSeg->pLeaf==0;
3108 pIter->iSwitchRowid = pSeg->iRowid;
/*
** The argument to this macro must be an Fts5Data structure containing a
** tombstone hash page. This macro returns the key-size of the hash-page:
** 4 if the first byte of the page is 4, otherwise 8.
*/
#define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)

/*
** Number of slots on tombstone hash page pPg. A page of 16 bytes or fewer
** is treated as having a single slot; otherwise the 8 bytes that precede
** the slot array are excluded and the remainder is divided by the key
** size.
*/
#define TOMBSTONE_NSLOT(pPg)   \
  (((pPg)->nn > 16) ? (((pPg)->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
3121 ** Query a single tombstone hash table for rowid iRowid. Return true if
3122 ** it is found or false otherwise. The tombstone hash table is one of
3123 ** nHashTable tables.
3125 static int fts5IndexTombstoneQuery(
3126 Fts5Data *pHash, /* Hash table page to query */
3127 int nHashTable, /* Number of pages attached to segment */
3128 u64 iRowid /* Rowid to query hash for */
3130 const int szKey = TOMBSTONE_KEYSIZE(pHash);
3131 const int nSlot = TOMBSTONE_NSLOT(pHash);
3132 int iSlot = (iRowid / nHashTable) % nSlot;
3133 int nCollide = nSlot;
3135 if( iRowid==0 ){
3136 return pHash->p[1];
3137 }else if( szKey==4 ){
3138 u32 *aSlot = (u32*)&pHash->p[8];
3139 while( aSlot[iSlot] ){
3140 if( fts5GetU32((u8*)&aSlot[iSlot])==iRowid ) return 1;
3141 if( nCollide--==0 ) break;
3142 iSlot = (iSlot+1)%nSlot;
3144 }else{
3145 u64 *aSlot = (u64*)&pHash->p[8];
3146 while( aSlot[iSlot] ){
3147 if( fts5GetU64((u8*)&aSlot[iSlot])==iRowid ) return 1;
3148 if( nCollide--==0 ) break;
3149 iSlot = (iSlot+1)%nSlot;
3153 return 0;
3157 ** Return true if the iterator passed as the only argument points
3158 ** to an segment entry for which there is a tombstone. Return false
3159 ** if there is no tombstone or if the iterator is already at EOF.
3161 static int fts5MultiIterIsDeleted(Fts5Iter *pIter){
3162 int iFirst = pIter->aFirst[1].iFirst;
3163 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
3165 if( pSeg->pLeaf && pSeg->nTombstone ){
3166 /* Figure out which page the rowid might be present on. */
3167 int iPg = ((u64)pSeg->iRowid) % pSeg->nTombstone;
3168 assert( iPg>=0 );
3170 /* If tombstone hash page iPg has not yet been loaded from the
3171 ** database, load it now. */
3172 if( pSeg->apTombstone[iPg]==0 ){
3173 pSeg->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
3174 FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)
3176 if( pSeg->apTombstone[iPg]==0 ) return 0;
3179 return fts5IndexTombstoneQuery(
3180 pSeg->apTombstone[iPg],
3181 pSeg->nTombstone,
3182 pSeg->iRowid
3186 return 0;
3190 ** Move the iterator to the next entry.
3192 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
3193 ** considered an error if the iterator reaches EOF, or if it is already at
3194 ** EOF when this function is called.
3196 static void fts5MultiIterNext(
3197 Fts5Index *p,
3198 Fts5Iter *pIter,
3199 int bFrom, /* True if argument iFrom is valid */
3200 i64 iFrom /* Advance at least as far as this */
3202 int bUseFrom = bFrom;
3203 assert( pIter->base.bEof==0 );
3204 while( p->rc==SQLITE_OK ){
3205 int iFirst = pIter->aFirst[1].iFirst;
3206 int bNewTerm = 0;
3207 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
3208 assert( p->rc==SQLITE_OK );
3209 if( bUseFrom && pSeg->pDlidx ){
3210 fts5SegIterNextFrom(p, pSeg, iFrom);
3211 }else{
3212 pSeg->xNext(p, pSeg, &bNewTerm);
3215 if( pSeg->pLeaf==0 || bNewTerm
3216 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
3218 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
3219 fts5MultiIterSetEof(pIter);
3220 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
3221 if( pSeg->pLeaf==0 ) return;
3224 fts5AssertMultiIterSetup(p, pIter);
3225 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
3226 if( (pIter->bSkipEmpty==0 || pSeg->nPos)
3227 && 0==fts5MultiIterIsDeleted(pIter)
3229 pIter->xSetOutputs(pIter, pSeg);
3230 return;
3232 bUseFrom = 0;
3236 static void fts5MultiIterNext2(
3237 Fts5Index *p,
3238 Fts5Iter *pIter,
3239 int *pbNewTerm /* OUT: True if *might* be new term */
3241 assert( pIter->bSkipEmpty );
3242 if( p->rc==SQLITE_OK ){
3243 *pbNewTerm = 0;
3245 int iFirst = pIter->aFirst[1].iFirst;
3246 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
3247 int bNewTerm = 0;
3249 assert( p->rc==SQLITE_OK );
3250 pSeg->xNext(p, pSeg, &bNewTerm);
3251 if( pSeg->pLeaf==0 || bNewTerm
3252 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
3254 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
3255 fts5MultiIterSetEof(pIter);
3256 *pbNewTerm = 1;
3258 fts5AssertMultiIterSetup(p, pIter);
3260 }while( (fts5MultiIterIsEmpty(p, pIter) || fts5MultiIterIsDeleted(pIter))
3261 && (p->rc==SQLITE_OK)
3266 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
3267 UNUSED_PARAM2(pUnused1, pUnused2);
3270 static Fts5Iter *fts5MultiIterAlloc(
3271 Fts5Index *p, /* FTS5 backend to iterate within */
3272 int nSeg
3274 Fts5Iter *pNew;
3275 int nSlot; /* Power of two >= nSeg */
3277 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
3278 pNew = fts5IdxMalloc(p,
3279 sizeof(Fts5Iter) + /* pNew */
3280 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
3281 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
3283 if( pNew ){
3284 pNew->nSeg = nSlot;
3285 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
3286 pNew->pIndex = p;
3287 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
3289 return pNew;
3292 static void fts5PoslistCallback(
3293 Fts5Index *pUnused,
3294 void *pContext,
3295 const u8 *pChunk, int nChunk
3297 UNUSED_PARAM(pUnused);
3298 assert_nc( nChunk>=0 );
3299 if( nChunk>0 ){
3300 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
3304 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
3305 struct PoslistCallbackCtx {
3306 Fts5Buffer *pBuf; /* Append to this buffer */
3307 Fts5Colset *pColset; /* Restrict matches to this column */
3308 int eState; /* See above */
3311 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
3312 struct PoslistOffsetsCtx {
3313 Fts5Buffer *pBuf; /* Append to this buffer */
3314 Fts5Colset *pColset; /* Restrict matches to this column */
3315 int iRead;
3316 int iWrite;
3320 ** TODO: Make this more efficient!
3322 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
3323 int i;
3324 for(i=0; i<pColset->nCol; i++){
3325 if( pColset->aiCol[i]==iCol ) return 1;
3327 return 0;
3330 static void fts5PoslistOffsetsCallback(
3331 Fts5Index *pUnused,
3332 void *pContext,
3333 const u8 *pChunk, int nChunk
3335 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
3336 UNUSED_PARAM(pUnused);
3337 assert_nc( nChunk>=0 );
3338 if( nChunk>0 ){
3339 int i = 0;
3340 while( i<nChunk ){
3341 int iVal;
3342 i += fts5GetVarint32(&pChunk[i], iVal);
3343 iVal += pCtx->iRead - 2;
3344 pCtx->iRead = iVal;
3345 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
3346 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
3347 pCtx->iWrite = iVal;
3353 static void fts5PoslistFilterCallback(
3354 Fts5Index *pUnused,
3355 void *pContext,
3356 const u8 *pChunk, int nChunk
3358 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
3359 UNUSED_PARAM(pUnused);
3360 assert_nc( nChunk>=0 );
3361 if( nChunk>0 ){
3362 /* Search through to find the first varint with value 1. This is the
3363 ** start of the next columns hits. */
3364 int i = 0;
3365 int iStart = 0;
3367 if( pCtx->eState==2 ){
3368 int iCol;
3369 fts5FastGetVarint32(pChunk, i, iCol);
3370 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
3371 pCtx->eState = 1;
3372 fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
3373 }else{
3374 pCtx->eState = 0;
3378 do {
3379 while( i<nChunk && pChunk[i]!=0x01 ){
3380 while( pChunk[i] & 0x80 ) i++;
3381 i++;
3383 if( pCtx->eState ){
3384 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3386 if( i<nChunk ){
3387 int iCol;
3388 iStart = i;
3389 i++;
3390 if( i>=nChunk ){
3391 pCtx->eState = 2;
3392 }else{
3393 fts5FastGetVarint32(pChunk, i, iCol);
3394 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
3395 if( pCtx->eState ){
3396 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
3397 iStart = i;
3401 }while( i<nChunk );
3405 static void fts5ChunkIterate(
3406 Fts5Index *p, /* Index object */
3407 Fts5SegIter *pSeg, /* Poslist of this iterator */
3408 void *pCtx, /* Context pointer for xChunk callback */
3409 void (*xChunk)(Fts5Index*, void*, const u8*, int)
3411 int nRem = pSeg->nPos; /* Number of bytes still to come */
3412 Fts5Data *pData = 0;
3413 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3414 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
3415 int pgno = pSeg->iLeafPgno;
3416 int pgnoSave = 0;
3418 /* This function does not work with detail=none databases. */
3419 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
3421 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
3422 pgnoSave = pgno+1;
3425 while( 1 ){
3426 xChunk(p, pCtx, pChunk, nChunk);
3427 nRem -= nChunk;
3428 fts5DataRelease(pData);
3429 if( nRem<=0 ){
3430 break;
3431 }else if( pSeg->pSeg==0 ){
3432 p->rc = FTS5_CORRUPT;
3433 return;
3434 }else{
3435 pgno++;
3436 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
3437 if( pData==0 ) break;
3438 pChunk = &pData->p[4];
3439 nChunk = MIN(nRem, pData->szLeaf - 4);
3440 if( pgno==pgnoSave ){
3441 assert( pSeg->pNextLeaf==0 );
3442 pSeg->pNextLeaf = pData;
3443 pData = 0;
3450 ** Iterator pIter currently points to a valid entry (not EOF). This
3451 ** function appends the position list data for the current entry to
3452 ** buffer pBuf. It does not make a copy of the position-list size
3453 ** field.
3455 static void fts5SegiterPoslist(
3456 Fts5Index *p,
3457 Fts5SegIter *pSeg,
3458 Fts5Colset *pColset,
3459 Fts5Buffer *pBuf
3461 assert( pBuf!=0 );
3462 assert( pSeg!=0 );
3463 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
3464 assert( pBuf->p!=0 );
3465 assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
3466 memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
3467 if( pColset==0 ){
3468 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3469 }else{
3470 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3471 PoslistCallbackCtx sCtx;
3472 sCtx.pBuf = pBuf;
3473 sCtx.pColset = pColset;
3474 sCtx.eState = fts5IndexColsetTest(pColset, 0);
3475 assert( sCtx.eState==0 || sCtx.eState==1 );
3476 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3477 }else{
3478 PoslistOffsetsCtx sCtx;
3479 memset(&sCtx, 0, sizeof(sCtx));
3480 sCtx.pBuf = pBuf;
3481 sCtx.pColset = pColset;
3482 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3489 ** Parameter pPos points to a buffer containing a position list, size nPos.
3490 ** This function filters it according to pColset (which must be non-NULL)
3491 ** and sets pIter->base.pData/nData to point to the new position list.
3492 ** If memory is required for the new position list, use buffer pIter->poslist.
3493 ** Or, if the new position list is a contiguous subset of the input, set
3494 ** pIter->base.pData/nData to point directly to it.
3496 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
3497 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
3498 ** before returning.
3500 static void fts5IndexExtractColset(
3501 int *pRc,
3502 Fts5Colset *pColset, /* Colset to filter on */
3503 const u8 *pPos, int nPos, /* Position list */
3504 Fts5Iter *pIter
3506 if( *pRc==SQLITE_OK ){
3507 const u8 *p = pPos;
3508 const u8 *aCopy = p;
3509 const u8 *pEnd = &p[nPos]; /* One byte past end of position list */
3510 int i = 0;
3511 int iCurrent = 0;
3513 if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
3514 return;
3517 while( 1 ){
3518 while( pColset->aiCol[i]<iCurrent ){
3519 i++;
3520 if( i==pColset->nCol ){
3521 pIter->base.pData = pIter->poslist.p;
3522 pIter->base.nData = pIter->poslist.n;
3523 return;
3527 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3528 ** not part of a varint */
3529 while( p<pEnd && *p!=0x01 ){
3530 while( *p++ & 0x80 );
3533 if( pColset->aiCol[i]==iCurrent ){
3534 if( pColset->nCol==1 ){
3535 pIter->base.pData = aCopy;
3536 pIter->base.nData = p-aCopy;
3537 return;
3539 fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
3541 if( p>=pEnd ){
3542 pIter->base.pData = pIter->poslist.p;
3543 pIter->base.nData = pIter->poslist.n;
3544 return;
3546 aCopy = p++;
3547 iCurrent = *p++;
3548 if( iCurrent & 0x80 ){
3549 p--;
3550 p += fts5GetVarint32(p, iCurrent);
3558 ** xSetOutputs callback used by detail=none tables.
3560 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3561 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3562 pIter->base.iRowid = pSeg->iRowid;
3563 pIter->base.nData = pSeg->nPos;
3567 ** xSetOutputs callback used by detail=full and detail=col tables when no
3568 ** column filters are specified.
3570 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3571 pIter->base.iRowid = pSeg->iRowid;
3572 pIter->base.nData = pSeg->nPos;
3574 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3575 assert( pIter->pColset==0 );
3577 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3578 /* All data is stored on the current page. Populate the output
3579 ** variables to point into the body of the page object. */
3580 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3581 }else{
3582 /* The data is distributed over two or more pages. Copy it into the
3583 ** Fts5Iter.poslist buffer and then set the output pointer to point
3584 ** to this buffer. */
3585 fts5BufferZero(&pIter->poslist);
3586 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3587 pIter->base.pData = pIter->poslist.p;
3592 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3593 ** against no columns at all).
3595 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3596 UNUSED_PARAM(pSeg);
3597 pIter->base.nData = 0;
3601 ** xSetOutputs callback used by detail=col when there is a column filter
3602 ** and there are 100 or more columns. Also called as a fallback from
3603 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3605 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3606 fts5BufferZero(&pIter->poslist);
3607 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3608 pIter->base.iRowid = pSeg->iRowid;
3609 pIter->base.pData = pIter->poslist.p;
3610 pIter->base.nData = pIter->poslist.n;
3614 ** xSetOutputs callback used when:
3616 ** * detail=col,
3617 ** * there is a column filter, and
3618 ** * the table contains 100 or fewer columns.
3620 ** The last point is to ensure all column numbers are stored as
3621 ** single-byte varints.
3623 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3625 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3626 assert( pIter->pColset );
3628 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3629 fts5IterSetOutputs_Col(pIter, pSeg);
3630 }else{
3631 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3632 u8 *pEnd = (u8*)&a[pSeg->nPos];
3633 int iPrev = 0;
3634 int *aiCol = pIter->pColset->aiCol;
3635 int *aiColEnd = &aiCol[pIter->pColset->nCol];
3637 u8 *aOut = pIter->poslist.p;
3638 int iPrevOut = 0;
3640 pIter->base.iRowid = pSeg->iRowid;
3642 while( a<pEnd ){
3643 iPrev += (int)a++[0] - 2;
3644 while( *aiCol<iPrev ){
3645 aiCol++;
3646 if( aiCol==aiColEnd ) goto setoutputs_col_out;
3648 if( *aiCol==iPrev ){
3649 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3650 iPrevOut = iPrev;
3654 setoutputs_col_out:
3655 pIter->base.pData = pIter->poslist.p;
3656 pIter->base.nData = aOut - pIter->poslist.p;
3661 ** xSetOutputs callback used by detail=full when there is a column filter.
3663 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3664 Fts5Colset *pColset = pIter->pColset;
3665 pIter->base.iRowid = pSeg->iRowid;
3667 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3668 assert( pColset );
3670 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3671 /* All data is stored on the current page. Populate the output
3672 ** variables to point into the body of the page object. */
3673 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3674 int *pRc = &pIter->pIndex->rc;
3675 fts5BufferZero(&pIter->poslist);
3676 fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
3677 }else{
3678 /* The data is distributed over two or more pages. Copy it into the
3679 ** Fts5Iter.poslist buffer and then set the output pointer to point
3680 ** to this buffer. */
3681 fts5BufferZero(&pIter->poslist);
3682 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3683 pIter->base.pData = pIter->poslist.p;
3684 pIter->base.nData = pIter->poslist.n;
3688 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
3689 assert( pIter!=0 || (*pRc)!=SQLITE_OK );
3690 if( *pRc==SQLITE_OK ){
3691 Fts5Config *pConfig = pIter->pIndex->pConfig;
3692 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
3693 pIter->xSetOutputs = fts5IterSetOutputs_None;
3696 else if( pIter->pColset==0 ){
3697 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
3700 else if( pIter->pColset->nCol==0 ){
3701 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
3704 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
3705 pIter->xSetOutputs = fts5IterSetOutputs_Full;
3708 else{
3709 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3710 if( pConfig->nCol<=100 ){
3711 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
3712 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
3713 }else{
3714 pIter->xSetOutputs = fts5IterSetOutputs_Col;
3722 ** Allocate a new Fts5Iter object.
3724 ** The new object will be used to iterate through data in structure pStruct.
3725 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3726 ** is zero or greater, data from the first nSegment segments on level iLevel
3727 ** is merged.
3729 ** The iterator initially points to the first term/rowid entry in the
3730 ** iterated data.
3732 static void fts5MultiIterNew(
3733 Fts5Index *p, /* FTS5 backend to iterate within */
3734 Fts5Structure *pStruct, /* Structure of specific index */
3735 int flags, /* FTS5INDEX_QUERY_XXX flags */
3736 Fts5Colset *pColset, /* Colset to filter on (or NULL) */
3737 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
3738 int iLevel, /* Level to iterate (-1 for all) */
3739 int nSegment, /* Number of segments to merge (iLevel>=0) */
3740 Fts5Iter **ppOut /* New object */
3742 int nSeg = 0; /* Number of segment-iters in use */
3743 int iIter = 0; /* */
3744 int iSeg; /* Used to iterate through segments */
3745 Fts5StructureLevel *pLvl;
3746 Fts5Iter *pNew;
3748 assert( (pTerm==0 && nTerm==0) || iLevel<0 );
3750 /* Allocate space for the new multi-seg-iterator. */
3751 if( p->rc==SQLITE_OK ){
3752 if( iLevel<0 ){
3753 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3754 nSeg = pStruct->nSegment;
3755 nSeg += (p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH));
3756 }else{
3757 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
3760 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
3761 if( pNew==0 ){
3762 assert( p->rc!=SQLITE_OK );
3763 goto fts5MultiIterNew_post_check;
3765 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
3766 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
3767 pNew->pColset = pColset;
3768 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
3769 fts5IterSetOutputCb(&p->rc, pNew);
3772 /* Initialize each of the component segment iterators. */
3773 if( p->rc==SQLITE_OK ){
3774 if( iLevel<0 ){
3775 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
3776 if( p->pHash && 0==(flags & FTS5INDEX_QUERY_SKIPHASH) ){
3777 /* Add a segment iterator for the current contents of the hash table. */
3778 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3779 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
3781 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
3782 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
3783 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
3784 Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3785 if( pTerm==0 ){
3786 fts5SegIterInit(p, pSeg, pIter);
3787 }else{
3788 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
3792 }else{
3793 pLvl = &pStruct->aLevel[iLevel];
3794 for(iSeg=nSeg-1; iSeg>=0; iSeg--){
3795 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
3798 assert( iIter==nSeg );
3801 /* If the above was successful, each component iterators now points
3802 ** to the first entry in its segment. In this case initialize the
3803 ** aFirst[] array. Or, if an error has occurred, free the iterator
3804 ** object and set the output variable to NULL. */
3805 if( p->rc==SQLITE_OK ){
3806 for(iIter=pNew->nSeg-1; iIter>0; iIter--){
3807 int iEq;
3808 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
3809 Fts5SegIter *pSeg = &pNew->aSeg[iEq];
3810 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
3811 fts5MultiIterAdvanced(p, pNew, iEq, iIter);
3814 fts5MultiIterSetEof(pNew);
3815 fts5AssertMultiIterSetup(p, pNew);
3817 if( (pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew))
3818 || fts5MultiIterIsDeleted(pNew)
3820 fts5MultiIterNext(p, pNew, 0, 0);
3821 }else if( pNew->base.bEof==0 ){
3822 Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
3823 pNew->xSetOutputs(pNew, pSeg);
3826 }else{
3827 fts5MultiIterFree(pNew);
3828 *ppOut = 0;
3831 fts5MultiIterNew_post_check:
3832 assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
3833 return;
3837 ** Create an Fts5Iter that iterates through the doclist provided
3838 ** as the second argument.
3840 static void fts5MultiIterNew2(
3841 Fts5Index *p, /* FTS5 backend to iterate within */
3842 Fts5Data *pData, /* Doclist to iterate through */
3843 int bDesc, /* True for descending rowid order */
3844 Fts5Iter **ppOut /* New object */
3846 Fts5Iter *pNew;
3847 pNew = fts5MultiIterAlloc(p, 2);
3848 if( pNew ){
3849 Fts5SegIter *pIter = &pNew->aSeg[1];
3851 pIter->flags = FTS5_SEGITER_ONETERM;
3852 if( pData->szLeaf>0 ){
3853 pIter->pLeaf = pData;
3854 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
3855 pIter->iEndofDoclist = pData->nn;
3856 pNew->aFirst[1].iFirst = 1;
3857 if( bDesc ){
3858 pNew->bRev = 1;
3859 pIter->flags |= FTS5_SEGITER_REVERSE;
3860 fts5SegIterReverseInitPage(p, pIter);
3861 }else{
3862 fts5SegIterLoadNPos(p, pIter);
3864 pData = 0;
3865 }else{
3866 pNew->base.bEof = 1;
3868 fts5SegIterSetNext(p, pIter);
3870 *ppOut = pNew;
3873 fts5DataRelease(pData);
3877 ** Return true if the iterator is at EOF or if an error has occurred.
3878 ** False otherwise.
3880 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
3881 assert( pIter!=0 || p->rc!=SQLITE_OK );
3882 assert( p->rc!=SQLITE_OK
3883 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
3885 return (p->rc || pIter->base.bEof);
3889 ** Return the rowid of the entry that the iterator currently points
3890 ** to. If the iterator points to EOF when this function is called the
3891 ** results are undefined.
3893 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
3894 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
3895 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
3899 ** Move the iterator to the next entry at or following iMatch.
3901 static void fts5MultiIterNextFrom(
3902 Fts5Index *p,
3903 Fts5Iter *pIter,
3904 i64 iMatch
3906 while( 1 ){
3907 i64 iRowid;
3908 fts5MultiIterNext(p, pIter, 1, iMatch);
3909 if( fts5MultiIterEof(p, pIter) ) break;
3910 iRowid = fts5MultiIterRowid(pIter);
3911 if( pIter->bRev==0 && iRowid>=iMatch ) break;
3912 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
3917 ** Return a pointer to a buffer containing the term associated with the
3918 ** entry that the iterator currently points to.
3920 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
3921 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3922 *pn = p->term.n;
3923 return p->term.p;
3927 ** Allocate a new segment-id for the structure pStruct. The new segment
3928 ** id must be between 1 and 65335 inclusive, and must not be used by
3929 ** any currently existing segment. If a free segment id cannot be found,
3930 ** SQLITE_FULL is returned.
3932 ** If an error has already occurred, this function is a no-op. 0 is
3933 ** returned in this case.
3935 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
3936 int iSegid = 0;
3938 if( p->rc==SQLITE_OK ){
3939 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
3940 p->rc = SQLITE_FULL;
3941 }else{
3942 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
3943 ** array is 63 elements, or 252 bytes, in size. */
3944 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
3945 int iLvl, iSeg;
3946 int i;
3947 u32 mask;
3948 memset(aUsed, 0, sizeof(aUsed));
3949 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3950 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3951 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
3952 if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
3953 aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
3958 for(i=0; aUsed[i]==0xFFFFFFFF; i++);
3959 mask = aUsed[i];
3960 for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
3961 iSegid += 1 + i*32;
3963 #ifdef SQLITE_DEBUG
3964 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3965 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3966 assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
3969 assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
3972 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
3973 if( p->rc==SQLITE_OK ){
3974 u8 aBlob[2] = {0xff, 0xff};
3975 sqlite3_bind_int(pIdxSelect, 1, iSegid);
3976 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
3977 assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
3978 p->rc = sqlite3_reset(pIdxSelect);
3979 sqlite3_bind_null(pIdxSelect, 2);
3982 #endif
3986 return iSegid;
3990 ** Discard all data currently cached in the hash-tables.
3992 static void fts5IndexDiscardData(Fts5Index *p){
3993 assert( p->pHash || p->nPendingData==0 );
3994 if( p->pHash ){
3995 sqlite3Fts5HashClear(p->pHash);
3996 p->nPendingData = 0;
3997 p->nPendingRow = 0;
3999 p->nContentlessDelete = 0;
4003 ** Return the size of the prefix, in bytes, that buffer
4004 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
4006 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
4007 ** than buffer (pOld/nOld).
4009 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
4010 int i;
4011 for(i=0; i<nOld; i++){
4012 if( pOld[i]!=pNew[i] ) break;
4014 return i;
4017 static void fts5WriteDlidxClear(
4018 Fts5Index *p,
4019 Fts5SegWriter *pWriter,
4020 int bFlush /* If true, write dlidx to disk */
4022 int i;
4023 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
4024 for(i=0; i<pWriter->nDlidx; i++){
4025 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
4026 if( pDlidx->buf.n==0 ) break;
4027 if( bFlush ){
4028 assert( pDlidx->pgno!=0 );
4029 fts5DataWrite(p,
4030 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
4031 pDlidx->buf.p, pDlidx->buf.n
4034 sqlite3Fts5BufferZero(&pDlidx->buf);
4035 pDlidx->bPrevValid = 0;
4040 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
4041 ** Any new array elements are zeroed before returning.
4043 static int fts5WriteDlidxGrow(
4044 Fts5Index *p,
4045 Fts5SegWriter *pWriter,
4046 int nLvl
4048 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
4049 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
4050 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
4052 if( aDlidx==0 ){
4053 p->rc = SQLITE_NOMEM;
4054 }else{
4055 size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
4056 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
4057 pWriter->aDlidx = aDlidx;
4058 pWriter->nDlidx = nLvl;
4061 return p->rc;
4065 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
4066 ** enough, flush it to disk and return 1. Otherwise discard it and return
4067 ** zero.
4069 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
4070 int bFlag = 0;
4072 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
4073 ** to the database, also write the doclist-index to disk. */
4074 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
4075 bFlag = 1;
4077 fts5WriteDlidxClear(p, pWriter, bFlag);
4078 pWriter->nEmpty = 0;
4079 return bFlag;
4083 ** This function is called whenever processing of the doclist for the
4084 ** last term on leaf page (pWriter->iBtPage) is completed.
4086 ** The doclist-index for that term is currently stored in-memory within the
4087 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
4088 ** writes it out to disk. Or, if it is too small to bother with, discards
4089 ** it.
4091 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
4093 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
4094 int bFlag;
4096 assert( pWriter->iBtPage || pWriter->nEmpty==0 );
4097 if( pWriter->iBtPage==0 ) return;
4098 bFlag = fts5WriteFlushDlidx(p, pWriter);
4100 if( p->rc==SQLITE_OK ){
4101 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
4102 /* The following was already done in fts5WriteInit(): */
4103 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
4104 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
4105 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
4106 sqlite3_step(p->pIdxWriter);
4107 p->rc = sqlite3_reset(p->pIdxWriter);
4108 sqlite3_bind_null(p->pIdxWriter, 2);
4110 pWriter->iBtPage = 0;
4114 ** This is called once for each leaf page except the first that contains
4115 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
4116 ** is larger than all terms written to earlier leaves, and equal to or
4117 ** smaller than the first term on the new leaf.
4119 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
4120 ** has already occurred when this function is called, it is a no-op.
4122 static void fts5WriteBtreeTerm(
4123 Fts5Index *p, /* FTS5 backend object */
4124 Fts5SegWriter *pWriter, /* Writer object */
4125 int nTerm, const u8 *pTerm /* First term on new page */
4127 fts5WriteFlushBtree(p, pWriter);
4128 if( p->rc==SQLITE_OK ){
4129 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
4130 pWriter->iBtPage = pWriter->writer.pgno;
4135 ** This function is called when flushing a leaf page that contains no
4136 ** terms at all to disk.
4138 static void fts5WriteBtreeNoTerm(
4139 Fts5Index *p, /* FTS5 backend object */
4140 Fts5SegWriter *pWriter /* Writer object */
4142 /* If there were no rowids on the leaf page either and the doclist-index
4143 ** has already been started, append an 0x00 byte to it. */
4144 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
4145 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
4146 assert( pDlidx->bPrevValid );
4147 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
4150 /* Increment the "number of sequential leaves without a term" counter. */
4151 pWriter->nEmpty++;
4154 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
4155 i64 iRowid;
4156 int iOff;
4158 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
4159 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
4160 return iRowid;
4164 ** Rowid iRowid has just been appended to the current leaf page. It is the
4165 ** first on the page. This function appends an appropriate entry to the current
4166 ** doclist-index.
4168 static void fts5WriteDlidxAppend(
4169 Fts5Index *p,
4170 Fts5SegWriter *pWriter,
4171 i64 iRowid
4173 int i;
4174 int bDone = 0;
4176 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
4177 i64 iVal;
4178 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
4180 if( pDlidx->buf.n>=p->pConfig->pgsz ){
4181 /* The current doclist-index page is full. Write it to disk and push
4182 ** a copy of iRowid (which will become the first rowid on the next
4183 ** doclist-index leaf page) up into the next level of the b-tree
4184 ** hierarchy. If the node being flushed is currently the root node,
4185 ** also push its first rowid upwards. */
4186 pDlidx->buf.p[0] = 0x01; /* Not the root node */
4187 fts5DataWrite(p,
4188 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
4189 pDlidx->buf.p, pDlidx->buf.n
4191 fts5WriteDlidxGrow(p, pWriter, i+2);
4192 pDlidx = &pWriter->aDlidx[i];
4193 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
4194 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
4196 /* This was the root node. Push its first rowid up to the new root. */
4197 pDlidx[1].pgno = pDlidx->pgno;
4198 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
4199 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
4200 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
4201 pDlidx[1].bPrevValid = 1;
4202 pDlidx[1].iPrev = iFirst;
4205 sqlite3Fts5BufferZero(&pDlidx->buf);
4206 pDlidx->bPrevValid = 0;
4207 pDlidx->pgno++;
4208 }else{
4209 bDone = 1;
4212 if( pDlidx->bPrevValid ){
4213 iVal = iRowid - pDlidx->iPrev;
4214 }else{
4215 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
4216 assert( pDlidx->buf.n==0 );
4217 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
4218 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
4219 iVal = iRowid;
4222 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
4223 pDlidx->bPrevValid = 1;
4224 pDlidx->iPrev = iRowid;
4228 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
4229 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
4230 Fts5PageWriter *pPage = &pWriter->writer;
4231 i64 iRowid;
4233 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
4235 /* Set the szLeaf header field. */
4236 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
4237 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
4239 if( pWriter->bFirstTermInPage ){
4240 /* No term was written to this page. */
4241 assert( pPage->pgidx.n==0 );
4242 fts5WriteBtreeNoTerm(p, pWriter);
4243 }else{
4244 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
4245 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
4248 /* Write the page out to disk */
4249 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
4250 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
4252 /* Initialize the next page. */
4253 fts5BufferZero(&pPage->buf);
4254 fts5BufferZero(&pPage->pgidx);
4255 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
4256 pPage->iPrevPgidx = 0;
4257 pPage->pgno++;
4259 /* Increase the leaves written counter */
4260 pWriter->nLeafWritten++;
4262 /* The new leaf holds no terms or rowids */
4263 pWriter->bFirstTermInPage = 1;
4264 pWriter->bFirstRowidInPage = 1;
4268 ** Append term pTerm/nTerm to the segment being written by the writer passed
4269 ** as the second argument.
4271 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4272 ** already occurred, this function is a no-op.
4274 static void fts5WriteAppendTerm(
4275 Fts5Index *p,
4276 Fts5SegWriter *pWriter,
4277 int nTerm, const u8 *pTerm
4279 int nPrefix; /* Bytes of prefix compression for term */
4280 Fts5PageWriter *pPage = &pWriter->writer;
4281 Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
4282 int nMin = MIN(pPage->term.n, nTerm);
4284 assert( p->rc==SQLITE_OK );
4285 assert( pPage->buf.n>=4 );
4286 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
4288 /* If the current leaf page is full, flush it to disk. */
4289 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
4290 if( pPage->buf.n>4 ){
4291 fts5WriteFlushLeaf(p, pWriter);
4292 if( p->rc!=SQLITE_OK ) return;
4294 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
4297 /* TODO1: Updating pgidx here. */
4298 pPgidx->n += sqlite3Fts5PutVarint(
4299 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
4301 pPage->iPrevPgidx = pPage->buf.n;
4302 #if 0
4303 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
4304 pPgidx->n += 2;
4305 #endif
4307 if( pWriter->bFirstTermInPage ){
4308 nPrefix = 0;
4309 if( pPage->pgno!=1 ){
4310 /* This is the first term on a leaf that is not the leftmost leaf in
4311 ** the segment b-tree. In this case it is necessary to add a term to
4312 ** the b-tree hierarchy that is (a) larger than the largest term
4313 ** already written to the segment and (b) smaller than or equal to
4314 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
4315 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
4316 ** previous term.
4318 ** Usually, the previous term is available in pPage->term. The exception
4319 ** is if this is the first term written in an incremental-merge step.
4320 ** In this case the previous term is not available, so just write a
4321 ** copy of (pTerm/nTerm) into the parent node. This is slightly
4322 ** inefficient, but still correct. */
4323 int n = nTerm;
4324 if( pPage->term.n ){
4325 n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4327 fts5WriteBtreeTerm(p, pWriter, n, pTerm);
4328 if( p->rc!=SQLITE_OK ) return;
4329 pPage = &pWriter->writer;
4331 }else{
4332 nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4333 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
4336 /* Append the number of bytes of new data, then the term data itself
4337 ** to the page. */
4338 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
4339 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
4341 /* Update the Fts5PageWriter.term field. */
4342 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
4343 pWriter->bFirstTermInPage = 0;
4345 pWriter->bFirstRowidInPage = 0;
4346 pWriter->bFirstRowidInDoclist = 1;
4348 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
4349 pWriter->aDlidx[0].pgno = pPage->pgno;
4353 ** Append a rowid and position-list size field to the writers output.
4355 static void fts5WriteAppendRowid(
4356 Fts5Index *p,
4357 Fts5SegWriter *pWriter,
4358 i64 iRowid
4360 if( p->rc==SQLITE_OK ){
4361 Fts5PageWriter *pPage = &pWriter->writer;
4363 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
4364 fts5WriteFlushLeaf(p, pWriter);
4367 /* If this is to be the first rowid written to the page, set the
4368 ** rowid-pointer in the page-header. Also append a value to the dlidx
4369 ** buffer, in case a doclist-index is required. */
4370 if( pWriter->bFirstRowidInPage ){
4371 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
4372 fts5WriteDlidxAppend(p, pWriter, iRowid);
4375 /* Write the rowid. */
4376 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
4377 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
4378 }else{
4379 assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
4380 fts5BufferAppendVarint(&p->rc, &pPage->buf,
4381 (u64)iRowid - (u64)pWriter->iPrevRowid
4384 pWriter->iPrevRowid = iRowid;
4385 pWriter->bFirstRowidInDoclist = 0;
4386 pWriter->bFirstRowidInPage = 0;
4390 static void fts5WriteAppendPoslistData(
4391 Fts5Index *p,
4392 Fts5SegWriter *pWriter,
4393 const u8 *aData,
4394 int nData
4396 Fts5PageWriter *pPage = &pWriter->writer;
4397 const u8 *a = aData;
4398 int n = nData;
4400 assert( p->pConfig->pgsz>0 );
4401 while( p->rc==SQLITE_OK
4402 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
4404 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
4405 int nCopy = 0;
4406 while( nCopy<nReq ){
4407 i64 dummy;
4408 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
4410 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
4411 a += nCopy;
4412 n -= nCopy;
4413 fts5WriteFlushLeaf(p, pWriter);
4415 if( n>0 ){
4416 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
4421 ** Flush any data cached by the writer object to the database. Free any
4422 ** allocations associated with the writer.
4424 static void fts5WriteFinish(
4425 Fts5Index *p,
4426 Fts5SegWriter *pWriter, /* Writer object */
4427 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
4429 int i;
4430 Fts5PageWriter *pLeaf = &pWriter->writer;
4431 if( p->rc==SQLITE_OK ){
4432 assert( pLeaf->pgno>=1 );
4433 if( pLeaf->buf.n>4 ){
4434 fts5WriteFlushLeaf(p, pWriter);
4436 *pnLeaf = pLeaf->pgno-1;
4437 if( pLeaf->pgno>1 ){
4438 fts5WriteFlushBtree(p, pWriter);
4441 fts5BufferFree(&pLeaf->term);
4442 fts5BufferFree(&pLeaf->buf);
4443 fts5BufferFree(&pLeaf->pgidx);
4444 fts5BufferFree(&pWriter->btterm);
4446 for(i=0; i<pWriter->nDlidx; i++){
4447 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4449 sqlite3_free(pWriter->aDlidx);
4452 static void fts5WriteInit(
4453 Fts5Index *p,
4454 Fts5SegWriter *pWriter,
4455 int iSegid
4457 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
4459 memset(pWriter, 0, sizeof(Fts5SegWriter));
4460 pWriter->iSegid = iSegid;
4462 fts5WriteDlidxGrow(p, pWriter, 1);
4463 pWriter->writer.pgno = 1;
4464 pWriter->bFirstTermInPage = 1;
4465 pWriter->iBtPage = 1;
4467 assert( pWriter->writer.buf.n==0 );
4468 assert( pWriter->writer.pgidx.n==0 );
4470 /* Grow the two buffers to pgsz + padding bytes in size. */
4471 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
4472 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
4474 if( p->pIdxWriter==0 ){
4475 Fts5Config *pConfig = p->pConfig;
4476 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
4477 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
4478 pConfig->zDb, pConfig->zName
4482 if( p->rc==SQLITE_OK ){
4483 /* Initialize the 4-byte leaf-page header to 0x00. */
4484 memset(pWriter->writer.buf.p, 0, 4);
4485 pWriter->writer.buf.n = 4;
4487 /* Bind the current output segment id to the index-writer. This is an
4488 ** optimization over binding the same value over and over as rows are
4489 ** inserted into %_idx by the current writer. */
4490 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
4495 ** Iterator pIter was used to iterate through the input segments of on an
4496 ** incremental merge operation. This function is called if the incremental
4497 ** merge step has finished but the input has not been completely exhausted.
4499 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
4500 int i;
4501 Fts5Buffer buf;
4502 memset(&buf, 0, sizeof(Fts5Buffer));
4503 for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
4504 Fts5SegIter *pSeg = &pIter->aSeg[i];
4505 if( pSeg->pSeg==0 ){
4506 /* no-op */
4507 }else if( pSeg->pLeaf==0 ){
4508 /* All keys from this input segment have been transfered to the output.
4509 ** Set both the first and last page-numbers to 0 to indicate that the
4510 ** segment is now empty. */
4511 pSeg->pSeg->pgnoLast = 0;
4512 pSeg->pSeg->pgnoFirst = 0;
4513 }else{
4514 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
4515 i64 iLeafRowid;
4516 Fts5Data *pData;
4517 int iId = pSeg->pSeg->iSegid;
4518 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
4520 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
4521 pData = fts5LeafRead(p, iLeafRowid);
4522 if( pData ){
4523 if( iOff>pData->szLeaf ){
4524 /* This can occur if the pages that the segments occupy overlap - if
4525 ** a single page has been assigned to more than one segment. In
4526 ** this case a prior iteration of this loop may have corrupted the
4527 ** segment currently being trimmed. */
4528 p->rc = FTS5_CORRUPT;
4529 }else{
4530 fts5BufferZero(&buf);
4531 fts5BufferGrow(&p->rc, &buf, pData->nn);
4532 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
4533 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
4534 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
4535 fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]);
4536 if( p->rc==SQLITE_OK ){
4537 /* Set the szLeaf field */
4538 fts5PutU16(&buf.p[2], (u16)buf.n);
4541 /* Set up the new page-index array */
4542 fts5BufferAppendVarint(&p->rc, &buf, 4);
4543 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
4544 && pSeg->iEndofDoclist<pData->szLeaf
4545 && pSeg->iPgidxOff<=pData->nn
4547 int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
4548 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
4549 fts5BufferAppendBlob(&p->rc, &buf,
4550 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
4554 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
4555 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
4556 fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
4558 fts5DataRelease(pData);
4562 fts5BufferFree(&buf);
4565 static void fts5MergeChunkCallback(
4566 Fts5Index *p,
4567 void *pCtx,
4568 const u8 *pChunk, int nChunk
4570 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
4571 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
4577 static void fts5IndexMergeLevel(
4578 Fts5Index *p, /* FTS5 backend object */
4579 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */
4580 int iLvl, /* Level to read input from */
4581 int *pnRem /* Write up to this many output leaves */
4583 Fts5Structure *pStruct = *ppStruct;
4584 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4585 Fts5StructureLevel *pLvlOut;
4586 Fts5Iter *pIter = 0; /* Iterator to read input data */
4587 int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
4588 int nInput; /* Number of input segments */
4589 Fts5SegWriter writer; /* Writer object */
4590 Fts5StructureSegment *pSeg; /* Output segment */
4591 Fts5Buffer term;
4592 int bOldest; /* True if the output segment is the oldest */
4593 int eDetail = p->pConfig->eDetail;
4594 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
4595 int bTermWritten = 0; /* True if current term already output */
4597 assert( iLvl<pStruct->nLevel );
4598 assert( pLvl->nMerge<=pLvl->nSeg );
4600 memset(&writer, 0, sizeof(Fts5SegWriter));
4601 memset(&term, 0, sizeof(Fts5Buffer));
4602 if( pLvl->nMerge ){
4603 pLvlOut = &pStruct->aLevel[iLvl+1];
4604 assert( pLvlOut->nSeg>0 );
4605 nInput = pLvl->nMerge;
4606 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
4608 fts5WriteInit(p, &writer, pSeg->iSegid);
4609 writer.writer.pgno = pSeg->pgnoLast+1;
4610 writer.iBtPage = 0;
4611 }else{
4612 int iSegid = fts5AllocateSegid(p, pStruct);
4614 /* Extend the Fts5Structure object as required to ensure the output
4615 ** segment exists. */
4616 if( iLvl==pStruct->nLevel-1 ){
4617 fts5StructureAddLevel(&p->rc, ppStruct);
4618 pStruct = *ppStruct;
4620 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
4621 if( p->rc ) return;
4622 pLvl = &pStruct->aLevel[iLvl];
4623 pLvlOut = &pStruct->aLevel[iLvl+1];
4625 fts5WriteInit(p, &writer, iSegid);
4627 /* Add the new segment to the output level */
4628 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
4629 pLvlOut->nSeg++;
4630 pSeg->pgnoFirst = 1;
4631 pSeg->iSegid = iSegid;
4632 pStruct->nSegment++;
4634 /* Read input from all segments in the input level */
4635 nInput = pLvl->nSeg;
4637 /* Set the range of origins that will go into the output segment. */
4638 if( pStruct->nOriginCntr>0 ){
4639 pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1;
4640 pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2;
4643 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
4645 assert( iLvl>=0 );
4646 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
4647 fts5MultiIterEof(p, pIter)==0;
4648 fts5MultiIterNext(p, pIter, 0, 0)
4650 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4651 int nPos; /* position-list size field value */
4652 int nTerm;
4653 const u8 *pTerm;
4655 pTerm = fts5MultiIterTerm(pIter, &nTerm);
4656 if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
4657 if( pnRem && writer.nLeafWritten>nRem ){
4658 break;
4660 fts5BufferSet(&p->rc, &term, nTerm, pTerm);
4661 bTermWritten =0;
4664 /* Check for key annihilation. */
4665 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
4667 if( p->rc==SQLITE_OK && bTermWritten==0 ){
4668 /* This is a new term. Append a term to the output segment. */
4669 fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
4670 bTermWritten = 1;
4673 /* Append the rowid to the output */
4674 /* WRITEPOSLISTSIZE */
4675 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
4677 if( eDetail==FTS5_DETAIL_NONE ){
4678 if( pSegIter->bDel ){
4679 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4680 if( pSegIter->nPos>0 ){
4681 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4684 }else{
4685 /* Append the position-list data to the output */
4686 nPos = pSegIter->nPos*2 + pSegIter->bDel;
4687 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4688 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4692 /* Flush the last leaf page to disk. Set the output segment b-tree height
4693 ** and last leaf page number at the same time. */
4694 fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4696 assert( pIter!=0 || p->rc!=SQLITE_OK );
4697 if( fts5MultiIterEof(p, pIter) ){
4698 int i;
4700 /* Remove the redundant segments from the %_data table */
4701 assert( pSeg->nEntry==0 );
4702 for(i=0; i<nInput; i++){
4703 Fts5StructureSegment *pOld = &pLvl->aSeg[i];
4704 pSeg->nEntry += (pOld->nEntry - pOld->nEntryTombstone);
4705 fts5DataRemoveSegment(p, pOld);
4708 /* Remove the redundant segments from the input level */
4709 if( pLvl->nSeg!=nInput ){
4710 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
4711 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
4713 pStruct->nSegment -= nInput;
4714 pLvl->nSeg -= nInput;
4715 pLvl->nMerge = 0;
4716 if( pSeg->pgnoLast==0 ){
4717 pLvlOut->nSeg--;
4718 pStruct->nSegment--;
4720 }else{
4721 assert( pSeg->pgnoLast>0 );
4722 fts5TrimSegments(p, pIter);
4723 pLvl->nMerge = nInput;
4726 fts5MultiIterFree(pIter);
4727 fts5BufferFree(&term);
4728 if( pnRem ) *pnRem -= writer.nLeafWritten;
4732 ** If this is not a contentless_delete=1 table, or if the 'deletemerge'
4733 ** configuration option is set to 0, then this function always returns -1.
4734 ** Otherwise, it searches the structure object passed as the second argument
4735 ** for a level suitable for merging due to having a large number of
4736 ** tombstones in the tombstone hash. If one is found, its index is returned.
4737 ** Otherwise, if there is no suitable level, -1.
4739 static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){
4740 Fts5Config *pConfig = p->pConfig;
4741 int iRet = -1;
4742 if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){
4743 int ii;
4744 int nBest = 0;
4746 for(ii=0; ii<pStruct->nLevel; ii++){
4747 Fts5StructureLevel *pLvl = &pStruct->aLevel[ii];
4748 i64 nEntry = 0;
4749 i64 nTomb = 0;
4750 int iSeg;
4751 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
4752 nEntry += pLvl->aSeg[iSeg].nEntry;
4753 nTomb += pLvl->aSeg[iSeg].nEntryTombstone;
4755 assert_nc( nEntry>0 || pLvl->nSeg==0 );
4756 if( nEntry>0 ){
4757 int nPercent = (nTomb * 100) / nEntry;
4758 if( nPercent>=pConfig->nDeleteMerge && nPercent>nBest ){
4759 iRet = ii;
4760 nBest = nPercent;
4765 return iRet;
4769 ** Do up to nPg pages of automerge work on the index.
4771 ** Return true if any changes were actually made, or false otherwise.
4773 static int fts5IndexMerge(
4774 Fts5Index *p, /* FTS5 backend object */
4775 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4776 int nPg, /* Pages of work to do */
4777 int nMin /* Minimum number of segments to merge */
4779 int nRem = nPg;
4780 int bRet = 0;
4781 Fts5Structure *pStruct = *ppStruct;
4782 while( nRem>0 && p->rc==SQLITE_OK ){
4783 int iLvl; /* To iterate through levels */
4784 int iBestLvl = 0; /* Level offering the most input segments */
4785 int nBest = 0; /* Number of input segments on best level */
4787 /* Set iBestLvl to the level to read input segments from. Or to -1 if
4788 ** there is no level suitable to merge segments from. */
4789 assert( pStruct->nLevel>0 );
4790 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4791 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4792 if( pLvl->nMerge ){
4793 if( pLvl->nMerge>nBest ){
4794 iBestLvl = iLvl;
4795 nBest = nMin;
4797 break;
4799 if( pLvl->nSeg>nBest ){
4800 nBest = pLvl->nSeg;
4801 iBestLvl = iLvl;
4804 if( nBest<nMin ){
4805 iBestLvl = fts5IndexFindDeleteMerge(p, pStruct);
4808 if( iBestLvl<0 ) break;
4809 bRet = 1;
4810 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4811 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4812 fts5StructurePromote(p, iBestLvl+1, pStruct);
4815 if( nMin==1 ) nMin = 2;
4817 *ppStruct = pStruct;
4818 return bRet;
4822 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4823 ** segment. This function updates the write-counter accordingly and, if
4824 ** necessary, performs incremental merge work.
4826 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4827 ** already occurred, this function is a no-op.
4829 static void fts5IndexAutomerge(
4830 Fts5Index *p, /* FTS5 backend object */
4831 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
4832 int nLeaf /* Number of output leaves just written */
4834 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
4835 Fts5Structure *pStruct = *ppStruct;
4836 u64 nWrite; /* Initial value of write-counter */
4837 int nWork; /* Number of work-quanta to perform */
4838 int nRem; /* Number of leaf pages left to write */
4840 /* Update the write-counter. While doing so, set nWork. */
4841 nWrite = pStruct->nWriteCounter;
4842 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4843 pStruct->nWriteCounter += nLeaf;
4844 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4846 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4850 static void fts5IndexCrisismerge(
4851 Fts5Index *p, /* FTS5 backend object */
4852 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
4854 const int nCrisis = p->pConfig->nCrisisMerge;
4855 Fts5Structure *pStruct = *ppStruct;
4856 if( pStruct && pStruct->nLevel>0 ){
4857 int iLvl = 0;
4858 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
4859 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
4860 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
4861 fts5StructurePromote(p, iLvl+1, pStruct);
4862 iLvl++;
4864 *ppStruct = pStruct;
4868 static int fts5IndexReturn(Fts5Index *p){
4869 int rc = p->rc;
4870 p->rc = SQLITE_OK;
4871 return rc;
4874 typedef struct Fts5FlushCtx Fts5FlushCtx;
4875 struct Fts5FlushCtx {
4876 Fts5Index *pIdx;
4877 Fts5SegWriter writer;
4881 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4882 ** in a 32-bit integer. Return the size of the largest prefix of this
4883 ** list nMax bytes or less in size.
4885 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
4886 int ret;
4887 u32 dummy;
4888 ret = fts5GetVarint32(aBuf, dummy);
4889 if( ret<nMax ){
4890 while( 1 ){
4891 int i = fts5GetVarint32(&aBuf[ret], dummy);
4892 if( (ret + i) > nMax ) break;
4893 ret += i;
4896 return ret;
4900 ** Execute the SQL statement:
4902 ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
4904 ** This is used when a secure-delete operation removes the last term
4905 ** from a segment leaf page. In that case the %_idx entry is removed
4906 ** too. This is done to ensure that if all instances of a token are
4907 ** removed from an fts5 database in secure-delete mode, no trace of
4908 ** the token itself remains in the database.
4910 static void fts5SecureDeleteIdxEntry(
4911 Fts5Index *p, /* FTS5 backend object */
4912 int iSegid, /* Id of segment to delete entry for */
4913 int iPgno /* Page number within segment */
4915 if( iPgno!=1 ){
4916 assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );
4917 if( p->pDeleteFromIdx==0 ){
4918 fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
4919 "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
4920 p->pConfig->zDb, p->pConfig->zName
4923 if( p->rc==SQLITE_OK ){
4924 sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
4925 sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
4926 sqlite3_step(p->pDeleteFromIdx);
4927 p->rc = sqlite3_reset(p->pDeleteFromIdx);
4933 ** This is called when a secure-delete operation removes a position-list
4934 ** that overflows onto segment page iPgno of segment pSeg. This function
4935 ** rewrites node iPgno, and possibly one or more of its right-hand peers,
4936 ** to remove this portion of the position list.
4938 ** Output variable (*pbLastInDoclist) is set to true if the position-list
4939 ** removed is followed by a new term or the end-of-segment, or false if
4940 ** it is followed by another rowid/position list.
4942 static void fts5SecureDeleteOverflow(
4943 Fts5Index *p,
4944 Fts5StructureSegment *pSeg,
4945 int iPgno,
4946 int *pbLastInDoclist
4948 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
4949 int pgno;
4950 Fts5Data *pLeaf = 0;
4951 assert( iPgno!=1 );
4953 *pbLastInDoclist = 1;
4954 for(pgno=iPgno; p->rc==SQLITE_OK && pgno<=pSeg->pgnoLast; pgno++){
4955 i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
4956 int iNext = 0;
4957 u8 *aPg = 0;
4959 pLeaf = fts5DataRead(p, iRowid);
4960 if( pLeaf==0 ) break;
4961 aPg = pLeaf->p;
4963 iNext = fts5GetU16(&aPg[0]);
4964 if( iNext!=0 ){
4965 *pbLastInDoclist = 0;
4967 if( iNext==0 && pLeaf->szLeaf!=pLeaf->nn ){
4968 fts5GetVarint32(&aPg[pLeaf->szLeaf], iNext);
4971 if( iNext==0 ){
4972 /* The page contains no terms or rowids. Replace it with an empty
4973 ** page and move on to the right-hand peer. */
4974 const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
4975 assert_nc( bDetailNone==0 || pLeaf->nn==4 );
4976 if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
4977 fts5DataRelease(pLeaf);
4978 pLeaf = 0;
4979 }else if( bDetailNone ){
4980 break;
4981 }else if( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){
4982 p->rc = FTS5_CORRUPT;
4983 break;
4984 }else{
4985 int nShift = iNext - 4;
4986 int nPg;
4988 int nIdx = 0;
4989 u8 *aIdx = 0;
4991 /* Unless the current page footer is 0 bytes in size (in which case
4992 ** the new page footer will be as well), allocate and populate a
4993 ** buffer containing the new page footer. Set stack variables aIdx
4994 ** and nIdx accordingly. */
4995 if( pLeaf->nn>pLeaf->szLeaf ){
4996 int iFirst = 0;
4997 int i1 = pLeaf->szLeaf;
4998 int i2 = 0;
5000 i1 += fts5GetVarint32(&aPg[i1], iFirst);
5001 if( iFirst<iNext ){
5002 p->rc = FTS5_CORRUPT;
5003 break;
5005 aIdx = sqlite3Fts5MallocZero(&p->rc, (pLeaf->nn-pLeaf->szLeaf)+2);
5006 if( aIdx==0 ) break;
5007 i2 = sqlite3Fts5PutVarint(aIdx, iFirst-nShift);
5008 if( i1<pLeaf->nn ){
5009 memcpy(&aIdx[i2], &aPg[i1], pLeaf->nn-i1);
5010 i2 += (pLeaf->nn-i1);
5012 nIdx = i2;
5015 /* Modify the contents of buffer aPg[]. Set nPg to the new size
5016 ** in bytes. The new page is always smaller than the old. */
5017 nPg = pLeaf->szLeaf - nShift;
5018 memmove(&aPg[4], &aPg[4+nShift], nPg-4);
5019 fts5PutU16(&aPg[2], nPg);
5020 if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4);
5021 if( nIdx>0 ){
5022 memcpy(&aPg[nPg], aIdx, nIdx);
5023 nPg += nIdx;
5025 sqlite3_free(aIdx);
5027 /* Write the new page to disk and exit the loop */
5028 assert( nPg>4 || fts5GetU16(aPg)==0 );
5029 fts5DataWrite(p, iRowid, aPg, nPg);
5030 break;
5033 fts5DataRelease(pLeaf);
5037 ** Completely remove the entry that pSeg currently points to from
5038 ** the database.
5040 static void fts5DoSecureDelete(
5041 Fts5Index *p,
5042 Fts5SegIter *pSeg
5044 const int bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
5045 int iSegid = pSeg->pSeg->iSegid;
5046 u8 *aPg = pSeg->pLeaf->p;
5047 int nPg = pSeg->pLeaf->nn;
5048 int iPgIdx = pSeg->pLeaf->szLeaf;
5050 u64 iDelta = 0;
5051 u64 iNextDelta = 0;
5052 int iNextOff = 0;
5053 int iOff = 0;
5054 int nIdx = 0;
5055 u8 *aIdx = 0;
5056 int bLastInDoclist = 0;
5057 int iIdx = 0;
5058 int iStart = 0;
5059 int iKeyOff = 0;
5060 int iPrevKeyOff = 0;
5061 int iDelKeyOff = 0; /* Offset of deleted key, if any */
5063 nIdx = nPg-iPgIdx;
5064 aIdx = sqlite3Fts5MallocZero(&p->rc, nIdx+16);
5065 if( p->rc ) return;
5066 memcpy(aIdx, &aPg[iPgIdx], nIdx);
5068 /* At this point segment iterator pSeg points to the entry
5069 ** this function should remove from the b-tree segment.
5071 ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
5072 ** offset of the first byte in the position-list for the entry to
5073 ** remove. Immediately before this comes two varints that will also
5074 ** need to be removed:
5076 ** + the rowid or delta rowid value for the entry, and
5077 ** + the size of the position list in bytes.
5079 ** Or, in detail=none mode, there is a single varint prior to
5080 ** pSeg->iLeafOffset - the rowid or delta rowid value.
5082 ** This block sets the following variables:
5084 ** iStart:
5085 ** iDelta:
5088 int iSOP;
5089 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){
5090 iStart = pSeg->iTermLeafOffset;
5091 }else{
5092 iStart = fts5GetU16(&aPg[0]);
5095 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
5096 assert_nc( iSOP<=pSeg->iLeafOffset );
5098 if( bDetailNone ){
5099 while( iSOP<pSeg->iLeafOffset ){
5100 if( aPg[iSOP]==0x00 ) iSOP++;
5101 if( aPg[iSOP]==0x00 ) iSOP++;
5102 iStart = iSOP;
5103 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
5106 iNextOff = iSOP;
5107 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
5108 if( iNextOff<pSeg->iEndofDoclist && aPg[iNextOff]==0x00 ) iNextOff++;
5110 }else{
5111 int nPos = 0;
5112 iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
5113 while( iSOP<pSeg->iLeafOffset ){
5114 iStart = iSOP + (nPos/2);
5115 iSOP = iStart + fts5GetVarint(&aPg[iStart], &iDelta);
5116 iSOP += fts5GetVarint32(&aPg[iSOP], nPos);
5118 assert_nc( iSOP==pSeg->iLeafOffset );
5119 iNextOff = pSeg->iLeafOffset + pSeg->nPos;
5123 iOff = iStart;
5124 if( iNextOff>=iPgIdx ){
5125 int pgno = pSeg->iLeafPgno+1;
5126 fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist);
5127 iNextOff = iPgIdx;
5128 }else{
5129 /* Set bLastInDoclist to true if the entry being removed is the last
5130 ** in its doclist. */
5131 for(iIdx=0, iKeyOff=0; iIdx<nIdx; /* no-op */){
5132 u32 iVal = 0;
5133 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
5134 iKeyOff += iVal;
5135 if( iKeyOff==iNextOff ){
5136 bLastInDoclist = 1;
5141 if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist||iNextOff==iPgIdx) ){
5142 fts5PutU16(&aPg[0], 0);
5145 if( bLastInDoclist==0 ){
5146 if( iNextOff!=iPgIdx ){
5147 iNextOff += fts5GetVarint(&aPg[iNextOff], &iNextDelta);
5148 iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta);
5150 }else if(
5151 iStart==pSeg->iTermLeafOffset && pSeg->iLeafPgno==pSeg->iTermLeafPgno
5153 /* The entry being removed was the only position list in its
5154 ** doclist. Therefore the term needs to be removed as well. */
5155 int iKey = 0;
5156 for(iIdx=0, iKeyOff=0; iIdx<nIdx; iKey++){
5157 u32 iVal = 0;
5158 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
5159 if( (iKeyOff+iVal)>(u32)iStart ) break;
5160 iKeyOff += iVal;
5163 iDelKeyOff = iOff = iKeyOff;
5164 if( iNextOff!=iPgIdx ){
5165 int nPrefix = 0;
5166 int nSuffix = 0;
5167 int nPrefix2 = 0;
5168 int nSuffix2 = 0;
5170 iDelKeyOff = iNextOff;
5171 iNextOff += fts5GetVarint32(&aPg[iNextOff], nPrefix2);
5172 iNextOff += fts5GetVarint32(&aPg[iNextOff], nSuffix2);
5174 if( iKey!=1 ){
5175 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nPrefix);
5177 iKeyOff += fts5GetVarint32(&aPg[iKeyOff], nSuffix);
5179 nPrefix = MIN(nPrefix, nPrefix2);
5180 nSuffix = (nPrefix2 + nSuffix2) - nPrefix;
5182 if( (iKeyOff+nSuffix)>iPgIdx || (iNextOff+nSuffix2)>iPgIdx ){
5183 p->rc = FTS5_CORRUPT;
5184 }else{
5185 if( iKey!=1 ){
5186 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nPrefix);
5188 iOff += sqlite3Fts5PutVarint(&aPg[iOff], nSuffix);
5189 if( nPrefix2>pSeg->term.n ){
5190 p->rc = FTS5_CORRUPT;
5191 }else if( nPrefix2>nPrefix ){
5192 memcpy(&aPg[iOff], &pSeg->term.p[nPrefix], nPrefix2-nPrefix);
5193 iOff += (nPrefix2-nPrefix);
5195 memmove(&aPg[iOff], &aPg[iNextOff], nSuffix2);
5196 iOff += nSuffix2;
5197 iNextOff += nSuffix2;
5200 }else if( iStart==4 ){
5201 int iPgno;
5203 assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno );
5204 /* The entry being removed may be the only position list in
5205 ** its doclist. */
5206 for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
5207 Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno));
5208 int bEmpty = (pPg && pPg->nn==4);
5209 fts5DataRelease(pPg);
5210 if( bEmpty==0 ) break;
5213 if( iPgno==pSeg->iTermLeafPgno ){
5214 i64 iId = FTS5_SEGMENT_ROWID(iSegid, pSeg->iTermLeafPgno);
5215 Fts5Data *pTerm = fts5DataRead(p, iId);
5216 if( pTerm && pTerm->szLeaf==pSeg->iTermLeafOffset ){
5217 u8 *aTermIdx = &pTerm->p[pTerm->szLeaf];
5218 int nTermIdx = pTerm->nn - pTerm->szLeaf;
5219 int iTermIdx = 0;
5220 int iTermOff = 0;
5222 while( 1 ){
5223 u32 iVal = 0;
5224 int nByte = fts5GetVarint32(&aTermIdx[iTermIdx], iVal);
5225 iTermOff += iVal;
5226 if( (iTermIdx+nByte)>=nTermIdx ) break;
5227 iTermIdx += nByte;
5229 nTermIdx = iTermIdx;
5231 memmove(&pTerm->p[iTermOff], &pTerm->p[pTerm->szLeaf], nTermIdx);
5232 fts5PutU16(&pTerm->p[2], iTermOff);
5234 fts5DataWrite(p, iId, pTerm->p, iTermOff+nTermIdx);
5235 if( nTermIdx==0 ){
5236 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iTermLeafPgno);
5239 fts5DataRelease(pTerm);
5243 if( p->rc==SQLITE_OK ){
5244 const int nMove = nPg - iNextOff; /* Number of bytes to move */
5245 int nShift = iNextOff - iOff; /* Distance to move them */
5247 int iPrevKeyOut = 0;
5248 int iKeyIn = 0;
5250 memmove(&aPg[iOff], &aPg[iNextOff], nMove);
5251 iPgIdx -= nShift;
5252 nPg = iPgIdx;
5253 fts5PutU16(&aPg[2], iPgIdx);
5255 for(iIdx=0; iIdx<nIdx; /* no-op */){
5256 u32 iVal = 0;
5257 iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
5258 iKeyIn += iVal;
5259 if( iKeyIn!=iDelKeyOff ){
5260 int iKeyOut = (iKeyIn - (iKeyIn>iOff ? nShift : 0));
5261 nPg += sqlite3Fts5PutVarint(&aPg[nPg], iKeyOut - iPrevKeyOut);
5262 iPrevKeyOut = iKeyOut;
5266 if( iPgIdx==nPg && nIdx>0 && pSeg->iLeafPgno!=1 ){
5267 fts5SecureDeleteIdxEntry(p, iSegid, pSeg->iLeafPgno);
5270 assert_nc( nPg>4 || fts5GetU16(aPg)==0 );
5271 fts5DataWrite(p, FTS5_SEGMENT_ROWID(iSegid,pSeg->iLeafPgno), aPg, nPg);
5273 sqlite3_free(aIdx);
5277 ** This is called as part of flushing a delete to disk in 'secure-delete'
5278 ** mode. It edits the segments within the database described by argument
5279 ** pStruct to remove the entries for term zTerm, rowid iRowid.
5281 static void fts5FlushSecureDelete(
5282 Fts5Index *p,
5283 Fts5Structure *pStruct,
5284 const char *zTerm,
5285 i64 iRowid
5287 const int f = FTS5INDEX_QUERY_SKIPHASH;
5288 int nTerm = (int)strlen(zTerm);
5289 Fts5Iter *pIter = 0; /* Used to find term instance */
5291 fts5MultiIterNew(p, pStruct, f, 0, (const u8*)zTerm, nTerm, -1, 0, &pIter);
5292 if( fts5MultiIterEof(p, pIter)==0 ){
5293 i64 iThis = fts5MultiIterRowid(pIter);
5294 if( iThis<iRowid ){
5295 fts5MultiIterNextFrom(p, pIter, iRowid);
5298 if( p->rc==SQLITE_OK
5299 && fts5MultiIterEof(p, pIter)==0
5300 && iRowid==fts5MultiIterRowid(pIter)
5302 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
5303 fts5DoSecureDelete(p, pSeg);
5307 fts5MultiIterFree(pIter);
5312 ** Flush the contents of in-memory hash table iHash to a new level-0
5313 ** segment on disk. Also update the corresponding structure record.
5315 ** If an error occurs, set the Fts5Index.rc error code. If an error has
5316 ** already occurred, this function is a no-op.
/*
** Reviewer overview of the body below:
**
**   * If the hash table is not empty and fts5AllocateSegid() returns a
**     non-zero id, each term/doclist pair in the hash is written through
**     the Fts5SegWriter "writer" to segment iSegid.
**   * When p->pConfig->bSecureDelete is set, entries recognised as deletes
**     are applied to existing segments with fts5FlushSecureDelete(), and
**     the term itself is only appended once an entry that is not consumed
**     as a delete is reached (tracked by bTermWritten).
**   * If at least one leaf page was written (pgnoLast>0) a new segment
**     entry is appended to level 0 of pStruct.
**
** NOTE(review): lines holding only braces are not visible in this copy of
** the file, so the exact nesting of the trailing statements should be
** confirmed against the upstream source.
*/
5318 static void fts5FlushOneHash(Fts5Index *p){
5319 Fts5Hash *pHash = p->pHash;
5320 Fts5Structure *pStruct;
5321 int iSegid;
5322 int pgnoLast = 0; /* Last leaf page number in segment */
5324 /* Obtain a reference to the index structure and allocate a new segment-id
5325 ** for the new level-0 segment. */
5326 pStruct = fts5StructureRead(p);
5327 fts5StructureInvalidate(p);
5329 if( sqlite3Fts5HashIsEmpty(pHash)==0 ){
5330 iSegid = fts5AllocateSegid(p, pStruct);
5331 if( iSegid ){
5332 const int pgsz = p->pConfig->pgsz;
5333 int eDetail = p->pConfig->eDetail;
5334 int bSecureDelete = p->pConfig->bSecureDelete;
5335 Fts5StructureSegment *pSeg; /* New segment within pStruct */
5336 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
5337 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
5339 Fts5SegWriter writer;
5340 fts5WriteInit(p, &writer, iSegid);
5342 pBuf = &writer.writer.buf;
5343 pPgidx = &writer.writer.pgidx;
5345 /* fts5WriteInit() should have initialized the buffers to (most likely)
5346 ** the maximum space required. */
5347 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
5348 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
5350 /* Begin scanning through hash table entries. This loop runs once for each
5351 ** term/doclist currently stored within the hash table. */
5352 if( p->rc==SQLITE_OK ){
5353 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
5355 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
5356 const char *zTerm; /* Buffer containing term */
5357 int nTerm; /* Size of zTerm in bytes */
5358 const u8 *pDoclist; /* Pointer to doclist for this term */
5359 int nDoclist; /* Size of doclist in bytes */
5361 /* Get the term and doclist for this entry. */
5362 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
5363 nTerm = (int)strlen(zTerm);
/* Outside secure-delete mode the term is appended straight away. In
** secure-delete mode this is deferred; see bTermWritten further down. */
5364 if( bSecureDelete==0 ){
5365 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
5366 if( p->rc!=SQLITE_OK ) break;
5367 assert( writer.bFirstRowidInPage==0 );
5370 if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
5371 /* The entire doclist will fit on the current leaf. */
5372 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
5373 }else{
5374 int bTermWritten = !bSecureDelete;
5375 i64 iRowid = 0;
5376 i64 iPrev = 0;
5377 int iOff = 0;
5379 /* The entire doclist will not fit on this leaf. The following
5380 ** loop iterates through the poslists that make up the current
5381 ** doclist. */
5382 while( p->rc==SQLITE_OK && iOff<nDoclist ){
5383 u64 iDelta = 0;
5384 iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
5385 iRowid += iDelta;
5387 /* If in secure delete mode, and if this entry in the poslist is
5388 ** in fact a delete, then edit the existing segments directly
5389 ** using fts5FlushSecureDelete(). */
5390 if( bSecureDelete ){
5391 if( eDetail==FTS5_DETAIL_NONE ){
5392 if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
5393 fts5FlushSecureDelete(p, pStruct, zTerm, iRowid);
5394 iOff++;
5395 if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
5396 iOff++;
/* Zeroing nDoclist makes the enclosing loop condition (iOff<nDoclist)
** false, so no further entries of this doclist are read. */
5397 nDoclist = 0;
5398 }else{
5399 continue;
5402 }else if( (pDoclist[iOff] & 0x01) ){
5403 fts5FlushSecureDelete(p, pStruct, zTerm, iRowid);
5404 if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){
5405 iOff++;
5406 continue;
5411 if( p->rc==SQLITE_OK && bTermWritten==0 ){
5412 fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm);
5413 bTermWritten = 1;
5414 assert( p->rc!=SQLITE_OK || writer.bFirstRowidInPage==0 );
5417 if( writer.bFirstRowidInPage ){
5418 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
5419 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
5420 writer.bFirstRowidInPage = 0;
5421 fts5WriteDlidxAppend(p, &writer, iRowid);
5422 }else{
/* The delta is computed on u64 operands, so the subtraction wraps
** instead of overflowing a signed i64. */
5423 u64 iRowidDelta = (u64)iRowid - (u64)iPrev;
5424 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowidDelta);
5426 if( p->rc!=SQLITE_OK ) break;
5427 assert( pBuf->n<=pBuf->nSpace );
5428 iPrev = iRowid;
5430 if( eDetail==FTS5_DETAIL_NONE ){
5431 if( iOff<nDoclist && pDoclist[iOff]==0 ){
5432 pBuf->p[pBuf->n++] = 0;
5433 iOff++;
5434 if( iOff<nDoclist && pDoclist[iOff]==0 ){
5435 pBuf->p[pBuf->n++] = 0;
5436 iOff++;
5439 if( (pBuf->n + pPgidx->n)>=pgsz ){
5440 fts5WriteFlushLeaf(p, &writer);
5442 }else{
5443 int bDummy;
5444 int nPos;
5445 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
5446 nCopy += nPos;
5447 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
5448 /* The entire poslist will fit on the current leaf. So copy
5449 ** it in one go. */
5450 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
5451 }else{
5452 /* The entire poslist will not fit on this leaf. So it needs
5453 ** to be broken into sections. The only qualification being
5454 ** that each varint must be stored contiguously. */
5455 const u8 *pPoslist = &pDoclist[iOff];
5456 int iPos = 0;
5457 while( p->rc==SQLITE_OK ){
5458 int nSpace = pgsz - pBuf->n - pPgidx->n;
5459 int n = 0;
5460 if( (nCopy - iPos)<=nSpace ){
5461 n = nCopy - iPos;
5462 }else{
5463 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
5465 assert( n>0 );
5466 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
5467 iPos += n;
5468 if( (pBuf->n + pPgidx->n)>=pgsz ){
5469 fts5WriteFlushLeaf(p, &writer);
5471 if( iPos>=nCopy ) break;
5474 iOff += nCopy;
5479 /* TODO2: Doclist terminator written here. */
5480 /* pBuf->p[pBuf->n++] = '\0'; */
5481 assert( pBuf->n<=pBuf->nSpace );
5482 if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
5484 sqlite3Fts5HashClear(pHash);
5485 fts5WriteFinish(p, &writer, &pgnoLast);
/* The assert below allows pgnoLast==0 only when an error has occurred or
** secure-delete mode is on; in that case no segment is added. */
5487 assert( p->rc!=SQLITE_OK || bSecureDelete || pgnoLast>0 );
5488 if( pgnoLast>0 ){
5489 /* Update the Fts5Structure. It is written back to the database by the
5490 ** fts5StructureRelease() call below. */
5491 if( pStruct->nLevel==0 ){
5492 fts5StructureAddLevel(&p->rc, &pStruct);
5494 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
5495 if( p->rc==SQLITE_OK ){
5496 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
5497 pSeg->iSegid = iSegid;
5498 pSeg->pgnoFirst = 1;
5499 pSeg->pgnoLast = pgnoLast;
5500 if( pStruct->nOriginCntr>0 ){
5501 pSeg->iOrigin1 = pStruct->nOriginCntr;
5502 pSeg->iOrigin2 = pStruct->nOriginCntr;
5503 pSeg->nEntry = p->nPendingRow;
5504 pStruct->nOriginCntr++;
5506 pStruct->nSegment++;
5508 fts5StructurePromote(p, 0, pStruct);
/* fts5IndexAutomerge() is passed pgnoLast plus p->nContentlessDelete as
** its third argument; nContentlessDelete is reset to 0 at the end. */
5513 fts5IndexAutomerge(p, &pStruct, pgnoLast + p->nContentlessDelete);
5514 fts5IndexCrisismerge(p, &pStruct);
5515 fts5StructureWrite(p, pStruct);
5516 fts5StructureRelease(pStruct);
5517 p->nContentlessDelete = 0;
5521 ** Flush any data stored in the in-memory hash tables to the database.
5523 static void fts5IndexFlush(Fts5Index *p){
5524 /* Unless it is empty, flush the hash table to disk */
5525 if( p->nPendingData || p->nContentlessDelete ){
5526 assert( p->pHash );
5527 fts5FlushOneHash(p);
5528 p->nPendingData = 0;
5529 p->nPendingRow = 0;
5533 static Fts5Structure *fts5IndexOptimizeStruct(
5534 Fts5Index *p,
5535 Fts5Structure *pStruct
5537 Fts5Structure *pNew = 0;
5538 sqlite3_int64 nByte = sizeof(Fts5Structure);
5539 int nSeg = pStruct->nSegment;
5540 int i;
5542 /* Figure out if this structure requires optimization. A structure does
5543 ** not require optimization if either:
5545 ** 1. it consists of fewer than two segments, or
5546 ** 2. all segments are on the same level, or
5547 ** 3. all segments except one are currently inputs to a merge operation.
5549 ** In the first case, if there are no tombstone hash pages, return NULL. In
5550 ** the second, increment the ref-count on *pStruct and return a copy of the
5551 ** pointer to it.
5553 if( nSeg==0 ) return 0;
5554 for(i=0; i<pStruct->nLevel; i++){
5555 int nThis = pStruct->aLevel[i].nSeg;
5556 int nMerge = pStruct->aLevel[i].nMerge;
5557 if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){
5558 if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){
5559 return 0;
5561 fts5StructureRef(pStruct);
5562 return pStruct;
5564 assert( pStruct->aLevel[i].nMerge<=nThis );
5567 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
5568 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
5570 if( pNew ){
5571 Fts5StructureLevel *pLvl;
5572 nByte = nSeg * sizeof(Fts5StructureSegment);
5573 pNew->nLevel = MIN(pStruct->nLevel+1, FTS5_MAX_LEVEL);
5574 pNew->nRef = 1;
5575 pNew->nWriteCounter = pStruct->nWriteCounter;
5576 pNew->nOriginCntr = pStruct->nOriginCntr;
5577 pLvl = &pNew->aLevel[pNew->nLevel-1];
5578 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
5579 if( pLvl->aSeg ){
5580 int iLvl, iSeg;
5581 int iSegOut = 0;
5582 /* Iterate through all segments, from oldest to newest. Add them to
5583 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
5584 ** segment in the data structure. */
5585 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
5586 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
5587 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
5588 iSegOut++;
5591 pNew->nSegment = pLvl->nSeg = nSeg;
5592 }else{
5593 sqlite3_free(pNew);
5594 pNew = 0;
5598 return pNew;
5601 int sqlite3Fts5IndexOptimize(Fts5Index *p){
5602 Fts5Structure *pStruct;
5603 Fts5Structure *pNew = 0;
5605 assert( p->rc==SQLITE_OK );
5606 fts5IndexFlush(p);
5607 assert( p->nContentlessDelete==0 );
5608 pStruct = fts5StructureRead(p);
5609 fts5StructureInvalidate(p);
5611 if( pStruct ){
5612 pNew = fts5IndexOptimizeStruct(p, pStruct);
5614 fts5StructureRelease(pStruct);
5616 assert( pNew==0 || pNew->nSegment>0 );
5617 if( pNew ){
5618 int iLvl;
5619 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
5620 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
5621 int nRem = FTS5_OPT_WORK_UNIT;
5622 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
5625 fts5StructureWrite(p, pNew);
5626 fts5StructureRelease(pNew);
5629 return fts5IndexReturn(p);
5633 ** This is called to implement the special "VALUES('merge', $nMerge)"
5634 ** INSERT command.
5636 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
5637 Fts5Structure *pStruct = 0;
5639 fts5IndexFlush(p);
5640 pStruct = fts5StructureRead(p);
5641 if( pStruct ){
5642 int nMin = p->pConfig->nUsermerge;
5643 fts5StructureInvalidate(p);
5644 if( nMerge<0 ){
5645 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
5646 fts5StructureRelease(pStruct);
5647 pStruct = pNew;
5648 nMin = 1;
5649 nMerge = nMerge*-1;
5651 if( pStruct && pStruct->nLevel ){
5652 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
5653 fts5StructureWrite(p, pStruct);
5656 fts5StructureRelease(pStruct);
5658 return fts5IndexReturn(p);
5661 static void fts5AppendRowid(
5662 Fts5Index *p,
5663 u64 iDelta,
5664 Fts5Iter *pUnused,
5665 Fts5Buffer *pBuf
5667 UNUSED_PARAM(pUnused);
5668 fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
5671 static void fts5AppendPoslist(
5672 Fts5Index *p,
5673 u64 iDelta,
5674 Fts5Iter *pMulti,
5675 Fts5Buffer *pBuf
5677 int nData = pMulti->base.nData;
5678 int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
5679 assert( nData>0 );
5680 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
5681 fts5BufferSafeAppendVarint(pBuf, iDelta);
5682 fts5BufferSafeAppendVarint(pBuf, nData*2);
5683 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
5684 memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
5689 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
5690 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
5692 assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
5693 if( p>=pIter->aEof ){
5694 pIter->aPoslist = 0;
5695 }else{
5696 i64 iDelta;
5698 p += fts5GetVarint(p, (u64*)&iDelta);
5699 pIter->iRowid += iDelta;
5701 /* Read position list size */
5702 if( p[0] & 0x80 ){
5703 int nPos;
5704 pIter->nSize = fts5GetVarint32(p, nPos);
5705 pIter->nPoslist = (nPos>>1);
5706 }else{
5707 pIter->nPoslist = ((int)(p[0])) >> 1;
5708 pIter->nSize = 1;
5711 pIter->aPoslist = p;
5712 if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
5713 pIter->aPoslist = 0;
5718 static void fts5DoclistIterInit(
5719 Fts5Buffer *pBuf,
5720 Fts5DoclistIter *pIter
5722 memset(pIter, 0, sizeof(*pIter));
5723 if( pBuf->n>0 ){
5724 pIter->aPoslist = pBuf->p;
5725 pIter->aEof = &pBuf->p[pBuf->n];
5726 fts5DoclistIterNext(pIter);
/* The function form of fts5MergeAppendDocid() below is disabled by the
** "#if 0" guard; the macro defined after the #endif is what is compiled. */
5730 #if 0
5732 ** Append a doclist to buffer pBuf.
5734 ** This function assumes that space within the buffer has already been
5735 ** allocated.
5737 static void fts5MergeAppendDocid(
5738 Fts5Buffer *pBuf, /* Buffer to write to */
5739 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
5740 i64 iRowid /* Rowid to append */
5742 assert( pBuf->n!=0 || (*piLastRowid)==0 );
5743 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
5744 *piLastRowid = iRowid;
5746 #endif
/* Macro form: appends the difference between iRowid and iLastRowid,
** computed on u64 operands, to pBuf as a varint, and then assigns iRowid
** to iLastRowid. Unlike the disabled function above, iLastRowid is an
** lvalue rather than a pointer, and the iRowid argument is expanded
** twice, so it should not have side effects. */
5748 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
5749 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
5750 fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
5751 (iLastRowid) = (iRowid); \
5755 ** Swap the contents of buffer *p1 with that of *p2.
5757 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
5758 Fts5Buffer tmp = *p1;
5759 *p1 = *p2;
5760 *p2 = tmp;
5763 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
5764 int i = *piOff;
5765 if( i>=pBuf->n ){
5766 *piOff = -1;
5767 }else{
5768 u64 iVal;
5769 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
5770 *piRowid += iVal;
5775 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
5776 ** In this case the buffers consist of a delta-encoded list of rowids only.
5778 static void fts5MergeRowidLists(
5779 Fts5Index *p, /* FTS5 backend object */
5780 Fts5Buffer *p1, /* First list to merge */
5781 int nBuf, /* Number of entries in apBuf[] */
5782 Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
5784 int i1 = 0;
5785 int i2 = 0;
5786 i64 iRowid1 = 0;
5787 i64 iRowid2 = 0;
5788 i64 iOut = 0;
5789 Fts5Buffer *p2 = &aBuf[0];
5790 Fts5Buffer out;
5792 (void)nBuf;
5793 memset(&out, 0, sizeof(out));
5794 assert( nBuf==1 );
5795 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
5796 if( p->rc ) return;
5798 fts5NextRowid(p1, &i1, &iRowid1);
5799 fts5NextRowid(p2, &i2, &iRowid2);
5800 while( i1>=0 || i2>=0 ){
5801 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
5802 assert( iOut==0 || iRowid1>iOut );
5803 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
5804 iOut = iRowid1;
5805 fts5NextRowid(p1, &i1, &iRowid1);
5806 }else{
5807 assert( iOut==0 || iRowid2>iOut );
5808 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
5809 iOut = iRowid2;
5810 if( i1>=0 && iRowid1==iRowid2 ){
5811 fts5NextRowid(p1, &i1, &iRowid1);
5813 fts5NextRowid(p2, &i2, &iRowid2);
5817 fts5BufferSwap(&out, p1);
5818 fts5BufferFree(&out);
5821 typedef struct PrefixMerger PrefixMerger;
5822 struct PrefixMerger {
5823 Fts5DoclistIter iter; /* Doclist iterator */
5824 i64 iPos; /* For iterating through a position list */
5825 int iOff;
5826 u8 *aPos;
5827 PrefixMerger *pNext; /* Next in docid/poslist order */
5830 static void fts5PrefixMergerInsertByRowid(
5831 PrefixMerger **ppHead,
5832 PrefixMerger *p
5834 if( p->iter.aPoslist ){
5835 PrefixMerger **pp = ppHead;
5836 while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
5837 pp = &(*pp)->pNext;
5839 p->pNext = *pp;
5840 *pp = p;
5844 static void fts5PrefixMergerInsertByPosition(
5845 PrefixMerger **ppHead,
5846 PrefixMerger *p
5848 if( p->iPos>=0 ){
5849 PrefixMerger **pp = ppHead;
5850 while( *pp && p->iPos>(*pp)->iPos ){
5851 pp = &(*pp)->pNext;
5853 p->pNext = *pp;
5854 *pp = p;
5860 ** Array aBuf[] contains nBuf doclists. These are all merged in with the
5861 ** doclist in buffer p1.
/*
** Reviewer notes:
**
**   * nBuf+1 must not exceed FTS5_MERGE_NLIST, the size of the local
**     aMerger[] array (see the assert below).
**   * If every buffer in aBuf[] is empty (nOut==0) the function returns
**     early and p1 is not modified.
**   * Otherwise, at the end the old contents of p1 are freed and the
**     output buffer is assigned to *p1.
**   * Errors are reported by setting p->rc.
*/
5863 static void fts5MergePrefixLists(
5864 Fts5Index *p, /* FTS5 backend object */
5865 Fts5Buffer *p1, /* First list to merge */
5866 int nBuf, /* Number of buffers in array aBuf[] */
5867 Fts5Buffer *aBuf /* Other lists to merge in */
5869 #define fts5PrefixMergerNextPosition(p) \
5870 sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
5871 #define FTS5_MERGE_NLIST 16
5872 PrefixMerger aMerger[FTS5_MERGE_NLIST];
5873 PrefixMerger *pHead = 0;
5874 int i;
5875 int nOut = 0;
5876 Fts5Buffer out = {0, 0, 0};
5877 Fts5Buffer tmp = {0, 0, 0};
5878 i64 iLastRowid = 0;
5880 /* Initialize a doclist-iterator for each input buffer. Arrange them in
5881 ** a linked-list starting at pHead in ascending order of rowid. Avoid
5882 ** linking any iterators already at EOF into the linked list at all. */
5883 assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
5884 memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
5885 pHead = &aMerger[nBuf];
5886 fts5DoclistIterInit(p1, &pHead->iter);
5887 for(i=0; i<nBuf; i++){
5888 fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
5889 fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
5890 nOut += aBuf[i].n;
5892 if( nOut==0 ) return;
5893 nOut += p1->n + 9 + 10*nBuf;
5895 /* The maximum size of the output is equal to the sum of the
5896 ** input sizes + 1 varint (9 bytes). The extra varint is because if the
5897 ** first rowid in one input is a large negative number, and the first in
5898 ** the other a non-negative number, the delta for the non-negative
5899 ** number will be larger on disk than the literal integer value
5900 ** was.
5902 ** Or, if the input position-lists are corrupt, then the output might
5903 ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
5904 ** (the value PoslistNext64() uses for EOF) as a position and appending
5905 ** it to the output. This can happen at most once for each input
5906 ** position-list, hence (nBuf+1) 10 byte paddings. */
5907 if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;
/* Main loop. pHead is the merger with the smallest current rowid. Its
** rowid delta is appended to the output first. Then either the position
** lists of all mergers sharing that rowid are merged, or the single
** position list is copied to the output unchanged. */
5909 while( pHead ){
5910 fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);
5912 if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
5913 /* Merge data from two or more poslists */
5914 i64 iPrev = 0;
5915 int nTmp = FTS5_DATA_ZERO_PADDING;
5916 int nMerge = 0;
5917 PrefixMerger *pSave = pHead;
5918 PrefixMerger *pThis = 0;
5919 int nTail = 0;
5921 pHead = 0;
5922 while( pSave && pSave->iter.iRowid==iLastRowid ){
5923 PrefixMerger *pNext = pSave->pNext;
5924 pSave->iOff = 0;
5925 pSave->iPos = 0;
5926 pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
5927 fts5PrefixMergerNextPosition(pSave);
5928 nTmp += pSave->iter.nPoslist + 10;
5929 nMerge++;
5930 fts5PrefixMergerInsertByPosition(&pHead, pSave);
5931 pSave = pNext;
/* fts5PrefixMergerInsertByPosition() only links mergers whose iPos is
** non-negative, so a list with fewer than two entries here means that at
** least one input position list produced no position at all. This is
** treated as corruption. */
5934 if( pHead==0 || pHead->pNext==0 ){
5935 p->rc = FTS5_CORRUPT;
5936 break;
5939 /* See the earlier comment in this function for an explanation of why
5940 ** corrupt input position lists might cause the output to consume
5941 ** at most nMerge*10 bytes of unexpected space. */
5942 if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
5943 break;
5945 fts5BufferZero(&tmp);
5947 pThis = pHead;
5948 pHead = pThis->pNext;
5949 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5950 fts5PrefixMergerNextPosition(pThis);
5951 fts5PrefixMergerInsertByPosition(&pHead, pThis);
5953 while( pHead->pNext ){
5954 pThis = pHead;
5955 if( pThis->iPos!=iPrev ){
5956 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5958 fts5PrefixMergerNextPosition(pThis);
5959 pHead = pThis->pNext;
5960 fts5PrefixMergerInsertByPosition(&pHead, pThis);
/* Only one merger is left in the position-ordered list. Append its
** current position unless it repeats the previous one; the unread
** remainder of its position list (nTail bytes) is copied further down. */
5963 if( pHead->iPos!=iPrev ){
5964 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
5966 nTail = pHead->iter.nPoslist - pHead->iOff;
5968 /* WRITEPOSLISTSIZE */
5969 assert_nc( tmp.n+nTail<=nTmp );
5970 assert( tmp.n+nTail<=nTmp+nMerge*10 );
5971 if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
5972 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
5973 break;
5975 fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
5976 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
5977 if( nTail>0 ){
5978 fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
/* Go back to the rowid-ordered list: pSave holds the mergers that were
** not positioned on iLastRowid. Every merger that was is advanced to its
** next entry and re-inserted if it is not at EOF. */
5981 pHead = pSave;
5982 for(i=0; i<nBuf+1; i++){
5983 PrefixMerger *pX = &aMerger[i];
5984 if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
5985 fts5DoclistIterNext(&pX->iter);
5986 fts5PrefixMergerInsertByRowid(&pHead, pX);
5990 }else{
5991 /* Copy poslist from pHead to output */
5992 PrefixMerger *pThis = pHead;
5993 Fts5DoclistIter *pI = &pThis->iter;
5994 fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
5995 fts5DoclistIterNext(pI);
5996 pHead = pThis->pNext;
5997 fts5PrefixMergerInsertByRowid(&pHead, pThis);
/* Release the old p1 contents and the temporary buffer, zero the padding
** bytes that follow the output data, and hand the output buffer to *p1. */
6001 fts5BufferFree(p1);
6002 fts5BufferFree(&tmp);
6003 memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
6004 *p1 = out;
/*
** Build the iterator used for a prefix query and return it via *ppIter.
**
** Terms of index iIdx are scanned starting at pToken/nToken; the scan
** stops at the first new term that does not begin with those nToken
** bytes. The doclist entries visited are accumulated in a single merged
** doclist, which is copied into an Fts5Data object and passed to
** fts5MultiIterNew2() together with bDesc.
**
** pToken[0] is overwritten by this function (first with FTS5_MAIN_PREFIX
** when iIdx!=0, then with FTS5_MAIN_PREFIX+iIdx), so the buffer must be
** writable.
**
** The append and merge callbacks depend on the detail mode:
** fts5AppendRowid/fts5MergeRowidLists for FTS5_DETAIL_NONE, and
** fts5AppendPoslist/fts5MergePrefixLists otherwise.
*/
6007 static void fts5SetupPrefixIter(
6008 Fts5Index *p, /* Index to read from */
6009 int bDesc, /* True for "ORDER BY rowid DESC" */
6010 int iIdx, /* Index to scan for data */
6011 u8 *pToken, /* Buffer containing prefix to match */
6012 int nToken, /* Size of buffer pToken in bytes */
6013 Fts5Colset *pColset, /* Restrict matches to these columns */
6014 Fts5Iter **ppIter /* OUT: New iterator */
6016 Fts5Structure *pStruct;
6017 Fts5Buffer *aBuf;
6018 int nBuf = 32;
6019 int nMerge = 1;
6021 void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
6022 void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
6023 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
6024 xMerge = fts5MergeRowidLists;
6025 xAppend = fts5AppendRowid;
6026 }else{
6027 nMerge = FTS5_MERGE_NLIST-1;
6028 nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */
6029 xMerge = fts5MergePrefixLists;
6030 xAppend = fts5AppendPoslist;
6033 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
6034 pStruct = fts5StructureRead(p);
6036 if( aBuf && pStruct ){
6037 const int flags = FTS5INDEX_QUERY_SCAN
6038 | FTS5INDEX_QUERY_SKIPEMPTY
6039 | FTS5INDEX_QUERY_NOOUTPUT;
6040 int i;
6041 i64 iLastRowid = 0;
6042 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
6043 Fts5Data *pData;
6044 Fts5Buffer doclist;
6045 int bNewTerm = 1;
6047 memset(&doclist, 0, sizeof(doclist));
/* For an index other than the main one, first gather entries from the
** main index (prefix byte FTS5_MAIN_PREFIX) with an iterator opened
** without FTS5INDEX_QUERY_SCAN.
** NOTE(review): presumably this collects the entries of the term that is
** exactly equal to pToken - confirm against fts5MultiIterNew(). */
6048 if( iIdx!=0 ){
6049 int dummy = 0;
6050 const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
6051 pToken[0] = FTS5_MAIN_PREFIX;
6052 fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
6053 fts5IterSetOutputCb(&p->rc, p1);
6054 for(;
6055 fts5MultiIterEof(p, p1)==0;
6056 fts5MultiIterNext2(p, p1, &dummy)
6058 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
6059 p1->xSetOutputs(p1, pSeg);
6060 if( p1->base.nData ){
6061 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
6062 iLastRowid = p1->base.iRowid;
6065 fts5MultiIterFree(p1);
6068 pToken[0] = FTS5_MAIN_PREFIX + iIdx;
6069 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
6070 fts5IterSetOutputCb(&p->rc, p1);
6071 for( /* no-op */ ;
6072 fts5MultiIterEof(p, p1)==0;
6073 fts5MultiIterNext2(p, p1, &bNewTerm)
6075 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
6076 int nTerm = pSeg->term.n;
6077 const u8 *pTerm = pSeg->term.p;
6078 p1->xSetOutputs(p1, pSeg);
6080 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
6081 if( bNewTerm ){
6082 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
6085 if( p1->base.nData==0 ) continue;
/* The next rowid is not larger than the last one appended, so it cannot
** be added to doclist as a delta. Move doclist into the first free slot
** of tier i (nMerge buffers starting at aBuf[i*nMerge]). If the tier has
** no free slot, merge the whole tier into doclist, clear the tier and
** retry with the next tier. The loop ends once doclist is empty. */
6087 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
6088 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
6089 int i1 = i*nMerge;
6090 int iStore;
6091 assert( i1+nMerge<=nBuf );
6092 for(iStore=i1; iStore<i1+nMerge; iStore++){
6093 if( aBuf[iStore].n==0 ){
6094 fts5BufferSwap(&doclist, &aBuf[iStore]);
6095 fts5BufferZero(&doclist);
6096 break;
6099 if( iStore==i1+nMerge ){
6100 xMerge(p, &doclist, nMerge, &aBuf[i1]);
6101 for(iStore=i1; iStore<i1+nMerge; iStore++){
6102 fts5BufferZero(&aBuf[iStore]);
6106 iLastRowid = 0;
6109 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
6110 iLastRowid = p1->base.iRowid;
/* Fold every tier of stored buffers back into doclist, skipping the
** merge once an error has occurred, and free the tier buffers. */
6113 assert( (nBuf%nMerge)==0 );
6114 for(i=0; i<nBuf; i+=nMerge){
6115 int iFree;
6116 if( p->rc==SQLITE_OK ){
6117 xMerge(p, &doclist, nMerge, &aBuf[i]);
6119 for(iFree=i; iFree<i+nMerge; iFree++){
6120 fts5BufferFree(&aBuf[iFree]);
6123 fts5MultiIterFree(p1);
/* Copy the merged doclist into a standalone Fts5Data object that has
** FTS5_DATA_ZERO_PADDING extra bytes after the data.
** NOTE(review): ownership of pData appears to pass to
** fts5MultiIterNew2() - confirm. */
6125 pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
6126 if( pData ){
6127 pData->p = (u8*)&pData[1];
6128 pData->nn = pData->szLeaf = doclist.n;
6129 if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
6130 fts5MultiIterNew2(p, pData, bDesc, ppIter);
6132 fts5BufferFree(&doclist);
6135 fts5StructureRelease(pStruct);
6136 sqlite3_free(aBuf);
6141 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
6142 ** to the document with rowid iRowid.
6144 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
6145 assert( p->rc==SQLITE_OK );
6147 /* Allocate the hash table if it has not already been allocated */
6148 if( p->pHash==0 ){
6149 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
6152 /* Flush the hash table to disk if required */
6153 if( iRowid<p->iWriteRowid
6154 || (iRowid==p->iWriteRowid && p->bDelete==0)
6155 || (p->nPendingData > p->pConfig->nHashSize)
6157 fts5IndexFlush(p);
6160 p->iWriteRowid = iRowid;
6161 p->bDelete = bDelete;
6162 if( bDelete==0 ){
6163 p->nPendingRow++;
6165 return fts5IndexReturn(p);
6169 ** Commit data to disk.
6171 int sqlite3Fts5IndexSync(Fts5Index *p){
6172 assert( p->rc==SQLITE_OK );
6173 fts5IndexFlush(p);
6174 sqlite3Fts5IndexCloseReader(p);
6175 return fts5IndexReturn(p);
6179 ** Discard any data stored in the in-memory hash tables. Do not write it
6180 ** to the database. Additionally, assume that the contents of the %_data
6181 ** table may have changed on disk. So any in-memory caches of %_data
6182 ** records must be invalidated.
6184 int sqlite3Fts5IndexRollback(Fts5Index *p){
6185 sqlite3Fts5IndexCloseReader(p);
6186 fts5IndexDiscardData(p);
6187 fts5StructureInvalidate(p);
6188 /* assert( p->rc==SQLITE_OK ); */
6189 return SQLITE_OK;
6193 ** The %_data table is completely empty when this function is called. This
6194 ** function populates it with the initial structure objects for each index,
6195 ** and the initial version of the "averages" record (a zero-byte blob).
6197 int sqlite3Fts5IndexReinit(Fts5Index *p){
6198 Fts5Structure s;
6199 fts5StructureInvalidate(p);
6200 fts5IndexDiscardData(p);
6201 memset(&s, 0, sizeof(Fts5Structure));
6202 if( p->pConfig->bContentlessDelete ){
6203 s.nOriginCntr = 1;
6205 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
6206 fts5StructureWrite(p, &s);
6207 return fts5IndexReturn(p);
6211 ** Open a new Fts5Index handle. If the bCreate argument is true, create
6212 ** and initialize the underlying %_data table.
6214 ** If successful, set *pp to point to the new object and return SQLITE_OK.
6215 ** Otherwise, set *pp to NULL and return an SQLite error code.
6217 int sqlite3Fts5IndexOpen(
6218 Fts5Config *pConfig,
6219 int bCreate,
6220 Fts5Index **pp,
6221 char **pzErr
6223 int rc = SQLITE_OK;
6224 Fts5Index *p; /* New object */
6226 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
6227 if( rc==SQLITE_OK ){
6228 p->pConfig = pConfig;
6229 p->nWorkUnit = FTS5_WORK_UNIT;
6230 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
6231 if( p->zDataTbl && bCreate ){
6232 rc = sqlite3Fts5CreateTable(
6233 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
6235 if( rc==SQLITE_OK ){
6236 rc = sqlite3Fts5CreateTable(pConfig, "idx",
6237 "segid, term, pgno, PRIMARY KEY(segid, term)",
6238 1, pzErr
6241 if( rc==SQLITE_OK ){
6242 rc = sqlite3Fts5IndexReinit(p);
6247 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
6248 if( rc ){
6249 sqlite3Fts5IndexClose(p);
6250 *pp = 0;
6252 return rc;
6256 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
6258 int sqlite3Fts5IndexClose(Fts5Index *p){
6259 int rc = SQLITE_OK;
6260 if( p ){
6261 assert( p->pReader==0 );
6262 fts5StructureInvalidate(p);
6263 sqlite3_finalize(p->pWriter);
6264 sqlite3_finalize(p->pDeleter);
6265 sqlite3_finalize(p->pIdxWriter);
6266 sqlite3_finalize(p->pIdxDeleter);
6267 sqlite3_finalize(p->pIdxSelect);
6268 sqlite3_finalize(p->pDataVersion);
6269 sqlite3_finalize(p->pDeleteFromIdx);
6270 sqlite3Fts5HashFree(p->pHash);
6271 sqlite3_free(p->zDataTbl);
6272 sqlite3_free(p);
6274 return rc;
/*
** Buffer p holds nByte bytes of utf-8 text. Return the size in bytes of
** the prefix made up of its first nChar characters, or 0 if the buffer
** holds fewer than nChar characters.
**
** A character is one byte, plus, if that byte is 0xC0 or greater, all of
** the continuation bytes (10xxxxxx) that follow it. 0 is also returned if
** a byte that is 0xC0 or greater is the very last byte of the buffer.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  const unsigned char *z = (const unsigned char*)p;
  int iByte = 0;                  /* Bytes consumed so far */
  int nSeen = 0;                  /* Characters consumed so far */

  while( nSeen<nChar ){
    if( iByte>=nByte ) return 0;  /* Ran out of input */
    if( z[iByte++]>=0xc0 ){
      if( iByte>=nByte ) return 0;
      while( (z[iByte] & 0xc0)==0x80 ){
        iByte++;
        if( iByte>=nByte ){
          /* End of input: fine only if this was the last character */
          if( nSeen+1==nChar ) break;
          return 0;
        }
      }
    }
    nSeen++;
  }
  return iByte;
}
/*
** Count the characters in the nIn byte utf-8 buffer pIn. Each byte starts
** a new character, except that the continuation bytes (10xxxxxx) that
** directly follow a byte of value 0xC0 or greater are counted as part of
** that character.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  const unsigned char *z = (const unsigned char*)pIn;
  int nCount = 0;
  int iByte;

  for(iByte=0; iByte<nIn; nCount++){
    if( z[iByte++]>=0xc0 ){
      /* Skip the continuation bytes of a multi-byte character */
      while( iByte<nIn && (z[iByte] & 0xc0)==0x80 ){
        iByte++;
      }
    }
  }
  return nCount;
}
6322 ** Insert or remove data to or from the index. Each time a document is
6323 ** added to or removed from the index, this function is called one or more
6324 ** times.
6326 ** For an insert, it must be called once for each token in the new document.
6327 ** If the operation is a delete, it must be called (at least) once for each
6328 ** unique token in the document with an iCol value less than zero. The iPos
6329 ** argument is ignored for a delete.
6331 int sqlite3Fts5IndexWrite(
6332 Fts5Index *p, /* Index to write to */
6333 int iCol, /* Column token appears in (-ve -> delete) */
6334 int iPos, /* Position of token within column */
6335 const char *pToken, int nToken /* Token to add or remove to or from index */
6337 int i; /* Used to iterate through indexes */
6338 int rc = SQLITE_OK; /* Return code */
6339 Fts5Config *pConfig = p->pConfig;
6341 assert( p->rc==SQLITE_OK );
6342 assert( (iCol<0)==p->bDelete );
6344 /* Add the entry to the main terms index. */
6345 rc = sqlite3Fts5HashWrite(
6346 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
6349 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
6350 const int nChar = pConfig->aPrefix[i];
6351 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
6352 if( nByte ){
6353 rc = sqlite3Fts5HashWrite(p->pHash,
6354 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
6355 nByte
6360 return rc;
6364 ** Open a new iterator to iterate though all rowid that match the
6365 ** specified token or token prefix.
6367 int sqlite3Fts5IndexQuery(
6368 Fts5Index *p, /* FTS index to query */
6369 const char *pToken, int nToken, /* Token (or prefix) to query for */
6370 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
6371 Fts5Colset *pColset, /* Match these columns only */
6372 Fts5IndexIter **ppIter /* OUT: New iterator object */
6374 Fts5Config *pConfig = p->pConfig;
6375 Fts5Iter *pRet = 0;
6376 Fts5Buffer buf = {0, 0, 0};
6378 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
6379 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
6381 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
6382 int iIdx = 0; /* Index to search */
6383 int iPrefixIdx = 0; /* +1 prefix index */
6384 if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
6386 /* Figure out which index to search and set iIdx accordingly. If this
6387 ** is a prefix query for which there is no prefix index, set iIdx to
6388 ** greater than pConfig->nPrefix to indicate that the query will be
6389 ** satisfied by scanning multiple terms in the main index.
6391 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
6392 ** prefix-query. Instead of using a prefix-index (if one exists),
6393 ** evaluate the prefix query using the main FTS index. This is used
6394 ** for internal sanity checking by the integrity-check in debug
6395 ** mode only. */
6396 #ifdef SQLITE_DEBUG
6397 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
6398 assert( flags & FTS5INDEX_QUERY_PREFIX );
6399 iIdx = 1+pConfig->nPrefix;
6400 }else
6401 #endif
6402 if( flags & FTS5INDEX_QUERY_PREFIX ){
6403 int nChar = fts5IndexCharlen(pToken, nToken);
6404 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
6405 int nIdxChar = pConfig->aPrefix[iIdx-1];
6406 if( nIdxChar==nChar ) break;
6407 if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
6411 if( iIdx<=pConfig->nPrefix ){
6412 /* Straight index lookup */
6413 Fts5Structure *pStruct = fts5StructureRead(p);
6414 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
6415 if( pStruct ){
6416 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
6417 pColset, buf.p, nToken+1, -1, 0, &pRet
6419 fts5StructureRelease(pStruct);
6421 }else{
6422 /* Scan multiple terms in the main index */
6423 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
6424 fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
6425 if( pRet==0 ){
6426 assert( p->rc!=SQLITE_OK );
6427 }else{
6428 assert( pRet->pColset==0 );
6429 fts5IterSetOutputCb(&p->rc, pRet);
6430 if( p->rc==SQLITE_OK ){
6431 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
6432 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
6437 if( p->rc ){
6438 sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
6439 pRet = 0;
6440 sqlite3Fts5IndexCloseReader(p);
6443 *ppIter = (Fts5IndexIter*)pRet;
6444 sqlite3Fts5BufferFree(&buf);
6446 return fts5IndexReturn(p);
6450 ** Return true if the iterator passed as the only argument is at EOF.
6453 ** Move to the next matching rowid.
6455 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
6456 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6457 assert( pIter->pIndex->rc==SQLITE_OK );
6458 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
6459 return fts5IndexReturn(pIter->pIndex);
6463 ** Move to the next matching term/rowid. Used by the fts5vocab module.
6465 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
6466 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6467 Fts5Index *p = pIter->pIndex;
6469 assert( pIter->pIndex->rc==SQLITE_OK );
6471 fts5MultiIterNext(p, pIter, 0, 0);
6472 if( p->rc==SQLITE_OK ){
6473 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
6474 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
6475 fts5DataRelease(pSeg->pLeaf);
6476 pSeg->pLeaf = 0;
6477 pIter->base.bEof = 1;
6481 return fts5IndexReturn(pIter->pIndex);
6485 ** Move to the next matching rowid that occurs at or after iMatch. The
6486 ** definition of "at or after" depends on whether this iterator iterates
6487 ** in ascending or descending rowid order.
6489 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
6490 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6491 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
6492 return fts5IndexReturn(pIter->pIndex);
6496 ** Return the current term.
6498 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
6499 int n;
6500 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
6501 assert_nc( z || n<=1 );
6502 *pn = n-1;
6503 return (z ? &z[1] : 0);
6507 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
6509 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
6510 if( pIndexIter ){
6511 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
6512 Fts5Index *pIndex = pIter->pIndex;
6513 fts5MultiIterFree(pIter);
6514 sqlite3Fts5IndexCloseReader(pIndex);
6519 ** Read and decode the "averages" record from the database.
6521 ** Parameter anSize must point to an array of size nCol, where nCol is
6522 ** the number of user defined columns in the FTS table.
6524 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
6525 int nCol = p->pConfig->nCol;
6526 Fts5Data *pData;
6528 *pnRow = 0;
6529 memset(anSize, 0, sizeof(i64) * nCol);
6530 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
6531 if( p->rc==SQLITE_OK && pData->nn ){
6532 int i = 0;
6533 int iCol;
6534 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
6535 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
6536 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
6540 fts5DataRelease(pData);
6541 return fts5IndexReturn(p);
6545 ** Replace the current "averages" record with the contents of the buffer
6546 ** supplied as the second argument.
6548 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
6549 assert( p->rc==SQLITE_OK );
6550 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
6551 return fts5IndexReturn(p);
6555 ** Return the total number of blocks this module has read from the %_data
6556 ** table since it was created.
6558 int sqlite3Fts5IndexReads(Fts5Index *p){
6559 return p->nRead;
6563 ** Set the 32-bit cookie value stored at the start of all structure
6564 ** records to the value passed as the second argument.
6566 ** Return SQLITE_OK if successful, or an SQLite error code if an error
6567 ** occurs.
6569 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
6570 int rc; /* Return code */
6571 Fts5Config *pConfig = p->pConfig; /* Configuration object */
6572 u8 aCookie[4]; /* Binary representation of iNew */
6573 sqlite3_blob *pBlob = 0;
6575 assert( p->rc==SQLITE_OK );
6576 sqlite3Fts5Put32(aCookie, iNew);
6578 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
6579 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
6581 if( rc==SQLITE_OK ){
6582 sqlite3_blob_write(pBlob, aCookie, 4, 0);
6583 rc = sqlite3_blob_close(pBlob);
6586 return rc;
6589 int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
6590 Fts5Structure *pStruct;
6591 pStruct = fts5StructureRead(p);
6592 fts5StructureRelease(pStruct);
6593 return fts5IndexReturn(p);
6597 ** Retrieve the origin value that will be used for the segment currently
6598 ** being accumulated in the in-memory hash table when it is flushed to
6599 ** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to
6600 ** the queried value. Or, if an error occurs, an error code is returned
6601 ** and the final value of (*piOrigin) is undefined.
6603 int sqlite3Fts5IndexGetOrigin(Fts5Index *p, i64 *piOrigin){
6604 Fts5Structure *pStruct;
6605 pStruct = fts5StructureRead(p);
6606 if( pStruct ){
6607 *piOrigin = pStruct->nOriginCntr;
6608 fts5StructureRelease(pStruct);
6610 return fts5IndexReturn(p);
6614 ** Buffer pPg contains a page of a tombstone hash table - one of nPg pages
6615 ** associated with the same segment. This function adds rowid iRowid to
6616 ** the hash table. The caller is required to guarantee that there is at
6617 ** least one free slot on the page.
6619 ** If parameter bForce is false and the hash table is deemed to be full
6620 ** (more than half of the slots are occupied), then non-zero is returned
6621 ** and iRowid not inserted. Or, if bForce is true or if the hash table page
6622 ** is not full, iRowid is inserted and zero returned.
6624 static int fts5IndexTombstoneAddToPage(
6625 Fts5Data *pPg,
6626 int bForce,
6627 int nPg,
6628 u64 iRowid
6630 const int szKey = TOMBSTONE_KEYSIZE(pPg);
6631 const int nSlot = TOMBSTONE_NSLOT(pPg);
6632 const int nElem = fts5GetU32(&pPg->p[4]);
6633 int iSlot = (iRowid / nPg) % nSlot;
6634 int nCollide = nSlot;
6636 if( szKey==4 && iRowid>0xFFFFFFFF ) return 2;
6637 if( iRowid==0 ){
6638 pPg->p[1] = 0x01;
6639 return 0;
6642 if( bForce==0 && nElem>=(nSlot/2) ){
6643 return 1;
6646 fts5PutU32(&pPg->p[4], nElem+1);
6647 if( szKey==4 ){
6648 u32 *aSlot = (u32*)&pPg->p[8];
6649 while( aSlot[iSlot] ){
6650 iSlot = (iSlot + 1) % nSlot;
6651 if( nCollide--==0 ) return 0;
6653 fts5PutU32((u8*)&aSlot[iSlot], (u32)iRowid);
6654 }else{
6655 u64 *aSlot = (u64*)&pPg->p[8];
6656 while( aSlot[iSlot] ){
6657 iSlot = (iSlot + 1) % nSlot;
6658 if( nCollide--==0 ) return 0;
6660 fts5PutU64((u8*)&aSlot[iSlot], iRowid);
6663 return 0;
6667 ** This function attempts to build a new hash containing all the keys
6668 ** currently in the tombstone hash table for segment pSeg. The new
6669 ** hash will be stored in the nOut buffers passed in array apOut[].
6670 ** All pages of the new hash use key-size szKey (4 or 8).
6672 ** Return 0 if the hash is successfully rebuilt into the nOut pages.
6673 ** Or non-zero if it is not (because one page became overfull). In this
6674 ** case the caller should retry with a larger nOut parameter.
6676 ** Parameter pData1 is page iPg1 of the hash table being rebuilt.
6678 static int fts5IndexTombstoneRehash(
6679 Fts5Index *p,
6680 Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */
6681 Fts5Data *pData1, /* One page of current hash - or NULL */
6682 int iPg1, /* Which page of the current hash is pData1 */
6683 int szKey, /* 4 or 8, the keysize */
6684 int nOut, /* Number of output pages */
6685 Fts5Data **apOut /* Array of output hash pages */
6687 int ii;
6688 int res = 0;
6690 /* Initialize the headers of all the output pages */
6691 for(ii=0; ii<nOut; ii++){
6692 apOut[ii]->p[0] = szKey;
6693 fts5PutU32(&apOut[ii]->p[4], 0);
6696 /* Loop through the current pages of the hash table. */
6697 for(ii=0; res==0 && ii<pSeg->nPgTombstone; ii++){
6698 Fts5Data *pData = 0; /* Page ii of the current hash table */
6699 Fts5Data *pFree = 0; /* Free this at the end of the loop */
6701 if( iPg1==ii ){
6702 pData = pData1;
6703 }else{
6704 pFree = pData = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii));
6707 if( pData ){
6708 int szKeyIn = TOMBSTONE_KEYSIZE(pData);
6709 int nSlotIn = (pData->nn - 8) / szKeyIn;
6710 int iIn;
6711 for(iIn=0; iIn<nSlotIn; iIn++){
6712 u64 iVal = 0;
6714 /* Read the value from slot iIn of the input page into iVal. */
6715 if( szKeyIn==4 ){
6716 u32 *aSlot = (u32*)&pData->p[8];
6717 if( aSlot[iIn] ) iVal = fts5GetU32((u8*)&aSlot[iIn]);
6718 }else{
6719 u64 *aSlot = (u64*)&pData->p[8];
6720 if( aSlot[iIn] ) iVal = fts5GetU64((u8*)&aSlot[iIn]);
6723 /* If iVal is not 0 at this point, insert it into the new hash table */
6724 if( iVal ){
6725 Fts5Data *pPg = apOut[(iVal % nOut)];
6726 res = fts5IndexTombstoneAddToPage(pPg, 0, nOut, iVal);
6727 if( res ) break;
6731 /* If this is page 0 of the old hash, copy the rowid-0-flag from the
6732 ** old hash to the new. */
6733 if( ii==0 ){
6734 apOut[0]->p[1] = pData->p[1];
6737 fts5DataRelease(pFree);
6740 return res;
6744 ** This is called to rebuild the hash table belonging to segment pSeg.
6745 ** If parameter pData1 is not NULL, then one page of the existing hash table
6746 ** has already been loaded - pData1, which is page iPg1. The key-size for
6747 ** the new hash table is szKey (4 or 8).
6749 ** If successful, the new hash table is not written to disk. Instead,
6750 ** output parameter (*pnOut) is set to the number of pages in the new
6751 ** hash table, and (*papOut) to point to an array of buffers containing
6752 ** the new page data.
6754 ** If an error occurs, an error code is left in the Fts5Index object and
6755 ** both output parameters set to 0 before returning.
6757 static void fts5IndexTombstoneRebuild(
6758 Fts5Index *p,
6759 Fts5StructureSegment *pSeg, /* Segment to rebuild hash of */
6760 Fts5Data *pData1, /* One page of current hash - or NULL */
6761 int iPg1, /* Which page of the current hash is pData1 */
6762 int szKey, /* 4 or 8, the keysize */
6763 int *pnOut, /* OUT: Number of output pages */
6764 Fts5Data ***papOut /* OUT: Output hash pages */
6766 const int MINSLOT = 32;
6767 int nSlotPerPage = MAX(MINSLOT, (p->pConfig->pgsz - 8) / szKey);
6768 int nSlot = 0; /* Number of slots in each output page */
6769 int nOut = 0;
6771 /* Figure out how many output pages (nOut) and how many slots per
6772 ** page (nSlot). There are three possibilities:
6774 ** 1. The hash table does not yet exist. In this case the new hash
6775 ** table will consist of a single page with MINSLOT slots.
6777 ** 2. The hash table exists but is currently a single page. In this
6778 ** case an attempt is made to grow the page to accommodate the new
6779 ** entry. The page is allowed to grow up to nSlotPerPage (see above)
6780 ** slots.
6782 ** 3. The hash table already consists of more than one page, or of
6783 ** a single page already so large that it cannot be grown. In this
6784 ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage
6785 ** slots each, where nPg is the current number of pages in the
6786 ** hash table.
6788 if( pSeg->nPgTombstone==0 ){
6789 /* Case 1. */
6790 nOut = 1;
6791 nSlot = MINSLOT;
6792 }else if( pSeg->nPgTombstone==1 ){
6793 /* Case 2. */
6794 int nElem = (int)fts5GetU32(&pData1->p[4]);
6795 assert( pData1 && iPg1==0 );
6796 nOut = 1;
6797 nSlot = MAX(nElem*4, MINSLOT);
6798 if( nSlot>nSlotPerPage ) nOut = 0;
6800 if( nOut==0 ){
6801 /* Case 3. */
6802 nOut = (pSeg->nPgTombstone * 2 + 1);
6803 nSlot = nSlotPerPage;
6806 /* Allocate the required array and output pages */
6807 while( 1 ){
6808 int res = 0;
6809 int ii = 0;
6810 int szPage = 0;
6811 Fts5Data **apOut = 0;
6813 /* Allocate space for the new hash table */
6814 assert( nSlot>=MINSLOT );
6815 apOut = (Fts5Data**)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5Data*) * nOut);
6816 szPage = 8 + nSlot*szKey;
6817 for(ii=0; ii<nOut; ii++){
6818 Fts5Data *pNew = (Fts5Data*)sqlite3Fts5MallocZero(&p->rc,
6819 sizeof(Fts5Data)+szPage
6821 if( pNew ){
6822 pNew->nn = szPage;
6823 pNew->p = (u8*)&pNew[1];
6824 apOut[ii] = pNew;
6828 /* Rebuild the hash table. */
6829 if( p->rc==SQLITE_OK ){
6830 res = fts5IndexTombstoneRehash(p, pSeg, pData1, iPg1, szKey, nOut, apOut);
6832 if( res==0 ){
6833 if( p->rc ){
6834 fts5IndexFreeArray(apOut, nOut);
6835 apOut = 0;
6836 nOut = 0;
6838 *pnOut = nOut;
6839 *papOut = apOut;
6840 break;
6843 /* If control flows to here, it was not possible to rebuild the hash
6844 ** table. Free all buffers and then try again with more pages. */
6845 assert( p->rc==SQLITE_OK );
6846 fts5IndexFreeArray(apOut, nOut);
6847 nSlot = nSlotPerPage;
6848 nOut = nOut*2 + 1;
6854 ** Add a tombstone for rowid iRowid to segment pSeg.
6856 static void fts5IndexTombstoneAdd(
6857 Fts5Index *p,
6858 Fts5StructureSegment *pSeg,
6859 u64 iRowid
6861 Fts5Data *pPg = 0;
6862 int iPg = -1;
6863 int szKey = 0;
6864 int nHash = 0;
6865 Fts5Data **apHash = 0;
6867 p->nContentlessDelete++;
6869 if( pSeg->nPgTombstone>0 ){
6870 iPg = iRowid % pSeg->nPgTombstone;
6871 pPg = fts5DataRead(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg));
6872 if( pPg==0 ){
6873 assert( p->rc!=SQLITE_OK );
6874 return;
6877 if( 0==fts5IndexTombstoneAddToPage(pPg, 0, pSeg->nPgTombstone, iRowid) ){
6878 fts5DataWrite(p, FTS5_TOMBSTONE_ROWID(pSeg->iSegid,iPg), pPg->p, pPg->nn);
6879 fts5DataRelease(pPg);
6880 return;
6884 /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */
6885 szKey = pPg ? TOMBSTONE_KEYSIZE(pPg) : 4;
6886 if( iRowid>0xFFFFFFFF ) szKey = 8;
6888 /* Rebuild the hash table */
6889 fts5IndexTombstoneRebuild(p, pSeg, pPg, iPg, szKey, &nHash, &apHash);
6890 assert( p->rc==SQLITE_OK || (nHash==0 && apHash==0) );
6892 /* If all has succeeded, write the new rowid into one of the new hash
6893 ** table pages, then write them all out to disk. */
6894 if( nHash ){
6895 int ii = 0;
6896 fts5IndexTombstoneAddToPage(apHash[iRowid % nHash], 1, nHash, iRowid);
6897 for(ii=0; ii<nHash; ii++){
6898 i64 iTombstoneRowid = FTS5_TOMBSTONE_ROWID(pSeg->iSegid, ii);
6899 fts5DataWrite(p, iTombstoneRowid, apHash[ii]->p, apHash[ii]->nn);
6901 pSeg->nPgTombstone = nHash;
6902 fts5StructureWrite(p, p->pStruct);
6905 fts5DataRelease(pPg);
6906 fts5IndexFreeArray(apHash, nHash);
6910 ** Add iRowid to the tombstone list of the segment or segments that contain
6911 ** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite
6912 ** error code otherwise.
6914 int sqlite3Fts5IndexContentlessDelete(Fts5Index *p, i64 iOrigin, i64 iRowid){
6915 Fts5Structure *pStruct;
6916 pStruct = fts5StructureRead(p);
6917 if( pStruct ){
6918 int bFound = 0; /* True after pSeg->nEntryTombstone incr. */
6919 int iLvl;
6920 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
6921 int iSeg;
6922 for(iSeg=pStruct->aLevel[iLvl].nSeg-1; iSeg>=0; iSeg--){
6923 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
6924 if( pSeg->iOrigin1<=(u64)iOrigin && pSeg->iOrigin2>=(u64)iOrigin ){
6925 if( bFound==0 ){
6926 pSeg->nEntryTombstone++;
6927 bFound = 1;
6929 fts5IndexTombstoneAdd(p, pSeg, iRowid);
6933 fts5StructureRelease(pStruct);
6935 return fts5IndexReturn(p);
6938 /*************************************************************************
6939 **************************************************************************
6940 ** Below this point is the implementation of the integrity-check
6941 ** functionality.
6945 ** Return a simple checksum value based on the arguments.
6947 u64 sqlite3Fts5IndexEntryCksum(
6948 i64 iRowid,
6949 int iCol,
6950 int iPos,
6951 int iIdx,
6952 const char *pTerm,
6953 int nTerm
6955 int i;
6956 u64 ret = iRowid;
6957 ret += (ret<<3) + iCol;
6958 ret += (ret<<3) + iPos;
6959 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
6960 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
6961 return ret;
6964 #ifdef SQLITE_DEBUG
6966 ** This function is purely an internal test. It does not contribute to
6967 ** FTS functionality, or even the integrity-check, in any way.
6969 ** Instead, it tests that the same set of pgno/rowid combinations are
6970 ** visited regardless of whether the doclist-index identified by parameters
6971 ** iSegid/iLeaf is iterated in forwards or reverse order.
6973 static void fts5TestDlidxReverse(
6974 Fts5Index *p,
6975 int iSegid, /* Segment id to load from */
6976 int iLeaf /* Load doclist-index for this leaf */
6978 Fts5DlidxIter *pDlidx = 0;
6979 u64 cksum1 = 13;
6980 u64 cksum2 = 13;
6982 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
6983 fts5DlidxIterEof(p, pDlidx)==0;
6984 fts5DlidxIterNext(p, pDlidx)
6986 i64 iRowid = fts5DlidxIterRowid(pDlidx);
6987 int pgno = fts5DlidxIterPgno(pDlidx);
6988 assert( pgno>iLeaf );
6989 cksum1 += iRowid + ((i64)pgno<<32);
6991 fts5DlidxIterFree(pDlidx);
6992 pDlidx = 0;
6994 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
6995 fts5DlidxIterEof(p, pDlidx)==0;
6996 fts5DlidxIterPrev(p, pDlidx)
6998 i64 iRowid = fts5DlidxIterRowid(pDlidx);
6999 int pgno = fts5DlidxIterPgno(pDlidx);
7000 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
7001 cksum2 += iRowid + ((i64)pgno<<32);
7003 fts5DlidxIterFree(pDlidx);
7004 pDlidx = 0;
7006 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
7009 static int fts5QueryCksum(
7010 Fts5Index *p, /* Fts5 index object */
7011 int iIdx,
7012 const char *z, /* Index key to query for */
7013 int n, /* Size of index key in bytes */
7014 int flags, /* Flags for Fts5IndexQuery */
7015 u64 *pCksum /* IN/OUT: Checksum value */
7017 int eDetail = p->pConfig->eDetail;
7018 u64 cksum = *pCksum;
7019 Fts5IndexIter *pIter = 0;
7020 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
7022 while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
7023 i64 rowid = pIter->iRowid;
7025 if( eDetail==FTS5_DETAIL_NONE ){
7026 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
7027 }else{
7028 Fts5PoslistReader sReader;
7029 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
7030 sReader.bEof==0;
7031 sqlite3Fts5PoslistReaderNext(&sReader)
7033 int iCol = FTS5_POS2COLUMN(sReader.iPos);
7034 int iOff = FTS5_POS2OFFSET(sReader.iPos);
7035 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
7038 if( rc==SQLITE_OK ){
7039 rc = sqlite3Fts5IterNext(pIter);
7042 sqlite3Fts5IterClose(pIter);
7044 *pCksum = cksum;
7045 return rc;
/*
** Check if buffer z[], size n bytes, contains as series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of the encoding is checked: each lead byte must be
** followed, within the buffer, by the correct number of continuation
** bytes (10xxxxxx). Overlong encodings and out-of-range codepoints are
** not detected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    if( (z[i] & 0x80)==0x00 ){
      /* 0xxxxxxx: single byte character */
      i++;
    }else
    if( (z[i] & 0xE0)==0xC0 ){
      /* 110xxxxx: lead byte of a 2 byte character */
      if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
      i += 2;
    }else
    if( (z[i] & 0xF0)==0xE0 ){
      /* 1110xxxx: lead byte of a 3 byte character */
      if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else
    if( (z[i] & 0xF8)==0xF0 ){
      /* 11110xxx: lead byte of a 4 byte character. All three continuation
      ** bytes must be checked and all four bytes consumed. Otherwise the
      ** final continuation byte is mistaken for a lead byte by the next
      ** iteration and every 4 byte character is reported as invalid. */
      if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      if( (z[i+3] & 0xC0)!=0x80 ) return 1;
      i += 4;
    }else{
      /* Stray continuation byte or invalid lead byte */
      return 1;
    }
  }

  return 0;
}
7081 ** This function is also purely an internal test. It does not contribute to
7082 ** FTS functionality, or even the integrity-check, in any way.
7084 static void fts5TestTerm(
7085 Fts5Index *p,
7086 Fts5Buffer *pPrev, /* Previous term */
7087 const char *z, int n, /* Possibly new term to test */
7088 u64 expected,
7089 u64 *pCksum
7091 int rc = p->rc;
7092 if( pPrev->n==0 ){
7093 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
7094 }else
7095 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
7096 u64 cksum3 = *pCksum;
7097 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
7098 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
7099 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
7100 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
7101 u64 ck1 = 0;
7102 u64 ck2 = 0;
7104 /* Check that the results returned for ASC and DESC queries are
7105 ** the same. If not, call this corruption. */
7106 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
7107 if( rc==SQLITE_OK ){
7108 int f = flags|FTS5INDEX_QUERY_DESC;
7109 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
7111 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
7113 /* If this is a prefix query, check that the results returned if the
7114 ** the index is disabled are the same. In both ASC and DESC order.
7116 ** This check may only be performed if the hash table is empty. This
7117 ** is because the hash table only supports a single scan query at
7118 ** a time, and the multi-iter loop from which this function is called
7119 ** is already performing such a scan.
7121 ** Also only do this if buffer zTerm contains nTerm bytes of valid
7122 ** utf-8. Otherwise, the last part of the buffer contents might contain
7123 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
7124 ** character stored in the main fts index, which will cause the
7125 ** test to fail. */
7126 if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
7127 if( iIdx>0 && rc==SQLITE_OK ){
7128 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
7129 ck2 = 0;
7130 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
7131 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
7133 if( iIdx>0 && rc==SQLITE_OK ){
7134 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
7135 ck2 = 0;
7136 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
7137 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
7141 cksum3 ^= ck1;
7142 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
7144 if( rc==SQLITE_OK && cksum3!=expected ){
7145 rc = FTS5_CORRUPT;
7147 *pCksum = cksum3;
7149 p->rc = rc;
7152 #else
7153 # define fts5TestDlidxReverse(x,y,z)
7154 # define fts5TestTerm(u,v,w,x,y,z)
7155 #endif
7158 ** Check that:
7160 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
7161 ** contain zero terms.
7162 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
7163 ** contain zero rowids.
7165 static void fts5IndexIntegrityCheckEmpty(
7166 Fts5Index *p,
7167 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
7168 int iFirst,
7169 int iNoRowid,
7170 int iLast
7172 int i;
7174 /* Now check that the iter.nEmpty leaves following the current leaf
7175 ** (a) exist and (b) contain no terms. */
7176 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
7177 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
7178 if( pLeaf ){
7179 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
7180 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
7182 fts5DataRelease(pLeaf);
7186 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
7187 int iTermOff = 0;
7188 int ii;
7190 Fts5Buffer buf1 = {0,0,0};
7191 Fts5Buffer buf2 = {0,0,0};
7193 ii = pLeaf->szLeaf;
7194 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
7195 int res;
7196 int iOff;
7197 int nIncr;
7199 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
7200 iTermOff += nIncr;
7201 iOff = iTermOff;
7203 if( iOff>=pLeaf->szLeaf ){
7204 p->rc = FTS5_CORRUPT;
7205 }else if( iTermOff==nIncr ){
7206 int nByte;
7207 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
7208 if( (iOff+nByte)>pLeaf->szLeaf ){
7209 p->rc = FTS5_CORRUPT;
7210 }else{
7211 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
7213 }else{
7214 int nKeep, nByte;
7215 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
7216 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
7217 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
7218 p->rc = FTS5_CORRUPT;
7219 }else{
7220 buf1.n = nKeep;
7221 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
7224 if( p->rc==SQLITE_OK ){
7225 res = fts5BufferCompare(&buf1, &buf2);
7226 if( res<=0 ) p->rc = FTS5_CORRUPT;
7229 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
7232 fts5BufferFree(&buf1);
7233 fts5BufferFree(&buf2);
7236 static void fts5IndexIntegrityCheckSegment(
7237 Fts5Index *p, /* FTS5 backend object */
7238 Fts5StructureSegment *pSeg /* Segment to check internal consistency */
7240 Fts5Config *pConfig = p->pConfig;
7241 int bSecureDelete = (pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE);
7242 sqlite3_stmt *pStmt = 0;
7243 int rc2;
7244 int iIdxPrevLeaf = pSeg->pgnoFirst-1;
7245 int iDlidxPrevLeaf = pSeg->pgnoLast;
7247 if( pSeg->pgnoFirst==0 ) return;
7249 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
7250 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
7251 "ORDER BY 1, 2",
7252 pConfig->zDb, pConfig->zName, pSeg->iSegid
7255 /* Iterate through the b-tree hierarchy. */
7256 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
7257 i64 iRow; /* Rowid for this leaf */
7258 Fts5Data *pLeaf; /* Data for this leaf */
7260 const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
7261 int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
7262 int iIdxLeaf = sqlite3_column_int(pStmt, 2);
7263 int bIdxDlidx = sqlite3_column_int(pStmt, 3);
7265 /* If the leaf in question has already been trimmed from the segment,
7266 ** ignore this b-tree entry. Otherwise, load it into memory. */
7267 if( iIdxLeaf<pSeg->pgnoFirst ) continue;
7268 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
7269 pLeaf = fts5LeafRead(p, iRow);
7270 if( pLeaf==0 ) break;
7272 /* Check that the leaf contains at least one term, and that it is equal
7273 ** to or larger than the split-key in zIdxTerm. Also check that if there
7274 ** is also a rowid pointer within the leaf page header, it points to a
7275 ** location before the term. */
7276 if( pLeaf->nn<=pLeaf->szLeaf ){
7278 if( nIdxTerm==0
7279 && pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE
7280 && pLeaf->nn==pLeaf->szLeaf
7281 && pLeaf->nn==4
7283 /* special case - the very first page in a segment keeps its %_idx
7284 ** entry even if all the terms are removed from it by secure-delete
7285 ** operations. */
7286 }else{
7287 p->rc = FTS5_CORRUPT;
7290 }else{
7291 int iOff; /* Offset of first term on leaf */
7292 int iRowidOff; /* Offset of first rowid on leaf */
7293 int nTerm; /* Size of term on leaf in bytes */
7294 int res; /* Comparison of term and split-key */
7296 iOff = fts5LeafFirstTermOff(pLeaf);
7297 iRowidOff = fts5LeafFirstRowidOff(pLeaf);
7298 if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
7299 p->rc = FTS5_CORRUPT;
7300 }else{
7301 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
7302 res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
7303 if( res==0 ) res = nTerm - nIdxTerm;
7304 if( res<0 ) p->rc = FTS5_CORRUPT;
7307 fts5IntegrityCheckPgidx(p, pLeaf);
7309 fts5DataRelease(pLeaf);
7310 if( p->rc ) break;
7312 /* Now check that the iter.nEmpty leaves following the current leaf
7313 ** (a) exist and (b) contain no terms. */
7314 fts5IndexIntegrityCheckEmpty(
7315 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
7317 if( p->rc ) break;
7319 /* If there is a doclist-index, check that it looks right. */
7320 if( bIdxDlidx ){
7321 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
7322 int iPrevLeaf = iIdxLeaf;
7323 int iSegid = pSeg->iSegid;
7324 int iPg = 0;
7325 i64 iKey;
7327 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
7328 fts5DlidxIterEof(p, pDlidx)==0;
7329 fts5DlidxIterNext(p, pDlidx)
7332 /* Check any rowid-less pages that occur before the current leaf. */
7333 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
7334 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
7335 pLeaf = fts5DataRead(p, iKey);
7336 if( pLeaf ){
7337 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
7338 fts5DataRelease(pLeaf);
7341 iPrevLeaf = fts5DlidxIterPgno(pDlidx);
7343 /* Check that the leaf page indicated by the iterator really does
7344 ** contain the rowid suggested by the same. */
7345 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
7346 pLeaf = fts5DataRead(p, iKey);
7347 if( pLeaf ){
7348 i64 iRowid;
7349 int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
7350 ASSERT_SZLEAF_OK(pLeaf);
7351 if( iRowidOff>=pLeaf->szLeaf ){
7352 p->rc = FTS5_CORRUPT;
7353 }else if( bSecureDelete==0 || iRowidOff>0 ){
7354 i64 iDlRowid = fts5DlidxIterRowid(pDlidx);
7355 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
7356 if( iRowid<iDlRowid || (bSecureDelete==0 && iRowid!=iDlRowid) ){
7357 p->rc = FTS5_CORRUPT;
7360 fts5DataRelease(pLeaf);
7364 iDlidxPrevLeaf = iPg;
7365 fts5DlidxIterFree(pDlidx);
7366 fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
7367 }else{
7368 iDlidxPrevLeaf = pSeg->pgnoLast;
7369 /* TODO: Check there is no doclist index */
7372 iIdxPrevLeaf = iIdxLeaf;
7375 rc2 = sqlite3_finalize(pStmt);
7376 if( p->rc==SQLITE_OK ) p->rc = rc2;
7378 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
7379 #if 0
7380 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
7381 p->rc = FTS5_CORRUPT;
7383 #endif
7388 ** Run internal checks to ensure that the FTS index (a) is internally
7389 ** consistent and (b) contains entries for which the XOR of the checksums
7390 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
7392 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
7393 ** checksum does not match. Return SQLITE_OK if all checks pass without
7394 ** error, or some other SQLite error code if another error (e.g. OOM)
7395 ** occurs.
7397 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
7398 int eDetail = p->pConfig->eDetail;
7399 u64 cksum2 = 0; /* Checksum based on contents of indexes */
7400 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
7401 Fts5Iter *pIter; /* Used to iterate through entire index */
7402 Fts5Structure *pStruct; /* Index structure */
7403 int iLvl, iSeg;
7405 #ifdef SQLITE_DEBUG
7406 /* Used by extra internal tests only run if NDEBUG is not defined */
7407 u64 cksum3 = 0; /* Checksum based on contents of indexes */
7408 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
7409 #endif
7410 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
7412 /* Load the FTS index structure */
7413 pStruct = fts5StructureRead(p);
7414 if( pStruct==0 ){
7415 assert( p->rc!=SQLITE_OK );
7416 return fts5IndexReturn(p);
7419 /* Check that the internal nodes of each segment match the leaves */
7420 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
7421 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
7422 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
7423 fts5IndexIntegrityCheckSegment(p, pSeg);
7427 /* The cksum argument passed to this function is a checksum calculated
7428 ** based on all expected entries in the FTS index (including prefix index
7429 ** entries). This block checks that a checksum calculated based on the
7430 ** actual contents of FTS index is identical.
7432 ** Two versions of the same checksum are calculated. The first (stack
7433 ** variable cksum2) based on entries extracted from the full-text index
7434 ** while doing a linear scan of each individual index in turn.
7436 ** As each term visited by the linear scans, a separate query for the
7437 ** same term is performed. cksum3 is calculated based on the entries
7438 ** extracted by these queries.
7440 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
7441 fts5MultiIterEof(p, pIter)==0;
7442 fts5MultiIterNext(p, pIter, 0, 0)
7444 int n; /* Size of term in bytes */
7445 i64 iPos = 0; /* Position read from poslist */
7446 int iOff = 0; /* Offset within poslist */
7447 i64 iRowid = fts5MultiIterRowid(pIter);
7448 char *z = (char*)fts5MultiIterTerm(pIter, &n);
7450 /* If this is a new term, query for it. Update cksum3 with the results. */
7451 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
7452 if( p->rc ) break;
7454 if( eDetail==FTS5_DETAIL_NONE ){
7455 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
7456 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
7458 }else{
7459 poslist.n = 0;
7460 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
7461 fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
7462 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
7463 int iCol = FTS5_POS2COLUMN(iPos);
7464 int iTokOff = FTS5_POS2OFFSET(iPos);
7465 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
7469 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
7471 fts5MultiIterFree(pIter);
7472 if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
7474 fts5StructureRelease(pStruct);
7475 #ifdef SQLITE_DEBUG
7476 fts5BufferFree(&term);
7477 #endif
7478 fts5BufferFree(&poslist);
7479 return fts5IndexReturn(p);
7482 /*************************************************************************
7483 **************************************************************************
7484 ** Below this point is the implementation of the fts5_decode() scalar
7485 ** function only.
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Split a rowid from the %_data table into its component fields. This is
** the opposite of macro FTS5_SEGMENT_ROWID().
**
** Fields are extracted starting from the least significant bits, in this
** order: page number (FTS5_DATA_PAGE_B bits), height (FTS5_DATA_HEIGHT_B
** bits), dlidx flag (FTS5_DATA_DLI_B bits are skipped, only the lowest is
** reported), segment id (FTS5_DATA_ID_B bits), then a one-bit tombstone
** flag.
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *pbTombstone,               /* OUT: Tombstone hash flag */
  int *piSegid,                   /* OUT: Segment id */
  int *pbDlidx,                   /* OUT: Dlidx flag */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  const i64 mPgno = ((i64)1 << FTS5_DATA_PAGE_B) - 1;
  const i64 mHeight = ((i64)1 << FTS5_DATA_HEIGHT_B) - 1;
  const i64 mSegid = ((i64)1 << FTS5_DATA_ID_B) - 1;
  i64 iRest = iRowid;             /* Bits not yet consumed */

  *piPgno = (int)(iRest & mPgno);
  iRest >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRest & mHeight);
  iRest >>= FTS5_DATA_HEIGHT_B;

  *pbDlidx = (int)(iRest & 0x0001);
  iRest >>= FTS5_DATA_DLI_B;

  *piSegid = (int)(iRest & mSegid);
  iRest >>= FTS5_DATA_ID_B;

  *pbTombstone = (int)(iRest & 0x0001);
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append a short, brace-enclosed description of %_data rowid iKey to
** buffer pBuf:
**
**   * "{averages} " if the segment id is 0 and iKey is FTS5_AVERAGES_ROWID,
**   * "{structure}" for any other rowid with segment id 0,
**   * otherwise "{[dlidx ][tombstone ]segid=N h=N pgno=N}".
**
** Errors are reported via *pRc by sqlite3Fts5BufferAppendPrintf().
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx, bTomb;     /* Rowid components */

  fts5DecodeRowid(iKey, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%s%ssegid=%d h=%d pgno=%d}",
        bDlidx ? "dlidx " : "",
        bTomb ? "tombstone " : "",
        iSegid, iHeight, iPgno
    );
    return;
  }

  if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Append a text rendering of structure object p to buffer pBuf. For each
** level the output is:
**
**     " {lvl=L nMerge=M nSeg=S" <segments> "}"
**
** and for each segment within a level:
**
**     " {id=I leaves=FIRST..LAST" [" origin=O1..O2"] "}"
**
** The origin range is only included when iOrigin1 is greater than zero.
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  Fts5Structure *p                /* Structure to render */
){
  int iLevel;                     /* Index of current level */

  for(iLevel=0; iLevel<p->nLevel; iLevel++){
    Fts5StructureLevel *pLevel = &p->aLevel[iLevel];
    int iSegment;                 /* Index of current segment in pLevel */

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLevel, pLevel->nMerge, pLevel->nSeg
    );

    for(iSegment=0; iSegment<pLevel->nSeg; iSegment++){
      Fts5StructureSegment *pSegment = &pLevel->aSeg[iSegment];

      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d",
          pSegment->iSegid, pSegment->pgnoFirst, pSegment->pgnoLast
      );
      if( pSegment->iOrigin1>0 ){
        sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " origin=%lld..%lld",
            pSegment->iOrigin1, pSegment->iOrigin2
        );
      }
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
    }

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** This is part of the fts5_decode() debugging aid.
**
** Buffer pBlob/nBlob holds a serialized Fts5Structure object. Decode it
** with fts5StructureDecode() and append a human-readable rendering of the
** result to pBuf. If decoding fails, the error code is stored in *pRc and
** nothing is appended.
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  const u8 *pBlob, int nBlob      /* Serialized structure record */
){
  Fts5Structure *pStruct = 0;     /* Decoded structure object */
  int rc = fts5StructureDecode(pBlob, nBlob, 0, &pStruct);

  if( rc==SQLITE_OK ){
    fts5DebugStructure(pRc, pBuf, pStruct);
    fts5StructureRelease(pStruct);
  }else{
    *pRc = rc;
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** This is part of the fts5_decode() debugging aid.
**
** Buffer pBlob/nBlob holds an "averages" record, which is read here as a
** sequence of varints. Each value is appended to pBuf as a decimal
** integer (after truncation to int), with single spaces between values.
*/
static void fts5DecodeAverages(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  const u8 *pBlob, int nBlob      /* Averages record */
){
  const char *zSep = "";          /* Separator written before next value */
  int iOff;                       /* Current offset within pBlob[] */

  for(iOff=0; iOff<nBlob; ){
    u64 iValue;
    iOff += sqlite3Fts5GetVarint(&pBlob[iOff], &iValue);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSep, (int)iValue);
    zSep = " ";
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** Buffer (a/n) is assumed to contain a list of serialized 32-bit varints.
** Read varints for as long as the read offset is less than n, appending
** each one to pBuf as " <value>". Zero values are not treated specially.
**
** The return value is the number of bytes read from the input buffer.
** Because the bound is only checked before each varint is read, this may
** exceed n if the final varint straddles the end of the buffer.
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int nRead;                      /* Bytes consumed so far */

  for(nRead=0; nRead<n; ){
    int iVal;
    nRead += fts5GetVarint32(&a[nRead], iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
  }
  return nRead;
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** The start of buffer (a/n) contains the start of a doclist. The doclist
** may or may not finish within the buffer. Append a text representation
** of the part of the doclist that is present to pBuf:
**
**   * " id=<rowid>" for each entry (the first rowid is read directly,
**     later ones are accumulated from deltas),
**   * " nPos=<n>" followed by "*" if the delete flag is set,
**   * the position list, as written by fts5DecodePoslist().
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  i64 iRowid = 0;                 /* Rowid of current doclist entry */
  int i = 0;                      /* Current offset within a[] */

  if( n>0 ){
    i = sqlite3Fts5GetVarint(a, (u64*)&iRowid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iRowid);
  }

  while( i<n ){
    int nPos;                     /* Size of position list in bytes */
    int bDel;                     /* Delete flag for this entry */
    i64 iDelta;                   /* Difference to the next rowid */

    i += fts5GetPoslistSize(&a[i], &nPos, &bDel);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");

    /* Never decode more position data than remains in the buffer. */
    i += fts5DecodePoslist(pRc, pBuf, &a[i], MIN(n-i, nPos));
    if( i>=n ) break;

    i += sqlite3Fts5GetVarint(&a[i], (u64*)&iDelta);
    iRowid += iDelta;
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iRowid);
  }

  return i;
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** This function is part of the fts5_decode() debugging function. It is
** only ever used with detail=none tables.
**
** Buffer (pData/nData) contains a doclist in the format used by detail=none
** tables. Each rowid is stored as a varint delta from the previous one and
** may be followed by one or two 0x00 bytes. For each entry, " <rowid>" is
** appended to pBuf, followed by "*" if exactly one 0x00 byte follows the
** varint, or "+" if two do.
**
** All output goes through sqlite3Fts5BufferAppendPrintf(), which is also
** how errors are reported via *pRc.
*/
static void fts5DecodeRowidList(
  int *pRc,                       /* IN/OUT: Error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  const u8 *pData, int nData      /* Data to decode list-of-rowids from */
){
  i64 iRowid = 0;                 /* Running rowid value */
  int i = 0;                      /* Current offset within pData[] */

  while( i<nData ){
    const char *zSuffix = "";     /* Marker appended after the rowid */
    u64 iDelta;

    i += sqlite3Fts5GetVarint(&pData[i], &iDelta);
    iRowid += iDelta;

    if( i<nData && pData[i]==0x00 ){
      i++;
      zSuffix = "*";
      if( i<nData && pData[i]==0x00 ){
        i++;
        zSuffix = "+";
      }
    }

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zSuffix);
  }
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** The implementation of user-defined scalar function fts5_decode().
**
** The first SQL argument is a rowid from the %_data table and the second
** is the blob stored under that rowid. The result is a text description
** of the record. The rowid determines how the blob is interpreted:
**
**   * dlidx flag set       -> doclist-index record,
**   * tombstone flag set   -> tombstone hash page,
**   * segment id 0         -> averages or structure record,
**   * otherwise            -> segment leaf page, decoded in detail=none
**                             format if sqlite3_user_data() is non-NULL.
**
** On error the function result is set to the error code instead.
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
  int bTomb;
  const u8 *aBlob; int n;         /* Record to decode */
  u8 *a = 0;                      /* Zero-padded copy of aBlob[] */
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  sqlite3_int64 nSpace = 0;
  int eDetailNone = (sqlite3_user_data(pCtx)!=0);

  assert( nArg==2 );
  UNUSED_PARAM(nArg);
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);

  /* Make a copy of the second argument (a blob) in a[]. The copy is
  ** followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
  ** buffer overreads even if the record is corrupt. All decoding below
  ** must read from a[], never from aBlob[], which has no padding and may
  ** be NULL for a zero-length value. */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);
  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  if( n>0 ) memcpy(a, aBlob, n);

  fts5DecodeRowid(iRowid, &bTomb, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

    dlidx.p = a;
    dlidx.nn = n;

    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
    lvl.pData = &dlidx;
    lvl.iLeafPgno = iPgno;

    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s,
          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
      );
    }
  }else if( bTomb ){
    /* Tombstone hash page. As read here: byte 0 is the key size (4 or 8,
    ** anything else is treated as 8), byte 1 is non-zero if rowid 0 is
    ** present, bytes 4..7 hold the element count, and the slots follow
    ** from offset 8. Slots are read byte-wise from the padded copy a[] so
    ** that short or empty blobs cannot cause an overread and no
    ** misaligned u32/u64 loads are performed. */
    u32 nElem = fts5GetU32(&a[4]);
    int szKey = (a[0]==4 || a[0]==8) ? a[0] : 8;
    int nSlot = (n - 8) / szKey;
    int ii;
    sqlite3Fts5BufferAppendPrintf(&rc, &s, " nElem=%d", (int)nElem);
    if( a[1] ){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, " 0");
    }
    for(ii=0; ii<nSlot; ii++){
      u8 *aSlot = &a[8 + ii*szKey];
      u64 iVal = (szKey==4) ? (u64)fts5GetU32(aSlot) : fts5GetU64(aSlot);
      if( iVal!=0 ){
        sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", (i64)iVal);
      }
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      fts5DecodeAverages(&rc, &s, a, n);
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else if( eDetailNone ){
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;
    int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
    int iTermOff;
    int nKeep = 0;
    int iOff;

    memset(&term, 0, sizeof(Fts5Buffer));

    /* Decode any entries that occur before the first term. */
    if( szLeaf<n ){
      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
    }else{
      iTermOff = szLeaf;
    }
    fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);

    iOff = iTermOff;
    while( iOff<szLeaf && rc==SQLITE_OK ){
      int nAppend;

      /* Read the term data for the next term. The first nKeep bytes of
      ** the previous term are retained as a prefix. */
      iOff += fts5GetVarint32(&a[iOff], nAppend);
      term.n = nKeep;
      fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += nAppend;

      /* Figure out where the doclist for this term ends */
      if( iPgidxOff<n ){
        int nIncr;
        iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
        iTermOff += nIncr;
      }else{
        iTermOff = szLeaf;
      }
      if( iTermOff>szLeaf ){
        rc = FTS5_CORRUPT;
      }else{
        fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
      }
      iOff = iTermOff;
      if( iOff<szLeaf ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }

    fts5BufferFree(&term);
  }else{
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;                   /* Offset of pgidx in a[] */
    int iPgidxOff;
    int iPgidxPrev = 0;           /* Previous value read from pgidx */
    int iTermOff = 0;
    int iRowidOff = 0;
    int iOff;
    int nDoclist;

    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){
      sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      iPgidxOff = szLeaf = fts5GetU16(&a[2]);
      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], iTermOff);
      }else if( iPgidxOff>n ){
        rc = FTS5_CORRUPT;
        goto decode_out;
      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
      iOff = iTermOff;
    }else{
      iOff = szLeaf;
    }
    if( iOff>n ){
      rc = FTS5_CORRUPT;
      goto decode_out;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    if( nDoclist+iOff>n ){
      rc = FTS5_CORRUPT;
      goto decode_out;
    }
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

    while( iPgidxOff<n && rc==SQLITE_OK ){
      int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
      int nByte;                            /* Bytes of data */
      int iEnd;

      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
      iPgidxPrev += nByte;
      iOff = iPgidxPrev;

      /* Peek at the next pgidx entry (without consuming it) to find where
      ** the data for this term ends. */
      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], nByte);
        iEnd = iPgidxPrev + nByte;
      }else{
        iEnd = szLeaf;
      }
      if( iEnd>szLeaf ){
        rc = FTS5_CORRUPT;
        break;
      }

      /* Every term but the first on the page begins with the number of
      ** prefix bytes to keep from the previous term. */
      if( bFirst==0 ){
        iOff += fts5GetVarint32(&a[iOff], nByte);
        if( nByte>term.n ){
          rc = FTS5_CORRUPT;
          break;
        }
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      if( iOff+nByte>n ){
        rc = FTS5_CORRUPT;
        break;
      }
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
    }

    fts5BufferFree(&term);
  }

 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
  fts5BufferFree(&s);
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
/*
** The implementation of user-defined scalar function fts5_rowid().
**
** The only form accepted is fts5_rowid('segment', segid, pgno), which
** returns the value of FTS5_SEGMENT_ROWID(segid, pgno). The subject
** string is compared case-insensitively. Any other usage produces an
** error message as the function result.
*/
static void fts5RowidFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (variable) */
  sqlite3_value **apVal           /* Function arguments */
){
  const char *zSubject;           /* Text of first argument */
  i64 iRowid;                     /* Result value */
  int segid;                      /* Second argument */
  int pgno;                       /* Third argument */

  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
    return;
  }

  zSubject = (const char*)sqlite3_value_text(apVal[0]);
  if( sqlite3_stricmp(zSubject, "segment")!=0 ){
    sqlite3_result_error(pCtx,
        "first arg to fts5_rowid() must be 'segment'" , -1
    );
    return;
  }

  if( nArg!=3 ){
    sqlite3_result_error(pCtx,
        "should be: fts5_rowid('segment', segid, pgno))", -1
    );
    return;
  }

  segid = sqlite3_value_int(apVal[1]);
  pgno = sqlite3_value_int(apVal[2]);
  iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
  sqlite3_result_int64(pCtx, iRowid);
}
#endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
7991 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
7993 typedef struct Fts5StructVtab Fts5StructVtab;
7994 struct Fts5StructVtab {
7995 sqlite3_vtab base;
7998 typedef struct Fts5StructVcsr Fts5StructVcsr;
7999 struct Fts5StructVcsr {
8000 sqlite3_vtab_cursor base;
8001 Fts5Structure *pStruct;
8002 int iLevel;
8003 int iSeg;
8004 int iRowid;
8008 ** Create a new fts5_structure() table-valued function.
8010 static int fts5structConnectMethod(
8011 sqlite3 *db,
8012 void *pAux,
8013 int argc, const char *const*argv,
8014 sqlite3_vtab **ppVtab,
8015 char **pzErr
8017 Fts5StructVtab *pNew = 0;
8018 int rc = SQLITE_OK;
8020 rc = sqlite3_declare_vtab(db,
8021 "CREATE TABLE xyz("
8022 "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, "
8023 "npgtombstone, nentrytombstone, nentry, struct HIDDEN);"
8025 if( rc==SQLITE_OK ){
8026 pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
8029 *ppVtab = (sqlite3_vtab*)pNew;
8030 return rc;
8034 ** We must have a single struct=? constraint that will be passed through
8035 ** into the xFilter method. If there is no valid stmt=? constraint,
8036 ** then return an SQLITE_CONSTRAINT error.
8038 static int fts5structBestIndexMethod(
8039 sqlite3_vtab *tab,
8040 sqlite3_index_info *pIdxInfo
8042 int i;
8043 int rc = SQLITE_CONSTRAINT;
8044 struct sqlite3_index_constraint *p;
8045 pIdxInfo->estimatedCost = (double)100;
8046 pIdxInfo->estimatedRows = 100;
8047 pIdxInfo->idxNum = 0;
8048 for(i=0, p=pIdxInfo->aConstraint; i<pIdxInfo->nConstraint; i++, p++){
8049 if( p->usable==0 ) continue;
8050 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ && p->iColumn==11 ){
8051 rc = SQLITE_OK;
8052 pIdxInfo->aConstraintUsage[i].omit = 1;
8053 pIdxInfo->aConstraintUsage[i].argvIndex = 1;
8054 break;
8057 return rc;
8061 ** This method is the destructor for bytecodevtab objects.
8063 static int fts5structDisconnectMethod(sqlite3_vtab *pVtab){
8064 Fts5StructVtab *p = (Fts5StructVtab*)pVtab;
8065 sqlite3_free(p);
8066 return SQLITE_OK;
8070 ** Constructor for a new bytecodevtab_cursor object.
8072 static int fts5structOpenMethod(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCsr){
8073 int rc = SQLITE_OK;
8074 Fts5StructVcsr *pNew = 0;
8076 pNew = sqlite3Fts5MallocZero(&rc, sizeof(*pNew));
8077 *ppCsr = (sqlite3_vtab_cursor*)pNew;
8079 return SQLITE_OK;
8083 ** Destructor for a bytecodevtab_cursor.
8085 static int fts5structCloseMethod(sqlite3_vtab_cursor *cur){
8086 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8087 fts5StructureRelease(pCsr->pStruct);
8088 sqlite3_free(pCsr);
8089 return SQLITE_OK;
8094 ** Advance a bytecodevtab_cursor to its next row of output.
8096 static int fts5structNextMethod(sqlite3_vtab_cursor *cur){
8097 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8098 Fts5Structure *p = pCsr->pStruct;
8100 assert( pCsr->pStruct );
8101 pCsr->iSeg++;
8102 pCsr->iRowid++;
8103 while( pCsr->iLevel<p->nLevel && pCsr->iSeg>=p->aLevel[pCsr->iLevel].nSeg ){
8104 pCsr->iLevel++;
8105 pCsr->iSeg = 0;
8107 if( pCsr->iLevel>=p->nLevel ){
8108 fts5StructureRelease(pCsr->pStruct);
8109 pCsr->pStruct = 0;
8111 return SQLITE_OK;
8115 ** Return TRUE if the cursor has been moved off of the last
8116 ** row of output.
8118 static int fts5structEofMethod(sqlite3_vtab_cursor *cur){
8119 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8120 return pCsr->pStruct==0;
8123 static int fts5structRowidMethod(
8124 sqlite3_vtab_cursor *cur,
8125 sqlite_int64 *piRowid
8127 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8128 *piRowid = pCsr->iRowid;
8129 return SQLITE_OK;
8133 ** Return values of columns for the row at which the bytecodevtab_cursor
8134 ** is currently pointing.
8136 static int fts5structColumnMethod(
8137 sqlite3_vtab_cursor *cur, /* The cursor */
8138 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
8139 int i /* Which column to return */
8141 Fts5StructVcsr *pCsr = (Fts5StructVcsr*)cur;
8142 Fts5Structure *p = pCsr->pStruct;
8143 Fts5StructureSegment *pSeg = &p->aLevel[pCsr->iLevel].aSeg[pCsr->iSeg];
8145 switch( i ){
8146 case 0: /* level */
8147 sqlite3_result_int(ctx, pCsr->iLevel);
8148 break;
8149 case 1: /* segment */
8150 sqlite3_result_int(ctx, pCsr->iSeg);
8151 break;
8152 case 2: /* merge */
8153 sqlite3_result_int(ctx, pCsr->iSeg < p->aLevel[pCsr->iLevel].nMerge);
8154 break;
8155 case 3: /* segid */
8156 sqlite3_result_int(ctx, pSeg->iSegid);
8157 break;
8158 case 4: /* leaf1 */
8159 sqlite3_result_int(ctx, pSeg->pgnoFirst);
8160 break;
8161 case 5: /* leaf2 */
8162 sqlite3_result_int(ctx, pSeg->pgnoLast);
8163 break;
8164 case 6: /* origin1 */
8165 sqlite3_result_int64(ctx, pSeg->iOrigin1);
8166 break;
8167 case 7: /* origin2 */
8168 sqlite3_result_int64(ctx, pSeg->iOrigin2);
8169 break;
8170 case 8: /* npgtombstone */
8171 sqlite3_result_int(ctx, pSeg->nPgTombstone);
8172 break;
8173 case 9: /* nentrytombstone */
8174 sqlite3_result_int64(ctx, pSeg->nEntryTombstone);
8175 break;
8176 case 10: /* nentry */
8177 sqlite3_result_int64(ctx, pSeg->nEntry);
8178 break;
8180 return SQLITE_OK;
8184 ** Initialize a cursor.
8186 ** idxNum==0 means show all subprograms
8187 ** idxNum==1 means show only the main bytecode and omit subprograms.
8189 static int fts5structFilterMethod(
8190 sqlite3_vtab_cursor *pVtabCursor,
8191 int idxNum, const char *idxStr,
8192 int argc, sqlite3_value **argv
8194 Fts5StructVcsr *pCsr = (Fts5StructVcsr *)pVtabCursor;
8195 int rc = SQLITE_OK;
8197 const u8 *aBlob = 0;
8198 int nBlob = 0;
8200 assert( argc==1 );
8201 fts5StructureRelease(pCsr->pStruct);
8202 pCsr->pStruct = 0;
8204 nBlob = sqlite3_value_bytes(argv[0]);
8205 aBlob = (const u8*)sqlite3_value_blob(argv[0]);
8206 rc = fts5StructureDecode(aBlob, nBlob, 0, &pCsr->pStruct);
8207 if( rc==SQLITE_OK ){
8208 pCsr->iLevel = 0;
8209 pCsr->iRowid = 0;
8210 pCsr->iSeg = -1;
8211 rc = fts5structNextMethod(pVtabCursor);
8214 return rc;
8217 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8220 ** This is called as part of registering the FTS5 module with database
8221 ** connection db. It registers several user-defined scalar functions useful
8222 ** with FTS5.
8224 ** If successful, SQLITE_OK is returned. If an error occurs, some other
8225 ** SQLite error code is returned instead.
8227 int sqlite3Fts5IndexInit(sqlite3 *db){
8228 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8229 int rc = sqlite3_create_function(
8230 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
8233 if( rc==SQLITE_OK ){
8234 rc = sqlite3_create_function(
8235 db, "fts5_decode_none", 2,
8236 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
8240 if( rc==SQLITE_OK ){
8241 rc = sqlite3_create_function(
8242 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
8246 if( rc==SQLITE_OK ){
8247 static const sqlite3_module fts5structure_module = {
8248 0, /* iVersion */
8249 0, /* xCreate */
8250 fts5structConnectMethod, /* xConnect */
8251 fts5structBestIndexMethod, /* xBestIndex */
8252 fts5structDisconnectMethod, /* xDisconnect */
8253 0, /* xDestroy */
8254 fts5structOpenMethod, /* xOpen */
8255 fts5structCloseMethod, /* xClose */
8256 fts5structFilterMethod, /* xFilter */
8257 fts5structNextMethod, /* xNext */
8258 fts5structEofMethod, /* xEof */
8259 fts5structColumnMethod, /* xColumn */
8260 fts5structRowidMethod, /* xRowid */
8261 0, /* xUpdate */
8262 0, /* xBegin */
8263 0, /* xSync */
8264 0, /* xCommit */
8265 0, /* xRollback */
8266 0, /* xFindFunction */
8267 0, /* xRename */
8268 0, /* xSavepoint */
8269 0, /* xRelease */
8270 0, /* xRollbackTo */
8271 0 /* xShadowName */
8273 rc = sqlite3_create_module(db, "fts5_structure", &fts5structure_module, 0);
8275 return rc;
8276 #else
8277 return SQLITE_OK;
8278 UNUSED_PARAM(db);
8279 #endif
8283 int sqlite3Fts5IndexReset(Fts5Index *p){
8284 assert( p->pStruct==0 || p->iStructVersion!=0 );
8285 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
8286 fts5StructureInvalidate(p);
8288 return fts5IndexReturn(p);