4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
19 typedef struct Fts5HashEntry Fts5HashEntry
;
22 ** This file contains the implementation of an in-memory hash table used
23 ** to accumuluate "term -> doclist" content before it is flused to a level-0
29 int eDetail
; /* Copy of Fts5Config.eDetail */
30 int *pnByte
; /* Pointer to bytes counter */
31 int nEntry
; /* Number of entries currently in hash */
32 int nSlot
; /* Size of aSlot[] array */
33 Fts5HashEntry
*pScan
; /* Current ordered scan item */
34 Fts5HashEntry
**aSlot
; /* Array of hash slots */
38 ** Each entry in the hash table is represented by an object of the
39 ** following type. Each object, its key, and its current data are stored
40 ** in a single memory allocation. The key immediately follows the object
41 ** in memory. The position list data immediately follows the key data
44 ** The key is Fts5HashEntry.nKey bytes in size. It consists of a single
45 ** byte identifying the index (either the main term index or a prefix-index),
46 ** followed by the term data. For example: "0token". There is no
47 ** nul-terminator - in this case nKey=6.
49 ** The data that follows the key is in a similar, but not identical format
50 ** to the doclist data stored in the database. It is:
52 ** * Rowid, as a varint
53 ** * Position list, without 0x00 terminator.
54 ** * Size of previous position list and rowid, as a 4 byte
55 ** big-endian integer.
58 ** Offset of last rowid written to data area. Relative to first byte of
62 ** Bytes of data written since iRowidOff.
64 struct Fts5HashEntry
{
65 Fts5HashEntry
*pHashNext
; /* Next hash entry with same hash-key */
66 Fts5HashEntry
*pScanNext
; /* Next entry in sorted order */
68 int nAlloc
; /* Total size of allocation */
69 int iSzPoslist
; /* Offset of space for 4-byte poslist size */
70 int nData
; /* Total bytes of data (incl. structure) */
71 int nKey
; /* Length of key in bytes */
72 u8 bDel
; /* Set delete-flag @ iSzPoslist */
73 u8 bContent
; /* Set content-flag (detail=none mode) */
74 i16 iCol
; /* Column of last value written */
75 int iPos
; /* Position of last value written */
76 i64 iRowid
; /* Rowid of last value written */
82 ** char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
84 #define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
88 ** Allocate a new hash table.
90 int sqlite3Fts5HashNew(Fts5Config
*pConfig
, Fts5Hash
**ppNew
, int *pnByte
){
94 *ppNew
= pNew
= (Fts5Hash
*)sqlite3_malloc(sizeof(Fts5Hash
));
99 memset(pNew
, 0, sizeof(Fts5Hash
));
100 pNew
->pnByte
= pnByte
;
101 pNew
->eDetail
= pConfig
->eDetail
;
104 nByte
= sizeof(Fts5HashEntry
*) * pNew
->nSlot
;
105 pNew
->aSlot
= (Fts5HashEntry
**)sqlite3_malloc64(nByte
);
106 if( pNew
->aSlot
==0 ){
111 memset(pNew
->aSlot
, 0, (size_t)nByte
);
118 ** Free a hash table object.
120 void sqlite3Fts5HashFree(Fts5Hash
*pHash
){
122 sqlite3Fts5HashClear(pHash
);
123 sqlite3_free(pHash
->aSlot
);
129 ** Empty (but do not delete) a hash table.
131 void sqlite3Fts5HashClear(Fts5Hash
*pHash
){
133 for(i
=0; i
<pHash
->nSlot
; i
++){
134 Fts5HashEntry
*pNext
;
135 Fts5HashEntry
*pSlot
;
136 for(pSlot
=pHash
->aSlot
[i
]; pSlot
; pSlot
=pNext
){
137 pNext
= pSlot
->pHashNext
;
141 memset(pHash
->aSlot
, 0, pHash
->nSlot
* sizeof(Fts5HashEntry
*));
145 static unsigned int fts5HashKey(int nSlot
, const u8
*p
, int n
){
148 for(i
=n
-1; i
>=0; i
--){
149 h
= (h
<< 3) ^ h
^ p
[i
];
154 static unsigned int fts5HashKey2(int nSlot
, u8 b
, const u8
*p
, int n
){
157 for(i
=n
-1; i
>=0; i
--){
158 h
= (h
<< 3) ^ h
^ p
[i
];
160 h
= (h
<< 3) ^ h
^ b
;
165 ** Resize the hash table by doubling the number of slots.
167 static int fts5HashResize(Fts5Hash
*pHash
){
168 int nNew
= pHash
->nSlot
*2;
170 Fts5HashEntry
**apNew
;
171 Fts5HashEntry
**apOld
= pHash
->aSlot
;
173 apNew
= (Fts5HashEntry
**)sqlite3_malloc64(nNew
*sizeof(Fts5HashEntry
*));
174 if( !apNew
) return SQLITE_NOMEM
;
175 memset(apNew
, 0, nNew
*sizeof(Fts5HashEntry
*));
177 for(i
=0; i
<pHash
->nSlot
; i
++){
180 Fts5HashEntry
*p
= apOld
[i
];
181 apOld
[i
] = p
->pHashNext
;
182 iHash
= fts5HashKey(nNew
, (u8
*)fts5EntryKey(p
), p
->nKey
);
183 p
->pHashNext
= apNew
[iHash
];
190 pHash
->aSlot
= apNew
;
194 static int fts5HashAddPoslistSize(
201 u8
*pPtr
= p2
? (u8
*)p2
: (u8
*)p
;
202 int nData
= p
->nData
;
203 if( pHash
->eDetail
==FTS5_DETAIL_NONE
){
204 assert( nData
==p
->iSzPoslist
);
206 pPtr
[nData
++] = 0x00;
208 pPtr
[nData
++] = 0x00;
212 int nSz
= (nData
- p
->iSzPoslist
- 1); /* Size in bytes */
213 int nPos
= nSz
*2 + p
->bDel
; /* Value of nPos field */
215 assert( p
->bDel
==0 || p
->bDel
==1 );
217 pPtr
[p
->iSzPoslist
] = (u8
)nPos
;
219 int nByte
= sqlite3Fts5GetVarintLen((u32
)nPos
);
220 memmove(&pPtr
[p
->iSzPoslist
+ nByte
], &pPtr
[p
->iSzPoslist
+ 1], nSz
);
221 sqlite3Fts5PutVarint(&pPtr
[p
->iSzPoslist
], nPos
);
226 nRet
= nData
- p
->nData
;
238 ** Add an entry to the in-memory hash table. The key is the concatenation
239 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
241 ** (bByte || pToken) -> (iRowid,iCol,iPos)
243 ** Or, if iCol is negative, then the value is a delete marker.
245 int sqlite3Fts5HashWrite(
247 i64 iRowid
, /* Rowid for this entry */
248 int iCol
, /* Column token appears in (-ve -> delete) */
249 int iPos
, /* Position of token within column */
250 char bByte
, /* First byte of token */
251 const char *pToken
, int nToken
/* Token to add or remove to or from index */
256 int nIncr
= 0; /* Amount to increment (*pHash->pnByte) by */
257 int bNew
; /* If non-delete entry should be written */
259 bNew
= (pHash
->eDetail
==FTS5_DETAIL_FULL
);
261 /* Attempt to locate an existing hash entry */
262 iHash
= fts5HashKey2(pHash
->nSlot
, (u8
)bByte
, (const u8
*)pToken
, nToken
);
263 for(p
=pHash
->aSlot
[iHash
]; p
; p
=p
->pHashNext
){
264 char *zKey
= fts5EntryKey(p
);
267 && memcmp(&zKey
[1], pToken
, nToken
)==0
273 /* If an existing hash entry cannot be found, create a new one. */
275 /* Figure out how much space to allocate */
277 sqlite3_int64 nByte
= sizeof(Fts5HashEntry
) + (nToken
+1) + 1 + 64;
278 if( nByte
<128 ) nByte
= 128;
280 /* Grow the Fts5Hash.aSlot[] array if necessary. */
281 if( (pHash
->nEntry
*2)>=pHash
->nSlot
){
282 int rc
= fts5HashResize(pHash
);
283 if( rc
!=SQLITE_OK
) return rc
;
284 iHash
= fts5HashKey2(pHash
->nSlot
, (u8
)bByte
, (const u8
*)pToken
, nToken
);
287 /* Allocate new Fts5HashEntry and add it to the hash table. */
288 p
= (Fts5HashEntry
*)sqlite3_malloc64(nByte
);
289 if( !p
) return SQLITE_NOMEM
;
290 memset(p
, 0, sizeof(Fts5HashEntry
));
291 p
->nAlloc
= (int)nByte
;
292 zKey
= fts5EntryKey(p
);
294 memcpy(&zKey
[1], pToken
, nToken
);
295 assert( iHash
==fts5HashKey(pHash
->nSlot
, (u8
*)zKey
, nToken
+1) );
297 zKey
[nToken
+1] = '\0';
298 p
->nData
= nToken
+1 + sizeof(Fts5HashEntry
);
299 p
->pHashNext
= pHash
->aSlot
[iHash
];
300 pHash
->aSlot
[iHash
] = p
;
303 /* Add the first rowid field to the hash-entry */
304 p
->nData
+= sqlite3Fts5PutVarint(&((u8
*)p
)[p
->nData
], iRowid
);
307 p
->iSzPoslist
= p
->nData
;
308 if( pHash
->eDetail
!=FTS5_DETAIL_NONE
){
310 p
->iCol
= (pHash
->eDetail
==FTS5_DETAIL_FULL
? 0 : -1);
315 /* Appending to an existing hash-entry. Check that there is enough
316 ** space to append the largest possible new entry. Worst case scenario
319 ** + 9 bytes for a new rowid,
320 ** + 4 byte reserved for the "poslist size" varint.
321 ** + 1 byte for a "new column" byte,
322 ** + 3 bytes for a new column number (16-bit max) as a varint,
323 ** + 5 bytes for the new position offset (32-bit max).
325 if( (p
->nAlloc
- p
->nData
) < (9 + 4 + 1 + 3 + 5) ){
326 sqlite3_int64 nNew
= p
->nAlloc
* 2;
329 pNew
= (Fts5HashEntry
*)sqlite3_realloc64(p
, nNew
);
330 if( pNew
==0 ) return SQLITE_NOMEM
;
331 pNew
->nAlloc
= (int)nNew
;
332 for(pp
=&pHash
->aSlot
[iHash
]; *pp
!=p
; pp
=&(*pp
)->pHashNext
);
338 assert( (p
->nAlloc
- p
->nData
) >= (9 + 4 + 1 + 3 + 5) );
342 /* If this is a new rowid, append the 4-byte size field for the previous
343 ** entry, and the new rowid for this entry. */
344 if( iRowid
!=p
->iRowid
){
345 u64 iDiff
= (u64
)iRowid
- (u64
)p
->iRowid
;
346 fts5HashAddPoslistSize(pHash
, p
, 0);
347 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iDiff
);
350 p
->iSzPoslist
= p
->nData
;
351 if( pHash
->eDetail
!=FTS5_DETAIL_NONE
){
353 p
->iCol
= (pHash
->eDetail
==FTS5_DETAIL_FULL
? 0 : -1);
359 if( pHash
->eDetail
==FTS5_DETAIL_NONE
){
362 /* Append a new column value, if necessary */
363 assert_nc( iCol
>=p
->iCol
);
365 if( pHash
->eDetail
==FTS5_DETAIL_FULL
){
366 pPtr
[p
->nData
++] = 0x01;
367 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iCol
);
372 p
->iCol
= (i16
)(iPos
= iCol
);
376 /* Append the new position offset, if necessary */
378 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iPos
- p
->iPos
+ 2);
383 /* This is a delete. Set the delete flag. */
388 *pHash
->pnByte
+= nIncr
;
394 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
395 ** each sorted in key order. This function merges the two lists into a
396 ** single list and returns a pointer to its first element.
398 static Fts5HashEntry
*fts5HashEntryMerge(
399 Fts5HashEntry
*pLeft
,
400 Fts5HashEntry
*pRight
402 Fts5HashEntry
*p1
= pLeft
;
403 Fts5HashEntry
*p2
= pRight
;
404 Fts5HashEntry
*pRet
= 0;
405 Fts5HashEntry
**ppOut
= &pRet
;
415 char *zKey1
= fts5EntryKey(p1
);
416 char *zKey2
= fts5EntryKey(p2
);
417 int nMin
= MIN(p1
->nKey
, p2
->nKey
);
419 int cmp
= memcmp(zKey1
, zKey2
, nMin
);
421 cmp
= p1
->nKey
- p2
->nKey
;
428 ppOut
= &p2
->pScanNext
;
433 ppOut
= &p1
->pScanNext
;
444 ** Link all tokens from hash table iHash into a list in sorted order. The
445 ** tokens are not removed from the hash table.
447 static int fts5HashEntrySort(
449 const char *pTerm
, int nTerm
, /* Query prefix, if any */
450 Fts5HashEntry
**ppSorted
452 const int nMergeSlot
= 32;
454 Fts5HashEntry
*pList
;
459 ap
= sqlite3_malloc64(sizeof(Fts5HashEntry
*) * nMergeSlot
);
460 if( !ap
) return SQLITE_NOMEM
;
461 memset(ap
, 0, sizeof(Fts5HashEntry
*) * nMergeSlot
);
463 for(iSlot
=0; iSlot
<pHash
->nSlot
; iSlot
++){
464 Fts5HashEntry
*pIter
;
465 for(pIter
=pHash
->aSlot
[iSlot
]; pIter
; pIter
=pIter
->pHashNext
){
467 || (pIter
->nKey
>=nTerm
&& 0==memcmp(fts5EntryKey(pIter
), pTerm
, nTerm
))
469 Fts5HashEntry
*pEntry
= pIter
;
470 pEntry
->pScanNext
= 0;
471 for(i
=0; ap
[i
]; i
++){
472 pEntry
= fts5HashEntryMerge(pEntry
, ap
[i
]);
481 for(i
=0; i
<nMergeSlot
; i
++){
482 pList
= fts5HashEntryMerge(pList
, ap
[i
]);
491 ** Query the hash table for a doclist associated with term pTerm/nTerm.
493 int sqlite3Fts5HashQuery(
494 Fts5Hash
*pHash
, /* Hash table to query */
496 const char *pTerm
, int nTerm
, /* Query term */
497 void **ppOut
, /* OUT: Pointer to new object */
498 int *pnDoclist
/* OUT: Size of doclist in bytes */
500 unsigned int iHash
= fts5HashKey(pHash
->nSlot
, (const u8
*)pTerm
, nTerm
);
504 for(p
=pHash
->aSlot
[iHash
]; p
; p
=p
->pHashNext
){
505 zKey
= fts5EntryKey(p
);
506 if( nTerm
==p
->nKey
&& memcmp(zKey
, pTerm
, nTerm
)==0 ) break;
510 int nHashPre
= sizeof(Fts5HashEntry
) + nTerm
;
511 int nList
= p
->nData
- nHashPre
;
512 u8
*pRet
= (u8
*)(*ppOut
= sqlite3_malloc64(nPre
+ nList
+ 10));
514 Fts5HashEntry
*pFaux
= (Fts5HashEntry
*)&pRet
[nPre
-nHashPre
];
515 memcpy(&pRet
[nPre
], &((u8
*)p
)[nHashPre
], nList
);
516 nList
+= fts5HashAddPoslistSize(pHash
, p
, pFaux
);
530 int sqlite3Fts5HashScanInit(
531 Fts5Hash
*p
, /* Hash table to query */
532 const char *pTerm
, int nTerm
/* Query prefix */
534 return fts5HashEntrySort(p
, pTerm
, nTerm
, &p
->pScan
);
538 static int fts5HashCount(Fts5Hash
*pHash
){
541 for(ii
=0; ii
<pHash
->nSlot
; ii
++){
542 Fts5HashEntry
*p
= 0;
543 for(p
=pHash
->aSlot
[ii
]; p
; p
=p
->pHashNext
){
552 ** Return true if the hash table is empty, false otherwise.
554 int sqlite3Fts5HashIsEmpty(Fts5Hash
*pHash
){
555 assert( pHash
->nEntry
==fts5HashCount(pHash
) );
556 return pHash
->nEntry
==0;
559 void sqlite3Fts5HashScanNext(Fts5Hash
*p
){
560 assert( !sqlite3Fts5HashScanEof(p
) );
561 p
->pScan
= p
->pScan
->pScanNext
;
564 int sqlite3Fts5HashScanEof(Fts5Hash
*p
){
565 return (p
->pScan
==0);
568 void sqlite3Fts5HashScanEntry(
570 const char **pzTerm
, /* OUT: term (nul-terminated) */
571 int *pnTerm
, /* OUT: Size of term in bytes */
572 const u8
**ppDoclist
, /* OUT: pointer to doclist */
573 int *pnDoclist
/* OUT: size of doclist in bytes */
576 if( (p
= pHash
->pScan
) ){
577 char *zKey
= fts5EntryKey(p
);
579 fts5HashAddPoslistSize(pHash
, p
, 0);
582 *ppDoclist
= (const u8
*)&zKey
[nTerm
];
583 *pnDoclist
= p
->nData
- (sizeof(Fts5HashEntry
) + nTerm
);