4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
19 typedef struct Fts5HashEntry Fts5HashEntry
;
22 ** This file contains the implementation of an in-memory hash table used
23 ** to accumuluate "term -> doclist" content before it is flused to a level-0
29 int eDetail
; /* Copy of Fts5Config.eDetail */
30 int *pnByte
; /* Pointer to bytes counter */
31 int nEntry
; /* Number of entries currently in hash */
32 int nSlot
; /* Size of aSlot[] array */
33 Fts5HashEntry
*pScan
; /* Current ordered scan item */
34 Fts5HashEntry
**aSlot
; /* Array of hash slots */
38 ** Each entry in the hash table is represented by an object of the
39 ** following type. Each object, its key (a nul-terminated string) and
40 ** its current data are stored in a single memory allocation. The
41 ** key immediately follows the object in memory. The position list
42 ** data immediately follows the key data in memory.
44 ** The data that follows the key is in a similar, but not identical format
45 ** to the doclist data stored in the database. It is:
47 ** * Rowid, as a varint
48 ** * Position list, without 0x00 terminator.
49 ** * Size of previous position list and rowid, as a 4 byte
50 ** big-endian integer.
53 ** Offset of last rowid written to data area. Relative to first byte of
57 ** Bytes of data written since iRowidOff.
59 struct Fts5HashEntry
{
60 Fts5HashEntry
*pHashNext
; /* Next hash entry with same hash-key */
61 Fts5HashEntry
*pScanNext
; /* Next entry in sorted order */
63 int nAlloc
; /* Total size of allocation */
64 int iSzPoslist
; /* Offset of space for 4-byte poslist size */
65 int nData
; /* Total bytes of data (incl. structure) */
66 int nKey
; /* Length of key in bytes */
67 u8 bDel
; /* Set delete-flag @ iSzPoslist */
68 u8 bContent
; /* Set content-flag (detail=none mode) */
69 i16 iCol
; /* Column of last value written */
70 int iPos
; /* Position of last value written */
71 i64 iRowid
; /* Rowid of last value written */
77 ** char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
79 #define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
83 ** Allocate a new hash table.
85 int sqlite3Fts5HashNew(Fts5Config
*pConfig
, Fts5Hash
**ppNew
, int *pnByte
){
89 *ppNew
= pNew
= (Fts5Hash
*)sqlite3_malloc(sizeof(Fts5Hash
));
94 memset(pNew
, 0, sizeof(Fts5Hash
));
95 pNew
->pnByte
= pnByte
;
96 pNew
->eDetail
= pConfig
->eDetail
;
99 nByte
= sizeof(Fts5HashEntry
*) * pNew
->nSlot
;
100 pNew
->aSlot
= (Fts5HashEntry
**)sqlite3_malloc64(nByte
);
101 if( pNew
->aSlot
==0 ){
106 memset(pNew
->aSlot
, 0, (size_t)nByte
);
113 ** Free a hash table object.
115 void sqlite3Fts5HashFree(Fts5Hash
*pHash
){
117 sqlite3Fts5HashClear(pHash
);
118 sqlite3_free(pHash
->aSlot
);
124 ** Empty (but do not delete) a hash table.
126 void sqlite3Fts5HashClear(Fts5Hash
*pHash
){
128 for(i
=0; i
<pHash
->nSlot
; i
++){
129 Fts5HashEntry
*pNext
;
130 Fts5HashEntry
*pSlot
;
131 for(pSlot
=pHash
->aSlot
[i
]; pSlot
; pSlot
=pNext
){
132 pNext
= pSlot
->pHashNext
;
136 memset(pHash
->aSlot
, 0, pHash
->nSlot
* sizeof(Fts5HashEntry
*));
140 static unsigned int fts5HashKey(int nSlot
, const u8
*p
, int n
){
143 for(i
=n
-1; i
>=0; i
--){
144 h
= (h
<< 3) ^ h
^ p
[i
];
149 static unsigned int fts5HashKey2(int nSlot
, u8 b
, const u8
*p
, int n
){
152 for(i
=n
-1; i
>=0; i
--){
153 h
= (h
<< 3) ^ h
^ p
[i
];
155 h
= (h
<< 3) ^ h
^ b
;
160 ** Resize the hash table by doubling the number of slots.
162 static int fts5HashResize(Fts5Hash
*pHash
){
163 int nNew
= pHash
->nSlot
*2;
165 Fts5HashEntry
**apNew
;
166 Fts5HashEntry
**apOld
= pHash
->aSlot
;
168 apNew
= (Fts5HashEntry
**)sqlite3_malloc64(nNew
*sizeof(Fts5HashEntry
*));
169 if( !apNew
) return SQLITE_NOMEM
;
170 memset(apNew
, 0, nNew
*sizeof(Fts5HashEntry
*));
172 for(i
=0; i
<pHash
->nSlot
; i
++){
175 Fts5HashEntry
*p
= apOld
[i
];
176 apOld
[i
] = p
->pHashNext
;
177 iHash
= fts5HashKey(nNew
, (u8
*)fts5EntryKey(p
),
178 (int)strlen(fts5EntryKey(p
)));
179 p
->pHashNext
= apNew
[iHash
];
186 pHash
->aSlot
= apNew
;
190 static int fts5HashAddPoslistSize(
197 u8
*pPtr
= p2
? (u8
*)p2
: (u8
*)p
;
198 int nData
= p
->nData
;
199 if( pHash
->eDetail
==FTS5_DETAIL_NONE
){
200 assert( nData
==p
->iSzPoslist
);
202 pPtr
[nData
++] = 0x00;
204 pPtr
[nData
++] = 0x00;
208 int nSz
= (nData
- p
->iSzPoslist
- 1); /* Size in bytes */
209 int nPos
= nSz
*2 + p
->bDel
; /* Value of nPos field */
211 assert( p
->bDel
==0 || p
->bDel
==1 );
213 pPtr
[p
->iSzPoslist
] = (u8
)nPos
;
215 int nByte
= sqlite3Fts5GetVarintLen((u32
)nPos
);
216 memmove(&pPtr
[p
->iSzPoslist
+ nByte
], &pPtr
[p
->iSzPoslist
+ 1], nSz
);
217 sqlite3Fts5PutVarint(&pPtr
[p
->iSzPoslist
], nPos
);
222 nRet
= nData
- p
->nData
;
234 ** Add an entry to the in-memory hash table. The key is the concatenation
235 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
237 ** (bByte || pToken) -> (iRowid,iCol,iPos)
239 ** Or, if iCol is negative, then the value is a delete marker.
241 int sqlite3Fts5HashWrite(
243 i64 iRowid
, /* Rowid for this entry */
244 int iCol
, /* Column token appears in (-ve -> delete) */
245 int iPos
, /* Position of token within column */
246 char bByte
, /* First byte of token */
247 const char *pToken
, int nToken
/* Token to add or remove to or from index */
252 int nIncr
= 0; /* Amount to increment (*pHash->pnByte) by */
253 int bNew
; /* If non-delete entry should be written */
255 bNew
= (pHash
->eDetail
==FTS5_DETAIL_FULL
);
257 /* Attempt to locate an existing hash entry */
258 iHash
= fts5HashKey2(pHash
->nSlot
, (u8
)bByte
, (const u8
*)pToken
, nToken
);
259 for(p
=pHash
->aSlot
[iHash
]; p
; p
=p
->pHashNext
){
260 char *zKey
= fts5EntryKey(p
);
263 && memcmp(&zKey
[1], pToken
, nToken
)==0
269 /* If an existing hash entry cannot be found, create a new one. */
271 /* Figure out how much space to allocate */
273 sqlite3_int64 nByte
= sizeof(Fts5HashEntry
) + (nToken
+1) + 1 + 64;
274 if( nByte
<128 ) nByte
= 128;
276 /* Grow the Fts5Hash.aSlot[] array if necessary. */
277 if( (pHash
->nEntry
*2)>=pHash
->nSlot
){
278 int rc
= fts5HashResize(pHash
);
279 if( rc
!=SQLITE_OK
) return rc
;
280 iHash
= fts5HashKey2(pHash
->nSlot
, (u8
)bByte
, (const u8
*)pToken
, nToken
);
283 /* Allocate new Fts5HashEntry and add it to the hash table. */
284 p
= (Fts5HashEntry
*)sqlite3_malloc64(nByte
);
285 if( !p
) return SQLITE_NOMEM
;
286 memset(p
, 0, sizeof(Fts5HashEntry
));
287 p
->nAlloc
= (int)nByte
;
288 zKey
= fts5EntryKey(p
);
290 memcpy(&zKey
[1], pToken
, nToken
);
291 assert( iHash
==fts5HashKey(pHash
->nSlot
, (u8
*)zKey
, nToken
+1) );
293 zKey
[nToken
+1] = '\0';
294 p
->nData
= nToken
+1 + 1 + sizeof(Fts5HashEntry
);
295 p
->pHashNext
= pHash
->aSlot
[iHash
];
296 pHash
->aSlot
[iHash
] = p
;
299 /* Add the first rowid field to the hash-entry */
300 p
->nData
+= sqlite3Fts5PutVarint(&((u8
*)p
)[p
->nData
], iRowid
);
303 p
->iSzPoslist
= p
->nData
;
304 if( pHash
->eDetail
!=FTS5_DETAIL_NONE
){
306 p
->iCol
= (pHash
->eDetail
==FTS5_DETAIL_FULL
? 0 : -1);
311 /* Appending to an existing hash-entry. Check that there is enough
312 ** space to append the largest possible new entry. Worst case scenario
315 ** + 9 bytes for a new rowid,
316 ** + 4 byte reserved for the "poslist size" varint.
317 ** + 1 byte for a "new column" byte,
318 ** + 3 bytes for a new column number (16-bit max) as a varint,
319 ** + 5 bytes for the new position offset (32-bit max).
321 if( (p
->nAlloc
- p
->nData
) < (9 + 4 + 1 + 3 + 5) ){
322 sqlite3_int64 nNew
= p
->nAlloc
* 2;
325 pNew
= (Fts5HashEntry
*)sqlite3_realloc64(p
, nNew
);
326 if( pNew
==0 ) return SQLITE_NOMEM
;
327 pNew
->nAlloc
= (int)nNew
;
328 for(pp
=&pHash
->aSlot
[iHash
]; *pp
!=p
; pp
=&(*pp
)->pHashNext
);
334 assert( (p
->nAlloc
- p
->nData
) >= (9 + 4 + 1 + 3 + 5) );
338 /* If this is a new rowid, append the 4-byte size field for the previous
339 ** entry, and the new rowid for this entry. */
340 if( iRowid
!=p
->iRowid
){
341 u64 iDiff
= (u64
)iRowid
- (u64
)p
->iRowid
;
342 fts5HashAddPoslistSize(pHash
, p
, 0);
343 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iDiff
);
346 p
->iSzPoslist
= p
->nData
;
347 if( pHash
->eDetail
!=FTS5_DETAIL_NONE
){
349 p
->iCol
= (pHash
->eDetail
==FTS5_DETAIL_FULL
? 0 : -1);
355 if( pHash
->eDetail
==FTS5_DETAIL_NONE
){
358 /* Append a new column value, if necessary */
359 assert_nc( iCol
>=p
->iCol
);
361 if( pHash
->eDetail
==FTS5_DETAIL_FULL
){
362 pPtr
[p
->nData
++] = 0x01;
363 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iCol
);
368 p
->iCol
= (i16
)(iPos
= iCol
);
372 /* Append the new position offset, if necessary */
374 p
->nData
+= sqlite3Fts5PutVarint(&pPtr
[p
->nData
], iPos
- p
->iPos
+ 2);
379 /* This is a delete. Set the delete flag. */
384 *pHash
->pnByte
+= nIncr
;
390 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
391 ** each sorted in key order. This function merges the two lists into a
392 ** single list and returns a pointer to its first element.
394 static Fts5HashEntry
*fts5HashEntryMerge(
395 Fts5HashEntry
*pLeft
,
396 Fts5HashEntry
*pRight
398 Fts5HashEntry
*p1
= pLeft
;
399 Fts5HashEntry
*p2
= pRight
;
400 Fts5HashEntry
*pRet
= 0;
401 Fts5HashEntry
**ppOut
= &pRet
;
412 char *zKey1
= fts5EntryKey(p1
);
413 char *zKey2
= fts5EntryKey(p2
);
414 while( zKey1
[i
]==zKey2
[i
] ) i
++;
416 if( ((u8
)zKey1
[i
])>((u8
)zKey2
[i
]) ){
419 ppOut
= &p2
->pScanNext
;
424 ppOut
= &p1
->pScanNext
;
435 ** Extract all tokens from hash table iHash and link them into a list
436 ** in sorted order. The hash table is cleared before returning. It is
437 ** the responsibility of the caller to free the elements of the returned
440 static int fts5HashEntrySort(
442 const char *pTerm
, int nTerm
, /* Query prefix, if any */
443 Fts5HashEntry
**ppSorted
445 const int nMergeSlot
= 32;
447 Fts5HashEntry
*pList
;
452 ap
= sqlite3_malloc64(sizeof(Fts5HashEntry
*) * nMergeSlot
);
453 if( !ap
) return SQLITE_NOMEM
;
454 memset(ap
, 0, sizeof(Fts5HashEntry
*) * nMergeSlot
);
456 for(iSlot
=0; iSlot
<pHash
->nSlot
; iSlot
++){
457 Fts5HashEntry
*pIter
;
458 for(pIter
=pHash
->aSlot
[iSlot
]; pIter
; pIter
=pIter
->pHashNext
){
460 || (pIter
->nKey
+1>=nTerm
&& 0==memcmp(fts5EntryKey(pIter
), pTerm
, nTerm
))
462 Fts5HashEntry
*pEntry
= pIter
;
463 pEntry
->pScanNext
= 0;
464 for(i
=0; ap
[i
]; i
++){
465 pEntry
= fts5HashEntryMerge(pEntry
, ap
[i
]);
474 for(i
=0; i
<nMergeSlot
; i
++){
475 pList
= fts5HashEntryMerge(pList
, ap
[i
]);
485 ** Query the hash table for a doclist associated with term pTerm/nTerm.
487 int sqlite3Fts5HashQuery(
488 Fts5Hash
*pHash
, /* Hash table to query */
490 const char *pTerm
, int nTerm
, /* Query term */
491 void **ppOut
, /* OUT: Pointer to new object */
492 int *pnDoclist
/* OUT: Size of doclist in bytes */
494 unsigned int iHash
= fts5HashKey(pHash
->nSlot
, (const u8
*)pTerm
, nTerm
);
498 for(p
=pHash
->aSlot
[iHash
]; p
; p
=p
->pHashNext
){
499 zKey
= fts5EntryKey(p
);
500 assert( p
->nKey
+1==(int)strlen(zKey
) );
501 if( nTerm
==p
->nKey
+1 && memcmp(zKey
, pTerm
, nTerm
)==0 ) break;
505 int nHashPre
= sizeof(Fts5HashEntry
) + nTerm
+ 1;
506 int nList
= p
->nData
- nHashPre
;
507 u8
*pRet
= (u8
*)(*ppOut
= sqlite3_malloc64(nPre
+ nList
+ 10));
509 Fts5HashEntry
*pFaux
= (Fts5HashEntry
*)&pRet
[nPre
-nHashPre
];
510 memcpy(&pRet
[nPre
], &((u8
*)p
)[nHashPre
], nList
);
511 nList
+= fts5HashAddPoslistSize(pHash
, p
, pFaux
);
525 int sqlite3Fts5HashScanInit(
526 Fts5Hash
*p
, /* Hash table to query */
527 const char *pTerm
, int nTerm
/* Query prefix */
529 return fts5HashEntrySort(p
, pTerm
, nTerm
, &p
->pScan
);
532 void sqlite3Fts5HashScanNext(Fts5Hash
*p
){
533 assert( !sqlite3Fts5HashScanEof(p
) );
534 p
->pScan
= p
->pScan
->pScanNext
;
537 int sqlite3Fts5HashScanEof(Fts5Hash
*p
){
538 return (p
->pScan
==0);
541 void sqlite3Fts5HashScanEntry(
543 const char **pzTerm
, /* OUT: term (nul-terminated) */
544 const u8
**ppDoclist
, /* OUT: pointer to doclist */
545 int *pnDoclist
/* OUT: size of doclist in bytes */
548 if( (p
= pHash
->pScan
) ){
549 char *zKey
= fts5EntryKey(p
);
550 int nTerm
= (int)strlen(zKey
);
551 fts5HashAddPoslistSize(pHash
, p
, 0);
553 *ppDoclist
= (const u8
*)&zKey
[nTerm
+1];
554 *pnDoclist
= p
->nData
- (sizeof(Fts5HashEntry
) + nTerm
+ 1);