4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
12 ** This file contains code for the VdbeSorter object, used in concert with
13 ** a VdbeCursor to sort large numbers of keys (as may be required, for
14 ** example, by CREATE INDEX statements on tables too large to fit in main
18 #include "sqliteInt.h"
21 #ifndef SQLITE_OMIT_MERGE_SORT
23 typedef struct VdbeSorterIter VdbeSorterIter
;
24 typedef struct SorterRecord SorterRecord
;
27 ** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
29 ** As keys are added to the sorter, they are written to disk in a series
30 ** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly
31 ** the same as the cache-size allowed for temporary databases. In order
32 ** to allow the caller to extract keys from the sorter in sorted order,
33 ** all PMAs currently stored on disk must be merged together. This comment
34 ** describes the data structure used to do so. The structure supports
35 ** merging any number of arrays in a single pass with no redundant comparison
38 ** The aIter[] array contains an iterator for each of the PMAs being merged.
39 ** An aIter[] iterator either points to a valid key or else is at EOF. For
40 ** the purposes of the paragraphs below, we assume that the array is actually
41 ** N elements in size, where N is the smallest power of 2 greater to or equal
42 ** to the number of iterators being merged. The extra aIter[] elements are
43 ** treated as if they are empty (always at EOF).
45 ** The aTree[] array is also N elements in size. The value of N is stored in
46 ** the VdbeSorter.nTree variable.
48 ** The final (N/2) elements of aTree[] contain the results of comparing
49 ** pairs of iterator keys together. Element i contains the result of
50 ** comparing aIter[2*i-N] and aIter[2*i-N+1]. Whichever key is smaller, the
51 ** aTree element is set to the index of it.
53 ** For the purposes of this comparison, EOF is considered greater than any
54 ** other key value. If the keys are equal (only possible with two EOF
55 ** values), it doesn't matter which index is stored.
57 ** The (N/4) elements of aTree[] that preceed the final (N/2) described
58 ** above contains the index of the smallest of each block of 4 iterators.
59 ** And so on. So that aTree[1] contains the index of the iterator that
60 ** currently points to the smallest key value. aTree[0] is unused.
66 ** aIter[2] -> Elderberry
67 ** aIter[3] -> Currant
68 ** aIter[4] -> Grapefruit
73 ** aTree[] = { X, 5 0, 5 0, 3, 5, 6 }
75 ** The current element is "Apple" (the value of the key indicated by
76 ** iterator 5). When the Next() operation is invoked, iterator 5 will
77 ** be advanced to the next key in its segment. Say the next key is
80 ** aIter[5] -> Eggplant
82 ** The contents of aTree[] are updated first by comparing the new iterator
83 ** 5 key to the current key of iterator 4 (still "Grapefruit"). The iterator
84 ** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree.
85 ** The value of iterator 6 - "Durian" - is now smaller than that of iterator
86 ** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian),
87 ** so the value written into element 1 of the array is 0. As follows:
89 ** aTree[] = { X, 0 0, 6 0, 3, 5, 6 }
91 ** In other words, each time we advance to the next sorter element, log2(N)
92 ** key comparison operations are required, where N is the number of segments
93 ** being merged (rounded up to the next power of 2).
96 i64 iWriteOff
; /* Current write offset within file pTemp1 */
97 i64 iReadOff
; /* Current read offset within file pTemp1 */
98 int nInMemory
; /* Current size of pRecord list as PMA */
99 int nTree
; /* Used size of aTree/aIter (power of 2) */
100 int nPMA
; /* Number of PMAs stored in pTemp1 */
101 int mnPmaSize
; /* Minimum PMA size, in bytes */
102 int mxPmaSize
; /* Maximum PMA size, in bytes. 0==no limit */
103 VdbeSorterIter
*aIter
; /* Array of iterators to merge */
104 int *aTree
; /* Current state of incremental merge */
105 sqlite3_file
*pTemp1
; /* PMA file 1 */
106 SorterRecord
*pRecord
; /* Head of in-memory record list */
107 UnpackedRecord
*pUnpacked
; /* Used to unpack keys */
111 ** The following type is an iterator for a PMA. It caches the current key in
112 ** variables nKey/aKey. If the iterator is at EOF, pFile==0.
114 struct VdbeSorterIter
{
115 i64 iReadOff
; /* Current read offset */
116 i64 iEof
; /* 1 byte past EOF for this iterator */
117 int nAlloc
; /* Bytes of space at aAlloc */
118 int nKey
; /* Number of bytes in key */
119 sqlite3_file
*pFile
; /* File iterator is reading from */
120 u8
*aAlloc
; /* Allocated space */
121 u8
*aKey
; /* Pointer to current key */
125 ** A structure to store a single record. All in-memory records are connected
126 ** together into a linked list headed at VdbeSorter.pRecord using the
127 ** SorterRecord.pNext pointer.
129 struct SorterRecord
{
135 /* Minimum allowable value for the VdbeSorter.nWorking variable */
136 #define SORTER_MIN_WORKING 10
138 /* Maximum number of segments to merge in a single pass. */
139 #define SORTER_MAX_MERGE_COUNT 16
142 ** Free all memory belonging to the VdbeSorterIter object passed as the second
143 ** argument. All structure fields are set to zero before returning.
145 static void vdbeSorterIterZero(sqlite3
*db
, VdbeSorterIter
*pIter
){
146 sqlite3DbFree(db
, pIter
->aAlloc
);
147 memset(pIter
, 0, sizeof(VdbeSorterIter
));
151 ** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
152 ** no error occurs, or an SQLite error code if one does.
154 static int vdbeSorterIterNext(
155 sqlite3
*db
, /* Database handle (for sqlite3DbMalloc() ) */
156 VdbeSorterIter
*pIter
/* Iterator to advance */
158 int rc
; /* Return Code */
159 int nRead
; /* Number of bytes read */
160 int nRec
= 0; /* Size of record in bytes */
161 int iOff
= 0; /* Size of serialized size varint in bytes */
163 assert( pIter
->iEof
>=pIter
->iReadOff
);
164 if( pIter
->iEof
-pIter
->iReadOff
>5 ){
167 nRead
= (int)(pIter
->iEof
- pIter
->iReadOff
);
170 /* This is an EOF condition */
171 vdbeSorterIterZero(db
, pIter
);
175 rc
= sqlite3OsRead(pIter
->pFile
, pIter
->aAlloc
, nRead
, pIter
->iReadOff
);
177 iOff
= getVarint32(pIter
->aAlloc
, nRec
);
178 if( (iOff
+nRec
)>nRead
){
179 int nRead2
; /* Number of extra bytes to read */
180 if( (iOff
+nRec
)>pIter
->nAlloc
){
181 int nNew
= pIter
->nAlloc
*2;
182 while( (iOff
+nRec
)>nNew
) nNew
= nNew
*2;
183 pIter
->aAlloc
= sqlite3DbReallocOrFree(db
, pIter
->aAlloc
, nNew
);
184 if( !pIter
->aAlloc
) return SQLITE_NOMEM
;
185 pIter
->nAlloc
= nNew
;
188 nRead2
= iOff
+ nRec
- nRead
;
190 pIter
->pFile
, &pIter
->aAlloc
[nRead
], nRead2
, pIter
->iReadOff
+nRead
195 assert( rc
!=SQLITE_OK
|| nRec
>0 );
196 pIter
->iReadOff
+= iOff
+nRec
;
198 pIter
->aKey
= &pIter
->aAlloc
[iOff
];
203 ** Write a single varint, value iVal, to file-descriptor pFile. Return
204 ** SQLITE_OK if successful, or an SQLite error code if some error occurs.
206 ** The value of *piOffset when this function is called is used as the byte
207 ** offset in file pFile to write to. Before returning, *piOffset is
208 ** incremented by the number of bytes written.
210 static int vdbeSorterWriteVarint(
211 sqlite3_file
*pFile
, /* File to write to */
212 i64 iVal
, /* Value to write as a varint */
213 i64
*piOffset
/* IN/OUT: Write offset in file pFile */
215 u8 aVarint
[9]; /* Buffer large enough for a varint */
216 int nVarint
; /* Number of used bytes in varint */
217 int rc
; /* Result of write() call */
219 nVarint
= sqlite3PutVarint(aVarint
, iVal
);
220 rc
= sqlite3OsWrite(pFile
, aVarint
, nVarint
, *piOffset
);
221 *piOffset
+= nVarint
;
227 ** Read a single varint from file-descriptor pFile. Return SQLITE_OK if
228 ** successful, or an SQLite error code if some error occurs.
230 ** The value of *piOffset when this function is called is used as the
231 ** byte offset in file pFile from whence to read the varint. If successful
232 ** (i.e. if no IO error occurs), then *piOffset is set to the offset of
233 ** the first byte past the end of the varint before returning. *piVal is
234 ** set to the integer value read. If an error occurs, the final values of
235 ** both *piOffset and *piVal are undefined.
237 static int vdbeSorterReadVarint(
238 sqlite3_file
*pFile
, /* File to read from */
239 i64
*piOffset
, /* IN/OUT: Read offset in pFile */
240 i64
*piVal
/* OUT: Value read from file */
242 u8 aVarint
[9]; /* Buffer large enough for a varint */
243 i64 iOff
= *piOffset
; /* Offset in file to read from */
244 int rc
; /* Return code */
246 rc
= sqlite3OsRead(pFile
, aVarint
, 9, iOff
);
248 *piOffset
+= getVarint(aVarint
, (u64
*)piVal
);
255 ** Initialize iterator pIter to scan through the PMA stored in file pFile
256 ** starting at offset iStart and ending at offset iEof-1. This function
257 ** leaves the iterator pointing to the first key in the PMA (or EOF if the
260 static int vdbeSorterIterInit(
261 sqlite3
*db
, /* Database handle */
262 VdbeSorter
*pSorter
, /* Sorter object */
263 i64 iStart
, /* Start offset in pFile */
264 VdbeSorterIter
*pIter
, /* Iterator to populate */
265 i64
*pnByte
/* IN/OUT: Increment this value by PMA size */
269 assert( pSorter
->iWriteOff
>iStart
);
270 assert( pIter
->aAlloc
==0 );
271 pIter
->pFile
= pSorter
->pTemp1
;
272 pIter
->iReadOff
= iStart
;
274 pIter
->aAlloc
= (u8
*)sqlite3DbMallocRaw(db
, pIter
->nAlloc
);
275 if( !pIter
->aAlloc
){
278 i64 nByte
; /* Total size of PMA in bytes */
279 rc
= vdbeSorterReadVarint(pSorter
->pTemp1
, &pIter
->iReadOff
, &nByte
);
281 pIter
->iEof
= pIter
->iReadOff
+ nByte
;
284 rc
= vdbeSorterIterNext(db
, pIter
);
291 ** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2,
292 ** size nKey2 bytes). Argument pKeyInfo supplies the collation functions
293 ** used by the comparison. If an error occurs, return an SQLite error code.
294 ** Otherwise, return SQLITE_OK and set *pRes to a negative, zero or positive
295 ** value, depending on whether key1 is smaller, equal to or larger than key2.
297 ** If the bOmitRowid argument is non-zero, assume both keys end in a rowid
298 ** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid
299 ** is true and key1 contains even a single NULL value, it is considered to
300 ** be less than key2. Even if key2 also contains NULL values.
302 ** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace
303 ** has been allocated and contains an unpacked record that is used as key2.
305 static void vdbeSorterCompare(
306 VdbeCursor
*pCsr
, /* Cursor object (for pKeyInfo) */
307 int bOmitRowid
, /* Ignore rowid field at end of keys */
308 void *pKey1
, int nKey1
, /* Left side of comparison */
309 void *pKey2
, int nKey2
, /* Right side of comparison */
310 int *pRes
/* OUT: Result of comparison */
312 KeyInfo
*pKeyInfo
= pCsr
->pKeyInfo
;
313 VdbeSorter
*pSorter
= pCsr
->pSorter
;
314 UnpackedRecord
*r2
= pSorter
->pUnpacked
;
318 sqlite3VdbeRecordUnpack(pKeyInfo
, nKey2
, pKey2
, r2
);
322 r2
->nField
= pKeyInfo
->nField
;
323 assert( r2
->nField
>0 );
324 for(i
=0; i
<r2
->nField
; i
++){
325 if( r2
->aMem
[i
].flags
& MEM_Null
){
330 r2
->flags
|= UNPACKED_PREFIX_MATCH
;
333 *pRes
= sqlite3VdbeRecordCompare(nKey1
, pKey1
, r2
);
337 ** This function is called to compare two iterator keys when merging
338 ** multiple b-tree segments. Parameter iOut is the index of the aTree[]
339 ** value to recalculate.
341 static int vdbeSorterDoCompare(VdbeCursor
*pCsr
, int iOut
){
342 VdbeSorter
*pSorter
= pCsr
->pSorter
;
349 assert( iOut
<pSorter
->nTree
&& iOut
>0 );
351 if( iOut
>=(pSorter
->nTree
/2) ){
352 i1
= (iOut
- pSorter
->nTree
/2) * 2;
355 i1
= pSorter
->aTree
[iOut
*2];
356 i2
= pSorter
->aTree
[iOut
*2+1];
359 p1
= &pSorter
->aIter
[i1
];
360 p2
= &pSorter
->aIter
[i2
];
364 }else if( p2
->pFile
==0 ){
368 assert( pCsr
->pSorter
->pUnpacked
!=0 ); /* allocated in vdbeSorterMerge() */
370 pCsr
, 0, p1
->aKey
, p1
->nKey
, p2
->aKey
, p2
->nKey
, &res
379 pSorter
->aTree
[iOut
] = iRes
;
384 ** Initialize the temporary index cursor just opened as a sorter cursor.
386 int sqlite3VdbeSorterInit(sqlite3
*db
, VdbeCursor
*pCsr
){
387 int pgsz
; /* Page size of main database */
388 int mxCache
; /* Cache size */
389 VdbeSorter
*pSorter
; /* The new sorter */
392 assert( pCsr
->pKeyInfo
&& pCsr
->pBt
==0 );
393 pCsr
->pSorter
= pSorter
= sqlite3DbMallocZero(db
, sizeof(VdbeSorter
));
398 pSorter
->pUnpacked
= sqlite3VdbeAllocUnpackedRecord(pCsr
->pKeyInfo
, 0, 0, &d
);
399 if( pSorter
->pUnpacked
==0 ) return SQLITE_NOMEM
;
400 assert( pSorter
->pUnpacked
==(UnpackedRecord
*)d
);
402 if( !sqlite3TempInMemory(db
) ){
403 pgsz
= sqlite3BtreeGetPageSize(db
->aDb
[0].pBt
);
404 pSorter
->mnPmaSize
= SORTER_MIN_WORKING
* pgsz
;
405 mxCache
= db
->aDb
[0].pSchema
->cache_size
;
406 if( mxCache
<SORTER_MIN_WORKING
) mxCache
= SORTER_MIN_WORKING
;
407 pSorter
->mxPmaSize
= mxCache
* pgsz
;
414 ** Free the list of sorted records starting at pRecord.
416 static void vdbeSorterRecordFree(sqlite3
*db
, SorterRecord
*pRecord
){
419 for(p
=pRecord
; p
; p
=pNext
){
421 sqlite3DbFree(db
, p
);
426 ** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
428 void sqlite3VdbeSorterClose(sqlite3
*db
, VdbeCursor
*pCsr
){
429 VdbeSorter
*pSorter
= pCsr
->pSorter
;
431 if( pSorter
->aIter
){
433 for(i
=0; i
<pSorter
->nTree
; i
++){
434 vdbeSorterIterZero(db
, &pSorter
->aIter
[i
]);
436 sqlite3DbFree(db
, pSorter
->aIter
);
438 if( pSorter
->pTemp1
){
439 sqlite3OsCloseFree(pSorter
->pTemp1
);
441 vdbeSorterRecordFree(db
, pSorter
->pRecord
);
442 sqlite3DbFree(db
, pSorter
->pUnpacked
);
443 sqlite3DbFree(db
, pSorter
);
449 ** Allocate space for a file-handle and open a temporary file. If successful,
450 ** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK.
451 ** Otherwise, set *ppFile to 0 and return an SQLite error code.
453 static int vdbeSorterOpenTempFile(sqlite3
*db
, sqlite3_file
**ppFile
){
455 return sqlite3OsOpenMalloc(db
->pVfs
, 0, ppFile
,
456 SQLITE_OPEN_TEMP_JOURNAL
|
457 SQLITE_OPEN_READWRITE
| SQLITE_OPEN_CREATE
|
458 SQLITE_OPEN_EXCLUSIVE
| SQLITE_OPEN_DELETEONCLOSE
, &dummy
463 ** Merge the two sorted lists p1 and p2 into a single list.
464 ** Set *ppOut to the head of the new list.
466 static void vdbeSorterMerge(
467 VdbeCursor
*pCsr
, /* For pKeyInfo */
468 SorterRecord
*p1
, /* First list to merge */
469 SorterRecord
*p2
, /* Second list to merge */
470 SorterRecord
**ppOut
/* OUT: Head of merged list */
472 SorterRecord
*pFinal
= 0;
473 SorterRecord
**pp
= &pFinal
;
474 void *pVal2
= p2
? p2
->pVal
: 0;
478 vdbeSorterCompare(pCsr
, 0, p1
->pVal
, p1
->nVal
, pVal2
, p2
->nVal
, &res
);
497 ** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK
498 ** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error
501 static int vdbeSorterSort(VdbeCursor
*pCsr
){
503 SorterRecord
**aSlot
;
505 VdbeSorter
*pSorter
= pCsr
->pSorter
;
507 aSlot
= (SorterRecord
**)sqlite3MallocZero(64 * sizeof(SorterRecord
*));
512 p
= pSorter
->pRecord
;
514 SorterRecord
*pNext
= p
->pNext
;
516 for(i
=0; aSlot
[i
]; i
++){
517 vdbeSorterMerge(pCsr
, p
, aSlot
[i
], &p
);
526 vdbeSorterMerge(pCsr
, p
, aSlot
[i
], &p
);
528 pSorter
->pRecord
= p
;
536 ** Write the current contents of the in-memory linked-list to a PMA. Return
537 ** SQLITE_OK if successful, or an SQLite error code otherwise.
539 ** The format of a PMA is:
541 ** * A varint. This varint contains the total number of bytes of content
542 ** in the PMA (not including the varint itself).
544 ** * One or more records packed end-to-end in order of ascending keys.
545 ** Each record consists of a varint followed by a blob of data (the
546 ** key). The varint is the number of bytes in the blob of data.
548 static int vdbeSorterListToPMA(sqlite3
*db
, VdbeCursor
*pCsr
){
549 int rc
= SQLITE_OK
; /* Return code */
550 VdbeSorter
*pSorter
= pCsr
->pSorter
;
552 if( pSorter
->nInMemory
==0 ){
553 assert( pSorter
->pRecord
==0 );
557 rc
= vdbeSorterSort(pCsr
);
559 /* If the first temporary PMA file has not been opened, open it now. */
560 if( rc
==SQLITE_OK
&& pSorter
->pTemp1
==0 ){
561 rc
= vdbeSorterOpenTempFile(db
, &pSorter
->pTemp1
);
562 assert( rc
!=SQLITE_OK
|| pSorter
->pTemp1
);
563 assert( pSorter
->iWriteOff
==0 );
564 assert( pSorter
->nPMA
==0 );
568 i64 iOff
= pSorter
->iWriteOff
;
570 SorterRecord
*pNext
= 0;
571 static const char eightZeros
[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
574 rc
= vdbeSorterWriteVarint(pSorter
->pTemp1
, pSorter
->nInMemory
, &iOff
);
575 for(p
=pSorter
->pRecord
; rc
==SQLITE_OK
&& p
; p
=pNext
){
577 rc
= vdbeSorterWriteVarint(pSorter
->pTemp1
, p
->nVal
, &iOff
);
580 rc
= sqlite3OsWrite(pSorter
->pTemp1
, p
->pVal
, p
->nVal
, iOff
);
584 sqlite3DbFree(db
, p
);
587 /* This assert verifies that unless an error has occurred, the size of
588 ** the PMA on disk is the same as the expected size stored in
589 ** pSorter->nInMemory. */
590 assert( rc
!=SQLITE_OK
|| pSorter
->nInMemory
==(
591 iOff
-pSorter
->iWriteOff
-sqlite3VarintLen(pSorter
->nInMemory
)
594 pSorter
->iWriteOff
= iOff
;
596 /* Terminate each file with 8 extra bytes so that from any offset
597 ** in the file we can always read 9 bytes without a SHORT_READ error */
598 rc
= sqlite3OsWrite(pSorter
->pTemp1
, eightZeros
, 8, iOff
);
600 pSorter
->pRecord
= p
;
607 ** Add a record to the sorter.
609 int sqlite3VdbeSorterWrite(
610 sqlite3
*db
, /* Database handle */
611 VdbeCursor
*pCsr
, /* Sorter cursor */
612 Mem
*pVal
/* Memory cell containing record */
614 VdbeSorter
*pSorter
= pCsr
->pSorter
;
615 int rc
= SQLITE_OK
; /* Return Code */
616 SorterRecord
*pNew
; /* New list element */
619 pSorter
->nInMemory
+= sqlite3VarintLen(pVal
->n
) + pVal
->n
;
621 pNew
= (SorterRecord
*)sqlite3DbMallocRaw(db
, pVal
->n
+ sizeof(SorterRecord
));
625 pNew
->pVal
= (void *)&pNew
[1];
626 memcpy(pNew
->pVal
, pVal
->z
, pVal
->n
);
627 pNew
->nVal
= pVal
->n
;
628 pNew
->pNext
= pSorter
->pRecord
;
629 pSorter
->pRecord
= pNew
;
632 /* See if the contents of the sorter should now be written out. They
633 ** are written out when either of the following are true:
635 ** * The total memory allocated for the in-memory list is greater
636 ** than (page-size * cache-size), or
638 ** * The total memory allocated for the in-memory list is greater
639 ** than (page-size * 10) and sqlite3HeapNearlyFull() returns true.
641 if( rc
==SQLITE_OK
&& pSorter
->mxPmaSize
>0 && (
642 (pSorter
->nInMemory
>pSorter
->mxPmaSize
)
643 || (pSorter
->nInMemory
>pSorter
->mnPmaSize
&& sqlite3HeapNearlyFull())
645 rc
= vdbeSorterListToPMA(db
, pCsr
);
646 pSorter
->nInMemory
= 0;
653 ** Helper function for sqlite3VdbeSorterRewind().
655 static int vdbeSorterInitMerge(
656 sqlite3
*db
, /* Database handle */
657 VdbeCursor
*pCsr
, /* Cursor handle for this sorter */
658 i64
*pnByte
/* Sum of bytes in all opened PMAs */
660 VdbeSorter
*pSorter
= pCsr
->pSorter
;
661 int rc
= SQLITE_OK
; /* Return code */
662 int i
; /* Used to iterator through aIter[] */
663 i64 nByte
= 0; /* Total bytes in all opened PMAs */
665 /* Initialize the iterators. */
666 for(i
=0; i
<SORTER_MAX_MERGE_COUNT
; i
++){
667 VdbeSorterIter
*pIter
= &pSorter
->aIter
[i
];
668 rc
= vdbeSorterIterInit(db
, pSorter
, pSorter
->iReadOff
, pIter
, &nByte
);
669 pSorter
->iReadOff
= pIter
->iEof
;
670 assert( rc
!=SQLITE_OK
|| pSorter
->iReadOff
<=pSorter
->iWriteOff
);
671 if( rc
!=SQLITE_OK
|| pSorter
->iReadOff
>=pSorter
->iWriteOff
) break;
674 /* Initialize the aTree[] array. */
675 for(i
=pSorter
->nTree
-1; rc
==SQLITE_OK
&& i
>0; i
--){
676 rc
= vdbeSorterDoCompare(pCsr
, i
);
684 ** Once the sorter has been populated, this function is called to prepare
685 ** for iterating through its contents in sorted order.
687 int sqlite3VdbeSorterRewind(sqlite3
*db
, VdbeCursor
*pCsr
, int *pbEof
){
688 VdbeSorter
*pSorter
= pCsr
->pSorter
;
689 int rc
; /* Return code */
690 sqlite3_file
*pTemp2
= 0; /* Second temp file to use */
691 i64 iWrite2
= 0; /* Write offset for pTemp2 */
692 int nIter
; /* Number of iterators used */
693 int nByte
; /* Bytes of space required for aIter/aTree */
694 int N
= 2; /* Power of 2 >= nIter */
698 /* If no data has been written to disk, then do not do so now. Instead,
699 ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
700 ** from the in-memory list. */
701 if( pSorter
->nPMA
==0 ){
702 *pbEof
= !pSorter
->pRecord
;
703 assert( pSorter
->aTree
==0 );
704 return vdbeSorterSort(pCsr
);
707 /* Write the current b-tree to a PMA. Close the b-tree cursor. */
708 rc
= vdbeSorterListToPMA(db
, pCsr
);
709 if( rc
!=SQLITE_OK
) return rc
;
711 /* Allocate space for aIter[] and aTree[]. */
712 nIter
= pSorter
->nPMA
;
713 if( nIter
>SORTER_MAX_MERGE_COUNT
) nIter
= SORTER_MAX_MERGE_COUNT
;
715 while( N
<nIter
) N
+= N
;
716 nByte
= N
* (sizeof(int) + sizeof(VdbeSorterIter
));
717 pSorter
->aIter
= (VdbeSorterIter
*)sqlite3DbMallocZero(db
, nByte
);
718 if( !pSorter
->aIter
) return SQLITE_NOMEM
;
719 pSorter
->aTree
= (int *)&pSorter
->aIter
[N
];
723 int iNew
; /* Index of new, merged, PMA */
726 rc
==SQLITE_OK
&& iNew
*SORTER_MAX_MERGE_COUNT
<pSorter
->nPMA
;
729 i64 nWrite
; /* Number of bytes in new PMA */
731 /* If there are SORTER_MAX_MERGE_COUNT or less PMAs in file pTemp1,
732 ** initialize an iterator for each of them and break out of the loop.
733 ** These iterators will be incrementally merged as the VDBE layer calls
734 ** sqlite3VdbeSorterNext().
736 ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
737 ** initialize interators for SORTER_MAX_MERGE_COUNT of them. These PMAs
738 ** are merged into a single PMA that is written to file pTemp2.
740 rc
= vdbeSorterInitMerge(db
, pCsr
, &nWrite
);
741 assert( rc
!=SQLITE_OK
|| pSorter
->aIter
[ pSorter
->aTree
[1] ].pFile
);
742 if( rc
!=SQLITE_OK
|| pSorter
->nPMA
<=SORTER_MAX_MERGE_COUNT
){
746 /* Open the second temp file, if it is not already open. */
748 assert( iWrite2
==0 );
749 rc
= vdbeSorterOpenTempFile(db
, &pTemp2
);
753 rc
= vdbeSorterWriteVarint(pTemp2
, nWrite
, &iWrite2
);
758 while( rc
==SQLITE_OK
&& bEof
==0 ){
760 VdbeSorterIter
*pIter
= &pSorter
->aIter
[ pSorter
->aTree
[1] ];
761 assert( pIter
->pFile
);
762 nToWrite
= pIter
->nKey
+ sqlite3VarintLen(pIter
->nKey
);
763 rc
= sqlite3OsWrite(pTemp2
, pIter
->aAlloc
, nToWrite
, iWrite2
);
766 rc
= sqlite3VdbeSorterNext(db
, pCsr
, &bEof
);
772 if( pSorter
->nPMA
<=SORTER_MAX_MERGE_COUNT
){
775 sqlite3_file
*pTmp
= pSorter
->pTemp1
;
776 pSorter
->nPMA
= iNew
;
777 pSorter
->pTemp1
= pTemp2
;
779 pSorter
->iWriteOff
= iWrite2
;
780 pSorter
->iReadOff
= 0;
783 }while( rc
==SQLITE_OK
);
786 sqlite3OsCloseFree(pTemp2
);
788 *pbEof
= (pSorter
->aIter
[pSorter
->aTree
[1]].pFile
==0);
793 ** Advance to the next element in the sorter.
795 int sqlite3VdbeSorterNext(sqlite3
*db
, VdbeCursor
*pCsr
, int *pbEof
){
796 VdbeSorter
*pSorter
= pCsr
->pSorter
;
797 int rc
; /* Return code */
799 if( pSorter
->aTree
){
800 int iPrev
= pSorter
->aTree
[1];/* Index of iterator to advance */
801 int i
; /* Index of aTree[] to recalculate */
803 rc
= vdbeSorterIterNext(db
, &pSorter
->aIter
[iPrev
]);
804 for(i
=(pSorter
->nTree
+iPrev
)/2; rc
==SQLITE_OK
&& i
>0; i
=i
/2){
805 rc
= vdbeSorterDoCompare(pCsr
, i
);
808 *pbEof
= (pSorter
->aIter
[pSorter
->aTree
[1]].pFile
==0);
810 SorterRecord
*pFree
= pSorter
->pRecord
;
811 pSorter
->pRecord
= pFree
->pNext
;
813 vdbeSorterRecordFree(db
, pFree
);
814 *pbEof
= !pSorter
->pRecord
;
821 ** Return a pointer to a buffer owned by the sorter that contains the
824 static void *vdbeSorterRowkey(
825 VdbeSorter
*pSorter
, /* Sorter object */
826 int *pnKey
/* OUT: Size of current key in bytes */
829 if( pSorter
->aTree
){
830 VdbeSorterIter
*pIter
;
831 pIter
= &pSorter
->aIter
[ pSorter
->aTree
[1] ];
832 *pnKey
= pIter
->nKey
;
835 *pnKey
= pSorter
->pRecord
->nVal
;
836 pKey
= pSorter
->pRecord
->pVal
;
842 ** Copy the current sorter key into the memory cell pOut.
844 int sqlite3VdbeSorterRowkey(VdbeCursor
*pCsr
, Mem
*pOut
){
845 VdbeSorter
*pSorter
= pCsr
->pSorter
;
846 void *pKey
; int nKey
; /* Sorter key to copy into pOut */
848 pKey
= vdbeSorterRowkey(pSorter
, &nKey
);
849 if( sqlite3VdbeMemGrow(pOut
, nKey
, 0) ){
853 MemSetTypeFlag(pOut
, MEM_Blob
);
854 memcpy(pOut
->z
, pKey
, nKey
);
860 ** Compare the key in memory cell pVal with the key that the sorter cursor
861 ** passed as the first argument currently points to. For the purposes of
862 ** the comparison, ignore the rowid field at the end of each record.
864 ** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
865 ** Otherwise, set *pRes to a negative, zero or positive value if the
866 ** key in pVal is smaller than, equal to or larger than the current sorter
869 int sqlite3VdbeSorterCompare(
870 VdbeCursor
*pCsr
, /* Sorter cursor */
871 Mem
*pVal
, /* Value to compare to current sorter key */
872 int *pRes
/* OUT: Result of comparison */
874 VdbeSorter
*pSorter
= pCsr
->pSorter
;
875 void *pKey
; int nKey
; /* Sorter key to compare pVal with */
877 pKey
= vdbeSorterRowkey(pSorter
, &nKey
);
878 vdbeSorterCompare(pCsr
, 1, pVal
->z
, pVal
->n
, pKey
, nKey
, pRes
);
882 #endif /* #ifndef SQLITE_OMIT_MERGE_SORT */