4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
13 ** This file contains code used to implement an in-memory rollback journal.
14 ** The in-memory rollback journal is used to journal transactions for
15 ** ":memory:" databases and when the journal_mode=MEMORY pragma is used.
17 ** Update: The in-memory journal is also used to temporarily cache
18 ** smaller journals that are not critical for power-loss recovery.
19 ** For example, statement journals that are not too big will be held
20 ** entirely in memory, thus reducing the number of file I/O calls, and
21 ** more importantly, reducing temporary file creation events. If these
22 ** journals become too large for memory, they are spilled to disk. But
23 ** in the common case, they are usually small and no file I/O needs to occur.
26 #include "sqliteInt.h"
28 /* Forward references to internal structures */
/* Short typedef names for the three structures used throughout this file:
** the journal handle (MemJournal), a read/write cursor (FilePoint) and one
** node of the chunk list (FileChunk). */
29 typedef struct MemJournal MemJournal
;
30 typedef struct FilePoint FilePoint
;
31 typedef struct FileChunk FileChunk
;
34 ** The rollback journal is composed of a linked list of these structures.
36 ** The zChunk array is always at least 8 bytes in size - usually much more.
37 ** Its actual size is stored in the MemJournal.nChunkSize variable.
/* NOTE(review): the enclosing struct header is not visible in this extract;
** these look like the members of FileChunk - confirm against the full file. */
/* Link to the following chunk; list walks in this file advance via ->pNext. */
40 FileChunk
*pNext
; /* Next chunk in the journal */
/* Declared with 8 bytes only; fileChunkSize() computes the real allocation
** as sizeof(FileChunk) + (nChunkSize-8), so the usable payload is
** nChunkSize bytes. */
41 u8 zChunk
[8]; /* Content of this chunk */
45 ** By default, allocate this many bytes of memory for each FileChunk object.
/* This is the size of the whole FileChunk allocation, not of its payload:
** sqlite3JournalOpen() sets nChunkSize to
** 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE - sizeof(FileChunk) and asserts that
** fileChunkSize(nChunkSize) equals this constant. */
47 #define MEMJOURNAL_DFLT_FILECHUNKSIZE 1024
50 ** For chunk size nChunkSize, return the number of bytes that should
51 ** be allocated for each FileChunk structure.
/* The 8 is subtracted because sizeof(FileChunk) already accounts for the
** 8 bytes declared for zChunk[8]; the result is a size_t expression. */
53 #define fileChunkSize(nChunkSize) (sizeof(FileChunk) + ((nChunkSize)-8))
56 ** An instance of this object serves as a cursor into the rollback journal.
57 ** The cursor can be either for reading or writing.
/* NOTE(review): the enclosing struct header is not visible in this extract;
** these look like the members of FilePoint - confirm against the full file. */
/* Absolute byte offset; code in this file reduces it modulo nChunkSize to
** obtain the position inside pChunk. */
60 sqlite3_int64 iOffset
; /* Offset from the beginning of the file */
/* May be 0: memjrnlTruncate() clears the read cursor's chunk, and
** memjrnlWrite() tolerates a null endpoint chunk when the offset within
** the chunk is 0. */
61 FileChunk
*pChunk
; /* Specific chunk into which cursor points */
65 ** This structure is a subclass of sqlite3_file. Each open memory-journal
66 ** is an instance of this class.
/* NOTE(review): the enclosing struct header is not visible in this extract;
** these look like the members of MemJournal - confirm against the full file. */
/* Must stay first so that a MemJournal* and a sqlite3_file* can be cast to
** one another, as every method in this file does. */
69 const sqlite3_io_methods
*pMethod
; /* Parent class. MUST BE FIRST */
/* Payload bytes per FileChunk; assigned in sqlite3JournalOpen(). */
70 int nChunkSize
; /* In-memory chunk-size */
/* memjrnlWrite() converts the journal to a real file when nSpill>0 and a
** write would end beyond nSpill bytes. */
72 int nSpill
; /* Bytes of data before flushing */
73 FileChunk
*pFirst
; /* Head of in-memory chunk-list */
/* Write cursor: iOffset is the current journal size (see memjrnlFileSize). */
74 FilePoint endpoint
; /* Pointer to the end of the file */
/* Cache used by memjrnlRead() to avoid re-walking the list on sequential
** reads; reset by memjrnlTruncate(). */
75 FilePoint readpoint
; /* Pointer to the end of the last xRead() */
/* flags, pVfs and zJournal are the arguments memjrnlCreateFile() hands to
** sqlite3OsOpen() when the journal is moved to a real file. */
77 int flags
; /* xOpen flags */
78 sqlite3_vfs
*pVfs
; /* The "real" underlying VFS */
79 const char *zJournal
; /* Name of the journal file */
83 ** Read data from the in-memory journal file. This is the implementation
84 ** of the sqlite3_vfs.xRead method.
/*
** xRead method of the in-memory journal (it is the xRead entry of
** MemJournalMethods). Copies bytes from the chunk list, starting at
** journal offset iOfst, into the output buffer.
**
** Returns SQLITE_IOERR_SHORT_READ when iAmt+iOfst lies beyond the current
** end of the journal (endpoint.iOffset).
**
** NOTE(review): this extract omits several lines of the function (the
** declarations of zOut, nRead, iChunkOffset and pChunk, parts of the loops
** and the final return). The comments below describe only the statements
** that are visible; zOut is presumably initialised from zBuf and nRead from
** iAmt - confirm against the full file.
*/
86 static int memjrnlRead(
87 sqlite3_file
*pJfd
, /* The journal file from which to read */
88 void *zBuf
, /* Put the results here */
89 int iAmt
, /* Number of bytes to read */
90 sqlite_int64 iOfst
/* Begin reading at this offset */
92 MemJournal
*p
= (MemJournal
*)pJfd
;
/* A request that reaches past the end of the journal is rejected outright
** rather than partially satisfied. */
98 if( (iAmt
+iOfst
)>p
->endpoint
.iOffset
){
99 return SQLITE_IOERR_SHORT_READ
;
/* Invariant: a non-zero cached read offset always has a cached chunk. */
101 assert( p
->readpoint
.iOffset
==0 || p
->readpoint
.pChunk
!=0 );
/* The cached read cursor can only be reused when it sits exactly at iOfst.
** Otherwise, and always for iOfst==0, locate the chunk by walking from
** pFirst and skipping every chunk that ends at or before iOfst. */
102 if( p
->readpoint
.iOffset
!=iOfst
|| iOfst
==0 ){
103 sqlite3_int64 iOff
= 0;
104 for(pChunk
=p
->pFirst
;
105 ALWAYS(pChunk
) && (iOff
+p
->nChunkSize
)<=iOfst
;
108 iOff
+= p
->nChunkSize
;
/* Resume from the chunk remembered by the previous read (presumably the
** else-branch of the test above; the branch keyword is not visible here). */
111 pChunk
= p
->readpoint
.pChunk
;
/* Position of iOfst inside its chunk. */
115 iChunkOffset
= (int)(iOfst
%p
->nChunkSize
);
/* Per chunk: iSpace is the room from iChunkOffset to the end of the chunk,
** and nCopy is the smaller of that and the bytes still wanted. */
117 int iSpace
= p
->nChunkSize
- iChunkOffset
;
118 int nCopy
= MIN(nRead
, (p
->nChunkSize
- iChunkOffset
));
119 memcpy(zOut
, (u8
*)pChunk
->zChunk
+ iChunkOffset
, nCopy
);
/* Move on to the next chunk; stop when nothing is left to read or the
** list ends. */
123 } while( nRead
>=0 && (pChunk
=pChunk
->pNext
)!=0 && nRead
>0 );
/* Remember where this read ended so that a sequential follow-up read can
** skip the list walk. If the loop ran off the end of the list (pChunk==0)
** the cache is reset to offset 0 with no chunk. */
124 p
->readpoint
.iOffset
= pChunk
? iOfst
+iAmt
: 0;
125 p
->readpoint
.pChunk
= pChunk
;
131 ** Free the list of FileChunk structures headed at MemJournal.pFirst.
/*
** Walk the chunk list that starts at pFirst. Passing a null pFirst is
** harmless: the loop condition fails immediately.
**
** NOTE(review): the declarations of pIter/pNext and the statement that
** releases each chunk are not visible in this extract.
*/
133 static void memjrnlFreeChunks(FileChunk
*pFirst
){
/* The loop advances through the saved pNext rather than pIter->pNext, so
** the current node does not need to stay valid after the loop body. */
136 for(pIter
=pFirst
; pIter
; pIter
=pNext
){
137 pNext
= pIter
->pNext
;
143 ** Flush the contents of memory to a real file on disk.
/*
** Convert the in-memory journal *p into a real file.
**
** The MemJournal is copied to a local ("copy"), the handle memory is
** zeroed, and sqlite3OsOpen() is then asked to open copy.zJournal on that
** same memory (pReal aliases p). The buffered chunks are afterwards written
** to the new file one by one with sqlite3OsWrite().
**
** The value returned is presumably rc, the result of the open/write calls;
** the return statement itself is not visible in this extract.
**
** NOTE(review): the extract also omits the rc/iOff/pIter declarations and
** the tests on rc that select between the success path and the error path.
*/
145 static int memjrnlCreateFile(MemJournal
*p
){
/* pReal and p are the same address: the real file handle is built in place
** of the MemJournal. */
147 sqlite3_file
*pReal
= (sqlite3_file
*)p
;
/* Snapshot taken before the handle is wiped; all later reads of the old
** state go through "copy". */
148 MemJournal copy
= *p
;
150 memset(p
, 0, sizeof(MemJournal
));
151 rc
= sqlite3OsOpen(copy
.pVfs
, copy
.zJournal
, pReal
, copy
.flags
, 0);
153 int nChunk
= copy
.nChunkSize
;
156 for(pIter
=copy
.pFirst
; pIter
; pIter
=pIter
->pNext
){
/* Trim the amount written so that no more than endpoint.iOffset bytes in
** total reach the file (the last chunk is usually only partly filled). */
157 if( iOff
+ nChunk
> copy
.endpoint
.iOffset
){
158 nChunk
= copy
.endpoint
.iOffset
- iOff
;
160 rc
= sqlite3OsWrite(pReal
, (u8
*)pIter
->zChunk
, nChunk
, iOff
);
165 /* No error has occurred. Free the in-memory buffers. */
166 memjrnlFreeChunks(copy
.pFirst
);
170 /* If an error occurred while creating or writing to the file, restore
171 ** the original before returning. This way, SQLite uses the in-memory
172 ** journal data to roll back changes made to the internal page-cache
173 ** before this function was called. */
174 sqlite3OsClose(pReal
);
181 /* Forward reference */
182 static int memjrnlTruncate(sqlite3_file
*pJfd
, sqlite_int64 size
);
185 ** Write data to the file.
/*
** xWrite method of the in-memory journal (it is the xWrite entry of
** MemJournalMethods).
**
** Two paths are visible:
**   * Spill: when nSpill>0 and the write would end beyond nSpill bytes,
**     the journal is converted to a real file with memjrnlCreateFile() and
**     the write is passed to sqlite3OsWrite() on the same handle.
**   * In memory: the data is appended to the chunk list, allocating new
**     chunks as needed, or (for iOfst==0 with existing content) copied over
**     the start of the first chunk.
**
** Returns SQLITE_IOERR_NOMEM_BKPT on the allocation-failure path.
**
** NOTE(review): this extract omits several lines (the nWrite declaration,
** the enclosing loop over nWrite, branch keywords, the zWrite/nWrite
** updates and the final return). The comments describe only what is
** visible.
*/
187 static int memjrnlWrite(
188 sqlite3_file
*pJfd
, /* The journal file into which to write */
189 const void *zBuf
, /* Take data to be written from here */
190 int iAmt
, /* Number of bytes to write */
191 sqlite_int64 iOfst
/* Begin writing at this offset into the file */
193 MemJournal
*p
= (MemJournal
*)pJfd
;
195 u8
*zWrite
= (u8
*)zBuf
;
197 /* If the file should be created now, create it and write the new data
198 ** into the file on disk. */
199 if( p
->nSpill
>0 && (iAmt
+iOfst
)>p
->nSpill
){
200 int rc
= memjrnlCreateFile(p
);
/* After memjrnlCreateFile() the handle behind pJfd has been re-opened via
** sqlite3OsOpen(), so this write goes through sqlite3OsWrite(). */
202 rc
= sqlite3OsWrite(pJfd
, zBuf
, iAmt
, iOfst
);
207 /* If the contents of this write should be stored in memory */
209 /* An in-memory journal file should only ever be appended to. Random
210 ** access writes are not required. The only exception to this is when
211 ** the in-memory journal is being used by a connection using the
212 ** atomic-write optimization. In this case the first 28 bytes of the
213 ** journal file may be written as part of committing the transaction. */
214 assert( iOfst
<=p
->endpoint
.iOffset
);
/* A write that starts inside existing content (but not at offset 0) first
** truncates the journal to iOfst, turning the write into an append. The
** return value of memjrnlTruncate() is not checked here. */
215 if( iOfst
>0 && iOfst
!=p
->endpoint
.iOffset
){
216 memjrnlTruncate(pJfd
, iOfst
);
/* Rewrite of the journal start: copy over the beginning of the first
** chunk. The assert states that the data fits inside a single chunk. */
218 if( iOfst
==0 && p
->pFirst
){
219 assert( p
->nChunkSize
>iAmt
);
220 memcpy((u8
*)p
->pFirst
->zChunk
, zBuf
, iAmt
);
/* Append path: work out where the end of the journal falls inside the last
** chunk and how many bytes still fit there. */
223 FileChunk
*pChunk
= p
->endpoint
.pChunk
;
224 int iChunkOffset
= (int)(p
->endpoint
.iOffset
%p
->nChunkSize
);
225 int iSpace
= MIN(nWrite
, p
->nChunkSize
- iChunkOffset
);
227 assert( pChunk
!=0 || iChunkOffset
==0 );
/* Offset 0 within a chunk means either there is no chunk yet or the last
** one is exactly full; both cases need a fresh chunk. */
228 if( iChunkOffset
==0 ){
229 /* New chunk is required to extend the file. */
230 FileChunk
*pNew
= sqlite3_malloc(fileChunkSize(p
->nChunkSize
));
232 return SQLITE_IOERR_NOMEM_BKPT
;
/* Link the new chunk after the current tail. */
237 pChunk
->pNext
= pNew
;
239 assert( !p
->pFirst
);
/* The new chunk becomes both the working chunk and the endpoint chunk. */
242 pChunk
= p
->endpoint
.pChunk
= pNew
;
246 memcpy((u8
*)pChunk
->zChunk
+ iChunkOffset
, zWrite
, iSpace
);
/* Advance the end-of-journal offset by the number of bytes just stored. */
249 p
->endpoint
.iOffset
+= iSpace
;
258 ** Truncate the in-memory file.
/*
** xTruncate method of the in-memory journal (it is the xTruncate entry of
** MemJournalMethods). Only shrinking has an effect: nothing inside the
** visible body runs unless size is smaller than the current journal size.
**
** NOTE(review): this extract omits some lines (the test that selects
** between freeing the whole list and freeing only the tail, the pointer
** resets after each free, and the final return).
*/
260 static int memjrnlTruncate(sqlite3_file
*pJfd
, sqlite_int64 size
){
261 MemJournal
*p
= (MemJournal
*)pJfd
;
/* The endpoint chunk, when present, must be the last chunk in the list. */
262 assert( p
->endpoint
.pChunk
==0 || p
->endpoint
.pChunk
->pNext
==0 );
263 if( size
<p
->endpoint
.iOffset
){
/* pIter stays 0 when the whole list is released, which leaves the endpoint
** without a chunk further down. */
264 FileChunk
*pIter
= 0;
/* Release every chunk (presumably the size==0 case; the selecting test is
** not visible in this extract). */
266 memjrnlFreeChunks(p
->pFirst
);
/* iOff is the journal offset at which the chunk under pIter ends. The walk
** stops at the first chunk whose end is at or beyond size, i.e. the chunk
** that will hold the new end of the journal. */
269 i64 iOff
= p
->nChunkSize
;
270 for(pIter
=p
->pFirst
; ALWAYS(pIter
) && iOff
<size
; pIter
=pIter
->pNext
){
271 iOff
+= p
->nChunkSize
;
/* Everything after that chunk is no longer needed. */
274 memjrnlFreeChunks(pIter
->pNext
);
/* Move the write cursor to the new end and invalidate the read cursor,
** whose chunk may just have been released. */
279 p
->endpoint
.pChunk
= pIter
;
280 p
->endpoint
.iOffset
= size
;
281 p
->readpoint
.pChunk
= 0;
282 p
->readpoint
.iOffset
= 0;
/*
** xClose method of the in-memory journal (it is the xClose entry of
** MemJournalMethods): hands the entire chunk list to memjrnlFreeChunks().
**
** NOTE(review): the return statement is not visible in this extract.
*/
290 static int memjrnlClose(sqlite3_file
*pJfd
){
291 MemJournal
*p
= (MemJournal
*)pJfd
;
292 memjrnlFreeChunks(p
->pFirst
);
299 ** If the real file has been created, call its xSync method. Otherwise,
300 ** syncing an in-memory journal is a no-op.
/*
** xSync method of the in-memory journal (it is the xSync entry of
** MemJournalMethods). The visible body does no work: both arguments are
** explicitly marked unused.
**
** NOTE(review): the return statement is not visible in this extract.
*/
302 static int memjrnlSync(sqlite3_file
*pJfd
, int flags
){
303 UNUSED_PARAMETER2(pJfd
, flags
);
308 ** Query the size of the file in bytes.
/*
** xFileSize method of the in-memory journal (it is the xFileSize entry of
** MemJournalMethods). The size reported through *pSize is the write
** cursor's offset, endpoint.iOffset.
**
** NOTE(review): the return statement is not visible in this extract.
*/
310 static int memjrnlFileSize(sqlite3_file
*pJfd
, sqlite_int64
*pSize
){
311 MemJournal
*p
= (MemJournal
*)pJfd
;
312 *pSize
= (sqlite_int64
) p
->endpoint
.iOffset
;
317 ** Table of methods for MemJournal sqlite3_file object.
/*
** sqlite3JournalOpen() installs the address of this table in
** pJfd->pMethods. sqlite3JournalIsInMemory() and sqlite3JournalCreate()
** compare pMethods against it to recognise a journal that is still held
** in memory. Slots initialised to 0 have no implementation in this file.
**
** NOTE(review): several slots of the initializer are not visible in this
** extract; only the entries shown are documented.
*/
319 static const struct sqlite3_io_methods MemJournalMethods
= {
321 memjrnlClose
, /* xClose */
322 memjrnlRead
, /* xRead */
323 memjrnlWrite
, /* xWrite */
324 memjrnlTruncate
, /* xTruncate */
325 memjrnlSync
, /* xSync */
326 memjrnlFileSize
, /* xFileSize */
329 0, /* xCheckReservedLock */
330 0, /* xFileControl */
332 0, /* xDeviceCharacteristics */
342 ** Open a journal file.
344 ** The behaviour of the journal file depends on the value of parameter
345 ** nSpill. If nSpill is 0, then the journal file is always created and
346 ** accessed using the underlying VFS. If nSpill is less than zero, then
347 ** all content is always stored in main-memory. Finally, if nSpill is a
348 ** positive value, then the journal file is initially created in-memory
349 ** but may be flushed to disk later on. In this case the journal file is
350 ** flushed to disk either when it grows larger than nSpill bytes in size,
351 ** or when sqlite3JournalCreate() is called.
/*
** Initialise the preallocated handle pJfd as a journal file.
**
** The handle is zeroed first. One visible path returns the result of
** sqlite3OsOpen() directly, so the journal is an ordinary file of the
** underlying VFS. The other path picks a chunk size and installs
** MemJournalMethods so that the methods in this file serve the handle.
**
** NOTE(review): the tests on nSpill that select between these paths, the
** stores of nSpill/flags/zName/pVfs into the object and the final return
** are not visible in this extract.
*/
353 int sqlite3JournalOpen(
354 sqlite3_vfs
*pVfs
, /* The VFS to use for actual file I/O */
355 const char *zName
, /* Name of the journal file */
356 sqlite3_file
*pJfd
, /* Preallocated, blank file handle */
357 int flags
, /* Opening flags */
358 int nSpill
/* Bytes buffered before opening the file */
360 MemJournal
*p
= (MemJournal
*)pJfd
;
/* A journal without a name must either be memory-only (nSpill<0) or be
** opened with SQLITE_OPEN_EXCLUSIVE. */
362 assert( zName
|| nSpill
<0 || (flags
& SQLITE_OPEN_EXCLUSIVE
) );
364 /* Zero the file-handle object. If nSpill was passed zero, initialize
365 ** it using the sqlite3OsOpen() function of the underlying VFS. In this
366 ** case none of the code in this module is executed as a result of calls
367 ** made on the journal file-handle. */
368 memset(p
, 0, sizeof(MemJournal
));
370 return sqlite3OsOpen(pVfs
, zName
, pJfd
, flags
, 0);
/* Two alternative chunk sizes are visible: the spill threshold itself ... */
374 p
->nChunkSize
= nSpill
;
/* ... or a size chosen so that one whole FileChunk allocation is exactly
** MEMJOURNAL_DFLT_FILECHUNKSIZE bytes, as the assert below verifies. */
376 p
->nChunkSize
= 8 + MEMJOURNAL_DFLT_FILECHUNKSIZE
- sizeof(FileChunk
);
377 assert( MEMJOURNAL_DFLT_FILECHUNKSIZE
==fileChunkSize(p
->nChunkSize
) );
/* From here on calls made through pJfd are served by this file. */
380 pJfd
->pMethods
= (const sqlite3_io_methods
*)&MemJournalMethods
;
389 ** Open an in-memory journal file.
/*
** Convenience wrapper: calls sqlite3JournalOpen() with no VFS, no file
** name, no flags and nSpill=-1. The return value of sqlite3JournalOpen()
** is discarded.
*/
391 void sqlite3MemJournalOpen(sqlite3_file
*pJfd
){
392 sqlite3JournalOpen(0, 0, pJfd
, 0, -1);
395 #if defined(SQLITE_ENABLE_ATOMIC_WRITE) \
396 || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE)
398 ** If the argument p points to a MemJournal structure that is not an
399 ** in-memory-only journal file (i.e. is one that was opened with a +ve
400 ** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying
401 ** file has not yet been created, create it now.
/*
** Compiled only when SQLITE_ENABLE_ATOMIC_WRITE or
** SQLITE_ENABLE_BATCH_ATOMIC_WRITE is defined (see the #if above).
**
** When pJfd is still served by MemJournalMethods and the remaining
** conditions hold, the journal is converted to a real file with
** memjrnlCreateFile().
**
** NOTE(review): the rc declaration, the nSpill tests inside the
** conditional, the closing of the condition and the final return are not
** visible in this extract.
*/
403 int sqlite3JournalCreate(sqlite3_file
*pJfd
){
405 MemJournal
*p
= (MemJournal
*)pJfd
;
/* Pointer identity with the method table is how this file recognises a
** journal that has not yet been moved to disk. */
406 if( pJfd
->pMethods
==&MemJournalMethods
&& (
407 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
410 /* While this appears to not be possible without ATOMIC_WRITE, the
411 ** paths are complex, so it seems prudent to leave the test in as
412 ** a NEVER(), in case our analysis is subtly flawed. */
415 #ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
/* With batch-atomic writes, main journals qualify as well. */
416 || (p
->flags
& SQLITE_OPEN_MAIN_JOURNAL
)
419 rc
= memjrnlCreateFile(p
);
426 ** The file-handle passed as the only argument is open on a journal file.
427 ** Return true if this "journal file" is currently stored in heap memory,
428 ** or false otherwise.
/*
** The test is pointer identity with MemJournalMethods: non-zero while this
** file's methods serve the handle, zero otherwise. memjrnlCreateFile()
** zeroes the handle and re-opens it with sqlite3OsOpen(), which is
** presumably what changes pMethods once the journal has been moved to a
** real file.
*/
430 int sqlite3JournalIsInMemory(sqlite3_file
*p
){
431 return p
->pMethods
==&MemJournalMethods
;
435 ** Return the number of bytes required to store a JournalFile that uses vfs
436 ** pVfs to create the underlying on-disk files.
/*
** Returns the larger of pVfs->szOsFile and sizeof(MemJournal). The larger
** of the two is needed because memjrnlCreateFile() opens the real file in
** the very memory that held the MemJournal, so a single buffer must be
** able to hold either object.
*/
438 int sqlite3JournalSize(sqlite3_vfs
*pVfs
){
439 return MAX(pVfs
->szOsFile
, (int)sizeof(MemJournal
));