vfs: check userland buffers before reading them.
[haiku.git] / src / add-ons / kernel / file_systems / ext2 / Journal.cpp
blob26030baebde6fb4463d6aa7a64bce552308500b4
1 /*
2 * Copyright 2010, Haiku Inc. All rights reserved.
3 * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
4 * This file may be used under the terms of the MIT License.
6 * Authors:
7 * Janito V. Ferreira Filho
8 */
11 #include "Journal.h"
13 #include <new>
14 #include <string.h>
15 #include <unistd.h>
17 #include <fs_cache.h>
19 #include "CachedBlock.h"
20 #include "HashRevokeManager.h"
23 //#define TRACE_EXT2
24 #ifdef TRACE_EXT2
25 # define TRACE(x...) dprintf("\33[34mext2:\33[0m " x)
26 #else
27 # define TRACE(x...) ;
28 #endif
29 #define ERROR(x...) dprintf("\33[34mext2:\33[0m " x)
30 #define WARN(x...) dprintf("\33[34mext2:\33[0m " x)
33 class LogEntry : public DoublyLinkedListLinkImpl<LogEntry> {
34 public:
35 LogEntry(Journal* journal, uint32 logStart,
36 uint32 length);
37 ~LogEntry();
39 uint32 Start() const { return fStart; }
40 uint32 CommitID() const { return fCommitID; }
42 Journal* GetJournal() { return fJournal; }
44 private:
45 Journal* fJournal;
46 uint32 fStart;
47 uint32 fCommitID;
51 LogEntry::LogEntry(Journal* journal, uint32 logStart, uint32 commitID)
53 fJournal(journal),
54 fStart(logStart),
55 fCommitID(commitID)
60 LogEntry::~LogEntry()
65 void
66 JournalHeader::MakeDescriptor(uint32 sequence)
68 this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
69 this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
70 this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_DESCRIPTOR_BLOCK);
74 void
75 JournalHeader::MakeCommit(uint32 sequence)
77 this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
78 this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
79 this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_COMMIT_BLOCK);
83 Journal::Journal(Volume* fsVolume, Volume* jVolume)
85 fJournalVolume(jVolume),
86 fJournalBlockCache(jVolume->BlockCache()),
87 fFilesystemVolume(fsVolume),
88 fFilesystemBlockCache(fsVolume->BlockCache()),
89 fRevokeManager(NULL),
90 fInitStatus(B_OK),
91 fBlockSize(sizeof(JournalSuperBlock)),
92 fFirstCommitID(0),
93 fFirstCacheCommitID(0),
94 fFirstLogBlock(1),
95 fLogSize(0),
96 fVersion(0),
97 fLogStart(0),
98 fLogEnd(0),
99 fFreeBlocks(0),
100 fMaxTransactionSize(0),
101 fCurrentCommitID(0),
102 fHasSubTransaction(false),
103 fSeparateSubTransactions(false),
104 fUnwrittenTransactions(0),
105 fTransactionID(0)
107 recursive_lock_init(&fLock, "ext2 journal");
108 mutex_init(&fLogEntriesLock, "ext2 journal log entries");
110 HashRevokeManager* revokeManager = new(std::nothrow) HashRevokeManager;
111 TRACE("Journal::Journal(): Allocated a hash revoke manager at %p\n",
112 revokeManager);
114 if (revokeManager == NULL)
115 fInitStatus = B_NO_MEMORY;
116 else {
117 fInitStatus = revokeManager->Init();
119 if (fInitStatus == B_OK) {
120 fRevokeManager = revokeManager;
121 fInitStatus = _LoadSuperBlock();
122 } else
123 delete revokeManager;
128 Journal::Journal()
130 fJournalVolume(NULL),
131 fJournalBlockCache(NULL),
132 fFilesystemVolume(NULL),
133 fFilesystemBlockCache(NULL),
134 fRevokeManager(NULL),
135 fInitStatus(B_OK),
136 fBlockSize(sizeof(JournalSuperBlock)),
137 fFirstCommitID(0),
138 fFirstCacheCommitID(0),
139 fFirstLogBlock(1),
140 fLogSize(0),
141 fVersion(0),
142 fIsStarted(false),
143 fLogStart(0),
144 fLogEnd(0),
145 fFreeBlocks(0),
146 fMaxTransactionSize(0),
147 fCurrentCommitID(0),
148 fHasSubTransaction(false),
149 fSeparateSubTransactions(false),
150 fUnwrittenTransactions(0),
151 fTransactionID(0)
153 recursive_lock_init(&fLock, "ext2 journal");
154 mutex_init(&fLogEntriesLock, "ext2 journal log entries");
158 Journal::~Journal()
160 TRACE("Journal destructor.\n");
162 TRACE("Journal::~Journal(): Attempting to delete revoke manager at %p\n",
163 fRevokeManager);
164 delete fRevokeManager;
166 recursive_lock_destroy(&fLock);
167 mutex_destroy(&fLogEntriesLock);
171 status_t
172 Journal::InitCheck()
174 return fInitStatus;
178 status_t
179 Journal::Uninit()
181 if (!fIsStarted)
182 return B_OK;
184 status_t status = FlushLogAndBlocks();
186 if (status == B_OK) {
187 // Mark journal as clean
188 fLogStart = 0;
189 status = _SaveSuperBlock();
192 fIsStarted = false;
194 return status;
198 /*virtual*/ status_t
199 Journal::StartLog()
201 fLogStart = fFirstLogBlock;
202 fLogEnd = fFirstLogBlock;
203 fFreeBlocks = 0;
204 fIsStarted = true;
206 fCurrentCommitID = fFirstCommitID;
208 return _SaveSuperBlock();
212 status_t
213 Journal::RestartLog()
215 fFirstCommitID = 1;
217 return B_OK;
221 /*virtual*/ status_t
222 Journal::Lock(Transaction* owner, bool separateSubTransactions)
224 TRACE("Journal::Lock()\n");
225 status_t status = recursive_lock_lock(&fLock);
226 if (status != B_OK)
227 return status;
229 TRACE("Journal::Lock(): Aquired lock\n");
231 if (!fSeparateSubTransactions && recursive_lock_get_recursion(&fLock) > 1) {
232 // reuse current transaction
233 TRACE("Journal::Lock(): Reusing current transaction\n");
234 return B_OK;
237 if(separateSubTransactions)
238 fSeparateSubTransactions = true;
240 if (owner != NULL)
241 owner->SetParent(fOwner);
243 fOwner = owner;
245 if (fOwner != NULL) {
246 if (fUnwrittenTransactions > 0) {
247 // start a sub transaction
248 TRACE("Journal::Lock(): Starting sub transaction\n");
249 cache_start_sub_transaction(fFilesystemBlockCache, fTransactionID);
250 fHasSubTransaction = true;
251 } else {
252 TRACE("Journal::Lock(): Starting new transaction\n");
253 fTransactionID = cache_start_transaction(fFilesystemBlockCache);
256 if (fTransactionID < B_OK) {
257 recursive_lock_unlock(&fLock);
258 return fTransactionID;
261 cache_add_transaction_listener(fFilesystemBlockCache, fTransactionID,
262 TRANSACTION_IDLE, _TransactionIdle, this);
265 return B_OK;
269 /*virtual*/ status_t
270 Journal::Unlock(Transaction* owner, bool success)
272 TRACE("Journal::Unlock(): Lock recursion: %" B_PRId32 "\n",
273 recursive_lock_get_recursion(&fLock));
274 if (fSeparateSubTransactions
275 || recursive_lock_get_recursion(&fLock) == 1) {
276 // we only end the transaction if we unlock it
277 if (owner != NULL) {
278 TRACE("Journal::Unlock(): Calling _TransactionDone\n");
279 status_t status = _TransactionDone(success);
280 if (status != B_OK)
281 return status;
283 TRACE("Journal::Unlock(): Returned from _TransactionDone\n");
284 bool separateSubTransactions = fSeparateSubTransactions;
285 fSeparateSubTransactions = true;
286 TRACE("Journal::Unlock(): Notifying listeners for: %p\n", owner);
287 owner->NotifyListeners(success);
288 TRACE("Journal::Unlock(): Done notifying listeners\n");
289 fSeparateSubTransactions = separateSubTransactions;
291 fOwner = owner->Parent();
292 } else
293 fOwner = NULL;
295 if (fSeparateSubTransactions
296 && recursive_lock_get_recursion(&fLock) == 1)
297 fSeparateSubTransactions = false;
298 } else
299 owner->MoveListenersTo(fOwner);
301 TRACE("Journal::Unlock(): Unlocking the lock\n");
303 recursive_lock_unlock(&fLock);
304 return B_OK;
308 status_t
309 Journal::MapBlock(off_t logical, fsblock_t& physical)
311 TRACE("Journal::MapBlock()\n");
312 physical = logical;
314 return B_OK;
318 inline uint32
319 Journal::FreeLogBlocks() const
321 TRACE("Journal::FreeLogBlocks(): start: %" B_PRIu32 ", end: %" B_PRIu32
322 ", size: %" B_PRIu32 "\n", fLogStart, fLogEnd, fLogSize);
323 return fLogStart <= fLogEnd
324 ? fLogSize - fLogEnd + fLogStart - 1
325 : fLogStart - fLogEnd;
329 status_t
330 Journal::FlushLogAndBlocks()
332 return _FlushLog(true, true);
336 int32
337 Journal::TransactionID() const
339 return fTransactionID;
343 status_t
344 Journal::_WritePartialTransactionToLog(JournalHeader* descriptorBlock,
345 bool detached, uint8** _escapedData, uint32 &logBlock, off_t& blockNumber,
346 long& cookie, ArrayDeleter<uint8>& escapedDataDeleter, uint32& blockCount,
347 bool& finished)
349 TRACE("Journal::_WritePartialTransactionToLog()\n");
351 uint32 descriptorBlockPos = logBlock;
352 uint8* escapedData = *_escapedData;
354 JournalBlockTag* tag = (JournalBlockTag*)descriptorBlock->data;
355 JournalBlockTag* lastTag = (JournalBlockTag*)((uint8*)descriptorBlock
356 + fBlockSize - sizeof(JournalHeader));
358 finished = false;
359 status_t status = B_OK;
361 while (tag < lastTag && status == B_OK) {
362 tag->SetBlockNumber(blockNumber);
363 tag->SetFlags(0);
365 CachedBlock data(fFilesystemVolume);
366 const JournalHeader* blockData = (JournalHeader*)data.SetTo(
367 blockNumber);
368 if (blockData == NULL) {
369 panic("Got a NULL pointer while iterating through transaction "
370 "blocks.\n");
371 return B_ERROR;
374 void* finalData;
376 if (blockData->CheckMagic()) {
377 // The journaled block starts with the magic value
378 // We must remove it to prevent confusion
379 TRACE("Journal::_WritePartialTransactionToLog(): Block starts with "
380 "magic number. Escaping it\n");
381 tag->SetEscapedFlag();
383 if (escapedData == NULL) {
384 TRACE("Journal::_WritePartialTransactionToLog(): Allocating "
385 "space for escaped block (%" B_PRIu32 ")\n", fBlockSize);
386 escapedData = new(std::nothrow) uint8[fBlockSize];
387 if (escapedData == NULL) {
388 TRACE("Journal::_WritePartialTransactionToLof(): Failed to "
389 "allocate buffer for escaped data block\n");
390 return B_NO_MEMORY;
392 escapedDataDeleter.SetTo(escapedData);
393 *_escapedData = escapedData;
395 ((int32*)escapedData)[0] = 0; // Remove magic
398 memcpy(escapedData + 4, blockData->data, fBlockSize - 4);
399 finalData = escapedData;
400 } else
401 finalData = (void*)blockData;
403 // TODO: use iovecs?
405 logBlock = _WrapAroundLog(logBlock + 1);
407 fsblock_t physicalBlock;
408 status = MapBlock(logBlock, physicalBlock);
409 if (status != B_OK)
410 return status;
412 off_t logOffset = physicalBlock * fBlockSize;
414 TRACE("Journal::_WritePartialTransactionToLog(): Writing from memory: "
415 "%p, to disk: %" B_PRIdOFF "\n", finalData, logOffset);
416 size_t written = write_pos(fJournalVolume->Device(), logOffset,
417 finalData, fBlockSize);
418 if (written != fBlockSize) {
419 TRACE("Failed to write journal block.\n");
420 return B_IO_ERROR;
423 TRACE("Journal::_WritePartialTransactionToLog(): Wrote a journal block "
424 "at: %" B_PRIu32 "\n", logBlock);
426 blockCount++;
427 tag++;
429 status = cache_next_block_in_transaction(fFilesystemBlockCache,
430 fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
433 finished = status != B_OK;
435 // Write descriptor block
436 --tag;
437 tag->SetLastTagFlag();
439 fsblock_t physicalBlock;
440 status = MapBlock(descriptorBlockPos, physicalBlock);
441 if (status != B_OK)
442 return status;
444 off_t descriptorBlockOffset = physicalBlock * fBlockSize;
446 TRACE("Journal::_WritePartialTransactionToLog(): Writing to: %" B_PRIdOFF
447 "\n", descriptorBlockOffset);
448 size_t written = write_pos(fJournalVolume->Device(),
449 descriptorBlockOffset, descriptorBlock, fBlockSize);
450 if (written != fBlockSize) {
451 TRACE("Failed to write journal descriptor block.\n");
452 return B_IO_ERROR;
455 blockCount++;
456 logBlock = _WrapAroundLog(logBlock + 1);
458 return B_OK;
462 status_t
463 Journal::_WriteTransactionToLog()
465 TRACE("Journal::_WriteTransactionToLog()\n");
466 // Transaction enters the Flush state
467 bool detached = false;
468 TRACE("Journal::_WriteTransactionToLog(): Attempting to get transaction "
469 "size\n");
470 size_t size = _FullTransactionSize();
471 TRACE("Journal::_WriteTransactionToLog(): transaction size: %" B_PRIuSIZE
472 "\n", size);
474 if (size > fMaxTransactionSize) {
475 TRACE("Journal::_WriteTransactionToLog(): not enough free space "
476 "for the transaction. Attempting to free some space.\n");
477 size = _MainTransactionSize();
478 TRACE("Journal::_WriteTransactionToLog(): main transaction size: %"
479 B_PRIuSIZE "\n", size);
481 if(fHasSubTransaction && size < fMaxTransactionSize) {
482 TRACE("Journal::_WriteTransactionToLog(): transaction doesn't fit, "
483 "but it can be separated\n");
484 detached = true;
485 } else {
486 // Error: transaction can't fit in log
487 panic("transaction too large (size: %" B_PRIuSIZE ", max size: %"
488 B_PRIu32 ", log size: %" B_PRIu32 ")\n", size,
489 fMaxTransactionSize, fLogSize);
490 return B_BUFFER_OVERFLOW;
494 TRACE("Journal::_WriteTransactionToLog(): free log blocks: %" B_PRIu32
495 "\n", FreeLogBlocks());
496 if (size > FreeLogBlocks()) {
497 TRACE("Journal::_WriteTransactionToLog(): Syncing block cache\n");
498 cache_sync_transaction(fFilesystemBlockCache, fTransactionID);
500 if (size > FreeLogBlocks()) {
501 panic("Transaction fits, but sync didn't result in enough"
502 "free space.\n\tGot %" B_PRIu32 " when at least %" B_PRIuSIZE
503 " was expected.", FreeLogBlocks(), size);
507 TRACE("Journal::_WriteTransactionToLog(): finished managing space for "
508 "the transaction\n");
510 fHasSubTransaction = false;
511 if (!fIsStarted)
512 StartLog();
514 // Prepare Descriptor block
515 TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
516 "the descriptor block, block size %" B_PRIu32 "\n", fBlockSize);
517 JournalHeader* descriptorBlock =
518 (JournalHeader*)new(std::nothrow) uint8[fBlockSize];
519 if (descriptorBlock == NULL) {
520 TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
521 "for the descriptor block\n");
522 return B_NO_MEMORY;
524 ArrayDeleter<uint8> descriptorBlockDeleter((uint8*)descriptorBlock);
526 descriptorBlock->MakeDescriptor(fCurrentCommitID);
528 // Prepare Commit block
529 TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
530 "the commit block, block size %" B_PRIu32 "\n", fBlockSize);
531 JournalHeader* commitBlock =
532 (JournalHeader*)new(std::nothrow) uint8[fBlockSize];
533 if (descriptorBlock == NULL) {
534 TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
535 "for the commit block\n");
536 return B_NO_MEMORY;
538 ArrayDeleter<uint8> commitBlockDeleter((uint8*)commitBlock);
540 commitBlock->MakeCommit(fCurrentCommitID + 1);
541 memset(commitBlock->data, 0, fBlockSize - sizeof(JournalHeader));
542 // TODO: This probably isn't necessary
544 uint8* escapedData = NULL;
545 ArrayDeleter<uint8> escapedDataDeleter;
547 off_t blockNumber;
548 long cookie = 0;
550 status_t status = cache_next_block_in_transaction(fFilesystemBlockCache,
551 fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
552 if (status != B_OK) {
553 TRACE("Journal::_WriteTransactionToLog(): Transaction has no blocks to "
554 "write\n");
555 return B_OK;
558 uint32 blockCount = 0;
560 uint32 logBlock = _WrapAroundLog(fLogEnd);
562 bool finished = false;
564 status = _WritePartialTransactionToLog(descriptorBlock, detached,
565 &escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
566 blockCount, finished);
567 if (!finished && status != B_OK)
568 return status;
570 uint32 commitBlockPos = logBlock;
572 while (!finished) {
573 descriptorBlock->IncrementSequence();
575 status = _WritePartialTransactionToLog(descriptorBlock, detached,
576 &escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
577 blockCount, finished);
578 if (!finished && status != B_OK)
579 return status;
581 // It is okay to write the commit blocks of the partial transactions
582 // as long as the commit block of the first partial transaction isn't
583 // written. When it recovery reaches where the first commit should be
584 // and doesn't find it, it considers it found the end of the log.
586 fsblock_t physicalBlock;
587 status = MapBlock(logBlock, physicalBlock);
588 if (status != B_OK)
589 return status;
591 off_t logOffset = physicalBlock * fBlockSize;
593 TRACE("Journal::_WriteTransactionToLog(): Writting commit block to "
594 "%" B_PRIdOFF "\n", logOffset);
595 off_t written = write_pos(fJournalVolume->Device(), logOffset,
596 commitBlock, fBlockSize);
597 if (written != fBlockSize) {
598 TRACE("Failed to write journal commit block.\n");
599 return B_IO_ERROR;
602 commitBlock->IncrementSequence();
603 blockCount++;
605 logBlock = _WrapAroundLog(logBlock + 1);
608 // Transaction will enter the Commit state
609 fsblock_t physicalBlock;
610 status = MapBlock(commitBlockPos, physicalBlock);
611 if (status != B_OK)
612 return status;
614 off_t logOffset = physicalBlock * fBlockSize;
616 TRACE("Journal::_WriteTransactionToLog(): Writing to: %" B_PRIdOFF "\n",
617 logOffset);
618 off_t written = write_pos(fJournalVolume->Device(), logOffset, commitBlock,
619 fBlockSize);
620 if (written != fBlockSize) {
621 TRACE("Failed to write journal commit block.\n");
622 return B_IO_ERROR;
625 blockCount++;
626 fLogEnd = _WrapAroundLog(fLogEnd + blockCount);
628 status = _SaveSuperBlock();
630 // Transaction will enter Finished state
631 LogEntry *logEntry = new LogEntry(this, fLogEnd, fCurrentCommitID++);
632 TRACE("Journal::_WriteTransactionToLog(): Allocating log entry at %p\n",
633 logEntry);
634 if (logEntry == NULL) {
635 panic("no memory to allocate log entries!");
636 return B_NO_MEMORY;
639 mutex_lock(&fLogEntriesLock);
640 fLogEntries.Add(logEntry);
641 mutex_unlock(&fLogEntriesLock);
643 if (detached) {
644 fTransactionID = cache_detach_sub_transaction(fFilesystemBlockCache,
645 fTransactionID, _TransactionWritten, logEntry);
646 fUnwrittenTransactions = 1;
648 if (status == B_OK && _FullTransactionSize() > fLogSize) {
649 // If the transaction is too large after writing, there is no way to
650 // recover, so let this transaction fail.
651 ERROR("transaction too large (%" B_PRIuSIZE " blocks, log size %"
652 B_PRIu32 ")!\n", _FullTransactionSize(), fLogSize);
653 return B_BUFFER_OVERFLOW;
655 } else {
656 cache_end_transaction(fFilesystemBlockCache, fTransactionID,
657 _TransactionWritten, logEntry);
658 fUnwrittenTransactions = 0;
661 return B_OK;
665 status_t
666 Journal::_SaveSuperBlock()
668 TRACE("Journal::_SaveSuperBlock()\n");
669 fsblock_t physicalBlock;
670 status_t status = MapBlock(0, physicalBlock);
671 if (status != B_OK)
672 return status;
674 off_t superblockPos = physicalBlock * fBlockSize;
676 JournalSuperBlock superblock;
677 size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos,
678 &superblock, sizeof(superblock));
680 if (bytesRead != sizeof(superblock))
681 return B_IO_ERROR;
683 superblock.SetFirstCommitID(fFirstCommitID);
684 superblock.SetLogStart(fLogStart);
686 TRACE("Journal::SaveSuperBlock(): Write to %" B_PRIdOFF "\n",
687 superblockPos);
688 size_t bytesWritten = write_pos(fJournalVolume->Device(), superblockPos,
689 &superblock, sizeof(superblock));
691 if (bytesWritten != sizeof(superblock))
692 return B_IO_ERROR;
694 TRACE("Journal::_SaveSuperBlock(): Done\n");
696 return B_OK;
700 status_t
701 Journal::_LoadSuperBlock()
703 TRACE("Journal::_LoadSuperBlock()\n");
704 fsblock_t superblockPos;
706 status_t status = MapBlock(0, superblockPos);
707 if (status != B_OK)
708 return status;
710 TRACE("Journal::_LoadSuperBlock(): superblock physical block: %" B_PRIu64
711 "\n", superblockPos);
713 JournalSuperBlock superblock;
714 size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos
715 * fJournalVolume->BlockSize(), &superblock, sizeof(superblock));
717 if (bytesRead != sizeof(superblock)) {
718 ERROR("Journal::_LoadSuperBlock(): failed to read superblock\n");
719 return B_IO_ERROR;
722 if (!superblock.header.CheckMagic()) {
723 ERROR("Journal::_LoadSuperBlock(): Invalid superblock magic %" B_PRIx32
724 "\n", superblock.header.Magic());
725 return B_BAD_VALUE;
728 if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V1) {
729 TRACE("Journal::_LoadSuperBlock(): Journal superblock version 1\n");
730 fVersion = 1;
731 } else if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V2) {
732 TRACE("Journal::_LoadSuperBlock(): Journal superblock version 2\n");
733 fVersion = 2;
734 } else {
735 ERROR("Journal::_LoadSuperBlock(): Invalid superblock version\n");
736 return B_BAD_VALUE;
739 if (fVersion >= 2) {
740 status = _CheckFeatures(&superblock);
742 if (status != B_OK) {
743 ERROR("Journal::_LoadSuperBlock(): Unsupported features\n");
744 return status;
748 fBlockSize = superblock.BlockSize();
749 fFirstCommitID = superblock.FirstCommitID();
750 fFirstLogBlock = superblock.FirstLogBlock();
751 fLogStart = superblock.LogStart();
752 fLogSize = superblock.NumBlocks();
754 uint32 descriptorTags = (fBlockSize - sizeof(JournalHeader))
755 / sizeof(JournalBlockTag);
756 // Maximum tags per descriptor block
757 uint32 maxDescriptors = (fLogSize - 1) / (descriptorTags + 2);
758 // Maximum number of full journal transactions
759 fMaxTransactionSize = maxDescriptors * descriptorTags;
760 fMaxTransactionSize += (fLogSize - 1) - fMaxTransactionSize - 2;
761 // Maximum size of a "logical" transaction
762 // TODO: Why is "superblock.MaxTransactionBlocks();" zero?
763 //fFirstCacheCommitID = fFirstCommitID - fTransactionID /*+ 1*/;
765 TRACE("Journal::_LoadSuperBlock(): block size: %" B_PRIu32 ", first commit"
766 " id: %" B_PRIu32 ", first log block: %" B_PRIu32 ", log start: %"
767 B_PRIu32 ", log size: %" B_PRIu32 ", max transaction size: %" B_PRIu32
768 "\n", fBlockSize, fFirstCommitID, fFirstLogBlock, fLogStart,
769 fLogSize, fMaxTransactionSize);
771 return B_OK;
775 status_t
776 Journal::_CheckFeatures(JournalSuperBlock* superblock)
778 if ((superblock->ReadOnlyCompatibleFeatures()
779 & ~JOURNAL_KNOWN_READ_ONLY_COMPATIBLE_FEATURES) != 0
780 || (superblock->IncompatibleFeatures()
781 & ~JOURNAL_KNOWN_INCOMPATIBLE_FEATURES) != 0)
782 return B_UNSUPPORTED;
784 return B_OK;
788 uint32
789 Journal::_CountTags(JournalHeader* descriptorBlock)
791 uint32 count = 0;
793 JournalBlockTag* tags = (JournalBlockTag*)descriptorBlock->data;
794 // Skip the header
795 JournalBlockTag* lastTag = (JournalBlockTag*)
796 (descriptorBlock + fBlockSize - sizeof(JournalBlockTag));
798 while (tags < lastTag && (tags->Flags() & JOURNAL_FLAG_LAST_TAG) == 0) {
799 if ((tags->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
800 // sizeof(UUID) = 16 = 2*sizeof(JournalBlockTag)
801 tags += 2; // Skip new UUID
804 TRACE("Journal::_CountTags(): Tag block: %" B_PRIu32 "\n",
805 tags->BlockNumber());
807 tags++; // Go to next tag
808 count++;
811 if ((tags->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
812 count++;
814 TRACE("Journal::_CountTags(): counted tags: %" B_PRIu32 "\n", count);
816 return count;
820 /*virtual*/ status_t
821 Journal::Recover()
823 TRACE("Journal::Recover()\n");
824 if (fLogStart == 0) // Journal was cleanly unmounted
825 return B_OK;
827 TRACE("Journal::Recover(): Journal needs recovery\n");
829 uint32 lastCommitID;
831 status_t status = _RecoverPassScan(lastCommitID);
832 if (status != B_OK)
833 return status;
835 status = _RecoverPassRevoke(lastCommitID);
836 if (status != B_OK)
837 return status;
839 return _RecoverPassReplay(lastCommitID);
843 // First pass: Find the end of the log
844 status_t
845 Journal::_RecoverPassScan(uint32& lastCommitID)
847 TRACE("Journal Recover: 1st Pass: Scan\n");
849 CachedBlock cached(fJournalVolume);
850 JournalHeader* header;
851 uint32 nextCommitID = fFirstCommitID;
852 uint32 nextBlock = fLogStart;
853 fsblock_t nextBlockPos;
855 status_t status = MapBlock(nextBlock, nextBlockPos);
856 if (status != B_OK)
857 return status;
859 header = (JournalHeader*)cached.SetTo(nextBlockPos);
861 while (header->CheckMagic() && header->Sequence() == nextCommitID) {
862 uint32 blockType = header->BlockType();
864 if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
865 uint32 tags = _CountTags(header);
866 nextBlock += tags;
867 TRACE("Journal recover pass scan: Found a descriptor block with "
868 "%" B_PRIu32 " tags\n", tags);
869 } else if (blockType == JOURNAL_COMMIT_BLOCK) {
870 nextCommitID++;
871 TRACE("Journal recover pass scan: Found a commit block. Next "
872 "commit ID: %" B_PRIu32 "\n", nextCommitID);
873 } else if (blockType != JOURNAL_REVOKE_BLOCK) {
874 TRACE("Journal recover pass scan: Reached an unrecognized block, "
875 "assuming as log's end.\n");
876 break;
877 } else {
878 TRACE("Journal recover pass scan: Found a revoke block, "
879 "skipping it\n");
882 nextBlock = _WrapAroundLog(nextBlock + 1);
884 status = MapBlock(nextBlock, nextBlockPos);
885 if (status != B_OK)
886 return status;
888 header = (JournalHeader*)cached.SetTo(nextBlockPos);
891 TRACE("Journal Recovery pass scan: Last detected transaction ID: %"
892 B_PRIu32 "\n", nextCommitID);
894 lastCommitID = nextCommitID;
895 return B_OK;
899 // Second pass: Collect all revoked blocks
900 status_t
901 Journal::_RecoverPassRevoke(uint32 lastCommitID)
903 TRACE("Journal Recover: 2nd Pass: Revoke\n");
905 CachedBlock cached(fJournalVolume);
906 JournalHeader* header;
907 uint32 nextCommitID = fFirstCommitID;
908 uint32 nextBlock = fLogStart;
909 fsblock_t nextBlockPos;
911 status_t status = MapBlock(nextBlock, nextBlockPos);
912 if (status != B_OK)
913 return status;
915 header = (JournalHeader*)cached.SetTo(nextBlockPos);
917 while (nextCommitID < lastCommitID) {
918 if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
919 // Somehow the log is different than the expexted
920 return B_ERROR;
923 uint32 blockType = header->BlockType();
925 if (blockType == JOURNAL_DESCRIPTOR_BLOCK)
926 nextBlock += _CountTags(header);
927 else if (blockType == JOURNAL_COMMIT_BLOCK)
928 nextCommitID++;
929 else if (blockType == JOURNAL_REVOKE_BLOCK) {
930 TRACE("Journal::_RecoverPassRevoke(): Found a revoke block\n");
931 status = fRevokeManager->ScanRevokeBlock(
932 (JournalRevokeHeader*)header, nextCommitID);
934 if (status != B_OK)
935 return status;
936 } else {
937 WARN("Journal::_RecoverPassRevoke(): Found an unrecognized block\n");
938 break;
941 nextBlock = _WrapAroundLog(nextBlock + 1);
943 status = MapBlock(nextBlock, nextBlockPos);
944 if (status != B_OK)
945 return status;
947 header = (JournalHeader*)cached.SetTo(nextBlockPos);
950 if (nextCommitID != lastCommitID) {
951 // Possibly because of some sort of IO error
952 TRACE("Journal::_RecoverPassRevoke(): Incompatible commit IDs\n");
953 return B_ERROR;
956 TRACE("Journal recovery pass revoke: Revoked blocks: %" B_PRIu32 "\n",
957 fRevokeManager->NumRevokes());
959 return B_OK;
963 // Third pass: Replay log
964 status_t
965 Journal::_RecoverPassReplay(uint32 lastCommitID)
967 TRACE("Journal Recover: 3rd Pass: Replay\n");
969 uint32 nextCommitID = fFirstCommitID;
970 uint32 nextBlock = fLogStart;
971 fsblock_t nextBlockPos;
973 status_t status = MapBlock(nextBlock, nextBlockPos);
974 if (status != B_OK)
975 return status;
977 CachedBlock cached(fJournalVolume);
978 JournalHeader* header = (JournalHeader*)cached.SetTo(nextBlockPos);
980 int count = 0;
982 uint8* data = new(std::nothrow) uint8[fBlockSize];
983 if (data == NULL) {
984 TRACE("Journal::_RecoverPassReplay(): Failed to allocate memory for "
985 "data\n");
986 return B_NO_MEMORY;
989 ArrayDeleter<uint8> dataDeleter(data);
991 while (nextCommitID < lastCommitID) {
992 if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
993 // Somehow the log is different than the expected
994 ERROR("Journal::_RecoverPassReplay(): Weird problem with block\n");
995 return B_ERROR;
998 uint32 blockType = header->BlockType();
1000 if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
1001 JournalBlockTag* last_tag = (JournalBlockTag*)((uint8*)header
1002 + fBlockSize - sizeof(JournalBlockTag));
1004 for (JournalBlockTag* tag = (JournalBlockTag*)header->data;
1005 tag <= last_tag; ++tag) {
1006 nextBlock = _WrapAroundLog(nextBlock + 1);
1008 status = MapBlock(nextBlock, nextBlockPos);
1009 if (status != B_OK)
1010 return status;
1012 if (!fRevokeManager->Lookup(tag->BlockNumber(),
1013 nextCommitID)) {
1014 // Block isn't revoked
1015 size_t read = read_pos(fJournalVolume->Device(),
1016 nextBlockPos * fBlockSize, data, fBlockSize);
1017 if (read != fBlockSize)
1018 return B_IO_ERROR;
1020 if ((tag->Flags() & JOURNAL_FLAG_ESCAPED) != 0) {
1021 // Block is escaped
1022 ((int32*)data)[0]
1023 = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
1026 TRACE("Journal::_RevoverPassReplay(): Write to %" B_PRIu32
1027 "\n", tag->BlockNumber() * fBlockSize);
1028 size_t written = write_pos(fFilesystemVolume->Device(),
1029 tag->BlockNumber() * fBlockSize, data, fBlockSize);
1031 if (written != fBlockSize)
1032 return B_IO_ERROR;
1034 ++count;
1037 if ((tag->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
1038 break;
1039 if ((tag->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
1040 // TODO: Check new UUID with file system UUID
1041 tag += 2;
1042 // sizeof(JournalBlockTag) = 8
1043 // sizeof(UUID) = 16
1046 } else if (blockType == JOURNAL_COMMIT_BLOCK)
1047 nextCommitID++;
1048 else if (blockType != JOURNAL_REVOKE_BLOCK) {
1049 WARN("Journal::_RecoverPassReplay(): Found an unrecognized block\n");
1050 break;
1051 } // If blockType == JOURNAL_REVOKE_BLOCK we just skip it
1053 nextBlock = _WrapAroundLog(nextBlock + 1);
1055 status = MapBlock(nextBlock, nextBlockPos);
1056 if (status != B_OK)
1057 return status;
1059 header = (JournalHeader*)cached.SetTo(nextBlockPos);
1062 if (nextCommitID != lastCommitID) {
1063 // Possibly because of some sort of IO error
1064 return B_ERROR;
1067 TRACE("Journal recovery pass replay: Replayed blocks: %u\n", count);
1069 return B_OK;
1073 status_t
1074 Journal::_FlushLog(bool canWait, bool flushBlocks)
1076 TRACE("Journal::_FlushLog()\n");
1077 status_t status = canWait ? recursive_lock_lock(&fLock)
1078 : recursive_lock_trylock(&fLock);
1080 TRACE("Journal::_FlushLog(): Acquired fLock, recursion: %" B_PRId32 "\n",
1081 recursive_lock_get_recursion(&fLock));
1082 if (status != B_OK)
1083 return status;
1085 if (recursive_lock_get_recursion(&fLock) > 1) {
1086 // Called from inside a transaction
1087 recursive_lock_unlock(&fLock);
1088 TRACE("Journal::_FlushLog(): Called from a transaction. Leaving...\n");
1089 return B_OK;
1092 if (fUnwrittenTransactions != 0 && _FullTransactionSize() != 0) {
1093 status = _WriteTransactionToLog();
1094 if (status < B_OK)
1095 panic("Failed flushing transaction: %s\n", strerror(status));
1098 TRACE("Journal::_FlushLog(): Attempting to flush journal volume at %p\n",
1099 fJournalVolume);
1101 // TODO: Not sure this is correct. Need to review...
1102 // NOTE: Not correct. Causes double lock of a block cache mutex
1103 // TODO: Need some other way to synchronize the journal...
1104 /*status = fJournalVolume->FlushDevice();
1105 if (status != B_OK)
1106 return status;*/
1108 TRACE("Journal::_FlushLog(): Flushed journal volume\n");
1110 if (flushBlocks) {
1111 TRACE("Journal::_FlushLog(): Attempting to flush file system volume "
1112 "at %p\n", fFilesystemVolume);
1113 status = fFilesystemVolume->FlushDevice();
1114 if (status == B_OK)
1115 TRACE("Journal::_FlushLog(): Flushed file system volume\n");
1118 TRACE("Journal::_FlushLog(): Finished. Releasing lock\n");
1120 recursive_lock_unlock(&fLock);
1122 TRACE("Journal::_FlushLog(): Done, final status: %s\n", strerror(status));
1123 return status;
1127 inline uint32
1128 Journal::_WrapAroundLog(uint32 block)
1130 TRACE("Journal::_WrapAroundLog()\n");
1131 if (block >= fLogSize)
1132 return block - fLogSize + fFirstLogBlock;
1133 else
1134 return block;
1138 size_t
1139 Journal::_CurrentTransactionSize() const
1141 TRACE("Journal::_CurrentTransactionSize(): transaction %" B_PRIu32 "\n",
1142 fTransactionID);
1144 size_t count;
1146 if (fHasSubTransaction) {
1147 count = cache_blocks_in_sub_transaction(fFilesystemBlockCache,
1148 fTransactionID);
1150 TRACE("\tSub transaction size: %" B_PRIuSIZE "\n", count);
1151 } else {
1152 count = cache_blocks_in_transaction(fFilesystemBlockCache,
1153 fTransactionID);
1155 TRACE("\tTransaction size: %" B_PRIuSIZE "\n", count);
1158 return count;
1162 size_t
1163 Journal::_FullTransactionSize() const
1165 TRACE("Journal::_FullTransactionSize(): transaction %" B_PRIu32 "\n",
1166 fTransactionID);
1167 TRACE("\tFile sytem block cache: %p\n", fFilesystemBlockCache);
1169 size_t count = cache_blocks_in_transaction(fFilesystemBlockCache,
1170 fTransactionID);
1172 TRACE("\tFull transaction size: %" B_PRIuSIZE "\n", count);
1174 return count;
1178 size_t
1179 Journal::_MainTransactionSize() const
1181 TRACE("Journal::_MainTransactionSize(): transaction %" B_PRIu32 "\n",
1182 fTransactionID);
1184 size_t count = cache_blocks_in_main_transaction(fFilesystemBlockCache,
1185 fTransactionID);
1187 TRACE("\tMain transaction size: %" B_PRIuSIZE "\n", count);
1189 return count;
1193 status_t
1194 Journal::_TransactionDone(bool success)
1196 if (!success) {
1197 if (fHasSubTransaction) {
1198 TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
1199 " failed, aborting subtransaction\n", fTransactionID);
1200 cache_abort_sub_transaction(fFilesystemBlockCache, fTransactionID);
1201 // parent is unaffected
1202 } else {
1203 TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
1204 " failed, aborting\n", fTransactionID);
1205 cache_abort_transaction(fFilesystemBlockCache, fTransactionID);
1206 fUnwrittenTransactions = 0;
1209 TRACE("Journal::_TransactionDone(): returning B_OK\n");
1210 return B_OK;
1213 // If possible, delay flushing the transaction
1214 uint32 size = _FullTransactionSize();
1215 TRACE("Journal::_TransactionDone(): full transaction size: %" B_PRIu32
1216 ", max transaction size: %" B_PRIu32 ", free log blocks: %" B_PRIu32
1217 "\n", size, fMaxTransactionSize, FreeLogBlocks());
1218 if (fMaxTransactionSize > 0 && size < fMaxTransactionSize) {
1219 TRACE("Journal::_TransactionDone(): delaying flush of transaction "
1220 "%" B_PRIu32 "\n", fTransactionID);
1222 // Make sure the transaction fits in the log
1223 if (size < FreeLogBlocks())
1224 cache_sync_transaction(fFilesystemBlockCache, fTransactionID);
1226 fUnwrittenTransactions++;
1227 TRACE("Journal::_TransactionDone(): returning B_OK\n");
1228 return B_OK;
1231 return _WriteTransactionToLog();
1235 /*static*/ void
1236 Journal::_TransactionWritten(int32 transactionID, int32 event, void* _logEntry)
1238 LogEntry* logEntry = (LogEntry*)_logEntry;
1240 TRACE("Journal::_TransactionWritten(): Transaction %" B_PRIu32
1241 " checkpointed\n", transactionID);
1243 Journal* journal = logEntry->GetJournal();
1245 TRACE("Journal::_TransactionWritten(): log entry: %p, journal: %p\n",
1246 logEntry, journal);
1247 TRACE("Journal::_TransactionWritten(): log entries: %p\n",
1248 &journal->fLogEntries);
1250 mutex_lock(&journal->fLogEntriesLock);
1252 TRACE("Journal::_TransactionWritten(): first log entry: %p\n",
1253 journal->fLogEntries.First());
1254 if (logEntry == journal->fLogEntries.First()) {
1255 TRACE("Journal::_TransactionWritten(): Moving start of log to %"
1256 B_PRIu32 "\n", logEntry->Start());
1257 journal->fLogStart = logEntry->Start();
1258 journal->fFirstCommitID = logEntry->CommitID();
1259 TRACE("Journal::_TransactionWritten(): Setting commit ID to %" B_PRIu32
1260 "\n", logEntry->CommitID());
1262 if (journal->_SaveSuperBlock() != B_OK)
1263 panic("ext2: Failed to write journal superblock\n");
1266 TRACE("Journal::_TransactionWritten(): Removing log entry\n");
1267 journal->fLogEntries.Remove(logEntry);
1269 TRACE("Journal::_TransactionWritten(): Unlocking entries list\n");
1270 mutex_unlock(&journal->fLogEntriesLock);
1272 TRACE("Journal::_TransactionWritten(): Deleting log entry at %p\n", logEntry);
1273 delete logEntry;
1277 /*static*/ void
1278 Journal::_TransactionIdle(int32 transactionID, int32 event, void* _journal)
1280 Journal* journal = (Journal*)_journal;
1281 journal->_FlushLog(false, false);