vfs: check userland buffers before reading them.
[haiku.git] / src / system / kernel / cache / block_cache.cpp
blob5d76a79d1091728bf7d6ca9b507958fac4e5d288
1 /*
2 * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
7 #include <block_cache.h>
9 #include <unistd.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
14 #include <KernelExport.h>
15 #include <fs_cache.h>
17 #include <condition_variable.h>
18 #include <lock.h>
19 #include <low_resource_manager.h>
20 #include <slab/Slab.h>
21 #include <tracing.h>
22 #include <util/kernel_cpp.h>
23 #include <util/DoublyLinkedList.h>
24 #include <util/AutoLock.h>
25 #include <vm/vm_page.h>
27 #include "kernel_debug_config.h"
30 // TODO: this is a naive but growing implementation to test the API:
31 // block reading/writing is not at all optimized for speed, it will
32 // just read and write single blocks.
33 // TODO: the retrieval/copy of the original data could be delayed until the
34 // new data must be written, ie. in low memory situations.
36 //#define TRACE_BLOCK_CACHE
37 #ifdef TRACE_BLOCK_CACHE
38 # define TRACE(x) dprintf x
39 #else
40 # define TRACE(x) ;
41 #endif
43 #define TRACE_ALWAYS(x) dprintf x
45 // This macro is used for fatal situations that are acceptable in a running
46 // system, like out of memory situations - should only panic for debugging.
47 #define FATAL(x) panic x
49 static const bigtime_t kTransactionIdleTime = 2000000LL;
50 // a transaction is considered idle after 2 seconds of inactivity
53 namespace {
55 struct cache_transaction;
56 struct cached_block;
57 struct block_cache;
58 typedef DoublyLinkedListLink<cached_block> block_link;
60 struct cached_block {
61 cached_block* next; // next in hash
62 cached_block* transaction_next;
63 block_link link;
64 off_t block_number;
65 void* current_data;
66 // The data that is seen by everyone using the API; this one is always
67 // present.
68 void* original_data;
69 // When in a transaction, this contains the original data from before
70 // the transaction.
71 void* parent_data;
72 // This is a lazily alloced buffer that represents the contents of the
73 // block in the parent transaction. It may point to current_data if the
74 // contents have been changed only in the parent transaction, or, if the
75 // block has been changed in the current sub transaction already, to a
76 // new block containing the contents changed in the parent transaction.
77 // If this is NULL, the block has not been changed in the parent
78 // transaction at all.
79 #if BLOCK_CACHE_DEBUG_CHANGED
80 void* compare;
81 #endif
82 int32 ref_count;
83 int32 last_accessed;
84 bool busy_reading : 1;
85 bool busy_writing : 1;
86 bool is_writing : 1;
87 // Block has been checked out for writing without transactions, and
88 // cannot be written back if set
89 bool is_dirty : 1;
90 bool unused : 1;
91 bool discard : 1;
92 bool busy_reading_waiters : 1;
93 bool busy_writing_waiters : 1;
94 cache_transaction* transaction;
95 // This is the current active transaction, if any, the block is
96 // currently in (meaning was changed as a part of it).
97 cache_transaction* previous_transaction;
98 // This is set to the last transaction that was ended containing this
99 // block. In this case, the block has not yet written back yet, and
100 // the changed data is either in current_data, or original_data -- the
101 // latter if the block is already being part of another transaction.
102 // There can only be one previous transaction, so when the active
103 // transaction ends, the changes of the previous transaction have to
104 // be written back before that transaction becomes the next previous
105 // transaction.
107 bool CanBeWritten() const;
108 int32 LastAccess() const
109 { return system_time() / 1000000L - last_accessed; }
112 typedef DoublyLinkedList<cached_block,
113 DoublyLinkedListMemberGetLink<cached_block,
114 &cached_block::link> > block_list;
116 struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
117 int32 transaction_id;
118 int32 events_pending;
119 int32 events;
120 transaction_notification_hook hook;
121 void* data;
122 bool delete_after_event;
125 typedef DoublyLinkedList<cache_notification> NotificationList;
127 struct BlockHash {
128 typedef off_t KeyType;
129 typedef cached_block ValueType;
131 size_t HashKey(KeyType key) const
133 return key;
136 size_t Hash(ValueType* block) const
138 return block->block_number;
141 bool Compare(KeyType key, ValueType* block) const
143 return block->block_number == key;
146 ValueType*& GetLink(ValueType* value) const
148 return value->next;
152 typedef BOpenHashTable<BlockHash> BlockTable;
155 struct TransactionHash {
156 typedef int32 KeyType;
157 typedef cache_transaction ValueType;
159 size_t HashKey(KeyType key) const
161 return key;
164 size_t Hash(ValueType* transaction) const;
165 bool Compare(KeyType key, ValueType* transaction) const;
166 ValueType*& GetLink(ValueType* value) const;
169 typedef BOpenHashTable<TransactionHash> TransactionTable;
172 struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
173 BlockTable* hash;
174 mutex lock;
175 int fd;
176 off_t max_blocks;
177 size_t block_size;
178 int32 next_transaction_id;
179 cache_transaction* last_transaction;
180 TransactionTable* transaction_hash;
182 object_cache* buffer_cache;
183 block_list unused_blocks;
184 uint32 unused_block_count;
186 ConditionVariable busy_reading_condition;
187 uint32 busy_reading_count;
188 bool busy_reading_waiters;
190 ConditionVariable busy_writing_condition;
191 uint32 busy_writing_count;
192 bool busy_writing_waiters;
194 uint32 num_dirty_blocks;
195 bool read_only;
197 NotificationList pending_notifications;
198 ConditionVariable condition_variable;
200 block_cache(int fd, off_t numBlocks, size_t blockSize,
201 bool readOnly);
202 ~block_cache();
204 status_t Init();
206 void Free(void* buffer);
207 void* Allocate();
208 void FreeBlock(cached_block* block);
209 cached_block* NewBlock(off_t blockNumber);
210 void FreeBlockParentData(cached_block* block);
212 void RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
213 void RemoveBlock(cached_block* block);
214 void DiscardBlock(cached_block* block);
216 private:
217 static void _LowMemoryHandler(void* data, uint32 resources,
218 int32 level);
219 cached_block* _GetUnusedBlock();
222 struct cache_listener;
223 typedef DoublyLinkedListLink<cache_listener> listener_link;
225 struct cache_listener : cache_notification {
226 listener_link link;
229 typedef DoublyLinkedList<cache_listener,
230 DoublyLinkedListMemberGetLink<cache_listener,
231 &cache_listener::link> > ListenerList;
234 struct cache_transaction {
235 cache_transaction();
237 cache_transaction* next;
238 int32 id;
239 int32 num_blocks;
240 int32 main_num_blocks;
241 int32 sub_num_blocks;
242 cached_block* first_block;
243 block_list blocks;
244 ListenerList listeners;
245 bool open;
246 bool has_sub_transaction;
247 bigtime_t last_used;
248 int32 busy_writing_count;
252 class BlockWriter {
253 public:
254 BlockWriter(block_cache* cache,
255 size_t max = SIZE_MAX);
256 ~BlockWriter();
258 bool Add(cached_block* block,
259 cache_transaction* transaction = NULL);
260 bool Add(cache_transaction* transaction,
261 bool& hasLeftOvers);
263 status_t Write(cache_transaction* transaction = NULL,
264 bool canUnlock = true);
266 bool DeletedTransaction() const
267 { return fDeletedTransaction; }
269 static status_t WriteBlock(block_cache* cache,
270 cached_block* block);
272 private:
273 void* _Data(cached_block* block) const;
274 status_t _WriteBlock(cached_block* block);
275 void _BlockDone(cached_block* block,
276 cache_transaction* transaction);
277 void _UnmarkWriting(cached_block* block);
279 static int _CompareBlocks(const void* _blockA,
280 const void* _blockB);
282 private:
283 static const size_t kBufferSize = 64;
285 block_cache* fCache;
286 cached_block* fBuffer[kBufferSize];
287 cached_block** fBlocks;
288 size_t fCount;
289 size_t fTotal;
290 size_t fCapacity;
291 size_t fMax;
292 status_t fStatus;
293 bool fDeletedTransaction;
297 class TransactionLocking {
298 public:
299 inline bool Lock(block_cache* cache)
301 mutex_lock(&cache->lock);
303 while (cache->busy_writing_count != 0) {
304 // wait for all blocks to be written
305 ConditionVariableEntry entry;
306 cache->busy_writing_condition.Add(&entry);
307 cache->busy_writing_waiters = true;
309 mutex_unlock(&cache->lock);
311 entry.Wait();
313 mutex_lock(&cache->lock);
316 return true;
319 inline void Unlock(block_cache* cache)
321 mutex_unlock(&cache->lock);
325 typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
327 } // namespace
330 #if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
331 namespace BlockTracing {
333 class Action : public AbstractTraceEntry {
334 public:
335 Action(block_cache* cache, cached_block* block)
337 fCache(cache),
338 fBlockNumber(block->block_number),
339 fIsDirty(block->is_dirty),
340 fHasOriginal(block->original_data != NULL),
341 fHasParent(block->parent_data != NULL),
342 fTransactionID(-1),
343 fPreviousID(-1)
345 if (block->transaction != NULL)
346 fTransactionID = block->transaction->id;
347 if (block->previous_transaction != NULL)
348 fPreviousID = block->previous_transaction->id;
351 virtual void AddDump(TraceOutput& out)
353 out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
354 " (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
355 fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
356 fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
359 virtual const char* _Action() const = 0;
361 private:
362 block_cache* fCache;
363 uint64 fBlockNumber;
364 bool fIsDirty;
365 bool fHasOriginal;
366 bool fHasParent;
367 int32 fTransactionID;
368 int32 fPreviousID;
371 class Get : public Action {
372 public:
373 Get(block_cache* cache, cached_block* block)
375 Action(cache, block)
377 Initialized();
380 virtual const char* _Action() const { return "get"; }
383 class Put : public Action {
384 public:
385 Put(block_cache* cache, cached_block* block)
387 Action(cache, block)
389 Initialized();
392 virtual const char* _Action() const { return "put"; }
395 class Read : public Action {
396 public:
397 Read(block_cache* cache, cached_block* block)
399 Action(cache, block)
401 Initialized();
404 virtual const char* _Action() const { return "read"; }
407 class Write : public Action {
408 public:
409 Write(block_cache* cache, cached_block* block)
411 Action(cache, block)
413 Initialized();
416 virtual const char* _Action() const { return "write"; }
419 class Flush : public Action {
420 public:
421 Flush(block_cache* cache, cached_block* block, bool getUnused = false)
423 Action(cache, block),
424 fGetUnused(getUnused)
426 Initialized();
429 virtual const char* _Action() const
430 { return fGetUnused ? "get-unused" : "flush"; }
432 private:
433 bool fGetUnused;
436 class Error : public AbstractTraceEntry {
437 public:
438 Error(block_cache* cache, uint64 blockNumber, const char* message,
439 status_t status = B_OK)
441 fCache(cache),
442 fBlockNumber(blockNumber),
443 fMessage(message),
444 fStatus(status)
446 Initialized();
449 virtual void AddDump(TraceOutput& out)
451 out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
452 fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
453 fStatus != B_OK ? strerror(fStatus) : "");
456 private:
457 block_cache* fCache;
458 uint64 fBlockNumber;
459 const char* fMessage;
460 status_t fStatus;
463 #if BLOCK_CACHE_BLOCK_TRACING >= 2
464 class BlockData : public AbstractTraceEntry {
465 public:
466 enum {
467 kCurrent = 0x01,
468 kParent = 0x02,
469 kOriginal = 0x04
472 BlockData(block_cache* cache, cached_block* block, const char* message)
474 fCache(cache),
475 fSize(cache->block_size),
476 fBlockNumber(block->block_number),
477 fMessage(message)
479 _Allocate(fCurrent, block->current_data);
480 _Allocate(fParent, block->parent_data);
481 _Allocate(fOriginal, block->original_data);
483 #if KTRACE_PRINTF_STACK_TRACE
484 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
485 false);
486 #endif
488 Initialized();
491 virtual void AddDump(TraceOutput& out)
493 out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
494 fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
495 fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
496 fMessage);
499 #if KTRACE_PRINTF_STACK_TRACE
500 virtual void DumpStackTrace(TraceOutput& out)
502 out.PrintStackTrace(fStackTrace);
504 #endif
506 void DumpBlocks(uint32 which, uint32 offset, uint32 size)
508 if ((which & kCurrent) != 0)
509 DumpBlock(kCurrent, offset, size);
510 if ((which & kParent) != 0)
511 DumpBlock(kParent, offset, size);
512 if ((which & kOriginal) != 0)
513 DumpBlock(kOriginal, offset, size);
516 void DumpBlock(uint32 which, uint32 offset, uint32 size)
518 if (offset > fSize) {
519 kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
520 return;
522 if (offset + size > fSize)
523 size = fSize - offset;
525 const char* label;
526 uint8* data;
528 if ((which & kCurrent) != 0) {
529 label = "current";
530 data = fCurrent;
531 } else if ((which & kParent) != 0) {
532 label = "parent";
533 data = fParent;
534 } else if ((which & kOriginal) != 0) {
535 label = "original";
536 data = fOriginal;
537 } else
538 return;
540 kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
542 static const uint32 kBlockSize = 16;
543 data += offset;
545 for (uint32 i = 0; i < size;) {
546 int start = i;
548 kprintf(" %04" B_PRIx32 " ", i);
549 for (; i < start + kBlockSize; i++) {
550 if (!(i % 4))
551 kprintf(" ");
553 if (i >= size)
554 kprintf(" ");
555 else
556 kprintf("%02x", data[i]);
559 kprintf("\n");
563 private:
564 void _Allocate(uint8*& target, void* source)
566 if (source == NULL) {
567 target = NULL;
568 return;
571 target = alloc_tracing_buffer_memcpy(source, fSize, false);
574 block_cache* fCache;
575 uint32 fSize;
576 uint64 fBlockNumber;
577 const char* fMessage;
578 uint8* fCurrent;
579 uint8* fParent;
580 uint8* fOriginal;
581 #if KTRACE_PRINTF_STACK_TRACE
582 tracing_stack_trace* fStackTrace;
583 #endif
585 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2
587 } // namespace BlockTracing
589 # define TB(x) new(std::nothrow) BlockTracing::x;
590 #else
591 # define TB(x) ;
592 #endif
594 #if BLOCK_CACHE_BLOCK_TRACING >= 2
595 # define TB2(x) new(std::nothrow) BlockTracing::x;
596 #else
597 # define TB2(x) ;
598 #endif
601 #if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
602 namespace TransactionTracing {
604 class Action : public AbstractTraceEntry {
605 public:
606 Action(const char* label, block_cache* cache,
607 cache_transaction* transaction)
609 fCache(cache),
610 fTransaction(transaction),
611 fID(transaction->id),
612 fSub(transaction->has_sub_transaction),
613 fNumBlocks(transaction->num_blocks),
614 fSubNumBlocks(transaction->sub_num_blocks)
616 strlcpy(fLabel, label, sizeof(fLabel));
617 Initialized();
620 virtual void AddDump(TraceOutput& out)
622 out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
623 ", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
624 fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
627 private:
628 char fLabel[12];
629 block_cache* fCache;
630 cache_transaction* fTransaction;
631 int32 fID;
632 bool fSub;
633 int32 fNumBlocks;
634 int32 fSubNumBlocks;
637 class Detach : public AbstractTraceEntry {
638 public:
639 Detach(block_cache* cache, cache_transaction* transaction,
640 cache_transaction* newTransaction)
642 fCache(cache),
643 fTransaction(transaction),
644 fID(transaction->id),
645 fSub(transaction->has_sub_transaction),
646 fNewTransaction(newTransaction),
647 fNewID(newTransaction->id)
649 Initialized();
652 virtual void AddDump(TraceOutput& out)
654 out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
655 "from transaction %p (id %" B_PRId32 ")%s",
656 fCache, fNewTransaction, fNewID, fTransaction, fID,
657 fSub ? " sub" : "");
660 private:
661 block_cache* fCache;
662 cache_transaction* fTransaction;
663 int32 fID;
664 bool fSub;
665 cache_transaction* fNewTransaction;
666 int32 fNewID;
669 class Abort : public AbstractTraceEntry {
670 public:
671 Abort(block_cache* cache, cache_transaction* transaction)
673 fCache(cache),
674 fTransaction(transaction),
675 fID(transaction->id),
676 fNumBlocks(0)
678 bool isSub = transaction->has_sub_transaction;
679 fNumBlocks = isSub ? transaction->sub_num_blocks
680 : transaction->num_blocks;
681 fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
682 if (fBlocks != NULL) {
683 cached_block* block = transaction->first_block;
684 for (int32 i = 0; block != NULL && i < fNumBlocks;
685 block = block->transaction_next) {
686 fBlocks[i++] = block->block_number;
688 } else
689 fNumBlocks = 0;
691 #if KTRACE_PRINTF_STACK_TRACE
692 fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
693 false);
694 #endif
696 Initialized();
699 virtual void AddDump(TraceOutput& out)
701 out.Print("block cache %p, abort transaction "
702 "%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
703 for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
704 out.Print(" %" B_PRIdOFF, fBlocks[i]);
707 #if KTRACE_PRINTF_STACK_TRACE
708 virtual void DumpStackTrace(TraceOutput& out)
710 out.PrintStackTrace(fStackTrace);
712 #endif
714 private:
715 block_cache* fCache;
716 cache_transaction* fTransaction;
717 int32 fID;
718 off_t* fBlocks;
719 int32 fNumBlocks;
720 #if KTRACE_PRINTF_STACK_TRACE
721 tracing_stack_trace* fStackTrace;
722 #endif
725 } // namespace TransactionTracing
727 # define T(x) new(std::nothrow) TransactionTracing::x;
728 #else
729 # define T(x) ;
730 #endif
733 static DoublyLinkedList<block_cache> sCaches;
734 static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
735 static mutex sCachesMemoryUseLock
736 = MUTEX_INITIALIZER("block caches memory use");
737 static size_t sUsedMemory;
738 static sem_id sEventSemaphore;
739 static mutex sNotificationsLock
740 = MUTEX_INITIALIZER("block cache notifications");
741 static thread_id sNotifierWriterThread;
742 static DoublyLinkedListLink<block_cache> sMarkCache;
743 // TODO: this only works if the link is the first entry of block_cache
744 static object_cache* sBlockCache;
747 // #pragma mark - notifications/listener
750 /*! Checks whether or not this is an event that closes a transaction. */
751 static inline bool
752 is_closing_event(int32 event)
754 return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
758 static inline bool
759 is_written_event(int32 event)
761 return (event & TRANSACTION_WRITTEN) != 0;
765 /*! From the specified \a notification, it will remove the lowest pending
766 event, and return that one in \a _event.
767 If there is no pending event anymore, it will return \c false.
769 static bool
770 get_next_pending_event(cache_notification* notification, int32* _event)
772 for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
773 int32 pending = atomic_and(&notification->events_pending,
774 ~eventMask);
776 bool more = (pending & ~eventMask) != 0;
778 if ((pending & eventMask) != 0) {
779 *_event = eventMask;
780 return more;
784 return false;
788 static void
789 flush_pending_notifications(block_cache* cache)
791 ASSERT_LOCKED_MUTEX(&sCachesLock);
793 while (true) {
794 MutexLocker locker(sNotificationsLock);
796 cache_notification* notification = cache->pending_notifications.Head();
797 if (notification == NULL)
798 return;
800 bool deleteAfterEvent = false;
801 int32 event = -1;
802 if (!get_next_pending_event(notification, &event)) {
803 // remove the notification if this was the last pending event
804 cache->pending_notifications.Remove(notification);
805 deleteAfterEvent = notification->delete_after_event;
808 if (event >= 0) {
809 // Notify listener, we need to copy the notification, as it might
810 // be removed when we unlock the list.
811 cache_notification copy = *notification;
812 locker.Unlock();
814 copy.hook(copy.transaction_id, event, copy.data);
816 locker.Lock();
819 if (deleteAfterEvent)
820 delete notification;
825 /*! Flushes all pending notifications by calling the appropriate hook
826 functions.
827 Must not be called with a cache lock held.
829 static void
830 flush_pending_notifications()
832 MutexLocker _(sCachesLock);
834 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
835 while (iterator.HasNext()) {
836 block_cache* cache = iterator.Next();
838 flush_pending_notifications(cache);
843 /*! Initializes the \a notification as specified. */
844 static void
845 set_notification(cache_transaction* transaction,
846 cache_notification &notification, int32 events,
847 transaction_notification_hook hook, void* data)
849 notification.transaction_id = transaction != NULL ? transaction->id : -1;
850 notification.events_pending = 0;
851 notification.events = events;
852 notification.hook = hook;
853 notification.data = data;
854 notification.delete_after_event = false;
858 /*! Makes sure the notification is deleted. It either deletes it directly,
859 when possible, or marks it for deletion if the notification is pending.
861 static void
862 delete_notification(cache_notification* notification)
864 MutexLocker locker(sNotificationsLock);
866 if (notification->events_pending != 0)
867 notification->delete_after_event = true;
868 else
869 delete notification;
873 /*! Adds the notification to the pending notifications list, or, if it's
874 already part of it, updates its events_pending field.
875 Also marks the notification to be deleted if \a deleteNotification
876 is \c true.
877 Triggers the notifier thread to run.
879 static void
880 add_notification(block_cache* cache, cache_notification* notification,
881 int32 event, bool deleteNotification)
883 if (notification->hook == NULL)
884 return;
886 int32 pending = atomic_or(&notification->events_pending, event);
887 if (pending == 0) {
888 // not yet part of the notification list
889 MutexLocker locker(sNotificationsLock);
890 if (deleteNotification)
891 notification->delete_after_event = true;
892 cache->pending_notifications.Add(notification);
893 } else if (deleteNotification) {
894 // we might need to delete it ourselves if we're late
895 delete_notification(notification);
898 release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
899 // We're probably still holding some locks that makes rescheduling
900 // not a good idea at this point.
904 /*! Notifies all interested listeners of this transaction about the \a event.
905 If \a event is a closing event (ie. TRANSACTION_ENDED, and
906 TRANSACTION_ABORTED), all listeners except those listening to
907 TRANSACTION_WRITTEN will be removed.
909 static void
910 notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
911 int32 event)
913 T(Action("notify", cache, transaction));
915 bool isClosing = is_closing_event(event);
916 bool isWritten = is_written_event(event);
918 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
919 while (iterator.HasNext()) {
920 cache_listener* listener = iterator.Next();
922 bool remove = (isClosing && !is_written_event(listener->events))
923 || (isWritten && is_written_event(listener->events));
924 if (remove)
925 iterator.Remove();
927 if ((listener->events & event) != 0)
928 add_notification(cache, listener, event, remove);
929 else if (remove)
930 delete_notification(listener);
935 /*! Removes and deletes all listeners that are still monitoring this
936 transaction.
938 static void
939 remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
941 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
942 while (iterator.HasNext()) {
943 cache_listener* listener = iterator.Next();
944 iterator.Remove();
946 delete_notification(listener);
951 static status_t
952 add_transaction_listener(block_cache* cache, cache_transaction* transaction,
953 int32 events, transaction_notification_hook hookFunction, void* data)
955 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
956 while (iterator.HasNext()) {
957 cache_listener* listener = iterator.Next();
959 if (listener->data == data && listener->hook == hookFunction) {
960 // this listener already exists, just update it
961 listener->events |= events;
962 return B_OK;
966 cache_listener* listener = new(std::nothrow) cache_listener;
967 if (listener == NULL)
968 return B_NO_MEMORY;
970 set_notification(transaction, *listener, events, hookFunction, data);
971 transaction->listeners.Add(listener);
972 return B_OK;
976 // #pragma mark - private transaction
979 cache_transaction::cache_transaction()
981 num_blocks = 0;
982 main_num_blocks = 0;
983 sub_num_blocks = 0;
984 first_block = NULL;
985 open = true;
986 has_sub_transaction = false;
987 last_used = system_time();
988 busy_writing_count = 0;
992 static void
993 delete_transaction(block_cache* cache, cache_transaction* transaction)
995 if (cache->last_transaction == transaction)
996 cache->last_transaction = NULL;
998 remove_transaction_listeners(cache, transaction);
999 delete transaction;
1003 static cache_transaction*
1004 lookup_transaction(block_cache* cache, int32 id)
1006 return cache->transaction_hash->Lookup(id);
1010 size_t TransactionHash::Hash(cache_transaction* transaction) const
1012 return transaction->id;
1016 bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
1018 return transaction->id == key;
1022 cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
1024 return value->next;
1028 /*! Writes back any changes made to blocks in \a transaction that are still
1029 part of a previous transacton.
1031 static status_t
1032 write_blocks_in_previous_transaction(block_cache* cache,
1033 cache_transaction* transaction)
1035 BlockWriter writer(cache);
1037 cached_block* block = transaction->first_block;
1038 for (; block != NULL; block = block->transaction_next) {
1039 if (block->previous_transaction != NULL) {
1040 // need to write back pending changes
1041 writer.Add(block);
1045 return writer.Write();
1049 // #pragma mark - cached_block
1052 bool
1053 cached_block::CanBeWritten() const
1055 return !busy_writing && !busy_reading
1056 && (previous_transaction != NULL
1057 || (transaction == NULL && is_dirty && !is_writing));
1061 // #pragma mark - BlockWriter
1064 BlockWriter::BlockWriter(block_cache* cache, size_t max)
1066 fCache(cache),
1067 fBlocks(fBuffer),
1068 fCount(0),
1069 fTotal(0),
1070 fCapacity(kBufferSize),
1071 fMax(max),
1072 fStatus(B_OK),
1073 fDeletedTransaction(false)
1078 BlockWriter::~BlockWriter()
1080 if (fBlocks != fBuffer)
1081 free(fBlocks);
1085 /*! Adds the specified block to the to be written array. If no more blocks can
1086 be added, false is returned, otherwise true.
1088 bool
1089 BlockWriter::Add(cached_block* block, cache_transaction* transaction)
1091 ASSERT(block->CanBeWritten());
1093 if (fTotal == fMax)
1094 return false;
1096 if (fCount >= fCapacity) {
1097 // Enlarge array if necessary
1098 cached_block** newBlocks;
1099 size_t newCapacity = max_c(256, fCapacity * 2);
1100 if (fBlocks == fBuffer)
1101 newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
1102 else {
1103 newBlocks = (cached_block**)realloc(fBlocks,
1104 newCapacity * sizeof(void*));
1107 if (newBlocks == NULL) {
1108 // Allocating a larger array failed - we need to write back what
1109 // we have synchronously now (this will also clear the array)
1110 Write(transaction, false);
1111 } else {
1112 if (fBlocks == fBuffer)
1113 memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
1115 fBlocks = newBlocks;
1116 fCapacity = newCapacity;
1120 fBlocks[fCount++] = block;
1121 fTotal++;
1122 block->busy_writing = true;
1123 fCache->busy_writing_count++;
1124 if (block->previous_transaction != NULL)
1125 block->previous_transaction->busy_writing_count++;
1127 return true;
1131 /*! Adds all blocks of the specified transaction to the to be written array.
1132 If no more blocks can be added, false is returned, otherwise true.
1134 bool
1135 BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
1137 ASSERT(!transaction->open);
1139 if (transaction->busy_writing_count != 0) {
1140 hasLeftOvers = true;
1141 return true;
1144 hasLeftOvers = false;
1146 block_list::Iterator blockIterator = transaction->blocks.GetIterator();
1147 while (cached_block* block = blockIterator.Next()) {
1148 if (!block->CanBeWritten()) {
1149 // This block was already part of a previous transaction within this
1150 // writer
1151 hasLeftOvers = true;
1152 continue;
1154 if (!Add(block, transaction))
1155 return false;
1157 if (DeletedTransaction())
1158 break;
1161 return true;
1165 /*! Cache must be locked when calling this method, but it will be unlocked
1166 while the blocks are written back.
1168 status_t
1169 BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
1171 if (fCount == 0)
1172 return B_OK;
1174 if (canUnlock)
1175 mutex_unlock(&fCache->lock);
1177 // Sort blocks in their on-disk order
1178 // TODO: ideally, this should be handled by the I/O scheduler
1180 qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
1181 fDeletedTransaction = false;
1183 for (uint32 i = 0; i < fCount; i++) {
1184 status_t status = _WriteBlock(fBlocks[i]);
1185 if (status != B_OK) {
1186 // propagate to global error handling
1187 if (fStatus == B_OK)
1188 fStatus = status;
1190 _UnmarkWriting(fBlocks[i]);
1191 fBlocks[i] = NULL;
1192 // This block will not be marked clean
1196 if (canUnlock)
1197 mutex_lock(&fCache->lock);
1199 for (uint32 i = 0; i < fCount; i++)
1200 _BlockDone(fBlocks[i], transaction);
1202 fCount = 0;
1203 return fStatus;
1207 /*! Writes the specified \a block back to disk. It will always only write back
1208 the oldest change of the block if it is part of more than one transaction.
1209 It will automatically send out TRANSACTION_WRITTEN notices, as well as
1210 delete transactions when they are no longer used, and \a deleteTransaction
1211 is \c true.
1213 /*static*/ status_t
1214 BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
1216 BlockWriter writer(cache);
1218 writer.Add(block);
1219 return writer.Write();
1223 void*
1224 BlockWriter::_Data(cached_block* block) const
1226 return block->previous_transaction != NULL && block->original_data != NULL
1227 ? block->original_data : block->current_data;
1228 // We first need to write back changes from previous transactions
1232 status_t
1233 BlockWriter::_WriteBlock(cached_block* block)
1235 ASSERT(block->busy_writing);
1237 TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number));
1238 TB(Write(fCache, block));
1239 TB2(BlockData(fCache, block, "before write"));
1241 size_t blockSize = fCache->block_size;
1243 ssize_t written = write_pos(fCache->fd,
1244 block->block_number * blockSize, _Data(block), blockSize);
1246 if (written != (ssize_t)blockSize) {
1247 TB(Error(fCache, block->block_number, "write failed", written));
1248 TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number,
1249 strerror(errno)));
1250 if (written < 0)
1251 return errno;
1253 return B_IO_ERROR;
1256 return B_OK;
1260 void
1261 BlockWriter::_BlockDone(cached_block* block,
1262 cache_transaction* transaction)
1264 if (block == NULL) {
1265 // An error occured when trying to write this block
1266 return;
1269 if (fCache->num_dirty_blocks > 0)
1270 fCache->num_dirty_blocks--;
1272 if (_Data(block) == block->current_data)
1273 block->is_dirty = false;
1275 _UnmarkWriting(block);
1277 cache_transaction* previous = block->previous_transaction;
1278 if (previous != NULL) {
1279 previous->blocks.Remove(block);
1280 block->previous_transaction = NULL;
1282 if (block->original_data != NULL && block->transaction == NULL) {
1283 // This block is not part of a transaction, so it does not need
1284 // its original pointer anymore.
1285 fCache->Free(block->original_data);
1286 block->original_data = NULL;
1289 // Has the previous transaction been finished with that write?
1290 if (--previous->num_blocks == 0) {
1291 TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
1292 T(Action("written", fCache, previous));
1294 notify_transaction_listeners(fCache, previous,
1295 TRANSACTION_WRITTEN);
1297 if (transaction != NULL) {
1298 // This function is called while iterating transaction_hash. We
1299 // use RemoveUnchecked so the iterator is still valid. A regular
1300 // Remove can trigger a resize of the hash table which would
1301 // result in the linked items in the table changing order.
1302 fCache->transaction_hash->RemoveUnchecked(transaction);
1303 } else
1304 fCache->transaction_hash->Remove(previous);
1306 delete_transaction(fCache, previous);
1307 fDeletedTransaction = true;
1310 if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
1311 // the block is no longer used
1312 ASSERT(block->original_data == NULL && block->parent_data == NULL);
1313 block->unused = true;
1314 fCache->unused_blocks.Add(block);
1315 fCache->unused_block_count++;
1318 TB2(BlockData(fCache, block, "after write"));
1322 void
1323 BlockWriter::_UnmarkWriting(cached_block* block)
1325 block->busy_writing = false;
1326 if (block->previous_transaction != NULL)
1327 block->previous_transaction->busy_writing_count--;
1328 fCache->busy_writing_count--;
1330 if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
1331 || block->busy_writing_waiters) {
1332 fCache->busy_writing_waiters = false;
1333 block->busy_writing_waiters = false;
1334 fCache->busy_writing_condition.NotifyAll();
1339 /*static*/ int
1340 BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
1342 cached_block* blockA = *(cached_block**)_blockA;
1343 cached_block* blockB = *(cached_block**)_blockB;
1345 off_t diff = blockA->block_number - blockB->block_number;
1346 if (diff > 0)
1347 return 1;
1349 return diff < 0 ? -1 : 0;
1353 // #pragma mark - block_cache
1356 block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
1357 bool readOnly)
1359 hash(NULL),
1360 fd(_fd),
1361 max_blocks(numBlocks),
1362 block_size(blockSize),
1363 next_transaction_id(1),
1364 last_transaction(NULL),
1365 transaction_hash(NULL),
1366 buffer_cache(NULL),
1367 unused_block_count(0),
1368 busy_reading_count(0),
1369 busy_reading_waiters(false),
1370 busy_writing_count(0),
1371 busy_writing_waiters(0),
1372 num_dirty_blocks(0),
1373 read_only(readOnly)
1378 /*! Should be called with the cache's lock held. */
1379 block_cache::~block_cache()
1381 unregister_low_resource_handler(&_LowMemoryHandler, this);
1383 delete transaction_hash;
1384 delete hash;
1386 delete_object_cache(buffer_cache);
1388 mutex_destroy(&lock);
1392 status_t
1393 block_cache::Init()
1395 busy_reading_condition.Init(this, "cache block busy_reading");
1396 busy_writing_condition.Init(this, "cache block busy writing");
1397 condition_variable.Init(this, "cache transaction sync");
1398 mutex_init(&lock, "block cache");
1400 buffer_cache = create_object_cache_etc("block cache buffers", block_size,
1401 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
1402 if (buffer_cache == NULL)
1403 return B_NO_MEMORY;
1405 hash = new BlockTable();
1406 if (hash == NULL || hash->Init(1024) != B_OK)
1407 return B_NO_MEMORY;
1409 transaction_hash = new(std::nothrow) TransactionTable();
1410 if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK)
1411 return B_NO_MEMORY;
1413 return register_low_resource_handler(&_LowMemoryHandler, this,
1414 B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1415 | B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
1419 void
1420 block_cache::Free(void* buffer)
1422 if (buffer != NULL)
1423 object_cache_free(buffer_cache, buffer, 0);
1427 void*
1428 block_cache::Allocate()
1430 void* block = object_cache_alloc(buffer_cache, 0);
1431 if (block != NULL)
1432 return block;
1434 // recycle existing before allocating a new one
1435 RemoveUnusedBlocks(100);
1437 return object_cache_alloc(buffer_cache, 0);
1441 void
1442 block_cache::FreeBlock(cached_block* block)
1444 Free(block->current_data);
1446 if (block->original_data != NULL || block->parent_data != NULL) {
1447 panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
1448 block->block_number, block->original_data, block->parent_data);
1451 #if BLOCK_CACHE_DEBUG_CHANGED
1452 Free(block->compare);
1453 #endif
1455 object_cache_free(sBlockCache, block, 0);
1459 /*! Allocates a new block for \a blockNumber, ready for use */
1460 cached_block*
1461 block_cache::NewBlock(off_t blockNumber)
1463 cached_block* block = NULL;
1465 if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1466 | B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
1467 // recycle existing instead of allocating a new one
1468 block = _GetUnusedBlock();
1470 if (block == NULL) {
1471 block = (cached_block*)object_cache_alloc(sBlockCache, 0);
1472 if (block != NULL) {
1473 block->current_data = Allocate();
1474 if (block->current_data == NULL) {
1475 object_cache_free(sBlockCache, block, 0);
1476 return NULL;
1478 } else {
1479 TB(Error(this, blockNumber, "allocation failed"));
1480 dprintf("block allocation failed, unused list is %sempty.\n",
1481 unused_blocks.IsEmpty() ? "" : "not ");
1483 // allocation failed, try to reuse an unused block
1484 block = _GetUnusedBlock();
1485 if (block == NULL) {
1486 TB(Error(this, blockNumber, "get unused failed"));
1487 FATAL(("could not allocate block!\n"));
1488 return NULL;
1493 block->block_number = blockNumber;
1494 block->ref_count = 0;
1495 block->last_accessed = 0;
1496 block->transaction_next = NULL;
1497 block->transaction = block->previous_transaction = NULL;
1498 block->original_data = NULL;
1499 block->parent_data = NULL;
1500 block->busy_reading = false;
1501 block->busy_writing = false;
1502 block->is_writing = false;
1503 block->is_dirty = false;
1504 block->unused = false;
1505 block->discard = false;
1506 block->busy_reading_waiters = false;
1507 block->busy_writing_waiters = false;
1508 #if BLOCK_CACHE_DEBUG_CHANGED
1509 block->compare = NULL;
1510 #endif
1512 return block;
1516 void
1517 block_cache::FreeBlockParentData(cached_block* block)
1519 ASSERT(block->parent_data != NULL);
1520 if (block->parent_data != block->current_data)
1521 Free(block->parent_data);
1522 block->parent_data = NULL;
1526 void
1527 block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
1529 TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
1531 for (block_list::Iterator iterator = unused_blocks.GetIterator();
1532 cached_block* block = iterator.Next();) {
1533 if (minSecondsOld >= block->LastAccess()) {
1534 // The list is sorted by last access
1535 break;
1537 if (block->busy_reading || block->busy_writing)
1538 continue;
1540 TB(Flush(this, block));
1541 TRACE((" remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
1542 block->block_number, block->last_accessed));
1544 // this can only happen if no transactions are used
1545 if (block->is_dirty && !block->discard) {
1546 if (block->busy_writing)
1547 continue;
1549 BlockWriter::WriteBlock(this, block);
1552 // remove block from lists
1553 iterator.Remove();
1554 unused_block_count--;
1555 RemoveBlock(block);
1557 if (--count <= 0)
1558 break;
1563 void
1564 block_cache::RemoveBlock(cached_block* block)
1566 hash->Remove(block);
1567 FreeBlock(block);
1571 /*! Discards the block from a transaction (this method must not be called
1572 for blocks not part of a transaction).
1574 void
1575 block_cache::DiscardBlock(cached_block* block)
1577 ASSERT(block->discard);
1578 ASSERT(block->previous_transaction == NULL);
1580 if (block->parent_data != NULL)
1581 FreeBlockParentData(block);
1583 if (block->original_data != NULL) {
1584 Free(block->original_data);
1585 block->original_data = NULL;
1588 RemoveBlock(block);
1592 void
1593 block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
1595 TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
1597 // free some blocks according to the low memory state
1598 // (if there is enough memory left, we don't free any)
1600 block_cache* cache = (block_cache*)data;
1601 int32 free = 0;
1602 int32 secondsOld = 0;
1603 switch (level) {
1604 case B_NO_LOW_RESOURCE:
1605 return;
1606 case B_LOW_RESOURCE_NOTE:
1607 free = cache->unused_block_count / 8;
1608 secondsOld = 120;
1609 break;
1610 case B_LOW_RESOURCE_WARNING:
1611 free = cache->unused_block_count / 4;
1612 secondsOld = 10;
1613 break;
1614 case B_LOW_RESOURCE_CRITICAL:
1615 free = cache->unused_block_count / 2;
1616 secondsOld = 0;
1617 break;
1620 MutexLocker locker(&cache->lock);
1622 if (!locker.IsLocked()) {
1623 // If our block_cache were deleted, it could be that we had
1624 // been called before that deletion went through, therefore,
1625 // acquiring its lock might fail.
1626 return;
1629 #ifdef TRACE_BLOCK_CACHE
1630 uint32 oldUnused = cache->unused_block_count;
1631 #endif
1633 cache->RemoveUnusedBlocks(free, secondsOld);
1635 TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
1636 cache, oldUnused, cache->unused_block_count));
1640 cached_block*
1641 block_cache::_GetUnusedBlock()
1643 TRACE(("block_cache: get unused block\n"));
1645 for (block_list::Iterator iterator = unused_blocks.GetIterator();
1646 cached_block* block = iterator.Next();) {
1647 TB(Flush(this, block, true));
1648 // this can only happen if no transactions are used
1649 if (block->is_dirty && !block->busy_writing && !block->discard)
1650 BlockWriter::WriteBlock(this, block);
1652 // remove block from lists
1653 iterator.Remove();
1654 unused_block_count--;
1655 hash->Remove(block);
1657 ASSERT(block->original_data == NULL && block->parent_data == NULL);
1658 block->unused = false;
1660 // TODO: see if compare data is handled correctly here!
1661 #if BLOCK_CACHE_DEBUG_CHANGED
1662 if (block->compare != NULL)
1663 Free(block->compare);
1664 #endif
1665 return block;
1668 return NULL;
1672 // #pragma mark - private block functions
1675 /*! Cache must be locked.
1677 static void
1678 mark_block_busy_reading(block_cache* cache, cached_block* block)
1680 block->busy_reading = true;
1681 cache->busy_reading_count++;
1685 /*! Cache must be locked.
1687 static void
1688 mark_block_unbusy_reading(block_cache* cache, cached_block* block)
1690 block->busy_reading = false;
1691 cache->busy_reading_count--;
1693 if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
1694 || block->busy_reading_waiters) {
1695 cache->busy_reading_waiters = false;
1696 block->busy_reading_waiters = false;
1697 cache->busy_reading_condition.NotifyAll();
1702 /*! Cache must be locked.
1704 static void
1705 wait_for_busy_reading_block(block_cache* cache, cached_block* block)
1707 while (block->busy_reading) {
1708 // wait for at least the specified block to be read in
1709 ConditionVariableEntry entry;
1710 cache->busy_reading_condition.Add(&entry);
1711 block->busy_reading_waiters = true;
1713 mutex_unlock(&cache->lock);
1715 entry.Wait();
1717 mutex_lock(&cache->lock);
1722 /*! Cache must be locked.
1724 static void
1725 wait_for_busy_reading_blocks(block_cache* cache)
1727 while (cache->busy_reading_count != 0) {
1728 // wait for all blocks to be read in
1729 ConditionVariableEntry entry;
1730 cache->busy_reading_condition.Add(&entry);
1731 cache->busy_reading_waiters = true;
1733 mutex_unlock(&cache->lock);
1735 entry.Wait();
1737 mutex_lock(&cache->lock);
1742 /*! Cache must be locked.
1744 static void
1745 wait_for_busy_writing_block(block_cache* cache, cached_block* block)
1747 while (block->busy_writing) {
1748 // wait for all blocks to be written back
1749 ConditionVariableEntry entry;
1750 cache->busy_writing_condition.Add(&entry);
1751 block->busy_writing_waiters = true;
1753 mutex_unlock(&cache->lock);
1755 entry.Wait();
1757 mutex_lock(&cache->lock);
1762 /*! Cache must be locked.
1764 static void
1765 wait_for_busy_writing_blocks(block_cache* cache)
1767 while (cache->busy_writing_count != 0) {
1768 // wait for all blocks to be written back
1769 ConditionVariableEntry entry;
1770 cache->busy_writing_condition.Add(&entry);
1771 cache->busy_writing_waiters = true;
1773 mutex_unlock(&cache->lock);
1775 entry.Wait();
1777 mutex_lock(&cache->lock);
1782 /*! Removes a reference from the specified \a block. If this was the last
1783 reference, the block is moved into the unused list.
1784 In low memory situations, it will also free some blocks from that list,
1785 but not necessarily the \a block it just released.
1787 static void
1788 put_cached_block(block_cache* cache, cached_block* block)
1790 #if BLOCK_CACHE_DEBUG_CHANGED
1791 if (!block->is_dirty && block->compare != NULL
1792 && memcmp(block->current_data, block->compare, cache->block_size)) {
1793 dprintf("new block:\n");
1794 dump_block((const char*)block->current_data, 256, " ");
1795 dprintf("unchanged block:\n");
1796 dump_block((const char*)block->compare, 256, " ");
1797 BlockWriter::WriteBlock(cache, block);
1798 panic("block_cache: supposed to be clean block was changed!\n");
1800 cache->Free(block->compare);
1801 block->compare = NULL;
1803 #endif
1804 TB(Put(cache, block));
1806 if (block->ref_count < 1) {
1807 panic("Invalid ref_count for block %p, cache %p\n", block, cache);
1808 return;
1811 if (--block->ref_count == 0
1812 && block->transaction == NULL && block->previous_transaction == NULL) {
1813 // This block is not used anymore, and not part of any transaction
1814 block->is_writing = false;
1816 if (block->discard) {
1817 cache->RemoveBlock(block);
1818 } else {
1819 // put this block in the list of unused blocks
1820 ASSERT(!block->unused);
1821 block->unused = true;
1823 ASSERT(block->original_data == NULL && block->parent_data == NULL);
1824 cache->unused_blocks.Add(block);
1825 cache->unused_block_count++;
1831 static void
1832 put_cached_block(block_cache* cache, off_t blockNumber)
1834 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1835 panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1836 blockNumber, cache->max_blocks - 1);
1839 cached_block* block = cache->hash->Lookup(blockNumber);
1840 if (block != NULL)
1841 put_cached_block(cache, block);
1842 else {
1843 TB(Error(cache, blockNumber, "put unknown"));
1848 /*! Retrieves the block \a blockNumber from the hash table, if it's already
1849 there, or reads it from the disk.
1850 You need to have the cache locked when calling this function.
1852 \param _allocated tells you whether or not a new block has been allocated
1853 to satisfy your request.
1854 \param readBlock if \c false, the block will not be read in case it was
1855 not already in the cache. The block you retrieve may contain random
1856 data. If \c true, the cache will be temporarily unlocked while the
1857 block is read in.
1859 static cached_block*
1860 get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
1861 bool readBlock = true)
1863 ASSERT_LOCKED_MUTEX(&cache->lock);
1865 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1866 panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1867 blockNumber, cache->max_blocks - 1);
1868 return NULL;
1871 retry:
1872 cached_block* block = cache->hash->Lookup(blockNumber);
1873 *_allocated = false;
1875 if (block == NULL) {
1876 // put block into cache
1877 block = cache->NewBlock(blockNumber);
1878 if (block == NULL)
1879 return NULL;
1881 cache->hash->Insert(block);
1882 *_allocated = true;
1883 } else if (block->busy_reading) {
1884 // The block is currently busy_reading - wait and try again later
1885 wait_for_busy_reading_block(cache, block);
1886 goto retry;
1889 if (block->unused) {
1890 //TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
1891 block->unused = false;
1892 cache->unused_blocks.Remove(block);
1893 cache->unused_block_count--;
1896 if (*_allocated && readBlock) {
1897 // read block into cache
1898 int32 blockSize = cache->block_size;
1900 mark_block_busy_reading(cache, block);
1901 mutex_unlock(&cache->lock);
1903 ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
1904 block->current_data, blockSize);
1906 mutex_lock(&cache->lock);
1907 if (bytesRead < blockSize) {
1908 cache->RemoveBlock(block);
1909 TB(Error(cache, blockNumber, "read failed", bytesRead));
1911 TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n",
1912 blockNumber, bytesRead, strerror(errno)));
1913 return NULL;
1915 TB(Read(cache, block));
1917 mark_block_unbusy_reading(cache, block);
1920 block->ref_count++;
1921 block->last_accessed = system_time() / 1000000L;
1923 return block;
1927 /*! Returns the writable block data for the requested blockNumber.
1928 If \a cleared is true, the block is not read from disk; an empty block
1929 is returned.
1931 This is the only method to insert a block into a transaction. It makes
1932 sure that the previous block contents are preserved in that case.
1934 static void*
1935 get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
1936 off_t length, int32 transactionID, bool cleared)
1938 TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
1939 blockNumber, transactionID));
1941 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
1942 panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
1943 blockNumber, cache->max_blocks - 1);
1946 bool allocated;
1947 cached_block* block = get_cached_block(cache, blockNumber, &allocated,
1948 !cleared);
1949 if (block == NULL)
1950 return NULL;
1952 if (block->busy_writing)
1953 wait_for_busy_writing_block(cache, block);
1955 block->discard = false;
1957 // if there is no transaction support, we just return the current block
1958 if (transactionID == -1) {
1959 if (cleared) {
1960 mark_block_busy_reading(cache, block);
1961 mutex_unlock(&cache->lock);
1963 memset(block->current_data, 0, cache->block_size);
1965 mutex_lock(&cache->lock);
1966 mark_block_unbusy_reading(cache, block);
1969 block->is_writing = true;
1971 if (!block->is_dirty) {
1972 cache->num_dirty_blocks++;
1973 block->is_dirty = true;
1974 // mark the block as dirty
1977 TB(Get(cache, block));
1978 return block->current_data;
1981 cache_transaction* transaction = block->transaction;
1983 if (transaction != NULL && transaction->id != transactionID) {
1984 // TODO: we have to wait here until the other transaction is done.
1985 // Maybe we should even panic, since we can't prevent any deadlocks.
1986 panic("get_writable_cached_block(): asked to get busy writable block "
1987 "(transaction %" B_PRId32 ")\n", block->transaction->id);
1988 put_cached_block(cache, block);
1989 return NULL;
1991 if (transaction == NULL && transactionID != -1) {
1992 // get new transaction
1993 transaction = lookup_transaction(cache, transactionID);
1994 if (transaction == NULL) {
1995 panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
1996 transactionID);
1997 put_cached_block(cache, block);
1998 return NULL;
2000 if (!transaction->open) {
2001 panic("get_writable_cached_block(): transaction already done!\n");
2002 put_cached_block(cache, block);
2003 return NULL;
2006 block->transaction = transaction;
2008 // attach the block to the transaction block list
2009 block->transaction_next = transaction->first_block;
2010 transaction->first_block = block;
2011 transaction->num_blocks++;
2013 if (transaction != NULL)
2014 transaction->last_used = system_time();
2016 bool wasUnchanged = block->original_data == NULL
2017 || block->previous_transaction != NULL;
2019 if (!(allocated && cleared) && block->original_data == NULL) {
2020 // we already have data, so we need to preserve it
2021 block->original_data = cache->Allocate();
2022 if (block->original_data == NULL) {
2023 TB(Error(cache, blockNumber, "allocate original failed"));
2024 FATAL(("could not allocate original_data\n"));
2025 put_cached_block(cache, block);
2026 return NULL;
2029 mark_block_busy_reading(cache, block);
2030 mutex_unlock(&cache->lock);
2032 memcpy(block->original_data, block->current_data, cache->block_size);
2034 mutex_lock(&cache->lock);
2035 mark_block_unbusy_reading(cache, block);
2037 if (block->parent_data == block->current_data) {
2038 // remember any previous contents for the parent transaction
2039 block->parent_data = cache->Allocate();
2040 if (block->parent_data == NULL) {
2041 // TODO: maybe we should just continue the current transaction in
2042 // this case...
2043 TB(Error(cache, blockNumber, "allocate parent failed"));
2044 FATAL(("could not allocate parent\n"));
2045 put_cached_block(cache, block);
2046 return NULL;
2049 mark_block_busy_reading(cache, block);
2050 mutex_unlock(&cache->lock);
2052 memcpy(block->parent_data, block->current_data, cache->block_size);
2054 mutex_lock(&cache->lock);
2055 mark_block_unbusy_reading(cache, block);
2057 transaction->sub_num_blocks++;
2058 } else if (transaction != NULL && transaction->has_sub_transaction
2059 && block->parent_data == NULL && wasUnchanged)
2060 transaction->sub_num_blocks++;
2062 if (cleared) {
2063 mark_block_busy_reading(cache, block);
2064 mutex_unlock(&cache->lock);
2066 memset(block->current_data, 0, cache->block_size);
2068 mutex_lock(&cache->lock);
2069 mark_block_unbusy_reading(cache, block);
2072 block->is_dirty = true;
2073 TB(Get(cache, block));
2074 TB2(BlockData(cache, block, "get writable"));
2076 return block->current_data;
2080 #if DEBUG_BLOCK_CACHE
2083 static void
2084 dump_block(cached_block* block)
2086 kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
2087 " %c%c%c%c%c%c %08lx %08lx\n",
2088 (addr_t)block, block->block_number,
2089 (addr_t)block->current_data, (addr_t)block->original_data,
2090 (addr_t)block->parent_data, block->ref_count, block->LastAccess(),
2091 block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
2092 block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
2093 block->unused ? 'U' : '-', block->discard ? 'D' : '-',
2094 (addr_t)block->transaction,
2095 (addr_t)block->previous_transaction);
2099 static void
2100 dump_block_long(cached_block* block)
2102 kprintf("BLOCK %p\n", block);
2103 kprintf(" current data: %p\n", block->current_data);
2104 kprintf(" original data: %p\n", block->original_data);
2105 kprintf(" parent data: %p\n", block->parent_data);
2106 #if BLOCK_CACHE_DEBUG_CHANGED
2107 kprintf(" compare data: %p\n", block->compare);
2108 #endif
2109 kprintf(" ref_count: %" B_PRId32 "\n", block->ref_count);
2110 kprintf(" accessed: %" B_PRId32 "\n", block->LastAccess());
2111 kprintf(" flags: ");
2112 if (block->busy_reading)
2113 kprintf(" busy_reading");
2114 if (block->busy_writing)
2115 kprintf(" busy_writing");
2116 if (block->is_writing)
2117 kprintf(" is-writing");
2118 if (block->is_dirty)
2119 kprintf(" is-dirty");
2120 if (block->unused)
2121 kprintf(" unused");
2122 if (block->discard)
2123 kprintf(" discard");
2124 kprintf("\n");
2125 if (block->transaction != NULL) {
2126 kprintf(" transaction: %p (%" B_PRId32 ")\n", block->transaction,
2127 block->transaction->id);
2128 if (block->transaction_next != NULL) {
2129 kprintf(" next in transaction: %" B_PRIdOFF "\n",
2130 block->transaction_next->block_number);
2133 if (block->previous_transaction != NULL) {
2134 kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
2135 block->previous_transaction,
2136 block->previous_transaction->id);
2139 set_debug_variable("_current", (addr_t)block->current_data);
2140 set_debug_variable("_original", (addr_t)block->original_data);
2141 set_debug_variable("_parent", (addr_t)block->parent_data);
2145 static int
2146 dump_cached_block(int argc, char** argv)
2148 if (argc != 2) {
2149 kprintf("usage: %s <block-address>\n", argv[0]);
2150 return 0;
2153 dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
2154 return 0;
2158 static int
2159 dump_cache(int argc, char** argv)
2161 bool showTransactions = false;
2162 bool showBlocks = false;
2163 int32 i = 1;
2164 while (argv[i] != NULL && argv[i][0] == '-') {
2165 for (char* arg = &argv[i][1]; arg[0]; arg++) {
2166 switch (arg[0]) {
2167 case 'b':
2168 showBlocks = true;
2169 break;
2170 case 't':
2171 showTransactions = true;
2172 break;
2173 default:
2174 print_debugger_command_usage(argv[0]);
2175 return 0;
2178 i++;
2181 if (i >= argc) {
2182 print_debugger_command_usage(argv[0]);
2183 return 0;
2186 block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
2187 if (cache == NULL) {
2188 kprintf("invalid cache address\n");
2189 return 0;
2192 off_t blockNumber = -1;
2193 if (i + 1 < argc) {
2194 blockNumber = parse_expression(argv[i + 1]);
2195 cached_block* block = cache->hash->Lookup(blockNumber);
2196 if (block != NULL)
2197 dump_block_long(block);
2198 else
2199 kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
2200 return 0;
2203 kprintf("BLOCK CACHE: %p\n", cache);
2205 kprintf(" fd: %d\n", cache->fd);
2206 kprintf(" max_blocks: %" B_PRIdOFF "\n", cache->max_blocks);
2207 kprintf(" block_size: %zu\n", cache->block_size);
2208 kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
2209 kprintf(" buffer_cache: %p\n", cache->buffer_cache);
2210 kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
2211 cache->busy_reading_waiters ? "has" : "no");
2212 kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
2213 cache->busy_writing_waiters ? "has" : "no");
2215 if (!cache->pending_notifications.IsEmpty()) {
2216 kprintf(" pending notifications:\n");
2218 NotificationList::Iterator iterator
2219 = cache->pending_notifications.GetIterator();
2220 while (iterator.HasNext()) {
2221 cache_notification* notification = iterator.Next();
2223 kprintf(" %p %5" B_PRIx32 " %p - %p\n", notification,
2224 notification->events_pending, notification->hook,
2225 notification->data);
2229 if (showTransactions) {
2230 kprintf(" transactions:\n");
2231 kprintf("address id state blocks main sub\n");
2233 TransactionTable::Iterator iterator(cache->transaction_hash);
2235 while (iterator.HasNext()) {
2236 cache_transaction* transaction = iterator.Next();
2237 kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
2238 B_PRId32 "\n", transaction, transaction->id,
2239 transaction->open ? "open" : "closed",
2240 transaction->num_blocks, transaction->main_num_blocks,
2241 transaction->sub_num_blocks);
2245 if (showBlocks) {
2246 kprintf(" blocks:\n");
2247 kprintf("address block no. current original parent refs access "
2248 "flags transact prev. trans\n");
2251 uint32 referenced = 0;
2252 uint32 count = 0;
2253 uint32 dirty = 0;
2254 uint32 discarded = 0;
2255 BlockTable::Iterator iterator(cache->hash);
2256 while (iterator.HasNext()) {
2257 cached_block* block = iterator.Next();
2258 if (showBlocks)
2259 dump_block(block);
2261 if (block->is_dirty)
2262 dirty++;
2263 if (block->discard)
2264 discarded++;
2265 if (block->ref_count)
2266 referenced++;
2267 count++;
2270 kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
2271 " discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
2272 " in unused.\n",
2273 count, dirty, discarded, referenced, cache->busy_reading_count,
2274 cache->unused_block_count);
2275 return 0;
2279 static int
2280 dump_transaction(int argc, char** argv)
2282 bool showBlocks = false;
2283 int i = 1;
2284 if (argc > 1 && !strcmp(argv[1], "-b")) {
2285 showBlocks = true;
2286 i++;
2289 if (argc - i < 1 || argc - i > 2) {
2290 print_debugger_command_usage(argv[0]);
2291 return 0;
2294 cache_transaction* transaction = NULL;
2296 if (argc - i == 1) {
2297 transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
2298 } else {
2299 block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
2300 int32 id = parse_expression(argv[i + 1]);
2301 transaction = lookup_transaction(cache, id);
2302 if (transaction == NULL) {
2303 kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
2304 return 0;
2308 kprintf("TRANSACTION %p\n", transaction);
2310 kprintf(" id: %" B_PRId32 "\n", transaction->id);
2311 kprintf(" num block: %" B_PRId32 "\n", transaction->num_blocks);
2312 kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
2313 kprintf(" sub num block: %" B_PRId32 "\n", transaction->sub_num_blocks);
2314 kprintf(" has sub: %d\n", transaction->has_sub_transaction);
2315 kprintf(" state: %s\n", transaction->open ? "open" : "closed");
2316 kprintf(" idle: %" B_PRId64 " secs\n",
2317 (system_time() - transaction->last_used) / 1000000);
2319 kprintf(" listeners:\n");
2321 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
2322 while (iterator.HasNext()) {
2323 cache_listener* listener = iterator.Next();
2325 kprintf(" %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
2326 listener->hook, listener->data);
2329 if (!showBlocks)
2330 return 0;
2332 kprintf(" blocks:\n");
2333 kprintf("address block no. current original parent refs access "
2334 "flags transact prev. trans\n");
2336 cached_block* block = transaction->first_block;
2337 while (block != NULL) {
2338 dump_block(block);
2339 block = block->transaction_next;
2342 kprintf("--\n");
2344 block_list::Iterator blockIterator = transaction->blocks.GetIterator();
2345 while (blockIterator.HasNext()) {
2346 block = blockIterator.Next();
2347 dump_block(block);
2350 return 0;
2354 static int
2355 dump_caches(int argc, char** argv)
2357 kprintf("Block caches:\n");
2358 DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
2359 while (i.HasNext()) {
2360 block_cache* cache = i.Next();
2361 if (cache == (block_cache*)&sMarkCache)
2362 continue;
2364 kprintf(" %p\n", cache);
2367 return 0;
2371 #if BLOCK_CACHE_BLOCK_TRACING >= 2
2372 static int
2373 dump_block_data(int argc, char** argv)
2375 using namespace BlockTracing;
2377 // Determine which blocks to show
2379 bool printStackTrace = true;
2380 uint32 which = 0;
2381 int32 i = 1;
2382 while (i < argc && argv[i][0] == '-') {
2383 char* arg = &argv[i][1];
2384 while (arg[0]) {
2385 switch (arg[0]) {
2386 case 'c':
2387 which |= BlockData::kCurrent;
2388 break;
2389 case 'p':
2390 which |= BlockData::kParent;
2391 break;
2392 case 'o':
2393 which |= BlockData::kOriginal;
2394 break;
2396 default:
2397 kprintf("invalid block specifier (only o/c/p are "
2398 "allowed).\n");
2399 return 0;
2401 arg++;
2404 i++;
2406 if (which == 0)
2407 which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
2409 if (i == argc) {
2410 print_debugger_command_usage(argv[0]);
2411 return 0;
2414 // Get the range of blocks to print
2416 int64 from = parse_expression(argv[i]);
2417 int64 to = from;
2418 if (argc > i + 1)
2419 to = parse_expression(argv[i + 1]);
2420 if (to < from)
2421 to = from;
2423 uint32 offset = 0;
2424 uint32 size = LONG_MAX;
2425 if (argc > i + 2)
2426 offset = parse_expression(argv[i + 2]);
2427 if (argc > i + 3)
2428 size = parse_expression(argv[i + 3]);
2430 TraceEntryIterator iterator;
2431 iterator.MoveTo(from - 1);
2433 static char sBuffer[1024];
2434 LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
2436 while (TraceEntry* entry = iterator.Next()) {
2437 int32 index = iterator.Index();
2438 if (index > to)
2439 break;
2441 Action* action = dynamic_cast<Action*>(entry);
2442 if (action != NULL) {
2443 out.Clear();
2444 out.DumpEntry(action);
2445 continue;
2448 BlockData* blockData = dynamic_cast<BlockData*>(entry);
2449 if (blockData == NULL)
2450 continue;
2452 out.Clear();
2454 const char* dump = out.DumpEntry(entry);
2455 int length = strlen(dump);
2456 if (length > 0 && dump[length - 1] == '\n')
2457 length--;
2459 kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
2461 if (printStackTrace) {
2462 out.Clear();
2463 entry->DumpStackTrace(out);
2464 if (out.Size() > 0)
2465 kputs(out.Buffer());
2468 blockData->DumpBlocks(which, offset, size);
2471 return 0;
2473 #endif // BLOCK_CACHE_BLOCK_TRACING >= 2
2476 #endif // DEBUG_BLOCK_CACHE
2479 /*! Traverses through the block_cache list, and returns one cache after the
2480 other. The cache returned is automatically locked when you get it, and
2481 unlocked with the next call to this function. Ignores caches that are in
2482 deletion state.
2483 Returns \c NULL when the end of the list is reached.
2485 static block_cache*
2486 get_next_locked_block_cache(block_cache* last)
2488 MutexLocker _(sCachesLock);
2490 block_cache* cache;
2491 if (last != NULL) {
2492 mutex_unlock(&last->lock);
2494 cache = sCaches.GetNext((block_cache*)&sMarkCache);
2495 sCaches.Remove((block_cache*)&sMarkCache);
2496 } else
2497 cache = sCaches.Head();
2499 if (cache != NULL) {
2500 mutex_lock(&cache->lock);
2501 sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
2504 return cache;
2508 /*! Background thread that continuously checks for pending notifications of
2509 all caches.
2510 Every two seconds, it will also write back up to 64 blocks per cache.
2512 static status_t
2513 block_notifier_and_writer(void* /*data*/)
2515 const bigtime_t kTimeout = 2000000LL;
2516 bigtime_t timeout = kTimeout;
2518 while (true) {
2519 bigtime_t start = system_time();
2521 status_t status = acquire_sem_etc(sEventSemaphore, 1,
2522 B_RELATIVE_TIMEOUT, timeout);
2523 if (status == B_OK) {
2524 flush_pending_notifications();
2525 timeout -= system_time() - start;
2526 continue;
2529 // write 64 blocks of each block_cache every two seconds
2530 // TODO: change this once we have an I/O scheduler
2531 timeout = kTimeout;
2532 size_t usedMemory;
2533 object_cache_get_usage(sBlockCache, &usedMemory);
2535 block_cache* cache = NULL;
2536 while ((cache = get_next_locked_block_cache(cache)) != NULL) {
2537 BlockWriter writer(cache, 64);
2539 size_t cacheUsedMemory;
2540 object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
2541 usedMemory += cacheUsedMemory;
2543 if (cache->num_dirty_blocks) {
2544 // This cache is not using transactions, we'll scan the blocks
2545 // directly
2546 BlockTable::Iterator iterator(cache->hash);
2548 while (iterator.HasNext()) {
2549 cached_block* block = iterator.Next();
2550 if (block->CanBeWritten() && !writer.Add(block))
2551 break;
2554 } else {
2555 TransactionTable::Iterator iterator(cache->transaction_hash);
2557 while (iterator.HasNext()) {
2558 cache_transaction* transaction = iterator.Next();
2559 if (transaction->open) {
2560 if (system_time() > transaction->last_used
2561 + kTransactionIdleTime) {
2562 // Transaction is open but idle
2563 notify_transaction_listeners(cache, transaction,
2564 TRANSACTION_IDLE);
2566 continue;
2569 bool hasLeftOvers;
2570 // we ignore this one
2571 if (!writer.Add(transaction, hasLeftOvers))
2572 break;
2576 writer.Write();
2578 if ((block_cache_used_memory() / B_PAGE_SIZE)
2579 > vm_page_num_pages() / 2) {
2580 // Try to reduce memory usage to half of the available
2581 // RAM at maximum
2582 cache->RemoveUnusedBlocks(1000, 10);
2586 MutexLocker _(sCachesMemoryUseLock);
2587 sUsedMemory = usedMemory;
2590 // never can get here
2591 return B_OK;
2595 /*! Notify function for wait_for_notifications(). */
2596 static void
2597 notify_sync(int32 transactionID, int32 event, void* _cache)
2599 block_cache* cache = (block_cache*)_cache;
2601 cache->condition_variable.NotifyOne();
2605 /*! Must be called with the sCachesLock held. */
2606 static bool
2607 is_valid_cache(block_cache* cache)
2609 ASSERT_LOCKED_MUTEX(&sCachesLock);
2611 DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
2612 while (iterator.HasNext()) {
2613 if (cache == iterator.Next())
2614 return true;
2617 return false;
2621 /*! Waits until all pending notifications are carried out.
2622 Safe to be called from the block writer/notifier thread.
2623 You must not hold the \a cache lock when calling this function.
2625 static void
2626 wait_for_notifications(block_cache* cache)
2628 MutexLocker locker(sCachesLock);
2630 if (find_thread(NULL) == sNotifierWriterThread) {
2631 // We're the notifier thread, don't wait, but flush all pending
2632 // notifications directly.
2633 if (is_valid_cache(cache))
2634 flush_pending_notifications(cache);
2635 return;
2638 // add sync notification
2639 cache_notification notification;
2640 set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
2641 cache);
2643 ConditionVariableEntry entry;
2644 cache->condition_variable.Add(&entry);
2646 add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
2647 locker.Unlock();
2649 // wait for notification hook to be called
2650 entry.Wait();
2654 status_t
2655 block_cache_init(void)
2657 sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
2658 8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
2659 if (sBlockCache == NULL)
2660 return B_NO_MEMORY;
2662 new (&sCaches) DoublyLinkedList<block_cache>;
2663 // manually call constructor
2665 sEventSemaphore = create_sem(0, "block cache event");
2666 if (sEventSemaphore < B_OK)
2667 return sEventSemaphore;
2669 sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
2670 "block notifier/writer", B_LOW_PRIORITY, NULL);
2671 if (sNotifierWriterThread >= B_OK)
2672 resume_thread(sNotifierWriterThread);
2674 #if DEBUG_BLOCK_CACHE
2675 add_debugger_command_etc("block_caches", &dump_caches,
2676 "dumps all block caches", "\n", 0);
2677 add_debugger_command_etc("block_cache", &dump_cache,
2678 "dumps a specific block cache",
2679 "[-bt] <cache-address> [block-number]\n"
2680 " -t lists the transactions\n"
2681 " -b lists all blocks\n", 0);
2682 add_debugger_command("cached_block", &dump_cached_block,
2683 "dumps the specified cached block");
2684 add_debugger_command_etc("transaction", &dump_transaction,
2685 "dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
2686 "Either use a block cache pointer and an ID or a pointer to the transaction.\n"
2687 " -b lists all blocks that are part of this transaction\n", 0);
2688 # if BLOCK_CACHE_BLOCK_TRACING >= 2
2689 add_debugger_command_etc("block_cache_data", &dump_block_data,
2690 "dumps the data blocks logged for the actions",
2691 "[-cpo] <from> [<to> [<offset> [<size>]]]\n"
2692 "If no data specifier is used, all blocks are shown by default.\n"
2693 " -c the current data is shown, if available.\n"
2694 " -p the parent data is shown, if available.\n"
2695 " -o the original data is shown, if available.\n"
2696 " <from> first index of tracing entries to show.\n"
2697 " <to> if given, the last entry. If not, only <from> is shown.\n"
2698 " <offset> the offset of the block data.\n"
2699 " <from> the size of the block data that is dumped\n", 0);
2700 # endif
2701 #endif // DEBUG_BLOCK_CACHE
2703 return B_OK;
2707 size_t
2708 block_cache_used_memory(void)
2710 MutexLocker _(sCachesMemoryUseLock);
2711 return sUsedMemory;
2715 // #pragma mark - public transaction API
2718 int32
2719 cache_start_transaction(void* _cache)
2721 block_cache* cache = (block_cache*)_cache;
2722 TransactionLocker locker(cache);
2724 if (cache->last_transaction && cache->last_transaction->open) {
2725 panic("last transaction (%" B_PRId32 ") still open!\n",
2726 cache->last_transaction->id);
2729 cache_transaction* transaction = new(std::nothrow) cache_transaction;
2730 if (transaction == NULL)
2731 return B_NO_MEMORY;
2733 transaction->id = atomic_add(&cache->next_transaction_id, 1);
2734 cache->last_transaction = transaction;
2736 TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
2737 T(Action("start", cache, transaction));
2739 cache->transaction_hash->Insert(transaction);
2741 return transaction->id;
2745 status_t
2746 cache_sync_transaction(void* _cache, int32 id)
2748 block_cache* cache = (block_cache*)_cache;
2749 bool hadBusy;
2751 TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
2753 do {
2754 TransactionLocker locker(cache);
2755 hadBusy = false;
2757 BlockWriter writer(cache);
2758 TransactionTable::Iterator iterator(cache->transaction_hash);
2760 while (iterator.HasNext()) {
2761 // close all earlier transactions which haven't been closed yet
2762 cache_transaction* transaction = iterator.Next();
2764 if (transaction->busy_writing_count != 0) {
2765 hadBusy = true;
2766 continue;
2768 if (transaction->id <= id && !transaction->open) {
2769 // write back all of their remaining dirty blocks
2770 T(Action("sync", cache, transaction));
2772 bool hasLeftOvers;
2773 writer.Add(transaction, hasLeftOvers);
2775 if (hasLeftOvers) {
2776 // This transaction contains blocks that a previous
2777 // transaction is trying to write back in this write run
2778 hadBusy = true;
2783 status_t status = writer.Write();
2784 if (status != B_OK)
2785 return status;
2786 } while (hadBusy);
2788 wait_for_notifications(cache);
2789 // make sure that all pending TRANSACTION_WRITTEN notifications
2790 // are handled after we return
2791 return B_OK;
2795 status_t
2796 cache_end_transaction(void* _cache, int32 id,
2797 transaction_notification_hook hook, void* data)
2799 block_cache* cache = (block_cache*)_cache;
2800 TransactionLocker locker(cache);
2802 TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
2804 cache_transaction* transaction = lookup_transaction(cache, id);
2805 if (transaction == NULL) {
2806 panic("cache_end_transaction(): invalid transaction ID\n");
2807 return B_BAD_VALUE;
2810 // Write back all pending transaction blocks
2811 status_t status = write_blocks_in_previous_transaction(cache, transaction);
2812 if (status != B_OK)
2813 return status;
2815 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2817 if (hook != NULL
2818 && add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
2819 hook, data) != B_OK) {
2820 return B_NO_MEMORY;
2823 T(Action("end", cache, transaction));
2825 // iterate through all blocks and free the unchanged original contents
2827 cached_block* next;
2828 for (cached_block* block = transaction->first_block; block != NULL;
2829 block = next) {
2830 next = block->transaction_next;
2831 ASSERT(block->previous_transaction == NULL);
2833 if (block->discard) {
2834 // This block has been discarded in the transaction
2835 cache->DiscardBlock(block);
2836 transaction->num_blocks--;
2837 continue;
2840 if (block->original_data != NULL) {
2841 cache->Free(block->original_data);
2842 block->original_data = NULL;
2844 if (block->parent_data != NULL) {
2845 ASSERT(transaction->has_sub_transaction);
2846 cache->FreeBlockParentData(block);
2849 // move the block to the previous transaction list
2850 transaction->blocks.Add(block);
2852 block->previous_transaction = transaction;
2853 block->transaction_next = NULL;
2854 block->transaction = NULL;
2857 transaction->open = false;
2858 return B_OK;
2862 status_t
2863 cache_abort_transaction(void* _cache, int32 id)
2865 block_cache* cache = (block_cache*)_cache;
2866 TransactionLocker locker(cache);
2868 TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
2870 cache_transaction* transaction = lookup_transaction(cache, id);
2871 if (transaction == NULL) {
2872 panic("cache_abort_transaction(): invalid transaction ID\n");
2873 return B_BAD_VALUE;
2876 T(Abort(cache, transaction));
2877 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
2879 // iterate through all blocks and restore their original contents
2881 cached_block* block = transaction->first_block;
2882 cached_block* next;
2883 for (; block != NULL; block = next) {
2884 next = block->transaction_next;
2886 if (block->original_data != NULL) {
2887 TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
2888 "block %" B_PRIdOFF "\n", transaction->id, block->block_number));
2889 memcpy(block->current_data, block->original_data,
2890 cache->block_size);
2891 cache->Free(block->original_data);
2892 block->original_data = NULL;
2894 if (transaction->has_sub_transaction && block->parent_data != NULL)
2895 cache->FreeBlockParentData(block);
2897 block->transaction_next = NULL;
2898 block->transaction = NULL;
2899 block->discard = false;
2900 if (block->previous_transaction == NULL)
2901 block->is_dirty = false;
2904 cache->transaction_hash->Remove(transaction);
2905 delete_transaction(cache, transaction);
2906 return B_OK;
2910 /*! Acknowledges the current parent transaction, and starts a new transaction
2911 from its sub transaction.
2912 The new transaction also gets a new transaction ID.
2914 int32
2915 cache_detach_sub_transaction(void* _cache, int32 id,
2916 transaction_notification_hook hook, void* data)
2918 block_cache* cache = (block_cache*)_cache;
2919 TransactionLocker locker(cache);
2921 TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
2923 cache_transaction* transaction = lookup_transaction(cache, id);
2924 if (transaction == NULL) {
2925 panic("cache_detach_sub_transaction(): invalid transaction ID\n");
2926 return B_BAD_VALUE;
2928 if (!transaction->has_sub_transaction)
2929 return B_BAD_VALUE;
2931 // iterate through all blocks and free the unchanged original contents
2933 status_t status = write_blocks_in_previous_transaction(cache, transaction);
2934 if (status != B_OK)
2935 return status;
2937 // create a new transaction for the sub transaction
2938 cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
2939 if (newTransaction == NULL)
2940 return B_NO_MEMORY;
2942 newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
2943 T(Detach(cache, transaction, newTransaction));
2945 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
2947 if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
2948 data) != B_OK) {
2949 delete newTransaction;
2950 return B_NO_MEMORY;
2953 cached_block* last = NULL;
2954 cached_block* next;
2955 for (cached_block* block = transaction->first_block; block != NULL;
2956 block = next) {
2957 next = block->transaction_next;
2958 ASSERT(block->previous_transaction == NULL);
2960 if (block->discard) {
2961 cache->DiscardBlock(block);
2962 transaction->main_num_blocks--;
2963 continue;
2966 if (block->parent_data != NULL) {
2967 // The block changed in the parent - free the original data, since
2968 // they will be replaced by what is in current.
2969 ASSERT(block->original_data != NULL);
2970 cache->Free(block->original_data);
2972 if (block->parent_data != block->current_data) {
2973 // The block had been changed in both transactions
2974 block->original_data = block->parent_data;
2975 } else {
2976 // The block has only been changed in the parent
2977 block->original_data = NULL;
2980 // move the block to the previous transaction list
2981 transaction->blocks.Add(block);
2982 block->previous_transaction = transaction;
2983 block->parent_data = NULL;
2986 if (block->original_data != NULL) {
2987 // This block had been changed in the current sub transaction,
2988 // we need to move this block over to the new transaction.
2989 ASSERT(block->parent_data == NULL);
2991 if (last == NULL)
2992 newTransaction->first_block = block;
2993 else
2994 last->transaction_next = block;
2996 block->transaction = newTransaction;
2997 last = block;
2998 } else
2999 block->transaction = NULL;
3001 block->transaction_next = NULL;
3004 newTransaction->num_blocks = transaction->sub_num_blocks;
3006 transaction->open = false;
3007 transaction->has_sub_transaction = false;
3008 transaction->num_blocks = transaction->main_num_blocks;
3009 transaction->sub_num_blocks = 0;
3011 cache->transaction_hash->Insert(newTransaction);
3012 cache->last_transaction = newTransaction;
3014 return newTransaction->id;
3018 status_t
3019 cache_abort_sub_transaction(void* _cache, int32 id)
3021 block_cache* cache = (block_cache*)_cache;
3022 TransactionLocker locker(cache);
3024 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
3026 cache_transaction* transaction = lookup_transaction(cache, id);
3027 if (transaction == NULL) {
3028 panic("cache_abort_sub_transaction(): invalid transaction ID\n");
3029 return B_BAD_VALUE;
3031 if (!transaction->has_sub_transaction)
3032 return B_BAD_VALUE;
3034 T(Abort(cache, transaction));
3035 notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
3037 // revert all changes back to the version of the parent
3039 cached_block* block = transaction->first_block;
3040 cached_block* last = NULL;
3041 cached_block* next;
3042 for (; block != NULL; block = next) {
3043 next = block->transaction_next;
3045 if (block->parent_data == NULL) {
3046 // The parent transaction didn't change the block, but the sub
3047 // transaction did - we need to revert to the original data.
3048 // The block is no longer part of the transaction
3049 if (block->original_data != NULL) {
3050 // The block might not have original data if was empty
3051 memcpy(block->current_data, block->original_data,
3052 cache->block_size);
3055 if (last != NULL)
3056 last->transaction_next = next;
3057 else
3058 transaction->first_block = next;
3060 block->transaction_next = NULL;
3061 block->transaction = NULL;
3062 transaction->num_blocks--;
3064 if (block->previous_transaction == NULL) {
3065 cache->Free(block->original_data);
3066 block->original_data = NULL;
3067 block->is_dirty = false;
3069 if (block->ref_count == 0) {
3070 // Move the block into the unused list if possible
3071 block->unused = true;
3072 cache->unused_blocks.Add(block);
3073 cache->unused_block_count++;
3076 } else {
3077 if (block->parent_data != block->current_data) {
3078 // The block has been changed and must be restored - the block
3079 // is still dirty and part of the transaction
3080 TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
3081 "restored contents of block %" B_PRIdOFF "\n",
3082 transaction->id, block->block_number));
3083 memcpy(block->current_data, block->parent_data,
3084 cache->block_size);
3085 cache->Free(block->parent_data);
3086 // The block stays dirty
3088 block->parent_data = NULL;
3089 last = block;
3092 block->discard = false;
3095 // all subsequent changes will go into the main transaction
3096 transaction->has_sub_transaction = false;
3097 transaction->sub_num_blocks = 0;
3099 return B_OK;
3103 status_t
3104 cache_start_sub_transaction(void* _cache, int32 id)
3106 block_cache* cache = (block_cache*)_cache;
3107 TransactionLocker locker(cache);
3109 TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
3111 cache_transaction* transaction = lookup_transaction(cache, id);
3112 if (transaction == NULL) {
3113 panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
3114 id);
3115 return B_BAD_VALUE;
3118 notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
3120 // move all changed blocks up to the parent
3122 cached_block* block = transaction->first_block;
3123 cached_block* next;
3124 for (; block != NULL; block = next) {
3125 next = block->transaction_next;
3127 if (block->parent_data != NULL) {
3128 // There already is an older sub transaction - we acknowledge
3129 // its changes and move its blocks up to the parent
3130 ASSERT(transaction->has_sub_transaction);
3131 cache->FreeBlockParentData(block);
3133 if (block->discard) {
3134 // This block has been discarded in the parent transaction.
3135 // Just throw away any changes made in this transaction, so that
3136 // it can still be reverted to its original contents if needed
3137 ASSERT(block->previous_transaction == NULL);
3138 if (block->original_data != NULL) {
3139 memcpy(block->current_data, block->original_data,
3140 cache->block_size);
3141 block->original_data = NULL;
3143 continue;
3146 // we "allocate" the parent data lazily, that means, we don't copy
3147 // the data (and allocate memory for it) until we need to
3148 block->parent_data = block->current_data;
3151 // all subsequent changes will go into the sub transaction
3152 transaction->has_sub_transaction = true;
3153 transaction->main_num_blocks = transaction->num_blocks;
3154 transaction->sub_num_blocks = 0;
3155 T(Action("start-sub", cache, transaction));
3157 return B_OK;
3161 /*! Adds a transaction listener that gets notified when the transaction
3162 is ended, aborted, written, or idle as specified by \a events.
3163 The listener gets automatically removed when the transaction ends.
3165 status_t
3166 cache_add_transaction_listener(void* _cache, int32 id, int32 events,
3167 transaction_notification_hook hook, void* data)
3169 block_cache* cache = (block_cache*)_cache;
3170 TransactionLocker locker(cache);
3172 cache_transaction* transaction = lookup_transaction(cache, id);
3173 if (transaction == NULL)
3174 return B_BAD_VALUE;
3176 return add_transaction_listener(cache, transaction, events, hook, data);
3180 status_t
3181 cache_remove_transaction_listener(void* _cache, int32 id,
3182 transaction_notification_hook hookFunction, void* data)
3184 block_cache* cache = (block_cache*)_cache;
3185 TransactionLocker locker(cache);
3187 cache_transaction* transaction = lookup_transaction(cache, id);
3188 if (transaction == NULL)
3189 return B_BAD_VALUE;
3191 ListenerList::Iterator iterator = transaction->listeners.GetIterator();
3192 while (iterator.HasNext()) {
3193 cache_listener* listener = iterator.Next();
3194 if (listener->data == data && listener->hook == hookFunction) {
3195 iterator.Remove();
3197 if (listener->events_pending != 0) {
3198 MutexLocker _(sNotificationsLock);
3199 if (listener->events_pending != 0)
3200 cache->pending_notifications.Remove(listener);
3202 delete listener;
3203 return B_OK;
3207 return B_ENTRY_NOT_FOUND;
3211 status_t
3212 cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
3213 long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
3215 cached_block* block = (cached_block*)*_cookie;
3216 block_cache* cache = (block_cache*)_cache;
3217 TransactionLocker locker(cache);
3219 cache_transaction* transaction = lookup_transaction(cache, id);
3220 if (transaction == NULL || !transaction->open)
3221 return B_BAD_VALUE;
3223 if (block == NULL)
3224 block = transaction->first_block;
3225 else
3226 block = block->transaction_next;
3228 if (transaction->has_sub_transaction) {
3229 if (mainOnly) {
3230 // find next block that the parent changed
3231 while (block != NULL && block->parent_data == NULL)
3232 block = block->transaction_next;
3233 } else {
3234 // find next non-discarded block
3235 while (block != NULL && block->discard)
3236 block = block->transaction_next;
3240 if (block == NULL)
3241 return B_ENTRY_NOT_FOUND;
3243 if (_blockNumber)
3244 *_blockNumber = block->block_number;
3245 if (_data)
3246 *_data = mainOnly ? block->parent_data : block->current_data;
3247 if (_unchangedData)
3248 *_unchangedData = block->original_data;
3250 *_cookie = (addr_t)block;
3251 return B_OK;
3255 int32
3256 cache_blocks_in_transaction(void* _cache, int32 id)
3258 block_cache* cache = (block_cache*)_cache;
3259 TransactionLocker locker(cache);
3261 cache_transaction* transaction = lookup_transaction(cache, id);
3262 if (transaction == NULL)
3263 return B_BAD_VALUE;
3265 return transaction->num_blocks;
3269 /*! Returns the number of blocks that are part of the main transaction. If this
3270 transaction does not have a sub transaction yet, this is the same value as
3271 cache_blocks_in_transaction() would return.
3273 int32
3274 cache_blocks_in_main_transaction(void* _cache, int32 id)
3276 block_cache* cache = (block_cache*)_cache;
3277 TransactionLocker locker(cache);
3279 cache_transaction* transaction = lookup_transaction(cache, id);
3280 if (transaction == NULL)
3281 return B_BAD_VALUE;
3283 if (transaction->has_sub_transaction)
3284 return transaction->main_num_blocks;
3286 return transaction->num_blocks;
3290 int32
3291 cache_blocks_in_sub_transaction(void* _cache, int32 id)
3293 block_cache* cache = (block_cache*)_cache;
3294 TransactionLocker locker(cache);
3296 cache_transaction* transaction = lookup_transaction(cache, id);
3297 if (transaction == NULL)
3298 return B_BAD_VALUE;
3300 return transaction->sub_num_blocks;
3304 /*! Check if block is in transaction
3306 bool
3307 cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber)
3309 block_cache* cache = (block_cache*)_cache;
3310 TransactionLocker locker(cache);
3312 cached_block* block = cache->hash->Lookup(blockNumber);
3314 return (block != NULL && block->transaction != NULL
3315 && block->transaction->id == id);
3319 // #pragma mark - public block cache API
3322 void
3323 block_cache_delete(void* _cache, bool allowWrites)
3325 block_cache* cache = (block_cache*)_cache;
3327 if (allowWrites)
3328 block_cache_sync(cache);
3330 mutex_lock(&sCachesLock);
3331 sCaches.Remove(cache);
3332 mutex_unlock(&sCachesLock);
3334 mutex_lock(&cache->lock);
3336 // wait for all blocks to become unbusy
3337 wait_for_busy_reading_blocks(cache);
3338 wait_for_busy_writing_blocks(cache);
3340 // free all blocks
3342 cached_block* block = cache->hash->Clear(true);
3343 while (block != NULL) {
3344 cached_block* next = block->next;
3345 cache->FreeBlock(block);
3346 block = next;
3349 // free all transactions (they will all be aborted)
3351 cache_transaction* transaction = cache->transaction_hash->Clear(true);
3352 while (transaction != NULL) {
3353 cache_transaction* next = transaction->next;
3354 delete transaction;
3355 transaction = next;
3358 delete cache;
3362 void*
3363 block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
3365 block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
3366 readOnly);
3367 if (cache == NULL)
3368 return NULL;
3370 if (cache->Init() != B_OK) {
3371 delete cache;
3372 return NULL;
3375 MutexLocker _(sCachesLock);
3376 sCaches.Add(cache);
3378 return cache;
3382 status_t
3383 block_cache_sync(void* _cache)
3385 block_cache* cache = (block_cache*)_cache;
3387 // We will sync all dirty blocks to disk that have a completed
3388 // transaction or no transaction only
3390 MutexLocker locker(&cache->lock);
3392 BlockWriter writer(cache);
3393 BlockTable::Iterator iterator(cache->hash);
3395 while (iterator.HasNext()) {
3396 cached_block* block = iterator.Next();
3397 if (block->CanBeWritten())
3398 writer.Add(block);
3401 status_t status = writer.Write();
3403 locker.Unlock();
3405 wait_for_notifications(cache);
3406 // make sure that all pending TRANSACTION_WRITTEN notifications
3407 // are handled after we return
3408 return status;
3412 status_t
3413 block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
3415 block_cache* cache = (block_cache*)_cache;
3417 // We will sync all dirty blocks to disk that have a completed
3418 // transaction or no transaction only
3420 if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
3421 panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
3422 " (max %" B_PRIdOFF ")",
3423 blockNumber, cache->max_blocks - 1);
3424 return B_BAD_VALUE;
3427 MutexLocker locker(&cache->lock);
3428 BlockWriter writer(cache);
3430 for (; numBlocks > 0; numBlocks--, blockNumber++) {
3431 cached_block* block = cache->hash->Lookup(blockNumber);
3432 if (block == NULL)
3433 continue;
3435 if (block->CanBeWritten())
3436 writer.Add(block);
3439 status_t status = writer.Write();
3441 locker.Unlock();
3443 wait_for_notifications(cache);
3444 // make sure that all pending TRANSACTION_WRITTEN notifications
3445 // are handled after we return
3446 return status;
3450 /*! Discards a block from the current transaction or from the cache.
3451 You have to call this function when you no longer use a block, ie. when it
3452 might be reclaimed by the file cache in order to make sure they won't
3453 interfere.
3455 void
3456 block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
3458 // TODO: this could be a nice place to issue the ATA trim command
3459 block_cache* cache = (block_cache*)_cache;
3460 TransactionLocker locker(cache);
3462 BlockWriter writer(cache);
3464 for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3465 cached_block* block = cache->hash->Lookup(blockNumber);
3466 if (block != NULL && block->previous_transaction != NULL)
3467 writer.Add(block);
3470 writer.Write();
3471 // TODO: this can fail, too!
3473 blockNumber -= numBlocks;
3474 // reset blockNumber to its original value
3476 for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
3477 cached_block* block = cache->hash->Lookup(blockNumber);
3478 if (block == NULL)
3479 continue;
3481 ASSERT(block->previous_transaction == NULL);
3483 if (block->unused) {
3484 cache->unused_blocks.Remove(block);
3485 cache->unused_block_count--;
3486 cache->RemoveBlock(block);
3487 } else {
3488 if (block->transaction != NULL && block->parent_data != NULL
3489 && block->parent_data != block->current_data) {
3490 panic("Discarded block %" B_PRIdOFF " has already been changed in this "
3491 "transaction!", blockNumber);
3494 // mark it as discarded (in the current transaction only, if any)
3495 block->discard = true;
3501 status_t
3502 block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
3504 block_cache* cache = (block_cache*)_cache;
3505 MutexLocker locker(&cache->lock);
3507 if (cache->read_only) {
3508 panic("tried to make block writable on a read-only cache!");
3509 return B_ERROR;
3512 // TODO: this can be done better!
3513 void* block = get_writable_cached_block(cache, blockNumber,
3514 blockNumber, 1, transaction, false);
3515 if (block != NULL) {
3516 put_cached_block((block_cache*)_cache, blockNumber);
3517 return B_OK;
3520 return B_ERROR;
3524 void*
3525 block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
3526 off_t length, int32 transaction)
3528 block_cache* cache = (block_cache*)_cache;
3529 MutexLocker locker(&cache->lock);
3531 TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3532 blockNumber, transaction));
3533 if (cache->read_only)
3534 panic("tried to get writable block on a read-only cache!");
3536 return get_writable_cached_block(cache, blockNumber, base, length,
3537 transaction, false);
3541 void*
3542 block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
3544 return block_cache_get_writable_etc(_cache, blockNumber,
3545 blockNumber, 1, transaction);
3549 void*
3550 block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
3552 block_cache* cache = (block_cache*)_cache;
3553 MutexLocker locker(&cache->lock);
3555 TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
3556 blockNumber, transaction));
3557 if (cache->read_only)
3558 panic("tried to get empty writable block on a read-only cache!");
3560 return get_writable_cached_block((block_cache*)_cache, blockNumber,
3561 blockNumber, 1, transaction, true);
3565 const void*
3566 block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
3568 block_cache* cache = (block_cache*)_cache;
3569 MutexLocker locker(&cache->lock);
3570 bool allocated;
3572 cached_block* block = get_cached_block(cache, blockNumber, &allocated);
3573 if (block == NULL)
3574 return NULL;
3576 #if BLOCK_CACHE_DEBUG_CHANGED
3577 if (block->compare == NULL)
3578 block->compare = cache->Allocate();
3579 if (block->compare != NULL)
3580 memcpy(block->compare, block->current_data, cache->block_size);
3581 #endif
3582 TB(Get(cache, block));
3584 return block->current_data;
3588 const void*
3589 block_cache_get(void* _cache, off_t blockNumber)
3591 return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
3595 /*! Changes the internal status of a writable block to \a dirty. This can be
3596 helpful in case you realize you don't need to change that block anymore
3597 for whatever reason.
3599 Note, you must only use this function on blocks that were acquired
3600 writable!
3602 status_t
3603 block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
3604 int32 transaction)
3606 block_cache* cache = (block_cache*)_cache;
3607 MutexLocker locker(&cache->lock);
3609 cached_block* block = cache->hash->Lookup(blockNumber);
3610 if (block == NULL)
3611 return B_BAD_VALUE;
3612 if (block->is_dirty == dirty) {
3613 // there is nothing to do for us
3614 return B_OK;
3617 // TODO: not yet implemented
3618 if (dirty)
3619 panic("block_cache_set_dirty(): not yet implemented that way!\n");
3621 return B_OK;
3625 void
3626 block_cache_put(void* _cache, off_t blockNumber)
3628 block_cache* cache = (block_cache*)_cache;
3629 MutexLocker locker(&cache->lock);
3631 put_cached_block(cache, blockNumber);