/*
 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */


#include "vnode_store.h"

#include <KernelExport.h>

#include <condition_variable.h>
#include <file_cache.h>
#include <generic_syscall.h>
#include <low_resource_manager.h>
#include <util/AutoLock.h>
#include <util/kernel_cpp.h>
#include <vm/vm_page.h>
#include <vm/VMCache.h>

#include "IORequest.h"


//#define TRACE_FILE_CACHE
#ifdef TRACE_FILE_CACHE
#	define TRACE(x) dprintf x
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS			32	// 128 kB
#define MAX_FILE_IO_VECS	32

#define BYPASS_IO_SIZE		65536
#define LAST_ACCESSES		3


struct file_cache_ref {
	VMCache*		cache;
	struct vnode*	vnode;
	off_t			last_access[LAST_ACCESSES];
		// TODO: it would probably be enough to only store the least
		// significant 31 bits, and make this uint32 (one bit for
		// write vs. read)
	int32			last_access_index;
	uint16			disabled_count;

	inline void SetLastAccess(int32 index, off_t access, bool isWrite)
	{
		// we remember writes as negative offsets
		last_access[index] = isWrite ? -access : access;
	}

	inline off_t LastAccess(int32 index, bool isWrite) const
	{
		return isWrite ? -last_access[index] : last_access[index];
	}

	inline uint32 LastAccessPageOffset(int32 index, bool isWrite)
	{
		return LastAccess(index, isWrite) >> PAGE_SHIFT;
	}
};
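
// Illustrative sketch (not part of the original source): the last_access
// slots store the end offset of each recent request, negated for writes, so
// a single off_t encodes both position and access type. Offsets below are
// hypothetical.
//
//	file_cache_ref ref = {};
//	ref.SetLastAccess(0, 4096, true);	// write ending at 4096 stores -4096
//	ref.LastAccess(0, true);			// == 4096 (sign restored)
//	ref.LastAccess(0, false);			// == -4096, never matches a read offset
//
// A zero entry simply means "no access recorded in this slot".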


class PrecacheIO : public AsyncIOCallback {
public:
								PrecacheIO(file_cache_ref* ref, off_t offset,
									generic_size_t size);
	virtual						~PrecacheIO();

			status_t			Prepare(vm_page_reservation* reservation);
			void				ReadAsync();

	virtual	void				IOFinished(status_t status,
									bool partialTransfer,
									generic_size_t bytesTransferred);

private:
			file_cache_ref*		fRef;
			VMCache*			fCache;
			vm_page**			fPages;
			size_t				fPageCount;
			ConditionVariable*	fBusyConditions;
			generic_io_vec*		fVecs;
			off_t				fOffset;
			uint32				fVecCount;
			generic_size_t		fSize;
#if DEBUG_PAGE_ACCESS
			thread_id			fAllocatingThread;
#endif
};


typedef status_t (*cache_func)(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages);

static void add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
	generic_addr_t address, generic_size_t size);


static struct cache_module_info* sCacheModule;

static const uint32 kZeroVecCount = 32;
static const size_t kZeroVecSize = kZeroVecCount * B_PAGE_SIZE;
static phys_addr_t sZeroPage;	// physical address
static generic_io_vec sZeroVecs[kZeroVecCount];


PrecacheIO::PrecacheIO(file_cache_ref* ref, off_t offset, generic_size_t size)
	:
	fRef(ref),
	fCache(ref->cache),
	fPages(NULL),
	fVecs(NULL),
	fOffset(offset),
	fVecCount(0),
	fSize(size)
{
	fPageCount = (size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;
	fCache->AcquireRefLocked();
}


PrecacheIO::~PrecacheIO()
{
	delete[] fPages;
	delete[] fVecs;
	fCache->ReleaseRefLocked();
}


status_t
PrecacheIO::Prepare(vm_page_reservation* reservation)
{
	fPages = new(std::nothrow) vm_page*[fPageCount];
	if (fPages == NULL)
		return B_NO_MEMORY;

	fVecs = new(std::nothrow) generic_io_vec[fPageCount];
	if (fVecs == NULL)
		return B_NO_MEMORY;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < fSize; pos += B_PAGE_SIZE) {
		vm_page* page = vm_page_allocate_page(reservation,
			PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		fCache->InsertPage(page, fOffset + pos);

		add_to_iovec(fVecs, fVecCount, fPageCount,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
		fPages[pos >> PAGE_SHIFT] = page;
	}

#if DEBUG_PAGE_ACCESS
	fAllocatingThread = find_thread(NULL);
#endif

	return B_OK;
}


void
PrecacheIO::ReadAsync()
{
	// This object is going to be deleted after the I/O request has been
	// fulfilled
	vfs_asynchronous_read_pages(fRef->vnode, NULL, fOffset, fVecs, fVecCount,
		fSize, B_PHYSICAL_IO_REQUEST, this);
}


void
PrecacheIO::IOFinished(status_t status, bool partialTransfer,
	generic_size_t bytesTransferred)
{
	AutoLocker<VMCache> locker(fCache);

	// Make successfully loaded pages accessible again (partially
	// transferred pages are considered failed)
	phys_size_t pagesTransferred
		= (bytesTransferred + B_PAGE_SIZE - 1) / B_PAGE_SIZE;

	if (fOffset + (off_t)bytesTransferred > fCache->virtual_end)
		bytesTransferred = fCache->virtual_end - fOffset;

	for (uint32 i = 0; i < pagesTransferred; i++) {
		if (i == pagesTransferred - 1
			&& (bytesTransferred % B_PAGE_SIZE) != 0) {
			// clear partial page
			size_t bytesTouched = bytesTransferred % B_PAGE_SIZE;
			vm_memset_physical(
				((phys_addr_t)fPages[i]->physical_page_number << PAGE_SHIFT)
					+ bytesTouched,
				0, B_PAGE_SIZE - bytesTouched);
		}

		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);

		fCache->MarkPageUnbusy(fPages[i]);

		DEBUG_PAGE_ACCESS_END(fPages[i]);
	}

	// Free pages after failed I/O
	for (uint32 i = pagesTransferred; i < fPageCount; i++) {
		DEBUG_PAGE_ACCESS_TRANSFER(fPages[i], fAllocatingThread);
		fCache->NotifyPageEvents(fPages[i], PAGE_EVENT_NOT_BUSY);
		fCache->RemovePage(fPages[i]);
		vm_page_set_state(fPages[i], PAGE_STATE_FREE);
	}

	delete this;
}


static void
add_to_iovec(generic_io_vec* vecs, uint32 &index, uint32 max,
	generic_addr_t address, generic_size_t size)
{
	if (index > 0 && vecs[index - 1].base + vecs[index - 1].length == address) {
		// the iovec can be combined with the previous one
		vecs[index - 1].length += size;
		return;
	}

	if (index == max)
		panic("no more space for iovecs!");

	// we need to start a new iovec
	vecs[index].base = address;
	vecs[index].length = size;
	index++;
}
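
// Illustrative sketch (not part of the original source): physically contiguous
// ranges collapse into a single vec. The addresses are hypothetical and assume
// 4 kB pages.
//
//	generic_io_vec vecs[4];
//	uint32 count = 0;
//	add_to_iovec(vecs, count, 4, 0x10000, B_PAGE_SIZE);	// starts vec 0
//	add_to_iovec(vecs, count, 4, 0x11000, B_PAGE_SIZE);	// adjacent: vec 0 grows
//	add_to_iovec(vecs, count, 4, 0x20000, B_PAGE_SIZE);	// gap: starts vec 1
//	// count == 2, vecs[0].length == 2 * B_PAGE_SIZE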


static inline bool
access_is_sequential(file_cache_ref* ref)
{
	return ref->last_access[ref->last_access_index] != 0;
}


static inline void
push_access(file_cache_ref* ref, off_t offset, generic_size_t bytes,
	bool isWrite)
{
	TRACE(("%p: push %Ld, %ld, %s\n", ref, offset, bytes,
		isWrite ? "write" : "read"));

	int32 index = ref->last_access_index;
	int32 previous = index - 1;
	if (previous < 0)
		previous = LAST_ACCESSES - 1;

	if (offset != ref->LastAccess(previous, isWrite))
		ref->last_access[previous] = 0;

	ref->SetLastAccess(index, offset + bytes, isWrite);

	if (++index >= LAST_ACCESSES)
		index = 0;
	ref->last_access_index = index;
}
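
// Illustrative sketch (not part of the original source): how the heuristic
// above detects streaming access. Offsets are hypothetical; LAST_ACCESSES is 3.
//
//	push_access(ref, 0, 4096, false);		// read [0, 4096)
//	push_access(ref, 4096, 4096, false);	// read [4096, 8192)
//	push_access(ref, 8192, 4096, false);	// read [8192, 12288)
//	// three reads, each starting where the previous one ended:
//	// access_is_sequential(ref) is now true
//
//	push_access(ref, 65536, 4096, false);	// jump: the previous slot gets
//											// zeroed, so the heuristic soon
//											// reports false again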


static void
reserve_pages(file_cache_ref* ref, vm_page_reservation* reservation,
	size_t reservePages, bool isWrite)
{
	if (low_resource_state(B_KERNEL_RESOURCE_PAGES) != B_NO_LOW_RESOURCE) {
		VMCache* cache = ref->cache;
		cache->Lock();

		if (cache->consumers.IsEmpty() && cache->areas == NULL
			&& access_is_sequential(ref)) {
			// we are not mapped, and we're accessed sequentially

			if (isWrite) {
				// Just write some pages back, and actually wait until they
				// have been written back in order to relieve the page pressure
				// a bit.
				int32 index = ref->last_access_index;
				int32 previous = index - 1;
				if (previous < 0)
					previous = LAST_ACCESSES - 1;

				vm_page_write_modified_page_range(cache,
					ref->LastAccessPageOffset(previous, true),
					ref->LastAccessPageOffset(index, true));
			} else {
				// free some pages from our cache
				// TODO: start with oldest
				uint32 left = reservePages;
				vm_page* page;
				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
						(page = it.Next()) != NULL && left > 0;) {
					if (page->State() == PAGE_STATE_CACHED && !page->busy) {
						DEBUG_PAGE_ACCESS_START(page);
						ASSERT(!page->IsMapped());
						ASSERT(!page->modified);
						cache->RemovePage(page);
						vm_page_set_state(page, PAGE_STATE_FREE);

						left--;
					}
				}
			}
		}

		cache->Unlock();
	}

	vm_page_reserve_pages(reservation, reservePages, VM_PRIORITY_USER);
}
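
// Illustrative sketch (not part of the original source): what the low-memory
// path above amounts to for a large streaming read. The ref and the count are
// hypothetical.
//
//	vm_page_reservation reservation;
//	reserve_pages(ref, &reservation, 32, false);
//		// with B_NO_LOW_RESOURCE: just vm_page_reserve_pages()
//		// under pressure, unmapped + sequential: cached pages of this very
//		// file are freed first, so streaming a big file mostly recycles its
//		// own pages instead of evicting the rest of the system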


static inline status_t
read_pages_and_clear_partial(file_cache_ref* ref, void* cookie, off_t offset,
	const generic_io_vec* vecs, size_t count, uint32 flags,
	generic_size_t* _numBytes)
{
	generic_size_t bytesUntouched = *_numBytes;

	status_t status = vfs_read_pages(ref->vnode, cookie, offset, vecs, count,
		flags, _numBytes);

	generic_size_t bytesEnd = *_numBytes;

	if (offset + (off_t)bytesEnd > ref->cache->virtual_end)
		bytesEnd = ref->cache->virtual_end - offset;

	if (status == B_OK && bytesEnd < bytesUntouched) {
		// Clear out any leftovers that were not touched by the above read.
		// We're doing this here so that not every file system/device has to
		// implement this.
		bytesUntouched -= bytesEnd;

		for (int32 i = count; i-- > 0 && bytesUntouched != 0; ) {
			generic_size_t length = min_c(bytesUntouched, vecs[i].length);
			vm_memset_physical(vecs[i].base + vecs[i].length - length, 0,
				length);

			bytesUntouched -= length;
		}
	}

	return status;
}


/*!	Reads the requested amount of data into the cache, and allocates
	pages needed to fulfill that request. This function is called by cache_io().
	It can only handle a certain amount of bytes, and the caller must make
	sure that it matches that criterion.
	The cache_ref lock must be held when calling this function; during
	operation it will unlock the cache, though.
*/
static status_t
read_into_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_into_cache(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	VMCache* cache = ref->cache;

	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;

	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation, PAGE_STATE_CACHED | VM_PAGE_ALLOC_BUSY);

		cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
			// TODO: check if the array is large enough (currently panics)!
	}

	push_access(ref, offset, bufferSize, false);
	cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// read file into reserved pages
	status_t status = read_pages_and_clear_partial(ref, cookie, offset, vecs,
		vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
	if (status != B_OK) {
		// reading failed, free allocated pages

		dprintf("file_cache: read pages failed: %s\n", strerror(status));

		cache->Lock();

		for (int32 i = 0; i < pageIndex; i++) {
			cache->NotifyPageEvents(pages[i], PAGE_EVENT_NOT_BUSY);
			cache->RemovePage(pages[i]);
			vm_page_set_state(pages[i], PAGE_STATE_FREE);
		}

		return status;
	}

	// copy the pages if needed and unmap them again

	for (int32 i = 0; i < pageIndex; i++) {
		if (useBuffer && bufferSize != 0) {
			size_t bytes = min_c(bufferSize, (size_t)B_PAGE_SIZE - pageOffset);

			vm_memcpy_from_physical((void*)buffer,
				pages[i]->physical_page_number * B_PAGE_SIZE + pageOffset,
				bytes, IS_USER_ADDRESS(buffer));

			buffer += bytes;
			bufferSize -= bytes;
			pageOffset = 0;
		}
	}

	reserve_pages(ref, reservation, reservePages, false);
	cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		DEBUG_PAGE_ACCESS_END(pages[i]);

		cache->MarkPageUnbusy(pages[i]);
	}

	return B_OK;
}
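
// Illustrative sketch (not part of the original source): the locking protocol
// the cache_func hooks follow, paraphrased from read_into_cache() above.
//
//	// caller (cache_io) holds the cache lock and a page reservation
//	// 1. allocate and insert busy pages (requires lock + reservation)
//	// 2. push_access(), cache->Unlock(), vm_page_unreserve_pages()
//	// 3. perform the vfs I/O with the lock dropped
//	// 4. reserve_pages() for the next chunk, re-lock the cache
//	// 5. mark the pages unbusy and return with the lock held again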


static status_t
read_from_file(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	TRACE(("read_from_file(offset = %Ld, pageOffset = %ld, buffer = %#lx, "
		"bufferSize = %lu\n", offset, pageOffset, buffer, bufferSize));

	generic_io_vec vec;
	vec.base = buffer;
	vec.length = bufferSize;

	push_access(ref, offset, bufferSize, false);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	generic_size_t toRead = bufferSize;
	status_t status = vfs_read_pages(ref->vnode, cookie, offset + pageOffset,
		&vec, 1, 0, &toRead);

	reserve_pages(ref, reservation, reservePages, false);
	ref->cache->Lock();

	return status;
}


/*!	Like read_into_cache() but writes data into the cache.
	To preserve data consistency, it might also read pages into the cache,
	though, if only a partial page gets written.
	The same restrictions apply.
*/
static status_t
write_to_cache(file_cache_ref* ref, void* cookie, off_t offset,
	int32 pageOffset, addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	// TODO: We're using way too much stack! Rather allocate a sufficiently
	// large chunk on the heap.
	generic_io_vec vecs[MAX_IO_VECS];
	uint32 vecCount = 0;
	generic_size_t numBytes = PAGE_ALIGN(pageOffset + bufferSize);
	vm_page* pages[MAX_IO_VECS];
	int32 pageIndex = 0;
	status_t status = B_OK;

	// ToDo: this should be settable somewhere
	bool writeThrough = false;

	// allocate pages for the cache and mark them busy
	for (generic_size_t pos = 0; pos < numBytes; pos += B_PAGE_SIZE) {
		// TODO: if space is becoming tight, and this cache is already grown
		//	big - shouldn't we better steal the pages directly in that case?
		//	(a working set like approach for the file cache)
		// TODO: the pages we allocate here should have been reserved upfront
		//	in cache_io()
		vm_page* page = pages[pageIndex++] = vm_page_allocate_page(
			reservation,
			(writeThrough ? PAGE_STATE_CACHED : PAGE_STATE_MODIFIED)
				| VM_PAGE_ALLOC_BUSY);

		page->modified = !writeThrough;

		ref->cache->InsertPage(page, offset + pos);

		add_to_iovec(vecs, vecCount, MAX_IO_VECS,
			page->physical_page_number * B_PAGE_SIZE, B_PAGE_SIZE);
	}

	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	// copy contents (and read in partially written pages first)

	if (pageOffset != 0) {
		// This is only a partial write, so we have to read the rest of the page
		// from the file to have consistent data in the cache
		generic_io_vec readVec = { vecs[0].base, B_PAGE_SIZE };
		generic_size_t bytesRead = B_PAGE_SIZE;

		status = vfs_read_pages(ref->vnode, cookie, offset, &readVec, 1,
			B_PHYSICAL_IO_REQUEST, &bytesRead);
		// ToDo: handle errors for real!
		if (status < B_OK)
			panic("1. vfs_read_pages() failed: %s!\n", strerror(status));
	}

	size_t lastPageOffset = (pageOffset + bufferSize) % B_PAGE_SIZE;
	if (lastPageOffset != 0) {
		// get the last page in the I/O vectors
		generic_addr_t last = vecs[vecCount - 1].base
			+ vecs[vecCount - 1].length - B_PAGE_SIZE;

		if ((off_t)(offset + pageOffset + bufferSize) == ref->cache->virtual_end) {
			// the space in the page after this write action needs to be cleaned
			vm_memset_physical(last + lastPageOffset, 0,
				B_PAGE_SIZE - lastPageOffset);
		} else {
			// the end of this write does not happen on a page boundary, so we
			// need to fetch the last page before we can update it
			generic_io_vec readVec = { last, B_PAGE_SIZE };
			generic_size_t bytesRead = B_PAGE_SIZE;

			status = vfs_read_pages(ref->vnode, cookie,
				PAGE_ALIGN(offset + pageOffset + bufferSize) - B_PAGE_SIZE,
				&readVec, 1, B_PHYSICAL_IO_REQUEST, &bytesRead);
			// ToDo: handle errors for real!
			if (status < B_OK)
				panic("vfs_read_pages() failed: %s!\n", strerror(status));

			if (bytesRead < B_PAGE_SIZE) {
				// the space beyond the file size needs to be cleaned
				vm_memset_physical(last + bytesRead, 0,
					B_PAGE_SIZE - bytesRead);
			}
		}
	}

	for (uint32 i = 0; i < vecCount; i++) {
		generic_addr_t base = vecs[i].base;
		generic_size_t bytes = min_c((generic_size_t)bufferSize,
			generic_size_t(vecs[i].length - pageOffset));

		if (useBuffer) {
			// copy data from user buffer
			vm_memcpy_to_physical(base + pageOffset, (void*)buffer, bytes,
				IS_USER_ADDRESS(buffer));
		} else {
			// clear buffer instead
			vm_memset_physical(base + pageOffset, 0, bytes);
		}

		bufferSize -= bytes;
		buffer += bytes;
		pageOffset = 0;
	}

	if (writeThrough) {
		// write cached pages back to the file if we were asked to do that
		status_t status = vfs_write_pages(ref->vnode, cookie, offset, vecs,
			vecCount, B_PHYSICAL_IO_REQUEST, &numBytes);
		if (status < B_OK) {
			// ToDo: remove allocated pages, ...?
			panic("file_cache: remove allocated pages! write pages failed: %s\n",
				strerror(status));
		}
	}

	reserve_pages(ref, reservation, reservePages, true);
	ref->cache->Lock();

	// make the pages accessible in the cache
	for (int32 i = pageIndex; i-- > 0;) {
		ref->cache->MarkPageUnbusy(pages[i]);

		DEBUG_PAGE_ACCESS_END(pages[i]);
	}

	return status;
}
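
// Illustrative sketch (not part of the original source): why write_to_cache()
// reads partial pages back first. Offsets are hypothetical, pages are 4 kB.
//
//	// writing 100 bytes at file offset 4196 (pageOffset = 100 into page 1):
//	//   - the head of page 1 (bytes 0..99) is read from the file first
//	//   - if the write ends short of a page boundary and not at the file's
//	//     end, the tail of the last touched page is read back too; at the
//	//     file's end the tail is simply zeroed
//	// afterwards the cached page matches the file plus the new bytes, so it
//	// can later be written back as a whole page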


static status_t
write_to_file(file_cache_ref* ref, void* cookie, off_t offset, int32 pageOffset,
	addr_t buffer, size_t bufferSize, bool useBuffer,
	vm_page_reservation* reservation, size_t reservePages)
{
	push_access(ref, offset, bufferSize, true);
	ref->cache->Unlock();
	vm_page_unreserve_pages(reservation);

	status_t status = B_OK;

	if (!useBuffer) {
		while (bufferSize > 0) {
			generic_size_t written = min_c(bufferSize, kZeroVecSize);
			status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
			if (status != B_OK)
				break;

			bufferSize -= written;
			pageOffset += written;
		}
	} else {
		generic_io_vec vec;
		vec.base = buffer;
		vec.length = bufferSize;
		generic_size_t toWrite = bufferSize;
		status = vfs_write_pages(ref->vnode, cookie, offset + pageOffset,
			&vec, 1, 0, &toWrite);
	}

	reserve_pages(ref, reservation, reservePages, true);
	ref->cache->Lock();

	return status;
}


static inline status_t
satisfy_cache_io(file_cache_ref* ref, void* cookie, cache_func function,
	off_t offset, addr_t buffer, bool useBuffer, int32 &pageOffset,
	size_t bytesLeft, size_t &reservePages, off_t &lastOffset,
	addr_t &lastBuffer, int32 &lastPageOffset, size_t &lastLeft,
	size_t &lastReservedPages, vm_page_reservation* reservation)
{
	if (lastBuffer == buffer)
		return B_OK;

	size_t requestSize = buffer - lastBuffer;
	reservePages = min_c(MAX_IO_VECS, (lastLeft - requestSize
		+ lastPageOffset + B_PAGE_SIZE - 1) >> PAGE_SHIFT);

	status_t status = function(ref, cookie, lastOffset, lastPageOffset,
		lastBuffer, requestSize, useBuffer, reservation, reservePages);
	if (status == B_OK) {
		lastReservedPages = reservePages;
		lastBuffer = buffer;
		lastLeft = bytesLeft;
		lastOffset = offset;
		lastPageOffset = 0;
		pageOffset = 0;
	}

	return status;
}


static status_t
cache_io(void* _cacheRef, void* cookie, off_t offset, addr_t buffer,
	size_t* _size, bool doWrite)
{
	if (_cacheRef == NULL)
		panic("cache_io() called with NULL ref!\n");

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	VMCache* cache = ref->cache;
	off_t fileSize = cache->virtual_end;
	bool useBuffer = buffer != 0;

	TRACE(("cache_io(ref = %p, offset = %Ld, buffer = %p, size = %lu, %s)\n",
		ref, offset, (void*)buffer, *_size, doWrite ? "write" : "read"));

	// out of bounds access?
	if (offset >= fileSize || offset < 0) {
		*_size = 0;
		return B_OK;
	}

	int32 pageOffset = offset & (B_PAGE_SIZE - 1);
	size_t size = *_size;
	offset -= pageOffset;

	if ((off_t)(offset + pageOffset + size) > fileSize) {
		// adapt size to be within the file's offsets
		size = fileSize - pageOffset - offset;
		*_size = size;
	}

	// "offset" and "lastOffset" are always aligned to B_PAGE_SIZE,
	// the "last*" variables always point to the end of the last
	// satisfied request part

	const uint32 kMaxChunkSize = MAX_IO_VECS * B_PAGE_SIZE;
	size_t bytesLeft = size, lastLeft = size;
	int32 lastPageOffset = pageOffset;
	addr_t lastBuffer = buffer;
	off_t lastOffset = offset;
	size_t lastReservedPages = min_c(MAX_IO_VECS, (pageOffset + bytesLeft
		+ B_PAGE_SIZE - 1) >> PAGE_SHIFT);
	size_t reservePages = 0;
	size_t pagesProcessed = 0;
	cache_func function = NULL;

	vm_page_reservation reservation;
	reserve_pages(ref, &reservation, lastReservedPages, doWrite);

	AutoLocker<VMCache> locker(cache);

	while (bytesLeft > 0) {
		// Periodically reevaluate the low memory situation and select the
		// read/write hook accordingly
		if (pagesProcessed % 32 == 0) {
			if (size >= BYPASS_IO_SIZE
				&& low_resource_state(B_KERNEL_RESOURCE_PAGES)
					!= B_NO_LOW_RESOURCE) {
				// In low memory situations we bypass the cache beyond a
				// certain I/O size
				function = doWrite ? write_to_file : read_from_file;
			} else {
				function = doWrite ? write_to_cache : read_into_cache;
			}
		}

		// check if this page is already in memory
		vm_page* page = cache->LookupPage(offset);
		if (page != NULL) {
			// The page may be busy - since we need to unlock the cache sometime
			// in the near future, we need to satisfy the request of the pages
			// we didn't get yet (to make sure no one else interferes in the
			// meantime).
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;

			// Since satisfy_cache_io() unlocks the cache, we need to look up
			// the page again.
			page = cache->LookupPage(offset);
			if (page != NULL && page->busy) {
				cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);
				continue;
			}
		}

		size_t bytesInPage = min_c(size_t(B_PAGE_SIZE - pageOffset), bytesLeft);

		TRACE(("lookup page from offset %Ld: %p, size = %lu, pageOffset "
			"= %lu\n", offset, page, bytesLeft, pageOffset));

		if (page != NULL) {
			if (doWrite || useBuffer) {
				// Since the following user_mem{cpy,set}() might cause a page
				// fault, which in turn might cause pages to be reserved, we
				// need to unlock the cache temporarily to avoid a potential
				// deadlock. To make sure that our page doesn't go away, we mark
				// it busy for the time.

				// copy the contents of the page already in memory
				phys_addr_t pageAddress
					= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
						+ pageOffset;
				bool userBuffer = IS_USER_ADDRESS(buffer);
				if (doWrite) {
					if (useBuffer) {
						vm_memcpy_to_physical(pageAddress, (void*)buffer,
							bytesInPage, userBuffer);
					} else {
						vm_memset_physical(pageAddress, 0, bytesInPage);
					}
				} else if (useBuffer) {
					vm_memcpy_from_physical((void*)buffer, pageAddress,
						bytesInPage, userBuffer);
				}
			}

			if (doWrite) {
				DEBUG_PAGE_ACCESS_START(page);

				page->modified = true;

				if (page->State() != PAGE_STATE_MODIFIED)
					vm_page_set_state(page, PAGE_STATE_MODIFIED);

				DEBUG_PAGE_ACCESS_END(page);
			}

			cache->MarkPageUnbusy(page);

			// If it is cached only, requeue the page, so the respective queue
			// roughly remains LRU first sorted.
			if (page->State() == PAGE_STATE_CACHED
					|| page->State() == PAGE_STATE_MODIFIED) {
				DEBUG_PAGE_ACCESS_START(page);
				vm_page_requeue(page, true);
				DEBUG_PAGE_ACCESS_END(page);
			}

			if (bytesLeft <= bytesInPage) {
				// we've read the last page, so we're done!
				vm_page_unreserve_pages(&reservation);
				return B_OK;
			}

			// prepare a potential gap request
			lastBuffer = buffer + bytesInPage;
			lastLeft = bytesLeft - bytesInPage;
			lastOffset = offset + B_PAGE_SIZE;
			lastPageOffset = 0;
		}

		if (bytesLeft <= bytesInPage)
			break;

		buffer += bytesInPage;
		bytesLeft -= bytesInPage;
		pageOffset = 0;
		offset += B_PAGE_SIZE;
		pagesProcessed++;

		if (buffer - lastBuffer + lastPageOffset >= kMaxChunkSize) {
			status_t status = satisfy_cache_io(ref, cookie, function, offset,
				buffer, useBuffer, pageOffset, bytesLeft, reservePages,
				lastOffset, lastBuffer, lastPageOffset, lastLeft,
				lastReservedPages, &reservation);
			if (status != B_OK)
				return status;
		}
	}

	// fill the last remaining bytes of the request (either write or read)

	return function(ref, cookie, lastOffset, lastPageOffset, lastBuffer,
		lastLeft, useBuffer, &reservation, 0);
}
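
// Illustrative sketch (not part of the original source): how the public hooks
// below drive cache_io(). cacheRef, cookie, buffer and the size are
// hypothetical.
//
//	size_t bytes = 16 * 1024;
//	status_t status = cache_io(cacheRef, cookie, 0, (addr_t)buffer, &bytes,
//		false);	// read 16 kB at offset 0 through the cache
//	// on return, bytes holds the amount actually read (clamped to the file
//	// size); the pages touched on the way stay in the vnode's VMCache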


static status_t
file_cache_control(const char* subsystem, uint32 function, void* buffer,
	size_t bufferSize)
{
	switch (function) {
		case CACHE_CLEAR:
			// ToDo: clear the cache
			dprintf("cache_control: clear cache!\n");
			return B_OK;

		case CACHE_SET_MODULE:
		{
			cache_module_info* module = sCacheModule;

			// unset previous module

			if (sCacheModule != NULL) {
				sCacheModule = NULL;
				snooze(100000);	// 0.1 secs
				put_module(module->info.name);
			}

			// get new module, if any

			if (buffer == NULL)
				return B_OK;

			char name[B_FILE_NAME_LENGTH];
			if (!IS_USER_ADDRESS(buffer)
				|| user_strlcpy(name, (char*)buffer,
						B_FILE_NAME_LENGTH) < B_OK)
				return B_BAD_ADDRESS;

			if (strncmp(name, CACHE_MODULES_NAME, strlen(CACHE_MODULES_NAME)))
				return B_BAD_VALUE;

			dprintf("cache_control: set module %s!\n", name);

			status_t status = get_module(name, (module_info**)&module);
			if (status == B_OK)
				sCacheModule = module;

			return status;
		}
	}

	return B_BAD_HANDLER;
}


// #pragma mark - private kernel API


extern "C" void
cache_prefetch_vnode(struct vnode* vnode, off_t offset, size_t size)
{
	VMCache* cache;
	if (vfs_get_vnode_cache(vnode, &cache, false) != B_OK)
		return;

	file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
	off_t fileSize = cache->virtual_end;

	if ((off_t)(offset + size) > fileSize)
		size = fileSize - offset;

	// "offset" and "size" are always aligned to B_PAGE_SIZE,
	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = ROUNDUP(size, B_PAGE_SIZE);

	size_t reservePages = size / B_PAGE_SIZE;

	// Don't do anything if we don't have the resources left, or the cache
	// already contains more than 2/3 of its pages
	if (offset >= fileSize || vm_page_num_unused_pages() < 2 * reservePages
		|| 3 * cache->page_count > 2 * fileSize / B_PAGE_SIZE) {
		cache->ReleaseRef();
		return;
	}

	size_t bytesToRead = 0;
	off_t lastOffset = offset;

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, reservePages, VM_PRIORITY_USER);

	cache->Lock();

	while (true) {
		// check if this page is already in memory
		if (offset < fileSize) {
			vm_page* page = cache->LookupPage(offset);

			offset += B_PAGE_SIZE;
			if (page == NULL) {
				bytesToRead += B_PAGE_SIZE;
				continue;
			}
		}

		if (bytesToRead != 0) {
			// read the part before the current page (or the end of the request)
			PrecacheIO* io = new(std::nothrow) PrecacheIO(ref, lastOffset,
				bytesToRead);
			if (io == NULL || io->Prepare(&reservation) != B_OK) {
				delete io;
				break;
			}

			// we must not have the cache locked during I/O
			cache->Unlock();
			io->ReadAsync();
			cache->Lock();

			bytesToRead = 0;
		}

		if (offset >= fileSize) {
			// we have reached the end of the request
			break;
		}

		lastOffset = offset;
	}

	cache->ReleaseRefAndUnlock();
	vm_page_unreserve_pages(&reservation);
}


extern "C" void
cache_prefetch(dev_t mountID, ino_t vnodeID, off_t offset, size_t size)
{
	// ToDo: schedule prefetch

	TRACE(("cache_prefetch(vnode %ld:%Ld)\n", mountID, vnodeID));

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode;
	if (vfs_get_vnode(mountID, vnodeID, true, &vnode) != B_OK)
		return;

	cache_prefetch_vnode(vnode, offset, size);
	vfs_put_vnode(vnode);
}


extern "C" void
cache_node_opened(struct vnode* vnode, int32 fdType, VMCache* cache,
	dev_t mountID, ino_t parentID, ino_t vnodeID, const char* name)
{
	if (sCacheModule == NULL || sCacheModule->node_opened == NULL)
		return;

	off_t size = -1;
	if (cache != NULL) {
		file_cache_ref* ref = ((VMVnodeCache*)cache)->FileCacheRef();
		if (ref != NULL)
			size = cache->virtual_end;
	}

	sCacheModule->node_opened(vnode, fdType, mountID, parentID, vnodeID, name,
		size);
}


extern "C" void
cache_node_closed(struct vnode* vnode, int32 fdType, VMCache* cache,
	dev_t mountID, ino_t vnodeID)
{
	if (sCacheModule == NULL || sCacheModule->node_closed == NULL)
		return;

	int32 accessType = 0;
	if (cache != NULL) {
		// ToDo: set accessType
	}

	sCacheModule->node_closed(vnode, fdType, mountID, vnodeID, accessType);
}


extern "C" void
cache_node_launched(size_t argCount, char* const* args)
{
	if (sCacheModule == NULL || sCacheModule->node_launched == NULL)
		return;

	sCacheModule->node_launched(argCount, args);
}


extern "C" status_t
file_cache_init_post_boot_device(void)
{
	// ToDo: get cache module out of driver settings

	if (get_module("file_cache/launch_speedup/v1",
			(module_info**)&sCacheModule) == B_OK) {
		dprintf("** opened launch speedup: %" B_PRId64 "\n", system_time());
	}

	return B_OK;
}


extern "C" status_t
file_cache_init(void)
{
	// allocate a clean page we can use for writing zeroes
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 1, VM_PRIORITY_SYSTEM);
	vm_page* page = vm_page_allocate_page(&reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
	vm_page_unreserve_pages(&reservation);

	sZeroPage = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;

	for (uint32 i = 0; i < kZeroVecCount; i++) {
		sZeroVecs[i].base = sZeroPage;
		sZeroVecs[i].length = B_PAGE_SIZE;
	}

	register_generic_syscall(CACHE_SYSCALLS, file_cache_control, 1, 0);
	return B_OK;
}
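
// Illustrative sketch (not part of the original source): sZeroVecs maps the
// same zero-filled physical page kZeroVecCount times, so up to kZeroVecSize
// (32 * 4 kB = 128 kB) of zeroes can be written per vfs_write_pages() call
// without allocating a zero buffer of that size; write_to_file() and
// file_cache_write() use it for NULL buffers. Names below are hypothetical.
//
//	generic_size_t written = min_c(bytesLeft, kZeroVecSize);
//	vfs_write_pages(vnode, cookie, offset, sZeroVecs, kZeroVecCount,
//		B_PHYSICAL_IO_REQUEST, &written);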


// #pragma mark - public FS API


extern "C" void*
file_cache_create(dev_t mountID, ino_t vnodeID, off_t size)
{
	TRACE(("file_cache_create(mountID = %ld, vnodeID = %Ld, size = %Ld)\n",
		mountID, vnodeID, size));

	file_cache_ref* ref = new file_cache_ref;
	if (ref == NULL)
		return NULL;

	memset(ref->last_access, 0, sizeof(ref->last_access));
	ref->last_access_index = 0;
	ref->disabled_count = 0;

	// TODO: delay VMCache creation until data is
	// requested/written for the first time? Listing lots of
	// files in Tracker (and elsewhere) could be slowed down.
	// Since the file_cache_ref itself doesn't have a lock,
	// we would need to "rent" one during construction, possibly
	// the vnode lock, maybe a dedicated one.
	// As there shouldn't be too much contention, we could also
	// use atomic_test_and_set(), and free the resources again
	// when that fails...

	// Get the vnode for the object
	// (note, this does not grab a reference to the node)
	if (vfs_lookup_vnode(mountID, vnodeID, &ref->vnode) != B_OK)
		goto err1;

	// Gets (usually creates) the cache for the node
	if (vfs_get_vnode_cache(ref->vnode, &ref->cache, true) != B_OK)
		goto err1;

	ref->cache->virtual_end = size;
	((VMVnodeCache*)ref->cache)->SetFileCacheRef(ref);
	return ref;

err1:
	delete ref;
	return NULL;
}


extern "C" void
file_cache_delete(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	if (ref == NULL)
		return;

	TRACE(("file_cache_delete(ref = %p)\n", ref));

	ref->cache->ReleaseRef();
	delete ref;
}


extern "C" void
file_cache_enable(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	if (ref->disabled_count == 0) {
		panic("Unbalanced file_cache_enable()!");
		return;
	}

	ref->disabled_count--;
}


extern "C" status_t
file_cache_disable(void* _cacheRef)
{
	// TODO: This function only removes all pages from the cache and prevents
	// that the file cache functions add any new ones until re-enabled. The
	// VM (on page fault) can still add pages, if the file is mmap()ed. We
	// should mark the cache to prevent shared mappings of the file and fix
	// the page fault code to deal correctly with private mappings (i.e. only
	// insert pages in consumer caches).

	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	AutoLocker<VMCache> _(ref->cache);

	// If already disabled, there's nothing to do for us.
	if (ref->disabled_count > 0) {
		ref->disabled_count++;
		return B_OK;
	}

	// The file cache is not yet disabled. We need to evict all cached pages.
	status_t error = ref->cache->FlushAndRemoveAllPages();
	if (error != B_OK)
		return error;

	ref->disabled_count++;
	return B_OK;
}


extern "C" bool
file_cache_is_enabled(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;
	AutoLocker<VMCache> _(ref->cache);

	return ref->disabled_count == 0;
}


extern "C" status_t
file_cache_set_size(void* _cacheRef, off_t newSize)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_set_size(ref = %p, size = %Ld)\n", ref, newSize));

	VMCache* cache = ref->cache;
	AutoLocker<VMCache> _(cache);

	off_t oldSize = cache->virtual_end;
	status_t status = cache->Resize(newSize, VM_PRIORITY_USER);
		// Note, the priority doesn't really matter, since this cache doesn't
		// reserve any memory.
	if (status == B_OK && newSize < oldSize) {
		// We may have a new partial page at the end of the cache that must be
		// cleared.
		uint32 partialBytes = newSize % B_PAGE_SIZE;
		if (partialBytes != 0) {
			vm_page* page = cache->LookupPage(newSize - partialBytes);
			if (page != NULL) {
				vm_memset_physical(page->physical_page_number * B_PAGE_SIZE
					+ partialBytes, 0, B_PAGE_SIZE - partialBytes);
			}
		}
	}

	return status;
}


extern "C" status_t
file_cache_sync(void* _cacheRef)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	return ref->cache->WriteModified();
}


extern "C" status_t
file_cache_read(void* _cacheRef, void* cookie, off_t offset, void* buffer,
	size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	TRACE(("file_cache_read(ref = %p, offset = %Ld, buffer = %p, size = %lu)\n",
		ref, offset, buffer, *_size));

	if (ref->disabled_count > 0) {
		// Caching is disabled -- read directly from the file.
		generic_io_vec vec;
		vec.base = (addr_t)buffer;
		generic_size_t size = vec.length = *_size;
		status_t error = vfs_read_pages(ref->vnode, cookie, offset, &vec, 1, 0,
			&size);
		*_size = size;
		return error;
	}

	return cache_io(ref, cookie, offset, (addr_t)buffer, _size, false);
}


extern "C" status_t
file_cache_write(void* _cacheRef, void* cookie, off_t offset,
	const void* buffer, size_t* _size)
{
	file_cache_ref* ref = (file_cache_ref*)_cacheRef;

	if (ref->disabled_count > 0) {
		// Caching is disabled -- write directly to the file.

		if (buffer != NULL) {
			generic_io_vec vec;
			vec.base = (addr_t)buffer;
			generic_size_t size = vec.length = *_size;

			status_t error = vfs_write_pages(ref->vnode, cookie, offset, &vec,
				1, 0, &size);
			*_size = size;
			return error;
		}

		// NULL buffer -- use a dummy buffer to write zeroes
		size_t size = *_size;
		while (size > 0) {
			size_t toWrite = min_c(size, kZeroVecSize);
			generic_size_t written = toWrite;
			status_t error = vfs_write_pages(ref->vnode, cookie, offset,
				sZeroVecs, kZeroVecCount, B_PHYSICAL_IO_REQUEST, &written);
			if (error != B_OK)
				return error;

			offset += written;
			size -= written;
		}

		return B_OK;
	}

	status_t status = cache_io(ref, cookie, offset,
		(addr_t)const_cast<void*>(buffer), _size, true);

	TRACE(("file_cache_write(ref = %p, offset = %Ld, buffer = %p, size = %lu)"
		" = %ld\n", ref, offset, buffer, *_size, status));

	return status;
}