src/system/kernel/vm/vm.cpp
1 /*
2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4 * Distributed under the terms of the MIT License.
6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7 * Distributed under the terms of the NewOS License.
8 */
11 #include <vm/vm.h>
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
19 #include <algorithm>
21 #include <OS.h>
22 #include <KernelExport.h>
24 #include <AutoDeleter.h>
26 #include <symbol_versioning.h>
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 # define TRACE(x) dprintf x
66 #else
67 # define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 # define FTRACE(x) dprintf x
71 #else
72 # define FTRACE(x) ;
73 #endif
76 namespace {
78 class AreaCacheLocking {
79 public:
80 inline bool Lock(VMCache* lockable)
82 return false;
85 inline void Unlock(VMCache* lockable)
87 vm_area_put_locked_cache(lockable);
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 inline AreaCacheLocker(VMCache* cache = NULL)
94 : AutoLocker<VMCache, AreaCacheLocking>(cache, true)
98 inline AreaCacheLocker(VMArea* area)
99 : AutoLocker<VMCache, AreaCacheLocking>()
101 SetTo(area);
104 inline void SetTo(VMCache* cache, bool alreadyLocked)
106 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 inline void SetTo(VMArea* area)
111 return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
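/*! Locks a chain of VMCaches from a given top (consumer) cache down through
its source caches. Each cache's user data pointer is used to remember its
consumer, so that the chain can later be unlocked in source -> consumer order.
*/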
117 class VMCacheChainLocker {
118 public:
119 VMCacheChainLocker()
121 fTopCache(NULL),
122 fBottomCache(NULL)
126 VMCacheChainLocker(VMCache* topCache)
128 fTopCache(topCache),
129 fBottomCache(topCache)
133 ~VMCacheChainLocker()
135 Unlock();
138 void SetTo(VMCache* topCache)
140 fTopCache = topCache;
141 fBottomCache = topCache;
143 if (topCache != NULL)
144 topCache->SetUserData(NULL);
147 VMCache* LockSourceCache()
149 if (fBottomCache == NULL || fBottomCache->source == NULL)
150 return NULL;
152 VMCache* previousCache = fBottomCache;
154 fBottomCache = fBottomCache->source;
155 fBottomCache->Lock();
156 fBottomCache->AcquireRefLocked();
157 fBottomCache->SetUserData(previousCache);
159 return fBottomCache;
162 void LockAllSourceCaches()
164 while (LockSourceCache() != NULL) {
168 void Unlock(VMCache* exceptCache = NULL)
170 if (fTopCache == NULL)
171 return;
173 // Unlock caches in source -> consumer direction. This is important to
174 // avoid double-locking and a reversal of locking order in case a cache
175 // is eligible for merging.
176 VMCache* cache = fBottomCache;
177 while (cache != NULL) {
178 VMCache* nextCache = (VMCache*)cache->UserData();
179 if (cache != exceptCache)
180 cache->ReleaseRefAndUnlock(cache != fTopCache);
182 if (cache == fTopCache)
183 break;
185 cache = nextCache;
188 fTopCache = NULL;
189 fBottomCache = NULL;
192 void UnlockKeepRefs(bool keepTopCacheLocked)
194 if (fTopCache == NULL)
195 return;
197 VMCache* nextCache = fBottomCache;
198 VMCache* cache = NULL;
200 while (keepTopCacheLocked
201 ? nextCache != fTopCache : cache != fTopCache) {
202 cache = nextCache;
203 nextCache = (VMCache*)cache->UserData();
204 cache->Unlock(cache != fTopCache);
208 void RelockCaches(bool topCacheLocked)
210 if (fTopCache == NULL)
211 return;
213 VMCache* nextCache = fTopCache;
214 VMCache* cache = NULL;
215 if (topCacheLocked) {
216 cache = nextCache;
217 nextCache = cache->source;
220 while (cache != fBottomCache && nextCache != NULL) {
221 VMCache* consumer = cache;
222 cache = nextCache;
223 nextCache = cache->source;
224 cache->Lock();
225 cache->SetUserData(consumer);
229 private:
230 VMCache* fTopCache;
231 VMCache* fBottomCache;
234 } // namespace
237 // The memory reserve an allocation of a given priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 VM_MEMORY_RESERVE_USER, // user
240 VM_MEMORY_RESERVE_SYSTEM, // system
241 0 // VIP
245 ObjectCache* gPageMappingsObjectCache;
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 #if DEBUG_CACHE_LIST
258 struct cache_info {
259 VMCache* cache;
260 addr_t page_count;
261 addr_t committed;
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
267 #endif // DEBUG_CACHE_LIST
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 int protection, int mapping, uint32 flags,
278 const virtual_address_restrictions* addressRestrictions, bool kernel,
279 VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
283 // #pragma mark -
286 #if VM_PAGE_FAULT_TRACING
288 namespace VMPageFaultTracing {
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 fAddress(address),
295 fPC(pc),
296 fWrite(write),
297 fUser(user)
299 Initialized();
302 virtual void AddDump(TraceOutput& out)
304 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
308 private:
309 addr_t fAddress;
310 addr_t fPC;
311 bool fWrite;
312 bool fUser;
316 // page fault errors
317 enum {
318 PAGE_FAULT_ERROR_NO_AREA = 0,
319 PAGE_FAULT_ERROR_KERNEL_ONLY,
320 PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 PAGE_FAULT_ERROR_READ_PROTECTED,
322 PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 PageFaultError(area_id area, status_t error)
332 fArea(area),
333 fError(error)
335 Initialized();
338 virtual void AddDump(TraceOutput& out)
340 switch (fError) {
341 case PAGE_FAULT_ERROR_NO_AREA:
342 out.Print("page fault error: no area");
343 break;
344 case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 out.Print("page fault error: area: %ld, kernel only", fArea);
346 break;
347 case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 out.Print("page fault error: area: %ld, write protected",
349 fArea);
350 break;
351 case PAGE_FAULT_ERROR_READ_PROTECTED:
352 out.Print("page fault error: area: %ld, read protected", fArea);
353 break;
354 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 out.Print("page fault error: area: %ld, execute protected",
356 fArea);
357 break;
358 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 out.Print("page fault error: kernel touching bad user memory");
360 break;
361 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 out.Print("page fault error: no address space");
363 break;
364 default:
365 out.Print("page fault error: area: %ld, error: %s", fArea,
366 strerror(fError));
367 break;
371 private:
372 area_id fArea;
373 status_t fError;
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 vm_page* page)
382 fArea(area),
383 fTopCache(topCache),
384 fCache(cache),
385 fPage(page)
387 Initialized();
390 virtual void AddDump(TraceOutput& out)
392 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 "page: %p", fArea, fTopCache, fCache, fPage);
396 private:
397 area_id fArea;
398 VMCache* fTopCache;
399 VMCache* fCache;
400 vm_page* fPage;
403 } // namespace VMPageFaultTracing
405 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 # define TPF(x) ;
408 #endif // VM_PAGE_FAULT_TRACING
411 // #pragma mark -
414 /*! The page's cache must be locked.
416 static inline void
417 increment_page_wired_count(vm_page* page)
419 if (!page->IsMapped())
420 atomic_add(&gMappedPagesCount, 1);
421 page->IncrementWiredCount();
425 /*! The page's cache must be locked.
427 static inline void
428 decrement_page_wired_count(vm_page* page)
430 page->DecrementWiredCount();
431 if (!page->IsMapped())
432 atomic_add(&gMappedPagesCount, -1);
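/*! Returns the virtual address within \a area at which \a page is mapped,
computed from the page's cache offset relative to the area's cache offset.
*/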
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
439 return area->Base()
440 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 VMAreaHash::ReadLock();
450 VMArea* area = VMAreaHash::LookupLocked(id);
451 if (area != NULL && area->address_space != addressSpace)
452 area = NULL;
454 VMAreaHash::ReadUnlock();
456 return area;
460 static status_t
461 allocate_area_page_protections(VMArea* area)
463 // In the page protections we store only the three user protections,
464 // so we use 4 bits per page.
465 uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 area->page_protections = (uint8*)malloc_etc(bytes,
467 HEAP_DONT_LOCK_KERNEL_SPACE);
468 if (area->page_protections == NULL)
469 return B_NO_MEMORY;
471 // init the page protections for all pages to that of the area
472 uint32 areaProtection = area->protection
473 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 memset(area->page_protections, areaProtection | (areaProtection << 4),
475 bytes);
476 return B_OK;
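/*! Stores the protection for the page at \a pageAddress in the area's
per-page protection array: each byte holds two 4-bit entries, the low nibble
for even page indices and the high nibble for odd ones.
*/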
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
483 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 uint8& entry = area->page_protections[pageIndex / 2];
486 if (pageIndex % 2 == 0)
487 entry = (entry & 0xf0) | protection;
488 else
489 entry = (entry & 0x0f) | (protection << 4);
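/*! Returns the protection of the page at \a pageAddress, taken from the
area's per-page protections if present (falling back to the area's protection
otherwise) and translated to the matching kernel protection bits where
appropriate.
*/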
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
496 if (area->page_protections == NULL)
497 return area->protection;
499 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 uint32 protection = area->page_protections[pageIndex / 2];
501 if (pageIndex % 2 == 0)
502 protection &= 0x0f;
503 else
504 protection >>= 4;
506 // If this is a kernel area we translate the user flags to kernel flags.
507 if (area->address_space == VMAddressSpace::Kernel()) {
508 uint32 kernelProtection = 0;
509 if ((protection & B_READ_AREA) != 0)
510 kernelProtection |= B_KERNEL_READ_AREA;
511 if ((protection & B_WRITE_AREA) != 0)
512 kernelProtection |= B_KERNEL_WRITE_AREA;
514 return kernelProtection;
517 return protection | B_KERNEL_READ_AREA
518 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
522 /*! The caller must have reserved as many pages as the translation map
523 implementation might need to map this page.
524 The page's cache must be locked.
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 vm_page_reservation* reservation)
530 VMTranslationMap* map = area->address_space->TranslationMap();
532 bool wasMapped = page->IsMapped();
534 if (area->wiring == B_NO_LOCK) {
535 DEBUG_PAGE_ACCESS_CHECK(page);
537 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 gPageMappingsObjectCache,
540 CACHE_DONT_WAIT_FOR_MEMORY
541 | (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 if (mapping == NULL)
543 return B_NO_MEMORY;
545 mapping->page = page;
546 mapping->area = area;
548 map->Lock();
550 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 area->MemoryType(), reservation);
553 // insert mapping into lists
554 if (!page->IsMapped())
555 atomic_add(&gMappedPagesCount, 1);
557 page->mappings.Add(mapping);
558 area->mappings.Add(mapping);
560 map->Unlock();
561 } else {
562 DEBUG_PAGE_ACCESS_CHECK(page);
564 map->Lock();
565 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 area->MemoryType(), reservation);
567 map->Unlock();
569 increment_page_wired_count(page);
572 if (!wasMapped) {
573 // The page is mapped now, so it must not remain in the cached queue.
574 // It also makes sense to move it from the inactive to the active queue,
575 // since otherwise the page daemon wouldn't come to keep track of it (in
576 // idle mode) -- if the page isn't touched, it will be deactivated after a
577 // full iteration through the queue at the latest.
578 if (page->State() == PAGE_STATE_CACHED
579 || page->State() == PAGE_STATE_INACTIVE) {
580 vm_page_set_state(page, PAGE_STATE_ACTIVE);
584 return B_OK;
588 /*! If \a preserveModified is \c true, the caller must hold the lock of the
589 page's cache.
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
594 return area->address_space->TranslationMap()->UnmapPage(area,
595 virtualAddress, true);
599 /*! If \a preserveModified is \c true, the caller must hold the lock of all
600 mapped pages' caches.
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
605 area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
609 /*! Cuts a piece out of an area. If the given cut range covers the complete
610 area, it is deleted. If it covers the beginning or the end, the area is
611 resized accordingly. If the range covers some part in the middle of the
612 area, it is split in two; in this case the second area is returned via
613 \a _secondArea (the variable is left untouched in the other cases).
614 The address space must be write locked.
615 The caller must ensure that no part of the given range is wired.
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 addr_t lastAddress, VMArea** _secondArea, bool kernel)
621 // Does the cut range intersect with the area at all?
622 addr_t areaLast = area->Base() + (area->Size() - 1);
623 if (area->Base() > lastAddress || areaLast < address)
624 return B_OK;
626 // Is the area fully covered?
627 if (area->Base() >= address && areaLast <= lastAddress) {
628 delete_area(addressSpace, area, false);
629 return B_OK;
632 int priority;
633 uint32 allocationFlags;
634 if (addressSpace == VMAddressSpace::Kernel()) {
635 priority = VM_PRIORITY_SYSTEM;
636 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 | HEAP_DONT_LOCK_KERNEL_SPACE;
638 } else {
639 priority = VM_PRIORITY_USER;
640 allocationFlags = 0;
643 VMCache* cache = vm_area_get_locked_cache(area);
644 VMCacheChainLocker cacheChainLocker(cache);
645 cacheChainLocker.LockAllSourceCaches();
647 // Cut the end only?
648 if (areaLast <= lastAddress) {
649 size_t oldSize = area->Size();
650 size_t newSize = address - area->Base();
652 status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 allocationFlags);
654 if (error != B_OK)
655 return error;
657 // unmap pages
658 unmap_pages(area, address, oldSize - newSize);
660 // If no one else uses the area's cache, we can resize it, too.
661 if (cache->areas == area && area->cache_next == NULL
662 && cache->consumers.IsEmpty()
663 && cache->type == CACHE_TYPE_RAM) {
664 // Since VMCache::Resize() can temporarily drop the lock, we must
665 // unlock all lower caches to prevent locking order inversion.
666 cacheChainLocker.Unlock(cache);
667 cache->Resize(cache->virtual_base + newSize, priority);
668 cache->ReleaseRefAndUnlock();
671 return B_OK;
674 // Cut the beginning only?
675 if (area->Base() >= address) {
676 addr_t oldBase = area->Base();
677 addr_t newBase = lastAddress + 1;
678 size_t newSize = areaLast - lastAddress;
680 // unmap pages
681 unmap_pages(area, oldBase, newBase - oldBase);
683 // resize the area
684 status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 allocationFlags);
686 if (error != B_OK)
687 return error;
689 // TODO: If no one else uses the area's cache, we should resize it, too!
691 area->cache_offset += newBase - oldBase;
693 return B_OK;
696 // The tough part -- cut a piece out of the middle of the area.
697 // We do that by shrinking the area to the beginning section and creating a
698 // new area for the end section.
700 addr_t firstNewSize = address - area->Base();
701 addr_t secondBase = lastAddress + 1;
702 addr_t secondSize = areaLast - lastAddress;
704 // unmap pages
705 unmap_pages(area, address, area->Size() - firstNewSize);
707 // resize the area
708 addr_t oldSize = area->Size();
709 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 allocationFlags);
711 if (error != B_OK)
712 return error;
714 // TODO: If no one else uses the area's cache, we might want to create a
715 // new cache for the second area, transfer the concerned pages from the
716 // first cache to it and resize the first cache.
718 // map the second area
719 virtual_address_restrictions addressRestrictions = {};
720 addressRestrictions.address = (void*)secondBase;
721 addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 VMArea* secondArea;
723 error = map_backing_store(addressSpace, cache,
724 area->cache_offset + (secondBase - area->Base()), area->name,
725 secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 &addressRestrictions, kernel, &secondArea, NULL);
727 if (error != B_OK) {
728 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 return error;
732 // We need a cache reference for the new area.
733 cache->AcquireRefLocked();
735 if (_secondArea != NULL)
736 *_secondArea = secondArea;
738 return B_OK;
742 /*! Deletes all areas in the given address range.
743 The address space must be write-locked.
744 The caller must ensure that no part of the given range is wired.
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 bool kernel)
750 size = PAGE_ALIGN(size);
751 addr_t lastAddress = address + (size - 1);
753 // Check whether the caller is allowed to modify the concerned areas.
754 if (!kernel) {
755 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 VMArea* area = it.Next();) {
757 addr_t areaLast = area->Base() + (area->Size() - 1);
758 if (area->Base() < lastAddress && address < areaLast) {
759 if ((area->protection & B_KERNEL_AREA) != 0)
760 return B_NOT_ALLOWED;
765 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
766 VMArea* area = it.Next();) {
767 addr_t areaLast = area->Base() + (area->Size() - 1);
768 if (area->Base() < lastAddress && address < areaLast) {
769 status_t error = cut_area(addressSpace, area, address,
770 lastAddress, NULL, kernel);
771 if (error != B_OK)
772 return error;
773 // Failing after already messing with areas is ugly, but we
774 // can't do anything about it.
778 return B_OK;
782 /*! You need to hold the lock of the cache and the write lock of the address
783 space when calling this function.
784 Note that in case of error the cache will be temporarily unlocked.
785 If \a addressSpec is \c B_EXACT_ADDRESS and the
786 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
787 that no part of the specified address range (base \c *_virtualAddress, size
788 \a size) is wired.
790 static status_t
791 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
792 const char* areaName, addr_t size, int wiring, int protection, int mapping,
793 uint32 flags, const virtual_address_restrictions* addressRestrictions,
794 bool kernel, VMArea** _area, void** _virtualAddress)
796 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
797 B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
798 ", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
799 addressRestrictions->address, offset, size,
800 addressRestrictions->address_specification, wiring, protection,
801 _area, areaName));
802 cache->AssertLocked();
804 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
805 | HEAP_DONT_LOCK_KERNEL_SPACE;
806 int priority;
807 if (addressSpace != VMAddressSpace::Kernel()) {
808 priority = VM_PRIORITY_USER;
809 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
810 priority = VM_PRIORITY_VIP;
811 allocationFlags |= HEAP_PRIORITY_VIP;
812 } else
813 priority = VM_PRIORITY_SYSTEM;
815 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
816 allocationFlags);
817 if (area == NULL)
818 return B_NO_MEMORY;
820 status_t status;
822 // if this is a private map, we need to create a new cache
823 // to handle the private copies of pages as they are written to
824 VMCache* sourceCache = cache;
825 if (mapping == REGION_PRIVATE_MAP) {
826 VMCache* newCache;
828 // create an anonymous cache
829 status = VMCacheFactory::CreateAnonymousCache(newCache,
830 (protection & B_STACK_AREA) != 0
831 || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
832 cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
833 if (status != B_OK)
834 goto err1;
836 newCache->Lock();
837 newCache->temporary = 1;
838 newCache->virtual_base = offset;
839 newCache->virtual_end = offset + size;
841 cache->AddConsumer(newCache);
843 cache = newCache;
846 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
847 status = cache->SetMinimalCommitment(size, priority);
848 if (status != B_OK)
849 goto err2;
852 // check to see if this address space has entered DELETE state
853 if (addressSpace->IsBeingDeleted()) {
854 // okay, someone is trying to delete this address space now, so we can't
855 // insert the area -- back out
856 status = B_BAD_TEAM_ID;
857 goto err2;
860 if (addressRestrictions->address_specification == B_EXACT_ADDRESS
861 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
862 status = unmap_address_range(addressSpace,
863 (addr_t)addressRestrictions->address, size, kernel);
864 if (status != B_OK)
865 goto err2;
868 status = addressSpace->InsertArea(area, size, addressRestrictions,
869 allocationFlags, _virtualAddress);
870 if (status != B_OK) {
871 // TODO: wait and try again once this is working in the backend
872 #if 0
873 if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
874 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
875 0, 0);
877 #endif
878 goto err2;
881 // attach the cache to the area
882 area->cache = cache;
883 area->cache_offset = offset;
885 // point the cache back to the area
886 cache->InsertAreaLocked(area);
887 if (mapping == REGION_PRIVATE_MAP)
888 cache->Unlock();
890 // insert the area in the global area hash table
891 VMAreaHash::Insert(area);
893 // grab a ref to the address space (the area holds this)
894 addressSpace->Get();
896 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
897 // cache, sourceCache, areaName, area);
899 *_area = area;
900 return B_OK;
902 err2:
903 if (mapping == REGION_PRIVATE_MAP) {
904 // We created this cache, so we must delete it again. Note that we
905 // need to temporarily unlock the source cache, or we'll deadlock,
906 // since VMCache::_RemoveConsumer() will try to lock it, too.
907 sourceCache->Unlock();
908 cache->ReleaseRefAndUnlock();
909 sourceCache->Lock();
911 err1:
912 addressSpace->DeleteArea(area, allocationFlags);
913 return status;
917 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
918 locker1, locker2).
920 template<typename LockerType1, typename LockerType2>
921 static inline bool
922 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
924 area->cache->AssertLocked();
926 VMAreaUnwiredWaiter waiter;
927 if (!area->AddWaiterIfWired(&waiter))
928 return false;
930 // unlock everything and wait
931 if (locker1 != NULL)
932 locker1->Unlock();
933 if (locker2 != NULL)
934 locker2->Unlock();
936 waiter.waitEntry.Wait();
938 return true;
942 /*! Checks whether the given area has any wired ranges intersecting with the
943 specified range and waits, if so.
945 When it has to wait, the function calls \c Unlock() on both \a locker1
946 and \a locker2, if given.
947 The area's top cache must be locked and must be unlocked as a side effect
948 of calling \c Unlock() on either \a locker1 or \a locker2.
950 If the function does not have to wait it does not modify or unlock any
951 object.
953 \param area The area to be checked.
954 \param base The base address of the range to check.
955 \param size The size of the address range to check.
956 \param locker1 An object to be unlocked before starting to wait (may
957 be \c NULL).
958 \param locker2 An object to be unlocked before starting to wait (may
959 be \c NULL).
960 \return \c true, if the function had to wait, \c false otherwise.
962 template<typename LockerType1, typename LockerType2>
963 static inline bool
964 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
965 LockerType1* locker1, LockerType2* locker2)
967 area->cache->AssertLocked();
969 VMAreaUnwiredWaiter waiter;
970 if (!area->AddWaiterIfWired(&waiter, base, size))
971 return false;
973 // unlock everything and wait
974 if (locker1 != NULL)
975 locker1->Unlock();
976 if (locker2 != NULL)
977 locker2->Unlock();
979 waiter.waitEntry.Wait();
981 return true;
985 /*! Checks whether the given address space has any wired ranges intersecting
986 with the specified range and waits, if so.
988 Similar to wait_if_area_range_is_wired(), with the following differences:
989 - All areas intersecting with the range are checked (respectively all until
990 one is found that contains a wired range intersecting with the given
991 range).
992 - The given address space must at least be read-locked and must be unlocked
993 when \c Unlock() is called on \a locker.
994 - None of the areas' caches are allowed to be locked.
996 template<typename LockerType>
997 static inline bool
998 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
999 size_t size, LockerType* locker)
1001 addr_t end = base + size - 1;
1002 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1003 VMArea* area = it.Next();) {
1004 // TODO: Introduce a VMAddressSpace method to get a close iterator!
1005 if (area->Base() > end)
1006 return false;
1008 if (base >= area->Base() + area->Size() - 1)
1009 continue;
1011 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1013 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1014 return true;
1017 return false;
1021 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection().
1022 It must be called in a situation where the kernel address space may be
1023 locked.
1025 status_t
1026 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1028 AddressSpaceReadLocker locker;
1029 VMArea* area;
1030 status_t status = locker.SetFromArea(id, area);
1031 if (status != B_OK)
1032 return status;
1034 if (area->page_protections == NULL) {
1035 status = allocate_area_page_protections(area);
1036 if (status != B_OK)
1037 return status;
1040 *cookie = (void*)area;
1041 return B_OK;
1045 /*! This is a debug helper function that can only be used with very specific
1046 use cases.
1047 Sets protection for the given address range to the protection specified.
1048 If \a protection is 0 then the involved pages will be marked non-present
1049 in the translation map to cause a fault on access. The pages aren't
1050 actually unmapped however so that they can be marked present again with
1051 additional calls to this function. For this to work the area must be
1052 fully locked in memory so that the pages aren't otherwise touched.
1053 This function does not lock the kernel address space and needs to be
1054 supplied with a \a cookie retrieved from a successful call to
1055 vm_prepare_kernel_area_debug_protection().
1057 status_t
1058 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1059 uint32 protection)
1061 // check address range
1062 addr_t address = (addr_t)_address;
1063 size = PAGE_ALIGN(size);
1065 if ((address % B_PAGE_SIZE) != 0
1066 || (addr_t)address + size < (addr_t)address
1067 || !IS_KERNEL_ADDRESS(address)
1068 || !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1069 return B_BAD_VALUE;
1072 // Translate the kernel protection to user protection as we only store that.
1073 if ((protection & B_KERNEL_READ_AREA) != 0)
1074 protection |= B_READ_AREA;
1075 if ((protection & B_KERNEL_WRITE_AREA) != 0)
1076 protection |= B_WRITE_AREA;
1078 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1079 VMTranslationMap* map = addressSpace->TranslationMap();
1080 VMArea* area = (VMArea*)cookie;
1082 addr_t offset = address - area->Base();
1083 if (area->Size() - offset < size) {
1084 panic("protect range not fully within supplied area");
1085 return B_BAD_VALUE;
1088 if (area->page_protections == NULL) {
1089 panic("area has no page protections");
1090 return B_BAD_VALUE;
1093 // Invalidate the mapping entries so any access to them will fault, or
1094 // restore the mapping entries unchanged so that lookup will succeed again.
1095 map->Lock();
1096 map->DebugMarkRangePresent(address, address + size, protection != 0);
1097 map->Unlock();
1099 // And set the proper page protections so that the fault case will actually
1100 // fail and not simply try to map a new page.
1101 for (addr_t pageAddress = address; pageAddress < address + size;
1102 pageAddress += B_PAGE_SIZE) {
1103 set_area_page_protection(area, pageAddress, protection);
1106 return B_OK;
1110 status_t
1111 vm_block_address_range(const char* name, void* address, addr_t size)
1113 if (!arch_vm_supports_protection(0))
1114 return B_NOT_SUPPORTED;
1116 AddressSpaceWriteLocker locker;
1117 status_t status = locker.SetTo(VMAddressSpace::KernelID());
1118 if (status != B_OK)
1119 return status;
1121 VMAddressSpace* addressSpace = locker.AddressSpace();
1123 // create an anonymous cache
1124 VMCache* cache;
1125 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1126 VM_PRIORITY_SYSTEM);
1127 if (status != B_OK)
1128 return status;
1130 cache->temporary = 1;
1131 cache->virtual_end = size;
1132 cache->Lock();
1134 VMArea* area;
1135 virtual_address_restrictions addressRestrictions = {};
1136 addressRestrictions.address = address;
1137 addressRestrictions.address_specification = B_EXACT_ADDRESS;
1138 status = map_backing_store(addressSpace, cache, 0, name, size,
1139 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1140 true, &area, NULL);
1141 if (status != B_OK) {
1142 cache->ReleaseRefAndUnlock();
1143 return status;
1146 cache->Unlock();
1147 area->cache_type = CACHE_TYPE_RAM;
1148 return area->id;
1152 status_t
1153 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1155 AddressSpaceWriteLocker locker(team);
1156 if (!locker.IsLocked())
1157 return B_BAD_TEAM_ID;
1159 VMAddressSpace* addressSpace = locker.AddressSpace();
1160 return addressSpace->UnreserveAddressRange((addr_t)address, size,
1161 addressSpace == VMAddressSpace::Kernel()
1162 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1166 status_t
1167 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1168 addr_t size, uint32 flags)
1170 if (size == 0)
1171 return B_BAD_VALUE;
1173 AddressSpaceWriteLocker locker(team);
1174 if (!locker.IsLocked())
1175 return B_BAD_TEAM_ID;
1177 virtual_address_restrictions addressRestrictions = {};
1178 addressRestrictions.address = *_address;
1179 addressRestrictions.address_specification = addressSpec;
1180 VMAddressSpace* addressSpace = locker.AddressSpace();
1181 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1182 addressSpace == VMAddressSpace::Kernel()
1183 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1184 _address);
1188 area_id
1189 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1190 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1191 const virtual_address_restrictions* virtualAddressRestrictions,
1192 const physical_address_restrictions* physicalAddressRestrictions,
1193 bool kernel, void** _address)
1195 VMArea* area;
1196 VMCache* cache;
1197 vm_page* page = NULL;
1198 bool isStack = (protection & B_STACK_AREA) != 0;
1199 page_num_t guardPages;
1200 bool canOvercommit = false;
1201 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1202 ? VM_PAGE_ALLOC_CLEAR : 0;
1204 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1205 team, name, size));
1207 size = PAGE_ALIGN(size);
1208 guardSize = PAGE_ALIGN(guardSize);
1209 guardPages = guardSize / B_PAGE_SIZE;
1211 if (size == 0 || size < guardSize)
1212 return B_BAD_VALUE;
1213 if (!arch_vm_supports_protection(protection))
1214 return B_NOT_SUPPORTED;
1216 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1217 canOvercommit = true;
1219 #ifdef DEBUG_KERNEL_STACKS
1220 if ((protection & B_KERNEL_STACK_AREA) != 0)
1221 isStack = true;
1222 #endif
1224 // check parameters
1225 switch (virtualAddressRestrictions->address_specification) {
1226 case B_ANY_ADDRESS:
1227 case B_EXACT_ADDRESS:
1228 case B_BASE_ADDRESS:
1229 case B_ANY_KERNEL_ADDRESS:
1230 case B_ANY_KERNEL_BLOCK_ADDRESS:
1231 case B_RANDOMIZED_ANY_ADDRESS:
1232 case B_RANDOMIZED_BASE_ADDRESS:
1233 break;
1235 default:
1236 return B_BAD_VALUE;
1239 // If low or high physical address restrictions are given, we force
1240 // B_CONTIGUOUS wiring, since only then we'll use
1241 // vm_page_allocate_page_run() which deals with those restrictions.
1242 if (physicalAddressRestrictions->low_address != 0
1243 || physicalAddressRestrictions->high_address != 0) {
1244 wiring = B_CONTIGUOUS;
1247 physical_address_restrictions stackPhysicalRestrictions;
1248 bool doReserveMemory = false;
1249 switch (wiring) {
1250 case B_NO_LOCK:
1251 break;
1252 case B_FULL_LOCK:
1253 case B_LAZY_LOCK:
1254 case B_CONTIGUOUS:
1255 doReserveMemory = true;
1256 break;
1257 case B_ALREADY_WIRED:
1258 break;
1259 case B_LOMEM:
1260 stackPhysicalRestrictions = *physicalAddressRestrictions;
1261 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1262 physicalAddressRestrictions = &stackPhysicalRestrictions;
1263 wiring = B_CONTIGUOUS;
1264 doReserveMemory = true;
1265 break;
1266 case B_32_BIT_FULL_LOCK:
1267 if (B_HAIKU_PHYSICAL_BITS <= 32
1268 || (uint64)vm_page_max_address() < (uint64)1 << 32) {
1269 wiring = B_FULL_LOCK;
1270 doReserveMemory = true;
1271 break;
1273 // TODO: We don't really support this mode efficiently. Just fall
1274 // through for now ...
1275 case B_32_BIT_CONTIGUOUS:
1276 #if B_HAIKU_PHYSICAL_BITS > 32
1277 if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1278 stackPhysicalRestrictions = *physicalAddressRestrictions;
1279 stackPhysicalRestrictions.high_address
1280 = (phys_addr_t)1 << 32;
1281 physicalAddressRestrictions = &stackPhysicalRestrictions;
1283 #endif
1284 wiring = B_CONTIGUOUS;
1285 doReserveMemory = true;
1286 break;
1287 default:
1288 return B_BAD_VALUE;
1291 // Optimization: For a single-page contiguous allocation without low/high
1292 // memory restriction B_FULL_LOCK wiring suffices.
1293 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1294 && physicalAddressRestrictions->low_address == 0
1295 && physicalAddressRestrictions->high_address == 0) {
1296 wiring = B_FULL_LOCK;
1299 // For full lock or contiguous areas we're also going to map the pages and
1300 // thus need to reserve pages for the mapping backend upfront.
1301 addr_t reservedMapPages = 0;
1302 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1303 AddressSpaceWriteLocker locker;
1304 status_t status = locker.SetTo(team);
1305 if (status != B_OK)
1306 return status;
1308 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1309 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1312 int priority;
1313 if (team != VMAddressSpace::KernelID())
1314 priority = VM_PRIORITY_USER;
1315 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1316 priority = VM_PRIORITY_VIP;
1317 else
1318 priority = VM_PRIORITY_SYSTEM;
1320 // Reserve memory before acquiring the address space lock. This reduces the
1321 // chances of failure, since while holding the write lock to the address
1322 // space (if it is the kernel address space that is), the low memory handler
1323 // won't be able to free anything for us.
1324 addr_t reservedMemory = 0;
1325 if (doReserveMemory) {
1326 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1327 if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1328 return B_NO_MEMORY;
1329 reservedMemory = size;
1330 // TODO: We don't reserve the memory for the pages for the page
1331 // directories/tables. We actually need to do so, since we currently don't
1332 // reclaim them (and probably can't reclaim all of them anyway). Thus
1333 // there are actually fewer physical pages than there should be, which
1334 // can get the VM into trouble in low memory situations.
1337 AddressSpaceWriteLocker locker;
1338 VMAddressSpace* addressSpace;
1339 status_t status;
1341 // For full lock areas reserve the pages before locking the address
1342 // space. E.g. block caches can't release their memory while we hold the
1343 // address space lock.
1344 page_num_t reservedPages = reservedMapPages;
1345 if (wiring == B_FULL_LOCK)
1346 reservedPages += size / B_PAGE_SIZE;
1348 vm_page_reservation reservation;
1349 if (reservedPages > 0) {
1350 if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1351 if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1352 priority)) {
1353 reservedPages = 0;
1354 status = B_WOULD_BLOCK;
1355 goto err0;
1357 } else
1358 vm_page_reserve_pages(&reservation, reservedPages, priority);
1361 if (wiring == B_CONTIGUOUS) {
1362 // we try to allocate the page run here upfront as this may easily
1363 // fail for obvious reasons
1364 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1365 size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1366 if (page == NULL) {
1367 status = B_NO_MEMORY;
1368 goto err0;
1372 // Lock the address space and, if B_EXACT_ADDRESS and
1373 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1374 // is not wired.
1375 do {
1376 status = locker.SetTo(team);
1377 if (status != B_OK)
1378 goto err1;
1380 addressSpace = locker.AddressSpace();
1381 } while (virtualAddressRestrictions->address_specification
1382 == B_EXACT_ADDRESS
1383 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1384 && wait_if_address_range_is_wired(addressSpace,
1385 (addr_t)virtualAddressRestrictions->address, size, &locker));
1387 // create an anonymous cache
1388 // if it's a stack, make sure that at least two pages are available
1389 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1390 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1391 wiring == B_NO_LOCK, priority);
1392 if (status != B_OK)
1393 goto err1;
1395 cache->temporary = 1;
1396 cache->virtual_end = size;
1397 cache->committed_size = reservedMemory;
1398 // TODO: This should be done via a method.
1399 reservedMemory = 0;
1401 cache->Lock();
1403 status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1404 protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1405 kernel, &area, _address);
1407 if (status != B_OK) {
1408 cache->ReleaseRefAndUnlock();
1409 goto err1;
1412 locker.DegradeToReadLock();
1414 switch (wiring) {
1415 case B_NO_LOCK:
1416 case B_LAZY_LOCK:
1417 // do nothing - the pages are mapped in as needed
1418 break;
1420 case B_FULL_LOCK:
1422 // Allocate and map all pages for this area
1424 off_t offset = 0;
1425 for (addr_t address = area->Base();
1426 address < area->Base() + (area->Size() - 1);
1427 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1428 #ifdef DEBUG_KERNEL_STACKS
1429 # ifdef STACK_GROWS_DOWNWARDS
1430 if (isStack && address < area->Base()
1431 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1432 # else
1433 if (isStack && address >= area->Base() + area->Size()
1434 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1435 # endif
1436 continue;
1437 #endif
1438 vm_page* page = vm_page_allocate_page(&reservation,
1439 PAGE_STATE_WIRED | pageAllocFlags);
1440 cache->InsertPage(page, offset);
1441 map_page(area, page, address, protection, &reservation);
1443 DEBUG_PAGE_ACCESS_END(page);
1446 break;
1449 case B_ALREADY_WIRED:
1451 // The pages should already be mapped. This is only really useful
1452 // during boot time. Find the appropriate vm_page objects and stick
1453 // them in the cache object.
1454 VMTranslationMap* map = addressSpace->TranslationMap();
1455 off_t offset = 0;
1457 if (!gKernelStartup)
1458 panic("ALREADY_WIRED flag used outside kernel startup\n");
1460 map->Lock();
1462 for (addr_t virtualAddress = area->Base();
1463 virtualAddress < area->Base() + (area->Size() - 1);
1464 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1465 phys_addr_t physicalAddress;
1466 uint32 flags;
1467 status = map->Query(virtualAddress, &physicalAddress, &flags);
1468 if (status < B_OK) {
1469 panic("looking up mapping failed for va 0x%lx\n",
1470 virtualAddress);
1472 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1473 if (page == NULL) {
1474 panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1475 "\n", physicalAddress);
1478 DEBUG_PAGE_ACCESS_START(page);
1480 cache->InsertPage(page, offset);
1481 increment_page_wired_count(page);
1482 vm_page_set_state(page, PAGE_STATE_WIRED);
1483 page->busy = false;
1485 DEBUG_PAGE_ACCESS_END(page);
1488 map->Unlock();
1489 break;
1492 case B_CONTIGUOUS:
1494 // We have already allocated our contiguous page run, so we can now
1495 // just map the pages in the address space
1496 VMTranslationMap* map = addressSpace->TranslationMap();
1497 phys_addr_t physicalAddress
1498 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1499 addr_t virtualAddress = area->Base();
1500 off_t offset = 0;
1502 map->Lock();
1504 for (virtualAddress = area->Base(); virtualAddress < area->Base()
1505 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1506 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1507 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1508 if (page == NULL)
1509 panic("couldn't lookup physical page just allocated\n");
1511 status = map->Map(virtualAddress, physicalAddress, protection,
1512 area->MemoryType(), &reservation);
1513 if (status < B_OK)
1514 panic("couldn't map physical page in page run\n");
1516 cache->InsertPage(page, offset);
1517 increment_page_wired_count(page);
1519 DEBUG_PAGE_ACCESS_END(page);
1522 map->Unlock();
1523 break;
1526 default:
1527 break;
1530 cache->Unlock();
1532 if (reservedPages > 0)
1533 vm_page_unreserve_pages(&reservation);
1535 TRACE(("vm_create_anonymous_area: done\n"));
1537 area->cache_type = CACHE_TYPE_RAM;
1538 return area->id;
1540 err1:
1541 if (wiring == B_CONTIGUOUS) {
1542 // we had reserved the area space upfront...
1543 phys_addr_t pageNumber = page->physical_page_number;
1544 int32 i;
1545 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1546 page = vm_lookup_page(pageNumber);
1547 if (page == NULL)
1548 panic("couldn't lookup physical page just allocated\n");
1550 vm_page_set_state(page, PAGE_STATE_FREE);
1554 err0:
1555 if (reservedPages > 0)
1556 vm_page_unreserve_pages(&reservation);
1557 if (reservedMemory > 0)
1558 vm_unreserve_memory(reservedMemory);
1560 return status;
1564 area_id
1565 vm_map_physical_memory(team_id team, const char* name, void** _address,
1566 uint32 addressSpec, addr_t size, uint32 protection,
1567 phys_addr_t physicalAddress, bool alreadyWired)
1569 VMArea* area;
1570 VMCache* cache;
1571 addr_t mapOffset;
1573 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1574 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1575 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1576 addressSpec, size, protection, physicalAddress));
1578 if (!arch_vm_supports_protection(protection))
1579 return B_NOT_SUPPORTED;
1581 AddressSpaceWriteLocker locker(team);
1582 if (!locker.IsLocked())
1583 return B_BAD_TEAM_ID;
1585 // if the physical address is not page aligned,
1586 // move the area down to start on a page boundary
1587 mapOffset = physicalAddress % B_PAGE_SIZE;
1588 size += mapOffset;
1589 physicalAddress -= mapOffset;
1591 size = PAGE_ALIGN(size);
1593 // create a device cache
1594 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1595 if (status != B_OK)
1596 return status;
1598 cache->virtual_end = size;
1600 cache->Lock();
1602 virtual_address_restrictions addressRestrictions = {};
1603 addressRestrictions.address = *_address;
1604 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1605 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1606 B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1607 true, &area, _address);
1609 if (status < B_OK)
1610 cache->ReleaseRefLocked();
1612 cache->Unlock();
1614 if (status == B_OK) {
1615 // set requested memory type -- use uncached, if not given
1616 uint32 memoryType = addressSpec & B_MTR_MASK;
1617 if (memoryType == 0)
1618 memoryType = B_MTR_UC;
1620 area->SetMemoryType(memoryType);
1622 status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1623 if (status != B_OK)
1624 delete_area(locker.AddressSpace(), area, false);
1627 if (status != B_OK)
1628 return status;
1630 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1632 if (alreadyWired) {
1633 // The area is already mapped, but possibly not with the right
1634 // memory type.
1635 map->Lock();
1636 map->ProtectArea(area, area->protection);
1637 map->Unlock();
1638 } else {
1639 // Map the area completely.
1641 // reserve pages needed for the mapping
1642 size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1643 area->Base() + (size - 1));
1644 vm_page_reservation reservation;
1645 vm_page_reserve_pages(&reservation, reservePages,
1646 team == VMAddressSpace::KernelID()
1647 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1649 map->Lock();
1651 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1652 map->Map(area->Base() + offset, physicalAddress + offset,
1653 protection, area->MemoryType(), &reservation);
1656 map->Unlock();
1658 vm_page_unreserve_pages(&reservation);
1661 // modify the returned pointer to be offset back into the new area
1662 // the same way the physical address passed in was offset
1663 *_address = (void*)((addr_t)*_address + mapOffset);
1665 area->cache_type = CACHE_TYPE_DEVICE;
1666 return area->id;
1670 /*! Don't use!
1671 TODO: This function was introduced to map physical page vecs to
1672 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1673 use a device cache and does not track vm_page::wired_count!
1675 area_id
1676 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1677 uint32 addressSpec, addr_t* _size, uint32 protection,
1678 struct generic_io_vec* vecs, uint32 vecCount)
1680 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1681 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1682 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1683 addressSpec, _size, protection, vecs, vecCount));
1685 if (!arch_vm_supports_protection(protection)
1686 || (addressSpec & B_MTR_MASK) != 0) {
1687 return B_NOT_SUPPORTED;
1690 AddressSpaceWriteLocker locker(team);
1691 if (!locker.IsLocked())
1692 return B_BAD_TEAM_ID;
1694 if (vecCount == 0)
1695 return B_BAD_VALUE;
1697 addr_t size = 0;
1698 for (uint32 i = 0; i < vecCount; i++) {
1699 if (vecs[i].base % B_PAGE_SIZE != 0
1700 || vecs[i].length % B_PAGE_SIZE != 0) {
1701 return B_BAD_VALUE;
1704 size += vecs[i].length;
1707 // create a device cache
1708 VMCache* cache;
1709 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1710 if (result != B_OK)
1711 return result;
1713 cache->virtual_end = size;
1715 cache->Lock();
1717 VMArea* area;
1718 virtual_address_restrictions addressRestrictions = {};
1719 addressRestrictions.address = *_address;
1720 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1721 result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1722 size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1723 &addressRestrictions, true, &area, _address);
1725 if (result != B_OK)
1726 cache->ReleaseRefLocked();
1728 cache->Unlock();
1730 if (result != B_OK)
1731 return result;
1733 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1734 size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1735 area->Base() + (size - 1));
1737 vm_page_reservation reservation;
1738 vm_page_reserve_pages(&reservation, reservePages,
1739 team == VMAddressSpace::KernelID()
1740 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1741 map->Lock();
1743 uint32 vecIndex = 0;
1744 size_t vecOffset = 0;
1745 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1746 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1747 vecOffset = 0;
1748 vecIndex++;
1751 if (vecIndex >= vecCount)
1752 break;
1754 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1755 protection, area->MemoryType(), &reservation);
1757 vecOffset += B_PAGE_SIZE;
1760 map->Unlock();
1761 vm_page_unreserve_pages(&reservation);
1763 if (_size != NULL)
1764 *_size = size;
1766 area->cache_type = CACHE_TYPE_DEVICE;
1767 return area->id;
1771 area_id
1772 vm_create_null_area(team_id team, const char* name, void** address,
1773 uint32 addressSpec, addr_t size, uint32 flags)
1775 size = PAGE_ALIGN(size);
1777 // Lock the address space and, if B_EXACT_ADDRESS and
1778 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1779 // is not wired.
1780 AddressSpaceWriteLocker locker;
1781 do {
1782 if (locker.SetTo(team) != B_OK)
1783 return B_BAD_TEAM_ID;
1784 } while (addressSpec == B_EXACT_ADDRESS
1785 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1786 && wait_if_address_range_is_wired(locker.AddressSpace(),
1787 (addr_t)*address, size, &locker));
1789 // create a null cache
1790 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1791 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1792 VMCache* cache;
1793 status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1794 if (status != B_OK)
1795 return status;
1797 cache->temporary = 1;
1798 cache->virtual_end = size;
1800 cache->Lock();
1802 VMArea* area;
1803 virtual_address_restrictions addressRestrictions = {};
1804 addressRestrictions.address = *address;
1805 addressRestrictions.address_specification = addressSpec;
1806 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1807 B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1808 &addressRestrictions, true, &area, address);
1810 if (status < B_OK) {
1811 cache->ReleaseRefAndUnlock();
1812 return status;
1815 cache->Unlock();
1817 area->cache_type = CACHE_TYPE_NULL;
1818 return area->id;
1822 /*! Creates the vnode cache for the specified \a vnode.
1823 The vnode has to be marked busy when calling this function.
1825 status_t
1826 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1828 return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1832 /*! \a cache must be locked. The area's address space must be read-locked.
1834 static void
1835 pre_map_area_pages(VMArea* area, VMCache* cache,
1836 vm_page_reservation* reservation)
1838 addr_t baseAddress = area->Base();
1839 addr_t cacheOffset = area->cache_offset;
1840 page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1841 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1843 for (VMCachePagesTree::Iterator it
1844 = cache->pages.GetIterator(firstPage, true, true);
1845 vm_page* page = it.Next();) {
1846 if (page->cache_offset >= endPage)
1847 break;
1849 // skip busy and inactive pages
1850 if (page->busy || page->usage_count == 0)
1851 continue;
1853 DEBUG_PAGE_ACCESS_START(page);
1854 map_page(area, page,
1855 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1856 B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1857 DEBUG_PAGE_ACCESS_END(page);
1862 /*! Will map the file specified by \a fd to an area in memory.
1863 The file will be mirrored beginning at the specified \a offset. The
1864 \a offset and \a size arguments have to be page aligned.
1866 static area_id
1867 _vm_map_file(team_id team, const char* name, void** _address,
1868 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1869 bool unmapAddressRange, int fd, off_t offset, bool kernel)
1871 // TODO: for binary files, we want to make sure that they get a
1872 // snapshot of the file at a given time, i.e. later changes should not
1873 // make it into the mapped copy -- this will need quite some changes
1874 // to be done in a nice way
1875 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1876 "%" B_PRIu32 ")\n", fd, offset, size, mapping));
1878 offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1879 size = PAGE_ALIGN(size);
1881 if (mapping == REGION_NO_PRIVATE_MAP)
1882 protection |= B_SHARED_AREA;
1883 if (addressSpec != B_EXACT_ADDRESS)
1884 unmapAddressRange = false;
1886 if (fd < 0) {
1887 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1888 virtual_address_restrictions virtualRestrictions = {};
1889 virtualRestrictions.address = *_address;
1890 virtualRestrictions.address_specification = addressSpec;
1891 physical_address_restrictions physicalRestrictions = {};
1892 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1893 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1894 _address);
1897 // get the open flags of the FD
1898 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1899 if (descriptor == NULL)
1900 return EBADF;
1901 int32 openMode = descriptor->open_mode;
1902 put_fd(descriptor);
1904 // The FD must be open for reading at any rate. For a shared mapping with
1905 // write access, the FD must additionally be open for writing.
1906 if ((openMode & O_ACCMODE) == O_WRONLY
1907 || (mapping == REGION_NO_PRIVATE_MAP
1908 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1909 && (openMode & O_ACCMODE) == O_RDONLY)) {
1910 return EACCES;
1913 // get the vnode for the object, this also grabs a ref to it
1914 struct vnode* vnode = NULL;
1915 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1916 if (status < B_OK)
1917 return status;
1918 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1920 // If we're going to pre-map pages, we need to reserve the pages needed by
1921 // the mapping backend upfront.
1922 page_num_t reservedPreMapPages = 0;
1923 vm_page_reservation reservation;
1924 if ((protection & B_READ_AREA) != 0) {
1925 AddressSpaceWriteLocker locker;
1926 status = locker.SetTo(team);
1927 if (status != B_OK)
1928 return status;
1930 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1931 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1933 locker.Unlock();
1935 vm_page_reserve_pages(&reservation, reservedPreMapPages,
1936 team == VMAddressSpace::KernelID()
1937 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1940 struct PageUnreserver {
1941 PageUnreserver(vm_page_reservation* reservation)
1943 fReservation(reservation)
1947 ~PageUnreserver()
1949 if (fReservation != NULL)
1950 vm_page_unreserve_pages(fReservation);
1953 vm_page_reservation* fReservation;
1954 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1956 // Lock the address space and, if the specified address range shall be
1957 // unmapped, ensure it is not wired.
1958 AddressSpaceWriteLocker locker;
1959 do {
1960 if (locker.SetTo(team) != B_OK)
1961 return B_BAD_TEAM_ID;
1962 } while (unmapAddressRange
1963 && wait_if_address_range_is_wired(locker.AddressSpace(),
1964 (addr_t)*_address, size, &locker));
1966 // TODO: this only works for file systems that use the file cache
1967 VMCache* cache;
1968 status = vfs_get_vnode_cache(vnode, &cache, false);
1969 if (status < B_OK)
1970 return status;
1972 cache->Lock();
1974 VMArea* area;
1975 virtual_address_restrictions addressRestrictions = {};
1976 addressRestrictions.address = *_address;
1977 addressRestrictions.address_specification = addressSpec;
1978 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1979 0, protection, mapping,
1980 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1981 &addressRestrictions, kernel, &area, _address);
1983 if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1984 // map_backing_store() cannot know we no longer need the ref
1985 cache->ReleaseRefLocked();
1988 if (status == B_OK && (protection & B_READ_AREA) != 0)
1989 pre_map_area_pages(area, cache, &reservation);
1991 cache->Unlock();
1993 if (status == B_OK) {
1994 // TODO: this probably deserves a smarter solution, i.e. don't always
1995 // prefetch stuff, and also, probably don't trigger it at this place.
1996 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1997 // prefetches at most 10 MB starting from "offset"
2000 if (status != B_OK)
2001 return status;
2003 area->cache_type = CACHE_TYPE_VNODE;
2004 return area->id;
2008 area_id
2009 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2010 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2011 int fd, off_t offset)
2013 if (!arch_vm_supports_protection(protection))
2014 return B_NOT_SUPPORTED;
2016 return _vm_map_file(aid, name, address, addressSpec, size, protection,
2017 mapping, unmapAddressRange, fd, offset, true);
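// Illustrative sketch (not part of the original source): a kernel caller
// could map an already open file read-only into the kernel address space
// roughly like this; "fd" and "fileSize" are hypothetical, and both offset
// and size have to be page aligned as documented above.
//
//   void* address = NULL;
//   area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
//       &address, B_ANY_KERNEL_ADDRESS, fileSize, B_KERNEL_READ_AREA,
//       REGION_NO_PRIVATE_MAP, false, fd, 0);
//   if (area < 0)
//       dprintf("vm_map_file() failed: %s\n", strerror(area));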
2021 VMCache*
2022 vm_area_get_locked_cache(VMArea* area)
2024 rw_lock_read_lock(&sAreaCacheLock);
2026 while (true) {
2027 VMCache* cache = area->cache;
2029 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2030 // cache has been deleted
2031 rw_lock_read_lock(&sAreaCacheLock);
2032 continue;
2035 rw_lock_read_lock(&sAreaCacheLock);
2037 if (cache == area->cache) {
2038 cache->AcquireRefLocked();
2039 rw_lock_read_unlock(&sAreaCacheLock);
2040 return cache;
2043 // the cache changed in the meantime
2044 cache->Unlock();
2049 void
2050 vm_area_put_locked_cache(VMCache* cache)
2052 cache->ReleaseRefAndUnlock();
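// Illustrative sketch (not part of the original source): the two functions
// above are meant to be used as a pair -- the returned cache comes back
// locked and referenced, and vm_area_put_locked_cache() releases both again
// ("area" is assumed to be a valid, looked-up VMArea):
//
//   VMCache* cache = vm_area_get_locked_cache(area);
//   // ... inspect or modify the cache while it is locked ...
//   vm_area_put_locked_cache(cache);
//
// The AreaCacheLocker used elsewhere in this file wraps the same pattern in
// a RAII helper.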
2056 area_id
2057 vm_clone_area(team_id team, const char* name, void** address,
2058 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2059 bool kernel)
2061 VMArea* newArea = NULL;
2062 VMArea* sourceArea;
2064 // Check whether the source area exists and is cloneable. If so, mark it
2065 // B_SHARED_AREA, so that we don't get problems with copy-on-write.
2067 AddressSpaceWriteLocker locker;
2068 status_t status = locker.SetFromArea(sourceID, sourceArea);
2069 if (status != B_OK)
2070 return status;
2072 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2073 return B_NOT_ALLOWED;
2075 sourceArea->protection |= B_SHARED_AREA;
2076 protection |= B_SHARED_AREA;
2079 // Now lock both address spaces and actually do the cloning.
2081 MultiAddressSpaceLocker locker;
2082 VMAddressSpace* sourceAddressSpace;
2083 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2084 if (status != B_OK)
2085 return status;
2087 VMAddressSpace* targetAddressSpace;
2088 status = locker.AddTeam(team, true, &targetAddressSpace);
2089 if (status != B_OK)
2090 return status;
2092 status = locker.Lock();
2093 if (status != B_OK)
2094 return status;
2096 sourceArea = lookup_area(sourceAddressSpace, sourceID);
2097 if (sourceArea == NULL)
2098 return B_BAD_VALUE;
2100 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2101 return B_NOT_ALLOWED;
2103 VMCache* cache = vm_area_get_locked_cache(sourceArea);
2105 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2106 // have been adapted. Maybe it should be part of the kernel settings,
2107 // anyway (so that old drivers can always work).
2108 #if 0
2109 if (sourceArea->aspace == VMAddressSpace::Kernel()
2110 && addressSpace != VMAddressSpace::Kernel()
2111 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2112 // kernel areas must not be cloned in userland, unless explicitly
2113 // declared user-cloneable upon construction
2114 status = B_NOT_ALLOWED;
2115 } else
2116 #endif
2117 if (sourceArea->cache_type == CACHE_TYPE_NULL)
2118 status = B_NOT_ALLOWED;
2119 else {
2120 virtual_address_restrictions addressRestrictions = {};
2121 addressRestrictions.address = *address;
2122 addressRestrictions.address_specification = addressSpec;
2123 status = map_backing_store(targetAddressSpace, cache,
2124 sourceArea->cache_offset, name, sourceArea->Size(),
2125 sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2126 kernel, &newArea, address);
2128 if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2129 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2130 // to create a new cache, and has therefore already acquired a reference
2131 // to the source cache - but otherwise it has no idea that we need
2132 // one.
2133 cache->AcquireRefLocked();
2135 if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2136 // we need to map in everything at this point
2137 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2138 // we don't have actual pages to map but a physical area
2139 VMTranslationMap* map
2140 = sourceArea->address_space->TranslationMap();
2141 map->Lock();
2143 phys_addr_t physicalAddress;
2144 uint32 oldProtection;
2145 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2147 map->Unlock();
2149 map = targetAddressSpace->TranslationMap();
2150 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2151 newArea->Base() + (newArea->Size() - 1));
2153 vm_page_reservation reservation;
2154 vm_page_reserve_pages(&reservation, reservePages,
2155 targetAddressSpace == VMAddressSpace::Kernel()
2156 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2157 map->Lock();
2159 for (addr_t offset = 0; offset < newArea->Size();
2160 offset += B_PAGE_SIZE) {
2161 map->Map(newArea->Base() + offset, physicalAddress + offset,
2162 protection, newArea->MemoryType(), &reservation);
2165 map->Unlock();
2166 vm_page_unreserve_pages(&reservation);
2167 } else {
2168 VMTranslationMap* map = targetAddressSpace->TranslationMap();
2169 size_t reservePages = map->MaxPagesNeededToMap(
2170 newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2171 vm_page_reservation reservation;
2172 vm_page_reserve_pages(&reservation, reservePages,
2173 targetAddressSpace == VMAddressSpace::Kernel()
2174 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2176 // map in all pages from source
2177 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2178 vm_page* page = it.Next();) {
2179 if (!page->busy) {
2180 DEBUG_PAGE_ACCESS_START(page);
2181 map_page(newArea, page,
2182 newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2183 - newArea->cache_offset),
2184 protection, &reservation);
2185 DEBUG_PAGE_ACCESS_END(page);
2188 // TODO: B_FULL_LOCK means that all pages are locked. We are not
2189 // ensuring that!
2191 vm_page_unreserve_pages(&reservation);
2194 if (status == B_OK)
2195 newArea->cache_type = sourceArea->cache_type;
2197 vm_area_put_locked_cache(cache);
2199 if (status < B_OK)
2200 return status;
2202 return newArea->id;
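// Illustrative sketch (not part of the original source): a driver could use
// vm_clone_area() to make a kernel-created buffer visible to another team;
// "team" and "sourceArea" are hypothetical. The clone shares the source
// area's cache, so both areas end up marked B_SHARED_AREA.
//
//   void* address = NULL;
//   area_id clone = vm_clone_area(team, "cloned buffer", &address,
//       B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA
//           | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP, sourceArea, true);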
2206 /*! Deletes the specified area of the given address space.
2208 The address space must be write-locked.
2209 The caller must ensure that the area does not have any wired ranges.
2211 \param addressSpace The address space containing the area.
2212 \param area The area to be deleted.
2213 \param deletingAddressSpace \c true, if the address space is in the process
2214 of being deleted.
2216 static void
2217 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2218 bool deletingAddressSpace)
2220 ASSERT(!area->IsWired());
2222 VMAreaHash::Remove(area);
2224 // At this point the area is removed from the global hash table, but
2225 // still exists in the area list.
2227 // Unmap the virtual address space the area occupied.
2229 // We need to lock the complete cache chain.
2230 VMCache* topCache = vm_area_get_locked_cache(area);
2231 VMCacheChainLocker cacheChainLocker(topCache);
2232 cacheChainLocker.LockAllSourceCaches();
2234 // If the area's top cache is a temporary cache and the area is the only
2235 // one referencing it (besides us currently holding a second reference),
2236 // the unmapping code doesn't need to care about preserving the accessed
2237 // and dirty flags of the top cache page mappings.
2238 bool ignoreTopCachePageFlags
2239 = topCache->temporary && topCache->RefCount() == 2;
2241 area->address_space->TranslationMap()->UnmapArea(area,
2242 deletingAddressSpace, ignoreTopCachePageFlags);
2245 if (!area->cache->temporary)
2246 area->cache->WriteModified();
2248 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2249 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2251 arch_vm_unset_memory_type(area);
2252 addressSpace->RemoveArea(area, allocationFlags);
2253 addressSpace->Put();
2255 area->cache->RemoveArea(area);
2256 area->cache->ReleaseRef();
2258 addressSpace->DeleteArea(area, allocationFlags);
2262 status_t
2263 vm_delete_area(team_id team, area_id id, bool kernel)
2265 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2266 team, id));
2268 // lock the address space and make sure the area isn't wired
2269 AddressSpaceWriteLocker locker;
2270 VMArea* area;
2271 AreaCacheLocker cacheLocker;
2273 do {
2274 status_t status = locker.SetFromArea(team, id, area);
2275 if (status != B_OK)
2276 return status;
2278 cacheLocker.SetTo(area);
2279 } while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2281 cacheLocker.Unlock();
2283 if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2284 return B_NOT_ALLOWED;
2286 delete_area(locker.AddressSpace(), area, false);
2287 return B_OK;
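// Illustrative sketch (not part of the original source): deleting a kernel
// area by ID ("area" is assumed to hold an ID returned by one of the
// creation functions above):
//
//   status_t status = vm_delete_area(VMAddressSpace::KernelID(), area, true);
//   if (status != B_OK)
//       dprintf("vm_delete_area() failed: %s\n", strerror(status));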
2291 /*! Creates a new cache on top of given cache, moves all areas from
2292 the old cache to the new one, and changes the protection of all affected
2293 areas' pages to read-only. If requested, wired pages are moved up to the
2294 new cache and copies are added to the old cache in their place.
2295 Preconditions:
2296 - The given cache must be locked.
2297 - All of the cache's areas' address spaces must be read locked.
2298 - Either the cache must not have any wired ranges or a page reservation for
2299 all wired pages must be provided, so they can be copied.
2301 \param lowerCache The cache on top of which a new cache shall be created.
2302 \param wiredPagesReservation If \c NULL there must not be any wired pages
2303 in \a lowerCache. Otherwise as many pages must be reserved as the cache
2304 has wired pages. The wired pages are copied in this case.
2306 static status_t
2307 vm_copy_on_write_area(VMCache* lowerCache,
2308 vm_page_reservation* wiredPagesReservation)
2310 VMCache* upperCache;
2312 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2314 // We need to separate the cache from its areas. The cache goes one level
2315 // deeper and we create a new cache in between.
2317 // create an anonymous cache
2318 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2319 lowerCache->GuardSize() / B_PAGE_SIZE,
2320 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2321 VM_PRIORITY_USER);
2322 if (status != B_OK)
2323 return status;
2325 upperCache->Lock();
2327 upperCache->temporary = 1;
2328 upperCache->virtual_base = lowerCache->virtual_base;
2329 upperCache->virtual_end = lowerCache->virtual_end;
2331 // transfer the lower cache areas to the upper cache
2332 rw_lock_write_lock(&sAreaCacheLock);
2333 upperCache->TransferAreas(lowerCache);
2334 rw_lock_write_unlock(&sAreaCacheLock);
2336 lowerCache->AddConsumer(upperCache);
2338 // We now need to remap all pages from all of the cache's areas read-only,
2339 // so that a copy will be created on next write access. If there are wired
2340 // pages, we keep their protection, move them to the upper cache and create
2341 // copies for the lower cache.
2342 if (wiredPagesReservation != NULL) {
2343 // We need to handle wired pages -- iterate through the cache's pages.
2344 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2345 vm_page* page = it.Next();) {
2346 if (page->WiredCount() > 0) {
2347 // allocate a new page and copy the wired one
2348 vm_page* copiedPage = vm_page_allocate_page(
2349 wiredPagesReservation, PAGE_STATE_ACTIVE);
2351 vm_memcpy_physical_page(
2352 copiedPage->physical_page_number * B_PAGE_SIZE,
2353 page->physical_page_number * B_PAGE_SIZE);
2355 // move the wired page to the upper cache (note: removing is OK
2356 // with the SplayTree iterator) and insert the copy
2357 upperCache->MovePage(page);
2358 lowerCache->InsertPage(copiedPage,
2359 page->cache_offset * B_PAGE_SIZE);
2361 DEBUG_PAGE_ACCESS_END(copiedPage);
2362 } else {
2363 // Change the protection of this page in all areas.
2364 for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2365 tempArea = tempArea->cache_next) {
2366 // The area must be readable in the same way it was
2367 // previously writable.
2368 uint32 protection = B_KERNEL_READ_AREA;
2369 if ((tempArea->protection & B_READ_AREA) != 0)
2370 protection |= B_READ_AREA;
2372 VMTranslationMap* map
2373 = tempArea->address_space->TranslationMap();
2374 map->Lock();
2375 map->ProtectPage(tempArea,
2376 virtual_page_address(tempArea, page), protection);
2377 map->Unlock();
2381 } else {
2382 ASSERT(lowerCache->WiredPagesCount() == 0);
2384 // just change the protection of all areas
2385 for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2386 tempArea = tempArea->cache_next) {
2387 // The area must be readable in the same way it was previously
2388 // writable.
2389 uint32 protection = B_KERNEL_READ_AREA;
2390 if ((tempArea->protection & B_READ_AREA) != 0)
2391 protection |= B_READ_AREA;
2393 VMTranslationMap* map = tempArea->address_space->TranslationMap();
2394 map->Lock();
2395 map->ProtectArea(tempArea, protection);
2396 map->Unlock();
2400 vm_area_put_locked_cache(upperCache);
2402 return B_OK;
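// Illustrative sketch (not part of the original source) of what
// vm_copy_on_write_area() does to the cache chain. Before the call the
// areas reference lowerCache directly; afterwards the new anonymous
// upperCache sits in between and takes the write faults:
//
//   before:  area1, area2 --> lowerCache --> (source chain)
//   after:   area1, area2 --> upperCache --> lowerCache --> (source chain)
//
// Pages are only copied lazily on the next write fault; wired pages are the
// exception and are moved up immediately, with copies left in lowerCache.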
2406 area_id
2407 vm_copy_area(team_id team, const char* name, void** _address,
2408 uint32 addressSpec, uint32 protection, area_id sourceID)
2410 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2412 if ((protection & B_KERNEL_PROTECTION) == 0) {
2413 // set the same protection for the kernel as for userland
2414 protection |= B_KERNEL_READ_AREA;
2415 if (writableCopy)
2416 protection |= B_KERNEL_WRITE_AREA;
2419 // Do the locking: target address space, all address spaces associated with
2420 // the source cache, and the cache itself.
2421 MultiAddressSpaceLocker locker;
2422 VMAddressSpace* targetAddressSpace;
2423 VMCache* cache;
2424 VMArea* source;
2425 AreaCacheLocker cacheLocker;
2426 status_t status;
2427 bool sharedArea;
2429 page_num_t wiredPages = 0;
2430 vm_page_reservation wiredPagesReservation;
2432 bool restart;
2433 do {
2434 restart = false;
2436 locker.Unset();
2437 status = locker.AddTeam(team, true, &targetAddressSpace);
2438 if (status == B_OK) {
2439 status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2440 &cache);
2442 if (status != B_OK)
2443 return status;
2445 cacheLocker.SetTo(cache, true); // already locked
2447 sharedArea = (source->protection & B_SHARED_AREA) != 0;
2449 page_num_t oldWiredPages = wiredPages;
2450 wiredPages = 0;
2452 // If the source area isn't shared, count the number of wired pages in
2453 // the cache and reserve as many pages.
2454 if (!sharedArea) {
2455 wiredPages = cache->WiredPagesCount();
2457 if (wiredPages > oldWiredPages) {
2458 cacheLocker.Unlock();
2459 locker.Unlock();
2461 if (oldWiredPages > 0)
2462 vm_page_unreserve_pages(&wiredPagesReservation);
2464 vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2465 VM_PRIORITY_USER);
2467 restart = true;
2469 } else if (oldWiredPages > 0)
2470 vm_page_unreserve_pages(&wiredPagesReservation);
2471 } while (restart);
2473 // unreserve pages later
2474 struct PagesUnreserver {
2475 PagesUnreserver(vm_page_reservation* reservation)
2477 fReservation(reservation)
2481 ~PagesUnreserver()
2483 if (fReservation != NULL)
2484 vm_page_unreserve_pages(fReservation);
2487 private:
2488 vm_page_reservation* fReservation;
2489 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2491 if (addressSpec == B_CLONE_ADDRESS) {
2492 addressSpec = B_EXACT_ADDRESS;
2493 *_address = (void*)source->Base();
2496 // First, create a cache on top of the source area, or, if this is a
2497 // shared area, use the existing one.
2499 VMArea* target;
2500 virtual_address_restrictions addressRestrictions = {};
2501 addressRestrictions.address = *_address;
2502 addressRestrictions.address_specification = addressSpec;
2503 status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2504 name, source->Size(), source->wiring, protection,
2505 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2506 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2507 &addressRestrictions, true, &target, _address);
2508 if (status < B_OK)
2509 return status;
2511 if (sharedArea) {
2512 // The new area uses the old area's cache, but map_backing_store()
2513 // hasn't acquired a ref. So we have to do that now.
2514 cache->AcquireRefLocked();
2517 // If the source area is writable, we need to move it one layer up as well
2519 if (!sharedArea) {
2520 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2521 // TODO: do something more useful if this fails!
2522 if (vm_copy_on_write_area(cache,
2523 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2524 panic("vm_copy_on_write_area() failed!\n");
2529 // we return the ID of the newly created area
2530 return target->id;
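// Illustrative sketch (not part of the original source): taking a private,
// copy-on-write snapshot of an existing area into the current team;
// "sourceArea" is a hypothetical area ID.
//
//   void* address = NULL;
//   area_id copy = vm_copy_area(VMAddressSpace::CurrentID(), "area snapshot",
//       &address, B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, sourceArea);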
2534 status_t
2535 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2536 bool kernel)
2538 fix_protection(&newProtection);
2540 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2541 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2543 if (!arch_vm_supports_protection(newProtection))
2544 return B_NOT_SUPPORTED;
2546 bool becomesWritable
2547 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2549 // lock address spaces and cache
2550 MultiAddressSpaceLocker locker;
2551 VMCache* cache;
2552 VMArea* area;
2553 status_t status;
2554 AreaCacheLocker cacheLocker;
2555 bool isWritable;
2557 bool restart;
2558 do {
2559 restart = false;
2561 locker.Unset();
2562 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2563 if (status != B_OK)
2564 return status;
2566 cacheLocker.SetTo(cache, true); // already locked
2568 if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2569 return B_NOT_ALLOWED;
2571 if (area->protection == newProtection)
2572 return B_OK;
2574 if (team != VMAddressSpace::KernelID()
2575 && area->address_space->ID() != team) {
2576 // unless you're the kernel, you are only allowed to set
2577 // the protection of your own areas
2578 return B_NOT_ALLOWED;
2581 isWritable
2582 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2584 // Make sure the area (or, if we're going to call
2585 // vm_copy_on_write_area(), all areas of the cache) doesn't have any
2586 // wired ranges.
2587 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2588 for (VMArea* otherArea = cache->areas; otherArea != NULL;
2589 otherArea = otherArea->cache_next) {
2590 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2591 restart = true;
2592 break;
2595 } else {
2596 if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2597 restart = true;
2599 } while (restart);
2601 bool changePageProtection = true;
2602 bool changeTopCachePagesOnly = false;
2604 if (isWritable && !becomesWritable) {
2605 // writable -> !writable
2607 if (cache->source != NULL && cache->temporary) {
2608 if (cache->CountWritableAreas(area) == 0) {
2609 // Since this cache is now backed by the pages of its source cache,
2610 // we can change the cache's commitment to take into account only
2611 // those pages that really are in this cache.
2613 status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2614 team == VMAddressSpace::KernelID()
2615 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2617 // TODO: we may be able to join with our source cache, if
2618 // count == 0
2622 // If only the writability changes, we can just remap the pages of the
2623 // top cache, since the pages of lower caches are mapped read-only
2624 // anyway. That's only advantageous if the number of pages in the cache
2625 // is significantly smaller than the number of pages in the area,
2626 // though.
2627 if (newProtection
2628 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2629 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2630 changeTopCachePagesOnly = true;
2632 } else if (!isWritable && becomesWritable) {
2633 // !writable -> writable
2635 if (!cache->consumers.IsEmpty()) {
2636 // There are consumers -- we have to insert a new cache. Fortunately
2637 // vm_copy_on_write_area() does everything that's needed.
2638 changePageProtection = false;
2639 status = vm_copy_on_write_area(cache, NULL);
2640 } else {
2641 // No consumers, so we don't need to insert a new one.
2642 if (cache->source != NULL && cache->temporary) {
2643 // the cache's commitment must contain all possible pages
2644 status = cache->Commit(cache->virtual_end - cache->virtual_base,
2645 team == VMAddressSpace::KernelID()
2646 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2649 if (status == B_OK && cache->source != NULL) {
2650 // There's a source cache, hence we can't just change all pages'
2651 // protection or we might allow writing into pages belonging to
2652 // a lower cache.
2653 changeTopCachePagesOnly = true;
2656 } else {
2657 // we don't have anything special to do in all other cases
2660 if (status == B_OK) {
2661 // remap existing pages in this cache
2662 if (changePageProtection) {
2663 VMTranslationMap* map = area->address_space->TranslationMap();
2664 map->Lock();
2666 if (changeTopCachePagesOnly) {
2667 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2668 page_num_t lastPageOffset
2669 = firstPageOffset + area->Size() / B_PAGE_SIZE;
2670 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2671 vm_page* page = it.Next();) {
2672 if (page->cache_offset >= firstPageOffset
2673 && page->cache_offset < lastPageOffset) {
2674 addr_t address = virtual_page_address(area, page);
2675 map->ProtectPage(area, address, newProtection);
2678 } else
2679 map->ProtectArea(area, newProtection);
2681 map->Unlock();
2684 area->protection = newProtection;
2687 return status;
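// Illustrative sketch (not part of the original source): revoking write
// access from an area of the current team; "area" is a hypothetical area
// ID. The function runs newProtection through fix_protection() itself,
// which is expected to add the matching kernel protection bits.
//
//   status_t status = vm_set_area_protection(VMAddressSpace::CurrentID(),
//       area, B_READ_AREA, false);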
2691 status_t
2692 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2694 VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2695 if (addressSpace == NULL)
2696 return B_BAD_TEAM_ID;
2698 VMTranslationMap* map = addressSpace->TranslationMap();
2700 map->Lock();
2701 uint32 dummyFlags;
2702 status_t status = map->Query(vaddr, paddr, &dummyFlags);
2703 map->Unlock();
2705 addressSpace->Put();
2706 return status;
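// Illustrative sketch (not part of the original source): looking up the
// physical page backing a kernel virtual address; "virtualAddress" is a
// hypothetical, currently mapped address.
//
//   phys_addr_t physicalAddress;
//   if (vm_get_page_mapping(VMAddressSpace::KernelID(), virtualAddress,
//           &physicalAddress) == B_OK) {
//       dprintf("mapped to %" B_PRIuPHYSADDR "\n", physicalAddress);
//   }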
2710 /*! The page's cache must be locked.
2712 bool
2713 vm_test_map_modification(vm_page* page)
2715 if (page->modified)
2716 return true;
2718 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2719 vm_page_mapping* mapping;
2720 while ((mapping = iterator.Next()) != NULL) {
2721 VMArea* area = mapping->area;
2722 VMTranslationMap* map = area->address_space->TranslationMap();
2724 phys_addr_t physicalAddress;
2725 uint32 flags;
2726 map->Lock();
2727 map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2728 map->Unlock();
2730 if ((flags & PAGE_MODIFIED) != 0)
2731 return true;
2734 return false;
2738 /*! The page's cache must be locked.
2740 void
2741 vm_clear_map_flags(vm_page* page, uint32 flags)
2743 if ((flags & PAGE_ACCESSED) != 0)
2744 page->accessed = false;
2745 if ((flags & PAGE_MODIFIED) != 0)
2746 page->modified = false;
2748 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2749 vm_page_mapping* mapping;
2750 while ((mapping = iterator.Next()) != NULL) {
2751 VMArea* area = mapping->area;
2752 VMTranslationMap* map = area->address_space->TranslationMap();
2754 map->Lock();
2755 map->ClearFlags(virtual_page_address(area, page), flags);
2756 map->Unlock();
2761 /*! Removes all mappings from a page.
2762 After you've called this function, the page is unmapped from memory and
2763 the page's \c accessed and \c modified flags have been updated according
2764 to the state of the mappings.
2765 The page's cache must be locked.
2767 void
2768 vm_remove_all_page_mappings(vm_page* page)
2770 while (vm_page_mapping* mapping = page->mappings.Head()) {
2771 VMArea* area = mapping->area;
2772 VMTranslationMap* map = area->address_space->TranslationMap();
2773 addr_t address = virtual_page_address(area, page);
2774 map->UnmapPage(area, address, false);
2779 int32
2780 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2782 int32 count = 0;
2784 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2785 vm_page_mapping* mapping;
2786 while ((mapping = iterator.Next()) != NULL) {
2787 VMArea* area = mapping->area;
2788 VMTranslationMap* map = area->address_space->TranslationMap();
2790 bool modified;
2791 if (map->ClearAccessedAndModified(area,
2792 virtual_page_address(area, page), false, modified)) {
2793 count++;
2796 page->modified |= modified;
2800 if (page->accessed) {
2801 count++;
2802 page->accessed = false;
2805 return count;
2809 /*! Removes all mappings of a page and/or clears the accessed bits of the
2810 mappings.
2811 The function iterates through the page mappings and removes them until
2812 encountering one that has been accessed. From then on it will continue to
2813 iterate, but only clear the accessed flag of the mapping. The page's
2814 \c modified bit will be updated accordingly, the \c accessed bit will be
2815 cleared.
2816 \return The number of mapping accessed bits encountered, including the
2817 \c accessed bit of the page itself. If \c 0 is returned, all mappings
2818 of the page have been removed.
2820 int32
2821 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2823 ASSERT(page->WiredCount() == 0);
2825 if (page->accessed)
2826 return vm_clear_page_mapping_accessed_flags(page);
2828 while (vm_page_mapping* mapping = page->mappings.Head()) {
2829 VMArea* area = mapping->area;
2830 VMTranslationMap* map = area->address_space->TranslationMap();
2831 addr_t address = virtual_page_address(area, page);
2832 bool modified = false;
2833 if (map->ClearAccessedAndModified(area, address, true, modified)) {
2834 page->accessed = true;
2835 page->modified |= modified;
2836 return vm_clear_page_mapping_accessed_flags(page);
2838 page->modified |= modified;
2841 return 0;
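// Illustrative sketch (not part of the original source): a page scanning
// loop could use the function above to unmap pages that have not been
// accessed since the last pass. "page" is a hypothetical page whose cache
// is locked and that has no wired mappings:
//
//   if (vm_remove_all_page_mappings_if_unaccessed(page) == 0) {
//       // No accessed bit was found anywhere: all mappings are gone and
//       // the caller may treat the page as inactive.
//   }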
2845 static int
2846 display_mem(int argc, char** argv)
2848 bool physical = false;
2849 addr_t copyAddress;
2850 int32 displayWidth;
2851 int32 itemSize;
2852 int32 num = -1;
2853 addr_t address;
2854 int i = 1, j;
2856 if (argc > 1 && argv[1][0] == '-') {
2857 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2858 physical = true;
2859 i++;
2860 } else
2861 i = 99;
2864 if (argc < i + 1 || argc > i + 2) {
2865 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2866 "\tdl - 8 bytes\n"
2867 "\tdw - 4 bytes\n"
2868 "\tds - 2 bytes\n"
2869 "\tdb - 1 byte\n"
2870 "\tstring - a whole string\n"
2871 " -p or --physical only allows memory from a single page to be "
2872 "displayed.\n");
2873 return 0;
2876 address = parse_expression(argv[i]);
2878 if (argc > i + 1)
2879 num = parse_expression(argv[i + 1]);
2881 // build the format string
2882 if (strcmp(argv[0], "db") == 0) {
2883 itemSize = 1;
2884 displayWidth = 16;
2885 } else if (strcmp(argv[0], "ds") == 0) {
2886 itemSize = 2;
2887 displayWidth = 8;
2888 } else if (strcmp(argv[0], "dw") == 0) {
2889 itemSize = 4;
2890 displayWidth = 4;
2891 } else if (strcmp(argv[0], "dl") == 0) {
2892 itemSize = 8;
2893 displayWidth = 2;
2894 } else if (strcmp(argv[0], "string") == 0) {
2895 itemSize = 1;
2896 displayWidth = -1;
2897 } else {
2898 kprintf("display_mem called in an invalid way!\n");
2899 return 0;
2902 if (num <= 0)
2903 num = displayWidth;
2905 void* physicalPageHandle = NULL;
2907 if (physical) {
2908 int32 offset = address & (B_PAGE_SIZE - 1);
2909 if (num * itemSize + offset > B_PAGE_SIZE) {
2910 num = (B_PAGE_SIZE - offset) / itemSize;
2911 kprintf("NOTE: number of bytes has been cut to page size\n");
2914 address = ROUNDDOWN(address, B_PAGE_SIZE);
2916 if (vm_get_physical_page_debug(address, &copyAddress,
2917 &physicalPageHandle) != B_OK) {
2918 kprintf("getting the hardware page failed.");
2919 return 0;
2922 address += offset;
2923 copyAddress += offset;
2924 } else
2925 copyAddress = address;
2927 if (!strcmp(argv[0], "string")) {
2928 kprintf("%p \"", (char*)copyAddress);
2930 // string mode
2931 for (i = 0; true; i++) {
2932 char c;
2933 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2934 != B_OK
2935 || c == '\0') {
2936 break;
2939 if (c == '\n')
2940 kprintf("\\n");
2941 else if (c == '\t')
2942 kprintf("\\t");
2943 else {
2944 if (!isprint(c))
2945 c = '.';
2947 kprintf("%c", c);
2951 kprintf("\"\n");
2952 } else {
2953 // number mode
2954 for (i = 0; i < num; i++) {
2955 uint32 value;
2957 if ((i % displayWidth) == 0) {
2958 int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2959 if (i != 0)
2960 kprintf("\n");
2962 kprintf("[0x%lx] ", address + i * itemSize);
2964 for (j = 0; j < displayed; j++) {
2965 char c;
2966 if (debug_memcpy(B_CURRENT_TEAM, &c,
2967 (char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2968 displayed = j;
2969 break;
2971 if (!isprint(c))
2972 c = '.';
2974 kprintf("%c", c);
2976 if (num > displayWidth) {
2977 // make sure the spacing in the last line is correct
2978 for (j = displayed; j < displayWidth * itemSize; j++)
2979 kprintf(" ");
2981 kprintf(" ");
2984 if (debug_memcpy(B_CURRENT_TEAM, &value,
2985 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2986 kprintf("read fault");
2987 break;
2990 switch (itemSize) {
2991 case 1:
2992 kprintf(" %02" B_PRIx8, *(uint8*)&value);
2993 break;
2994 case 2:
2995 kprintf(" %04" B_PRIx16, *(uint16*)&value);
2996 break;
2997 case 4:
2998 kprintf(" %08" B_PRIx32, *(uint32*)&value);
2999 break;
3000 case 8:
3001 kprintf(" %016" B_PRIx64, *(uint64*)&value);
3002 break;
3006 kprintf("\n");
3009 if (physical) {
3010 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3011 vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3013 return 0;
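// Illustrative examples (not part of the original source) of the memory
// dumping commands implemented above, as they could be typed in the kernel
// debugger (the addresses are placeholders):
//
//   dw 0x80123000 8          print eight 4-byte words
//   db -p 0x0009f000 16      print 16 bytes of a physical page
//   string 0x80123000        print the NUL-terminated string at the address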
3017 static void
3018 dump_cache_tree_recursively(VMCache* cache, int level,
3019 VMCache* highlightCache)
3021 // print this cache
3022 for (int i = 0; i < level; i++)
3023 kprintf(" ");
3024 if (cache == highlightCache)
3025 kprintf("%p <--\n", cache);
3026 else
3027 kprintf("%p\n", cache);
3029 // recursively print its consumers
3030 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3031 VMCache* consumer = it.Next();) {
3032 dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3037 static int
3038 dump_cache_tree(int argc, char** argv)
3040 if (argc != 2 || !strcmp(argv[1], "--help")) {
3041 kprintf("usage: %s <address>\n", argv[0]);
3042 return 0;
3045 addr_t address = parse_expression(argv[1]);
3046 if (address == 0)
3047 return 0;
3049 VMCache* cache = (VMCache*)address;
3050 VMCache* root = cache;
3052 // find the root cache (the transitive source)
3053 while (root->source != NULL)
3054 root = root->source;
3056 dump_cache_tree_recursively(root, 0, cache);
3058 return 0;
3062 const char*
3063 vm_cache_type_to_string(int32 type)
3065 switch (type) {
3066 case CACHE_TYPE_RAM:
3067 return "RAM";
3068 case CACHE_TYPE_DEVICE:
3069 return "device";
3070 case CACHE_TYPE_VNODE:
3071 return "vnode";
3072 case CACHE_TYPE_NULL:
3073 return "null";
3075 default:
3076 return "unknown";
3081 #if DEBUG_CACHE_LIST
3083 static void
3084 update_cache_info_recursively(VMCache* cache, cache_info& info)
3086 info.page_count += cache->page_count;
3087 if (cache->type == CACHE_TYPE_RAM)
3088 info.committed += cache->committed_size;
3090 // recurse
3091 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3092 VMCache* consumer = it.Next();) {
3093 update_cache_info_recursively(consumer, info);
3098 static int
3099 cache_info_compare_page_count(const void* _a, const void* _b)
3101 const cache_info* a = (const cache_info*)_a;
3102 const cache_info* b = (const cache_info*)_b;
3103 if (a->page_count == b->page_count)
3104 return 0;
3105 return a->page_count < b->page_count ? 1 : -1;
3109 static int
3110 cache_info_compare_committed(const void* _a, const void* _b)
3112 const cache_info* a = (const cache_info*)_a;
3113 const cache_info* b = (const cache_info*)_b;
3114 if (a->committed == b->committed)
3115 return 0;
3116 return a->committed < b->committed ? 1 : -1;
3120 static void
3121 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3123 for (int i = 0; i < level; i++)
3124 kprintf(" ");
3126 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3127 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3128 cache->virtual_base, cache->virtual_end, cache->page_count);
3130 if (level == 0)
3131 kprintf("/%lu", info.page_count);
3133 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3134 kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3136 if (level == 0)
3137 kprintf("/%lu", info.committed);
3140 // areas
3141 if (cache->areas != NULL) {
3142 VMArea* area = cache->areas;
3143 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3144 area->name, area->address_space->ID());
3146 while (area->cache_next != NULL) {
3147 area = area->cache_next;
3148 kprintf(", %" B_PRId32, area->id);
3152 kputs("\n");
3154 // recurse
3155 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3156 VMCache* consumer = it.Next();) {
3157 dump_caches_recursively(consumer, info, level + 1);
3162 static int
3163 dump_caches(int argc, char** argv)
3165 if (sCacheInfoTable == NULL) {
3166 kprintf("No cache info table!\n");
3167 return 0;
3170 bool sortByPageCount = true;
3172 for (int32 i = 1; i < argc; i++) {
3173 if (strcmp(argv[i], "-c") == 0) {
3174 sortByPageCount = false;
3175 } else {
3176 print_debugger_command_usage(argv[0]);
3177 return 0;
3181 uint32 totalCount = 0;
3182 uint32 rootCount = 0;
3183 off_t totalCommitted = 0;
3184 page_num_t totalPages = 0;
3186 VMCache* cache = gDebugCacheList;
3187 while (cache) {
3188 totalCount++;
3189 if (cache->source == NULL) {
3190 cache_info stackInfo;
3191 cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3192 ? sCacheInfoTable[rootCount] : stackInfo;
3193 rootCount++;
3194 info.cache = cache;
3195 info.page_count = 0;
3196 info.committed = 0;
3197 update_cache_info_recursively(cache, info);
3198 totalCommitted += info.committed;
3199 totalPages += info.page_count;
3202 cache = cache->debug_next;
3205 if (rootCount <= (uint32)kCacheInfoTableCount) {
3206 qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3207 sortByPageCount
3208 ? &cache_info_compare_page_count
3209 : &cache_info_compare_committed);
3212 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3213 B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3214 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3215 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3216 "page count" : "committed size");
3218 if (rootCount <= (uint32)kCacheInfoTableCount) {
3219 for (uint32 i = 0; i < rootCount; i++) {
3220 cache_info& info = sCacheInfoTable[i];
3221 dump_caches_recursively(info.cache, info, 0);
3223 } else
3224 kprintf("Cache info table too small! Can't sort and print caches!\n");
3226 return 0;
3229 #endif // DEBUG_CACHE_LIST
3232 static int
3233 dump_cache(int argc, char** argv)
3235 VMCache* cache;
3236 bool showPages = false;
3237 int i = 1;
3239 if (argc < 2 || !strcmp(argv[1], "--help")) {
3240 kprintf("usage: %s [-ps] <address>\n"
3241 " if -p is specified, all pages are shown, if -s is used\n"
3242 " only the cache info is shown respectively.\n", argv[0]);
3243 return 0;
3245 while (argv[i][0] == '-') {
3246 char* arg = argv[i] + 1;
3247 while (arg[0]) {
3248 if (arg[0] == 'p')
3249 showPages = true;
3250 arg++;
3252 i++;
3254 if (argv[i] == NULL) {
3255 kprintf("%s: invalid argument, pass address\n", argv[0]);
3256 return 0;
3259 addr_t address = parse_expression(argv[i]);
3260 if (address == 0)
3261 return 0;
3263 cache = (VMCache*)address;
3265 cache->Dump(showPages);
3267 set_debug_variable("_sourceCache", (addr_t)cache->source);
3269 return 0;
3273 static void
3274 dump_area_struct(VMArea* area, bool mappings)
3276 kprintf("AREA: %p\n", area);
3277 kprintf("name:\t\t'%s'\n", area->name);
3278 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3279 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3280 kprintf("base:\t\t0x%lx\n", area->Base());
3281 kprintf("size:\t\t0x%lx\n", area->Size());
3282 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3283 kprintf("wiring:\t\t0x%x\n", area->wiring);
3284 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3285 kprintf("cache:\t\t%p\n", area->cache);
3286 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3287 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3288 kprintf("cache_next:\t%p\n", area->cache_next);
3289 kprintf("cache_prev:\t%p\n", area->cache_prev);
3291 VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3292 if (mappings) {
3293 kprintf("page mappings:\n");
3294 while (iterator.HasNext()) {
3295 vm_page_mapping* mapping = iterator.Next();
3296 kprintf(" %p", mapping->page);
3298 kprintf("\n");
3299 } else {
3300 uint32 count = 0;
3301 while (iterator.Next() != NULL) {
3302 count++;
3304 kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3309 static int
3310 dump_area(int argc, char** argv)
3312 bool mappings = false;
3313 bool found = false;
3314 int32 index = 1;
3315 VMArea* area;
3316 addr_t num;
3318 if (argc < 2 || !strcmp(argv[1], "--help")) {
3319 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3320 "All areas matching either id/address/name are listed. You can\n"
3321 "force to check only a specific item by prefixing the specifier\n"
3322 "with the id/contains/address/name keywords.\n"
3323 "-m shows the area's mappings as well.\n");
3324 return 0;
3327 if (!strcmp(argv[1], "-m")) {
3328 mappings = true;
3329 index++;
3332 int32 mode = 0xf;
3333 if (!strcmp(argv[index], "id"))
3334 mode = 1;
3335 else if (!strcmp(argv[index], "contains"))
3336 mode = 2;
3337 else if (!strcmp(argv[index], "name"))
3338 mode = 4;
3339 else if (!strcmp(argv[index], "address"))
3340 mode = 0;
3341 if (mode != 0xf)
3342 index++;
3344 if (index >= argc) {
3345 kprintf("No area specifier given.\n");
3346 return 0;
3349 num = parse_expression(argv[index]);
3351 if (mode == 0) {
3352 dump_area_struct((struct VMArea*)num, mappings);
3353 } else {
3354 // walk through the area list, looking for the arguments as a name
3356 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3357 while ((area = it.Next()) != NULL) {
3358 if (((mode & 4) != 0 && area->name != NULL
3359 && !strcmp(argv[index], area->name))
3360 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3361 || (((mode & 2) != 0 && area->Base() <= num
3362 && area->Base() + area->Size() > num))))) {
3363 dump_area_struct(area, mappings);
3364 found = true;
3368 if (!found)
3369 kprintf("could not find area %s (%ld)\n", argv[index], num);
3372 return 0;
3376 static int
3377 dump_area_list(int argc, char** argv)
3379 VMArea* area;
3380 const char* name = NULL;
3381 int32 id = 0;
3383 if (argc > 1) {
3384 id = parse_expression(argv[1]);
3385 if (id == 0)
3386 name = argv[1];
3389 kprintf("%-*s id %-*s %-*sprotect lock name\n",
3390 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3391 B_PRINTF_POINTER_WIDTH, "size");
3393 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3394 while ((area = it.Next()) != NULL) {
3395 if ((id != 0 && area->address_space->ID() != id)
3396 || (name != NULL && strstr(area->name, name) == NULL))
3397 continue;
3399 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area,
3400 area->id, (void*)area->Base(), (void*)area->Size(),
3401 area->protection, area->wiring, area->name);
3403 return 0;
3407 static int
3408 dump_available_memory(int argc, char** argv)
3410 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3411 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3412 return 0;
3416 static int
3417 dump_mapping_info(int argc, char** argv)
3419 bool reverseLookup = false;
3420 bool pageLookup = false;
3422 int argi = 1;
3423 for (; argi < argc && argv[argi][0] == '-'; argi++) {
3424 const char* arg = argv[argi];
3425 if (strcmp(arg, "-r") == 0) {
3426 reverseLookup = true;
3427 } else if (strcmp(arg, "-p") == 0) {
3428 reverseLookup = true;
3429 pageLookup = true;
3430 } else {
3431 print_debugger_command_usage(argv[0]);
3432 return 0;
3436 // We need at least one argument, the address. Optionally a thread ID can be
3437 // specified.
3438 if (argi >= argc || argi + 2 < argc) {
3439 print_debugger_command_usage(argv[0]);
3440 return 0;
3443 uint64 addressValue;
3444 if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3445 return 0;
3447 Team* team = NULL;
3448 if (argi < argc) {
3449 uint64 threadID;
3450 if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3451 return 0;
3453 Thread* thread = Thread::GetDebug(threadID);
3454 if (thread == NULL) {
3455 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3456 return 0;
3459 team = thread->team;
3462 if (reverseLookup) {
3463 phys_addr_t physicalAddress;
3464 if (pageLookup) {
3465 vm_page* page = (vm_page*)(addr_t)addressValue;
3466 physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3467 } else {
3468 physicalAddress = (phys_addr_t)addressValue;
3469 physicalAddress -= physicalAddress % B_PAGE_SIZE;
3472 kprintf(" Team Virtual Address Area\n");
3473 kprintf("--------------------------------------\n");
3475 struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3476 Callback()
3478 fAddressSpace(NULL)
3482 void SetAddressSpace(VMAddressSpace* addressSpace)
3484 fAddressSpace = addressSpace;
3487 virtual bool HandleVirtualAddress(addr_t virtualAddress)
3489 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(),
3490 virtualAddress);
3491 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3492 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name);
3493 else
3494 kprintf("\n");
3495 return false;
3498 private:
3499 VMAddressSpace* fAddressSpace;
3500 } callback;
3502 if (team != NULL) {
3503 // team specified -- get its address space
3504 VMAddressSpace* addressSpace = team->address_space;
3505 if (addressSpace == NULL) {
3506 kprintf("Failed to get address space!\n");
3507 return 0;
3510 callback.SetAddressSpace(addressSpace);
3511 addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3512 physicalAddress, callback);
3513 } else {
3514 // no team specified -- iterate through all address spaces
3515 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3516 addressSpace != NULL;
3517 addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3518 callback.SetAddressSpace(addressSpace);
3519 addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3520 physicalAddress, callback);
3523 } else {
3524 // get the address space
3525 addr_t virtualAddress = (addr_t)addressValue;
3526 virtualAddress -= virtualAddress % B_PAGE_SIZE;
3527 VMAddressSpace* addressSpace;
3528 if (IS_KERNEL_ADDRESS(virtualAddress)) {
3529 addressSpace = VMAddressSpace::Kernel();
3530 } else if (team != NULL) {
3531 addressSpace = team->address_space;
3532 } else {
3533 Thread* thread = debug_get_debugged_thread();
3534 if (thread == NULL || thread->team == NULL) {
3535 kprintf("Failed to get team!\n");
3536 return 0;
3539 addressSpace = thread->team->address_space;
3542 if (addressSpace == NULL) {
3543 kprintf("Failed to get address space!\n");
3544 return 0;
3547 // let the translation map implementation do the job
3548 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3551 return 0;
3555 /*! Deletes all areas and reserved regions in the given address space.
3557 The caller must ensure that none of the areas has any wired ranges.
3559 \param addressSpace The address space.
3560 \param deletingAddressSpace \c true, if the address space is in the process
3561 of being deleted.
3563 void
3564 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3566 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3567 addressSpace->ID()));
3569 addressSpace->WriteLock();
3571 // remove all reserved areas in this address space
3572 addressSpace->UnreserveAllAddressRanges(0);
3574 // delete all the areas in this address space
3575 while (VMArea* area = addressSpace->FirstArea()) {
3576 ASSERT(!area->IsWired());
3577 delete_area(addressSpace, area, deletingAddressSpace);
3580 addressSpace->WriteUnlock();
3584 static area_id
3585 vm_area_for(addr_t address, bool kernel)
3587 team_id team;
3588 if (IS_USER_ADDRESS(address)) {
3589 // we try the user team address space, if any
3590 team = VMAddressSpace::CurrentID();
3591 if (team < 0)
3592 return team;
3593 } else
3594 team = VMAddressSpace::KernelID();
3596 AddressSpaceReadLocker locker(team);
3597 if (!locker.IsLocked())
3598 return B_BAD_TEAM_ID;
3600 VMArea* area = locker.AddressSpace()->LookupArea(address);
3601 if (area != NULL) {
3602 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3603 return B_ERROR;
3605 return area->id;
3608 return B_ERROR;
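// Illustrative sketch (not part of the original source): how this helper
// resolves an address to the containing area, e.g. for a kernel pointer
// ("pointer" is hypothetical):
//
//   area_id area = vm_area_for((addr_t)pointer, true);
//   if (area < 0)
//       dprintf("no area contains %p\n", pointer);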
3612 /*! Frees physical pages that were used during the boot process.
3613 \a end is inclusive.
3615 static void
3616 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3618 // free all physical pages in the specified range
3620 for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3621 phys_addr_t physicalAddress;
3622 uint32 flags;
3624 if (map->Query(current, &physicalAddress, &flags) == B_OK
3625 && (flags & PAGE_PRESENT) != 0) {
3626 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3627 if (page != NULL && page->State() != PAGE_STATE_FREE
3628 && page->State() != PAGE_STATE_CLEAR
3629 && page->State() != PAGE_STATE_UNUSED) {
3630 DEBUG_PAGE_ACCESS_START(page);
3631 vm_page_set_state(page, PAGE_STATE_FREE);
3636 // unmap the memory
3637 map->Unmap(start, end);
3641 void
3642 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3644 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3645 addr_t end = start + (size - 1);
3646 addr_t lastEnd = start;
3648 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3649 (void*)start, (void*)end));
3651 // The areas are sorted in virtual address space order, so
3652 // we just have to find the holes between them that fall
3653 // into the range we should dispose of
3655 map->Lock();
3657 for (VMAddressSpace::AreaIterator it
3658 = VMAddressSpace::Kernel()->GetAreaIterator();
3659 VMArea* area = it.Next();) {
3660 addr_t areaStart = area->Base();
3661 addr_t areaEnd = areaStart + (area->Size() - 1);
3663 if (areaEnd < start)
3664 continue;
3666 if (areaStart > end) {
3667 // we are done, the area is already beyond what we have to free
3668 break;
3671 if (areaStart > lastEnd) {
3672 // this is something we can free
3673 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3674 (void*)areaStart));
3675 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3678 if (areaEnd >= end) {
3679 lastEnd = areaEnd;
3680 // no +1 to prevent potential overflow
3681 break;
3684 lastEnd = areaEnd + 1;
3687 if (lastEnd < end) {
3688 // we can also get rid of some space at the end of the area
3689 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3690 (void*)end));
3691 unmap_and_free_physical_pages(map, lastEnd, end);
3694 map->Unlock();
3698 static void
3699 create_preloaded_image_areas(struct preloaded_image* _image)
3701 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3702 char name[B_OS_NAME_LENGTH];
3703 void* address;
3704 int32 length;
3706 // use file name to create a good area name
3707 char* fileName = strrchr(image->name, '/');
3708 if (fileName == NULL)
3709 fileName = image->name;
3710 else
3711 fileName++;
3713 length = strlen(fileName);
3714 // make sure there is enough space for the suffix
3715 if (length > 25)
3716 length = 25;
3718 memcpy(name, fileName, length);
3719 strcpy(name + length, "_text");
3720 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3721 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3722 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3723 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3724 // this will later be remapped read-only/executable by the
3725 // ELF initialization code
3727 strcpy(name + length, "_data");
3728 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3729 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3730 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3731 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3735 /*! Frees all kernel argument areas that were previously created from the
3736 kernel_args structure. Any boot loader resources contained in those
3737 arguments must not be accessed anymore past this point.
3739 void
3740 vm_free_kernel_args(kernel_args* args)
3742 uint32 i;
3744 TRACE(("vm_free_kernel_args()\n"));
3746 for (i = 0; i < args->num_kernel_args_ranges; i++) {
3747 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3748 if (area >= B_OK)
3749 delete_area(area);
3754 static void
3755 allocate_kernel_args(kernel_args* args)
3757 TRACE(("allocate_kernel_args()\n"));
3759 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3760 void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3762 create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3763 args->kernel_args_range[i].size, B_ALREADY_WIRED,
3764 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3769 static void
3770 unreserve_boot_loader_ranges(kernel_args* args)
3772 TRACE(("unreserve_boot_loader_ranges()\n"));
3774 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3775 vm_unreserve_address_range(VMAddressSpace::KernelID(),
3776 (void*)(addr_t)args->virtual_allocated_range[i].start,
3777 args->virtual_allocated_range[i].size);
3782 static void
3783 reserve_boot_loader_ranges(kernel_args* args)
3785 TRACE(("reserve_boot_loader_ranges()\n"));
3787 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3788 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3790 // If the address is not a kernel address, we just skip it. The
3791 // architecture specific code has to deal with it.
3792 if (!IS_KERNEL_ADDRESS(address)) {
3793 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3794 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3795 continue;
3798 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3799 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3800 if (status < B_OK)
3801 panic("could not reserve boot loader ranges\n");
3806 static addr_t
3807 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3809 size = PAGE_ALIGN(size);
3811 // find a slot in the virtual allocation addr range
3812 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3813 // check to see if the space between this one and the last is big enough
3814 addr_t rangeStart = args->virtual_allocated_range[i].start;
3815 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3816 + args->virtual_allocated_range[i - 1].size;
3818 addr_t base = alignment > 0
3819 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3821 if (base >= KERNEL_BASE && base < rangeStart
3822 && rangeStart - base >= size) {
3823 args->virtual_allocated_range[i - 1].size
3824 += base + size - previousRangeEnd;
3825 return base;
3829 // We didn't find one between the allocation ranges. This is OK;
3830 // see if there's a gap after the last one.
3831 int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3832 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3833 + args->virtual_allocated_range[lastEntryIndex].size;
3834 addr_t base = alignment > 0
3835 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3836 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3837 args->virtual_allocated_range[lastEntryIndex].size
3838 += base + size - lastRangeEnd;
3839 return base;
3842 // see if there's a gap before the first one
3843 addr_t rangeStart = args->virtual_allocated_range[0].start;
3844 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3845 base = rangeStart - size;
3846 if (alignment > 0)
3847 base = ROUNDDOWN(base, alignment);
3849 if (base >= KERNEL_BASE) {
3850 args->virtual_allocated_range[0].start = base;
3851 args->virtual_allocated_range[0].size += rangeStart - base;
3852 return base;
3856 return 0;
3860 static bool
3861 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3863 // TODO: horrible brute-force method of determining if the page can be
3864 // allocated
3865 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3866 if (address >= args->physical_memory_range[i].start
3867 && address < args->physical_memory_range[i].start
3868 + args->physical_memory_range[i].size)
3869 return true;
3871 return false;
3875 page_num_t
3876 vm_allocate_early_physical_page(kernel_args* args)
3878 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3879 phys_addr_t nextPage;
3881 nextPage = args->physical_allocated_range[i].start
3882 + args->physical_allocated_range[i].size;
3883 // see if the page after this allocated paddr run can be allocated
3884 if (i + 1 < args->num_physical_allocated_ranges
3885 && args->physical_allocated_range[i + 1].size != 0) {
3886 // see if the next page will collide with the next allocated range
3887 if (nextPage >= args->physical_allocated_range[i+1].start)
3888 continue;
3890 // see if the next physical page fits in the memory block
3891 if (is_page_in_physical_memory_range(args, nextPage)) {
3892 // we got one!
3893 args->physical_allocated_range[i].size += B_PAGE_SIZE;
3894 return nextPage / B_PAGE_SIZE;
3898 // Expanding upwards didn't work, try going downwards.
3899 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3900 phys_addr_t nextPage;
3902 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3903 // see if the page before this allocated paddr run can be allocated
3904 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3905 // see if the page will collide with the previous allocated range
3906 if (nextPage < args->physical_allocated_range[i-1].start
3907 + args->physical_allocated_range[i-1].size)
3908 continue;
3910 // see if the next physical page fits in the memory block
3911 if (is_page_in_physical_memory_range(args, nextPage)) {
3912 // we got one!
3913 args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3914 args->physical_allocated_range[i].size += B_PAGE_SIZE;
3915 return nextPage / B_PAGE_SIZE;
3919 return 0;
3920 // could not allocate a block
3924 /*! This one uses the kernel_args' physical and virtual memory ranges to
3925 allocate some pages before the VM is completely up.
3927 addr_t
3928 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3929 uint32 attributes, addr_t alignment)
3931 if (physicalSize > virtualSize)
3932 physicalSize = virtualSize;
3934 // find the vaddr to allocate at
3935 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3936 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3937 if (virtualBase == 0) {
3938 panic("vm_allocate_early: could not allocate virtual address\n");
3939 return 0;
3942 // map the pages
3943 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3944 page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3945 if (physicalAddress == 0)
3946 panic("error allocating early page!\n");
3948 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3950 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3951 physicalAddress * B_PAGE_SIZE, attributes,
3952 &vm_allocate_early_physical_page);
3955 return virtualBase;
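// Illustrative sketch (not part of the original source): how a boot-time
// caller might use vm_allocate_early() to get a page of kernel read/write
// memory before the VM is fully up; vm_init() below uses the same pattern
// for the initial heap. The helper name is hypothetical.
#if 0
static addr_t
allocate_early_scratch_page(kernel_args* args)
{
	// one page of virtual and physical space, no particular alignment
	addr_t page = vm_allocate_early(args, B_PAGE_SIZE, B_PAGE_SIZE,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
	if (page == 0)
		panic("could not allocate early scratch page");
	return page;
}
#endif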
3959 /*! The main entrance point to initialize the VM. */
3960 status_t
3961 vm_init(kernel_args* args)
3963 struct preloaded_image* image;
3964 void* address;
3965 status_t err = 0;
3966 uint32 i;
3968 TRACE(("vm_init: entry\n"));
3969 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3970 err = arch_vm_init(args);
3972 // initialize some globals
3973 vm_page_init_num_pages(args);
3974 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3976 slab_init(args);
3978 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3979 off_t heapSize = INITIAL_HEAP_SIZE;
3980 	// try to accommodate low-memory systems
3981 while (heapSize > sAvailableMemory / 8)
3982 heapSize /= 2;
3983 if (heapSize < 1024 * 1024)
3984 panic("vm_init: go buy some RAM please.");
3986 // map in the new heap and initialize it
3987 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3988 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3989 TRACE(("heap at 0x%lx\n", heapBase));
3990 heap_init(heapBase, heapSize);
3991 #endif
3993 // initialize the free page list and physical page mapper
3994 vm_page_init(args);
3996 // initialize the cache allocators
3997 vm_cache_init(args);
4000 status_t error = VMAreaHash::Init();
4001 if (error != B_OK)
4002 panic("vm_init: error initializing area hash table\n");
4005 VMAddressSpace::Init();
4006 reserve_boot_loader_ranges(args);
4008 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4009 heap_init_post_area();
4010 #endif
4012 	// Do any further initialization that the architecture dependent layers may
4013 // need now
4014 arch_vm_translation_map_init_post_area(args);
4015 arch_vm_init_post_area(args);
4016 vm_page_init_post_area(args);
4017 slab_init_post_area();
4019 // allocate areas to represent stuff that already exists
4021 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4022 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4023 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4024 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4025 #endif
4027 allocate_kernel_args(args);
4029 create_preloaded_image_areas(args->kernel_image);
4031 // allocate areas for preloaded images
4032 for (image = args->preloaded_images; image != NULL; image = image->next)
4033 create_preloaded_image_areas(image);
4035 // allocate kernel stacks
4036 for (i = 0; i < args->num_cpus; i++) {
4037 char name[64];
4039 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4040 address = (void*)args->cpu_kstack[i].start;
4041 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4042 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4045 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4046 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4048 #if PARANOID_KERNEL_MALLOC
4049 vm_block_address_range("uninitialized heap memory",
4050 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4051 #endif
4052 #if PARANOID_KERNEL_FREE
4053 vm_block_address_range("freed heap memory",
4054 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4055 #endif
4057 // create the object cache for the page mappings
4058 gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4059 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4060 NULL, NULL);
4061 if (gPageMappingsObjectCache == NULL)
4062 panic("failed to create page mappings object cache");
4064 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4066 #if DEBUG_CACHE_LIST
4067 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4068 virtual_address_restrictions virtualRestrictions = {};
4069 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4070 physical_address_restrictions physicalRestrictions = {};
4071 create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4072 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4073 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4074 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4075 &physicalRestrictions, (void**)&sCacheInfoTable);
4077 #endif // DEBUG_CACHE_LIST
4079 // add some debugger commands
4080 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4081 add_debugger_command("area", &dump_area,
4082 "Dump info about a particular area");
4083 add_debugger_command("cache", &dump_cache, "Dump VMCache");
4084 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4085 #if DEBUG_CACHE_LIST
4086 if (sCacheInfoTable != NULL) {
4087 add_debugger_command_etc("caches", &dump_caches,
4088 "List all VMCache trees",
4089 "[ \"-c\" ]\n"
4090 "All cache trees are listed sorted in decreasing order by number "
4091 "of\n"
4092 "used pages or, if \"-c\" is specified, by size of committed "
4093 "memory.\n",
4096 #endif
4097 add_debugger_command("avail", &dump_available_memory,
4098 "Dump available memory");
4099 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4100 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4101 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4102 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4103 add_debugger_command("string", &display_mem, "dump strings");
4105 add_debugger_command_etc("mapping", &dump_mapping_info,
4106 "Print address mapping information",
4107 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4108 "Prints low-level page mapping information for a given address. If\n"
4109 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4110 "address that is looked up in the translation map of the current\n"
4111 "team, respectively the team specified by thread ID <thread ID>. If\n"
4112 "\"-r\" is specified, <address> is a physical address that is\n"
4113 "searched in the translation map of all teams, respectively the team\n"
4114 "specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4115 "<address> is the address of a vm_page structure. The behavior is\n"
4116 "equivalent to specifying \"-r\" with the physical address of that\n"
4117 "page.\n",
4120 TRACE(("vm_init: exit\n"));
4122 vm_cache_init_post_heap();
4124 return err;
4128 status_t
4129 vm_init_post_sem(kernel_args* args)
4131 	// This frees all unused boot loader resources and makes their space available
4132 // again
4133 arch_vm_init_end(args);
4134 unreserve_boot_loader_ranges(args);
4136 	// Fill in all of the semaphores that were not allocated before. Since
4137 	// we're still single-threaded and only the kernel address space exists,
4138 	// it isn't that hard to find all of the ones we need to create.
4140 arch_vm_translation_map_init_post_sem(args);
4142 slab_init_post_sem();
4144 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4145 heap_init_post_sem();
4146 #endif
4148 return B_OK;
4152 status_t
4153 vm_init_post_thread(kernel_args* args)
4155 vm_page_init_post_thread(args);
4156 slab_init_post_thread();
4157 return heap_init_post_thread();
4161 status_t
4162 vm_init_post_modules(kernel_args* args)
4164 return arch_vm_init_post_modules(args);
4168 void
4169 permit_page_faults(void)
4171 Thread* thread = thread_get_current_thread();
4172 if (thread != NULL)
4173 atomic_add(&thread->page_faults_allowed, 1);
4177 void
4178 forbid_page_faults(void)
4180 Thread* thread = thread_get_current_thread();
4181 if (thread != NULL)
4182 atomic_add(&thread->page_faults_allowed, -1);
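// Illustrative sketch (assumption, not part of the original source): the two
// functions above only maintain the per-thread page_faults_allowed counter,
// so they must be used as a strictly balanced pair; presumably the
// architecture's fault handler treats a fault while the counter is not
// positive as an error. The helper below is hypothetical.
#if 0
static void
run_with_page_faults_forbidden(void (*function)())
{
	forbid_page_faults();
		// counter decremented on entry ...
	function();
	permit_page_faults();
		// ... and incremented back on exit
}
#endif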
4186 status_t
4187 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4188 bool isUser, addr_t* newIP)
4190 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4191 faultAddress));
4193 TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4195 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4196 VMAddressSpace* addressSpace = NULL;
4198 status_t status = B_OK;
4199 *newIP = 0;
4200 atomic_add((int32*)&sPageFaults, 1);
4202 if (IS_KERNEL_ADDRESS(pageAddress)) {
4203 addressSpace = VMAddressSpace::GetKernel();
4204 } else if (IS_USER_ADDRESS(pageAddress)) {
4205 addressSpace = VMAddressSpace::GetCurrent();
4206 if (addressSpace == NULL) {
4207 if (!isUser) {
4208 dprintf("vm_page_fault: kernel thread accessing invalid user "
4209 "memory!\n");
4210 status = B_BAD_ADDRESS;
4211 TPF(PageFaultError(-1,
4212 VMPageFaultTracing
4213 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4214 } else {
4215 // XXX weird state.
4216 panic("vm_page_fault: non kernel thread accessing user memory "
4217 "that doesn't exist!\n");
4218 status = B_BAD_ADDRESS;
4221 } else {
4222 			// The hit was probably in the 64k DMZ between kernel and user space;
4223 			// this keeps a user space thread from passing a buffer that crosses
4224 			// into kernel space.
4225 status = B_BAD_ADDRESS;
4226 TPF(PageFaultError(-1,
4227 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4230 if (status == B_OK) {
4231 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4232 isUser, NULL);
4235 if (status < B_OK) {
4236 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4237 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4238 strerror(status), address, faultAddress, isWrite, isUser,
4239 thread_get_current_thread_id());
4240 if (!isUser) {
4241 Thread* thread = thread_get_current_thread();
4242 if (thread != NULL && thread->fault_handler != 0) {
4243 // this will cause the arch dependant page fault handler to
4244 // modify the IP on the interrupt frame or whatever to return
4245 // to this address
4246 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4247 } else {
4248 // unhandled page fault in the kernel
4249 panic("vm_page_fault: unhandled page fault in kernel space at "
4250 "0x%lx, ip 0x%lx\n", address, faultAddress);
4252 } else {
4253 #if 1
4254 // TODO: remove me once we have proper userland debugging support
4255 // (and tools)
4256 VMArea* area = NULL;
4257 if (addressSpace != NULL) {
4258 addressSpace->ReadLock();
4259 area = addressSpace->LookupArea(faultAddress);
4262 Thread* thread = thread_get_current_thread();
4263 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4264 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4265 "(\"%s\" +%#lx)\n", thread->name, thread->id,
4266 thread->team->Name(), thread->team->id,
4267 isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4268 faultAddress, area ? area->name : "???", faultAddress - (area ?
4269 area->Base() : 0x0));
4271 // We can print a stack trace of the userland thread here.
4272 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4273 // fault and someone is already waiting for a write lock on the same address
4274 // space. This thread will then try to acquire the lock again and will
4275 // be queued after the writer.
4276 # if 0
4277 if (area) {
4278 struct stack_frame {
4279 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4280 struct stack_frame* previous;
4281 void* return_address;
4282 #else
4283 // ...
4284 #warning writeme
4285 #endif
4286 } frame;
4287 # ifdef __INTEL__
4288 struct iframe* iframe = x86_get_user_iframe();
4289 if (iframe == NULL)
4290 panic("iframe is NULL!");
4292 status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4293 sizeof(struct stack_frame));
4294 # elif defined(__POWERPC__)
4295 struct iframe* iframe = ppc_get_user_iframe();
4296 if (iframe == NULL)
4297 panic("iframe is NULL!");
4299 status_t status = user_memcpy(&frame, (void*)iframe->r1,
4300 sizeof(struct stack_frame));
4301 # else
4302 # warning "vm_page_fault() stack trace won't work"
4303 status = B_ERROR;
4304 # endif
4306 dprintf("stack trace:\n");
4307 int32 maxFrames = 50;
4308 while (status == B_OK && --maxFrames >= 0
4309 && frame.return_address != NULL) {
4310 dprintf(" %p", frame.return_address);
4311 area = addressSpace->LookupArea(
4312 (addr_t)frame.return_address);
4313 if (area) {
4314 dprintf(" (%s + %#lx)", area->name,
4315 (addr_t)frame.return_address - area->Base());
4317 dprintf("\n");
4319 status = user_memcpy(&frame, frame.previous,
4320 sizeof(struct stack_frame));
4323 # endif // 0 (stack trace)
4325 if (addressSpace != NULL)
4326 addressSpace->ReadUnlock();
4327 #endif
4329 // If the thread has a signal handler for SIGSEGV, we simply
4330 // send it the signal. Otherwise we notify the user debugger
4331 // first.
4332 struct sigaction action;
4333 if ((sigaction(SIGSEGV, NULL, &action) == 0
4334 && action.sa_handler != SIG_DFL
4335 && action.sa_handler != SIG_IGN)
4336 || user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4337 SIGSEGV)) {
4338 Signal signal(SIGSEGV,
4339 status == B_PERMISSION_DENIED
4340 ? SEGV_ACCERR : SEGV_MAPERR,
4341 EFAULT, thread->team->id);
4342 signal.SetAddress((void*)address);
4343 send_signal_to_thread(thread, signal, 0);
4348 if (addressSpace != NULL)
4349 addressSpace->Put();
4351 return B_HANDLED_INTERRUPT;
4355 struct PageFaultContext {
4356 AddressSpaceReadLocker addressSpaceLocker;
4357 VMCacheChainLocker cacheChainLocker;
4359 VMTranslationMap* map;
4360 VMCache* topCache;
4361 off_t cacheOffset;
4362 vm_page_reservation reservation;
4363 bool isWrite;
4365 // return values
4366 vm_page* page;
4367 bool restart;
4368 bool pageAllocated;
4371 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4373 addressSpaceLocker(addressSpace, true),
4374 map(addressSpace->TranslationMap()),
4375 isWrite(isWrite)
4379 ~PageFaultContext()
4381 UnlockAll();
4382 vm_page_unreserve_pages(&reservation);
4385 void Prepare(VMCache* topCache, off_t cacheOffset)
4387 this->topCache = topCache;
4388 this->cacheOffset = cacheOffset;
4389 page = NULL;
4390 restart = false;
4391 pageAllocated = false;
4393 cacheChainLocker.SetTo(topCache);
4396 void UnlockAll(VMCache* exceptCache = NULL)
4398 topCache = NULL;
4399 addressSpaceLocker.Unlock();
4400 cacheChainLocker.Unlock(exceptCache);
4405 /*! Gets the page that should be mapped into the area.
4406 Returns an error code other than \c B_OK, if the page couldn't be found or
4407 paged in. The locking state of the address space and the caches is undefined
4408 in that case.
4409 	Returns \c B_OK with \c context.restart set to \c true, if the function
4410 had to unlock the address space and all caches and is supposed to be called
4411 again.
4412 Returns \c B_OK with \c context.restart set to \c false, if the page was
4413 found. It is returned in \c context.page. The address space will still be
4414 locked as well as all caches starting from the top cache to at least the
4415 cache the page lives in.
4417 static status_t
4418 fault_get_page(PageFaultContext& context)
4420 VMCache* cache = context.topCache;
4421 VMCache* lastCache = NULL;
4422 vm_page* page = NULL;
4424 while (cache != NULL) {
4425 // We already hold the lock of the cache at this point.
4427 lastCache = cache;
4429 page = cache->LookupPage(context.cacheOffset);
4430 if (page != NULL && page->busy) {
4431 // page must be busy -- wait for it to become unbusy
4432 context.UnlockAll(cache);
4433 cache->ReleaseRefLocked();
4434 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4436 // restart the whole process
4437 context.restart = true;
4438 return B_OK;
4441 if (page != NULL)
4442 break;
4444 // The current cache does not contain the page we're looking for.
4446 // see if the backing store has it
4447 if (cache->HasPage(context.cacheOffset)) {
4448 // insert a fresh page and mark it busy -- we're going to read it in
4449 page = vm_page_allocate_page(&context.reservation,
4450 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4451 cache->InsertPage(page, context.cacheOffset);
4453 // We need to unlock all caches and the address space while reading
4454 // the page in. Keep a reference to the cache around.
4455 cache->AcquireRefLocked();
4456 context.UnlockAll();
4458 // read the page in
4459 generic_io_vec vec;
4460 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4461 generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4463 status_t status = cache->Read(context.cacheOffset, &vec, 1,
4464 B_PHYSICAL_IO_REQUEST, &bytesRead);
4466 cache->Lock();
4468 if (status < B_OK) {
4469 // on error remove and free the page
4470 dprintf("reading page from cache %p returned: %s!\n",
4471 cache, strerror(status));
4473 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4474 cache->RemovePage(page);
4475 vm_page_set_state(page, PAGE_STATE_FREE);
4477 cache->ReleaseRefAndUnlock();
4478 return status;
4481 // mark the page unbusy again
4482 cache->MarkPageUnbusy(page);
4484 DEBUG_PAGE_ACCESS_END(page);
4486 // Since we needed to unlock everything temporarily, the area
4487 // situation might have changed. So we need to restart the whole
4488 // process.
4489 cache->ReleaseRefAndUnlock();
4490 context.restart = true;
4491 return B_OK;
4494 cache = context.cacheChainLocker.LockSourceCache();
4497 if (page == NULL) {
4498 		// There was no adequate page; determine the cache for a clean one.
4499 		// Read-only pages go into the deepest cache; only the topmost cache
4500 // may have direct write access.
4501 cache = context.isWrite ? context.topCache : lastCache;
4503 // allocate a clean page
4504 page = vm_page_allocate_page(&context.reservation,
4505 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4506 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4507 page->physical_page_number));
4509 // insert the new page into our cache
4510 cache->InsertPage(page, context.cacheOffset);
4511 context.pageAllocated = true;
4512 } else if (page->Cache() != context.topCache && context.isWrite) {
4513 // We have a page that has the data we want, but in the wrong cache
4514 // object so we need to copy it and stick it into the top cache.
4515 vm_page* sourcePage = page;
4517 // TODO: If memory is low, it might be a good idea to steal the page
4518 // from our source cache -- if possible, that is.
4519 FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4520 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4522 		// To avoid needlessly killing concurrency, we unlock all caches but the top
4523 // one while copying the page. Lacking another mechanism to ensure that
4524 // the source page doesn't disappear, we mark it busy.
4525 sourcePage->busy = true;
4526 context.cacheChainLocker.UnlockKeepRefs(true);
4528 // copy the page
4529 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4530 sourcePage->physical_page_number * B_PAGE_SIZE);
4532 context.cacheChainLocker.RelockCaches(true);
4533 sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4535 // insert the new page into our cache
4536 context.topCache->InsertPage(page, context.cacheOffset);
4537 context.pageAllocated = true;
4538 } else
4539 DEBUG_PAGE_ACCESS_START(page);
4541 context.page = page;
4542 return B_OK;
4546 /*! Makes sure the address in the given address space is mapped.
4548 \param addressSpace The address space.
4549 \param originalAddress The address. Doesn't need to be page aligned.
4550 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4551 \param isUser If \c true the access is requested by a userland team.
4552 \param wirePage On success, if non \c NULL, the wired count of the page
4553 mapped at the given address is incremented and the page is returned
4554 via this parameter.
4555 \return \c B_OK on success, another error code otherwise.
4557 static status_t
4558 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4559 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4561 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4562 "isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4563 originalAddress, isWrite, isUser));
4565 PageFaultContext context(addressSpace, isWrite);
4567 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4568 status_t status = B_OK;
4570 addressSpace->IncrementFaultCount();
4572 // We may need up to 2 pages plus pages needed for mapping them -- reserving
4573 // the pages upfront makes sure we don't have any cache locked, so that the
4574 // page daemon/thief can do their job without problems.
4575 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4576 originalAddress);
4577 context.addressSpaceLocker.Unlock();
4578 vm_page_reserve_pages(&context.reservation, reservePages,
4579 addressSpace == VMAddressSpace::Kernel()
4580 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4582 while (true) {
4583 context.addressSpaceLocker.Lock();
4585 // get the area the fault was in
4586 VMArea* area = addressSpace->LookupArea(address);
4587 if (area == NULL) {
4588 dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4589 "space\n", originalAddress);
4590 TPF(PageFaultError(-1,
4591 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4592 status = B_BAD_ADDRESS;
4593 break;
4596 // check permissions
4597 uint32 protection = get_area_page_protection(area, address);
4598 if (isUser && (protection & B_USER_PROTECTION) == 0) {
4599 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4600 area->id, (void*)originalAddress);
4601 TPF(PageFaultError(area->id,
4602 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4603 status = B_PERMISSION_DENIED;
4604 break;
4606 if (isWrite && (protection
4607 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4608 dprintf("write access attempted on write-protected area 0x%"
4609 B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4610 TPF(PageFaultError(area->id,
4611 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4612 status = B_PERMISSION_DENIED;
4613 break;
4614 } else if (isExecute && (protection
4615 & (B_EXECUTE_AREA
4616 | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4617 dprintf("instruction fetch attempted on execute-protected area 0x%"
4618 B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4619 TPF(PageFaultError(area->id,
4620 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4621 status = B_PERMISSION_DENIED;
4622 break;
4623 } else if (!isWrite && !isExecute && (protection
4624 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4625 dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4626 " at %p\n", area->id, (void*)originalAddress);
4627 TPF(PageFaultError(area->id,
4628 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4629 status = B_PERMISSION_DENIED;
4630 break;
4633 // We have the area, it was a valid access, so let's try to resolve the
4634 // page fault now.
4635 // At first, the top most cache from the area is investigated.
4637 context.Prepare(vm_area_get_locked_cache(area),
4638 address - area->Base() + area->cache_offset);
4640 // See if this cache has a fault handler -- this will do all the work
4641 // for us.
4643 // Note, since the page fault is resolved with interrupts enabled,
4644 // the fault handler could be called more than once for the same
4645 // reason -- the store must take this into account.
4646 status = context.topCache->Fault(addressSpace, context.cacheOffset);
4647 if (status != B_BAD_HANDLER)
4648 break;
4651 // The top most cache has no fault handler, so let's see if the cache or
4652 // its sources already have the page we're searching for (we're going
4653 // from top to bottom).
4654 status = fault_get_page(context);
4655 if (status != B_OK) {
4656 TPF(PageFaultError(area->id, status));
4657 break;
4660 if (context.restart)
4661 continue;
4663 // All went fine, all there is left to do is to map the page into the
4664 // address space.
4665 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4666 context.page));
4668 // If the page doesn't reside in the area's cache, we need to make sure
4669 		// it's mapped read-only, so that we cannot overwrite someone else's
4670 		// data (copy-on-write).
4671 uint32 newProtection = protection;
4672 if (context.page->Cache() != context.topCache && !isWrite)
4673 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4675 bool unmapPage = false;
4676 bool mapPage = true;
4678 // check whether there's already a page mapped at the address
4679 context.map->Lock();
4681 phys_addr_t physicalAddress;
4682 uint32 flags;
4683 vm_page* mappedPage = NULL;
4684 if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4685 && (flags & PAGE_PRESENT) != 0
4686 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4687 != NULL) {
4688 // Yep there's already a page. If it's ours, we can simply adjust
4689 // its protection. Otherwise we have to unmap it.
4690 if (mappedPage == context.page) {
4691 context.map->ProtectPage(area, address, newProtection);
4692 // Note: We assume that ProtectPage() is atomic (i.e.
4693 // the page isn't temporarily unmapped), otherwise we'd have
4694 // to make sure it isn't wired.
4695 mapPage = false;
4696 } else
4697 unmapPage = true;
4700 context.map->Unlock();
4702 if (unmapPage) {
4703 // If the page is wired, we can't unmap it. Wait until it is unwired
4704 // again and restart. Note that the page cannot be wired for
4705 			// writing, since it isn't in the topmost cache. So we can safely
4706 // ignore ranges wired for writing (our own and other concurrent
4707 // wiring attempts in progress) and in fact have to do that to avoid
4708 // a deadlock.
4709 VMAreaUnwiredWaiter waiter;
4710 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4711 VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4712 // unlock everything and wait
4713 if (context.pageAllocated) {
4714 // ... but since we allocated a page and inserted it into
4715 // the top cache, remove and free it first. Otherwise we'd
4716 // have a page from a lower cache mapped while an upper
4717 // cache has a page that would shadow it.
4718 context.topCache->RemovePage(context.page);
4719 vm_page_free_etc(context.topCache, context.page,
4720 &context.reservation);
4721 } else
4722 DEBUG_PAGE_ACCESS_END(context.page);
4724 context.UnlockAll();
4725 waiter.waitEntry.Wait();
4726 continue;
4729 // Note: The mapped page is a page of a lower cache. We are
4730 			// guaranteed to have that cache locked, our new page is a copy of
4731 // that page, and the page is not busy. The logic for that guarantee
4732 // is as follows: Since the page is mapped, it must live in the top
4733 // cache (ruled out above) or any of its lower caches, and there is
4734 // (was before the new page was inserted) no other page in any
4735 // cache between the top cache and the page's cache (otherwise that
4736 // would be mapped instead). That in turn means that our algorithm
4737 // must have found it and therefore it cannot be busy either.
4738 DEBUG_PAGE_ACCESS_START(mappedPage);
4739 unmap_page(area, address);
4740 DEBUG_PAGE_ACCESS_END(mappedPage);
4743 if (mapPage) {
4744 if (map_page(area, context.page, address, newProtection,
4745 &context.reservation) != B_OK) {
4746 				// Mapping can only fail when the page mapping object couldn't
4747 // be allocated. Save for the missing mapping everything is
4748 // fine, though. If this was a regular page fault, we'll simply
4749 // leave and probably fault again. To make sure we'll have more
4750 // luck then, we ensure that the minimum object reserve is
4751 // available.
4752 DEBUG_PAGE_ACCESS_END(context.page);
4754 context.UnlockAll();
4756 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4757 != B_OK) {
4758 // Apparently the situation is serious. Let's get ourselves
4759 // killed.
4760 status = B_NO_MEMORY;
4761 } else if (wirePage != NULL) {
4762 // The caller expects us to wire the page. Since
4763 // object_cache_reserve() succeeded, we should now be able
4764 // to allocate a mapping structure. Restart.
4765 continue;
4768 break;
4770 } else if (context.page->State() == PAGE_STATE_INACTIVE)
4771 vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4773 // also wire the page, if requested
4774 if (wirePage != NULL && status == B_OK) {
4775 increment_page_wired_count(context.page);
4776 *wirePage = context.page;
4779 DEBUG_PAGE_ACCESS_END(context.page);
4781 break;
4784 return status;
4788 status_t
4789 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4791 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4794 status_t
4795 vm_put_physical_page(addr_t vaddr, void* handle)
4797 return sPhysicalPageMapper->PutPage(vaddr, handle);
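// Illustrative sketch (not part of the original source): temporarily mapping
// a physical page via the physical page mapper, touching it, and releasing
// the mapping again. Zeroing is just an arbitrary example of "touching";
// vm_memset_physical() below would achieve the same without an explicit
// mapping. The helper is hypothetical.
#if 0
static status_t
zero_physical_page(phys_addr_t physicalAddress)
{
	addr_t virtualAddress;
	void* handle;
	status_t status = vm_get_physical_page(physicalAddress, &virtualAddress,
		&handle);
	if (status != B_OK)
		return status;

	memset((void*)virtualAddress, 0, B_PAGE_SIZE);

	return vm_put_physical_page(virtualAddress, handle);
}
#endif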
4801 status_t
4802 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4803 void** _handle)
4805 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4808 status_t
4809 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4811 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4815 status_t
4816 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4818 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4821 status_t
4822 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4824 return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4828 void
4829 vm_get_info(system_info* info)
4831 swap_get_info(info);
4833 MutexLocker locker(sAvailableMemoryLock);
4834 info->needed_memory = sNeededMemory;
4835 info->free_memory = sAvailableMemory;
4839 uint32
4840 vm_num_page_faults(void)
4842 return sPageFaults;
4846 off_t
4847 vm_available_memory(void)
4849 MutexLocker locker(sAvailableMemoryLock);
4850 return sAvailableMemory;
4854 off_t
4855 vm_available_not_needed_memory(void)
4857 MutexLocker locker(sAvailableMemoryLock);
4858 return sAvailableMemory - sNeededMemory;
4862 /*! Like vm_available_not_needed_memory(), but only for use in the kernel
4863 debugger.
4865 off_t
4866 vm_available_not_needed_memory_debug(void)
4868 return sAvailableMemory - sNeededMemory;
4872 size_t
4873 vm_kernel_address_space_left(void)
4875 return VMAddressSpace::Kernel()->FreeSpace();
4879 void
4880 vm_unreserve_memory(size_t amount)
4882 mutex_lock(&sAvailableMemoryLock);
4884 sAvailableMemory += amount;
4886 mutex_unlock(&sAvailableMemoryLock);
4890 status_t
4891 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4893 size_t reserve = kMemoryReserveForPriority[priority];
4895 MutexLocker locker(sAvailableMemoryLock);
4897 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4899 if (sAvailableMemory >= (off_t)(amount + reserve)) {
4900 sAvailableMemory -= amount;
4901 return B_OK;
4904 if (timeout <= 0)
4905 return B_NO_MEMORY;
4907 // turn timeout into an absolute timeout
4908 timeout += system_time();
4910 // loop until we've got the memory or the timeout occurs
4911 do {
4912 sNeededMemory += amount;
4914 // call the low resource manager
4915 locker.Unlock();
4916 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4917 B_ABSOLUTE_TIMEOUT, timeout);
4918 locker.Lock();
4920 sNeededMemory -= amount;
4922 if (sAvailableMemory >= (off_t)(amount + reserve)) {
4923 sAvailableMemory -= amount;
4924 return B_OK;
4926 } while (timeout > system_time());
4928 return B_NO_MEMORY;
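// Illustrative sketch (not part of the original source): bracketing a
// temporary consumer of memory with the reserve/unreserve pair above. The
// priority and the one second timeout are arbitrary example values; the
// helper is hypothetical.
#if 0
static status_t
with_reserved_memory(size_t bytes)
{
	status_t status = vm_try_reserve_memory(bytes, VM_PRIORITY_USER, 1000000);
	if (status != B_OK)
		return status;

	// ... commit/use up to "bytes" bytes; that amount has been subtracted
	// from sAvailableMemory for the duration ...

	vm_unreserve_memory(bytes);
	return B_OK;
}
#endif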
4932 status_t
4933 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4935 // NOTE: The caller is responsible for synchronizing calls to this function!
4937 AddressSpaceReadLocker locker;
4938 VMArea* area;
4939 status_t status = locker.SetFromArea(id, area);
4940 if (status != B_OK)
4941 return status;
4943 // nothing to do, if the type doesn't change
4944 uint32 oldType = area->MemoryType();
4945 if (type == oldType)
4946 return B_OK;
4948 // set the memory type of the area and the mapped pages
4949 VMTranslationMap* map = area->address_space->TranslationMap();
4950 map->Lock();
4951 area->SetMemoryType(type);
4952 map->ProtectArea(area, area->protection);
4953 map->Unlock();
4955 // set the physical memory type
4956 status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4957 if (error != B_OK) {
4958 // reset the memory type of the area and the mapped pages
4959 map->Lock();
4960 area->SetMemoryType(oldType);
4961 map->ProtectArea(area, area->protection);
4962 map->Unlock();
4963 return error;
4966 return B_OK;
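// Illustrative sketch (assumption, not part of the original source): a
// typical caller would be a graphics driver switching its frame buffer area
// to write-combining. The area ID, physical base and the B_MTR_WC constant
// are assumptions about the caller, not something this file defines.
#if 0
static status_t
make_frame_buffer_write_combined(area_id frameBufferArea,
	phys_addr_t frameBufferPhysicalBase)
{
	// the caller is responsible for serializing calls, see the note above
	return vm_set_area_memory_type(frameBufferArea, frameBufferPhysicalBase,
		B_MTR_WC);
}
#endif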
4971 /*! This function enforces some protection properties:
4972 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4973 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4974 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4975 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4976 and B_KERNEL_WRITE_AREA.
4978 static void
4979 fix_protection(uint32* protection)
4981 if ((*protection & B_KERNEL_PROTECTION) == 0) {
4982 if ((*protection & B_USER_PROTECTION) == 0
4983 || (*protection & B_WRITE_AREA) != 0)
4984 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4985 else
4986 *protection |= B_KERNEL_READ_AREA;
4987 if ((*protection & B_EXECUTE_AREA) != 0)
4988 *protection |= B_KERNEL_EXECUTE_AREA;
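// Worked example (not part of the original source): what the rules above do
// to two purely user-specified protection values. The helper is hypothetical
// and only meant to illustrate fix_protection().
#if 0
static void
fix_protection_example()
{
	uint32 protection = B_READ_AREA | B_WRITE_AREA;
	fix_protection(&protection);
	// B_WRITE_AREA was set, so kernel read and write are added:
	// protection == B_READ_AREA | B_WRITE_AREA
	//	| B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA

	protection = B_READ_AREA;
	fix_protection(&protection);
	// only B_READ_AREA was set, so just kernel read is added:
	// protection == B_READ_AREA | B_KERNEL_READ_AREA
}
#endif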
4993 static void
4994 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4996 strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4997 info->area = area->id;
4998 info->address = (void*)area->Base();
4999 info->size = area->Size();
5000 info->protection = area->protection;
5001 info->lock = B_FULL_LOCK;
5002 info->team = area->address_space->ID();
5003 info->copy_count = 0;
5004 info->in_count = 0;
5005 info->out_count = 0;
5006 // TODO: retrieve real values here!
5008 VMCache* cache = vm_area_get_locked_cache(area);
5010 // Note, this is a simplification; the cache could be larger than this area
5011 info->ram_size = cache->page_count * B_PAGE_SIZE;
5013 vm_area_put_locked_cache(cache);
5017 static status_t
5018 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5020 // is newSize a multiple of B_PAGE_SIZE?
5021 if (newSize & (B_PAGE_SIZE - 1))
5022 return B_BAD_VALUE;
5024 // lock all affected address spaces and the cache
5025 VMArea* area;
5026 VMCache* cache;
5028 MultiAddressSpaceLocker locker;
5029 AreaCacheLocker cacheLocker;
5031 status_t status;
5032 size_t oldSize;
5033 bool anyKernelArea;
5034 bool restart;
5036 do {
5037 anyKernelArea = false;
5038 restart = false;
5040 locker.Unset();
5041 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5042 if (status != B_OK)
5043 return status;
5044 cacheLocker.SetTo(cache, true); // already locked
5046 // enforce restrictions
5047 if (!kernel) {
5048 if ((area->protection & B_KERNEL_AREA) != 0)
5049 return B_NOT_ALLOWED;
5050 // TODO: Enforce all restrictions (team, etc.)!
5053 oldSize = area->Size();
5054 if (newSize == oldSize)
5055 return B_OK;
5057 if (cache->type != CACHE_TYPE_RAM)
5058 return B_NOT_ALLOWED;
5060 if (oldSize < newSize) {
5061 // We need to check if all areas of this cache can be resized.
5062 for (VMArea* current = cache->areas; current != NULL;
5063 current = current->cache_next) {
5064 if (!current->address_space->CanResizeArea(current, newSize))
5065 return B_ERROR;
5066 anyKernelArea
5067 |= current->address_space == VMAddressSpace::Kernel();
5069 } else {
5070 // We're shrinking the areas, so we must make sure the affected
5071 // ranges are not wired.
5072 for (VMArea* current = cache->areas; current != NULL;
5073 current = current->cache_next) {
5074 anyKernelArea
5075 |= current->address_space == VMAddressSpace::Kernel();
5077 if (wait_if_area_range_is_wired(current,
5078 current->Base() + newSize, oldSize - newSize, &locker,
5079 &cacheLocker)) {
5080 restart = true;
5081 break;
5085 } while (restart);
5087 // Okay, looks good so far, so let's do it
5089 int priority = kernel && anyKernelArea
5090 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5091 uint32 allocationFlags = kernel && anyKernelArea
5092 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5094 if (oldSize < newSize) {
5095 // Growing the cache can fail, so we do it first.
5096 status = cache->Resize(cache->virtual_base + newSize, priority);
5097 if (status != B_OK)
5098 return status;
5101 for (VMArea* current = cache->areas; current != NULL;
5102 current = current->cache_next) {
5103 status = current->address_space->ResizeArea(current, newSize,
5104 allocationFlags);
5105 if (status != B_OK)
5106 break;
5108 // We also need to unmap all pages beyond the new size, if the area has
5109 // shrunk
5110 if (newSize < oldSize) {
5111 VMCacheChainLocker cacheChainLocker(cache);
5112 cacheChainLocker.LockAllSourceCaches();
5114 unmap_pages(current, current->Base() + newSize,
5115 oldSize - newSize);
5117 cacheChainLocker.Unlock(cache);
5121 if (status == B_OK) {
5122 // Shrink or grow individual page protections if in use.
5123 if (area->page_protections != NULL) {
5124 uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5125 uint8* newProtections
5126 = (uint8*)realloc(area->page_protections, bytes);
5127 if (newProtections == NULL)
5128 status = B_NO_MEMORY;
5129 else {
5130 area->page_protections = newProtections;
5132 if (oldSize < newSize) {
5133 // init the additional page protections to that of the area
5134 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5135 uint32 areaProtection = area->protection
5136 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5137 memset(area->page_protections + offset,
5138 areaProtection | (areaProtection << 4), bytes - offset);
5139 if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5140 uint8& entry = area->page_protections[offset - 1];
5141 entry = (entry & 0x0f) | (areaProtection << 4);
5148 // shrinking the cache can't fail, so we do it now
5149 if (status == B_OK && newSize < oldSize)
5150 status = cache->Resize(cache->virtual_base + newSize, priority);
5152 if (status != B_OK) {
5153 // Something failed -- resize the areas back to their original size.
5154 // This can fail, too, in which case we're seriously screwed.
5155 for (VMArea* current = cache->areas; current != NULL;
5156 current = current->cache_next) {
5157 if (current->address_space->ResizeArea(current, oldSize,
5158 allocationFlags) != B_OK) {
5159 panic("vm_resize_area(): Failed and not being able to restore "
5160 "original state.");
5164 cache->Resize(cache->virtual_base + oldSize, priority);
5167 // TODO: we must honour the lock restrictions of this area
5168 return status;
5172 status_t
5173 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5175 return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5179 status_t
5180 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5182 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5186 status_t
5187 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5188 bool user)
5190 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5194 void
5195 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5197 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5201 /*! Copies a range of memory directly from/to a page that might not be mapped
5202 at the moment.
5204 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5205 walks through the respective area's cache chain to find the physical page
5206 and copies from/to it directly.
5207 The memory range starting at \a unsafeMemory with a length of \a size bytes
5208 must not cross a page boundary.
5210 \param teamID The team ID identifying the address space \a unsafeMemory is
5211 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5212 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5213 is passed, the address space of the thread returned by
5214 debug_get_debugged_thread() is used.
5215 \param unsafeMemory The start of the unsafe memory range to be copied
5216 from/to.
5217 \param buffer A safely accessible kernel buffer to be copied from/to.
5218 \param size The number of bytes to be copied.
5219 \param copyToUnsafe If \c true, memory is copied from \a buffer to
5220 \a unsafeMemory, the other way around otherwise.
5222 status_t
5223 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5224 size_t size, bool copyToUnsafe)
5226 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5227 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5228 return B_BAD_VALUE;
5231 // get the address space for the debugged thread
5232 VMAddressSpace* addressSpace;
5233 if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5234 addressSpace = VMAddressSpace::Kernel();
5235 } else if (teamID == B_CURRENT_TEAM) {
5236 Thread* thread = debug_get_debugged_thread();
5237 if (thread == NULL || thread->team == NULL)
5238 return B_BAD_ADDRESS;
5240 addressSpace = thread->team->address_space;
5241 } else
5242 addressSpace = VMAddressSpace::DebugGet(teamID);
5244 if (addressSpace == NULL)
5245 return B_BAD_ADDRESS;
5247 // get the area
5248 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5249 if (area == NULL)
5250 return B_BAD_ADDRESS;
5252 // search the page
5253 off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5254 + area->cache_offset;
5255 VMCache* cache = area->cache;
5256 vm_page* page = NULL;
5257 while (cache != NULL) {
5258 page = cache->DebugLookupPage(cacheOffset);
5259 if (page != NULL)
5260 break;
5262 // Page not found in this cache -- if it is paged out, we must not try
5263 // to get it from lower caches.
5264 if (cache->DebugHasPage(cacheOffset))
5265 break;
5267 cache = cache->source;
5270 if (page == NULL)
5271 return B_UNSUPPORTED;
5273 // copy from/to physical memory
5274 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5275 + (addr_t)unsafeMemory % B_PAGE_SIZE;
5277 if (copyToUnsafe) {
5278 if (page->Cache() != area->cache)
5279 return B_UNSUPPORTED;
5281 return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5284 return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
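// Illustrative sketch (assumption, not part of the original source): a
// hypothetical kernel debugger command that uses the cache-chain based copy
// above to read a byte that may not be mapped at the moment. It would be
// registered with add_debugger_command(), like the commands in vm_init().
#if 0
static int
debug_peek_byte(int argc, char** argv)
{
	if (argc != 2) {
		kprintf("usage: peek_byte <address>\n");
		return 0;
	}

	void* address = (void*)(addr_t)parse_expression(argv[1]);
	uint8 value;
	// a single byte can never cross a page boundary, so the size
	// restriction documented above is trivially met
	status_t status = vm_debug_copy_page_memory(B_CURRENT_TEAM, address,
		&value, 1, false);
	if (status == B_OK)
		kprintf("%p: %#02x\n", address, value);
	else
		kprintf("could not read %p: %s\n", address, strerror(status));
	return 0;
}
#endif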
5288 // #pragma mark - kernel public API
5291 status_t
5292 user_memcpy(void* to, const void* from, size_t size)
5294 // don't allow address overflows
5295 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5296 return B_BAD_ADDRESS;
5298 if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5299 return B_BAD_ADDRESS;
5301 return B_OK;
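// Illustrative sketch (not part of the original source): the usual pattern
// for a syscall that needs a userland structure -- validate the pointer,
// then copy it into a kernel-side buffer with user_memcpy() and only work
// with the copy. The syscall and the structure are hypothetical.
#if 0
struct example_request {
	int32	op;
	size_t	length;
};

static status_t
_user_example_call(const void* userRequest)
{
	if (userRequest == NULL || !IS_USER_ADDRESS(userRequest))
		return B_BAD_ADDRESS;

	example_request request;
	if (user_memcpy(&request, userRequest, sizeof(request)) != B_OK)
		return B_BAD_ADDRESS;

	// ... act on the kernel-side copy only ...
	return B_OK;
}
#endif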
5305 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to
5306 the string in \a to, NULL-terminating the result.
5308 \param to Pointer to the destination C-string.
5309 \param from Pointer to the source C-string.
5310 \param size Size in bytes of the string buffer pointed to by \a to.
5312 	\return strlen(\a from), or an error code if an address was invalid.
5314 ssize_t
5315 user_strlcpy(char* to, const char* from, size_t size)
5317 if (to == NULL && size != 0)
5318 return B_BAD_VALUE;
5319 if (from == NULL)
5320 return B_BAD_ADDRESS;
5322 // limit size to avoid address overflows
5323 size_t maxSize = std::min((addr_t)size,
5324 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5325 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5326 // the source address might still overflow.
5328 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5330 // If we hit the address overflow boundary, fail.
5331 if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5332 && maxSize < size)) {
5333 return B_BAD_ADDRESS;
5336 return result;
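// Illustrative sketch (not part of the original source): copying a
// NULL-terminated string from userland and detecting truncation, analogous
// to the usual strlcpy() result check. The helper is hypothetical.
#if 0
static status_t
copy_user_path(char* buffer, size_t bufferSize, const char* userPath)
{
	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;

	ssize_t length = user_strlcpy(buffer, userPath, bufferSize);
	if (length < 0)
		return (status_t)length;
	if ((size_t)length >= bufferSize) {
		// the source string did not fit into the buffer
		return B_NAME_TOO_LONG;
	}

	return B_OK;
}
#endif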
5340 status_t
5341 user_memset(void* s, char c, size_t count)
5343 // don't allow address overflows
5344 if ((addr_t)s + count < (addr_t)s)
5345 return B_BAD_ADDRESS;
5346 if (arch_cpu_user_memset(s, c, count) < B_OK)
5347 return B_BAD_ADDRESS;
5349 return B_OK;
5353 /*! Wires a single page at the given address.
5355 \param team The team whose address space the address belongs to. Supports
5356 also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5357 parameter is ignored.
5358 	\param address The virtual address to wire down. Does not need to
5359 be page aligned.
5360 \param writable If \c true the page shall be writable.
5361 \param info On success the info is filled in, among other things
5362 containing the physical address the given virtual one translates to.
5363 \return \c B_OK, when the page could be wired, another error code otherwise.
5365 status_t
5366 vm_wire_page(team_id team, addr_t address, bool writable,
5367 VMPageWiringInfo* info)
5369 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5370 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5372 // compute the page protection that is required
5373 bool isUser = IS_USER_ADDRESS(address);
5374 uint32 requiredProtection = PAGE_PRESENT
5375 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5376 if (writable)
5377 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5379 // get and read lock the address space
5380 VMAddressSpace* addressSpace = NULL;
5381 if (isUser) {
5382 if (team == B_CURRENT_TEAM)
5383 addressSpace = VMAddressSpace::GetCurrent();
5384 else
5385 addressSpace = VMAddressSpace::Get(team);
5386 } else
5387 addressSpace = VMAddressSpace::GetKernel();
5388 if (addressSpace == NULL)
5389 return B_ERROR;
5391 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5393 VMTranslationMap* map = addressSpace->TranslationMap();
5394 status_t error = B_OK;
5396 // get the area
5397 VMArea* area = addressSpace->LookupArea(pageAddress);
5398 if (area == NULL) {
5399 addressSpace->Put();
5400 return B_BAD_ADDRESS;
5403 // Lock the area's top cache. This is a requirement for VMArea::Wire().
5404 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5406 // mark the area range wired
5407 area->Wire(&info->range);
5409 // Lock the area's cache chain and the translation map. Needed to look
5410 // up the page and play with its wired count.
5411 cacheChainLocker.LockAllSourceCaches();
5412 map->Lock();
5414 phys_addr_t physicalAddress;
5415 uint32 flags;
5416 vm_page* page;
5417 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5418 && (flags & requiredProtection) == requiredProtection
5419 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5420 != NULL) {
5421 // Already mapped with the correct permissions -- just increment
5422 // the page's wired count.
5423 increment_page_wired_count(page);
5425 map->Unlock();
5426 cacheChainLocker.Unlock();
5427 addressSpaceLocker.Unlock();
5428 } else {
5429 // Let vm_soft_fault() map the page for us, if possible. We need
5430 // to fully unlock to avoid deadlocks. Since we have already
5431 // wired the area itself, nothing disturbing will happen with it
5432 // in the meantime.
5433 map->Unlock();
5434 cacheChainLocker.Unlock();
5435 addressSpaceLocker.Unlock();
5437 error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5438 isUser, &page);
5440 if (error != B_OK) {
5441 // The page could not be mapped -- clean up.
5442 VMCache* cache = vm_area_get_locked_cache(area);
5443 area->Unwire(&info->range);
5444 cache->ReleaseRefAndUnlock();
5445 addressSpace->Put();
5446 return error;
5450 info->physicalAddress
5451 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5452 + address % B_PAGE_SIZE;
5453 info->page = page;
5455 return B_OK;
5459 /*! Unwires a single page previously wired via vm_wire_page().
5461 \param info The same object passed to vm_wire_page() before.
5463 void
5464 vm_unwire_page(VMPageWiringInfo* info)
5466 // lock the address space
5467 VMArea* area = info->range.area;
5468 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5469 // takes over our reference
5471 // lock the top cache
5472 VMCache* cache = vm_area_get_locked_cache(area);
5473 VMCacheChainLocker cacheChainLocker(cache);
5475 if (info->page->Cache() != cache) {
5476 // The page is not in the top cache, so we lock the whole cache chain
5477 // before touching the page's wired count.
5478 cacheChainLocker.LockAllSourceCaches();
5481 decrement_page_wired_count(info->page);
5483 // remove the wired range from the range
5484 area->Unwire(&info->range);
5486 cacheChainLocker.Unlock();
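// Illustrative sketch (not part of the original source): wiring a single
// userland page, using the physical address it translates to, and unwiring
// it again with the same VMPageWiringInfo object. The helper is hypothetical.
#if 0
static status_t
with_wired_user_page(team_id team, addr_t userAddress, bool writable)
{
	VMPageWiringInfo info;
	status_t status = vm_wire_page(team, userAddress, writable, &info);
	if (status != B_OK)
		return status;

	// info.physicalAddress now translates userAddress; the page stays
	// mapped and wired until vm_unwire_page() is called
	dprintf("user address %#" B_PRIxADDR " -> physical %#" B_PRIxPHYSADDR
		"\n", userAddress, info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}
#endif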
5490 /*! Wires down the given address range in the specified team's address space.
5492 If successful the function
5493 - acquires a reference to the specified team's address space,
5494 - adds respective wired ranges to all areas that intersect with the given
5495 address range,
5496 - makes sure all pages in the given address range are mapped with the
5497 requested access permissions and increments their wired count.
5499 	It fails when \a team doesn't specify a valid address space, when any part
5500 of the specified address range is not covered by areas, when the concerned
5501 areas don't allow mapping with the requested permissions, or when mapping
5502 failed for another reason.
5504 	When successful the call must be balanced by an unlock_memory_etc() call with
5505 the exact same parameters.
5507 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5508 supported.
5509 \param address The start of the address range to be wired.
5510 \param numBytes The size of the address range to be wired.
5511 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5512 requests that the range must be wired writable ("read from device
5513 into memory").
5514 \return \c B_OK on success, another error code otherwise.
5516 status_t
5517 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5519 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5520 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5522 // compute the page protection that is required
5523 bool isUser = IS_USER_ADDRESS(address);
5524 bool writable = (flags & B_READ_DEVICE) == 0;
5525 uint32 requiredProtection = PAGE_PRESENT
5526 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5527 if (writable)
5528 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5530 uint32 mallocFlags = isUser
5531 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5533 // get and read lock the address space
5534 VMAddressSpace* addressSpace = NULL;
5535 if (isUser) {
5536 if (team == B_CURRENT_TEAM)
5537 addressSpace = VMAddressSpace::GetCurrent();
5538 else
5539 addressSpace = VMAddressSpace::Get(team);
5540 } else
5541 addressSpace = VMAddressSpace::GetKernel();
5542 if (addressSpace == NULL)
5543 return B_ERROR;
5545 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5546 // We get a new address space reference here. The one we got above will
5547 // be freed by unlock_memory_etc().
5549 VMTranslationMap* map = addressSpace->TranslationMap();
5550 status_t error = B_OK;
5552 // iterate through all concerned areas
5553 addr_t nextAddress = lockBaseAddress;
5554 while (nextAddress != lockEndAddress) {
5555 // get the next area
5556 VMArea* area = addressSpace->LookupArea(nextAddress);
5557 if (area == NULL) {
5558 error = B_BAD_ADDRESS;
5559 break;
5562 addr_t areaStart = nextAddress;
5563 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5565 // allocate the wired range (do that before locking the cache to avoid
5566 // deadlocks)
5567 VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5568 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5569 if (range == NULL) {
5570 error = B_NO_MEMORY;
5571 break;
5574 // Lock the area's top cache. This is a requirement for VMArea::Wire().
5575 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5577 // mark the area range wired
5578 area->Wire(range);
5580 // Depending on the area cache type and the wiring, we may not need to
5581 // look at the individual pages.
5582 if (area->cache_type == CACHE_TYPE_NULL
5583 || area->cache_type == CACHE_TYPE_DEVICE
5584 || area->wiring == B_FULL_LOCK
5585 || area->wiring == B_CONTIGUOUS) {
5586 nextAddress = areaEnd;
5587 continue;
5590 // Lock the area's cache chain and the translation map. Needed to look
5591 // up pages and play with their wired count.
5592 cacheChainLocker.LockAllSourceCaches();
5593 map->Lock();
5595 // iterate through the pages and wire them
5596 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5597 phys_addr_t physicalAddress;
5598 uint32 flags;
5600 vm_page* page;
5601 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5602 && (flags & requiredProtection) == requiredProtection
5603 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5604 != NULL) {
5605 // Already mapped with the correct permissions -- just increment
5606 // the page's wired count.
5607 increment_page_wired_count(page);
5608 } else {
5609 // Let vm_soft_fault() map the page for us, if possible. We need
5610 // to fully unlock to avoid deadlocks. Since we have already
5611 // wired the area itself, nothing disturbing will happen with it
5612 // in the meantime.
5613 map->Unlock();
5614 cacheChainLocker.Unlock();
5615 addressSpaceLocker.Unlock();
5617 error = vm_soft_fault(addressSpace, nextAddress, writable,
5618 false, isUser, &page);
5620 addressSpaceLocker.Lock();
5621 cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5622 cacheChainLocker.LockAllSourceCaches();
5623 map->Lock();
5626 if (error != B_OK)
5627 break;
5630 map->Unlock();
5632 if (error == B_OK) {
5633 cacheChainLocker.Unlock();
5634 } else {
5635 // An error occurred, so abort right here. If the current address
5636 // is the first in this area, unwire the area, since we won't get
5637 // to it when reverting what we've done so far.
5638 if (nextAddress == areaStart) {
5639 area->Unwire(range);
5640 cacheChainLocker.Unlock();
5641 range->~VMAreaWiredRange();
5642 free_etc(range, mallocFlags);
5643 } else
5644 cacheChainLocker.Unlock();
5646 break;
5650 if (error != B_OK) {
5651 // An error occurred, so unwire all that we've already wired. Note that
5652 // even if not a single page was wired, unlock_memory_etc() is called
5653 // to put the address space reference.
5654 addressSpaceLocker.Unlock();
5655 unlock_memory_etc(team, (void*)lockBaseAddress,
5656 nextAddress - lockBaseAddress, flags);
5659 return error;
5663 status_t
5664 lock_memory(void* address, size_t numBytes, uint32 flags)
5666 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5670 /*! Unwires an address range previously wired with lock_memory_etc().
5672 Note that a call to this function must balance a previous lock_memory_etc()
5673 call with exactly the same parameters.
5675 status_t
5676 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5678 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5679 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5681 // compute the page protection that is required
5682 bool isUser = IS_USER_ADDRESS(address);
5683 bool writable = (flags & B_READ_DEVICE) == 0;
5684 uint32 requiredProtection = PAGE_PRESENT
5685 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5686 if (writable)
5687 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5689 uint32 mallocFlags = isUser
5690 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5692 // get and read lock the address space
5693 VMAddressSpace* addressSpace = NULL;
5694 if (isUser) {
5695 if (team == B_CURRENT_TEAM)
5696 addressSpace = VMAddressSpace::GetCurrent();
5697 else
5698 addressSpace = VMAddressSpace::Get(team);
5699 } else
5700 addressSpace = VMAddressSpace::GetKernel();
5701 if (addressSpace == NULL)
5702 return B_ERROR;
5704 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5705 // Take over the address space reference. We don't unlock until we're
5706 // done.
5708 VMTranslationMap* map = addressSpace->TranslationMap();
5709 status_t error = B_OK;
5711 // iterate through all concerned areas
5712 addr_t nextAddress = lockBaseAddress;
5713 while (nextAddress != lockEndAddress) {
5714 // get the next area
5715 VMArea* area = addressSpace->LookupArea(nextAddress);
5716 if (area == NULL) {
5717 error = B_BAD_ADDRESS;
5718 break;
5721 addr_t areaStart = nextAddress;
5722 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5724 // Lock the area's top cache. This is a requirement for
5725 // VMArea::Unwire().
5726 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5728 // Depending on the area cache type and the wiring, we may not need to
5729 // look at the individual pages.
5730 if (area->cache_type == CACHE_TYPE_NULL
5731 || area->cache_type == CACHE_TYPE_DEVICE
5732 || area->wiring == B_FULL_LOCK
5733 || area->wiring == B_CONTIGUOUS) {
5734 // unwire the range (to avoid deadlocks we delete the range after
5735 // unlocking the cache)
5736 nextAddress = areaEnd;
5737 VMAreaWiredRange* range = area->Unwire(areaStart,
5738 areaEnd - areaStart, writable);
5739 cacheChainLocker.Unlock();
5740 if (range != NULL) {
5741 range->~VMAreaWiredRange();
5742 free_etc(range, mallocFlags);
5744 continue;
5747 // Lock the area's cache chain and the translation map. Needed to look
5748 // up pages and play with their wired count.
5749 cacheChainLocker.LockAllSourceCaches();
5750 map->Lock();
5752 // iterate through the pages and unwire them
5753 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5754 phys_addr_t physicalAddress;
5755 uint32 flags;
5757 vm_page* page;
5758 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5759 && (flags & PAGE_PRESENT) != 0
5760 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5761 != NULL) {
5762 // The page is still mapped -- just decrement the page's
5763 // wired count.
5764 decrement_page_wired_count(page);
5765 } else {
5766 panic("unlock_memory_etc(): Failed to unwire page: address "
5767 "space %p, address: %#" B_PRIxADDR, addressSpace,
5768 nextAddress);
5769 error = B_BAD_VALUE;
5770 break;
5774 map->Unlock();
5776 // All pages are unwired. Remove the area's wired range as well (to
5777 // avoid deadlocks we delete the range after unlocking the cache).
5778 VMAreaWiredRange* range = area->Unwire(areaStart,
5779 areaEnd - areaStart, writable);
5781 cacheChainLocker.Unlock();
5783 if (range != NULL) {
5784 range->~VMAreaWiredRange();
5785 free_etc(range, mallocFlags);
5788 if (error != B_OK)
5789 break;
5792 // get rid of the address space reference that lock_memory_etc() acquired
5793 addressSpace->Put();
5795 return error;
5799 status_t
5800 unlock_memory(void* address, size_t numBytes, uint32 flags)
5802 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
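/*! Usage sketch (illustrative, not from the original source): a driver wiring
    a user buffer around a device transfer. The function name, buffer, and the
    B_DMA_IO | B_READ_DEVICE flags are assumptions for the example; the point
    is that the unlock_memory() call repeats the lock_memory() parameters
    exactly, as the documentation above requires.
    \code
    static status_t
    transfer_to_device(void* buffer, size_t length)
    {
        status_t error = lock_memory(buffer, length,
            B_DMA_IO | B_READ_DEVICE);
        if (error != B_OK)
            return error;

        // ... start the transfer and wait for its completion ...

        // must balance the lock_memory() call with the same parameters
        return unlock_memory(buffer, length, B_DMA_IO | B_READ_DEVICE);
    }
    \endcode
*/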
5806 /*! Similar to get_memory_map(), but also allows specifying the address space
5807 for the memory in question and has saner semantics.
5808 Returns \c B_OK when the complete range could be translated or
5809 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5810 case the actual number of entries is written to \c *_numEntries. Any other
5811 error case indicates complete failure; \c *_numEntries will be set to \c 0
5812 in this case.
5814 status_t
5815 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5816 physical_entry* table, uint32* _numEntries)
5818 uint32 numEntries = *_numEntries;
5819 *_numEntries = 0;
5821 VMAddressSpace* addressSpace;
5822 addr_t virtualAddress = (addr_t)address;
5823 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5824 phys_addr_t physicalAddress;
5825 status_t status = B_OK;
5826 int32 index = -1;
5827 addr_t offset = 0;
5828 bool interrupts = are_interrupts_enabled();
5830 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5831 "entries)\n", team, address, numBytes, numEntries));
5833 if (numEntries == 0 || numBytes == 0)
5834 return B_BAD_VALUE;
5836 // in which address space is the address to be found?
5837 if (IS_USER_ADDRESS(virtualAddress)) {
5838 if (team == B_CURRENT_TEAM)
5839 addressSpace = VMAddressSpace::GetCurrent();
5840 else
5841 addressSpace = VMAddressSpace::Get(team);
5842 } else
5843 addressSpace = VMAddressSpace::GetKernel();
5845 if (addressSpace == NULL)
5846 return B_ERROR;
5848 VMTranslationMap* map = addressSpace->TranslationMap();
5850 if (interrupts)
5851 map->Lock();
5853 while (offset < numBytes) {
5854 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5855 uint32 flags;
5857 if (interrupts) {
5858 status = map->Query((addr_t)address + offset, &physicalAddress,
5859 &flags);
5860 } else {
5861 status = map->QueryInterrupt((addr_t)address + offset,
5862 &physicalAddress, &flags);
5864 if (status < B_OK)
5865 break;
5866 if ((flags & PAGE_PRESENT) == 0) {
5867 panic("get_memory_map() called on unmapped memory!");
5868 return B_BAD_ADDRESS;
5871 if (index < 0 && pageOffset > 0) {
5872 physicalAddress += pageOffset;
5873 if (bytes > B_PAGE_SIZE - pageOffset)
5874 bytes = B_PAGE_SIZE - pageOffset;
5877 // need to switch to the next physical_entry?
5878 if (index < 0 || table[index].address
5879 != physicalAddress - table[index].size) {
5880 if ((uint32)++index + 1 > numEntries) {
5881 // table too small
5882 break;
5884 table[index].address = physicalAddress;
5885 table[index].size = bytes;
5886 } else {
5887 // the page is contiguous with the current entry -- just extend it
5888 table[index].size += bytes;
5891 offset += bytes;
5894 if (interrupts)
5895 map->Unlock();
5897 if (status != B_OK)
5898 return status;
5900 if ((uint32)index + 1 > numEntries) {
5901 *_numEntries = index;
5902 return B_BUFFER_OVERFLOW;
5905 *_numEntries = index + 1;
5906 return B_OK;
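/*! Usage sketch (illustrative, not from the original source): translating a
    buffer into its physical runs. The function name and the table size of 8
    are assumptions; a caller that gets \c B_BUFFER_OVERFLOW back would retry
    with a larger table or split the request.
    \code
    static status_t
    print_physical_runs(const void* buffer, size_t length)
    {
        physical_entry table[8];
        uint32 count = 8;
        status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
            table, &count);
        if (status != B_OK && status != B_BUFFER_OVERFLOW)
            return status;

        for (uint32 i = 0; i < count; i++) {
            dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
                " bytes\n", i, table[i].address, (uint64)table[i].size);
        }
        return status;
    }
    \endcode
*/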
5910 /*! According to the BeBook, this function should always succeed.
5911 This is no longer the case.
5913 extern "C" int32
5914 __get_memory_map_haiku(const void* address, size_t numBytes,
5915 physical_entry* table, int32 numEntries)
5917 uint32 entriesRead = numEntries;
5918 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5919 table, &entriesRead);
5920 if (error != B_OK)
5921 return error;
5923 // close the entry list
5925 // if it's only one entry, we will silently accept the missing ending
5926 if (numEntries == 1)
5927 return B_OK;
5929 if (entriesRead + 1 > (uint32)numEntries)
5930 return B_BUFFER_OVERFLOW;
5932 table[entriesRead].address = 0;
5933 table[entriesRead].size = 0;
5935 return B_OK;
5939 area_id
5940 area_for(void* address)
5942 return vm_area_for((addr_t)address, true);
5946 area_id
5947 find_area(const char* name)
5949 return VMAreaHash::Find(name);
5953 status_t
5954 _get_area_info(area_id id, area_info* info, size_t size)
5956 if (size != sizeof(area_info) || info == NULL)
5957 return B_BAD_VALUE;
5959 AddressSpaceReadLocker locker;
5960 VMArea* area;
5961 status_t status = locker.SetFromArea(id, area);
5962 if (status != B_OK)
5963 return status;
5965 fill_area_info(area, info, size);
5966 return B_OK;
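/*! Usage sketch (illustrative, not from the original source): looking up the
    area that contains an address via area_for() and querying its properties
    with the public get_area_info() wrapper. The function name is an
    assumption for the example.
    \code
    static void
    dump_area_of(void* address)
    {
        area_id id = area_for(address);
        if (id < 0)
            return;

        area_info info;
        if (get_area_info(id, &info) == B_OK) {
            dprintf("area %" B_PRId32 " \"%s\": %p, %" B_PRIu64 " bytes\n",
                info.area, info.name, info.address, (uint64)info.size);
        }
    }
    \endcode
*/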
5970 status_t
5971 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5973 addr_t nextBase = *(addr_t*)cookie;
5975 // we're already through the list
5976 if (nextBase == (addr_t)-1)
5977 return B_ENTRY_NOT_FOUND;
5979 if (team == B_CURRENT_TEAM)
5980 team = team_get_current_team_id();
5982 AddressSpaceReadLocker locker(team);
5983 if (!locker.IsLocked())
5984 return B_BAD_TEAM_ID;
5986 VMArea* area;
5987 for (VMAddressSpace::AreaIterator it
5988 = locker.AddressSpace()->GetAreaIterator();
5989 (area = it.Next()) != NULL;) {
5990 if (area->Base() > nextBase)
5991 break;
5994 if (area == NULL) {
5995 nextBase = (addr_t)-1;
5996 return B_ENTRY_NOT_FOUND;
5999 fill_area_info(area, info, size);
6000 *cookie = (ssize_t)(area->Base());
6002 return B_OK;
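/*! Usage sketch (illustrative, not from the original source): walking all
    areas of a team with the cookie-based iteration this function implements,
    via the public get_next_area_info() wrapper. The function name is an
    assumption; \c B_CURRENT_TEAM works for the calling team.
    \code
    static void
    list_areas(team_id team)
    {
        ssize_t cookie = 0;
        area_info info;
        while (get_next_area_info(team, &cookie, &info) == B_OK) {
            dprintf("%p - %p  %s\n", info.address,
                (void*)((addr_t)info.address + info.size), info.name);
        }
    }
    \endcode
*/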
6006 status_t
6007 set_area_protection(area_id area, uint32 newProtection)
6009 return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6010 newProtection, true);
6014 status_t
6015 resize_area(area_id areaID, size_t newSize)
6017 return vm_resize_area(areaID, newSize, true);
6021 /*! Transfers the specified area to a new team. The caller must be the owner
6022 of the area.
6024 area_id
6025 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6026 bool kernel)
6028 area_info info;
6029 status_t status = get_area_info(id, &info);
6030 if (status != B_OK)
6031 return status;
6033 if (info.team != thread_get_current_thread()->team->id)
6034 return B_PERMISSION_DENIED;
6036 area_id clonedArea = vm_clone_area(target, info.name, _address,
6037 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6038 if (clonedArea < 0)
6039 return clonedArea;
6041 status = vm_delete_area(info.team, id, kernel);
6042 if (status != B_OK) {
6043 vm_delete_area(target, clonedArea, kernel);
6044 return status;
6047 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6049 return clonedArea;
6053 extern "C" area_id
6054 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6055 size_t numBytes, uint32 addressSpec, uint32 protection,
6056 void** _virtualAddress)
6058 if (!arch_vm_supports_protection(protection))
6059 return B_NOT_SUPPORTED;
6061 fix_protection(&protection);
6063 return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6064 _virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6065 false);
6069 area_id
6070 clone_area(const char* name, void** _address, uint32 addressSpec,
6071 uint32 protection, area_id source)
6073 if ((protection & B_KERNEL_PROTECTION) == 0)
6074 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6076 return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6077 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
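/*! Usage sketch (illustrative, not from the original source): cloning an
    existing area (e.g. a frame buffer) into the kernel address space. The
    function name, area name, and protection are assumptions for the example.
    \code
    static area_id
    map_frame_buffer(area_id sourceArea, void** _base)
    {
        return clone_area("cloned frame buffer", _base, B_ANY_KERNEL_ADDRESS,
            B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, sourceArea);
    }
    \endcode
*/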
6081 area_id
6082 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
6083 uint32 protection, uint32 flags, uint32 guardSize,
6084 const virtual_address_restrictions* virtualAddressRestrictions,
6085 const physical_address_restrictions* physicalAddressRestrictions,
6086 void** _address)
6088 fix_protection(&protection);
6090 return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6091 guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6092 true, _address);
6096 extern "C" area_id
6097 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6098 size_t size, uint32 lock, uint32 protection)
6100 fix_protection(&protection);
6102 virtual_address_restrictions virtualRestrictions = {};
6103 virtualRestrictions.address = *_address;
6104 virtualRestrictions.address_specification = addressSpec;
6105 physical_address_restrictions physicalRestrictions = {};
6106 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6107 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6108 true, _address);
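/*! Usage sketch (illustrative, not from the original source): creating a
    physically contiguous kernel buffer through create_area(), the public name
    the versioned symbol above is mapped to. The function name, area name, and
    size are assumptions for the example.
    \code
    static void*
    allocate_contiguous_buffer(area_id* _area)
    {
        void* base = NULL;
        *_area = create_area("contiguous buffer", &base, B_ANY_KERNEL_ADDRESS,
            16 * B_PAGE_SIZE, B_CONTIGUOUS,
            B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
        return *_area >= 0 ? base : NULL;
    }
    \endcode
*/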
6112 status_t
6113 delete_area(area_id area)
6115 return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6119 // #pragma mark - Userland syscalls
6122 status_t
6123 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6124 addr_t size)
6126 // filter out some unavailable values (for userland)
6127 switch (addressSpec) {
6128 case B_ANY_KERNEL_ADDRESS:
6129 case B_ANY_KERNEL_BLOCK_ADDRESS:
6130 return B_BAD_VALUE;
6133 addr_t address;
6135 if (!IS_USER_ADDRESS(userAddress)
6136 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6137 return B_BAD_ADDRESS;
6139 status_t status = vm_reserve_address_range(
6140 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6141 RESERVED_AVOID_BASE);
6142 if (status != B_OK)
6143 return status;
6145 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6146 vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6147 (void*)address, size);
6148 return B_BAD_ADDRESS;
6151 return B_OK;
6155 status_t
6156 _user_unreserve_address_range(addr_t address, addr_t size)
6158 return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6159 (void*)address, size);
6163 area_id
6164 _user_area_for(void* address)
6166 return vm_area_for((addr_t)address, false);
6170 area_id
6171 _user_find_area(const char* userName)
6173 char name[B_OS_NAME_LENGTH];
6175 if (!IS_USER_ADDRESS(userName)
6176 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6177 return B_BAD_ADDRESS;
6179 return find_area(name);
6183 status_t
6184 _user_get_area_info(area_id area, area_info* userInfo)
6186 if (!IS_USER_ADDRESS(userInfo))
6187 return B_BAD_ADDRESS;
6189 area_info info;
6190 status_t status = get_area_info(area, &info);
6191 if (status < B_OK)
6192 return status;
6194 // TODO: do we want to prevent userland from seeing kernel protections?
6195 //info.protection &= B_USER_PROTECTION;
6197 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6198 return B_BAD_ADDRESS;
6200 return status;
6204 status_t
6205 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6207 ssize_t cookie;
6209 if (!IS_USER_ADDRESS(userCookie)
6210 || !IS_USER_ADDRESS(userInfo)
6211 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6212 return B_BAD_ADDRESS;
6214 area_info info;
6215 status_t status = _get_next_area_info(team, &cookie, &info,
6216 sizeof(area_info));
6217 if (status != B_OK)
6218 return status;
6220 //info.protection &= B_USER_PROTECTION;
6222 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6223 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6224 return B_BAD_ADDRESS;
6226 return status;
6230 status_t
6231 _user_set_area_protection(area_id area, uint32 newProtection)
6233 if ((newProtection & ~B_USER_PROTECTION) != 0)
6234 return B_BAD_VALUE;
6236 return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6237 newProtection, false);
6241 status_t
6242 _user_resize_area(area_id area, size_t newSize)
6244 // TODO: Since we restrict deleting of areas to those owned by the team,
6245 // we should also do that for resizing (check other functions, too).
6246 return vm_resize_area(area, newSize, false);
6250 area_id
6251 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6252 team_id target)
6254 // filter out some unavailable values (for userland)
6255 switch (addressSpec) {
6256 case B_ANY_KERNEL_ADDRESS:
6257 case B_ANY_KERNEL_BLOCK_ADDRESS:
6258 return B_BAD_VALUE;
6261 void* address;
6262 if (!IS_USER_ADDRESS(userAddress)
6263 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6264 return B_BAD_ADDRESS;
6266 area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6267 if (newArea < B_OK)
6268 return newArea;
6270 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6271 return B_BAD_ADDRESS;
6273 return newArea;
6277 area_id
6278 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6279 uint32 protection, area_id sourceArea)
6281 char name[B_OS_NAME_LENGTH];
6282 void* address;
6284 // filter out some unavailable values (for userland)
6285 switch (addressSpec) {
6286 case B_ANY_KERNEL_ADDRESS:
6287 case B_ANY_KERNEL_BLOCK_ADDRESS:
6288 return B_BAD_VALUE;
6290 if ((protection & ~B_USER_AREA_FLAGS) != 0)
6291 return B_BAD_VALUE;
6293 if (!IS_USER_ADDRESS(userName)
6294 || !IS_USER_ADDRESS(userAddress)
6295 || user_strlcpy(name, userName, sizeof(name)) < B_OK
6296 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6297 return B_BAD_ADDRESS;
6299 fix_protection(&protection);
6301 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6302 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6303 false);
6304 if (clonedArea < B_OK)
6305 return clonedArea;
6307 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6308 delete_area(clonedArea);
6309 return B_BAD_ADDRESS;
6312 return clonedArea;
6316 area_id
6317 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6318 size_t size, uint32 lock, uint32 protection)
6320 char name[B_OS_NAME_LENGTH];
6321 void* address;
6323 // filter out some unavailable values (for userland)
6324 switch (addressSpec) {
6325 case B_ANY_KERNEL_ADDRESS:
6326 case B_ANY_KERNEL_BLOCK_ADDRESS:
6327 return B_BAD_VALUE;
6329 if ((protection & ~B_USER_AREA_FLAGS) != 0)
6330 return B_BAD_VALUE;
6332 if (!IS_USER_ADDRESS(userName)
6333 || !IS_USER_ADDRESS(userAddress)
6334 || user_strlcpy(name, userName, sizeof(name)) < B_OK
6335 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6336 return B_BAD_ADDRESS;
6338 if (addressSpec == B_EXACT_ADDRESS
6339 && IS_KERNEL_ADDRESS(address))
6340 return B_BAD_VALUE;
6342 if (addressSpec == B_ANY_ADDRESS)
6343 addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6344 if (addressSpec == B_BASE_ADDRESS)
6345 addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6347 fix_protection(&protection);
6349 virtual_address_restrictions virtualRestrictions = {};
6350 virtualRestrictions.address = address;
6351 virtualRestrictions.address_specification = addressSpec;
6352 physical_address_restrictions physicalRestrictions = {};
6353 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6354 size, lock, protection, 0, 0, &virtualRestrictions,
6355 &physicalRestrictions, false, &address);
6357 if (area >= B_OK
6358 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6359 delete_area(area);
6360 return B_BAD_ADDRESS;
6363 return area;
6367 status_t
6368 _user_delete_area(area_id area)
6370 // Unlike the BeOS implementation, you can now only delete areas
6371 // that you have created yourself from userland.
6372 // The documentation for delete_area() explicitly states that this
6373 // would be restricted in the future, and so it now is.
6374 return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6378 // TODO: create a BeOS style call for this!
6380 area_id
6381 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6382 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6383 int fd, off_t offset)
6385 char name[B_OS_NAME_LENGTH];
6386 void* address;
6387 area_id area;
6389 if ((protection & ~B_USER_AREA_FLAGS) != 0)
6390 return B_BAD_VALUE;
6392 fix_protection(&protection);
6394 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6395 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6396 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6397 return B_BAD_ADDRESS;
6399 if (addressSpec == B_EXACT_ADDRESS) {
6400 if ((addr_t)address + size < (addr_t)address
6401 || (addr_t)address % B_PAGE_SIZE != 0) {
6402 return B_BAD_VALUE;
6404 if (!IS_USER_ADDRESS(address)
6405 || !IS_USER_ADDRESS((addr_t)address + size)) {
6406 return B_BAD_ADDRESS;
6410 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6411 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6412 false);
6413 if (area < B_OK)
6414 return area;
6416 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6417 return B_BAD_ADDRESS;
6419 return area;
6423 status_t
6424 _user_unmap_memory(void* _address, size_t size)
6426 addr_t address = (addr_t)_address;
6428 // check params
6429 if (size == 0 || (addr_t)address + size < (addr_t)address
6430 || (addr_t)address % B_PAGE_SIZE != 0) {
6431 return B_BAD_VALUE;
6434 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6435 return B_BAD_ADDRESS;
6437 // Write lock the address space and ensure the address range is not wired.
6438 AddressSpaceWriteLocker locker;
6439 do {
6440 status_t status = locker.SetTo(team_get_current_team_id());
6441 if (status != B_OK)
6442 return status;
6443 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6444 size, &locker));
6446 // unmap
6447 return unmap_address_range(locker.AddressSpace(), address, size, false);
6451 status_t
6452 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6454 // check address range
6455 addr_t address = (addr_t)_address;
6456 size = PAGE_ALIGN(size);
6458 if ((address % B_PAGE_SIZE) != 0)
6459 return B_BAD_VALUE;
6460 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6461 || !IS_USER_ADDRESS((addr_t)address + size)) {
6462 // weird error code required by POSIX
6463 return ENOMEM;
6466 // extend and check protection
6467 if ((protection & ~B_USER_PROTECTION) != 0)
6468 return B_BAD_VALUE;
6470 fix_protection(&protection);
6472 // We need to write lock the address space, since we're going to play with
6473 // the areas. Also make sure that none of the areas is wired and that we're
6474 // actually allowed to change the protection.
6475 AddressSpaceWriteLocker locker;
6477 bool restart;
6478 do {
6479 restart = false;
6481 status_t status = locker.SetTo(team_get_current_team_id());
6482 if (status != B_OK)
6483 return status;
6485 // First round: Check whether the whole range is covered by areas and we
6486 // are allowed to modify them.
6487 addr_t currentAddress = address;
6488 size_t sizeLeft = size;
6489 while (sizeLeft > 0) {
6490 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6491 if (area == NULL)
6492 return B_NO_MEMORY;
6494 if ((area->protection & B_KERNEL_AREA) != 0)
6495 return B_NOT_ALLOWED;
6497 // TODO: For (shared) mapped files we should check whether the new
6498 // protections are compatible with the file permissions. We don't
6499 // have a way to do that yet, though.
6501 addr_t offset = currentAddress - area->Base();
6502 size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6504 AreaCacheLocker cacheLocker(area);
6506 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6507 &locker, &cacheLocker)) {
6508 restart = true;
6509 break;
6512 cacheLocker.Unlock();
6514 currentAddress += rangeSize;
6515 sizeLeft -= rangeSize;
6517 } while (restart);
6519 // Second round: If the protections differ from that of the area, create a
6520 // page protection array and re-map mapped pages.
6521 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6522 addr_t currentAddress = address;
6523 size_t sizeLeft = size;
6524 while (sizeLeft > 0) {
6525 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6526 if (area == NULL)
6527 return B_NO_MEMORY;
6529 addr_t offset = currentAddress - area->Base();
6530 size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6532 currentAddress += rangeSize;
6533 sizeLeft -= rangeSize;
6535 if (area->page_protections == NULL) {
6536 if (area->protection == protection)
6537 continue;
6539 status_t status = allocate_area_page_protections(area);
6540 if (status != B_OK)
6541 return status;
6544 // We need to lock the complete cache chain, since we potentially unmap
6545 // pages of lower caches.
6546 VMCache* topCache = vm_area_get_locked_cache(area);
6547 VMCacheChainLocker cacheChainLocker(topCache);
6548 cacheChainLocker.LockAllSourceCaches();
6550 for (addr_t pageAddress = area->Base() + offset;
6551 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6552 map->Lock();
6554 set_area_page_protection(area, pageAddress, protection);
6556 phys_addr_t physicalAddress;
6557 uint32 flags;
6559 status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6560 if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6561 map->Unlock();
6562 continue;
6565 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6566 if (page == NULL) {
6567 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6568 "\n", area, physicalAddress);
6569 map->Unlock();
6570 return B_ERROR;
6573 // If the page is not in the topmost cache and write access is
6574 // requested, we have to unmap it. Otherwise we can re-map it with
6575 // the new protection.
6576 bool unmapPage = page->Cache() != topCache
6577 && (protection & B_WRITE_AREA) != 0;
6579 if (!unmapPage)
6580 map->ProtectPage(area, pageAddress, protection);
6582 map->Unlock();
6584 if (unmapPage) {
6585 DEBUG_PAGE_ACCESS_START(page);
6586 unmap_page(area, pageAddress);
6587 DEBUG_PAGE_ACCESS_END(page);
6592 return B_OK;
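/*! Usage sketch (illustrative, not from the original source): the userland
    side of this syscall, assuming libroot's POSIX mprotect() forwards to it.
    The function name is an assumption; <sys/mman.h> and <errno.h> are
    assumed to be included. Making one page of a mapping read-only:
    \code
    static status_t
    make_page_read_only(void* pageAlignedAddress)
    {
        if (mprotect(pageAlignedAddress, B_PAGE_SIZE, PROT_READ) != 0)
            return errno;
        return B_OK;
    }
    \endcode
*/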
6596 status_t
6597 _user_sync_memory(void* _address, size_t size, uint32 flags)
6599 addr_t address = (addr_t)_address;
6600 size = PAGE_ALIGN(size);
6602 // check params
6603 if ((address % B_PAGE_SIZE) != 0)
6604 return B_BAD_VALUE;
6605 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6606 || !IS_USER_ADDRESS((addr_t)address + size)) {
6607 // weird error code required by POSIX
6608 return ENOMEM;
6611 bool writeSync = (flags & MS_SYNC) != 0;
6612 bool writeAsync = (flags & MS_ASYNC) != 0;
6613 if (writeSync && writeAsync)
6614 return B_BAD_VALUE;
6616 if (size == 0 || (!writeSync && !writeAsync))
6617 return B_OK;
6619 // iterate through the range and sync all concerned areas
6620 while (size > 0) {
6621 // read lock the address space
6622 AddressSpaceReadLocker locker;
6623 status_t error = locker.SetTo(team_get_current_team_id());
6624 if (error != B_OK)
6625 return error;
6627 // get the first area
6628 VMArea* area = locker.AddressSpace()->LookupArea(address);
6629 if (area == NULL)
6630 return B_NO_MEMORY;
6632 uint32 offset = address - area->Base();
6633 size_t rangeSize = min_c(area->Size() - offset, size);
6634 offset += area->cache_offset;
6636 // lock the cache
6637 AreaCacheLocker cacheLocker(area);
6638 if (!cacheLocker)
6639 return B_BAD_VALUE;
6640 VMCache* cache = area->cache;
6642 locker.Unlock();
6644 uint32 firstPage = offset >> PAGE_SHIFT;
6645 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6647 // write the pages
6648 if (cache->type == CACHE_TYPE_VNODE) {
6649 if (writeSync) {
6650 // synchronous
6651 error = vm_page_write_modified_page_range(cache, firstPage,
6652 endPage);
6653 if (error != B_OK)
6654 return error;
6655 } else {
6656 // asynchronous
6657 vm_page_schedule_write_page_range(cache, firstPage, endPage);
6658 // TODO: This is probably not quite what is supposed to happen.
6659 // Especially when a lot has to be written, it might take ages
6660 // until it really hits the disk.
6664 address += rangeSize;
6665 size -= rangeSize;
6668 // NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6669 // synchronize multiple mappings of the same file. In our VM they never get
6670 // out of sync, though, so we don't have to do anything.
6672 return B_OK;
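/*! Usage sketch (illustrative, not from the original source): the userland
    view of this syscall, assuming libroot's POSIX msync() forwards to it.
    The function name is an assumption; <sys/mman.h> and <errno.h> are
    assumed to be included. Scheduling an asynchronous flush of a file
    mapping:
    \code
    static status_t
    flush_file_mapping(void* base, size_t length)
    {
        if (msync(base, length, MS_ASYNC) != 0)
            return errno;
        return B_OK;
    }
    \endcode
*/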
6676 status_t
6677 _user_memory_advice(void* address, size_t size, uint32 advice)
6679 // TODO: Implement!
6680 return B_OK;
6684 status_t
6685 _user_get_memory_properties(team_id teamID, const void* address,
6686 uint32* _protected, uint32* _lock)
6688 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6689 return B_BAD_ADDRESS;
6691 AddressSpaceReadLocker locker;
6692 status_t error = locker.SetTo(teamID);
6693 if (error != B_OK)
6694 return error;
6696 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6697 if (area == NULL)
6698 return B_NO_MEMORY;
6701 uint32 protection = area->protection;
6702 if (area->page_protections != NULL)
6703 protection = get_area_page_protection(area, (addr_t)address);
6705 uint32 wiring = area->wiring;
6707 locker.Unlock();
6709 error = user_memcpy(_protected, &protection, sizeof(protection));
6710 if (error != B_OK)
6711 return error;
6713 error = user_memcpy(_lock, &wiring, sizeof(wiring));
6715 return error;
6719 // #pragma mark -- compatibility
6722 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6725 struct physical_entry_beos {
6726 uint32 address;
6727 uint32 size;
6731 /*! The physical_entry structure has changed. We need to translate it to the
6732 old one.
6734 extern "C" int32
6735 __get_memory_map_beos(const void* _address, size_t numBytes,
6736 physical_entry_beos* table, int32 numEntries)
6738 if (numEntries <= 0)
6739 return B_BAD_VALUE;
6741 const uint8* address = (const uint8*)_address;
6743 int32 count = 0;
6744 while (numBytes > 0 && count < numEntries) {
6745 physical_entry entry;
6746 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6747 if (result < 0) {
6748 if (result != B_BUFFER_OVERFLOW)
6749 return result;
6752 if (entry.address >= (phys_addr_t)1 << 32) {
6753 panic("get_memory_map(): Address is greater 4 GB!");
6754 return B_ERROR;
6757 table[count].address = entry.address;
6758 table[count++].size = entry.size;
6760 address += entry.size;
6761 numBytes -= entry.size;
6764 // null-terminate the table, if possible
6765 if (count < numEntries) {
6766 table[count].address = 0;
6767 table[count].size = 0;
6770 return B_OK;
6774 /*! The type of the \a physicalAddress parameter has changed from void* to
6775 phys_addr_t.
6777 extern "C" area_id
6778 __map_physical_memory_beos(const char* name, void* physicalAddress,
6779 size_t numBytes, uint32 addressSpec, uint32 protection,
6780 void** _virtualAddress)
6782 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6783 addressSpec, protection, _virtualAddress);
6787 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6788 we meddle with the \a lock parameter to force a 32-bit allocation.
6790 extern "C" area_id
6791 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6792 size_t size, uint32 lock, uint32 protection)
6794 switch (lock) {
6795 case B_NO_LOCK:
6796 break;
6797 case B_FULL_LOCK:
6798 case B_LAZY_LOCK:
6799 lock = B_32_BIT_FULL_LOCK;
6800 break;
6801 case B_CONTIGUOUS:
6802 lock = B_32_BIT_CONTIGUOUS;
6803 break;
6806 return __create_area_haiku(name, _address, addressSpec, size, lock,
6807 protection);
6811 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6812 "BASE");
6813 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6814 "map_physical_memory@", "BASE");
6815 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6816 "BASE");
6818 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6819 "get_memory_map@@", "1_ALPHA3");
6820 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6821 "map_physical_memory@@", "1_ALPHA3");
6822 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6823 "1_ALPHA3");
6826 #else
6829 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6830 "get_memory_map@@", "BASE");
6831 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6832 "map_physical_memory@@", "BASE");
6833 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6834 "BASE");
6837 #endif // defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32