 * Copyright 2009-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2008, Jérôme Duval.
 * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */

#include <algorithm>
#include <new>

#include <KernelExport.h>

#include <boot/kernel_args.h>

#include <util/AutoLock.h>

#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>

#include <arch/x86/bios.h>

//#define TRACE_ARCH_VM
#ifdef TRACE_ARCH_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif


// 0: disabled, 1: some, 2: more
#define TRACE_MTRR_ARCH_VM 1

#if TRACE_MTRR_ARCH_VM >= 1
#	define TRACE_MTRR(x...) dprintf(x)
#else
#	define TRACE_MTRR(x...)
#endif

#if TRACE_MTRR_ARCH_VM >= 2
#	define TRACE_MTRR2(x...) dprintf(x)
#else
#	define TRACE_MTRR2(x...)
#endif


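// A memory_type_range records the physical base and size of an area (or of a
// physical memory range registered with area ID -1) together with its IA32
// MTR type. A memory_type_range_point is the start or end address of such a
// range; update_mtrrs() sorts these points and sweeps over them to compute
// the effective, non-overlapping ranges.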
struct memory_type_range : DoublyLinkedListLinkImpl<memory_type_range> {
	uint64				base;
	uint64				size;
	uint32				type;
	area_id				area;
};


struct memory_type_range_point
		: DoublyLinkedListLinkImpl<memory_type_range_point> {
	uint64				address;
	memory_type_range*	range;

	bool IsStart() const	{ return range->base == address; }

	bool operator<(const memory_type_range_point& other) const
	{
		return address < other.address;
	}
};


struct update_mtrr_info {
	uint64	ignoreUncacheableSize;
	uint64	shortestUncacheableSize;
};


typedef DoublyLinkedList<memory_type_range> MemoryTypeRangeList;


static mutex sMemoryTypeLock = MUTEX_INITIALIZER("memory type ranges");
static MemoryTypeRangeList sMemoryTypeRanges;
static int32 sMemoryTypeRangeCount = 0;

static const uint32 kMaxMemoryTypeRegisters = 32;
static x86_mtrr_info sMemoryTypeRegisters[kMaxMemoryTypeRegisters];
static uint32 sMemoryTypeRegisterCount;
static uint32 sMemoryTypeRegistersUsed;

static memory_type_range* sTemporaryRanges = NULL;
static memory_type_range_point* sTemporaryRangePoints = NULL;
static int32 sTemporaryRangeCount = 0;
static int32 sTemporaryRangePointCount = 0;


static void
set_mtrrs()
{
	x86_set_mtrrs(IA32_MTR_WRITE_BACK, sMemoryTypeRegisters,
		sMemoryTypeRegistersUsed);

#if TRACE_MTRR_ARCH_VM
	TRACE_MTRR("set MTRRs to:\n");
	for (uint32 i = 0; i < sMemoryTypeRegistersUsed; i++) {
		const x86_mtrr_info& info = sMemoryTypeRegisters[i];
		TRACE_MTRR("  mtrr: %2" B_PRIu32 ": base: %#10" B_PRIx64 ", size: %#10"
			B_PRIx64 ", type: %u\n", i, info.base, info.size,
			info.type);
	}
#endif
}


static bool
add_used_mtrr(uint64 base, uint64 size, uint32 type)
{
	if (sMemoryTypeRegistersUsed == sMemoryTypeRegisterCount)
		return false;

	x86_mtrr_info& mtrr = sMemoryTypeRegisters[sMemoryTypeRegistersUsed++];
	mtrr.base = base;
	mtrr.size = size;
	mtrr.type = type;

	return true;
}


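// Covers the given range with MTRRs by splitting it into power-of-two sized
// and aligned chunks: at each power-of-two step a chunk is cut off the front
// when the current base has that bit set, and off the back when the remaining
// size has that bit set. For example, base 0xa8000 with size 0x58000 is
// covered by the chunks 0xa8000+0x8000, 0xb0000+0x10000 and 0xc0000+0x40000.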
static bool
add_mtrrs_for_range(uint64 base, uint64 size, uint32 type)
{
	for (uint64 interval = B_PAGE_SIZE; size > 0; interval <<= 1) {
		if ((base & interval) != 0) {
			if (!add_used_mtrr(base, interval, type))
				return false;
			base += interval;
			size -= interval;
		}

		if ((size & interval) != 0) {
			if (!add_used_mtrr(base + size - interval, interval, type))
				return false;
			size -= interval;
		}
	}

	return true;
}


static memory_type_range*
find_range(area_id areaID)
{
	for (MemoryTypeRangeList::Iterator it = sMemoryTypeRanges.GetIterator();
			memory_type_range* range = it.Next();) {
		if (range->area == areaID)
			return range;
	}

	return NULL;
}


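/*!	Extends and joins the ranges of the given \a type in \a ranges, aligning
	their base and end addresses to the largest power of two that does not
	make them intersect a differently typed range. If \a removeRanges is
	\c true, the processed ranges are removed from the list afterwards.
*/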
static void
optimize_memory_ranges(MemoryTypeRangeList& ranges, uint32 type,
	bool removeRanges)
{
	uint64 previousEnd = 0;
	uint64 nextStart = 0;
	MemoryTypeRangeList::Iterator it = ranges.GetIterator();
	memory_type_range* range = it.Next();
	while (range != NULL) {
		if (range->type != type) {
			previousEnd = range->base + range->size;
			nextStart = 0;
			range = it.Next();
			continue;
		}

		// find the start of the next range we cannot join this one with
		if (nextStart == 0) {
			MemoryTypeRangeList::Iterator nextIt = it;
			while (memory_type_range* nextRange = nextIt.Next()) {
				if (nextRange->type != range->type) {
					nextStart = nextRange->base;
					break;
				}
			}

			if (nextStart == 0) {
				// no upper limit -- set an artificial one, so we don't need to
				// special case below
				nextStart = (uint64)1 << 32;
			}
		}

		// Align the range's base and end to the greatest power of two
		// possible. As long as we can align both without intersecting any
		// differently typed range, we can extend the range without making it
		// more complicated. Once one side hits a limit, we need to be careful:
		// we can still continue aligning the other side, if the range crosses
		// the power of two boundary.
		uint64 rangeBase = range->base;
		uint64 rangeEnd = rangeBase + range->size;
		uint64 interval = B_PAGE_SIZE * 2;
		while (true) {
			uint64 alignedBase = rangeBase & ~(interval - 1);
			uint64 alignedEnd = (rangeEnd + interval - 1) & ~(interval - 1);

			if (alignedBase < previousEnd)
				alignedBase += interval;

			if (alignedEnd > nextStart)
				alignedEnd -= interval;

			if (alignedBase >= alignedEnd)
				break;

			rangeBase = std::min(rangeBase, alignedBase);
			rangeEnd = std::max(rangeEnd, alignedEnd);

			interval <<= 1;
		}

		range->base = rangeBase;
		range->size = rangeEnd - rangeBase;

		if (removeRanges)
			it.Remove();

		previousEnd = rangeEnd;

		// Skip the subsequent ranges we have swallowed and possibly cut one
		// we now partially intersect with.
		while ((range = it.Next()) != NULL) {
			if (range->base >= rangeEnd)
				break;

			if (range->base + range->size > rangeEnd) {
				// we partially intersect -- cut the range
				range->size = range->base + range->size - rangeEnd;
				range->base = rangeEnd;
				break;
			}

			// we have swallowed this range completely
			range->size = 0;
			it.Remove();
		}
	}
}


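/*!	Makes sure the sTemporaryRanges and sTemporaryRangePoints arrays can hold
	at least \a count elements each, reallocating them (rounded up to a power
	of two) if necessary. Returns \c false when an allocation fails.
*/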
static bool
ensure_temporary_ranges_space(int32 count)
{
	if (sTemporaryRangeCount >= count && sTemporaryRangePointCount >= count)
		return true;

	// round count to power of 2
	int32 unalignedCount = count;
	count = 8;
	while (count < unalignedCount)
		count <<= 1;

	// resize ranges array
	if (sTemporaryRangeCount < count) {
		memory_type_range* ranges = new(std::nothrow) memory_type_range[count];
		if (ranges == NULL)
			return false;

		delete[] sTemporaryRanges;

		sTemporaryRanges = ranges;
		sTemporaryRangeCount = count;
	}

	// resize points array
	if (sTemporaryRangePointCount < count) {
		memory_type_range_point* points
			= new(std::nothrow) memory_type_range_point[count];
		if (points == NULL)
			return false;

		delete[] sTemporaryRangePoints;

		sTemporaryRangePoints = points;
		sTemporaryRangePointCount = count;
	}

	return true;
}


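/*!	Recomputes and programs the MTRRs from sMemoryTypeRanges: the ranges are
	turned into a sorted list of start/end points, swept to build effective
	non-overlapping ranges (the stricter type wins on overlap), optimized per
	memory type to be MTRR-friendly, and finally translated into MTRR entries.
	Returns \c B_BUSY if the available MTRRs do not suffice. Uncacheable
	ranges no larger than \a updateInfo.ignoreUncacheableSize are skipped, and
	the size of the smallest uncacheable range that was not skipped is
	reported back in \a updateInfo.shortestUncacheableSize.
*/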
static status_t
update_mtrrs(update_mtrr_info& updateInfo)
{
	// resize the temporary points/ranges arrays, if necessary
	if (!ensure_temporary_ranges_space(sMemoryTypeRangeCount * 2))
		return B_NO_MEMORY;

	// get the range points and sort them
	memory_type_range_point* rangePoints = sTemporaryRangePoints;
	int32 pointCount = 0;
	for (MemoryTypeRangeList::Iterator it = sMemoryTypeRanges.GetIterator();
			memory_type_range* range = it.Next();) {
		if (range->type == IA32_MTR_UNCACHED) {
			// Ignore uncacheable ranges below a certain size, if requested.
			// Since we always enforce uncacheability via the PTE attributes,
			// this is no problem (though not recommended for performance
			// reasons).
			if (range->size <= updateInfo.ignoreUncacheableSize)
				continue;
			if (range->size < updateInfo.shortestUncacheableSize)
				updateInfo.shortestUncacheableSize = range->size;
		}

		rangePoints[pointCount].address = range->base;
		rangePoints[pointCount++].range = range;
		rangePoints[pointCount].address = range->base + range->size;
		rangePoints[pointCount++].range = range;
	}

	std::sort(rangePoints, rangePoints + pointCount);

#if TRACE_MTRR_ARCH_VM >= 2
	TRACE_MTRR2("memory type range points:\n");
	for (int32 i = 0; i < pointCount; i++) {
		TRACE_MTRR2("%12" B_PRIx64 " (%p)\n", rangePoints[i].address,
			rangePoints[i].range);
	}
#endif

	// Compute the effective ranges. When ranges overlap, we go with the
	// stricter requirement. The types are not necessarily totally ordered, so
	// the order we use below is not always correct. To keep it simple we
	// consider it the responsibility of the callers not to define overlapping
	// memory ranges with uncomparable types.

	memory_type_range* ranges = sTemporaryRanges;
	typedef DoublyLinkedList<memory_type_range_point> PointList;
	PointList pendingPoints;
	memory_type_range* activeRange = NULL;
	int32 rangeCount = 0;

	for (int32 i = 0; i < pointCount; i++) {
		memory_type_range_point* point = &rangePoints[i];
		bool terminateRange = false;
		if (point->IsStart()) {
			// a range start point
			pendingPoints.Add(point);
			if (activeRange != NULL && activeRange->type > point->range->type)
				terminateRange = true;
		} else {
			// a range end point -- remove the pending start point
			for (PointList::Iterator it = pendingPoints.GetIterator();
					memory_type_range_point* pendingPoint = it.Next();) {
				if (pendingPoint->range == point->range) {
					it.Remove();
					break;
				}
			}

			if (point->range == activeRange)
				terminateRange = true;
		}

		if (terminateRange) {
			ranges[rangeCount].size = point->address - ranges[rangeCount].base;
			rangeCount++;
			activeRange = NULL;
		}

		if (activeRange != NULL || pendingPoints.IsEmpty())
			continue;

		// we need to start a new range -- find the strictest pending range
		for (PointList::Iterator it = pendingPoints.GetIterator();
				memory_type_range_point* pendingPoint = it.Next();) {
			memory_type_range* pendingRange = pendingPoint->range;
			if (activeRange == NULL || activeRange->type > pendingRange->type)
				activeRange = pendingRange;
		}

		memory_type_range* previousRange = rangeCount > 0
			? &ranges[rangeCount - 1] : NULL;
		if (previousRange == NULL || previousRange->type != activeRange->type
				|| previousRange->base + previousRange->size
					< activeRange->base) {
			// we can't join with the previous range -- add a new one
			ranges[rangeCount].base = point->address;
			ranges[rangeCount].type = activeRange->type;
		} else
			rangeCount--;
	}

#if TRACE_MTRR_ARCH_VM >= 2
	TRACE_MTRR2("effective memory type ranges:\n");
	for (int32 i = 0; i < rangeCount; i++) {
		TRACE_MTRR2("%12" B_PRIx64 " - %12" B_PRIx64 ": %" B_PRIu32 "\n",
			ranges[i].base, ranges[i].base + ranges[i].size, ranges[i].type);
	}
#endif

	// Extend ranges to be more MTRR-friendly. A range is MTRR-friendly when it
	// has a power of two size and a base address aligned to the size. For
	// ranges without this property we need more than one MTRR. We improve
	// MTRR-friendliness by aligning a range's base and end address to the
	// greatest power of two (base rounded down, end up) such that the extended
	// range does not intersect with any other differently typed range. We join
	// equally typed ranges, if possible. There are two exceptions to the
	// intersection requirement: Uncached ranges may intersect with any other
	// range; the resulting type will still be uncached. Hence we can ignore
	// uncached ranges when extending the other ranges. Write-through ranges
	// may intersect with write-back ranges; the resulting type will be
	// write-through. Hence we can ignore write-through ranges when extending
	// write-back ranges.

	MemoryTypeRangeList rangeList;
	for (int32 i = 0; i < rangeCount; i++)
		rangeList.Add(&ranges[i]);

	static const uint32 kMemoryTypes[] = {
		IA32_MTR_UNCACHED,
		IA32_MTR_WRITE_COMBINING,
		IA32_MTR_WRITE_PROTECTED,
		IA32_MTR_WRITE_THROUGH,
		IA32_MTR_WRITE_BACK
	};
	static const int32 kMemoryTypeCount = sizeof(kMemoryTypes)
		/ sizeof(*kMemoryTypes);

	for (int32 i = 0; i < kMemoryTypeCount; i++) {
		uint32 type = kMemoryTypes[i];

		// Remove uncached and write-through ranges after processing them. This
		// lets us leverage their intersection property with any other range
		// respectively with write-back ranges.
		bool removeRanges = type == IA32_MTR_UNCACHED
			|| type == IA32_MTR_WRITE_THROUGH;

		optimize_memory_ranges(rangeList, type, removeRanges);
	}

#if TRACE_MTRR_ARCH_VM >= 2
	TRACE_MTRR2("optimized memory type ranges:\n");
	for (int32 i = 0; i < rangeCount; i++) {
		if (ranges[i].size > 0) {
			TRACE_MTRR2("%12" B_PRIx64 " - %12" B_PRIx64 ": %" B_PRIu32 "\n",
				ranges[i].base, ranges[i].base + ranges[i].size,
				ranges[i].type);
		}
	}
#endif

	// compute the mtrrs from the ranges
	sMemoryTypeRegistersUsed = 0;
	for (int32 i = 0; i < kMemoryTypeCount; i++) {
		uint32 type = kMemoryTypes[i];

		// skip write-back ranges -- that'll be the default type anyway
		if (type == IA32_MTR_WRITE_BACK)
			continue;

		for (int32 i = 0; i < rangeCount; i++) {
			if (ranges[i].size == 0 || ranges[i].type != type)
				continue;

			if (!add_mtrrs_for_range(ranges[i].base, ranges[i].size, type))
				return B_BUSY;
		}
	}

	set_mtrrs();

	return B_OK;
}


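/*!	Wraps update_mtrrs(update_mtrr_info&) in a retry loop: when the MTRRs do
	not suffice (\c B_BUSY), the threshold for ignoring small uncacheable
	ranges is raised to the smallest uncacheable range size seen in the failed
	attempt and the update is retried. This trades MTRR coverage for those
	small ranges, which remain uncacheable via their PTE attributes anyway.
*/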
static status_t
update_mtrrs()
{
	// Until we know how many MTRRs we have, pretend everything is OK.
	if (sMemoryTypeRegisterCount == 0)
		return B_OK;

	update_mtrr_info updateInfo;
	updateInfo.ignoreUncacheableSize = 0;

	while (true) {
		TRACE_MTRR2("update_mtrrs(): Trying with ignoreUncacheableSize %#"
			B_PRIx64 ".\n", updateInfo.ignoreUncacheableSize);

		updateInfo.shortestUncacheableSize = ~(uint64)0;
		status_t error = update_mtrrs(updateInfo);
		if (error != B_BUSY) {
			if (error == B_OK && updateInfo.ignoreUncacheableSize > 0) {
				TRACE_MTRR("update_mtrrs(): Succeeded setting MTRRs after "
					"ignoring uncacheable ranges up to size %#" B_PRIx64 ".\n",
					updateInfo.ignoreUncacheableSize);
			}
			return error;
		}

		// Not enough MTRRs. Retry with fewer uncacheable ranges.
		if (updateInfo.shortestUncacheableSize == ~(uint64)0) {
			// Ugh, even without any uncacheable ranges the available MTRRs do
			// not suffice.
			panic("update_mtrrs(): Out of MTRRs!");
			return B_BUSY;
		}

		ASSERT(updateInfo.ignoreUncacheableSize
			< updateInfo.shortestUncacheableSize);

		updateInfo.ignoreUncacheableSize = updateInfo.shortestUncacheableSize;
	}
}


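/*!	Registers a memory type for the given physical range. \a areaID identifies
	the owning area, or is -1 for ranges that are not backed by an area (the
	physical memory ranges registered in arch_vm_init_post_modules()). The
	public \c B_MTR_* type is translated to its IA32 counterpart, the range is
	added to (or updated in) sMemoryTypeRanges, and the MTRRs are recomputed;
	on failure the change is reverted.
*/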
static status_t
add_memory_type_range(area_id areaID, uint64 base, uint64 size, uint32 type)
{
	// translate the type
	if (type == 0)
		return B_OK;

	switch (type) {
		case B_MTR_UC:
			type = IA32_MTR_UNCACHED;
			break;
		case B_MTR_WC:
			type = IA32_MTR_WRITE_COMBINING;
			break;
		case B_MTR_WT:
			type = IA32_MTR_WRITE_THROUGH;
			break;
		case B_MTR_WP:
			type = IA32_MTR_WRITE_PROTECTED;
			break;
		case B_MTR_WB:
			type = IA32_MTR_WRITE_BACK;
			break;
		default:
			return B_BAD_VALUE;
	}

	TRACE_MTRR("add_memory_type_range(%" B_PRId32 ", %#" B_PRIx64 ", %#"
		B_PRIx64 ", %" B_PRIu32 ")\n", areaID, base, size, type);

	MutexLocker locker(sMemoryTypeLock);

	memory_type_range* range = areaID >= 0 ? find_range(areaID) : NULL;
	int32 oldRangeType = -1;
	if (range != NULL) {
		if (range->base != base || range->size != size)
			return B_BAD_VALUE;
		if (range->type == type)
			return B_OK;

		oldRangeType = range->type;
		range->type = type;
	} else {
		range = new(std::nothrow) memory_type_range;
		if (range == NULL)
			return B_NO_MEMORY;

		range->area = areaID;
		range->base = base;
		range->size = size;
		range->type = type;
		sMemoryTypeRanges.Add(range);
		sMemoryTypeRangeCount++;
	}

	status_t error = update_mtrrs();
	if (error != B_OK) {
		// revert the addition of the range/change of its type
		if (oldRangeType < 0) {
			sMemoryTypeRanges.Remove(range);
			sMemoryTypeRangeCount--;
			delete range;
		} else
			range->type = oldRangeType;

		update_mtrrs();
		return error;
	}

	return B_OK;
}


static void
remove_memory_type_range(area_id areaID)
{
	MutexLocker locker(sMemoryTypeLock);

	memory_type_range* range = find_range(areaID);
	if (range != NULL) {
		TRACE_MTRR("remove_memory_type_range(%" B_PRId32 ", %#" B_PRIx64 ", %#"
			B_PRIx64 ", %" B_PRIu32 ")\n", range->area, range->base,
			range->size, range->type);

		sMemoryTypeRanges.Remove(range);
		sMemoryTypeRangeCount--;
		delete range;

		update_mtrrs();
	} else {
		dprintf("remove_memory_type_range(): no range known for area %" B_PRId32
			"\n", areaID);
	}
}


status_t
arch_vm_init(kernel_args *args)
{
	TRACE(("arch_vm_init: entry\n"));
	return B_OK;
}


/*!	Marks the DMA region as in-use and maps it into the kernel space. */
status_t
arch_vm_init_post_area(kernel_args *args)
{
	area_id id;

	TRACE(("arch_vm_init_post_area: entry\n"));

	// account for DMA area and mark the pages unusable
	vm_mark_page_range_inuse(0x0, 0xa0000 / B_PAGE_SIZE);

	// map 0 - 0xa0000 directly
	id = map_physical_memory("dma_region", 0x0, 0xa0000,
		B_ANY_KERNEL_ADDRESS | B_MTR_WB,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, &gDmaAddress);
	if (id < 0) {
		panic("arch_vm_init_post_area: unable to map dma region\n");
		return B_NO_MEMORY;
	}

	return bios_init();
}


/*!	Gets rid of all yet unmapped (and therefore now unused) page tables. */
status_t
arch_vm_init_end(kernel_args *args)
{
	TRACE(("arch_vm_init_end: entry\n"));

	// throw away anything in the kernel_args.pgtable[] that's not yet mapped
	vm_free_unused_boot_loader_range(KERNEL_LOAD_BASE,
		args->arch_args.virtual_end - KERNEL_LOAD_BASE);

	return B_OK;
}


status_t
arch_vm_init_post_modules(kernel_args *args)
{
	// the x86 CPU modules are now accessible

	sMemoryTypeRegisterCount = x86_count_mtrrs();
	if (sMemoryTypeRegisterCount == 0)
		return B_OK;

	// not very likely, but play safe here
	if (sMemoryTypeRegisterCount > kMaxMemoryTypeRegisters)
		sMemoryTypeRegisterCount = kMaxMemoryTypeRegisters;

	// set the physical memory ranges to write-back mode
	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
		add_memory_type_range(-1, args->physical_memory_range[i].start,
			args->physical_memory_range[i].size, B_MTR_WB);
	}

	return B_OK;
}


void
arch_vm_aspace_swap(struct VMAddressSpace *from, struct VMAddressSpace *to)
{
	// This function is only invoked when a userland thread is in the process
	// of dying. It switches to the kernel team and does whatever cleanup is
	// necessary (in case it is the team's main thread, it will delete the
	// team).
	// It is, however, not necessary to change the page directory. Userland
	// teams' page directories include all kernel mappings as well. Furthermore
	// our arch-specific translation map data objects are ref-counted, so they
	// won't go away as long as they are still used on any CPU.
}


bool
arch_vm_supports_protection(uint32 protection)
{
	// x86 always has the same read/write properties for userland and the
	// kernel.
	// That's why we do not support user-read/kernel-write access. While the
	// other way around is not supported either, we don't care in this case
	// and give the kernel full access.
	if ((protection & (B_READ_AREA | B_WRITE_AREA)) == B_READ_AREA
		&& (protection & B_KERNEL_WRITE_AREA) != 0) {
		return false;
	}

	// Userland and the kernel have the same setting of the NX bit. That's why
	// we do not allow any area that userland can access but not execute while
	// the kernel can execute it.
	if ((protection & (B_READ_AREA | B_WRITE_AREA)) != 0
		&& (protection & B_EXECUTE_AREA) == 0
		&& (protection & B_KERNEL_EXECUTE_AREA) != 0) {
		return false;
	}

	return true;
}


void
arch_vm_unset_memory_type(struct VMArea *area)
{
	if (area->MemoryType() == 0)
		return;

	remove_memory_type_range(area->id);
}


status_t
arch_vm_set_memory_type(struct VMArea *area, phys_addr_t physicalBase,
	uint32 type)
{
	return add_memory_type_range(area->id, physicalBase, area->Size(), type);
}