// src/system/kernel/arch/x86/arch_vm.cpp
/*
 * Copyright 2009-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2008, Jérôme Duval.
 * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */

#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <new>

#include <KernelExport.h>

#include <boot/kernel_args.h>
#include <smp.h>
#include <util/AutoLock.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>

#include <arch/vm.h>
#include <arch/int.h>
#include <arch/cpu.h>

#include <arch/x86/bios.h>

//#define TRACE_ARCH_VM
#ifdef TRACE_ARCH_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

// 0: disabled, 1: some, 2: more
#define TRACE_MTRR_ARCH_VM 1

#if TRACE_MTRR_ARCH_VM >= 1
#	define TRACE_MTRR(x...) dprintf(x)
#else
#	define TRACE_MTRR(x...)
#endif

#if TRACE_MTRR_ARCH_VM >= 2
#	define TRACE_MTRR2(x...) dprintf(x)
#else
#	define TRACE_MTRR2(x...)
#endif

void *gDmaAddress;


namespace {
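
/*!	One registered memory-type request: a physical range, the IA32 MTRR type
	it should get, and the area it belongs to (-1 for ranges added
	independently of an area).
*/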
struct memory_type_range : DoublyLinkedListLinkImpl<memory_type_range> {
	uint64				base;
	uint64				size;
	uint32				type;
	area_id				area;
};


struct memory_type_range_point
		: DoublyLinkedListLinkImpl<memory_type_range_point> {
	uint64				address;
	memory_type_range*	range;

	bool IsStart() const	{ return range->base == address; }

	bool operator<(const memory_type_range_point& other) const
	{
		return address < other.address;
	}
};


struct update_mtrr_info {
	uint64	ignoreUncacheableSize;
	uint64	shortestUncacheableSize;
};


typedef DoublyLinkedList<memory_type_range> MemoryTypeRangeList;

} // namespace

static mutex sMemoryTypeLock = MUTEX_INITIALIZER("memory type ranges");
static MemoryTypeRangeList sMemoryTypeRanges;
static int32 sMemoryTypeRangeCount = 0;

static const uint32 kMaxMemoryTypeRegisters = 32;
static x86_mtrr_info sMemoryTypeRegisters[kMaxMemoryTypeRegisters];
static uint32 sMemoryTypeRegisterCount;
static uint32 sMemoryTypeRegistersUsed;

static memory_type_range* sTemporaryRanges = NULL;
static memory_type_range_point* sTemporaryRangePoints = NULL;
static int32 sTemporaryRangeCount = 0;
static int32 sTemporaryRangePointCount = 0;
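
/*!	Writes the accumulated sMemoryTypeRegisters to the hardware via
	x86_set_mtrrs(), passing IA32_MTR_WRITE_BACK as the default memory type,
	and traces the new setup if tracing is enabled.
*/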
static void
set_mtrrs()
{
	x86_set_mtrrs(IA32_MTR_WRITE_BACK, sMemoryTypeRegisters,
		sMemoryTypeRegistersUsed);

#if TRACE_MTRR_ARCH_VM
	TRACE_MTRR("set MTRRs to:\n");
	for (uint32 i = 0; i < sMemoryTypeRegistersUsed; i++) {
		const x86_mtrr_info& info = sMemoryTypeRegisters[i];
		TRACE_MTRR("  mtrr: %2" B_PRIu32 ": base: %#10" B_PRIx64 ", size: %#10"
			B_PRIx64 ", type: %u\n", i, info.base, info.size,
			info.type);
	}
#endif
}
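
/*!	Appends a single MTRR entry to sMemoryTypeRegisters. Returns false once
	all available hardware registers are in use.
*/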
static bool
add_used_mtrr(uint64 base, uint64 size, uint32 type)
{
	if (sMemoryTypeRegistersUsed == sMemoryTypeRegisterCount)
		return false;

	x86_mtrr_info& mtrr = sMemoryTypeRegisters[sMemoryTypeRegistersUsed++];
	mtrr.base = base;
	mtrr.size = size;
	mtrr.type = type;

	return true;
}
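
/*!	Covers an arbitrary page-aligned range with MTRRs by splitting it into
	naturally aligned power-of-two blocks, consuming the range from both ends.
	E.g. base 0xa0000 with size 0x60000 becomes two blocks, 0xa0000/0x20000
	and 0xc0000/0x40000. Returns false if the available registers run out.
*/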
static bool
add_mtrrs_for_range(uint64 base, uint64 size, uint32 type)
{
	for (uint64 interval = B_PAGE_SIZE; size > 0; interval <<= 1) {
		if ((base & interval) != 0) {
			if (!add_used_mtrr(base, interval, type))
				return false;
			base += interval;
			size -= interval;
		}

		if ((size & interval) != 0) {
			if (!add_used_mtrr(base + size - interval, interval, type))
				return false;
			size -= interval;
		}
	}

	return true;
}
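
/*!	Returns the registered range belonging to the given area, or NULL if the
	area has no memory type range.
*/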
static memory_type_range*
find_range(area_id areaID)
{
	for (MemoryTypeRangeList::Iterator it = sMemoryTypeRanges.GetIterator();
			memory_type_range* range = it.Next();) {
		if (range->area == areaID)
			return range;
	}

	return NULL;
}
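
/*!	Makes the ranges of the given type in the list more MTRR-friendly: each
	range's base is rounded down and its end rounded up to the greatest power
	of two that does not intersect a differently typed neighbor, and
	subsequent equally typed ranges that are covered by the extension are
	merged in. If removeRanges is true, the processed ranges are removed from
	the list afterwards.
*/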
static void
optimize_memory_ranges(MemoryTypeRangeList& ranges, uint32 type,
	bool removeRanges)
{
	uint64 previousEnd = 0;
	uint64 nextStart = 0;
	MemoryTypeRangeList::Iterator it = ranges.GetIterator();
	memory_type_range* range = it.Next();
	while (range != NULL) {
		if (range->type != type) {
			previousEnd = range->base + range->size;
			nextStart = 0;
			range = it.Next();
			continue;
		}

		// find the start of the next range we cannot join this one with
		if (nextStart == 0) {
			MemoryTypeRangeList::Iterator nextIt = it;
			while (memory_type_range* nextRange = nextIt.Next()) {
				if (nextRange->type != range->type) {
					nextStart = nextRange->base;
					break;
				}
			}

			if (nextStart == 0) {
				// no upper limit -- set an artificial one, so we don't need to
				// special case below
				nextStart = (uint64)1 << 32;
			}
		}

		// Align the range's base and end to the greatest power of two
		// possible. As long as we can align both without intersecting any
		// differently typed range, we can extend the range without making it
		// more complicated. Once one side hits a limit we need to be careful.
		// We can still continue aligning the other side, if the range crosses
		// the power of two boundary.
		uint64 rangeBase = range->base;
		uint64 rangeEnd = rangeBase + range->size;
		uint64 interval = B_PAGE_SIZE * 2;
		while (true) {
			uint64 alignedBase = rangeBase & ~(interval - 1);
			uint64 alignedEnd = (rangeEnd + interval - 1) & ~(interval - 1);

			if (alignedBase < previousEnd)
				alignedBase += interval;

			if (alignedEnd > nextStart)
				alignedEnd -= interval;

			if (alignedBase >= alignedEnd)
				break;

			rangeBase = std::min(rangeBase, alignedBase);
			rangeEnd = std::max(rangeEnd, alignedEnd);

			interval <<= 1;
		}

		range->base = rangeBase;
		range->size = rangeEnd - rangeBase;

		if (removeRanges)
			it.Remove();

		previousEnd = rangeEnd;

		// Skip the subsequent ranges we have swallowed and possibly cut the
		// one we now partially intersect with.
		while ((range = it.Next()) != NULL) {
			if (range->base >= rangeEnd)
				break;

			if (range->base + range->size > rangeEnd) {
				// we partially intersect -- cut the range
				range->size = range->base + range->size - rangeEnd;
				range->base = rangeEnd;
				break;
			}

			// we have swallowed this range completely
			range->size = 0;
			it.Remove();
		}
	}
}
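
/*!	Makes sure the static sTemporaryRanges/sTemporaryRangePoints arrays can
	hold at least count elements each, reallocating them (with the count
	rounded up to a power of two) if necessary.
*/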
static bool
ensure_temporary_ranges_space(int32 count)
{
	if (sTemporaryRangeCount >= count && sTemporaryRangePointCount >= count)
		return true;

	// round count to power of 2
	int32 unalignedCount = count;
	count = 8;
	while (count < unalignedCount)
		count <<= 1;

	// resize ranges array
	if (sTemporaryRangeCount < count) {
		memory_type_range* ranges = new(std::nothrow) memory_type_range[count];
		if (ranges == NULL)
			return false;

		delete[] sTemporaryRanges;

		sTemporaryRanges = ranges;
		sTemporaryRangeCount = count;
	}

	// resize points array
	if (sTemporaryRangePointCount < count) {
		memory_type_range_point* points
			= new(std::nothrow) memory_type_range_point[count];
		if (points == NULL)
			return false;

		delete[] sTemporaryRangePoints;

		sTemporaryRangePoints = points;
		sTemporaryRangePointCount = count;
	}

	return true;
}
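
/*!	Recomputes the MTRR setup from sMemoryTypeRanges: computes the effective
	(non-overlapping) ranges, optimizes them per memory type, and translates
	the result into MTRR entries. Uncacheable ranges no larger than
	updateInfo.ignoreUncacheableSize are left to the page attributes alone;
	the size of the smallest uncacheable range that was not ignored is stored
	in updateInfo.shortestUncacheableSize. Returns B_BUSY if the ranges don't
	fit into the available registers.
*/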
status_t
update_mtrrs(update_mtrr_info& updateInfo)
{
	// resize the temporary points/ranges arrays, if necessary
	if (!ensure_temporary_ranges_space(sMemoryTypeRangeCount * 2))
		return B_NO_MEMORY;

	// get the range points and sort them
	memory_type_range_point* rangePoints = sTemporaryRangePoints;
	int32 pointCount = 0;
	for (MemoryTypeRangeList::Iterator it = sMemoryTypeRanges.GetIterator();
			memory_type_range* range = it.Next();) {
		if (range->type == IA32_MTR_UNCACHED) {
			// Ignore uncacheable ranges below a certain size, if requested.
			// Since we always enforce uncacheability via the PTE attributes,
			// this is no problem (though not recommended for performance
			// reasons).
			if (range->size <= updateInfo.ignoreUncacheableSize)
				continue;
			if (range->size < updateInfo.shortestUncacheableSize)
				updateInfo.shortestUncacheableSize = range->size;
		}

		rangePoints[pointCount].address = range->base;
		rangePoints[pointCount++].range = range;
		rangePoints[pointCount].address = range->base + range->size;
		rangePoints[pointCount++].range = range;
	}

	std::sort(rangePoints, rangePoints + pointCount);

#if TRACE_MTRR_ARCH_VM >= 2
	TRACE_MTRR2("memory type range points:\n");
	for (int32 i = 0; i < pointCount; i++) {
		TRACE_MTRR2("%12" B_PRIx64 " (%p)\n", rangePoints[i].address,
			rangePoints[i].range);
	}
#endif

	// Compute the effective ranges. When ranges overlap, we go with the
	// stricter requirement. The types are not necessarily totally ordered, so
	// the order we use below is not always correct. To keep it simple we
	// consider it the responsibility of the callers not to define overlapping
	// memory ranges with uncomparable types.

	memory_type_range* ranges = sTemporaryRanges;
	typedef DoublyLinkedList<memory_type_range_point> PointList;
	PointList pendingPoints;
	memory_type_range* activeRange = NULL;
	int32 rangeCount = 0;

	for (int32 i = 0; i < pointCount; i++) {
		memory_type_range_point* point = &rangePoints[i];
		bool terminateRange = false;
		if (point->IsStart()) {
			// a range start point
			pendingPoints.Add(point);
			if (activeRange != NULL && activeRange->type > point->range->type)
				terminateRange = true;
		} else {
			// a range end point -- remove the pending start point
			for (PointList::Iterator it = pendingPoints.GetIterator();
					memory_type_range_point* pendingPoint = it.Next();) {
				if (pendingPoint->range == point->range) {
					it.Remove();
					break;
				}
			}

			if (point->range == activeRange)
				terminateRange = true;
		}

		if (terminateRange) {
			ranges[rangeCount].size = point->address - ranges[rangeCount].base;
			rangeCount++;
			activeRange = NULL;
		}

		if (activeRange != NULL || pendingPoints.IsEmpty())
			continue;

		// we need to start a new range -- find the strictest pending range
		for (PointList::Iterator it = pendingPoints.GetIterator();
				memory_type_range_point* pendingPoint = it.Next();) {
			memory_type_range* pendingRange = pendingPoint->range;
			if (activeRange == NULL || activeRange->type > pendingRange->type)
				activeRange = pendingRange;
		}

		memory_type_range* previousRange = rangeCount > 0
			? &ranges[rangeCount - 1] : NULL;
		if (previousRange == NULL || previousRange->type != activeRange->type
				|| previousRange->base + previousRange->size
					< activeRange->base) {
			// we can't join with the previous range -- add a new one
			ranges[rangeCount].base = point->address;
			ranges[rangeCount].type = activeRange->type;
		} else
			rangeCount--;
	}

#if TRACE_MTRR_ARCH_VM >= 2
	TRACE_MTRR2("effective memory type ranges:\n");
	for (int32 i = 0; i < rangeCount; i++) {
		TRACE_MTRR2("%12" B_PRIx64 " - %12" B_PRIx64 ": %" B_PRIu32 "\n",
			ranges[i].base, ranges[i].base + ranges[i].size, ranges[i].type);
	}
#endif

	// Extend ranges to be more MTRR-friendly. A range is MTRR-friendly when it
	// has a power of two size and a base address aligned to the size. For
	// ranges without this property we need more than one MTRR. We improve
	// MTRR-friendliness by aligning a range's base and end address to the
	// greatest power of two (base rounded down, end up) such that the extended
	// range does not intersect with any other differently typed range. We join
	// equally typed ranges, if possible. There are two exceptions to the
	// intersection requirement: Uncached ranges may intersect with any other
	// range; the resulting type will still be uncached. Hence we can ignore
	// uncached ranges when extending the other ranges. Write-through ranges
	// may intersect with write-back ranges; the resulting type will be
	// write-through. Hence we can ignore write-through ranges when extending
	// write-back ranges.

	MemoryTypeRangeList rangeList;
	for (int32 i = 0; i < rangeCount; i++)
		rangeList.Add(&ranges[i]);

	static const uint32 kMemoryTypes[] = {
		IA32_MTR_UNCACHED,
		IA32_MTR_WRITE_COMBINING,
		IA32_MTR_WRITE_PROTECTED,
		IA32_MTR_WRITE_THROUGH,
		IA32_MTR_WRITE_BACK
	};
	static const int32 kMemoryTypeCount = sizeof(kMemoryTypes)
		/ sizeof(*kMemoryTypes);

	for (int32 i = 0; i < kMemoryTypeCount; i++) {
		uint32 type = kMemoryTypes[i];

		// Remove uncached and write-through ranges after processing them. This
		// lets us leverage their intersection property with any other range
		// and with write-back ranges, respectively.
		bool removeRanges = type == IA32_MTR_UNCACHED
			|| type == IA32_MTR_WRITE_THROUGH;

		optimize_memory_ranges(rangeList, type, removeRanges);
	}

#if TRACE_MTRR_ARCH_VM >= 2
	TRACE_MTRR2("optimized memory type ranges:\n");
	for (int32 i = 0; i < rangeCount; i++) {
		if (ranges[i].size > 0) {
			TRACE_MTRR2("%12" B_PRIx64 " - %12" B_PRIx64 ": %" B_PRIu32 "\n",
				ranges[i].base, ranges[i].base + ranges[i].size,
				ranges[i].type);
		}
	}
#endif

	// compute the mtrrs from the ranges
	sMemoryTypeRegistersUsed = 0;
	for (int32 i = 0; i < kMemoryTypeCount; i++) {
		uint32 type = kMemoryTypes[i];

		// skip write-back ranges -- that'll be the default type anyway
		if (type == IA32_MTR_WRITE_BACK)
			continue;

		for (int32 i = 0; i < rangeCount; i++) {
			if (ranges[i].size == 0 || ranges[i].type != type)
				continue;

			if (!add_mtrrs_for_range(ranges[i].base, ranges[i].size, type))
				return B_BUSY;
		}
	}

	set_mtrrs();

	return B_OK;
}
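
/*!	Sets up the MTRRs for all registered ranges, retrying with ever larger
	uncacheable ranges ignored (those remain uncached via the page attributes)
	until the setup fits into the available MTRRs.
*/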
status_t
update_mtrrs()
{
	// Until we know how many MTRRs we have, pretend everything is OK.
	if (sMemoryTypeRegisterCount == 0)
		return B_OK;

	update_mtrr_info updateInfo;
	updateInfo.ignoreUncacheableSize = 0;

	while (true) {
		TRACE_MTRR2("update_mtrrs(): Trying with ignoreUncacheableSize %#"
			B_PRIx64 ".\n", updateInfo.ignoreUncacheableSize);

		updateInfo.shortestUncacheableSize = ~(uint64)0;
		status_t error = update_mtrrs(updateInfo);
		if (error != B_BUSY) {
			if (error == B_OK && updateInfo.ignoreUncacheableSize > 0) {
				TRACE_MTRR("update_mtrrs(): Succeeded setting MTRRs after "
					"ignoring uncacheable ranges up to size %#" B_PRIx64 ".\n",
					updateInfo.ignoreUncacheableSize);
			}
			return error;
		}

		// Not enough MTRRs. Retry with fewer uncacheable ranges.
		if (updateInfo.shortestUncacheableSize == ~(uint64)0) {
			// Ugh, even without any uncacheable ranges the available MTRRs do
			// not suffice.
			panic("update_mtrrs(): Out of MTRRs!");
			return B_BUSY;
		}

		ASSERT(updateInfo.ignoreUncacheableSize
			< updateInfo.shortestUncacheableSize);

		updateInfo.ignoreUncacheableSize = updateInfo.shortestUncacheableSize;
	}
}
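
/*!	Registers a memory type range for the given area (or for an area-less
	physical range, if areaID is -1) and reprograms the MTRRs accordingly.
	If the MTRR update fails, the addition or type change is reverted again.
*/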
static status_t
add_memory_type_range(area_id areaID, uint64 base, uint64 size, uint32 type)
{
	// translate the type
	if (type == 0)
		return B_OK;

	switch (type) {
		case B_MTR_UC:
			type = IA32_MTR_UNCACHED;
			break;
		case B_MTR_WC:
			type = IA32_MTR_WRITE_COMBINING;
			break;
		case B_MTR_WT:
			type = IA32_MTR_WRITE_THROUGH;
			break;
		case B_MTR_WP:
			type = IA32_MTR_WRITE_PROTECTED;
			break;
		case B_MTR_WB:
			type = IA32_MTR_WRITE_BACK;
			break;
		default:
			return B_BAD_VALUE;
	}

	TRACE_MTRR("add_memory_type_range(%" B_PRId32 ", %#" B_PRIx64 ", %#"
		B_PRIx64 ", %" B_PRIu32 ")\n", areaID, base, size, type);

	MutexLocker locker(sMemoryTypeLock);

	memory_type_range* range = areaID >= 0 ? find_range(areaID) : NULL;
	int32 oldRangeType = -1;
	if (range != NULL) {
		if (range->base != base || range->size != size)
			return B_BAD_VALUE;
		if (range->type == type)
			return B_OK;

		oldRangeType = range->type;
		range->type = type;
	} else {
		range = new(std::nothrow) memory_type_range;
		if (range == NULL)
			return B_NO_MEMORY;

		range->area = areaID;
		range->base = base;
		range->size = size;
		range->type = type;
		sMemoryTypeRanges.Add(range);
		sMemoryTypeRangeCount++;
	}

	status_t error = update_mtrrs();
	if (error != B_OK) {
		// revert the addition of the range/change of its type
		if (oldRangeType < 0) {
			sMemoryTypeRanges.Remove(range);
			sMemoryTypeRangeCount--;
			delete range;
		} else
			range->type = oldRangeType;

		update_mtrrs();
		return error;
	}

	return B_OK;
}
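
/*!	Removes the memory type range registered for the given area, if any, and
	updates the MTRRs.
*/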
static void
remove_memory_type_range(area_id areaID)
{
	MutexLocker locker(sMemoryTypeLock);

	memory_type_range* range = find_range(areaID);
	if (range != NULL) {
		TRACE_MTRR("remove_memory_type_range(%" B_PRId32 ", %#" B_PRIx64 ", %#"
			B_PRIx64 ", %" B_PRIu32 ")\n", range->area, range->base,
			range->size, range->type);

		sMemoryTypeRanges.Remove(range);
		sMemoryTypeRangeCount--;
		delete range;

		update_mtrrs();
	} else {
		dprintf("remove_memory_type_range(): no range known for area %" B_PRId32
			"\n", areaID);
	}
}

// #pragma mark -


status_t
arch_vm_init(kernel_args *args)
{
	TRACE(("arch_vm_init: entry\n"));
	return 0;
}

/*!	Marks DMA region as in-use, and maps it into the kernel space */
status_t
arch_vm_init_post_area(kernel_args *args)
{
	area_id id;

	TRACE(("arch_vm_init_post_area: entry\n"));

	// account for DMA area and mark the pages unusable
	vm_mark_page_range_inuse(0x0, 0xa0000 / B_PAGE_SIZE);

	// map 0 - 0xa0000 directly
	id = map_physical_memory("dma_region", 0x0, 0xa0000,
		B_ANY_KERNEL_ADDRESS | B_MTR_WB,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, &gDmaAddress);
	if (id < 0) {
		panic("arch_vm_init_post_area: unable to map dma region\n");
		return B_NO_MEMORY;
	}

#ifndef __x86_64__
	return bios_init();
#else
	return B_OK;
#endif
}

/*!	Gets rid of all yet unmapped (and therefore now unused) page tables */
status_t
arch_vm_init_end(kernel_args *args)
{
	TRACE(("arch_vm_init_end: entry\n"));

	// throw away anything in the kernel_args.pgtable[] that's not yet mapped
	vm_free_unused_boot_loader_range(KERNEL_LOAD_BASE,
		args->arch_args.virtual_end - KERNEL_LOAD_BASE);

	return B_OK;
}
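
/*!	Called once the CPU modules are available: queries the number of MTRRs
	and requests write-back caching for all physical memory ranges in the
	kernel_args.
*/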
status_t
arch_vm_init_post_modules(kernel_args *args)
{
	// the x86 CPU modules are now accessible

	sMemoryTypeRegisterCount = x86_count_mtrrs();
	if (sMemoryTypeRegisterCount == 0)
		return B_OK;

	// not very likely, but play safe here
	if (sMemoryTypeRegisterCount > kMaxMemoryTypeRegisters)
		sMemoryTypeRegisterCount = kMaxMemoryTypeRegisters;

	// set the physical memory ranges to write-back mode
	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
		add_memory_type_range(-1, args->physical_memory_range[i].start,
			args->physical_memory_range[i].size, B_MTR_WB);
	}

	return B_OK;
}

void
arch_vm_aspace_swap(struct VMAddressSpace *from, struct VMAddressSpace *to)
{
	// This function is only invoked when a userland thread is in the process
	// of dying. It switches to the kernel team and does whatever cleanup is
	// necessary (in case it is the team's main thread, it will delete the
	// team).
	// It is however not necessary to change the page directory. A userland
	// team's page directory includes all kernel mappings as well. Furthermore
	// our arch-specific translation map data objects are ref-counted, so they
	// won't go away as long as they are still used on any CPU.
}

bool
arch_vm_supports_protection(uint32 protection)
{
	// x86 always has the same read/write properties for userland and the
	// kernel.
	// That's why we do not support user-read/kernel-write access. While the
	// other way around is not supported either, we don't care in this case
	// and give the kernel full access.
	if ((protection & (B_READ_AREA | B_WRITE_AREA)) == B_READ_AREA
		&& (protection & B_KERNEL_WRITE_AREA) != 0) {
		return false;
	}

	// Userland and the kernel also share the NX-bit setting. That's why we do
	// not allow any area that userland can access but not execute, while the
	// kernel can execute it.
	if ((protection & (B_READ_AREA | B_WRITE_AREA)) != 0
		&& (protection & B_EXECUTE_AREA) == 0
		&& (protection & B_KERNEL_EXECUTE_AREA) != 0) {
		return false;
	}

	return true;
}
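
/*!	Drops the area's memory type range again, if the area has a non-default
	memory type.
*/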
void
arch_vm_unset_memory_type(struct VMArea *area)
{
	if (area->MemoryType() == 0)
		return;

	remove_memory_type_range(area->id);
}
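
/*!	Registers the area's memory type (one of the B_MTR_* constants) for the
	given physical base address.
*/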
status_t
arch_vm_set_memory_type(struct VMArea *area, phys_addr_t physicalBase,
	uint32 type)
{
	return add_memory_type_range(area->id, physicalBase, area->Size(), type);
}