/* servers/vm/arch/i386/pagetable.c
 * MINIX VM server: i386 page table management.
 * (History: "VM: simplify slab allocator".)
 */
2 #define _SYSTEM 1
3 #define _POSIX_SOURCE 1
5 #include <minix/callnr.h>
6 #include <minix/com.h>
7 #include <minix/config.h>
8 #include <minix/const.h>
9 #include <minix/ds.h>
10 #include <minix/endpoint.h>
11 #include <minix/keymap.h>
12 #include <minix/minlib.h>
13 #include <minix/type.h>
14 #include <minix/ipc.h>
15 #include <minix/sysutil.h>
16 #include <minix/syslib.h>
17 #include <minix/safecopies.h>
18 #include <minix/cpufeature.h>
19 #include <minix/bitmap.h>
20 #include <minix/debug.h>
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <assert.h>
25 #include <string.h>
26 #include <env.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <stdlib.h>
31 #include "proto.h"
32 #include "glo.h"
33 #include "util.h"
34 #include "vm.h"
35 #include "sanitycheck.h"
37 #include "memory.h"
/* PDE used to map in kernel, kernel physical address. */
static int pagedir_pde = -1;
static u32_t global_bit = 0, pagedir_pde_val;

/* Kernel boot module and its size/start PDE, filled in by pt_init(). */
static multiboot_module_t *kern_mb_mod = NULL;
static size_t kern_size = 0;
static int kern_start_pde = -1;

/* 4MB page size available in hardware? */
static int bigpage_ok = 0;

/* Our process table entry. */
struct vmproc *vmprocess = &vmproc[VM_PROC_NR];

/* Spare memory, ready to go after initialization, to avoid a
 * circular dependency on allocating memory and writing it into VM's
 * page table.
 */
#if SANITYCHECKS
#define SPAREPAGES 100
#define STATIC_SPAREPAGES 90
#else
#define SPAREPAGES 15
#define STATIC_SPAREPAGES 10
#endif
int missing_spares = SPAREPAGES;
static struct {
	void *page;		/* VM-virtual address of the spare page, or NULL. */
	phys_bytes phys;	/* Its physical address. */
} sparepages[SPAREPAGES];

#define MAX_KERNMAPPINGS 10
static struct {
	phys_bytes phys_addr;	/* Physical addr. */
	phys_bytes len;		/* Length in bytes. */
	vir_bytes vir_addr;	/* Offset in page table. */
	int flags;
} kern_mappings[MAX_KERNMAPPINGS];
int kernmappings = 0;

/* Clicks must be pages, as
 * - they must be page aligned to map them
 * - they must be a multiple of the page size
 * - it's inconvenient to have them bigger than pages, because we often want
 *   just one page
 * May as well require them to be equal then.
 */
#if CLICK_SIZE != I386_PAGE_SIZE
#error CLICK_SIZE must be page size.
#endif

/* Page table that contains pointers to all page directories. */
phys_bytes page_directories_phys;
u32_t *page_directories = NULL;

/* Statically allocated, page-aligned backing store for the initial spares. */
static char static_sparepages[I386_PAGE_SIZE*STATIC_SPAREPAGES]
	__aligned(I386_PAGE_SIZE);
97 #if SANITYCHECKS
98 /*===========================================================================*
99 * pt_sanitycheck *
100 *===========================================================================*/
101 void pt_sanitycheck(pt_t *pt, char *file, int line)
103 /* Basic pt sanity check. */
104 int slot;
106 MYASSERT(pt);
107 MYASSERT(pt->pt_dir);
108 MYASSERT(pt->pt_dir_phys);
110 for(slot = 0; slot < ELEMENTS(vmproc); slot++) {
111 if(pt == &vmproc[slot].vm_pt)
112 break;
115 if(slot >= ELEMENTS(vmproc)) {
116 panic("pt_sanitycheck: passed pt not in any proc");
119 MYASSERT(usedpages_add(pt->pt_dir_phys, I386_PAGE_SIZE) == OK);
121 #endif
123 /*===========================================================================*
124 * findhole *
125 *===========================================================================*/
126 static u32_t findhole(void)
128 /* Find a space in the virtual address space of VM. */
129 u32_t curv;
130 int pde = 0, try_restart;
131 static u32_t lastv = 0;
132 pt_t *pt = &vmprocess->vm_pt;
133 extern char _end;
134 vir_bytes vmin, vmax;
136 vmin = (vir_bytes) (&_end) & I386_VM_ADDR_MASK; /* marks end of VM BSS */
137 vmax = VM_STACKTOP;
139 /* Input sanity check. */
140 assert(vmin + I386_PAGE_SIZE >= vmin);
141 assert(vmax >= vmin + I386_PAGE_SIZE);
142 assert((vmin % I386_PAGE_SIZE) == 0);
143 assert((vmax % I386_PAGE_SIZE) == 0);
145 #if SANITYCHECKS
146 curv = ((u32_t) random()) % ((vmax - vmin)/I386_PAGE_SIZE);
147 curv *= I386_PAGE_SIZE;
148 curv += vmin;
149 #else
150 curv = lastv;
151 if(curv < vmin || curv >= vmax)
152 curv = vmin;
153 #endif
154 try_restart = 1;
156 /* Start looking for a free page starting at vmin. */
157 while(curv < vmax) {
158 int pte;
160 assert(curv >= vmin);
161 assert(curv < vmax);
163 pde = I386_VM_PDE(curv);
164 pte = I386_VM_PTE(curv);
166 if(!(pt->pt_dir[pde] & I386_VM_PRESENT) ||
167 !(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
168 lastv = curv;
169 return curv;
172 curv+=I386_PAGE_SIZE;
174 if(curv >= vmax && try_restart) {
175 curv = vmin;
176 try_restart = 0;
180 printf("VM: out of virtual address space in vm\n");
182 return NO_MEM;
185 /*===========================================================================*
186 * vm_freepages *
187 *===========================================================================*/
188 void vm_freepages(vir_bytes vir, int pages)
190 assert(!(vir % I386_PAGE_SIZE));
191 extern char _end;
193 if(vir < (vir_bytes) &_end) {
194 printf("VM: not freeing static page\n");
195 return;
198 if(pt_writemap(vmprocess, &vmprocess->vm_pt, vir,
199 MAP_NONE, pages*I386_PAGE_SIZE, 0,
200 WMF_OVERWRITE | WMF_FREE) != OK)
201 panic("vm_freepages: pt_writemap failed");
203 #if SANITYCHECKS
204 /* If SANITYCHECKS are on, flush tlb so accessing freed pages is
205 * always trapped, also if not in tlb.
207 if((sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
208 panic("VMCTL_FLUSHTLB failed");
210 #endif
213 /*===========================================================================*
214 * vm_getsparepage *
215 *===========================================================================*/
216 static void *vm_getsparepage(phys_bytes *phys)
218 int s;
219 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
220 for(s = 0; s < SPAREPAGES; s++) {
221 if(sparepages[s].page) {
222 void *sp;
223 sp = sparepages[s].page;
224 *phys = sparepages[s].phys;
225 sparepages[s].page = NULL;
226 missing_spares++;
227 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
228 return sp;
231 return NULL;
234 /*===========================================================================*
235 * vm_checkspares *
236 *===========================================================================*/
237 static void *vm_checkspares(void)
239 int s, n = 0;
240 static int total = 0, worst = 0;
241 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
242 for(s = 0; s < SPAREPAGES && missing_spares > 0; s++)
243 if(!sparepages[s].page) {
244 n++;
245 if((sparepages[s].page = vm_allocpage(&sparepages[s].phys,
246 VMP_SPARE))) {
247 missing_spares--;
248 assert(missing_spares >= 0);
249 assert(missing_spares <= SPAREPAGES);
250 } else {
251 printf("VM: warning: couldn't get new spare page\n");
254 if(worst < n) worst = n;
255 total += n;
257 return NULL;
260 static int pt_init_done;
262 /*===========================================================================*
263 * vm_allocpage *
264 *===========================================================================*/
265 void *vm_allocpage(phys_bytes *phys, int reason)
267 /* Allocate a page for use by VM itself. */
268 phys_bytes newpage;
269 vir_bytes loc;
270 pt_t *pt;
271 int r;
272 static int level = 0;
273 void *ret;
275 pt = &vmprocess->vm_pt;
276 assert(reason >= 0 && reason < VMP_CATEGORIES);
278 level++;
280 assert(level >= 1);
281 assert(level <= 2);
283 if((level > 1) || !pt_init_done) {
284 void *s;
285 s=vm_getsparepage(phys);
286 level--;
287 if(!s) {
288 util_stacktrace();
289 printf("VM: warning: out of spare pages\n");
291 return s;
294 /* VM does have a pagetable, so get a page and map it in there.
295 * Where in our virtual address space can we put it?
297 loc = findhole();
298 if(loc == NO_MEM) {
299 level--;
300 printf("VM: vm_allocpage: findhole failed\n");
301 return NULL;
304 /* Allocate page of memory for use by VM. As VM
305 * is trusted, we don't have to pre-clear it.
307 if((newpage = alloc_mem(CLICKSPERPAGE, 0)) == NO_MEM) {
308 level--;
309 printf("VM: vm_allocpage: alloc_mem failed\n");
310 return NULL;
313 *phys = CLICK2ABS(newpage);
315 /* Map this page into our address space. */
316 if((r=pt_writemap(vmprocess, pt, loc, *phys, I386_PAGE_SIZE,
317 I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, 0)) != OK) {
318 free_mem(newpage, CLICKSPERPAGE);
319 printf("vm_allocpage writemap failed\n");
320 level--;
321 return NULL;
324 if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
325 panic("VMCTL_FLUSHTLB failed: %d", r);
328 level--;
330 /* Return user-space-ready pointer to it. */
331 ret = (void *) loc;
333 return ret;
336 /*===========================================================================*
337 * vm_pagelock *
338 *===========================================================================*/
339 void vm_pagelock(void *vir, int lockflag)
341 /* Mark a page allocated by vm_allocpage() unwritable, i.e. only for VM. */
342 vir_bytes m = (vir_bytes) vir;
343 int r;
344 u32_t flags = I386_VM_PRESENT | I386_VM_USER;
345 pt_t *pt;
347 pt = &vmprocess->vm_pt;
349 assert(!(m % I386_PAGE_SIZE));
351 if(!lockflag)
352 flags |= I386_VM_WRITE;
354 /* Update flags. */
355 if((r=pt_writemap(vmprocess, pt, m, 0, I386_PAGE_SIZE,
356 flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) {
357 panic("vm_lockpage: pt_writemap failed");
360 if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
361 panic("VMCTL_FLUSHTLB failed: %d", r);
364 return;
367 /*===========================================================================*
368 * vm_addrok *
369 *===========================================================================*/
370 int vm_addrok(void *vir, int writeflag)
372 pt_t *pt = &vmprocess->vm_pt;
373 int pde, pte;
374 vir_bytes v = (vir_bytes) vir;
376 pde = I386_VM_PDE(v);
377 pte = I386_VM_PTE(v);
379 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
380 printf("addr not ok: missing pde %d\n", pde);
381 return 0;
384 if(writeflag &&
385 !(pt->pt_dir[pde] & I386_VM_WRITE)) {
386 printf("addr not ok: pde %d present but pde unwritable\n", pde);
387 return 0;
390 if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
391 printf("addr not ok: missing pde %d / pte %d\n",
392 pde, pte);
393 return 0;
396 if(writeflag &&
397 !(pt->pt_pt[pde][pte] & I386_VM_WRITE)) {
398 printf("addr not ok: pde %d / pte %d present but unwritable\n",
399 pde, pte);
400 return 0;
403 return 1;
406 /*===========================================================================*
407 * pt_ptalloc *
408 *===========================================================================*/
409 static int pt_ptalloc(pt_t *pt, int pde, u32_t flags)
411 /* Allocate a page table and write its address into the page directory. */
412 int i;
413 phys_bytes pt_phys;
415 /* Argument must make sense. */
416 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
417 assert(!(flags & ~(PTF_ALLFLAGS)));
419 /* We don't expect to overwrite page directory entry, nor
420 * storage for the page table.
422 assert(!(pt->pt_dir[pde] & I386_VM_PRESENT));
423 assert(!pt->pt_pt[pde]);
425 /* Get storage for the page table. */
426 if(!(pt->pt_pt[pde] = vm_allocpage(&pt_phys, VMP_PAGETABLE)))
427 return ENOMEM;
429 for(i = 0; i < I386_VM_PT_ENTRIES; i++)
430 pt->pt_pt[pde][i] = 0; /* Empty entry. */
432 /* Make page directory entry.
433 * The PDE is always 'present,' 'writable,' and 'user accessible,'
434 * relying on the PTE for protection.
436 pt->pt_dir[pde] = (pt_phys & I386_VM_ADDR_MASK) | flags
437 | I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;
439 return OK;
442 /*===========================================================================*
443 * pt_ptalloc_in_range *
444 *===========================================================================*/
445 int pt_ptalloc_in_range(pt_t *pt, vir_bytes start, vir_bytes end,
446 u32_t flags, int verify)
448 /* Allocate all the page tables in the range specified. */
449 int pde, first_pde, last_pde;
451 first_pde = I386_VM_PDE(start);
452 last_pde = I386_VM_PDE(end-1);
453 assert(first_pde >= 0);
454 assert(last_pde < I386_VM_DIR_ENTRIES);
456 /* Scan all page-directory entries in the range. */
457 for(pde = first_pde; pde <= last_pde; pde++) {
458 assert(!(pt->pt_dir[pde] & I386_VM_BIGPAGE));
459 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
460 int r;
461 if(verify) {
462 printf("pt_ptalloc_in_range: no pde %d\n", pde);
463 return EFAULT;
465 assert(!pt->pt_dir[pde]);
466 if((r=pt_ptalloc(pt, pde, flags)) != OK) {
467 /* Couldn't do (complete) mapping.
468 * Don't bother freeing any previously
469 * allocated page tables, they're
470 * still writable, don't point to nonsense,
471 * and pt_ptalloc leaves the directory
472 * and other data in a consistent state.
474 printf("pt_ptalloc_in_range: pt_ptalloc failed\n");
475 return r;
478 assert(pt->pt_dir[pde]);
479 assert(pt->pt_dir[pde] & I386_VM_PRESENT);
482 return OK;
485 static char *ptestr(u32_t pte)
487 #define FLAG(constant, name) { \
488 if(pte & (constant)) { strcat(str, name); strcat(str, " "); } \
491 static char str[30];
492 if(!(pte & I386_VM_PRESENT)) {
493 return "not present";
495 str[0] = '\0';
496 FLAG(I386_VM_WRITE, "W");
497 FLAG(I386_VM_USER, "U");
498 FLAG(I386_VM_PWT, "PWT");
499 FLAG(I386_VM_PCD, "PCD");
500 FLAG(I386_VM_ACC, "ACC");
501 FLAG(I386_VM_DIRTY, "DIRTY");
502 FLAG(I386_VM_PS, "PS");
503 FLAG(I386_VM_GLOBAL, "G");
504 FLAG(I386_VM_PTAVAIL1, "AV1");
505 FLAG(I386_VM_PTAVAIL2, "AV2");
506 FLAG(I386_VM_PTAVAIL3, "AV3");
508 return str;
511 /*===========================================================================*
512 * pt_map_in_range *
513 *===========================================================================*/
514 int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp,
515 vir_bytes start, vir_bytes end)
517 /* Transfer all the mappings from the pt of the source process to the pt of
518 * the destination process in the range specified.
520 int pde, pte;
521 vir_bytes viraddr;
522 pt_t *pt, *dst_pt;
524 pt = &src_vmp->vm_pt;
525 dst_pt = &dst_vmp->vm_pt;
527 end = end ? end : VM_DATATOP;
528 assert(start % I386_PAGE_SIZE == 0);
529 assert(end % I386_PAGE_SIZE == 0);
530 assert(start <= end);
531 assert(I386_VM_PDE(end) < I386_VM_DIR_ENTRIES);
533 #if LU_DEBUG
534 printf("VM: pt_map_in_range: src = %d, dst = %d\n",
535 src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
536 printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n",
537 start, I386_VM_PDE(start), I386_VM_PTE(start),
538 end, I386_VM_PDE(end), I386_VM_PTE(end));
539 #endif
541 /* Scan all page-table entries in the range. */
542 for(viraddr = start; viraddr <= end; viraddr += I386_PAGE_SIZE) {
543 pde = I386_VM_PDE(viraddr);
544 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
545 if(viraddr == VM_DATATOP) break;
546 continue;
548 pte = I386_VM_PTE(viraddr);
549 if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
550 if(viraddr == VM_DATATOP) break;
551 continue;
554 /* Transfer the mapping. */
555 dst_pt->pt_pt[pde][pte] = pt->pt_pt[pde][pte];
557 if(viraddr == VM_DATATOP) break;
560 return OK;
563 /*===========================================================================*
564 * pt_ptmap *
565 *===========================================================================*/
566 int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
568 /* Transfer mappings to page dir and page tables from source process and
569 * destination process. Make sure all the mappings are above the stack, not
570 * to corrupt valid mappings in the data segment of the destination process.
572 int pde, r;
573 phys_bytes physaddr;
574 vir_bytes viraddr;
575 pt_t *pt;
577 pt = &src_vmp->vm_pt;
579 #if LU_DEBUG
580 printf("VM: pt_ptmap: src = %d, dst = %d\n",
581 src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
582 #endif
584 /* Transfer mapping to the page directory. */
585 viraddr = (vir_bytes) pt->pt_dir;
586 physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK;
587 if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
588 I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
589 WMF_OVERWRITE)) != OK) {
590 return r;
592 #if LU_DEBUG
593 printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n",
594 viraddr, physaddr);
595 #endif
597 /* Scan all non-reserved page-directory entries. */
598 for(pde=0; pde < I386_VM_DIR_ENTRIES; pde++) {
599 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
600 continue;
603 /* Transfer mapping to the page table. */
604 viraddr = (vir_bytes) pt->pt_pt[pde];
605 physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK;
606 if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
607 I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
608 WMF_OVERWRITE)) != OK) {
609 return r;
613 return OK;
616 void pt_clearmapcache(void)
618 /* Make sure kernel will invalidate tlb when using current
619 * pagetable (i.e. vm's) to make new mappings before new cr3
620 * is loaded.
622 if(sys_vmctl(SELF, VMCTL_CLEARMAPCACHE, 0) != OK)
623 panic("VMCTL_CLEARMAPCACHE failed");
626 /*===========================================================================*
627 * pt_writemap *
628 *===========================================================================*/
629 int pt_writemap(struct vmproc * vmp,
630 pt_t *pt,
631 vir_bytes v,
632 phys_bytes physaddr,
633 size_t bytes,
634 u32_t flags,
635 u32_t writemapflags)
637 /* Write mapping into page table. Allocate a new page table if necessary. */
638 /* Page directory and table entries for this virtual address. */
639 int p, pages;
640 int verify = 0;
641 int ret = OK;
643 #ifdef CONFIG_SMP
644 int vminhibit_clear = 0;
645 /* FIXME
646 * don't do it everytime, stop the process only on the first change and
647 * resume the execution on the last change. Do in a wrapper of this
648 * function
650 if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
651 !(vmp->vm_flags & VMF_EXITING)) {
652 sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_SET, 0);
653 vminhibit_clear = 1;
655 #endif
657 if(writemapflags & WMF_VERIFY)
658 verify = 1;
660 assert(!(bytes % I386_PAGE_SIZE));
661 assert(!(flags & ~(PTF_ALLFLAGS)));
663 pages = bytes / I386_PAGE_SIZE;
665 /* MAP_NONE means to clear the mapping. It doesn't matter
666 * what's actually written into the PTE if I386_VM_PRESENT
667 * isn't on, so we can just write MAP_NONE into it.
669 assert(physaddr == MAP_NONE || (flags & I386_VM_PRESENT));
670 assert(physaddr != MAP_NONE || !flags);
672 /* First make sure all the necessary page tables are allocated,
673 * before we start writing in any of them, because it's a pain
674 * to undo our work properly.
676 ret = pt_ptalloc_in_range(pt, v, v + I386_PAGE_SIZE*pages, flags, verify);
677 if(ret != OK) {
678 printf("VM: writemap: pt_ptalloc_in_range failed\n");
679 goto resume_exit;
682 /* Now write in them. */
683 for(p = 0; p < pages; p++) {
684 u32_t entry;
685 int pde = I386_VM_PDE(v);
686 int pte = I386_VM_PTE(v);
688 if(!v) { printf("VM: warning: making zero page for %d\n",
689 vmp->vm_endpoint); }
691 assert(!(v % I386_PAGE_SIZE));
692 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
693 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
695 /* Page table has to be there. */
696 assert(pt->pt_dir[pde] & I386_VM_PRESENT);
698 /* We do not expect it to be a bigpage. */
699 assert(!(pt->pt_dir[pde] & I386_VM_BIGPAGE));
701 /* Make sure page directory entry for this page table
702 * is marked present and page table entry is available.
704 assert(pt->pt_pt[pde]);
706 #if SANITYCHECKS
707 /* We don't expect to overwrite a page. */
708 if(!(writemapflags & (WMF_OVERWRITE|WMF_VERIFY)))
709 assert(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT));
710 #endif
711 if(writemapflags & (WMF_WRITEFLAGSONLY|WMF_FREE)) {
712 physaddr = pt->pt_pt[pde][pte] & I386_VM_ADDR_MASK;
715 if(writemapflags & WMF_FREE) {
716 free_mem(ABS2CLICK(physaddr), 1);
719 /* Entry we will write. */
720 entry = (physaddr & I386_VM_ADDR_MASK) | flags;
722 if(verify) {
723 u32_t maskedentry;
724 maskedentry = pt->pt_pt[pde][pte];
725 maskedentry &= ~(I386_VM_ACC|I386_VM_DIRTY);
726 /* Verify pagetable entry. */
727 if(entry & I386_VM_WRITE) {
728 /* If we expect a writable page, allow a readonly page. */
729 maskedentry |= I386_VM_WRITE;
731 if(maskedentry != entry) {
732 printf("pt_writemap: mismatch: ");
733 if((entry & I386_VM_ADDR_MASK) !=
734 (maskedentry & I386_VM_ADDR_MASK)) {
735 printf("pt_writemap: physaddr mismatch (0x%lx, 0x%lx); ",
736 (long)entry, (long)maskedentry);
737 } else printf("phys ok; ");
738 printf(" flags: found %s; ",
739 ptestr(pt->pt_pt[pde][pte]));
740 printf(" masked %s; ",
741 ptestr(maskedentry));
742 printf(" expected %s\n", ptestr(entry));
743 ret = EFAULT;
744 goto resume_exit;
746 } else {
747 /* Write pagetable entry. */
748 pt->pt_pt[pde][pte] = entry;
751 physaddr += I386_PAGE_SIZE;
752 v += I386_PAGE_SIZE;
755 resume_exit:
757 #ifdef CONFIG_SMP
758 if (vminhibit_clear) {
759 assert(vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
760 !(vmp->vm_flags & VMF_EXITING));
761 sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_CLEAR, 0);
763 #endif
765 return ret;
768 /*===========================================================================*
769 * pt_checkrange *
770 *===========================================================================*/
771 int pt_checkrange(pt_t *pt, vir_bytes v, size_t bytes,
772 int write)
774 int p, pages;
776 assert(!(bytes % I386_PAGE_SIZE));
778 pages = bytes / I386_PAGE_SIZE;
780 for(p = 0; p < pages; p++) {
781 int pde = I386_VM_PDE(v);
782 int pte = I386_VM_PTE(v);
784 assert(!(v % I386_PAGE_SIZE));
785 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
786 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
788 /* Page table has to be there. */
789 if(!(pt->pt_dir[pde] & I386_VM_PRESENT))
790 return EFAULT;
792 /* Make sure page directory entry for this page table
793 * is marked present and page table entry is available.
795 assert((pt->pt_dir[pde] & I386_VM_PRESENT) && pt->pt_pt[pde]);
797 if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
798 return EFAULT;
801 if(write && !(pt->pt_pt[pde][pte] & I386_VM_WRITE)) {
802 return EFAULT;
805 v += I386_PAGE_SIZE;
808 return OK;
811 /*===========================================================================*
812 * pt_new *
813 *===========================================================================*/
814 int pt_new(pt_t *pt)
816 /* Allocate a pagetable root. On i386, allocate a page-aligned page directory
817 * and set them to 0 (indicating no page tables are allocated). Lookup
818 * its physical address as we'll need that in the future. Verify it's
819 * page-aligned.
821 int i;
823 /* Don't ever re-allocate/re-move a certain process slot's
824 * page directory once it's been created. This is a fraction
825 * faster, but also avoids having to invalidate the page
826 * mappings from in-kernel page tables pointing to
827 * the page directories (the page_directories data).
829 if(!pt->pt_dir &&
830 !(pt->pt_dir = vm_allocpage((phys_bytes *)&pt->pt_dir_phys, VMP_PAGEDIR))) {
831 return ENOMEM;
834 for(i = 0; i < I386_VM_DIR_ENTRIES; i++) {
835 pt->pt_dir[i] = 0; /* invalid entry (I386_VM_PRESENT bit = 0) */
836 pt->pt_pt[i] = NULL;
839 /* Where to start looking for free virtual address space? */
840 pt->pt_virtop = 0;
842 /* Map in kernel. */
843 if(pt_mapkernel(pt) != OK)
844 panic("pt_new: pt_mapkernel failed");
846 return OK;
849 static int freepde(void)
851 int p = kernel_boot_info.freepde_start++;
852 assert(kernel_boot_info.freepde_start < I386_VM_DIR_ENTRIES);
853 return p;
856 /*===========================================================================*
857 * pt_init *
858 *===========================================================================*/
859 void pt_init(void)
861 pt_t *newpt;
862 int s, r, p;
863 int global_bit_ok = 0;
864 vir_bytes sparepages_mem;
865 static u32_t currentpagedir[I386_VM_DIR_ENTRIES];
866 int m = kernel_boot_info.kern_mod;
867 u32_t mypdbr; /* Page Directory Base Register (cr3) value */
869 /* Find what the physical location of the kernel is. */
870 assert(m >= 0);
871 assert(m < kernel_boot_info.mods_with_kernel);
872 assert(kernel_boot_info.mods_with_kernel < MULTIBOOT_MAX_MODS);
873 kern_mb_mod = &kernel_boot_info.module_list[m];
874 kern_size = kern_mb_mod->mod_end - kern_mb_mod->mod_start;
875 assert(!(kern_mb_mod->mod_start % I386_BIG_PAGE_SIZE));
876 assert(!(kernel_boot_info.vir_kern_start % I386_BIG_PAGE_SIZE));
877 kern_start_pde = kernel_boot_info.vir_kern_start / I386_BIG_PAGE_SIZE;
879 /* Get ourselves spare pages. */
880 sparepages_mem = (vir_bytes) static_sparepages;
881 assert(!(sparepages_mem % I386_PAGE_SIZE));
883 /* Spare pages are used to allocate memory before VM has its own page
884 * table that things (i.e. arbitrary physical memory) can be mapped into.
885 * We get it by pre-allocating it in our bss (allocated and mapped in by
886 * the kernel) in static_sparepages. We also need the physical addresses
887 * though; we look them up now so they are ready for use.
890 missing_spares = 0;
891 assert(STATIC_SPAREPAGES < SPAREPAGES);
892 for(s = 0; s < SPAREPAGES; s++) {
893 vir_bytes v = (sparepages_mem + s*I386_PAGE_SIZE);;
894 phys_bytes ph;
895 if((r=sys_umap(SELF, VM_D, (vir_bytes) v,
896 I386_PAGE_SIZE*SPAREPAGES, &ph)) != OK)
897 panic("pt_init: sys_umap failed: %d", r);
898 if(s >= STATIC_SPAREPAGES) {
899 sparepages[s].page = NULL;
900 missing_spares++;
901 continue;
903 sparepages[s].page = (void *) v;
904 sparepages[s].phys = ph;
907 /* global bit and 4MB pages available? */
908 global_bit_ok = _cpufeature(_CPUF_I386_PGE);
909 bigpage_ok = _cpufeature(_CPUF_I386_PSE);
911 /* Set bit for PTE's and PDE's if available. */
912 if(global_bit_ok)
913 global_bit = I386_VM_GLOBAL;
915 /* Allocate us a page table in which to remember page directory
916 * pointers.
918 if(!(page_directories = vm_allocpage(&page_directories_phys,
919 VMP_PAGETABLE)))
920 panic("no virt addr for vm mappings");
922 memset(page_directories, 0, I386_PAGE_SIZE);
924 /* Now reserve another pde for kernel's own mappings. */
926 int kernmap_pde;
927 phys_bytes addr, len;
928 int flags, index = 0;
929 u32_t offset = 0;
931 kernmap_pde = freepde();
932 offset = kernmap_pde * I386_BIG_PAGE_SIZE;
934 while(sys_vmctl_get_mapping(index, &addr, &len,
935 &flags) == OK) {
936 vir_bytes vir;
937 if(index >= MAX_KERNMAPPINGS)
938 panic("VM: too many kernel mappings: %d", index);
939 kern_mappings[index].phys_addr = addr;
940 kern_mappings[index].len = len;
941 kern_mappings[index].flags = flags;
942 kern_mappings[index].vir_addr = offset;
943 kern_mappings[index].flags =
944 I386_VM_PRESENT;
945 if(flags & VMMF_UNCACHED)
946 kern_mappings[index].flags |= PTF_NOCACHE;
947 if(flags & VMMF_USER)
948 kern_mappings[index].flags |= I386_VM_USER;
949 if(flags & VMMF_WRITE)
950 kern_mappings[index].flags |= I386_VM_WRITE;
951 if(flags & VMMF_GLO)
952 kern_mappings[index].flags |= I386_VM_GLOBAL;
953 if(addr % I386_PAGE_SIZE)
954 panic("VM: addr unaligned: %d", addr);
955 if(len % I386_PAGE_SIZE)
956 panic("VM: len unaligned: %d", len);
957 vir = offset;
958 if(sys_vmctl_reply_mapping(index, vir) != OK)
959 panic("VM: reply failed");
960 offset += len;
961 index++;
962 kernmappings++;
966 /* Find a PDE below processes available for mapping in the
967 * page directories.
969 pagedir_pde = freepde();
970 pagedir_pde_val = (page_directories_phys & I386_VM_ADDR_MASK) |
971 I386_VM_PRESENT | I386_VM_WRITE;
973 /* Allright. Now. We have to make our own page directory and page tables,
974 * that the kernel has already set up, accessible to us. It's easier to
975 * understand if we just copy all the required pages (i.e. page directory
976 * and page tables), and set up the pointers as if VM had done it itself.
978 * This allocation will happen without using any page table, and just
979 * uses spare pages.
981 newpt = &vmprocess->vm_pt;
982 if(pt_new(newpt) != OK)
983 panic("vm pt_new failed");
985 /* Get our current pagedir so we can see it. */
986 if(sys_vmctl_get_pdbr(SELF, &mypdbr) != OK)
987 panic("VM: sys_vmctl_get_pdbr failed");
988 if(sys_vircopy(NONE, mypdbr, SELF,
989 (vir_bytes) currentpagedir, I386_PAGE_SIZE) != OK)
990 panic("VM: sys_vircopy failed");
992 /* We have mapped in kernel ourselves; now copy mappings for VM
993 * that kernel made, including allocations for BSS. Skip identity
994 * mapping bits; just map in VM.
996 for(p = 0; p < I386_VM_DIR_ENTRIES; p++) {
997 u32_t entry = currentpagedir[p];
998 phys_bytes ptaddr_kern, ptaddr_us;
1000 /* BIGPAGEs are kernel mapping (do ourselves) or boot
1001 * identity mapping (don't want).
1003 if(!(entry & I386_VM_PRESENT)) continue;
1004 if((entry & I386_VM_BIGPAGE)) continue;
1006 if(pt_ptalloc(newpt, p, 0) != OK)
1007 panic("pt_ptalloc failed");
1008 assert(newpt->pt_dir[p] & I386_VM_PRESENT);
1010 ptaddr_kern = entry & I386_VM_ADDR_MASK;
1011 ptaddr_us = newpt->pt_dir[p] & I386_VM_ADDR_MASK;
1013 /* Copy kernel-initialized pagetable contents into our
1014 * normally accessible pagetable.
1016 if(sys_abscopy(ptaddr_kern, ptaddr_us, I386_PAGE_SIZE) != OK)
1017 panic("pt_init: abscopy failed");
1020 /* Inform kernel vm has a newly built page table. */
1021 assert(vmproc[VM_PROC_NR].vm_endpoint == VM_PROC_NR);
1022 pt_bind(newpt, &vmproc[VM_PROC_NR]);
1024 pt_init_done = 1;
1026 /* All OK. */
1027 return;
1030 /*===========================================================================*
1031 * pt_bind *
1032 *===========================================================================*/
1033 int pt_bind(pt_t *pt, struct vmproc *who)
1035 int slot;
1036 u32_t phys;
1037 void *pdes;
1039 /* Basic sanity checks. */
1040 assert(who);
1041 assert(who->vm_flags & VMF_INUSE);
1042 assert(pt);
1044 assert(pagedir_pde >= 0);
1046 slot = who->vm_slot;
1047 assert(slot >= 0);
1048 assert(slot < ELEMENTS(vmproc));
1049 assert(slot < I386_VM_PT_ENTRIES);
1051 phys = pt->pt_dir_phys & I386_VM_ADDR_MASK;
1052 assert(pt->pt_dir_phys == phys);
1054 /* Update "page directory pagetable." */
1055 page_directories[slot] = phys | I386_VM_PRESENT|I386_VM_WRITE;
1057 /* This is where the PDE's will be visible to the kernel
1058 * in its address space.
1060 pdes = (void *) (pagedir_pde*I386_BIG_PAGE_SIZE +
1061 slot * I386_PAGE_SIZE);
1063 #if 0
1064 printf("VM: slot %d endpoint %d has pde val 0x%lx at kernel address 0x%lx\n",
1065 slot, who->vm_endpoint, page_directories[slot], pdes);
1066 #endif
1067 /* Tell kernel about new page table root. */
1068 return sys_vmctl_set_addrspace(who->vm_endpoint, pt->pt_dir_phys, pdes);
1071 /*===========================================================================*
1072 * pt_free *
1073 *===========================================================================*/
1074 void pt_free(pt_t *pt)
1076 /* Free memory associated with this pagetable. */
1077 int i;
1079 for(i = 0; i < I386_VM_DIR_ENTRIES; i++)
1080 if(pt->pt_pt[i])
1081 vm_freepages((vir_bytes) pt->pt_pt[i], 1);
1083 return;
1086 /*===========================================================================*
1087 * pt_mapkernel *
1088 *===========================================================================*/
1089 int pt_mapkernel(pt_t *pt)
1091 int i;
1092 int kern_pde = kern_start_pde;
1093 phys_bytes addr, mapped = 0;
1095 /* Any i386 page table needs to map in the kernel address space. */
1096 assert(bigpage_ok);
1097 assert(pagedir_pde >= 0);
1098 assert(kern_pde >= 0);
1100 /* pt_init() has made sure this is ok. */
1101 addr = kern_mb_mod->mod_start;
1103 /* Actually mapping in kernel */
1104 while(mapped < kern_size) {
1105 pt->pt_dir[kern_pde] = addr | I386_VM_PRESENT |
1106 I386_VM_BIGPAGE | I386_VM_WRITE | global_bit;
1107 kern_pde++;
1108 mapped += I386_BIG_PAGE_SIZE;
1109 addr += I386_BIG_PAGE_SIZE;
1112 /* Kernel also wants to know about all page directories. */
1113 assert(pagedir_pde > kern_pde);
1114 pt->pt_dir[pagedir_pde] = pagedir_pde_val;
1116 /* Kernel also wants various mappings of its own. */
1117 for(i = 0; i < kernmappings; i++) {
1118 if(pt_writemap(NULL, pt,
1119 kern_mappings[i].vir_addr,
1120 kern_mappings[i].phys_addr,
1121 kern_mappings[i].len,
1122 kern_mappings[i].flags, 0) != OK) {
1123 panic("pt_mapkernel: pt_writemap failed");
1127 return OK;
1130 /*===========================================================================*
1131 * pt_cycle *
1132 *===========================================================================*/
void pt_cycle(void)
{
/* Periodic housekeeping: replenish the spare-page pool. */
	vm_checkspares();
}