vm, kernel, top: report memory usage of vm, kernel
[minix.git] / servers / vm / arch / i386 / pagetable.c
blob1e5f6e8e4304756261ac9a194c1a343c8d0e7ecf
2 #define _SYSTEM 1
3 #define _POSIX_SOURCE 1
5 #include <minix/callnr.h>
6 #include <minix/com.h>
7 #include <minix/config.h>
8 #include <minix/const.h>
9 #include <minix/ds.h>
10 #include <minix/endpoint.h>
11 #include <minix/keymap.h>
12 #include <minix/minlib.h>
13 #include <minix/type.h>
14 #include <minix/ipc.h>
15 #include <minix/sysutil.h>
16 #include <minix/syslib.h>
17 #include <minix/safecopies.h>
18 #include <minix/cpufeature.h>
19 #include <minix/bitmap.h>
20 #include <minix/debug.h>
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <assert.h>
25 #include <string.h>
26 #include <env.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <stdlib.h>
31 #include "proto.h"
32 #include "glo.h"
33 #include "util.h"
34 #include "vm.h"
35 #include "sanitycheck.h"
37 #include "memory.h"
39 static int vm_self_pages;
41 /* PDE used to map in kernel, kernel physical address. */
42 static int pagedir_pde = -1;
43 static u32_t global_bit = 0, pagedir_pde_val;
45 static multiboot_module_t *kern_mb_mod = NULL;
46 static size_t kern_size = 0;
47 static int kern_start_pde = -1;
49 /* 4MB page size available in hardware? */
50 static int bigpage_ok = 0;
52 /* Our process table entry. */
53 struct vmproc *vmprocess = &vmproc[VM_PROC_NR];
55 /* Spare memory, ready to go after initialization, to avoid a
56 * circular dependency on allocating memory and writing it into VM's
57 * page table.
59 #if SANITYCHECKS
60 #define SPAREPAGES 100
61 #define STATIC_SPAREPAGES 90
62 #else
63 #define SPAREPAGES 15
64 #define STATIC_SPAREPAGES 10
65 #endif
66 int missing_spares = SPAREPAGES;
67 static struct {
68 void *page;
69 phys_bytes phys;
70 } sparepages[SPAREPAGES];
72 extern char _end;
73 #define is_staticaddr(v) ((vir_bytes) (v) < (vir_bytes) &_end)
75 #define MAX_KERNMAPPINGS 10
76 static struct {
77 phys_bytes phys_addr; /* Physical addr. */
78 phys_bytes len; /* Length in bytes. */
79 vir_bytes vir_addr; /* Offset in page table. */
80 int flags;
81 } kern_mappings[MAX_KERNMAPPINGS];
82 int kernmappings = 0;
84 /* Clicks must be pages, as
85 * - they must be page aligned to map them
86 * - they must be a multiple of the page size
87 * - it's inconvenient to have them bigger than pages, because we often want
88 * just one page
89 * May as well require them to be equal then.
91 #if CLICK_SIZE != I386_PAGE_SIZE
92 #error CLICK_SIZE must be page size.
93 #endif
95 /* Page table that contains pointers to all page directories. */
96 phys_bytes page_directories_phys;
97 u32_t *page_directories = NULL;
99 static char static_sparepages[I386_PAGE_SIZE*STATIC_SPAREPAGES]
100 __aligned(I386_PAGE_SIZE);
#if SANITYCHECKS
/*===========================================================================*
 *				pt_sanitycheck		     		     *
 *===========================================================================*/
void pt_sanitycheck(pt_t *pt, char *file, int line)
{
/* Basic sanity check of a page table: it must have a page directory with
 * a physical address, it must be the page table of some vmproc slot, and
 * usedpages_add() must accept its page directory page.
 */
	int idx = 0;

	MYASSERT(pt);
	MYASSERT(pt->pt_dir);
	MYASSERT(pt->pt_dir_phys);

	/* Look for the process slot this page table belongs to. */
	while(idx < ELEMENTS(vmproc) && pt != &vmproc[idx].vm_pt)
		idx++;

	if(idx >= ELEMENTS(vmproc)) {
		panic("pt_sanitycheck: passed pt not in any proc");
	}

	MYASSERT(usedpages_add(pt->pt_dir_phys, I386_PAGE_SIZE) == OK);
}
#endif
128 /*===========================================================================*
129 * findhole *
130 *===========================================================================*/
131 static u32_t findhole(void)
133 /* Find a space in the virtual address space of VM. */
134 u32_t curv;
135 int pde = 0, try_restart;
136 static u32_t lastv = 0;
137 pt_t *pt = &vmprocess->vm_pt;
138 vir_bytes vmin, vmax;
140 vmin = (vir_bytes) (&_end) & I386_VM_ADDR_MASK; /* marks end of VM BSS */
141 vmax = VM_STACKTOP;
143 /* Input sanity check. */
144 assert(vmin + I386_PAGE_SIZE >= vmin);
145 assert(vmax >= vmin + I386_PAGE_SIZE);
146 assert((vmin % I386_PAGE_SIZE) == 0);
147 assert((vmax % I386_PAGE_SIZE) == 0);
149 #if SANITYCHECKS
150 curv = ((u32_t) random()) % ((vmax - vmin)/I386_PAGE_SIZE);
151 curv *= I386_PAGE_SIZE;
152 curv += vmin;
153 #else
154 curv = lastv;
155 if(curv < vmin || curv >= vmax)
156 curv = vmin;
157 #endif
158 try_restart = 1;
160 /* Start looking for a free page starting at vmin. */
161 while(curv < vmax) {
162 int pte;
164 assert(curv >= vmin);
165 assert(curv < vmax);
167 pde = I386_VM_PDE(curv);
168 pte = I386_VM_PTE(curv);
170 if(!(pt->pt_dir[pde] & I386_VM_PRESENT) ||
171 !(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
172 lastv = curv;
173 return curv;
176 curv+=I386_PAGE_SIZE;
178 if(curv >= vmax && try_restart) {
179 curv = vmin;
180 try_restart = 0;
184 printf("VM: out of virtual address space in vm\n");
186 return NO_MEM;
189 /*===========================================================================*
190 * vm_freepages *
191 *===========================================================================*/
192 void vm_freepages(vir_bytes vir, int pages)
194 assert(!(vir % I386_PAGE_SIZE));
196 if(is_staticaddr(vir)) {
197 printf("VM: not freeing static page\n");
198 return;
201 if(pt_writemap(vmprocess, &vmprocess->vm_pt, vir,
202 MAP_NONE, pages*I386_PAGE_SIZE, 0,
203 WMF_OVERWRITE | WMF_FREE) != OK)
204 panic("vm_freepages: pt_writemap failed");
206 vm_self_pages--;
208 #if SANITYCHECKS
209 /* If SANITYCHECKS are on, flush tlb so accessing freed pages is
210 * always trapped, also if not in tlb.
212 if((sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
213 panic("VMCTL_FLUSHTLB failed");
215 #endif
218 /*===========================================================================*
219 * vm_getsparepage *
220 *===========================================================================*/
221 static void *vm_getsparepage(phys_bytes *phys)
223 int s;
224 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
225 for(s = 0; s < SPAREPAGES; s++) {
226 if(sparepages[s].page) {
227 void *sp;
228 sp = sparepages[s].page;
229 *phys = sparepages[s].phys;
230 sparepages[s].page = NULL;
231 missing_spares++;
232 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
233 return sp;
236 return NULL;
239 /*===========================================================================*
240 * vm_checkspares *
241 *===========================================================================*/
242 static void *vm_checkspares(void)
244 int s, n = 0;
245 static int total = 0, worst = 0;
246 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
247 for(s = 0; s < SPAREPAGES && missing_spares > 0; s++)
248 if(!sparepages[s].page) {
249 n++;
250 if((sparepages[s].page = vm_allocpage(&sparepages[s].phys,
251 VMP_SPARE))) {
252 missing_spares--;
253 assert(missing_spares >= 0);
254 assert(missing_spares <= SPAREPAGES);
255 } else {
256 printf("VM: warning: couldn't get new spare page\n");
259 if(worst < n) worst = n;
260 total += n;
262 return NULL;
265 static int pt_init_done;
267 /*===========================================================================*
268 * vm_allocpage *
269 *===========================================================================*/
270 void *vm_allocpage(phys_bytes *phys, int reason)
272 /* Allocate a page for use by VM itself. */
273 phys_bytes newpage;
274 vir_bytes loc;
275 pt_t *pt;
276 int r;
277 static int level = 0;
278 void *ret;
280 pt = &vmprocess->vm_pt;
281 assert(reason >= 0 && reason < VMP_CATEGORIES);
283 level++;
285 assert(level >= 1);
286 assert(level <= 2);
288 if((level > 1) || !pt_init_done) {
289 void *s;
290 s=vm_getsparepage(phys);
291 level--;
292 if(!s) {
293 util_stacktrace();
294 printf("VM: warning: out of spare pages\n");
296 if(!is_staticaddr(s)) vm_self_pages++;
297 return s;
300 /* VM does have a pagetable, so get a page and map it in there.
301 * Where in our virtual address space can we put it?
303 loc = findhole();
304 if(loc == NO_MEM) {
305 level--;
306 printf("VM: vm_allocpage: findhole failed\n");
307 return NULL;
310 /* Allocate page of memory for use by VM. As VM
311 * is trusted, we don't have to pre-clear it.
313 if((newpage = alloc_mem(CLICKSPERPAGE, 0)) == NO_MEM) {
314 level--;
315 printf("VM: vm_allocpage: alloc_mem failed\n");
316 return NULL;
319 *phys = CLICK2ABS(newpage);
321 /* Map this page into our address space. */
322 if((r=pt_writemap(vmprocess, pt, loc, *phys, I386_PAGE_SIZE,
323 I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, 0)) != OK) {
324 free_mem(newpage, CLICKSPERPAGE);
325 printf("vm_allocpage writemap failed\n");
326 level--;
327 return NULL;
330 if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
331 panic("VMCTL_FLUSHTLB failed: %d", r);
334 level--;
336 /* Return user-space-ready pointer to it. */
337 ret = (void *) loc;
339 vm_self_pages++;
340 return ret;
343 /*===========================================================================*
344 * vm_pagelock *
345 *===========================================================================*/
346 void vm_pagelock(void *vir, int lockflag)
348 /* Mark a page allocated by vm_allocpage() unwritable, i.e. only for VM. */
349 vir_bytes m = (vir_bytes) vir;
350 int r;
351 u32_t flags = I386_VM_PRESENT | I386_VM_USER;
352 pt_t *pt;
354 pt = &vmprocess->vm_pt;
356 assert(!(m % I386_PAGE_SIZE));
358 if(!lockflag)
359 flags |= I386_VM_WRITE;
361 /* Update flags. */
362 if((r=pt_writemap(vmprocess, pt, m, 0, I386_PAGE_SIZE,
363 flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) {
364 panic("vm_lockpage: pt_writemap failed");
367 if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
368 panic("VMCTL_FLUSHTLB failed: %d", r);
371 return;
374 /*===========================================================================*
375 * vm_addrok *
376 *===========================================================================*/
377 int vm_addrok(void *vir, int writeflag)
379 pt_t *pt = &vmprocess->vm_pt;
380 int pde, pte;
381 vir_bytes v = (vir_bytes) vir;
383 pde = I386_VM_PDE(v);
384 pte = I386_VM_PTE(v);
386 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
387 printf("addr not ok: missing pde %d\n", pde);
388 return 0;
391 if(writeflag &&
392 !(pt->pt_dir[pde] & I386_VM_WRITE)) {
393 printf("addr not ok: pde %d present but pde unwritable\n", pde);
394 return 0;
397 if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
398 printf("addr not ok: missing pde %d / pte %d\n",
399 pde, pte);
400 return 0;
403 if(writeflag &&
404 !(pt->pt_pt[pde][pte] & I386_VM_WRITE)) {
405 printf("addr not ok: pde %d / pte %d present but unwritable\n",
406 pde, pte);
407 return 0;
410 return 1;
413 /*===========================================================================*
414 * pt_ptalloc *
415 *===========================================================================*/
416 static int pt_ptalloc(pt_t *pt, int pde, u32_t flags)
418 /* Allocate a page table and write its address into the page directory. */
419 int i;
420 phys_bytes pt_phys;
422 /* Argument must make sense. */
423 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
424 assert(!(flags & ~(PTF_ALLFLAGS)));
426 /* We don't expect to overwrite page directory entry, nor
427 * storage for the page table.
429 assert(!(pt->pt_dir[pde] & I386_VM_PRESENT));
430 assert(!pt->pt_pt[pde]);
432 /* Get storage for the page table. */
433 if(!(pt->pt_pt[pde] = vm_allocpage(&pt_phys, VMP_PAGETABLE)))
434 return ENOMEM;
436 for(i = 0; i < I386_VM_PT_ENTRIES; i++)
437 pt->pt_pt[pde][i] = 0; /* Empty entry. */
439 /* Make page directory entry.
440 * The PDE is always 'present,' 'writable,' and 'user accessible,'
441 * relying on the PTE for protection.
443 pt->pt_dir[pde] = (pt_phys & I386_VM_ADDR_MASK) | flags
444 | I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;
446 return OK;
449 /*===========================================================================*
450 * pt_ptalloc_in_range *
451 *===========================================================================*/
452 int pt_ptalloc_in_range(pt_t *pt, vir_bytes start, vir_bytes end,
453 u32_t flags, int verify)
455 /* Allocate all the page tables in the range specified. */
456 int pde, first_pde, last_pde;
458 first_pde = I386_VM_PDE(start);
459 last_pde = I386_VM_PDE(end-1);
460 assert(first_pde >= 0);
461 assert(last_pde < I386_VM_DIR_ENTRIES);
463 /* Scan all page-directory entries in the range. */
464 for(pde = first_pde; pde <= last_pde; pde++) {
465 assert(!(pt->pt_dir[pde] & I386_VM_BIGPAGE));
466 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
467 int r;
468 if(verify) {
469 printf("pt_ptalloc_in_range: no pde %d\n", pde);
470 return EFAULT;
472 assert(!pt->pt_dir[pde]);
473 if((r=pt_ptalloc(pt, pde, flags)) != OK) {
474 /* Couldn't do (complete) mapping.
475 * Don't bother freeing any previously
476 * allocated page tables, they're
477 * still writable, don't point to nonsense,
478 * and pt_ptalloc leaves the directory
479 * and other data in a consistent state.
481 printf("pt_ptalloc_in_range: pt_ptalloc failed\n");
482 return r;
485 assert(pt->pt_dir[pde]);
486 assert(pt->pt_dir[pde] & I386_VM_PRESENT);
489 return OK;
492 static char *ptestr(u32_t pte)
494 #define FLAG(constant, name) { \
495 if(pte & (constant)) { strcat(str, name); strcat(str, " "); } \
498 static char str[30];
499 if(!(pte & I386_VM_PRESENT)) {
500 return "not present";
502 str[0] = '\0';
503 FLAG(I386_VM_WRITE, "W");
504 FLAG(I386_VM_USER, "U");
505 FLAG(I386_VM_PWT, "PWT");
506 FLAG(I386_VM_PCD, "PCD");
507 FLAG(I386_VM_ACC, "ACC");
508 FLAG(I386_VM_DIRTY, "DIRTY");
509 FLAG(I386_VM_PS, "PS");
510 FLAG(I386_VM_GLOBAL, "G");
511 FLAG(I386_VM_PTAVAIL1, "AV1");
512 FLAG(I386_VM_PTAVAIL2, "AV2");
513 FLAG(I386_VM_PTAVAIL3, "AV3");
515 return str;
518 /*===========================================================================*
519 * pt_map_in_range *
520 *===========================================================================*/
521 int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp,
522 vir_bytes start, vir_bytes end)
524 /* Transfer all the mappings from the pt of the source process to the pt of
525 * the destination process in the range specified.
527 int pde, pte;
528 vir_bytes viraddr;
529 pt_t *pt, *dst_pt;
531 pt = &src_vmp->vm_pt;
532 dst_pt = &dst_vmp->vm_pt;
534 end = end ? end : VM_DATATOP;
535 assert(start % I386_PAGE_SIZE == 0);
536 assert(end % I386_PAGE_SIZE == 0);
537 assert(start <= end);
538 assert(I386_VM_PDE(end) < I386_VM_DIR_ENTRIES);
540 #if LU_DEBUG
541 printf("VM: pt_map_in_range: src = %d, dst = %d\n",
542 src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
543 printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n",
544 start, I386_VM_PDE(start), I386_VM_PTE(start),
545 end, I386_VM_PDE(end), I386_VM_PTE(end));
546 #endif
548 /* Scan all page-table entries in the range. */
549 for(viraddr = start; viraddr <= end; viraddr += I386_PAGE_SIZE) {
550 pde = I386_VM_PDE(viraddr);
551 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
552 if(viraddr == VM_DATATOP) break;
553 continue;
555 pte = I386_VM_PTE(viraddr);
556 if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
557 if(viraddr == VM_DATATOP) break;
558 continue;
561 /* Transfer the mapping. */
562 dst_pt->pt_pt[pde][pte] = pt->pt_pt[pde][pte];
564 if(viraddr == VM_DATATOP) break;
567 return OK;
570 /*===========================================================================*
571 * pt_ptmap *
572 *===========================================================================*/
573 int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
575 /* Transfer mappings to page dir and page tables from source process and
576 * destination process. Make sure all the mappings are above the stack, not
577 * to corrupt valid mappings in the data segment of the destination process.
579 int pde, r;
580 phys_bytes physaddr;
581 vir_bytes viraddr;
582 pt_t *pt;
584 pt = &src_vmp->vm_pt;
586 #if LU_DEBUG
587 printf("VM: pt_ptmap: src = %d, dst = %d\n",
588 src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
589 #endif
591 /* Transfer mapping to the page directory. */
592 viraddr = (vir_bytes) pt->pt_dir;
593 physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK;
594 if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
595 I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
596 WMF_OVERWRITE)) != OK) {
597 return r;
599 #if LU_DEBUG
600 printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n",
601 viraddr, physaddr);
602 #endif
604 /* Scan all non-reserved page-directory entries. */
605 for(pde=0; pde < I386_VM_DIR_ENTRIES; pde++) {
606 if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
607 continue;
610 /* Transfer mapping to the page table. */
611 viraddr = (vir_bytes) pt->pt_pt[pde];
612 physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK;
613 if((r=pt_writemap(dst_vmp, &dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
614 I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
615 WMF_OVERWRITE)) != OK) {
616 return r;
620 return OK;
623 void pt_clearmapcache(void)
625 /* Make sure kernel will invalidate tlb when using current
626 * pagetable (i.e. vm's) to make new mappings before new cr3
627 * is loaded.
629 if(sys_vmctl(SELF, VMCTL_CLEARMAPCACHE, 0) != OK)
630 panic("VMCTL_CLEARMAPCACHE failed");
633 /*===========================================================================*
634 * pt_writemap *
635 *===========================================================================*/
636 int pt_writemap(struct vmproc * vmp,
637 pt_t *pt,
638 vir_bytes v,
639 phys_bytes physaddr,
640 size_t bytes,
641 u32_t flags,
642 u32_t writemapflags)
644 /* Write mapping into page table. Allocate a new page table if necessary. */
645 /* Page directory and table entries for this virtual address. */
646 int p, pages;
647 int verify = 0;
648 int ret = OK;
650 #ifdef CONFIG_SMP
651 int vminhibit_clear = 0;
652 /* FIXME
653 * don't do it everytime, stop the process only on the first change and
654 * resume the execution on the last change. Do in a wrapper of this
655 * function
657 if (vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
658 !(vmp->vm_flags & VMF_EXITING)) {
659 sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_SET, 0);
660 vminhibit_clear = 1;
662 #endif
664 if(writemapflags & WMF_VERIFY)
665 verify = 1;
667 assert(!(bytes % I386_PAGE_SIZE));
668 assert(!(flags & ~(PTF_ALLFLAGS)));
670 pages = bytes / I386_PAGE_SIZE;
672 /* MAP_NONE means to clear the mapping. It doesn't matter
673 * what's actually written into the PTE if I386_VM_PRESENT
674 * isn't on, so we can just write MAP_NONE into it.
676 assert(physaddr == MAP_NONE || (flags & I386_VM_PRESENT));
677 assert(physaddr != MAP_NONE || !flags);
679 /* First make sure all the necessary page tables are allocated,
680 * before we start writing in any of them, because it's a pain
681 * to undo our work properly.
683 ret = pt_ptalloc_in_range(pt, v, v + I386_PAGE_SIZE*pages, flags, verify);
684 if(ret != OK) {
685 printf("VM: writemap: pt_ptalloc_in_range failed\n");
686 goto resume_exit;
689 /* Now write in them. */
690 for(p = 0; p < pages; p++) {
691 u32_t entry;
692 int pde = I386_VM_PDE(v);
693 int pte = I386_VM_PTE(v);
695 if(!v) { printf("VM: warning: making zero page for %d\n",
696 vmp->vm_endpoint); }
698 assert(!(v % I386_PAGE_SIZE));
699 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
700 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
702 /* Page table has to be there. */
703 assert(pt->pt_dir[pde] & I386_VM_PRESENT);
705 /* We do not expect it to be a bigpage. */
706 assert(!(pt->pt_dir[pde] & I386_VM_BIGPAGE));
708 /* Make sure page directory entry for this page table
709 * is marked present and page table entry is available.
711 assert(pt->pt_pt[pde]);
713 #if SANITYCHECKS
714 /* We don't expect to overwrite a page. */
715 if(!(writemapflags & (WMF_OVERWRITE|WMF_VERIFY)))
716 assert(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT));
717 #endif
718 if(writemapflags & (WMF_WRITEFLAGSONLY|WMF_FREE)) {
719 physaddr = pt->pt_pt[pde][pte] & I386_VM_ADDR_MASK;
722 if(writemapflags & WMF_FREE) {
723 free_mem(ABS2CLICK(physaddr), 1);
726 /* Entry we will write. */
727 entry = (physaddr & I386_VM_ADDR_MASK) | flags;
729 if(verify) {
730 u32_t maskedentry;
731 maskedentry = pt->pt_pt[pde][pte];
732 maskedentry &= ~(I386_VM_ACC|I386_VM_DIRTY);
733 /* Verify pagetable entry. */
734 if(entry & I386_VM_WRITE) {
735 /* If we expect a writable page, allow a readonly page. */
736 maskedentry |= I386_VM_WRITE;
738 if(maskedentry != entry) {
739 printf("pt_writemap: mismatch: ");
740 if((entry & I386_VM_ADDR_MASK) !=
741 (maskedentry & I386_VM_ADDR_MASK)) {
742 printf("pt_writemap: physaddr mismatch (0x%lx, 0x%lx); ",
743 (long)entry, (long)maskedentry);
744 } else printf("phys ok; ");
745 printf(" flags: found %s; ",
746 ptestr(pt->pt_pt[pde][pte]));
747 printf(" masked %s; ",
748 ptestr(maskedentry));
749 printf(" expected %s\n", ptestr(entry));
750 ret = EFAULT;
751 goto resume_exit;
753 } else {
754 /* Write pagetable entry. */
755 pt->pt_pt[pde][pte] = entry;
758 physaddr += I386_PAGE_SIZE;
759 v += I386_PAGE_SIZE;
762 resume_exit:
764 #ifdef CONFIG_SMP
765 if (vminhibit_clear) {
766 assert(vmp && vmp->vm_endpoint != NONE && vmp->vm_endpoint != VM_PROC_NR &&
767 !(vmp->vm_flags & VMF_EXITING));
768 sys_vmctl(vmp->vm_endpoint, VMCTL_VMINHIBIT_CLEAR, 0);
770 #endif
772 return ret;
775 /*===========================================================================*
776 * pt_checkrange *
777 *===========================================================================*/
778 int pt_checkrange(pt_t *pt, vir_bytes v, size_t bytes,
779 int write)
781 int p, pages;
783 assert(!(bytes % I386_PAGE_SIZE));
785 pages = bytes / I386_PAGE_SIZE;
787 for(p = 0; p < pages; p++) {
788 int pde = I386_VM_PDE(v);
789 int pte = I386_VM_PTE(v);
791 assert(!(v % I386_PAGE_SIZE));
792 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
793 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
795 /* Page table has to be there. */
796 if(!(pt->pt_dir[pde] & I386_VM_PRESENT))
797 return EFAULT;
799 /* Make sure page directory entry for this page table
800 * is marked present and page table entry is available.
802 assert((pt->pt_dir[pde] & I386_VM_PRESENT) && pt->pt_pt[pde]);
804 if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
805 return EFAULT;
808 if(write && !(pt->pt_pt[pde][pte] & I386_VM_WRITE)) {
809 return EFAULT;
812 v += I386_PAGE_SIZE;
815 return OK;
818 /*===========================================================================*
819 * pt_new *
820 *===========================================================================*/
821 int pt_new(pt_t *pt)
823 /* Allocate a pagetable root. On i386, allocate a page-aligned page directory
824 * and set them to 0 (indicating no page tables are allocated). Lookup
825 * its physical address as we'll need that in the future. Verify it's
826 * page-aligned.
828 int i;
830 /* Don't ever re-allocate/re-move a certain process slot's
831 * page directory once it's been created. This is a fraction
832 * faster, but also avoids having to invalidate the page
833 * mappings from in-kernel page tables pointing to
834 * the page directories (the page_directories data).
836 if(!pt->pt_dir &&
837 !(pt->pt_dir = vm_allocpage((phys_bytes *)&pt->pt_dir_phys, VMP_PAGEDIR))) {
838 return ENOMEM;
841 for(i = 0; i < I386_VM_DIR_ENTRIES; i++) {
842 pt->pt_dir[i] = 0; /* invalid entry (I386_VM_PRESENT bit = 0) */
843 pt->pt_pt[i] = NULL;
846 /* Where to start looking for free virtual address space? */
847 pt->pt_virtop = 0;
849 /* Map in kernel. */
850 if(pt_mapkernel(pt) != OK)
851 panic("pt_new: pt_mapkernel failed");
853 return OK;
856 static int freepde(void)
858 int p = kernel_boot_info.freepde_start++;
859 assert(kernel_boot_info.freepde_start < I386_VM_DIR_ENTRIES);
860 return p;
863 /*===========================================================================*
864 * pt_init *
865 *===========================================================================*/
866 void pt_init(void)
868 pt_t *newpt;
869 int s, r, p;
870 int global_bit_ok = 0;
871 vir_bytes sparepages_mem;
872 static u32_t currentpagedir[I386_VM_DIR_ENTRIES];
873 int m = kernel_boot_info.kern_mod;
874 u32_t mypdbr; /* Page Directory Base Register (cr3) value */
876 /* Find what the physical location of the kernel is. */
877 assert(m >= 0);
878 assert(m < kernel_boot_info.mods_with_kernel);
879 assert(kernel_boot_info.mods_with_kernel < MULTIBOOT_MAX_MODS);
880 kern_mb_mod = &kernel_boot_info.module_list[m];
881 kern_size = kern_mb_mod->mod_end - kern_mb_mod->mod_start;
882 assert(!(kern_mb_mod->mod_start % I386_BIG_PAGE_SIZE));
883 assert(!(kernel_boot_info.vir_kern_start % I386_BIG_PAGE_SIZE));
884 kern_start_pde = kernel_boot_info.vir_kern_start / I386_BIG_PAGE_SIZE;
886 /* Get ourselves spare pages. */
887 sparepages_mem = (vir_bytes) static_sparepages;
888 assert(!(sparepages_mem % I386_PAGE_SIZE));
890 /* Spare pages are used to allocate memory before VM has its own page
891 * table that things (i.e. arbitrary physical memory) can be mapped into.
892 * We get it by pre-allocating it in our bss (allocated and mapped in by
893 * the kernel) in static_sparepages. We also need the physical addresses
894 * though; we look them up now so they are ready for use.
897 missing_spares = 0;
898 assert(STATIC_SPAREPAGES < SPAREPAGES);
899 for(s = 0; s < SPAREPAGES; s++) {
900 vir_bytes v = (sparepages_mem + s*I386_PAGE_SIZE);;
901 phys_bytes ph;
902 if((r=sys_umap(SELF, VM_D, (vir_bytes) v,
903 I386_PAGE_SIZE*SPAREPAGES, &ph)) != OK)
904 panic("pt_init: sys_umap failed: %d", r);
905 if(s >= STATIC_SPAREPAGES) {
906 sparepages[s].page = NULL;
907 missing_spares++;
908 continue;
910 sparepages[s].page = (void *) v;
911 sparepages[s].phys = ph;
914 /* global bit and 4MB pages available? */
915 global_bit_ok = _cpufeature(_CPUF_I386_PGE);
916 bigpage_ok = _cpufeature(_CPUF_I386_PSE);
918 /* Set bit for PTE's and PDE's if available. */
919 if(global_bit_ok)
920 global_bit = I386_VM_GLOBAL;
922 /* Allocate us a page table in which to remember page directory
923 * pointers.
925 if(!(page_directories = vm_allocpage(&page_directories_phys,
926 VMP_PAGETABLE)))
927 panic("no virt addr for vm mappings");
929 memset(page_directories, 0, I386_PAGE_SIZE);
931 /* Now reserve another pde for kernel's own mappings. */
933 int kernmap_pde;
934 phys_bytes addr, len;
935 int flags, index = 0;
936 u32_t offset = 0;
938 kernmap_pde = freepde();
939 offset = kernmap_pde * I386_BIG_PAGE_SIZE;
941 while(sys_vmctl_get_mapping(index, &addr, &len,
942 &flags) == OK) {
943 vir_bytes vir;
944 if(index >= MAX_KERNMAPPINGS)
945 panic("VM: too many kernel mappings: %d", index);
946 kern_mappings[index].phys_addr = addr;
947 kern_mappings[index].len = len;
948 kern_mappings[index].flags = flags;
949 kern_mappings[index].vir_addr = offset;
950 kern_mappings[index].flags =
951 I386_VM_PRESENT;
952 if(flags & VMMF_UNCACHED)
953 kern_mappings[index].flags |= PTF_NOCACHE;
954 if(flags & VMMF_USER)
955 kern_mappings[index].flags |= I386_VM_USER;
956 if(flags & VMMF_WRITE)
957 kern_mappings[index].flags |= I386_VM_WRITE;
958 if(flags & VMMF_GLO)
959 kern_mappings[index].flags |= I386_VM_GLOBAL;
960 if(addr % I386_PAGE_SIZE)
961 panic("VM: addr unaligned: %d", addr);
962 if(len % I386_PAGE_SIZE)
963 panic("VM: len unaligned: %d", len);
964 vir = offset;
965 if(sys_vmctl_reply_mapping(index, vir) != OK)
966 panic("VM: reply failed");
967 offset += len;
968 index++;
969 kernmappings++;
973 /* Find a PDE below processes available for mapping in the
974 * page directories.
976 pagedir_pde = freepde();
977 pagedir_pde_val = (page_directories_phys & I386_VM_ADDR_MASK) |
978 I386_VM_PRESENT | I386_VM_WRITE;
980 /* Allright. Now. We have to make our own page directory and page tables,
981 * that the kernel has already set up, accessible to us. It's easier to
982 * understand if we just copy all the required pages (i.e. page directory
983 * and page tables), and set up the pointers as if VM had done it itself.
985 * This allocation will happen without using any page table, and just
986 * uses spare pages.
988 newpt = &vmprocess->vm_pt;
989 if(pt_new(newpt) != OK)
990 panic("vm pt_new failed");
992 /* Get our current pagedir so we can see it. */
993 if(sys_vmctl_get_pdbr(SELF, &mypdbr) != OK)
994 panic("VM: sys_vmctl_get_pdbr failed");
995 if(sys_vircopy(NONE, mypdbr, SELF,
996 (vir_bytes) currentpagedir, I386_PAGE_SIZE) != OK)
997 panic("VM: sys_vircopy failed");
999 /* We have mapped in kernel ourselves; now copy mappings for VM
1000 * that kernel made, including allocations for BSS. Skip identity
1001 * mapping bits; just map in VM.
1003 for(p = 0; p < I386_VM_DIR_ENTRIES; p++) {
1004 u32_t entry = currentpagedir[p];
1005 phys_bytes ptaddr_kern, ptaddr_us;
1007 /* BIGPAGEs are kernel mapping (do ourselves) or boot
1008 * identity mapping (don't want).
1010 if(!(entry & I386_VM_PRESENT)) continue;
1011 if((entry & I386_VM_BIGPAGE)) continue;
1013 if(pt_ptalloc(newpt, p, 0) != OK)
1014 panic("pt_ptalloc failed");
1015 assert(newpt->pt_dir[p] & I386_VM_PRESENT);
1017 ptaddr_kern = entry & I386_VM_ADDR_MASK;
1018 ptaddr_us = newpt->pt_dir[p] & I386_VM_ADDR_MASK;
1020 /* Copy kernel-initialized pagetable contents into our
1021 * normally accessible pagetable.
1023 if(sys_abscopy(ptaddr_kern, ptaddr_us, I386_PAGE_SIZE) != OK)
1024 panic("pt_init: abscopy failed");
1027 /* Inform kernel vm has a newly built page table. */
1028 assert(vmproc[VM_PROC_NR].vm_endpoint == VM_PROC_NR);
1029 pt_bind(newpt, &vmproc[VM_PROC_NR]);
1031 pt_init_done = 1;
1033 /* All OK. */
1034 return;
1037 /*===========================================================================*
1038 * pt_bind *
1039 *===========================================================================*/
1040 int pt_bind(pt_t *pt, struct vmproc *who)
1042 int slot;
1043 u32_t phys;
1044 void *pdes;
1046 /* Basic sanity checks. */
1047 assert(who);
1048 assert(who->vm_flags & VMF_INUSE);
1049 assert(pt);
1051 assert(pagedir_pde >= 0);
1053 slot = who->vm_slot;
1054 assert(slot >= 0);
1055 assert(slot < ELEMENTS(vmproc));
1056 assert(slot < I386_VM_PT_ENTRIES);
1058 phys = pt->pt_dir_phys & I386_VM_ADDR_MASK;
1059 assert(pt->pt_dir_phys == phys);
1061 /* Update "page directory pagetable." */
1062 page_directories[slot] = phys | I386_VM_PRESENT|I386_VM_WRITE;
1064 /* This is where the PDE's will be visible to the kernel
1065 * in its address space.
1067 pdes = (void *) (pagedir_pde*I386_BIG_PAGE_SIZE +
1068 slot * I386_PAGE_SIZE);
1070 #if 0
1071 printf("VM: slot %d endpoint %d has pde val 0x%lx at kernel address 0x%lx\n",
1072 slot, who->vm_endpoint, page_directories[slot], pdes);
1073 #endif
1074 /* Tell kernel about new page table root. */
1075 return sys_vmctl_set_addrspace(who->vm_endpoint, pt->pt_dir_phys, pdes);
1078 /*===========================================================================*
1079 * pt_free *
1080 *===========================================================================*/
1081 void pt_free(pt_t *pt)
1083 /* Free memory associated with this pagetable. */
1084 int i;
1086 for(i = 0; i < I386_VM_DIR_ENTRIES; i++)
1087 if(pt->pt_pt[i])
1088 vm_freepages((vir_bytes) pt->pt_pt[i], 1);
1090 return;
1093 /*===========================================================================*
1094 * pt_mapkernel *
1095 *===========================================================================*/
1096 int pt_mapkernel(pt_t *pt)
1098 int i;
1099 int kern_pde = kern_start_pde;
1100 phys_bytes addr, mapped = 0;
1102 /* Any i386 page table needs to map in the kernel address space. */
1103 assert(bigpage_ok);
1104 assert(pagedir_pde >= 0);
1105 assert(kern_pde >= 0);
1107 /* pt_init() has made sure this is ok. */
1108 addr = kern_mb_mod->mod_start;
1110 /* Actually mapping in kernel */
1111 while(mapped < kern_size) {
1112 pt->pt_dir[kern_pde] = addr | I386_VM_PRESENT |
1113 I386_VM_BIGPAGE | I386_VM_WRITE | global_bit;
1114 kern_pde++;
1115 mapped += I386_BIG_PAGE_SIZE;
1116 addr += I386_BIG_PAGE_SIZE;
1119 /* Kernel also wants to know about all page directories. */
1120 assert(pagedir_pde > kern_pde);
1121 pt->pt_dir[pagedir_pde] = pagedir_pde_val;
1123 /* Kernel also wants various mappings of its own. */
1124 for(i = 0; i < kernmappings; i++) {
1125 if(pt_writemap(NULL, pt,
1126 kern_mappings[i].vir_addr,
1127 kern_mappings[i].phys_addr,
1128 kern_mappings[i].len,
1129 kern_mappings[i].flags, 0) != OK) {
1130 panic("pt_mapkernel: pt_writemap failed");
1134 return OK;
/*===========================================================================*
 *				pt_cycle		     		     *
 *===========================================================================*/
void pt_cycle(void)
{
/* Periodic housekeeping: top up the spare page pool. */
	(void) vm_checkspares();
}
1145 int get_vm_self_pages(void) { return vm_self_pages; }