kernel/vm: change pde table info from single buffer to explicit per-process.
[minix.git] / servers / vm / arch / i386 / pagetable.c
blob939322b41a9a62e451b02d46d896a09a919bd223
2 #define _SYSTEM 1
3 #define _POSIX_SOURCE 1
5 #include <minix/callnr.h>
6 #include <minix/com.h>
7 #include <minix/config.h>
8 #include <minix/const.h>
9 #include <minix/ds.h>
10 #include <minix/endpoint.h>
11 #include <minix/keymap.h>
12 #include <minix/minlib.h>
13 #include <minix/type.h>
14 #include <minix/ipc.h>
15 #include <minix/sysutil.h>
16 #include <minix/syslib.h>
17 #include <minix/safecopies.h>
18 #include <minix/cpufeature.h>
19 #include <minix/bitmap.h>
20 #include <minix/debug.h>
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <assert.h>
25 #include <string.h>
26 #include <env.h>
27 #include <stdio.h>
28 #include <fcntl.h>
29 #include <stdlib.h>
31 #include "proto.h"
32 #include "glo.h"
33 #include "util.h"
34 #include "vm.h"
35 #include "sanitycheck.h"
37 #include "memory.h"
/* PDE used to map in kernel, kernel physical address. */
PRIVATE int id_map_high_pde = -1, pagedir_pde = -1;
PRIVATE u32_t global_bit = 0, pagedir_pde_val;

/* First pde in use by user processes; set in pt_init(). */
PRIVATE int proc_pde = 0;

/* 4MB page size available in hardware? */
PRIVATE int bigpage_ok = 0;

/* Our process table entry. */
struct vmproc *vmprocess = &vmproc[VM_PROC_NR];

/* Spare memory, ready to go after initialization, to avoid a
 * circular dependency on allocating memory and writing it into VM's
 * page table.
 */
#define SPAREPAGES 25
int missing_spares = SPAREPAGES;
PRIVATE struct {
	void *page;
	u32_t phys;
} sparepages[SPAREPAGES];

/* Mappings the kernel asks VM to set up for it (see pt_init). */
#define MAX_KERNMAPPINGS 10
PRIVATE struct {
	phys_bytes	phys_addr;	/* Physical addr. */
	phys_bytes	len;		/* Length in bytes. */
	vir_bytes	lin_addr;	/* Offset in page table. */
	int		flags;
} kern_mappings[MAX_KERNMAPPINGS];
int kernmappings = 0;

/* Clicks must be pages, as
 *  - they must be page aligned to map them
 *  - they must be a multiple of the page size
 *  - it's inconvenient to have them bigger than pages, because we often want
 *    just one page
 * May as well require them to be equal then.
 */
#if CLICK_SIZE != I386_PAGE_SIZE
#error CLICK_SIZE must be page size.
#endif

/* Bytes of virtual address space one pde controls. */
#define BYTESPERPDE (I386_VM_PT_ENTRIES * I386_PAGE_SIZE)

/* Nevertheless, introduce these macros to make the code readable. */
#define CLICK2PAGE(c) ((c) / CLICKSPERPAGE)

/* Page table that contains pointers to all page directories. */
u32_t page_directories_phys, *page_directories = NULL;
#if SANITYCHECKS
/*===========================================================================*
 *				pt_sanitycheck		     		     *
 *===========================================================================*/
PUBLIC void pt_sanitycheck(pt_t *pt, char *file, int line)
{
/* Basic pt sanity check: the pt must belong to a process slot, its
 * directory page must be accounted for, and every per-pde page table
 * pointer must agree with the PRESENT bit in the directory entry.
 */
	int i;
	int slot;

	MYASSERT(pt);
	MYASSERT(pt->pt_dir);
	MYASSERT(pt->pt_dir_phys);

	/* Find which process slot owns this page table. */
	for(slot = 0; slot < ELEMENTS(vmproc); slot++) {
		if(pt == &vmproc[slot].vm_pt)
			break;
	}

	if(slot >= ELEMENTS(vmproc)) {
		panic("pt_sanitycheck: passed pt not in any proc");
	}

	/* Account the directory page itself as used. */
	MYASSERT(usedpages_add(pt->pt_dir_phys, I386_PAGE_SIZE) == OK);

	/* Only the process range (from proc_pde up) is checked; lower
	 * pdes hold the kernel identity mapping.
	 */
	for(i = proc_pde; i < I386_VM_DIR_ENTRIES; i++) {
		if(pt->pt_pt[i]) {
			int pte;
			MYASSERT(vm_addrok(pt->pt_pt[i], 1));
			if(!(pt->pt_dir[i] & I386_VM_PRESENT)) {
				printf("slot %d: pt->pt_pt[%d] = 0x%lx, but pt_dir entry 0x%lx\n",
					slot, i, pt->pt_pt[i], pt->pt_dir[i]);
			}
			MYASSERT(pt->pt_dir[i] & I386_VM_PRESENT);
			MYASSERT(usedpages_add(I386_VM_PFA(pt->pt_dir[i]),
				I386_PAGE_SIZE) == OK);
		} else {
			/* No page table pointer: entry must not be present. */
			MYASSERT(!(pt->pt_dir[i] & I386_VM_PRESENT));
		}
	}
}
#endif
134 /*===========================================================================*
135 * aalloc *
136 *===========================================================================*/
137 PRIVATE void *aalloc(size_t bytes)
139 /* Page-aligned malloc(). only used if vm_allocpage can't be used. */
140 u32_t b;
142 b = (u32_t) malloc(I386_PAGE_SIZE + bytes);
143 if(!b) panic("aalloc: out of memory: %d", bytes);
144 b += I386_PAGE_SIZE - (b % I386_PAGE_SIZE);
146 return (void *) b;
149 /*===========================================================================*
150 * findhole *
151 *===========================================================================*/
152 PRIVATE u32_t findhole(pt_t *pt, u32_t vmin, u32_t vmax)
154 /* Find a space in the virtual address space of pageteble 'pt',
155 * between page-aligned BYTE offsets vmin and vmax, to fit
156 * a page in. Return byte offset.
158 u32_t freefound = 0, curv;
159 int pde = 0, try_restart;
160 static u32_t lastv = 0;
162 /* Input sanity check. */
163 assert(vmin + I386_PAGE_SIZE >= vmin);
164 assert(vmax >= vmin + I386_PAGE_SIZE);
165 assert((vmin % I386_PAGE_SIZE) == 0);
166 assert((vmax % I386_PAGE_SIZE) == 0);
168 #if SANITYCHECKS
169 curv = ((u32_t) random()) % ((vmax - vmin)/I386_PAGE_SIZE);
170 curv *= I386_PAGE_SIZE;
171 curv += vmin;
172 #else
173 curv = lastv;
174 if(curv < vmin || curv >= vmax)
175 curv = vmin;
176 #endif
177 try_restart = 1;
179 /* Start looking for a free page starting at vmin. */
180 while(curv < vmax) {
181 int pte;
183 assert(curv >= vmin);
184 assert(curv < vmax);
186 pde = I386_VM_PDE(curv);
187 pte = I386_VM_PTE(curv);
189 if(!(pt->pt_dir[pde] & I386_VM_PRESENT) ||
190 !(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
191 lastv = curv;
192 return curv;
195 curv+=I386_PAGE_SIZE;
197 if(curv >= vmax && try_restart) {
198 curv = vmin;
199 try_restart = 0;
203 printf("VM: out of virtual address space in vm\n");
205 return NO_MEM;
/*===========================================================================*
 *				vm_freepages		     		     *
 *===========================================================================*/
PRIVATE void vm_freepages(vir_bytes vir, vir_bytes phys, int pages, int reason)
{
/* Return 'pages' pages at vir/phys to the free pool and clear their
 * mapping in VM's own page table. Pages below vm_stacktop belong to
 * VM's static heap and are never freed (only reported).
 */
	assert(reason >= 0 && reason < VMP_CATEGORIES);
	if(vir >= vmprocess->vm_stacktop) {
		assert(!(vir % I386_PAGE_SIZE));
		assert(!(phys % I386_PAGE_SIZE));
		free_mem(ABS2CLICK(phys), pages);
		/* Unmap: MAP_NONE with no flags clears the ptes. */
		if(pt_writemap(&vmprocess->vm_pt, arch_vir2map(vmprocess, vir),
			MAP_NONE, pages*I386_PAGE_SIZE, 0, WMF_OVERWRITE) != OK)
			panic("vm_freepages: pt_writemap failed");
	} else {
		printf("VM: vm_freepages not freeing VM heap pages (%d)\n",
			pages);
	}

#if SANITYCHECKS
	/* If SANITYCHECKS are on, flush tlb so accessing freed pages is
	 * always trapped, also if not in tlb.
	 */
	if((sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
		panic("VMCTL_FLUSHTLB failed");
	}
#endif
}
236 /*===========================================================================*
237 * vm_getsparepage *
238 *===========================================================================*/
239 PRIVATE void *vm_getsparepage(u32_t *phys)
241 int s;
242 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
243 for(s = 0; s < SPAREPAGES; s++) {
244 if(sparepages[s].page) {
245 void *sp;
246 sp = sparepages[s].page;
247 *phys = sparepages[s].phys;
248 sparepages[s].page = NULL;
249 missing_spares++;
250 assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
251 return sp;
254 return NULL;
/*===========================================================================*
 *				vm_checkspares		     		     *
 *===========================================================================*/
PRIVATE void *vm_checkspares(void)
{
/* Replenish the spare-page pool: allocate a fresh page for every empty
 * slot (up to the number currently missing). Called periodically from
 * pt_cycle(). Always returns NULL; the return type only mirrors the
 * other vm_*page helpers.
 */
	int s, n = 0;
	static int total = 0, worst = 0;	/* bookkeeping across calls */
	assert(missing_spares >= 0 && missing_spares <= SPAREPAGES);
	for(s = 0; s < SPAREPAGES && missing_spares > 0; s++)
	    if(!sparepages[s].page) {
		n++;
		if((sparepages[s].page = vm_allocpage(&sparepages[s].phys,
			VMP_SPARE))) {
			missing_spares--;
			assert(missing_spares >= 0);
			assert(missing_spares <= SPAREPAGES);
		} else {
			/* Best-effort: leave slot empty, retry next cycle. */
			printf("VM: warning: couldn't get new spare page\n");
		}
	    }
	if(worst < n) worst = n;
	total += n;

	return NULL;
}
283 /*===========================================================================*
284 * vm_allocpage *
285 *===========================================================================*/
286 PUBLIC void *vm_allocpage(phys_bytes *phys, int reason)
288 /* Allocate a page for use by VM itself. */
289 phys_bytes newpage;
290 vir_bytes loc;
291 pt_t *pt;
292 int r;
293 static int level = 0;
294 void *ret;
296 pt = &vmprocess->vm_pt;
297 assert(reason >= 0 && reason < VMP_CATEGORIES);
299 level++;
301 assert(level >= 1);
302 assert(level <= 2);
304 if(level > 1 || !(vmprocess->vm_flags & VMF_HASPT) || !meminit_done) {
305 int r;
306 void *s;
307 s=vm_getsparepage(phys);
308 level--;
309 if(!s) {
310 util_stacktrace();
311 printf("VM: warning: out of spare pages\n");
313 return s;
316 /* VM does have a pagetable, so get a page and map it in there.
317 * Where in our virtual address space can we put it?
319 loc = findhole(pt, arch_vir2map(vmprocess, vmprocess->vm_stacktop),
320 vmprocess->vm_arch.vm_data_top);
321 if(loc == NO_MEM) {
322 level--;
323 printf("VM: vm_allocpage: findhole failed\n");
324 return NULL;
327 /* Allocate page of memory for use by VM. As VM
328 * is trusted, we don't have to pre-clear it.
330 if((newpage = alloc_mem(CLICKSPERPAGE, 0)) == NO_MEM) {
331 level--;
332 printf("VM: vm_allocpage: alloc_mem failed\n");
333 return NULL;
336 *phys = CLICK2ABS(newpage);
338 /* Map this page into our address space. */
339 if((r=pt_writemap(pt, loc, *phys, I386_PAGE_SIZE,
340 I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE, 0)) != OK) {
341 free_mem(newpage, CLICKSPERPAGE);
342 printf("vm_allocpage writemap failed\n");
343 level--;
344 return NULL;
347 if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
348 panic("VMCTL_FLUSHTLB failed: %d", r);
351 level--;
353 /* Return user-space-ready pointer to it. */
354 ret = (void *) arch_map2vir(vmprocess, loc);
356 return ret;
359 /*===========================================================================*
360 * vm_pagelock *
361 *===========================================================================*/
362 PUBLIC void vm_pagelock(void *vir, int lockflag)
364 /* Mark a page allocated by vm_allocpage() unwritable, i.e. only for VM. */
365 vir_bytes m;
366 int r;
367 u32_t flags = I386_VM_PRESENT | I386_VM_USER;
368 pt_t *pt;
370 pt = &vmprocess->vm_pt;
371 m = arch_vir2map(vmprocess, (vir_bytes) vir);
373 assert(!(m % I386_PAGE_SIZE));
375 if(!lockflag)
376 flags |= I386_VM_WRITE;
378 /* Update flags. */
379 if((r=pt_writemap(pt, m, 0, I386_PAGE_SIZE,
380 flags, WMF_OVERWRITE | WMF_WRITEFLAGSONLY)) != OK) {
381 panic("vm_lockpage: pt_writemap failed");
384 if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) {
385 panic("VMCTL_FLUSHTLB failed: %d", r);
388 return;
/*===========================================================================*
 *				vm_addrok		     		     *
 *===========================================================================*/
PUBLIC int vm_addrok(void *vir, int writeflag)
{
/* Check whether the given address is mapped in (and, if writeflag is
 * set, writable) according to VM's own page table. Returns 1 if OK,
 * 0 (with a diagnostic) otherwise.
 * (The previous comment here was copy-pasted from vm_pagelock and
 * described the wrong function.)
 */
	pt_t *pt = &vmprocess->vm_pt;
	int pde, pte;
	vir_bytes v = arch_vir2map(vmprocess, (vir_bytes) vir);

	/* No PT yet? Don't bother looking. */
	if(!(vmprocess->vm_flags & VMF_HASPT)) {
		return 1;
	}

	pde = I386_VM_PDE(v);
	pte = I386_VM_PTE(v);

	if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
		printf("addr not ok: missing pde %d\n", pde);
		return 0;
	}

	if(writeflag &&
		!(pt->pt_dir[pde] & I386_VM_WRITE)) {
		printf("addr not ok: pde %d present but pde unwritable\n", pde);
		return 0;
	}

	if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
		printf("addr not ok: missing pde %d / pte %d\n",
			pde, pte);
		return 0;
	}

	if(writeflag &&
		!(pt->pt_pt[pde][pte] & I386_VM_WRITE)) {
		printf("addr not ok: pde %d / pte %d present but unwritable\n",
			pde, pte);
		return 0;
	}

	return 1;
}
436 /*===========================================================================*
437 * pt_ptalloc *
438 *===========================================================================*/
439 PRIVATE int pt_ptalloc(pt_t *pt, int pde, u32_t flags)
441 /* Allocate a page table and write its address into the page directory. */
442 int i;
443 u32_t pt_phys;
445 /* Argument must make sense. */
446 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
447 assert(!(flags & ~(PTF_ALLFLAGS)));
449 /* We don't expect to overwrite page directory entry, nor
450 * storage for the page table.
452 assert(!(pt->pt_dir[pde] & I386_VM_PRESENT));
453 assert(!pt->pt_pt[pde]);
455 /* Get storage for the page table. */
456 if(!(pt->pt_pt[pde] = vm_allocpage(&pt_phys, VMP_PAGETABLE)))
457 return ENOMEM;
459 for(i = 0; i < I386_VM_PT_ENTRIES; i++)
460 pt->pt_pt[pde][i] = 0; /* Empty entry. */
462 /* Make page directory entry.
463 * The PDE is always 'present,' 'writable,' and 'user accessible,'
464 * relying on the PTE for protection.
466 pt->pt_dir[pde] = (pt_phys & I386_VM_ADDR_MASK) | flags
467 | I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;
469 return OK;
472 PRIVATE char *ptestr(u32_t pte)
474 #define FLAG(constant, name) { \
475 if(pte & (constant)) { strcat(str, name); strcat(str, " "); } \
478 static char str[30];
479 if(!(pte & I386_VM_PRESENT)) {
480 return "not present";
482 str[0] = '\0';
483 FLAG(I386_VM_WRITE, "W");
484 FLAG(I386_VM_USER, "U");
485 FLAG(I386_VM_PWT, "PWT");
486 FLAG(I386_VM_PCD, "PCD");
487 FLAG(I386_VM_ACC, "ACC");
488 FLAG(I386_VM_DIRTY, "DIRTY");
489 FLAG(I386_VM_PS, "PS");
490 FLAG(I386_VM_GLOBAL, "G");
491 FLAG(I386_VM_PTAVAIL1, "AV1");
492 FLAG(I386_VM_PTAVAIL2, "AV2");
493 FLAG(I386_VM_PTAVAIL3, "AV3");
495 return str;
498 /*===========================================================================*
499 * pt_writemap *
500 *===========================================================================*/
501 PUBLIC int pt_writemap(pt_t *pt, vir_bytes v, phys_bytes physaddr,
502 size_t bytes, u32_t flags, u32_t writemapflags)
504 /* Write mapping into page table. Allocate a new page table if necessary. */
505 /* Page directory and table entries for this virtual address. */
506 int p, pages, pdecheck;
507 int finalpde;
508 int verify = 0;
510 if(writemapflags & WMF_VERIFY)
511 verify = 1;
513 assert(!(bytes % I386_PAGE_SIZE));
514 assert(!(flags & ~(PTF_ALLFLAGS)));
516 pages = bytes / I386_PAGE_SIZE;
518 /* MAP_NONE means to clear the mapping. It doesn't matter
519 * what's actually written into the PTE if I386_VM_PRESENT
520 * isn't on, so we can just write MAP_NONE into it.
522 assert(physaddr == MAP_NONE || (flags & I386_VM_PRESENT));
523 assert(physaddr != MAP_NONE || !flags);
525 finalpde = I386_VM_PDE(v + I386_PAGE_SIZE * pages);
527 /* First make sure all the necessary page tables are allocated,
528 * before we start writing in any of them, because it's a pain
529 * to undo our work properly. Walk the range in page-directory-entry
530 * sized leaps.
532 for(pdecheck = I386_VM_PDE(v); pdecheck <= finalpde; pdecheck++) {
533 assert(pdecheck >= 0 && pdecheck < I386_VM_DIR_ENTRIES);
534 assert(!(pt->pt_dir[pdecheck] & I386_VM_BIGPAGE));
535 if(!(pt->pt_dir[pdecheck] & I386_VM_PRESENT)) {
536 int r;
537 if(verify) {
538 printf("pt_writemap verify: no pde %d\n", pdecheck);
539 return EFAULT;
541 assert(!pt->pt_dir[pdecheck]);
542 if((r=pt_ptalloc(pt, pdecheck, flags)) != OK) {
543 /* Couldn't do (complete) mapping.
544 * Don't bother freeing any previously
545 * allocated page tables, they're
546 * still writable, don't point to nonsense,
547 * and pt_ptalloc leaves the directory
548 * and other data in a consistent state.
550 printf("pt_writemap: pt_ptalloc failed\n", pdecheck);
551 return r;
554 assert(pt->pt_dir[pdecheck] & I386_VM_PRESENT);
557 /* Now write in them. */
558 for(p = 0; p < pages; p++) {
559 u32_t entry;
560 int pde = I386_VM_PDE(v);
561 int pte = I386_VM_PTE(v);
563 assert(!(v % I386_PAGE_SIZE));
564 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
565 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
567 /* Page table has to be there. */
568 assert(pt->pt_dir[pde] & I386_VM_PRESENT);
570 /* Make sure page directory entry for this page table
571 * is marked present and page table entry is available.
573 assert((pt->pt_dir[pde] & I386_VM_PRESENT));
574 assert(pt->pt_pt[pde]);
576 #if SANITYCHECKS
577 /* We don't expect to overwrite a page. */
578 if(!(writemapflags & (WMF_OVERWRITE|WMF_VERIFY)))
579 assert(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT));
580 #endif
581 if(writemapflags & (WMF_WRITEFLAGSONLY|WMF_FREE)) {
582 physaddr = pt->pt_pt[pde][pte] & I386_VM_ADDR_MASK;
585 if(writemapflags & WMF_FREE) {
586 free_mem(ABS2CLICK(physaddr), 1);
589 /* Entry we will write. */
590 entry = (physaddr & I386_VM_ADDR_MASK) | flags;
592 if(verify) {
593 u32_t maskedentry;
594 maskedentry = pt->pt_pt[pde][pte];
595 maskedentry &= ~(I386_VM_ACC|I386_VM_DIRTY);
596 /* Verify pagetable entry. */
597 if(maskedentry != entry) {
598 printf("pt_writemap: mismatch: ");
599 if((entry & I386_VM_ADDR_MASK) !=
600 (maskedentry & I386_VM_ADDR_MASK)) {
601 printf("pt_writemap: physaddr mismatch (0x%lx, 0x%lx); ", entry, maskedentry);
602 } else printf("phys ok; ");
603 printf(" flags: found %s; ",
604 ptestr(pt->pt_pt[pde][pte]));
605 printf(" masked %s; ",
606 ptestr(maskedentry));
607 printf(" expected %s\n", ptestr(entry));
608 return EFAULT;
610 } else {
611 /* Write pagetable entry. */
612 #if SANITYCHECKS
613 assert(vm_addrok(pt->pt_pt[pde], 1));
614 #endif
615 pt->pt_pt[pde][pte] = entry;
618 physaddr += I386_PAGE_SIZE;
619 v += I386_PAGE_SIZE;
622 return OK;
625 /*===========================================================================*
626 * pt_checkrange *
627 *===========================================================================*/
628 PUBLIC int pt_checkrange(pt_t *pt, vir_bytes v, size_t bytes,
629 int write)
631 int p, pages, pde;
633 assert(!(bytes % I386_PAGE_SIZE));
635 pages = bytes / I386_PAGE_SIZE;
637 for(p = 0; p < pages; p++) {
638 u32_t entry;
639 int pde = I386_VM_PDE(v);
640 int pte = I386_VM_PTE(v);
642 assert(!(v % I386_PAGE_SIZE));
643 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
644 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
646 /* Page table has to be there. */
647 if(!(pt->pt_dir[pde] & I386_VM_PRESENT))
648 return EFAULT;
650 /* Make sure page directory entry for this page table
651 * is marked present and page table entry is available.
653 assert((pt->pt_dir[pde] & I386_VM_PRESENT) && pt->pt_pt[pde]);
655 if(!(pt->pt_pt[pde][pte] & I386_VM_PRESENT)) {
656 return EFAULT;
659 if(write && !(pt->pt_pt[pde][pte] & I386_VM_WRITE)) {
660 return EFAULT;
663 v += I386_PAGE_SIZE;
666 return OK;
669 /*===========================================================================*
670 * pt_new *
671 *===========================================================================*/
672 PUBLIC int pt_new(pt_t *pt)
674 /* Allocate a pagetable root. On i386, allocate a page-aligned page directory
675 * and set them to 0 (indicating no page tables are allocated). Lookup
676 * its physical address as we'll need that in the future. Verify it's
677 * page-aligned.
679 int i;
681 /* Don't ever re-allocate/re-move a certain process slot's
682 * page directory once it's been created. This is a fraction
683 * faster, but also avoids having to invalidate the page
684 * mappings from in-kernel page tables pointing to
685 * the page directories (the page_directories data).
687 if(!pt->pt_dir &&
688 !(pt->pt_dir = vm_allocpage(&pt->pt_dir_phys, VMP_PAGEDIR))) {
689 return ENOMEM;
692 for(i = 0; i < I386_VM_DIR_ENTRIES; i++) {
693 pt->pt_dir[i] = 0; /* invalid entry (I386_VM_PRESENT bit = 0) */
694 pt->pt_pt[i] = NULL;
697 /* Where to start looking for free virtual address space? */
698 pt->pt_virtop = 0;
700 /* Map in kernel. */
701 if(pt_mapkernel(pt) != OK)
702 panic("pt_new: pt_mapkernel failed");
704 return OK;
/*===========================================================================*
 *				pt_init			     		     *
 *===========================================================================*/
PUBLIC void pt_init(phys_bytes usedlimit)
{
/* By default, the kernel gives us a data segment with pre-allocated
 * memory that then can't grow. We want to be able to allocate memory
 * dynamically, however. So here we copy the part of the page table
 * that's ours, so we get a private page table. Then we increase the
 * hardware segment size so we can allocate memory above our stack.
 */
	pt_t *newpt;
	int s, r;
	vir_bytes v;
	phys_bytes lo, hi;
	vir_bytes extra_clicks;
	u32_t moveup = 0;
	int global_bit_ok = 0;
	int free_pde;
	int p;
	struct vm_ep_data ep_data;
	vir_bytes sparepages_mem;
	phys_bytes sparepages_ph;

	/* Shorthand. */
	newpt = &vmprocess->vm_pt;

	/* Get ourselves spare pages: statically allocated (via aalloc)
	 * so they exist before we can allocate dynamically.
	 */
	if(!(sparepages_mem = (vir_bytes) aalloc(I386_PAGE_SIZE*SPAREPAGES)))
		panic("pt_init: aalloc for spare failed");
	if((r=sys_umap(SELF, VM_D, (vir_bytes) sparepages_mem,
		I386_PAGE_SIZE*SPAREPAGES, &sparepages_ph)) != OK)
		panic("pt_init: sys_umap failed: %d", r);

	for(s = 0; s < SPAREPAGES; s++) {
		sparepages[s].page = (void *) (sparepages_mem + s*I386_PAGE_SIZE);
		sparepages[s].phys = sparepages_ph + s*I386_PAGE_SIZE;
	}

	missing_spares = 0;

	/* global bit and 4MB pages available? */
	global_bit_ok = _cpufeature(_CPUF_I386_PGE);
	bigpage_ok = _cpufeature(_CPUF_I386_PSE);

	/* Set bit for PTE's and PDE's if available. */
	if(global_bit_ok)
		global_bit = I386_VM_GLOBAL;

	/* The kernel and boot time processes need an identity mapping.
	 * We use full PDE's for this without separate page tables.
	 * Figure out which pde we can start using for other purposes.
	 */
	id_map_high_pde = usedlimit / I386_BIG_PAGE_SIZE;

	/* We have to make mappings up till here. */
	free_pde = id_map_high_pde+1;

	/* Initial (current) range of our virtual address space. */
	lo = CLICK2ABS(vmprocess->vm_arch.vm_seg[T].mem_phys);
	hi = CLICK2ABS(vmprocess->vm_arch.vm_seg[S].mem_phys +
		vmprocess->vm_arch.vm_seg[S].mem_len);

	assert(!(lo % I386_PAGE_SIZE));
	assert(!(hi % I386_PAGE_SIZE));

	/* If we sit below VM_PROCSTART, we will relocate ourselves up. */
	if(lo < VM_PROCSTART) {
		moveup = VM_PROCSTART - lo;
		assert(!(VM_PROCSTART % I386_PAGE_SIZE));
		assert(!(lo % I386_PAGE_SIZE));
		assert(!(moveup % I386_PAGE_SIZE));
	}

	/* Make new page table for ourselves, partly copied
	 * from the current one.
	 */
	if(pt_new(newpt) != OK)
		panic("pt_init: pt_new failed");

	/* Set up mappings for VM process. */
	for(v = lo; v < hi; v += I386_PAGE_SIZE) {
		phys_bytes addr;
		u32_t flags;

		/* We have to write the new position in the PT,
		 * so we can move our segments.
		 */
		if(pt_writemap(newpt, v+moveup, v, I386_PAGE_SIZE,
			I386_VM_PRESENT|I386_VM_WRITE|I386_VM_USER, 0) != OK)
			panic("pt_init: pt_writemap failed");
	}

	/* Move segments up too. */
	vmprocess->vm_arch.vm_seg[T].mem_phys += ABS2CLICK(moveup);
	vmprocess->vm_arch.vm_seg[D].mem_phys += ABS2CLICK(moveup);
	vmprocess->vm_arch.vm_seg[S].mem_phys += ABS2CLICK(moveup);

	/* Allocate us a page table in which to remember page directory
	 * pointers.
	 */
	if(!(page_directories = vm_allocpage(&page_directories_phys,
		VMP_PAGETABLE)))
		panic("no virt addr for vm mappings");

	memset(page_directories, 0, I386_PAGE_SIZE);

	/* Increase our hardware data segment to create virtual address
	 * space above our stack. We want to increase it to VM_DATATOP,
	 * like regular processes have.
	 */
	extra_clicks = ABS2CLICK(VM_DATATOP - hi);
	vmprocess->vm_arch.vm_seg[S].mem_len += extra_clicks;

	/* We pretend to the kernel we have a huge stack segment to
	 * increase our data segment.
	 */
	vmprocess->vm_arch.vm_data_top =
		(vmprocess->vm_arch.vm_seg[S].mem_vir +
		vmprocess->vm_arch.vm_seg[S].mem_len) << CLICK_SHIFT;

	/* Where our free virtual address space starts.
	 * This is only a hint to the VM system.
	 */
	newpt->pt_virtop = 0;

	/* Let other functions know VM now has a private page table. */
	vmprocess->vm_flags |= VMF_HASPT;

	/* Now reserve another pde for kernel's own mappings. */
	{
		int kernmap_pde;
		phys_bytes addr, len;
		int flags, index = 0;
		u32_t offset = 0;

		kernmap_pde = free_pde++;
		offset = kernmap_pde * I386_BIG_PAGE_SIZE;

		/* Collect the mappings the kernel wants until it runs out. */
		while(sys_vmctl_get_mapping(index, &addr, &len,
			&flags) == OK) {
			vir_bytes vir;
			if(index >= MAX_KERNMAPPINGS)
				panic("VM: too many kernel mappings: %d", index);
			kern_mappings[index].phys_addr = addr;
			kern_mappings[index].len = len;
			kern_mappings[index].flags = flags;
			kern_mappings[index].lin_addr = offset;
			/* Note: the kernel-supplied flags are replaced here;
			 * only VMMF_UNCACHED is honored (below).
			 */
			kern_mappings[index].flags =
				I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE |
				global_bit;
			if(flags & VMMF_UNCACHED)
				kern_mappings[index].flags |= PTF_NOCACHE;
			if(addr % I386_PAGE_SIZE)
				panic("VM: addr unaligned: %d", addr);
			if(len % I386_PAGE_SIZE)
				panic("VM: len unaligned: %d", len);
			vir = arch_map2vir(&vmproc[VMP_SYSTEM], offset);
			if(sys_vmctl_reply_mapping(index, vir) != OK)
				panic("VM: reply failed");
			offset += len;
			index++;
			kernmappings++;
		}
	}

	/* Find a PDE below processes available for mapping in the
	 * page directories (readonly).
	 */
	pagedir_pde = free_pde++;
	pagedir_pde_val = (page_directories_phys & I386_VM_ADDR_MASK) |
			I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE;

	/* Tell kernel about free pde's. */
	while(free_pde*I386_BIG_PAGE_SIZE < VM_PROCSTART) {
		if((r=sys_vmctl(SELF, VMCTL_I386_FREEPDE, free_pde++)) != OK) {
			panic("VMCTL_I386_FREEPDE failed: %d", r);
		}
	}

	/* first pde in use by process. */
	proc_pde = free_pde;

	/* Give our process the new, copied, private page table. */
	pt_mapkernel(newpt);	/* didn't know about vm_dir pages earlier */
	pt_bind(newpt, vmprocess);

	/* new segment limit for the kernel after paging is enabled */
	ep_data.data_seg_limit = free_pde*I386_BIG_PAGE_SIZE;
	/* the memory map which must be installed after paging is enabled */
	ep_data.mem_map = vmprocess->vm_arch.vm_seg;

	/* Now actually enable paging. */
	if(sys_vmctl_enable_paging(&ep_data) != OK)
		panic("pt_init: enable paging failed");

	/* Back to reality - this is where the stack actually is. */
	vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks;

	/* All OK. */
	return;
}
910 /*===========================================================================*
911 * pt_bind *
912 *===========================================================================*/
913 PUBLIC int pt_bind(pt_t *pt, struct vmproc *who)
915 int slot, ispt;
916 u32_t phys;
917 void *pdes;
919 /* Basic sanity checks. */
920 assert(who);
921 assert(who->vm_flags & VMF_INUSE);
922 assert(pt);
924 assert(pagedir_pde >= 0);
926 slot = who->vm_slot;
927 assert(slot >= 0);
928 assert(slot < ELEMENTS(vmproc));
929 assert(slot < I386_VM_PT_ENTRIES);
931 phys = pt->pt_dir_phys & I386_VM_ADDR_MASK;
932 assert(pt->pt_dir_phys == phys);
934 /* Update "page directory pagetable." */
935 page_directories[slot] = phys | I386_VM_PRESENT|I386_VM_WRITE;
937 /* This is where the PDE's will be visible to the kernel
938 * in its address space.
940 pdes = (void *) arch_map2vir(&vmproc[VMP_SYSTEM],
941 pagedir_pde*I386_BIG_PAGE_SIZE +
942 slot * I386_PAGE_SIZE);
944 #if 0
945 printf("VM: slot %d endpoint %d has pde val 0x%lx at kernel address 0x%lx\n",
946 slot, who->vm_endpoint, page_directories[slot], pdes);
947 #endif
948 /* Tell kernel about new page table root. */
949 return sys_vmctl_set_addrspace(who->vm_endpoint,
950 pt ? pt->pt_dir_phys : 0,
951 pt ? pdes : 0);
954 /*===========================================================================*
955 * pt_free *
956 *===========================================================================*/
957 PUBLIC void pt_free(pt_t *pt)
959 /* Free memory associated with this pagetable. */
960 int i;
962 for(i = 0; i < I386_VM_DIR_ENTRIES; i++)
963 if(pt->pt_pt[i])
964 vm_freepages((vir_bytes) pt->pt_pt[i],
965 I386_VM_PFA(pt->pt_dir[i]), 1, VMP_PAGETABLE);
967 return;
/*===========================================================================*
 *				pt_mapkernel	     		     	     *
 *===========================================================================*/
PUBLIC int pt_mapkernel(pt_t *pt)
{
/* Install the kernel mappings every page table needs: the low-memory
 * identity mapping (as 4MB big pages), the page-directories pde, and
 * any extra kernel-requested mappings. Returns OK (panics on failure).
 */
	int r, i;	/* note: 'r' is currently unused */

	/* Any i386 page table needs to map in the kernel address space. */
	assert(vmproc[VMP_SYSTEM].vm_flags & VMF_INUSE);

	if(bigpage_ok) {
		int pde;
		/* Identity-map [0, (id_map_high_pde+1)*4MB) with big pages. */
		for(pde = 0; pde <= id_map_high_pde; pde++) {
			phys_bytes addr;
			addr = pde * I386_BIG_PAGE_SIZE;
			assert((addr & I386_VM_ADDR_MASK) == addr);
			pt->pt_dir[pde] = addr | I386_VM_PRESENT |
				I386_VM_BIGPAGE | I386_VM_USER |
				I386_VM_WRITE | global_bit;
		}
	} else {
		/* 4MB pages are required; no fallback path exists. */
		panic("VM: pt_mapkernel: no bigpage");
	}

	if(pagedir_pde >= 0) {
		/* Kernel also wants to know about all page directories. */
		pt->pt_dir[pagedir_pde] = pagedir_pde_val;
	}

	/* Kernel-requested mappings collected in pt_init. */
	for(i = 0; i < kernmappings; i++) {
		if(pt_writemap(pt,
			kern_mappings[i].lin_addr,
			kern_mappings[i].phys_addr,
			kern_mappings[i].len,
			kern_mappings[i].flags, 0) != OK) {
			panic("pt_mapkernel: pt_writemap failed");
		}
	}

	return OK;
}
1012 /*===========================================================================*
1013 * pt_cycle *
1014 *===========================================================================*/
1015 PUBLIC void pt_cycle(void)
1017 vm_checkspares();