assert conditions fix from trunk.
[minix.git] / kernel / arch / i386 / memory.c
blob b465a8fa3505e2134869af4de713cbc86097201e
3 #include "kernel/kernel.h"
4 #include "kernel/proc.h"
5 #include "kernel/vm.h"
7 #include <machine/vm.h>
9 #include <minix/type.h>
10 #include <minix/syslib.h>
11 #include <minix/cpufeature.h>
12 #include <string.h>
13 #include <assert.h>
14 #include <signal.h>
15 #include <stdlib.h>
17 #include <machine/vm.h>
19 #include "oxpcie.h"
20 #include "proto.h"
21 #include "kernel/proto.h"
22 #include "kernel/debug.h"
24 #ifdef CONFIG_APIC
25 #include "apic.h"
26 #ifdef CONFIG_WATCHDOG
27 #include "kernel/watchdog.h"
28 #endif
29 #endif
31 PRIVATE int psok = 0;
33 #define MAX_FREEPDES (3 * CONFIG_MAX_CPUS)
34 PRIVATE int nfreepdes = 0, freepdes[MAX_FREEPDES];
36 #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0)
38 FORWARD _PROTOTYPE( u32_t phys_get32, (phys_bytes v) );
39 FORWARD _PROTOTYPE( void vm_enable_paging, (void) );
42 /* *** Internal VM Functions *** */
44 PUBLIC void vm_init(struct proc *newptproc)
46 if(vm_running)
47 panic("vm_init: vm_running");
49 /* switch_address_space() checks what is in cr3, and doesn't do
50 * anything if it's the same as the cr3 of its argument, newptproc.
51 * If MINIX was previously booted, this could very well be the case.
53 * The first time switch_address_space() is called, we want to
54 * force it to do something (load cr3 and set newptproc), so we
55 * zero cr3, and force paging off to make that a safe thing to do.
57 * After that, vm_enable_paging() enables paging with the page table
58 * of newptproc loaded.
61 vm_stop();
62 write_cr3(0);
63 switch_address_space(newptproc);
64 assert(ptproc == newptproc);
65 vm_enable_paging();
66 vm_running = 1;
69 /* This function sets up a mapping from within the kernel's address
70 * space to any other area of memory, either straight physical
71 * memory (pr == NULL) or a process view of memory, in 4MB windows.
72 * I.e., it maps in 4MB chunks of virtual (or physical) address space
73 * to 4MB chunks of kernel virtual address space.
75 * It recognizes pr already being in memory as a special case (no
76 * mapping required).
78 * The target (i.e. in-kernel) mapping area is one of the freepdes[]
79 * VM has earlier already told the kernel about that is available. It is
80 * identified as the 'pde' parameter. This value can be chosen freely
81 * by the caller, as long as it is in range (i.e. 0 or higher and corresonds
82 * to a known freepde slot). It is up to the caller to keep track of which
83 * freepde's are in use, and to determine which ones are free to use.
85 * The logical number supplied by the caller is translated into an actual
86 * pde number to be used, and a pointer to it (linear address) is returned
87 * for actual use by phys_copy or phys_memset.
89 PRIVATE phys_bytes createpde(
90 const struct proc *pr, /* Requested process, NULL for physical. */
91 const phys_bytes linaddr,/* Address after segment translation. */
92 phys_bytes *bytes, /* Size of chunk, function may truncate it. */
93 int free_pde_idx, /* index of the free slot to use */
94 int *changed /* If mapping is made, this is set to 1. */
97 u32_t pdeval;
98 phys_bytes offset;
99 int pde;
101 assert(free_pde_idx >= 0 && free_pde_idx < nfreepdes);
102 pde = freepdes[free_pde_idx];
103 assert(pde >= 0 && pde < 1024);
105 if(pr && ((pr == ptproc) || !HASPT(pr))) {
106 /* Process memory is requested, and
107 * it's a process that is already in current page table, or
108 * a process that is in every page table.
109 * Therefore linaddr is valid directly, with the requested
110 * size.
112 return linaddr;
115 if(pr) {
116 /* Requested address is in a process that is not currently
117 * accessible directly. Grab the PDE entry of that process'
118 * page table that corresponds to the requested address.
120 assert(pr->p_seg.p_cr3_v);
121 pdeval = pr->p_seg.p_cr3_v[I386_VM_PDE(linaddr)];
122 } else {
123 /* Requested address is physical. Make up the PDE entry. */
124 pdeval = (linaddr & I386_VM_ADDR_MASK_4MB) |
125 I386_VM_BIGPAGE | I386_VM_PRESENT |
126 I386_VM_WRITE | I386_VM_USER;
129 /* Write the pde value that we need into a pde that the kernel
130 * can access, into the currently loaded page table so it becomes
131 * visible.
133 assert(ptproc->p_seg.p_cr3_v);
134 if(ptproc->p_seg.p_cr3_v[pde] != pdeval) {
135 ptproc->p_seg.p_cr3_v[pde] = pdeval;
136 *changed = 1;
139 /* Memory is now available, but only the 4MB window of virtual
140 * address space that we have mapped; calculate how much of
141 * the requested range is visible and return that in *bytes,
142 * if that is less than the requested range.
144 offset = linaddr & I386_VM_OFFSET_MASK_4MB; /* Offset in 4MB window. */
145 *bytes = MIN(*bytes, I386_BIG_PAGE_SIZE - offset);
147 /* Return the linear address of the start of the new mapping. */
148 return I386_BIG_PAGE_SIZE*pde + offset;
151 /*===========================================================================*
152 * lin_lin_copy *
153 *===========================================================================*/
154 PRIVATE int lin_lin_copy(const struct proc *srcproc, vir_bytes srclinaddr,
155 const struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes)
157 u32_t addr;
158 proc_nr_t procslot;
160 assert(vm_running);
161 assert(nfreepdes >= 3);
163 assert(ptproc);
164 assert(proc_ptr);
165 assert(read_cr3() == ptproc->p_seg.p_cr3);
167 procslot = ptproc->p_nr;
169 assert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES);
171 if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
172 if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
173 assert(!RTS_ISSET(ptproc, RTS_SLOT_FREE));
174 assert(ptproc->p_seg.p_cr3_v);
176 while(bytes > 0) {
177 phys_bytes srcptr, dstptr;
178 vir_bytes chunk = bytes;
179 int changed = 0;
181 /* Set up 4MB ranges. */
182 srcptr = createpde(srcproc, srclinaddr, &chunk, 0, &changed);
183 dstptr = createpde(dstproc, dstlinaddr, &chunk, 1, &changed);
184 if(changed)
185 reload_cr3();
187 /* Copy pages. */
188 PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr);
190 if(addr) {
191 /* If addr is nonzero, a page fault was caught. */
193 if(addr >= srcptr && addr < (srcptr + chunk)) {
194 return EFAULT_SRC;
196 if(addr >= dstptr && addr < (dstptr + chunk)) {
197 return EFAULT_DST;
200 panic("lin_lin_copy fault out of range");
202 /* Not reached. */
203 return EFAULT;
206 /* Update counter and addresses for next iteration, if any. */
207 bytes -= chunk;
208 srclinaddr += chunk;
209 dstlinaddr += chunk;
212 if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
213 if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
214 assert(!RTS_ISSET(ptproc, RTS_SLOT_FREE));
215 assert(ptproc->p_seg.p_cr3_v);
217 return OK;
221 PRIVATE u32_t phys_get32(phys_bytes addr)
223 const u32_t v;
224 int r;
226 if(!vm_running) {
227 phys_copy(addr, vir2phys(&v), sizeof(v));
228 return v;
231 if((r=lin_lin_copy(NULL, addr,
232 proc_addr(SYSTEM), vir2phys(&v), sizeof(v))) != OK) {
233 panic("lin_lin_copy for phys_get32 failed: %d", r);
236 return v;
239 PRIVATE char *cr0_str(u32_t e)
241 static char str[80];
242 strcpy(str, "");
243 #define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~v; } } while(0)
244 FLAG(I386_CR0_PE);
245 FLAG(I386_CR0_MP);
246 FLAG(I386_CR0_EM);
247 FLAG(I386_CR0_TS);
248 FLAG(I386_CR0_ET);
249 FLAG(I386_CR0_PG);
250 FLAG(I386_CR0_WP);
251 if(e) { strcat(str, " (++)"); }
252 return str;
255 PRIVATE char *cr4_str(u32_t e)
257 static char str[80];
258 strcpy(str, "");
259 FLAG(I386_CR4_VME);
260 FLAG(I386_CR4_PVI);
261 FLAG(I386_CR4_TSD);
262 FLAG(I386_CR4_DE);
263 FLAG(I386_CR4_PSE);
264 FLAG(I386_CR4_PAE);
265 FLAG(I386_CR4_MCE);
266 FLAG(I386_CR4_PGE);
267 if(e) { strcat(str, " (++)"); }
268 return str;
271 PUBLIC void vm_stop(void)
273 write_cr0(read_cr0() & ~I386_CR0_PG);
276 PRIVATE void vm_enable_paging(void)
278 u32_t cr0, cr4;
279 int pgeok;
281 psok = _cpufeature(_CPUF_I386_PSE);
282 pgeok = _cpufeature(_CPUF_I386_PGE);
284 cr0= read_cr0();
285 cr4= read_cr4();
287 /* First clear PG and PGE flag, as PGE must be enabled after PG. */
288 write_cr0(cr0 & ~I386_CR0_PG);
289 write_cr4(cr4 & ~(I386_CR4_PGE | I386_CR4_PSE));
291 cr0= read_cr0();
292 cr4= read_cr4();
294 /* Our first page table contains 4MB entries. */
295 if(psok)
296 cr4 |= I386_CR4_PSE;
298 write_cr4(cr4);
300 /* First enable paging, then enable global page flag. */
301 cr0 |= I386_CR0_PG;
302 write_cr0(cr0 );
303 cr0 |= I386_CR0_WP;
304 write_cr0(cr0);
306 /* May we enable these features? */
307 if(pgeok)
308 cr4 |= I386_CR4_PGE;
310 write_cr4(cr4);
313 PUBLIC vir_bytes alloc_remote_segment(u32_t *selector,
314 segframe_t *segments, const int index, phys_bytes phys,
315 vir_bytes size, int priv)
317 phys_bytes offset = 0;
318 /* Check if the segment size can be recorded in bytes, that is, check
319 * if descriptor's limit field can delimited the allowed memory region
320 * precisely. This works up to 1MB. If the size is larger, 4K pages
321 * instead of bytes are used.
323 if (size < BYTE_GRAN_MAX) {
324 init_dataseg(&segments->p_ldt[EXTRA_LDT_INDEX+index],
325 phys, size, priv);
326 *selector = ((EXTRA_LDT_INDEX+index)*0x08) | (1*0x04) | priv;
327 offset = 0;
328 } else {
329 init_dataseg(&segments->p_ldt[EXTRA_LDT_INDEX+index],
330 phys & ~0xFFFF, 0, priv);
331 *selector = ((EXTRA_LDT_INDEX+index)*0x08) | (1*0x04) | priv;
332 offset = phys & 0xFFFF;
335 return offset;
338 PUBLIC phys_bytes umap_remote(const struct proc* rp, const int seg,
339 const vir_bytes vir_addr, const vir_bytes bytes)
341 /* Calculate the physical memory address for a given virtual address. */
342 struct far_mem *fm;
344 #if 0
345 if(rp->p_misc_flags & MF_FULLVM) return 0;
346 #endif
348 if (bytes <= 0) return( (phys_bytes) 0);
349 if (seg < 0 || seg >= NR_REMOTE_SEGS) return( (phys_bytes) 0);
351 fm = &rp->p_priv->s_farmem[seg];
352 if (! fm->in_use) return( (phys_bytes) 0);
353 if (vir_addr + bytes > fm->mem_len) return( (phys_bytes) 0);
355 return(fm->mem_phys + (phys_bytes) vir_addr);
358 /*===========================================================================*
359 * umap_local *
360 *===========================================================================*/
361 PUBLIC phys_bytes umap_local(rp, seg, vir_addr, bytes)
362 register struct proc *rp; /* pointer to proc table entry for process */
363 int seg; /* T, D, or S segment */
364 vir_bytes vir_addr; /* virtual address in bytes within the seg */
365 vir_bytes bytes; /* # of bytes to be copied */
367 /* Calculate the physical memory address for a given virtual address. */
368 vir_clicks vc; /* the virtual address in clicks */
369 phys_bytes pa; /* intermediate variables as phys_bytes */
370 phys_bytes seg_base;
372 if(seg != T && seg != D && seg != S)
373 panic("umap_local: wrong seg: %d", seg);
375 if (bytes <= 0) return( (phys_bytes) 0);
376 if (vir_addr + bytes <= vir_addr) return 0; /* overflow */
377 vc = (vir_addr + bytes - 1) >> CLICK_SHIFT; /* last click of data */
379 if (seg != T)
380 seg = (vc < rp->p_memmap[D].mem_vir + rp->p_memmap[D].mem_len ? D : S);
381 else if (rp->p_memmap[T].mem_len == 0) /* common I&D? */
382 seg = D; /* ptrace needs this */
384 if ((vir_addr>>CLICK_SHIFT) >= rp->p_memmap[seg].mem_vir +
385 rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
387 if (vc >= rp->p_memmap[seg].mem_vir +
388 rp->p_memmap[seg].mem_len) return( (phys_bytes) 0 );
390 seg_base = (phys_bytes) rp->p_memmap[seg].mem_phys;
391 seg_base = seg_base << CLICK_SHIFT; /* segment origin in bytes */
392 pa = (phys_bytes) vir_addr;
393 pa -= rp->p_memmap[seg].mem_vir << CLICK_SHIFT;
394 return(seg_base + pa);
397 /*===========================================================================*
398 * umap_virtual *
399 *===========================================================================*/
400 PUBLIC phys_bytes umap_virtual(rp, seg, vir_addr, bytes)
401 register struct proc *rp; /* pointer to proc table entry for process */
402 int seg; /* T, D, or S segment */
403 vir_bytes vir_addr; /* virtual address in bytes within the seg */
404 vir_bytes bytes; /* # of bytes to be copied */
406 vir_bytes linear;
407 u32_t phys = 0;
409 if(seg == MEM_GRANT) {
410 return umap_grant(rp, (cp_grant_id_t) vir_addr, bytes);
413 if(!(linear = umap_local(rp, seg, vir_addr, bytes))) {
414 printf("SYSTEM:umap_virtual: umap_local failed\n");
415 phys = 0;
416 } else {
417 if(vm_lookup(rp, linear, &phys, NULL) != OK) {
418 printf("SYSTEM:umap_virtual: vm_lookup of %s: seg 0x%lx: 0x%lx failed\n", rp->p_name, seg, vir_addr);
419 phys = 0;
421 if(phys == 0)
422 panic("vm_lookup returned phys: %d", phys);
426 if(phys == 0) {
427 printf("SYSTEM:umap_virtual: lookup failed\n");
428 return 0;
431 /* Now make sure addresses are contiguous in physical memory
432 * so that the umap makes sense.
434 if(bytes > 0 && !vm_contiguous(rp, linear, bytes)) {
435 printf("umap_virtual: %s: %d at 0x%lx (vir 0x%lx) not contiguous\n",
436 rp->p_name, bytes, linear, vir_addr);
437 return 0;
440 /* phys must be larger than 0 (or the caller will think the call
441 * failed), and address must not cross a page boundary.
443 assert(phys);
445 return phys;
449 /*===========================================================================*
450 * vm_lookup *
451 *===========================================================================*/
452 PUBLIC int vm_lookup(const struct proc *proc, const vir_bytes virtual,
453 vir_bytes *physical, u32_t *ptent)
455 u32_t *root, *pt;
456 int pde, pte;
457 u32_t pde_v, pte_v;
459 assert(proc);
460 assert(physical);
461 assert(!isemptyp(proc));
463 if(!HASPT(proc)) {
464 *physical = virtual;
465 return OK;
468 /* Retrieve page directory entry. */
469 root = (u32_t *) proc->p_seg.p_cr3;
470 assert(!((u32_t) root % I386_PAGE_SIZE));
471 pde = I386_VM_PDE(virtual);
472 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
473 pde_v = phys_get32((u32_t) (root + pde));
475 if(!(pde_v & I386_VM_PRESENT)) {
476 return EFAULT;
479 /* We don't expect to ever see this. */
480 if(pde_v & I386_VM_BIGPAGE) {
481 *physical = pde_v & I386_VM_ADDR_MASK_4MB;
482 if(ptent) *ptent = pde_v;
483 *physical += virtual & I386_VM_OFFSET_MASK_4MB;
484 } else {
485 /* Retrieve page table entry. */
486 pt = (u32_t *) I386_VM_PFA(pde_v);
487 assert(!((u32_t) pt % I386_PAGE_SIZE));
488 pte = I386_VM_PTE(virtual);
489 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
490 pte_v = phys_get32((u32_t) (pt + pte));
491 if(!(pte_v & I386_VM_PRESENT)) {
492 return EFAULT;
495 if(ptent) *ptent = pte_v;
497 /* Actual address now known; retrieve it and add page offset. */
498 *physical = I386_VM_PFA(pte_v);
499 *physical += virtual % I386_PAGE_SIZE;
502 return OK;
505 /*===========================================================================*
506 * vm_contiguous *
507 *===========================================================================*/
508 PUBLIC int vm_contiguous(const struct proc *targetproc, vir_bytes vir_buf, size_t bytes)
510 int first = 1, r;
511 u32_t prev_phys = 0; /* Keep lints happy. */
512 u32_t po;
514 assert(targetproc);
515 assert(bytes > 0);
517 if(!HASPT(targetproc))
518 return 1;
520 /* Start and end at page boundary to make logic simpler. */
521 po = vir_buf % I386_PAGE_SIZE;
522 if(po > 0) {
523 bytes += po;
524 vir_buf -= po;
526 po = (vir_buf + bytes) % I386_PAGE_SIZE;
527 if(po > 0)
528 bytes += I386_PAGE_SIZE - po;
530 /* Keep going as long as we cross a page boundary. */
531 while(bytes > 0) {
532 u32_t phys;
534 if((r=vm_lookup(targetproc, vir_buf, &phys, NULL)) != OK) {
535 printf("vm_contiguous: vm_lookup failed, %d\n", r);
536 printf("kernel stack: ");
537 util_stacktrace();
538 return 0;
541 if(!first) {
542 if(prev_phys+I386_PAGE_SIZE != phys) {
543 printf("vm_contiguous: no (0x%lx, 0x%lx)\n",
544 prev_phys, phys);
545 printf("kernel stack: ");
546 util_stacktrace();
547 return 0;
551 first = 0;
553 prev_phys = phys;
554 vir_buf += I386_PAGE_SIZE;
555 bytes -= I386_PAGE_SIZE;
558 return 1;
561 /*===========================================================================*
562 * vm_suspend *
563 *===========================================================================*/
564 PRIVATE void vm_suspend(struct proc *caller, const struct proc *target,
565 const vir_bytes linaddr, const vir_bytes len, const int type)
567 /* This range is not OK for this process. Set parameters
568 * of the request and notify VM about the pending request.
570 assert(!RTS_ISSET(caller, RTS_VMREQUEST));
571 assert(!RTS_ISSET(target, RTS_VMREQUEST));
573 RTS_SET(caller, RTS_VMREQUEST);
575 caller->p_vmrequest.req_type = VMPTYPE_CHECK;
576 caller->p_vmrequest.target = target->p_endpoint;
577 caller->p_vmrequest.params.check.start = linaddr;
578 caller->p_vmrequest.params.check.length = len;
579 caller->p_vmrequest.params.check.writeflag = 1;
580 caller->p_vmrequest.type = type;
582 /* Connect caller on vmrequest wait queue. */
583 if(!(caller->p_vmrequest.nextrequestor = vmrequest))
584 send_sig(VM_PROC_NR, SIGKMEM);
585 vmrequest = caller;
588 /*===========================================================================*
589 * delivermsg *
590 *===========================================================================*/
591 int delivermsg(struct proc *rp)
593 phys_bytes addr;
594 int r;
596 assert(rp->p_misc_flags & MF_DELIVERMSG);
597 assert(rp->p_delivermsg.m_source != NONE);
599 assert(rp->p_delivermsg_lin);
600 assert(rp->p_delivermsg_lin == umap_local(rp, D, rp->p_delivermsg_vir, sizeof(message)));
602 PHYS_COPY_CATCH(vir2phys(&rp->p_delivermsg),
603 rp->p_delivermsg_lin, sizeof(message), addr);
605 if(addr) {
606 vm_suspend(rp, rp, rp->p_delivermsg_lin, sizeof(message),
607 VMSTYPE_DELIVERMSG);
608 r = VMSUSPEND;
609 } else {
610 /* Indicate message has been delivered; address is 'used'. */
611 rp->p_delivermsg.m_source = NONE;
612 rp->p_delivermsg_lin = 0;
614 rp->p_misc_flags &= ~MF_DELIVERMSG;
615 r = OK;
618 return r;
621 PRIVATE char *flagstr(u32_t e, const int dir)
623 static char str[80];
624 strcpy(str, "");
625 FLAG(I386_VM_PRESENT);
626 FLAG(I386_VM_WRITE);
627 FLAG(I386_VM_USER);
628 FLAG(I386_VM_PWT);
629 FLAG(I386_VM_PCD);
630 FLAG(I386_VM_GLOBAL);
631 if(dir)
632 FLAG(I386_VM_BIGPAGE); /* Page directory entry only */
633 else
634 FLAG(I386_VM_DIRTY); /* Page table entry only */
635 return str;
638 PRIVATE void vm_pt_print(u32_t *pagetable, const u32_t v)
640 int pte;
641 int col = 0;
643 assert(!((u32_t) pagetable % I386_PAGE_SIZE));
645 for(pte = 0; pte < I386_VM_PT_ENTRIES; pte++) {
646 u32_t pte_v, pfa;
647 pte_v = phys_get32((u32_t) (pagetable + pte));
648 if(!(pte_v & I386_VM_PRESENT))
649 continue;
650 pfa = I386_VM_PFA(pte_v);
651 printf("%4d:%08lx:%08lx %2s ",
652 pte, v + I386_PAGE_SIZE*pte, pfa,
653 (pte_v & I386_VM_WRITE) ? "rw":"RO");
654 col++;
655 if(col == 3) { printf("\n"); col = 0; }
657 if(col > 0) printf("\n");
659 return;
662 PRIVATE void vm_print(u32_t *root)
664 int pde;
666 assert(!((u32_t) root % I386_PAGE_SIZE));
668 printf("page table 0x%lx:\n", root);
670 for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) {
671 u32_t pde_v;
672 u32_t *pte_a;
673 pde_v = phys_get32((u32_t) (root + pde));
674 if(!(pde_v & I386_VM_PRESENT))
675 continue;
676 if(pde_v & I386_VM_BIGPAGE) {
677 printf("%4d: 0x%lx, flags %s\n",
678 pde, I386_VM_PFA(pde_v), flagstr(pde_v, 1));
679 } else {
680 pte_a = (u32_t *) I386_VM_PFA(pde_v);
681 printf("%4d: pt %08lx %s\n",
682 pde, pte_a, flagstr(pde_v, 1));
683 vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
684 printf("\n");
689 return;
692 /*===========================================================================*
693 * lin_memset *
694 *===========================================================================*/
695 int vm_phys_memset(phys_bytes ph, const u8_t c, phys_bytes bytes)
697 u32_t p;
699 p = c | (c << 8) | (c << 16) | (c << 24);
701 if(!vm_running) {
702 phys_memset(ph, p, bytes);
703 return OK;
706 assert(nfreepdes >= 3);
708 assert(ptproc->p_seg.p_cr3_v);
710 /* With VM, we have to map in the physical memory.
711 * We can do this 4MB at a time.
713 while(bytes > 0) {
714 int changed = 0;
715 phys_bytes chunk = bytes, ptr;
716 ptr = createpde(NULL, ph, &chunk, 0, &changed);
717 if(changed)
718 reload_cr3();
720 /* We can memset as many bytes as we have remaining,
721 * or as many as remain in the 4MB chunk we mapped in.
723 phys_memset(ptr, p, chunk);
724 bytes -= chunk;
725 ph += chunk;
728 assert(ptproc->p_seg.p_cr3_v);
730 return OK;
733 /*===========================================================================*
734 * virtual_copy_f *
735 *===========================================================================*/
736 PUBLIC int virtual_copy_f(caller, src_addr, dst_addr, bytes, vmcheck)
737 struct proc * caller;
738 struct vir_addr *src_addr; /* source virtual address */
739 struct vir_addr *dst_addr; /* destination virtual address */
740 vir_bytes bytes; /* # of bytes to copy */
741 int vmcheck; /* if nonzero, can return VMSUSPEND */
743 /* Copy bytes from virtual address src_addr to virtual address dst_addr.
744 * Virtual addresses can be in ABS, LOCAL_SEG, REMOTE_SEG, or BIOS_SEG.
746 struct vir_addr *vir_addr[2]; /* virtual source and destination address */
747 phys_bytes phys_addr[2]; /* absolute source and destination */
748 int seg_index;
749 int i;
750 struct proc *procs[2];
752 assert((vmcheck && caller) || (!vmcheck && !caller));
754 /* Check copy count. */
755 if (bytes <= 0) return(EDOM);
757 /* Do some more checks and map virtual addresses to physical addresses. */
758 vir_addr[_SRC_] = src_addr;
759 vir_addr[_DST_] = dst_addr;
761 for (i=_SRC_; i<=_DST_; i++) {
762 int proc_nr, type;
763 struct proc *p;
765 type = vir_addr[i]->segment & SEGMENT_TYPE;
766 if((type != PHYS_SEG && type != BIOS_SEG) &&
767 isokendpt(vir_addr[i]->proc_nr_e, &proc_nr))
768 p = proc_addr(proc_nr);
769 else
770 p = NULL;
772 procs[i] = p;
774 /* Get physical address. */
775 switch(type) {
776 case LOCAL_SEG:
777 case LOCAL_VM_SEG:
778 if(!p) {
779 return EDEADSRCDST;
781 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
782 if(type == LOCAL_SEG)
783 phys_addr[i] = umap_local(p, seg_index, vir_addr[i]->offset,
784 bytes);
785 else
786 phys_addr[i] = umap_virtual(p, seg_index,
787 vir_addr[i]->offset, bytes);
788 if(phys_addr[i] == 0) {
789 printf("virtual_copy: map 0x%x failed for %s seg %d, "
790 "offset %lx, len %d, i %d\n",
791 type, p->p_name, seg_index, vir_addr[i]->offset,
792 bytes, i);
794 break;
795 case REMOTE_SEG:
796 if(!p) {
797 return EDEADSRCDST;
799 seg_index = vir_addr[i]->segment & SEGMENT_INDEX;
800 phys_addr[i] = umap_remote(p, seg_index, vir_addr[i]->offset, bytes);
801 break;
802 #if _MINIX_CHIP == _CHIP_INTEL
803 case BIOS_SEG:
804 phys_addr[i] = umap_bios(vir_addr[i]->offset, bytes );
805 break;
806 #endif
807 case PHYS_SEG:
808 phys_addr[i] = vir_addr[i]->offset;
809 break;
810 default:
811 printf("virtual_copy: strange type 0x%x\n", type);
812 return EINVAL;
815 /* Check if mapping succeeded. */
816 if (phys_addr[i] <= 0 && vir_addr[i]->segment != PHYS_SEG) {
817 printf("virtual_copy EFAULT\n");
818 return EFAULT;
822 if(vm_running) {
823 int r;
825 if(caller && RTS_ISSET(caller, RTS_VMREQUEST)) {
826 assert(caller->p_vmrequest.vmresult != VMSUSPEND);
827 RTS_UNSET(caller, RTS_VMREQUEST);
828 if(caller->p_vmrequest.vmresult != OK) {
829 return caller->p_vmrequest.vmresult;
833 if((r=lin_lin_copy(procs[_SRC_], phys_addr[_SRC_],
834 procs[_DST_], phys_addr[_DST_], bytes)) != OK) {
835 struct proc *target = NULL;
836 phys_bytes lin;
837 if(r != EFAULT_SRC && r != EFAULT_DST)
838 panic("lin_lin_copy failed: %d", r);
839 if(!vmcheck || !caller) {
840 return r;
843 if(r == EFAULT_SRC) {
844 lin = phys_addr[_SRC_];
845 target = procs[_SRC_];
846 } else if(r == EFAULT_DST) {
847 lin = phys_addr[_DST_];
848 target = procs[_DST_];
849 } else {
850 panic("r strange: %d", r);
853 assert(caller);
854 assert(target);
856 vm_suspend(caller, target, lin, bytes, VMSTYPE_KERNELCALL);
857 return VMSUSPEND;
860 return OK;
863 assert(!vm_running);
865 /* can't copy to/from process with PT without VM */
866 #define NOPT(p) (!(p) || !HASPT(p))
867 if(!NOPT(procs[_SRC_])) {
868 printf("ignoring page table src: %s / %d at 0x%lx\n",
869 procs[_SRC_]->p_name, procs[_SRC_]->p_endpoint, procs[_SRC_]->p_seg.p_cr3);
871 if(!NOPT(procs[_DST_])) {
872 printf("ignoring page table dst: %s / %d at 0x%lx\n",
873 procs[_DST_]->p_name, procs[_DST_]->p_endpoint,
874 procs[_DST_]->p_seg.p_cr3);
877 /* Now copy bytes between physical addresseses. */
878 if(phys_copy(phys_addr[_SRC_], phys_addr[_DST_], (phys_bytes) bytes))
879 return EFAULT;
881 return OK;
884 /*===========================================================================*
885 * data_copy *
886 *===========================================================================*/
887 PUBLIC int data_copy(const endpoint_t from_proc, const vir_bytes from_addr,
888 const endpoint_t to_proc, const vir_bytes to_addr,
889 size_t bytes)
891 struct vir_addr src, dst;
893 src.segment = dst.segment = D;
894 src.offset = from_addr;
895 dst.offset = to_addr;
896 src.proc_nr_e = from_proc;
897 dst.proc_nr_e = to_proc;
899 return virtual_copy(&src, &dst, bytes);
902 /*===========================================================================*
903 * data_copy_vmcheck *
904 *===========================================================================*/
905 PUBLIC int data_copy_vmcheck(struct proc * caller,
906 const endpoint_t from_proc, const vir_bytes from_addr,
907 const endpoint_t to_proc, const vir_bytes to_addr,
908 size_t bytes)
910 struct vir_addr src, dst;
912 src.segment = dst.segment = D;
913 src.offset = from_addr;
914 dst.offset = to_addr;
915 src.proc_nr_e = from_proc;
916 dst.proc_nr_e = to_proc;
918 return virtual_copy_vmcheck(caller, &src, &dst, bytes);
921 /*===========================================================================*
922 * arch_pre_exec *
923 *===========================================================================*/
924 PUBLIC void arch_pre_exec(struct proc *pr, const u32_t ip, const u32_t sp)
926 /* wipe extra LDT entries, set program counter, and stack pointer. */
927 memset(pr->p_seg.p_ldt + EXTRA_LDT_INDEX, 0,
928 sizeof(pr->p_seg.p_ldt[0]) * (LDT_SIZE - EXTRA_LDT_INDEX));
929 pr->p_reg.pc = ip;
930 pr->p_reg.sp = sp;
933 /*===========================================================================*
934 * arch_umap *
935 *===========================================================================*/
936 PUBLIC int arch_umap(const struct proc *pr, vir_bytes offset, vir_bytes count,
937 int seg, phys_bytes *addr)
939 switch(seg) {
940 case BIOS_SEG:
941 *addr = umap_bios(offset, count);
942 return OK;
945 /* This must be EINVAL; the umap fallback function in
946 * lib/syslib/alloc_util.c depends on it to detect an
947 * older kernel (as opposed to mapping error).
949 return EINVAL;
952 /* VM reports page directory slot we're allowed to use freely. */
953 void i386_freepde(const int pde)
955 if(nfreepdes >= MAX_FREEPDES)
956 return;
957 freepdes[nfreepdes++] = pde;
960 PRIVATE int lapic_mapping_index = -1, oxpcie_mapping_index = -1;
962 PUBLIC int arch_phys_map(const int index, phys_bytes *addr,
963 phys_bytes *len, int *flags)
965 static int first = 1;
966 int freeidx = 0;
967 static char *ser_var = NULL;
969 if(first) {
970 #ifdef CONFIG_APIC
971 if(lapic_addr)
972 lapic_mapping_index = freeidx++;
973 #endif
975 #ifdef CONFIG_OXPCIE
976 if((ser_var = env_get("oxpcie"))) {
977 if(ser_var[0] != '0' || ser_var[1] != 'x') {
978 printf("oxpcie address in hex please\n");
979 } else {
980 oxpcie_mapping_index = freeidx++;
983 #endif
984 first = 0;
987 #ifdef CONFIG_APIC
988 /* map the local APIC if enabled */
989 if (index == lapic_mapping_index) {
990 *addr = vir2phys(lapic_addr);
991 *len = 4 << 10 /* 4kB */;
992 *flags = VMMF_UNCACHED;
993 return OK;
995 #endif
997 #if CONFIG_OXPCIE
998 if(index == oxpcie_mapping_index) {
999 *addr = strtoul(ser_var+2, NULL, 16);
1000 *len = 0x4000;
1001 *flags = VMMF_UNCACHED;
1002 return OK;
1004 #endif
1006 return EINVAL;
1009 PUBLIC int arch_phys_map_reply(const int index, const vir_bytes addr)
1011 #ifdef CONFIG_APIC
1012 /* if local APIC is enabled */
1013 if (index == lapic_mapping_index && lapic_addr) {
1014 lapic_addr_vaddr = addr;
1016 #endif
1018 #if CONFIG_OXPCIE
1019 if (index == oxpcie_mapping_index) {
1020 oxpcie_set_vaddr((unsigned char *) addr);
1022 #endif
1024 return OK;
1027 PUBLIC int arch_enable_paging(struct proc * caller, const message * m_ptr)
1029 struct vm_ep_data ep_data;
1030 int r;
1033 * copy the extra data associated with the call from userspace
1035 if((r=data_copy(caller->p_endpoint, (vir_bytes)m_ptr->SVMCTL_VALUE,
1036 KERNEL, (vir_bytes) &ep_data, sizeof(ep_data))) != OK) {
1037 printf("vmctl_enable_paging: data_copy failed! (%d)\n", r);
1038 return r;
1042 * when turning paging on i386 we also change the segment limits to make
1043 * the special mappings requested by the kernel reachable
1045 if ((r = prot_set_kern_seg_limit(ep_data.data_seg_limit)) != OK)
1046 return r;
1049 * install the new map provided by the call
1051 if (newmap(caller, caller, ep_data.mem_map) != OK)
1052 panic("arch_enable_paging: newmap failed");
1054 FIXLINMSG(caller);
1055 assert(caller->p_delivermsg_lin == umap_local(caller, D,
1056 caller->p_delivermsg_vir, sizeof(message)));
1058 #ifdef CONFIG_APIC
1059 /* if local APIC is enabled */
1060 if (lapic_addr) {
1061 lapic_addr = lapic_addr_vaddr;
1062 lapic_eoi_addr = LAPIC_EOI;
1064 #endif
1065 #ifdef CONFIG_WATCHDOG
1067 * We make sure that we don't enable the watchdog until paging is turned
1068 * on as we might get a NMI while switching and we might still use wrong
1069 * lapic address. Bad things would happen. It is unfortunate but such is
1070 * life
1072 i386_watchdog_start();
1073 #endif
1075 return OK;
1078 PUBLIC void release_address_space(struct proc *pr)
1080 pr->p_seg.p_cr3_v = NULL;