make vfs & filesystems use failable copying
[minix3.git] / kernel / arch / i386 / memory.c
blob320dbe12f6c34c6318055fb74aaeeeb37f9d239b
2 #include "kernel/kernel.h"
3 #include "kernel/vm.h"
5 #include <machine/vm.h>
7 #include <minix/type.h>
8 #include <minix/syslib.h>
9 #include <minix/cpufeature.h>
10 #include <string.h>
11 #include <assert.h>
12 #include <signal.h>
13 #include <stdlib.h>
15 #include <machine/vm.h>
17 #include "oxpcie.h"
18 #include "arch_proto.h"
20 #ifdef USE_APIC
21 #include "apic.h"
22 #ifdef USE_WATCHDOG
23 #include "kernel/watchdog.h"
24 #endif
25 #endif
27 phys_bytes video_mem_vaddr = 0;
29 #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0)
30 static int nfreepdes = 0;
31 #define MAXFREEPDES 2
32 static int freepdes[MAXFREEPDES];
34 static u32_t phys_get32(phys_bytes v);
36 void mem_clear_mapcache(void)
38 int i;
39 for(i = 0; i < nfreepdes; i++) {
40 struct proc *ptproc = get_cpulocal_var(ptproc);
41 int pde = freepdes[i];
42 u32_t *ptv;
43 assert(ptproc);
44 ptv = ptproc->p_seg.p_cr3_v;
45 assert(ptv);
46 ptv[pde] = 0;
50 /* This function sets up a mapping from within the kernel's address
51 * space to any other area of memory, either straight physical
52 * memory (pr == NULL) or a process view of memory, in 4MB windows.
53 * I.e., it maps in 4MB chunks of virtual (or physical) address space
54 * to 4MB chunks of kernel virtual address space.
56 * It recognizes pr already being in memory as a special case (no
57 * mapping required).
59 * The target (i.e. in-kernel) mapping area is one of the freepdes[]
60 * VM has earlier already told the kernel about that is available. It is
61 * identified as the 'pde' parameter. This value can be chosen freely
62 * by the caller, as long as it is in range (i.e. 0 or higher and corresponds
63 * to a known freepde slot). It is up to the caller to keep track of which
64 * freepde's are in use, and to determine which ones are free to use.
66 * The logical number supplied by the caller is translated into an actual
67 * pde number to be used, and a pointer to it (linear address) is returned
68 * for actual use by phys_copy or memset.
70 static phys_bytes createpde(
71 const struct proc *pr, /* Requested process, NULL for physical. */
72 const phys_bytes linaddr,/* Address after segment translation. */
73 phys_bytes *bytes, /* Size of chunk, function may truncate it. */
74 int free_pde_idx, /* index of the free slot to use */
75 int *changed /* If mapping is made, this is set to 1. */
78 u32_t pdeval;
79 phys_bytes offset;
80 int pde;
82 assert(free_pde_idx >= 0 && free_pde_idx < nfreepdes);
83 pde = freepdes[free_pde_idx];
84 assert(pde >= 0 && pde < 1024);
86 if(pr && ((pr == get_cpulocal_var(ptproc)) || iskernelp(pr))) {
87 /* Process memory is requested, and
88 * it's a process that is already in current page table, or
89 * the kernel, which is always there.
90 * Therefore linaddr is valid directly, with the requested
91 * size.
93 return linaddr;
96 if(pr) {
97 /* Requested address is in a process that is not currently
98 * accessible directly. Grab the PDE entry of that process'
99 * page table that corresponds to the requested address.
101 assert(pr->p_seg.p_cr3_v);
102 pdeval = pr->p_seg.p_cr3_v[I386_VM_PDE(linaddr)];
103 } else {
104 /* Requested address is physical. Make up the PDE entry. */
105 pdeval = (linaddr & I386_VM_ADDR_MASK_4MB) |
106 I386_VM_BIGPAGE | I386_VM_PRESENT |
107 I386_VM_WRITE | I386_VM_USER;
110 /* Write the pde value that we need into a pde that the kernel
111 * can access, into the currently loaded page table so it becomes
112 * visible.
114 assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
115 if(get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] != pdeval) {
116 get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] = pdeval;
117 *changed = 1;
120 /* Memory is now available, but only the 4MB window of virtual
121 * address space that we have mapped; calculate how much of
122 * the requested range is visible and return that in *bytes,
123 * if that is less than the requested range.
125 offset = linaddr & I386_VM_OFFSET_MASK_4MB; /* Offset in 4MB window. */
126 *bytes = MIN(*bytes, I386_BIG_PAGE_SIZE - offset);
128 /* Return the linear address of the start of the new mapping. */
129 return I386_BIG_PAGE_SIZE*pde + offset;
133 /*===========================================================================*
134 * check_resumed_caller *
135 *===========================================================================*/
136 static int check_resumed_caller(struct proc *caller)
138 /* Returns the result from VM if caller was resumed, otherwise OK. */
139 if (caller && (caller->p_misc_flags & MF_KCALL_RESUME)) {
140 assert(caller->p_vmrequest.vmresult != VMSUSPEND);
141 return caller->p_vmrequest.vmresult;
144 return OK;
147 /*===========================================================================*
148 * lin_lin_copy *
149 *===========================================================================*/
150 static int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr,
151 struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes)
153 u32_t addr;
154 proc_nr_t procslot;
156 assert(get_cpulocal_var(ptproc));
157 assert(get_cpulocal_var(proc_ptr));
158 assert(read_cr3() == get_cpulocal_var(ptproc)->p_seg.p_cr3);
160 procslot = get_cpulocal_var(ptproc)->p_nr;
162 assert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES);
164 if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
165 if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
166 assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
167 assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
168 if(srcproc) assert(!RTS_ISSET(srcproc, RTS_VMINHIBIT));
169 if(dstproc) assert(!RTS_ISSET(dstproc, RTS_VMINHIBIT));
171 while(bytes > 0) {
172 phys_bytes srcptr, dstptr;
173 vir_bytes chunk = bytes;
174 int changed = 0;
176 #ifdef CONFIG_SMP
177 unsigned cpu = cpuid;
179 if (srcproc && GET_BIT(srcproc->p_stale_tlb, cpu)) {
180 changed = 1;
181 UNSET_BIT(srcproc->p_stale_tlb, cpu);
183 if (dstproc && GET_BIT(dstproc->p_stale_tlb, cpu)) {
184 changed = 1;
185 UNSET_BIT(dstproc->p_stale_tlb, cpu);
187 #endif
189 /* Set up 4MB ranges. */
190 srcptr = createpde(srcproc, srclinaddr, &chunk, 0, &changed);
191 dstptr = createpde(dstproc, dstlinaddr, &chunk, 1, &changed);
192 if(changed)
193 reload_cr3();
195 /* Copy pages. */
196 PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr);
198 if(addr) {
199 /* If addr is nonzero, a page fault was caught. */
201 if(addr >= srcptr && addr < (srcptr + chunk)) {
202 return EFAULT_SRC;
204 if(addr >= dstptr && addr < (dstptr + chunk)) {
205 return EFAULT_DST;
208 panic("lin_lin_copy fault out of range");
210 /* Not reached. */
211 return EFAULT;
214 /* Update counter and addresses for next iteration, if any. */
215 bytes -= chunk;
216 srclinaddr += chunk;
217 dstlinaddr += chunk;
220 if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
221 if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
222 assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
223 assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
225 return OK;
229 static u32_t phys_get32(phys_bytes addr)
231 u32_t v;
232 int r;
234 if((r=lin_lin_copy(NULL, addr,
235 proc_addr(SYSTEM), (phys_bytes) &v, sizeof(v))) != OK) {
236 panic("lin_lin_copy for phys_get32 failed: %d", r);
239 return v;
#if 0
/* Debugging helpers (compiled out): render the flag bits of a cr0/cr4
 * value as a space-separated list of flag names.
 *
 * FLAG(v) appends the name of 'v' plus a space to the local buffer 'str'
 * when that bit is set in the local 'e', and then clears the bit from 'e'.
 * The argument is parenthesized so that a compound flag expression is
 * complemented as a whole.
 */
#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~(v); } } while(0)

/* Returns a pointer to a static buffer describing cr0 value 'e'; " (++)"
 * is appended if bits other than the named ones are set.
 */
static char *cr0_str(u32_t e)
{
	/* Seven names of 12 characters each, the " (++)" suffix and the
	 * terminator need 90 bytes; 80 was too small.
	 */
	static char str[128];
	strcpy(str, "");
	FLAG(I386_CR0_PE);
	FLAG(I386_CR0_MP);
	FLAG(I386_CR0_EM);
	FLAG(I386_CR0_TS);
	FLAG(I386_CR0_ET);
	FLAG(I386_CR0_PG);
	FLAG(I386_CR0_WP);
	if(e) { strcat(str, " (++)"); }
	return str;
}

/* Returns a pointer to a static buffer describing cr4 value 'e'; " (++)"
 * is appended if bits other than the named ones are set.
 */
static char *cr4_str(u32_t e)
{
	/* Eight names (103 characters in total), the " (++)" suffix and
	 * the terminator need 109 bytes; 80 was too small.
	 */
	static char str[128];
	strcpy(str, "");
	FLAG(I386_CR4_VME);
	FLAG(I386_CR4_PVI);
	FLAG(I386_CR4_TSD);
	FLAG(I386_CR4_DE);
	FLAG(I386_CR4_PSE);
	FLAG(I386_CR4_PAE);
	FLAG(I386_CR4_MCE);
	FLAG(I386_CR4_PGE);
	if(e) { strcat(str, " (++)"); }
	return str;
}
#endif
276 /*===========================================================================*
277 * umap_virtual *
278 *===========================================================================*/
279 phys_bytes umap_virtual(rp, seg, vir_addr, bytes)
280 register struct proc *rp; /* pointer to proc table entry for process */
281 int seg; /* T, D, or S segment */
282 vir_bytes vir_addr; /* virtual address in bytes within the seg */
283 vir_bytes bytes; /* # of bytes to be copied */
285 phys_bytes phys = 0;
287 if(vm_lookup(rp, vir_addr, &phys, NULL) != OK) {
288 printf("SYSTEM:umap_virtual: vm_lookup of %s: seg 0x%x: 0x%lx failed\n", rp->p_name, seg, vir_addr);
289 phys = 0;
290 } else {
291 if(phys == 0)
292 panic("vm_lookup returned phys: 0x%lx", phys);
295 if(phys == 0) {
296 printf("SYSTEM:umap_virtual: lookup failed\n");
297 return 0;
300 /* Now make sure addresses are contiguous in physical memory
301 * so that the umap makes sense.
303 if(bytes > 0 && vm_lookup_range(rp, vir_addr, NULL, bytes) != bytes) {
304 printf("umap_virtual: %s: %lu at 0x%lx (vir 0x%lx) not contiguous\n",
305 rp->p_name, bytes, vir_addr, vir_addr);
306 return 0;
309 /* phys must be larger than 0 (or the caller will think the call
310 * failed), and address must not cross a page boundary.
312 assert(phys);
314 return phys;
318 /*===========================================================================*
319 * vm_lookup *
320 *===========================================================================*/
321 int vm_lookup(const struct proc *proc, const vir_bytes virtual,
322 phys_bytes *physical, u32_t *ptent)
324 u32_t *root, *pt;
325 int pde, pte;
326 u32_t pde_v, pte_v;
328 assert(proc);
329 assert(physical);
330 assert(!isemptyp(proc));
331 assert(HASPT(proc));
333 /* Retrieve page directory entry. */
334 root = (u32_t *) proc->p_seg.p_cr3;
335 assert(!((u32_t) root % I386_PAGE_SIZE));
336 pde = I386_VM_PDE(virtual);
337 assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
338 pde_v = phys_get32((u32_t) (root + pde));
340 if(!(pde_v & I386_VM_PRESENT)) {
341 return EFAULT;
344 /* We don't expect to ever see this. */
345 if(pde_v & I386_VM_BIGPAGE) {
346 *physical = pde_v & I386_VM_ADDR_MASK_4MB;
347 if(ptent) *ptent = pde_v;
348 *physical += virtual & I386_VM_OFFSET_MASK_4MB;
349 } else {
350 /* Retrieve page table entry. */
351 pt = (u32_t *) I386_VM_PFA(pde_v);
352 assert(!((u32_t) pt % I386_PAGE_SIZE));
353 pte = I386_VM_PTE(virtual);
354 assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
355 pte_v = phys_get32((u32_t) (pt + pte));
356 if(!(pte_v & I386_VM_PRESENT)) {
357 return EFAULT;
360 if(ptent) *ptent = pte_v;
362 /* Actual address now known; retrieve it and add page offset. */
363 *physical = I386_VM_PFA(pte_v);
364 *physical += virtual % I386_PAGE_SIZE;
367 return OK;
370 /*===========================================================================*
371 * vm_lookup_range *
372 *===========================================================================*/
373 size_t vm_lookup_range(const struct proc *proc, vir_bytes vir_addr,
374 phys_bytes *phys_addr, size_t bytes)
376 /* Look up the physical address corresponding to linear virtual address
377 * 'vir_addr' for process 'proc'. Return the size of the range covered
378 * by contiguous physical memory starting from that address; this may
379 * be anywhere between 0 and 'bytes' inclusive. If the return value is
380 * nonzero, and 'phys_addr' is non-NULL, 'phys_addr' will be set to the
381 * base physical address of the range. 'vir_addr' and 'bytes' need not
382 * be page-aligned, but the caller must have verified that the given
383 * linear range is valid for the given process at all.
385 phys_bytes phys, next_phys;
386 size_t len;
388 assert(proc);
389 assert(bytes > 0);
390 assert(HASPT(proc));
392 /* Look up the first page. */
393 if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
394 return 0;
396 if (phys_addr != NULL)
397 *phys_addr = phys;
399 len = I386_PAGE_SIZE - (vir_addr % I386_PAGE_SIZE);
400 vir_addr += len;
401 next_phys = phys + len;
403 /* Look up any next pages and test physical contiguity. */
404 while (len < bytes) {
405 if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
406 break;
408 if (next_phys != phys)
409 break;
411 len += I386_PAGE_SIZE;
412 vir_addr += I386_PAGE_SIZE;
413 next_phys += I386_PAGE_SIZE;
416 /* We might now have overshot the requested length somewhat. */
417 return MIN(bytes, len);
420 /*===========================================================================*
421 * vm_suspend *
422 *===========================================================================*/
423 static void vm_suspend(struct proc *caller, const struct proc *target,
424 const vir_bytes linaddr, const vir_bytes len, const int type,
425 const int writeflag)
427 /* This range is not OK for this process. Set parameters
428 * of the request and notify VM about the pending request.
430 assert(!RTS_ISSET(caller, RTS_VMREQUEST));
431 assert(!RTS_ISSET(target, RTS_VMREQUEST));
433 RTS_SET(caller, RTS_VMREQUEST);
435 assert(caller->p_endpoint != VM_PROC_NR);
437 caller->p_vmrequest.req_type = VMPTYPE_CHECK;
438 caller->p_vmrequest.target = target->p_endpoint;
439 caller->p_vmrequest.params.check.start = linaddr;
440 caller->p_vmrequest.params.check.length = len;
441 caller->p_vmrequest.params.check.writeflag = writeflag;
442 caller->p_vmrequest.type = type;
444 /* Connect caller on vmrequest wait queue. */
445 if(!(caller->p_vmrequest.nextrequestor = vmrequest))
446 if(OK != send_sig(VM_PROC_NR, SIGKMEM))
447 panic("send_sig failed");
448 vmrequest = caller;
451 /*===========================================================================*
452 * vm_check_range *
453 *===========================================================================*/
454 int vm_check_range(struct proc *caller, struct proc *target,
455 vir_bytes vir_addr, size_t bytes, int writeflag)
457 /* Public interface to vm_suspend(), for use by kernel calls. On behalf
458 * of 'caller', call into VM to check linear virtual address range of
459 * process 'target', starting at 'vir_addr', for 'bytes' bytes. This
460 * function assumes that it will called twice if VM returned an error
461 * the first time (since nothing has changed in that case), and will
462 * then return the error code resulting from the first call. Upon the
463 * first call, a non-success error code is returned as well.
465 int r;
467 if ((caller->p_misc_flags & MF_KCALL_RESUME) &&
468 (r = caller->p_vmrequest.vmresult) != OK)
469 return r;
471 vm_suspend(caller, target, vir_addr, bytes, VMSTYPE_KERNELCALL,
472 writeflag);
474 return VMSUSPEND;
477 /*===========================================================================*
478 * delivermsg *
479 *===========================================================================*/
480 void delivermsg(struct proc *rp)
482 int r = OK;
484 assert(rp->p_misc_flags & MF_DELIVERMSG);
485 assert(rp->p_delivermsg.m_source != NONE);
487 if (copy_msg_to_user(&rp->p_delivermsg,
488 (message *) rp->p_delivermsg_vir)) {
489 printf("WARNING wrong user pointer 0x%08lx from "
490 "process %s / %d\n",
491 rp->p_delivermsg_vir,
492 rp->p_name,
493 rp->p_endpoint);
494 cause_sig(rp->p_nr, SIGSEGV);
495 r = EFAULT;
498 /* Indicate message has been delivered; address is 'used'. */
499 rp->p_delivermsg.m_source = NONE;
500 rp->p_misc_flags &= ~MF_DELIVERMSG;
502 if(!(rp->p_misc_flags & MF_CONTEXT_SET)) {
503 rp->p_reg.retreg = r;
#if 0
/* Debugging helpers (compiled out) that dump page tables. They rely on the
 * FLAG() macro defined earlier in this file.
 */

/* Returns a pointer to a static buffer naming the flag bits set in page
 * directory entry (dir != 0) or page table entry (dir == 0) value 'e'.
 */
static char *flagstr(u32_t e, const int dir)
{
	/* The longest combination of names is 98 characters plus the
	 * terminator; 80 was too small.
	 */
	static char str[128];
	strcpy(str, "");
	FLAG(I386_VM_PRESENT);
	FLAG(I386_VM_WRITE);
	FLAG(I386_VM_USER);
	FLAG(I386_VM_PWT);
	FLAG(I386_VM_PCD);
	FLAG(I386_VM_GLOBAL);
	if(dir)
		FLAG(I386_VM_BIGPAGE);	/* Page directory entry only */
	else
		FLAG(I386_VM_DIRTY);	/* Page table entry only */
	return str;
}

/* Print the present entries of the page table at physical address
 * 'pagetable', three per line; 'v' is the virtual address that entry 0
 * of this table maps.
 */
static void vm_pt_print(u32_t *pagetable, const u32_t v)
{
	int pte;
	int col = 0;

	assert(!((u32_t) pagetable % I386_PAGE_SIZE));

	for(pte = 0; pte < I386_VM_PT_ENTRIES; pte++) {
		u32_t pte_v, pfa;
		pte_v = phys_get32((u32_t) (pagetable + pte));
		if(!(pte_v & I386_VM_PRESENT))
			continue;
		pfa = I386_VM_PFA(pte_v);
		/* Cast so the arguments match the %lx conversions. */
		printf("%4d:%08lx:%08lx %2s ",
			pte, (unsigned long) (v + I386_PAGE_SIZE*pte),
			(unsigned long) pfa,
			(pte_v & I386_VM_WRITE) ? "rw":"RO");
		col++;
		if(col == 3) { printf("\n"); col = 0; }
	}
	if(col > 0) printf("\n");

	return;
}

/* Print the page directory at physical address 'root', descending into
 * every present page table.
 */
static void vm_print(u32_t *root)
{
	int pde;

	assert(!((u32_t) root % I386_PAGE_SIZE));

	/* Pointers are cast: %lx expects an unsigned long argument. */
	printf("page table 0x%lx:\n", (unsigned long) root);

	for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) {
		u32_t pde_v;
		u32_t *pte_a;
		pde_v = phys_get32((u32_t) (root + pde));
		if(!(pde_v & I386_VM_PRESENT))
			continue;
		if(pde_v & I386_VM_BIGPAGE) {
			printf("%4d: 0x%lx, flags %s\n",
				pde, (unsigned long) I386_VM_PFA(pde_v),
				flagstr(pde_v, 1));
		} else {
			pte_a = (u32_t *) I386_VM_PFA(pde_v);
			printf("%4d: pt %08lx %s\n",
				pde, (unsigned long) pte_a, flagstr(pde_v, 1));
			vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
			printf("\n");
		}
	}

	return;
}
#endif
580 /*===========================================================================*
581 * vmmemset *
582 *===========================================================================*/
583 int vm_memset(struct proc* caller, endpoint_t who, phys_bytes ph, int c,
584 phys_bytes count)
586 u32_t pattern;
587 struct proc *whoptr = NULL;
588 phys_bytes cur_ph = ph;
589 phys_bytes left = count;
590 phys_bytes ptr, chunk, pfa = 0;
591 int new_cr3, r = OK;
593 if ((r = check_resumed_caller(caller)) != OK)
594 return r;
596 /* NONE for physical, otherwise virtual */
597 if (who != NONE && !(whoptr = endpoint_lookup(who)))
598 return ESRCH;
600 c &= 0xFF;
601 pattern = c | (c << 8) | (c << 16) | (c << 24);
603 assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
604 assert(!catch_pagefaults);
605 catch_pagefaults = 1;
607 /* We can memset as many bytes as we have remaining,
608 * or as many as remain in the 4MB chunk we mapped in.
610 while (left > 0) {
611 new_cr3 = 0;
612 chunk = left;
613 ptr = createpde(whoptr, cur_ph, &chunk, 0, &new_cr3);
615 if (new_cr3)
616 reload_cr3();
618 /* If a page fault happens, pfa is non-null */
619 if ((pfa = phys_memset(ptr, pattern, chunk))) {
621 /* If a process pagefaults, VM may help out */
622 if (whoptr) {
623 vm_suspend(caller, whoptr, ph, count,
624 VMSTYPE_KERNELCALL, 1);
625 assert(catch_pagefaults);
626 catch_pagefaults = 0;
627 return VMSUSPEND;
630 /* Pagefault when phys copying ?! */
631 panic("vm_memset: pf %lx addr=%lx len=%lu\n",
632 pfa , ptr, chunk);
635 cur_ph += chunk;
636 left -= chunk;
639 assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
640 assert(catch_pagefaults);
641 catch_pagefaults = 0;
643 return OK;
646 /*===========================================================================*
647 * virtual_copy_f *
648 *===========================================================================*/
649 int virtual_copy_f(caller, src_addr, dst_addr, bytes, vmcheck)
650 struct proc * caller;
651 struct vir_addr *src_addr; /* source virtual address */
652 struct vir_addr *dst_addr; /* destination virtual address */
653 vir_bytes bytes; /* # of bytes to copy */
654 int vmcheck; /* if nonzero, can return VMSUSPEND */
656 /* Copy bytes from virtual address src_addr to virtual address dst_addr. */
657 struct vir_addr *vir_addr[2]; /* virtual source and destination address */
658 int i, r;
659 struct proc *procs[2];
661 assert((vmcheck && caller) || (!vmcheck && !caller));
663 /* Check copy count. */
664 if (bytes <= 0) return(EDOM);
666 /* Do some more checks and map virtual addresses to physical addresses. */
667 vir_addr[_SRC_] = src_addr;
668 vir_addr[_DST_] = dst_addr;
670 for (i=_SRC_; i<=_DST_; i++) {
671 endpoint_t proc_e = vir_addr[i]->proc_nr_e;
672 int proc_nr;
673 struct proc *p;
675 if(proc_e == NONE) {
676 p = NULL;
677 } else {
678 if(!isokendpt(proc_e, &proc_nr)) {
679 printf("virtual_copy: no reasonable endpoint\n");
680 return ESRCH;
682 p = proc_addr(proc_nr);
685 procs[i] = p;
688 if ((r = check_resumed_caller(caller)) != OK)
689 return r;
691 if((r=lin_lin_copy(procs[_SRC_], vir_addr[_SRC_]->offset,
692 procs[_DST_], vir_addr[_DST_]->offset, bytes)) != OK) {
693 int writeflag;
694 struct proc *target = NULL;
695 phys_bytes lin;
696 if(r != EFAULT_SRC && r != EFAULT_DST)
697 panic("lin_lin_copy failed: %d", r);
698 if(!vmcheck || !caller) {
699 return r;
702 if(r == EFAULT_SRC) {
703 lin = vir_addr[_SRC_]->offset;
704 target = procs[_SRC_];
705 writeflag = 0;
706 } else if(r == EFAULT_DST) {
707 lin = vir_addr[_DST_]->offset;
708 target = procs[_DST_];
709 writeflag = 1;
710 } else {
711 panic("r strange: %d", r);
714 assert(caller);
715 assert(target);
717 vm_suspend(caller, target, lin, bytes, VMSTYPE_KERNELCALL, writeflag);
718 return VMSUSPEND;
721 return OK;
724 /*===========================================================================*
725 * data_copy *
726 *===========================================================================*/
727 int data_copy(const endpoint_t from_proc, const vir_bytes from_addr,
728 const endpoint_t to_proc, const vir_bytes to_addr,
729 size_t bytes)
731 struct vir_addr src, dst;
733 src.offset = from_addr;
734 dst.offset = to_addr;
735 src.proc_nr_e = from_proc;
736 dst.proc_nr_e = to_proc;
737 assert(src.proc_nr_e != NONE);
738 assert(dst.proc_nr_e != NONE);
740 return virtual_copy(&src, &dst, bytes);
743 /*===========================================================================*
744 * data_copy_vmcheck *
745 *===========================================================================*/
746 int data_copy_vmcheck(struct proc * caller,
747 const endpoint_t from_proc, const vir_bytes from_addr,
748 const endpoint_t to_proc, const vir_bytes to_addr,
749 size_t bytes)
751 struct vir_addr src, dst;
753 src.offset = from_addr;
754 dst.offset = to_addr;
755 src.proc_nr_e = from_proc;
756 dst.proc_nr_e = to_proc;
757 assert(src.proc_nr_e != NONE);
758 assert(dst.proc_nr_e != NONE);
760 return virtual_copy_vmcheck(caller, &src, &dst, bytes);
763 void memory_init(void)
765 assert(nfreepdes == 0);
767 freepdes[nfreepdes++] = kinfo.freepde_start++;
768 freepdes[nfreepdes++] = kinfo.freepde_start++;
770 assert(kinfo.freepde_start < I386_VM_DIR_ENTRIES);
771 assert(nfreepdes == 2);
772 assert(nfreepdes <= MAXFREEPDES);
775 /*===========================================================================*
776 * arch_proc_init *
777 *===========================================================================*/
778 void arch_proc_init(struct proc *pr, const u32_t ip, const u32_t sp,
779 const u32_t ps_str, char *name)
781 arch_proc_reset(pr);
782 strlcpy(pr->p_name, name, sizeof(pr->p_name));
784 /* set custom state we know */
785 pr->p_reg.pc = ip;
786 pr->p_reg.sp = sp;
787 pr->p_reg.bx = ps_str;
/* Index numbers of the physical mappings handed out by arch_phys_map();
 * -1 means the mapping has not been assigned an index.
 */
static int video_mem_mapping_index = -1;
static int usermapped_glo_index = -1;
static int usermapped_index = -1;
static int first_um_idx = -1;
static int lapic_mapping_index = -1;
static int ioapic_first_index = -1;
static int ioapic_last_index = -1;
static int oxpcie_mapping_index = -1;

extern char *video_mem;

/* Only the addresses of these symbols are used. */
extern char usermapped_start, usermapped_end, usermapped_nonglo_start;
802 int arch_phys_map(const int index,
803 phys_bytes *addr,
804 phys_bytes *len,
805 int *flags)
807 static int first = 1;
808 int freeidx = 0;
809 static char *ser_var = NULL;
810 u32_t glo_len = (u32_t) &usermapped_nonglo_start -
811 (u32_t) &usermapped_start;
813 if(first) {
814 memset(&minix_kerninfo, 0, sizeof(minix_kerninfo));
815 video_mem_mapping_index = freeidx++;
816 if(glo_len > 0) {
817 usermapped_glo_index = freeidx++;
820 usermapped_index = freeidx++;
821 first_um_idx = usermapped_index;
822 if(usermapped_glo_index != -1)
823 first_um_idx = usermapped_glo_index;
825 #ifdef USE_APIC
826 if(lapic_addr)
827 lapic_mapping_index = freeidx++;
828 if (ioapic_enabled) {
829 ioapic_first_index = freeidx;
830 assert(nioapics > 0);
831 freeidx += nioapics;
832 ioapic_last_index = freeidx-1;
834 #endif
836 #ifdef CONFIG_OXPCIE
837 if((ser_var = env_get("oxpcie"))) {
838 if(ser_var[0] != '0' || ser_var[1] != 'x') {
839 printf("oxpcie address in hex please\n");
840 } else {
841 printf("oxpcie address is %s\n", ser_var);
842 oxpcie_mapping_index = freeidx++;
845 #endif
847 first = 0;
850 if(index == usermapped_glo_index) {
851 *addr = vir2phys(&usermapped_start);
852 *len = glo_len;
853 *flags = VMMF_USER | VMMF_GLO;
854 return OK;
856 else if(index == usermapped_index) {
857 *addr = vir2phys(&usermapped_nonglo_start);
858 *len = (u32_t) &usermapped_end -
859 (u32_t) &usermapped_nonglo_start;
860 *flags = VMMF_USER;
861 return OK;
863 else if (index == video_mem_mapping_index) {
864 /* map video memory in so we can print panic messages */
865 *addr = MULTIBOOT_VIDEO_BUFFER;
866 *len = I386_PAGE_SIZE;
867 *flags = VMMF_WRITE;
868 return OK;
870 #ifdef USE_APIC
871 else if (index == lapic_mapping_index) {
872 /* map the local APIC if enabled */
873 if (!lapic_addr)
874 return EINVAL;
875 *addr = lapic_addr;
876 *len = 4 << 10 /* 4kB */;
877 *flags = VMMF_UNCACHED | VMMF_WRITE;
878 return OK;
880 else if (ioapic_enabled && index >= ioapic_first_index && index <= ioapic_last_index) {
881 int ioapic_idx = index - ioapic_first_index;
882 *addr = io_apic[ioapic_idx].paddr;
883 assert(*addr);
884 *len = 4 << 10 /* 4kB */;
885 *flags = VMMF_UNCACHED | VMMF_WRITE;
886 printf("ioapic map: addr 0x%lx\n", *addr);
887 return OK;
889 #endif
891 #if CONFIG_OXPCIE
892 if(index == oxpcie_mapping_index) {
893 *addr = strtoul(ser_var+2, NULL, 16);
894 *len = 0x4000;
895 *flags = VMMF_UNCACHED | VMMF_WRITE;
896 return OK;
898 #endif
900 return EINVAL;
903 int arch_phys_map_reply(const int index, const vir_bytes addr)
905 #ifdef USE_APIC
906 /* if local APIC is enabled */
907 if (index == lapic_mapping_index && lapic_addr) {
908 lapic_addr_vaddr = addr;
909 return OK;
911 else if (ioapic_enabled && index >= ioapic_first_index &&
912 index <= ioapic_last_index) {
913 int i = index - ioapic_first_index;
914 io_apic[i].vaddr = addr;
915 return OK;
917 #endif
919 #if CONFIG_OXPCIE
920 if (index == oxpcie_mapping_index) {
921 oxpcie_set_vaddr((unsigned char *) addr);
922 return OK;
924 #endif
925 if(index == first_um_idx) {
926 extern struct minix_ipcvecs minix_ipcvecs_sysenter,
927 minix_ipcvecs_syscall,
928 minix_ipcvecs_softint;
929 extern u32_t usermapped_offset;
930 assert(addr > (u32_t) &usermapped_start);
931 usermapped_offset = addr - (u32_t) &usermapped_start;
932 #define FIXEDPTR(ptr) (void *) ((u32_t)ptr + usermapped_offset)
933 #define FIXPTR(ptr) ptr = FIXEDPTR(ptr)
934 #define ASSIGN(minixstruct) minix_kerninfo.minixstruct = FIXEDPTR(&minixstruct)
935 ASSIGN(kinfo);
936 ASSIGN(machine);
937 ASSIGN(kmessages);
938 ASSIGN(loadinfo);
940 /* select the right set of IPC routines to map into processes */
941 if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
942 printf("kernel: selecting intel sysenter ipc style\n");
943 minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_sysenter;
944 } else if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
945 printf("kernel: selecting amd syscall ipc style\n");
946 minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_syscall;
947 } else {
948 printf("kernel: selecting fallback (int) ipc style\n");
949 minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_softint;
952 /* adjust the pointers of the functions and the struct
953 * itself to the user-accessible mapping
955 FIXPTR(minix_kerninfo.minix_ipcvecs->send);
956 FIXPTR(minix_kerninfo.minix_ipcvecs->receive);
957 FIXPTR(minix_kerninfo.minix_ipcvecs->sendrec);
958 FIXPTR(minix_kerninfo.minix_ipcvecs->senda);
959 FIXPTR(minix_kerninfo.minix_ipcvecs->sendnb);
960 FIXPTR(minix_kerninfo.minix_ipcvecs->notify);
961 FIXPTR(minix_kerninfo.minix_ipcvecs->do_kernel_call);
962 FIXPTR(minix_kerninfo.minix_ipcvecs);
964 minix_kerninfo.kerninfo_magic = KERNINFO_MAGIC;
965 minix_kerninfo.minix_feature_flags = minix_feature_flags;
966 minix_kerninfo_user = (vir_bytes) FIXEDPTR(&minix_kerninfo);
968 /* if libc_ipc is set, disable usermapped ipc functions
969 * and force binaries to use in-libc fallbacks.
971 if(env_get("libc_ipc")) {
972 printf("kernel: forcing in-libc fallback ipc style\n");
973 minix_kerninfo.minix_ipcvecs = NULL;
974 } else {
975 minix_kerninfo.ki_flags |= MINIX_KIF_IPCVECS;
978 return OK;
981 if(index == usermapped_index) return OK;
983 if (index == video_mem_mapping_index) {
984 video_mem_vaddr = addr;
985 return OK;
988 return EINVAL;
991 int arch_enable_paging(struct proc * caller)
993 assert(caller->p_seg.p_cr3);
995 /* load caller's page table */
996 switch_address_space(caller);
998 video_mem = (char *) video_mem_vaddr;
1000 #ifdef USE_APIC
1001 /* start using the virtual addresses */
1003 /* if local APIC is enabled */
1004 if (lapic_addr) {
1005 lapic_addr = lapic_addr_vaddr;
1006 lapic_eoi_addr = LAPIC_EOI;
1008 /* if IO apics are enabled */
1009 if (ioapic_enabled) {
1010 int i;
1012 for (i = 0; i < nioapics; i++) {
1013 io_apic[i].addr = io_apic[i].vaddr;
1016 #if CONFIG_SMP
1017 barrier();
1019 wait_for_APs_to_finish_booting();
1020 #endif
1021 #endif
1023 #ifdef USE_WATCHDOG
1025 * We make sure that we don't enable the watchdog until paging is turned
1026 * on as we might get an NMI while switching and we might still use wrong
1027 * lapic address. Bad things would happen. It is unfortunate but such is
1028 * life
1030 if (watchdog_enabled)
1031 i386_watchdog_start();
1032 #endif
1034 return OK;
1037 void release_address_space(struct proc *pr)
1039 pr->p_seg.p_cr3_v = NULL;
/* Checksum the 'length' bytes at 'ptr': returns nonzero when the sum of all
 * bytes, taken modulo 256, is zero (an empty buffer therefore passes), and
 * 0 otherwise.
 */
int platform_tbl_checksum_ok(void *ptr, unsigned int length)
{
	const unsigned char *cursor = (unsigned char *)ptr;
	unsigned char sum = 0;	/* wraps around at 256 */
	unsigned int remaining = length;

	while (remaining > 0) {
		sum += *cursor;
		cursor++;
		remaining--;
	}

	return sum == 0;
}
1052 int platform_tbl_ptr(phys_bytes start,
1053 phys_bytes end,
1054 unsigned increment,
1055 void * buff,
1056 unsigned size,
1057 phys_bytes * phys_addr,
1058 int ((* cmp_f)(void *)))
1060 phys_bytes addr;
1062 for (addr = start; addr < end; addr += increment) {
1063 phys_copy (addr, (phys_bytes) buff, size);
1064 if (cmp_f(buff)) {
1065 if (phys_addr)
1066 *phys_addr = addr;
1067 return 1;
1070 return 0;