/*
 * vm: allow split of mem_anon_contig region
 * [minix3.git] / minix / servers / vm / main.c
 * blob 73cf0e2c422889d6d623169fedf2a43eb52799c6
 */
#define _SYSTEM		1

#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/const.h>
#include <minix/bitmap.h>
#include <minix/rs.h>
#include <minix/vfsif.h>

#include <sys/exec.h>

#include <libexec.h>
#include <ctype.h>
#include <errno.h>
#include <string.h>
#include <env.h>
#include <stdio.h>
#include <assert.h>

#define _MAIN 1
#include "glo.h"
#include "proto.h"
#include "util.h"
#include "vm.h"
#include "sanitycheck.h"

extern int missing_spares;

#include <machine/archtypes.h>
#include <sys/param.h>
#include "kernel/const.h"
#include "kernel/config.h"
#include "kernel/proc.h"

#include <signal.h>
#include <lib.h>
48 /* Table of calls and a macro to test for being in range. */
49 struct {
50 int (*vmc_func)(message *); /* Call handles message. */
51 const char *vmc_name; /* Human-readable string. */
52 } vm_calls[NR_VM_CALLS];
/* Macro to verify call range and map 'high' range to 'base' range
 * (starting at 0) in one. Evaluates to zero-based call number if call
 * number is valid, returns -1 otherwise.
 *
 * Note that the argument is evaluated more than once, so it must not have
 * side effects.
 */
#define CALLNUMBER(c) (((c) >= VM_RQ_BASE &&				\
			(c) < VM_RQ_BASE + ELEMENTS(vm_calls)) ?	\
			((c) - VM_RQ_BASE) : -1)
62 static int map_service(struct rprocpub *rpub);
63 static int do_rs_init(message *m);
65 /* SEF functions and variables. */
66 static void sef_cb_signal_handler(int signo);
68 void init_vm(void);
70 /*===========================================================================*
71 * main *
72 *===========================================================================*/
73 int main(void)
75 message msg;
76 int result, who_e, rcv_sts;
77 int caller_slot;
79 /* Initialize system so that all processes are runnable */
80 init_vm();
82 /* Register init callbacks. */
83 sef_setcb_init_restart(sef_cb_init_fail);
84 sef_setcb_signal_handler(sef_cb_signal_handler);
86 /* Let SEF perform startup. */
87 sef_startup();
89 SANITYCHECK(SCL_TOP);
91 /* This is VM's main loop. */
92 while (TRUE) {
93 int r, c;
94 int type;
95 int transid = 0; /* VFS transid if any */
97 SANITYCHECK(SCL_TOP);
98 if(missing_spares > 0) {
99 alloc_cycle(); /* mem alloc code wants to be called */
102 if ((r=sef_receive_status(ANY, &msg, &rcv_sts)) != OK)
103 panic("sef_receive_status() error: %d", r);
105 if (is_ipc_notify(rcv_sts)) {
106 /* Unexpected ipc_notify(). */
107 printf("VM: ignoring ipc_notify() from %d\n", msg.m_source);
108 continue;
110 who_e = msg.m_source;
111 if(vm_isokendpt(who_e, &caller_slot) != OK)
112 panic("invalid caller %d", who_e);
114 /* We depend on this being false for the initialized value. */
115 assert(!IS_VFS_FS_TRANSID(transid));
117 type = msg.m_type;
118 c = CALLNUMBER(type);
119 result = ENOSYS; /* Out of range or restricted calls return this. */
121 transid = TRNS_GET_ID(msg.m_type);
123 if((msg.m_source == VFS_PROC_NR) && IS_VFS_FS_TRANSID(transid)) {
124 /* If it's a request from VFS, it might have a transaction id. */
125 msg.m_type = TRNS_DEL_ID(msg.m_type);
127 /* Calls that use the transid */
128 result = do_procctl(&msg, transid);
129 } else if(msg.m_type == RS_INIT && msg.m_source == RS_PROC_NR) {
130 result = do_rs_init(&msg);
131 } else if (msg.m_type == VM_PAGEFAULT) {
132 if (!IPC_STATUS_FLAGS_TEST(rcv_sts, IPC_FLG_MSG_FROM_KERNEL)) {
133 printf("VM: process %d faked VM_PAGEFAULT "
134 "message!\n", msg.m_source);
136 do_pagefaults(&msg);
138 * do not reply to this call, the caller is unblocked by
139 * a sys_vmctl() call in do_pagefaults if success. VM panics
140 * otherwise
142 continue;
143 } else if(c < 0 || !vm_calls[c].vmc_func) {
144 /* out of range or missing callnr */
145 } else {
146 if (acl_check(&vmproc[caller_slot], c) != OK) {
147 printf("VM: unauthorized %s by %d\n",
148 vm_calls[c].vmc_name, who_e);
149 } else {
150 SANITYCHECK(SCL_FUNCTIONS);
151 result = vm_calls[c].vmc_func(&msg);
152 SANITYCHECK(SCL_FUNCTIONS);
156 /* Send reply message, unless the return code is SUSPEND,
157 * which is a pseudo-result suppressing the reply message.
159 if(result != SUSPEND) {
160 msg.m_type = result;
162 assert(!IS_VFS_FS_TRANSID(transid));
164 if((r=ipc_send(who_e, &msg)) != OK) {
165 printf("VM: couldn't send %d to %d (err %d)\n",
166 msg.m_type, who_e, r);
167 panic("ipc_send() error");
171 return(OK);
174 static int do_rs_init(message *m)
176 int s, i;
177 static struct rprocpub rprocpub[NR_BOOT_PROCS];
179 /* Map all the services in the boot image. */
180 if((s = sys_safecopyfrom(RS_PROC_NR, m->m_rs_init.rproctab_gid, 0,
181 (vir_bytes) rprocpub, sizeof(rprocpub))) != OK) {
182 panic("vm: sys_safecopyfrom (rs) failed: %d", s);
185 for(i=0;i < NR_BOOT_PROCS;i++) {
186 if(rprocpub[i].in_use) {
187 if((s = map_service(&rprocpub[i])) != OK) {
188 panic("unable to map service: %d", s);
193 /* RS expects this response that it then again wants to reply to: */
194 m->m_rs_init.result = OK;
195 ipc_sendrec(RS_PROC_NR, m);
197 return(SUSPEND);
200 static struct vmproc *init_proc(endpoint_t ep_nr)
202 static struct boot_image *ip;
204 for (ip = &kernel_boot_info.boot_procs[0];
205 ip < &kernel_boot_info.boot_procs[NR_BOOT_PROCS]; ip++) {
206 struct vmproc *vmp;
208 if(ip->proc_nr != ep_nr) continue;
210 if(ip->proc_nr >= _NR_PROCS || ip->proc_nr < 0)
211 panic("proc: %d", ip->proc_nr);
213 vmp = &vmproc[ip->proc_nr];
214 assert(!(vmp->vm_flags & VMF_INUSE)); /* no double procs */
215 clear_proc(vmp);
216 vmp->vm_flags = VMF_INUSE;
217 vmp->vm_endpoint = ip->endpoint;
218 vmp->vm_boot = ip;
220 return vmp;
223 panic("no init_proc");
226 struct vm_exec_info {
227 struct exec_info execi;
228 struct boot_image *ip;
229 struct vmproc *vmp;
232 static int libexec_copy_physcopy(struct exec_info *execi,
233 off_t off, vir_bytes vaddr, size_t len)
235 vir_bytes end;
236 struct vm_exec_info *ei = execi->opaque;
237 end = ei->ip->start_addr + ei->ip->len;
238 assert(ei->ip->start_addr + off + len <= end);
239 return sys_physcopy(NONE, ei->ip->start_addr + off,
240 execi->proc_e, vaddr, len, 0);
243 static void boot_alloc(struct exec_info *execi, off_t vaddr,
244 size_t len, int flags)
246 struct vmproc *vmp = ((struct vm_exec_info *) execi->opaque)->vmp;
248 if(!(map_page_region(vmp, vaddr, 0, len,
249 VR_ANON | VR_WRITABLE | VR_UNINITIALIZED, flags,
250 &mem_type_anon))) {
251 panic("VM: exec: map_page_region for boot process failed");
255 static int libexec_alloc_vm_prealloc(struct exec_info *execi,
256 vir_bytes vaddr, size_t len)
258 boot_alloc(execi, vaddr, len, MF_PREALLOC);
259 return OK;
262 static int libexec_alloc_vm_ondemand(struct exec_info *execi,
263 vir_bytes vaddr, size_t len)
265 boot_alloc(execi, vaddr, len, 0);
266 return OK;
269 static void exec_bootproc(struct vmproc *vmp, struct boot_image *ip)
271 struct vm_exec_info vmexeci;
272 struct exec_info *execi = &vmexeci.execi;
273 char hdr[VM_PAGE_SIZE];
275 size_t frame_size = 0; /* Size of the new initial stack. */
276 int argc = 0; /* Argument count. */
277 int envc = 0; /* Environment count */
278 char overflow = 0; /* No overflow yet. */
279 struct ps_strings *psp;
281 int vsp = 0; /* (virtual) Stack pointer in new address space. */
282 char *argv[] = { ip->proc_name, NULL };
283 char *envp[] = { NULL };
284 char *path = ip->proc_name;
285 char frame[VM_PAGE_SIZE];
287 memset(&vmexeci, 0, sizeof(vmexeci));
289 if(pt_new(&vmp->vm_pt) != OK)
290 panic("VM: no new pagetable");
292 if(pt_bind(&vmp->vm_pt, vmp) != OK)
293 panic("VM: pt_bind failed");
295 if(sys_physcopy(NONE, ip->start_addr, SELF,
296 (vir_bytes) hdr, sizeof(hdr), 0) != OK)
297 panic("can't look at boot proc header");
299 execi->stack_high = kernel_boot_info.user_sp;
300 execi->stack_size = DEFAULT_STACK_LIMIT;
301 execi->proc_e = vmp->vm_endpoint;
302 execi->hdr = hdr;
303 execi->hdr_len = sizeof(hdr);
304 strlcpy(execi->progname, ip->proc_name, sizeof(execi->progname));
305 execi->frame_len = 0;
306 execi->opaque = &vmexeci;
307 execi->filesize = ip->len;
309 vmexeci.ip = ip;
310 vmexeci.vmp = vmp;
312 /* callback functions and data */
313 execi->copymem = libexec_copy_physcopy;
314 execi->clearproc = NULL;
315 execi->clearmem = libexec_clear_sys_memset;
316 execi->allocmem_prealloc_junk = libexec_alloc_vm_prealloc;
317 execi->allocmem_prealloc_cleared = libexec_alloc_vm_prealloc;
318 execi->allocmem_ondemand = libexec_alloc_vm_ondemand;
320 if (libexec_load_elf(execi) != OK)
321 panic("vm: boot process load of process %s (ep=%d) failed\n",
322 execi->progname, vmp->vm_endpoint);
324 /* Setup a minimal stack. */
325 minix_stack_params(path, argv, envp, &frame_size, &overflow, &argc,
326 &envc);
328 /* The party is off if there is an overflow, or it is too big for our
329 * pre-allocated space. */
330 if(overflow || frame_size > sizeof(frame))
331 panic("vm: could not alloc stack for boot process %s (ep=%d)\n",
332 execi->progname, vmp->vm_endpoint);
334 minix_stack_fill(path, argc, argv, envc, envp, frame_size, frame, &vsp,
335 &psp);
337 if(handle_memory_once(vmp, vsp, frame_size, 1) != OK)
338 panic("vm: could not map stack for boot process %s (ep=%d)\n",
339 execi->progname, vmp->vm_endpoint);
341 if(sys_datacopy(SELF, (vir_bytes)frame, vmp->vm_endpoint, vsp, frame_size) != OK)
342 panic("vm: could not copy stack for boot process %s (ep=%d)\n",
343 execi->progname, vmp->vm_endpoint);
345 if(sys_exec(vmp->vm_endpoint, (vir_bytes)vsp,
346 (vir_bytes)execi->progname, execi->pc,
347 vsp + ((int)psp - (int)frame)) != OK)
348 panic("vm: boot process exec of process %s (ep=%d) failed\n",
349 execi->progname,vmp->vm_endpoint);
351 /* make it runnable */
352 if(sys_vmctl(vmp->vm_endpoint, VMCTL_BOOTINHIBIT_CLEAR, 0) != OK)
353 panic("VMCTL_BOOTINHIBIT_CLEAR failed");
356 static int do_procctl_notrans(message *msg)
358 int transid = 0;
360 assert(!IS_VFS_FS_TRANSID(transid));
362 return do_procctl(msg, transid);
365 void init_vm(void)
367 int s, i;
368 static struct memory mem_chunks[NR_MEMS];
369 static struct boot_image *ip;
370 extern void __minix_init(void);
371 multiboot_module_t *mod;
372 vir_bytes kern_dyn, kern_static;
374 #if SANITYCHECKS
375 incheck = nocheck = 0;
376 #endif
378 /* Retrieve various crucial boot parameters */
379 if(OK != (s=sys_getkinfo(&kernel_boot_info))) {
380 panic("couldn't get bootinfo: %d", s);
383 /* Turn file mmap on? */
384 enable_filemap=1; /* yes by default */
385 env_parse("filemap", "d", 0, &enable_filemap, 0, 1);
387 /* Sanity check */
388 assert(kernel_boot_info.mmap_size > 0);
389 assert(kernel_boot_info.mods_with_kernel > 0);
391 /* Get chunks of available memory. */
392 get_mem_chunks(mem_chunks);
394 /* Set table to 0. This invalidates all slots (clear VMF_INUSE). */
395 memset(vmproc, 0, sizeof(vmproc));
397 for(i = 0; i < ELEMENTS(vmproc); i++) {
398 vmproc[i].vm_slot = i;
401 /* Initialize ACL data structures. */
402 acl_init();
404 /* region management initialization. */
405 map_region_init();
407 /* Initialize tables to all physical memory. */
408 mem_init(mem_chunks);
410 /* Architecture-dependent initialization. */
411 init_proc(VM_PROC_NR);
412 pt_init();
414 /* Acquire kernel ipc vectors that weren't available
415 * before VM had determined kernel mappings
417 __minix_init();
419 /* The kernel's freelist does not include boot-time modules; let
420 * the allocator know that the total memory is bigger.
422 for (mod = &kernel_boot_info.module_list[0];
423 mod < &kernel_boot_info.module_list[kernel_boot_info.mods_with_kernel-1]; mod++) {
424 phys_bytes len = mod->mod_end-mod->mod_start+1;
425 len = roundup(len, VM_PAGE_SIZE);
426 mem_add_total_pages(len/VM_PAGE_SIZE);
429 kern_dyn = kernel_boot_info.kernel_allocated_bytes_dynamic;
430 kern_static = kernel_boot_info.kernel_allocated_bytes;
431 kern_static = roundup(kern_static, VM_PAGE_SIZE);
432 mem_add_total_pages((kern_dyn + kern_static)/VM_PAGE_SIZE);
434 /* Give these processes their own page table. */
435 for (ip = &kernel_boot_info.boot_procs[0];
436 ip < &kernel_boot_info.boot_procs[NR_BOOT_PROCS]; ip++) {
437 struct vmproc *vmp;
439 if(ip->proc_nr < 0) continue;
441 assert(ip->start_addr);
443 /* VM has already been set up by the kernel and pt_init().
444 * Any other boot process is already in memory and is set up
445 * here.
447 if(ip->proc_nr == VM_PROC_NR) continue;
449 vmp = init_proc(ip->proc_nr);
451 exec_bootproc(vmp, ip);
453 /* Free the file blob */
454 assert(!(ip->start_addr % VM_PAGE_SIZE));
455 ip->len = roundup(ip->len, VM_PAGE_SIZE);
456 free_mem(ABS2CLICK(ip->start_addr), ABS2CLICK(ip->len));
459 /* Set up table of calls. */
460 #define CALLMAP(code, func) { int _cmi; \
461 _cmi=CALLNUMBER(code); \
462 assert(_cmi >= 0); \
463 assert(_cmi < NR_VM_CALLS); \
464 vm_calls[_cmi].vmc_func = (func); \
465 vm_calls[_cmi].vmc_name = #code; \
468 /* Set call table to 0. This invalidates all calls (clear
469 * vmc_func).
471 memset(vm_calls, 0, sizeof(vm_calls));
473 /* Basic VM calls. */
474 CALLMAP(VM_MMAP, do_mmap);
475 CALLMAP(VM_MUNMAP, do_munmap);
476 CALLMAP(VM_MAP_PHYS, do_map_phys);
477 CALLMAP(VM_UNMAP_PHYS, do_munmap);
479 /* Calls from PM. */
480 CALLMAP(VM_EXIT, do_exit);
481 CALLMAP(VM_FORK, do_fork);
482 CALLMAP(VM_BRK, do_brk);
483 CALLMAP(VM_WILLEXIT, do_willexit);
484 CALLMAP(VM_NOTIFY_SIG, do_notify_sig);
486 CALLMAP(VM_PROCCTL, do_procctl_notrans);
488 /* Calls from VFS. */
489 CALLMAP(VM_VFS_REPLY, do_vfs_reply);
490 CALLMAP(VM_VFS_MMAP, do_vfs_mmap);
492 /* Calls from RS */
493 CALLMAP(VM_RS_SET_PRIV, do_rs_set_priv);
494 CALLMAP(VM_RS_UPDATE, do_rs_update);
495 CALLMAP(VM_RS_MEMCTL, do_rs_memctl);
497 /* Generic calls. */
498 CALLMAP(VM_REMAP, do_remap);
499 CALLMAP(VM_REMAP_RO, do_remap);
500 CALLMAP(VM_GETPHYS, do_get_phys);
501 CALLMAP(VM_SHM_UNMAP, do_munmap);
502 CALLMAP(VM_GETREF, do_get_refcount);
503 CALLMAP(VM_INFO, do_info);
504 CALLMAP(VM_QUERY_EXIT, do_query_exit);
505 CALLMAP(VM_WATCH_EXIT, do_watch_exit);
507 /* Cache blocks. */
508 CALLMAP(VM_MAPCACHEPAGE, do_mapcache);
509 CALLMAP(VM_SETCACHEPAGE, do_setcache);
510 CALLMAP(VM_CLEARCACHE, do_clearcache);
512 /* getrusage */
513 CALLMAP(VM_GETRUSAGE, do_getrusage);
515 /* Initialize the structures for queryexit */
516 init_query_exit();
519 /*===========================================================================*
520 * sef_cb_signal_handler *
521 *===========================================================================*/
522 static void sef_cb_signal_handler(int signo)
524 /* Check for known kernel signals, ignore anything else. */
525 switch(signo) {
526 /* There is a pending memory request from the kernel. */
527 case SIGKMEM:
528 do_memory();
529 break;
532 /* It can happen that we get stuck receiving signals
533 * without sef_receive() returning. We could need more memory
534 * though.
536 if(missing_spares > 0) {
537 alloc_cycle(); /* pagetable code wants to be called */
540 pt_clearmapcache();
543 /*===========================================================================*
544 * map_service *
545 *===========================================================================*/
546 static int map_service(struct rprocpub *rpub)
548 /* Map a new service by initializing its call mask. */
549 int r, proc_nr;
551 if ((r = vm_isokendpt(rpub->endpoint, &proc_nr)) != OK) {
552 return r;
555 /* Copy the call mask. */
556 acl_set(&vmproc[proc_nr], rpub->vm_call_mask, !IS_RPUB_BOOT_USR(rpub));
558 return(OK);