1 /* $NetBSD: rump.c,v 1.147 2009/12/09 00:11:21 pooka Exp $ */
4 * Copyright (c) 2007 Antti Kantee. All Rights Reserved.
6 * Development of this software was supported by Google Summer of Code.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: rump.c,v 1.147 2009/12/09 00:11:21 pooka Exp $");
33 #include <sys/param.h>
34 #include <sys/atomic.h>
36 #include <sys/callout.h>
39 #include <sys/device.h>
40 #include <sys/evcnt.h>
41 #include <sys/event.h>
42 #include <sys/exec_elf.h>
43 #include <sys/filedesc.h>
44 #include <sys/iostat.h>
45 #include <sys/kauth.h>
46 #include <sys/kernel.h>
48 #include <sys/kprintf.h>
49 #include <sys/ksyms.h>
50 #include <sys/msgbuf.h>
51 #include <sys/module.h>
53 #include <sys/percpu.h>
55 #include <sys/queue.h>
56 #include <sys/reboot.h>
57 #include <sys/resourcevar.h>
58 #include <sys/select.h>
59 #include <sys/sysctl.h>
60 #include <sys/syscall.h>
62 #include <sys/uidinfo.h>
64 #include <sys/xcall.h>
66 #include <rump/rumpuser.h>
68 #include <secmodel/suser/suser.h>
70 #include <prop/proplib.h>
72 #include <uvm/uvm_readahead.h>
74 #include "rump_private.h"
75 #include "rump_net_private.h"
76 #include "rump_vfs_private.h"
77 #include "rump_dev_private.h"
80 struct session rump_session
= {
84 .s_login
= "rumphobo",
87 struct pgrp rump_pgrp
= {
88 .pg_members
= LIST_HEAD_INITIALIZER(pg_members
),
89 .pg_session
= &rump_session
,
92 struct pstats rump_stats
;
93 struct plimit rump_limits
;
94 struct filedesc rump_filedesc0
;
95 struct proclist allproc
;
96 char machine
[] = "rump";
97 static kauth_cred_t rump_susercred
;
99 /* pretend the master rump proc is init */
100 struct proc
*initproc
= &proc0
;
102 struct rumpuser_mtx
*rump_giantlock
;
104 sigset_t sigcantmask
;
106 struct device rump_rootdev
= {
107 .dv_class
= DV_VIRTUAL
110 #ifdef RUMP_WITHOUT_THREADS
111 int rump_threads
= 0;
113 int rump_threads
= 1;
117 rump_aiodone_worker(struct work
*wk
, void *dummy
)
119 struct buf
*bp
= (struct buf
*)wk
;
121 KASSERT(&bp
->b_work
== wk
);
125 static int rump_inited
;
126 static struct emul emul_rump
= {
127 .e_vm_default_addr
= uvm_default_mapaddr
,
/*
 * Generic "component not linked in" stub.  It takes no arguments and
 * always returns EOPNOTSUPP.  The __weak_alias() lines that follow bind
 * optional entry points (rump_net_init, rump_vfs_init, rump_dev_init,
 * rump_vfs_fini, biodone, sopoll) to this symbol.
 */
int rump__unavailable(void);
int
rump__unavailable(void)
{

	return EOPNOTSUPP;
}
132 __weak_alias(rump_net_init
,rump__unavailable
);
133 __weak_alias(rump_vfs_init
,rump__unavailable
);
134 __weak_alias(rump_dev_init
,rump__unavailable
);
136 __weak_alias(rump_vfs_fini
,rump__unavailable
);
138 __weak_alias(biodone
,rump__unavailable
);
139 __weak_alias(sopoll
,rump__unavailable
);
/*
 * Stub for vfs-only entry points that have no sensible error return.
 * It calls panic() with a fixed message.  usermount_common_policy is
 * weak-aliased to this symbol right after the definition.
 */
void rump__unavailable_vfs_panic(void);
void
rump__unavailable_vfs_panic(void)
{

	panic("vfs component not available");
}
143 __weak_alias(usermount_common_policy
,rump__unavailable_vfs_panic
);
145 rump_proc_vfs_init_fn rump_proc_vfs_init
;
146 rump_proc_vfs_release_fn rump_proc_vfs_release
;
149 * Stir up the stack a bit. These are exported functions to help
150 * convince the compiler that we don't want these routines completely
151 * optimized out or inlined. Is there an easier way to do this?
/*
 * Intentionally empty: accepts a pointer and does nothing with it.
 * The pointer is never dereferenced.
 */
void nullfn(uint32_t *);
void
nullfn(uint32_t *arg)
{

	(void)arg;
}
155 void messthestack(void);
163 for (i
= 0; i
< 64; i
++) {
164 rumpuser_gettime(&d1
, &d2
, &error
);
171 rump__init(int rump_version
)
182 else if (rump_inited
== -1)
183 panic("rump_init: host process restart required");
187 /* Print some silly banners for spammy bootstrap. */
188 if (boothowto
& AB_VERBOSE
) {
189 printf("%s%s", copyright
, version
);
193 * Seed arc4random() with a "reasonable" amount of randomness.
194 * Yes, this is a quick kludge which depends on the arc4random
200 if (rump_version
!= RUMP_VERSION
) {
201 printf("rump version mismatch, %d vs. %d\n",
202 rump_version
, RUMP_VERSION
);
203 return EPROGMISMATCH
;
206 if (rumpuser_getenv("RUMP_THREADS", buf
, sizeof(buf
), &error
) == 0) {
207 rump_threads
= *buf
!= '0';
209 rumpuser_thrinit(rump_user_schedule
, rump_user_unschedule
,
213 /* init minimal lwp/cpu context */
217 rumpuser_set_curlwp(l
);
219 mutex_init(&tty_lock
, MUTEX_DEFAULT
, IPL_NONE
);
220 rumpuser_mutex_recursive_init(&rump_giantlock
);
228 pool_subsystem_init();
240 rump_susercred
= rump_cred_create(0, 0, 0, NULL
);
242 /* init proc0 and rest of lwp0 now that we can allocate memory */
244 p
->p_stats
= &rump_stats
;
245 p
->p_limit
= &rump_limits
;
246 p
->p_pgrp
= &rump_pgrp
;
248 p
->p_fd
= &rump_filedesc0
;
249 p
->p_vmspace
= &rump_vmspace
;
250 p
->p_emul
= &emul_rump
;
251 p
->p_lock
= mutex_obj_alloc(MUTEX_DEFAULT
, IPL_NONE
);
252 l
->l_cred
= rump_cred_suserget();
255 LIST_INSERT_HEAD(&allproc
, &proc0
, p_list
);
256 proc_lock
= mutex_obj_alloc(MUTEX_DEFAULT
, IPL_NONE
);
258 rump_limits
.pl_rlimit
[RLIMIT_FSIZE
].rlim_cur
= RLIM_INFINITY
;
259 rump_limits
.pl_rlimit
[RLIMIT_NOFILE
].rlim_cur
= RLIM_INFINITY
;
260 rump_limits
.pl_rlimit
[RLIMIT_SBSIZE
].rlim_cur
= RLIM_INFINITY
;
262 rump_scheduler_init();
263 /* revert temporary context and schedule a real context */
265 rumpuser_set_curlwp(NULL
);
268 /* we are mostly go. do per-cpu subsystem init */
269 for (i
= 0; i
< ncpu
; i
++) {
270 struct cpu_info
*ci
= cpu_lookup(i
);
272 callout_init_cpu(ci
);
275 pool_cache_cpu_init(ci
);
289 /* these do nothing if not present */
297 if (workqueue_create(&uvm
.aiodone_queue
, "aiodoned",
298 rump_aiodone_worker
, NULL
, 0, 0, WQ_MPSAFE
))
304 rumpuser_dl_module_bootstrap(rump_module_init
, rump_kernelfsym_load
);
306 rumpuser_gethostname(hostname
, MAXHOSTNAMELEN
, &error
);
307 hostnamelen
= strlen(hostname
);
309 sigemptyset(&sigcantmask
);
311 lwp0
.l_fd
= proc0
.p_fd
= fd_init(&rump_filedesc0
);
321 /* maybe support sys_reboot some day for remote shutdown */
323 rump_reboot(int howto
)
326 /* dump means we really take the dive here */
327 if ((howto
& RB_DUMP
) || panicstr
) {
328 rumpuser_exit(RUMPUSER_PANIC
);
333 if (!((howto
& RB_NOSYNC
) || panicstr
)) {
337 /* your wish is my command */
338 if (howto
& RB_HALT
) {
340 uint64_t sec
= 5, nsec
= 0;
343 rumpuser_nanosleep(&sec
, &nsec
, &error
);
350 rump_uio_setup(void *buf
, size_t bufsize
, off_t offset
, enum rump_uiorw rw
)
363 panic("%s: invalid rw %d", __func__
, rw
);
366 uio
= kmem_alloc(sizeof(struct uio
), KM_SLEEP
);
367 uio
->uio_iov
= kmem_alloc(sizeof(struct iovec
), KM_SLEEP
);
369 uio
->uio_iov
->iov_base
= buf
;
370 uio
->uio_iov
->iov_len
= bufsize
;
373 uio
->uio_offset
= offset
;
374 uio
->uio_resid
= bufsize
;
376 uio
->uio_vmspace
= UIO_VMSPACE_SYS
;
382 rump_uio_getresid(struct uio
*uio
)
385 return uio
->uio_resid
;
389 rump_uio_getoff(struct uio
*uio
)
392 return uio
->uio_offset
;
396 rump_uio_free(struct uio
*uio
)
400 resid
= uio
->uio_resid
;
401 kmem_free(uio
->uio_iov
, sizeof(*uio
->uio_iov
));
402 kmem_free(uio
, sizeof(*uio
));
407 static pid_t nextpid
= 1;
409 rump_newproc_switch()
414 mypid
= atomic_inc_uint_nv(&nextpid
);
415 if (__predict_false(mypid
== 0))
416 mypid
= atomic_inc_uint_nv(&nextpid
);
418 l
= rump_lwp_alloc(mypid
, 0);
425 rump_lwp_alloc_and_switch(pid_t pid
, lwpid_t lid
)
429 l
= rump_lwp_alloc(pid
, lid
);
436 rump_lwp_alloc(pid_t pid
, lwpid_t lid
)
441 l
= kmem_zalloc(sizeof(*l
), KM_SLEEP
);
443 p
= kmem_zalloc(sizeof(*p
), KM_SLEEP
);
444 if (rump_proc_vfs_init
)
445 rump_proc_vfs_init(p
);
446 p
->p_stats
= &rump_stats
;
447 p
->p_limit
= &rump_limits
;
449 p
->p_vmspace
= &rump_vmspace
;
450 p
->p_emul
= &emul_rump
;
451 p
->p_fd
= fd_init(NULL
);
452 p
->p_lock
= mutex_obj_alloc(MUTEX_DEFAULT
, IPL_NONE
);
453 l
->l_cred
= rump_cred_suserget();
456 l
->l_cred
= rump_susercred
;
468 rump_lwp_switch(struct lwp
*newlwp
)
470 struct lwp
*l
= curlwp
;
472 rumpuser_set_curlwp(NULL
);
473 newlwp
->l_cpu
= l
->l_cpu
;
474 newlwp
->l_mutex
= l
->l_mutex
;
477 rumpuser_set_curlwp(newlwp
);
478 if (l
->l_flag
& LW_WEXIT
)
482 /* XXX: this has effect only on non-pid0 lwps */
484 rump_lwp_release(struct lwp
*l
)
490 mutex_obj_free(p
->p_lock
);
492 if (rump_proc_vfs_release
)
493 rump_proc_vfs_release(p
);
494 rump_cred_put(l
->l_cred
);
495 kmem_free(p
, sizeof(*p
));
497 KASSERT((l
->l_flag
& LW_WEXIT
) == 0);
498 l
->l_flag
|= LW_WEXIT
;
502 rump_lwp_free(struct lwp
*l
)
505 KASSERT(l
->l_flag
& LW_WEXIT
);
506 KASSERT(l
->l_mutex
== NULL
);
507 kmem_free(l
, sizeof(*l
));
511 rump_lwp_curlwp(void)
513 struct lwp
*l
= curlwp
;
515 if (l
->l_flag
& LW_WEXIT
)
520 /* rump private. NEEDS WORK! */
522 rump_set_vmspace(struct vmspace
*vm
)
524 struct proc
*p
= curproc
;
/*
 * Build a credential for the given identity.
 *
 * A credential is obtained from kauth_cred_alloc().  Its real, effective
 * and saved uid are all set to `uid', and its real, effective and saved
 * gid are all set to `gid'.  The supplementary group list `groups'
 * (`ngroups' entries) is then installed with kauth_cred_setgroups(),
 * using UIO_SYSSPACE.
 *
 * NOTE(review): the handling of `rv' and the return statement are not
 * visible in this excerpt; presumably the new credential is returned --
 * confirm against the full source.
 */
530 rump_cred_create(uid_t uid
, gid_t gid
, size_t ngroups
, gid_t
*groups
)
535 cred
= kauth_cred_alloc();
536 kauth_cred_setuid(cred
, uid
);
537 kauth_cred_seteuid(cred
, uid
);
538 kauth_cred_setsvuid(cred
, uid
);
539 kauth_cred_setgid(cred
, gid
);
/*
 * NOTE(review): this repeats the kauth_cred_setgid() call immediately
 * above with identical arguments; it looks redundant -- confirm and drop.
 */
540 kauth_cred_setgid(cred
, gid
);
541 kauth_cred_setegid(cred
, gid
);
542 kauth_cred_setsvgid(cred
, gid
);
543 rv
= kauth_cred_setgroups(cred
, groups
, ngroups
, 0, UIO_SYSSPACE
);
544 /* oh this is silly. and by "this" I mean kauth_cred_setgroups() */
551 rump_cred_put(kauth_cred_t cred
)
554 kauth_cred_free(cred
);
558 rump_cred_suserget(void)
561 kauth_cred_hold(rump_susercred
);
562 return rump_susercred
;
566 * Return the next system lwpid
573 mutex_enter(proc0
.p_lock
);
575 * Take next one, don't return 0
576 * XXX: most likely we'll have collisions in case this
579 if (++proc0
.p_nlwpid
== 0)
581 retid
= proc0
.p_nlwpid
;
582 mutex_exit(proc0
.p_lock
);
/*
 * Record an error and leave through the common exit path: stores `err'
 * in the enclosing function's local `rv' and jumps to its `out' label.
 * The using function must provide both.  The argument is parenthesized
 * so that any expression can be passed safely.
 */
#define ERROUT(err) do { rv = (err); goto out; } while (/*CONSTCOND*/0)
589 rump_module_init(struct modinfo
*mi
, prop_dictionary_t props
)
595 if (mi
->mi_name
== NULL
)
598 mutex_enter(&module_lock
);
599 if (module_lookup(mi
->mi_name
))
602 if (!module_compatible(mi
->mi_version
, __NetBSD_Version__
))
603 ERROUT(EPROGMISMATCH
);
605 rv
= mi
->mi_modcmd(MODULE_CMD_INIT
, props
);
607 mod
= kmem_zalloc(sizeof(*mod
), KM_SLEEP
);
610 if (mi
->mi_class
== MODULE_CLASS_SECMODEL
)
615 mutex_exit(&module_lock
);
620 rump_module_fini(struct modinfo
*mi
)
624 rv
= mi
->mi_modcmd(MODULE_CMD_FINI
, NULL
);
625 if (rv
== 0 && mi
->mi_class
== MODULE_CLASS_SECMODEL
)
626 secmodel_deregister();
632 rump_kernelfsym_load(void *symtab
, uint64_t symsize
,
633 char *strtab
, uint64_t strsize
)
635 static int inited
= 0;
643 * Use 64bit header since it's bigger. Shouldn't make a
644 * difference, since we're passing in all zeroes anyway.
646 memset(&ehdr
, 0, sizeof(ehdr
));
647 ksyms_addsyms_explicit(&ehdr
, symtab
, symsize
, strtab
, strsize
);
653 rump_sysproxy_local(int num
, void *arg
, uint8_t *data
, size_t dlen
,
657 struct sysent
*callp
;
660 if (__predict_false(num
>= SYS_NSYSENT
))
663 callp
= rump_sysent
+ num
;
666 rv
= callp
->sy_call(l
, (void *)data
, retval
);
680 rump_boot_sethowto(int howto
)
686 rump_sysproxy_t rump_sysproxy
= rump_sysproxy_local
;
687 void *rump_sysproxy_arg
;
690 * This whole syscall-via-rpc is still taking form. For example, it
691 * may be necessary to set syscalls individually instead of lobbing
692 * them all to the same place. So don't think this interface is
696 rump_sysproxy_set(rump_sysproxy_t proxy
, void *arg
)
699 if (rump_sysproxy_arg
)
702 rump_sysproxy_arg
= arg
;
703 rump_sysproxy
= proxy
;
709 rump_getversion(void)
712 return __NetBSD_Version__
;