2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.12 2007/06/29 21:54:15 dillon Exp $
38 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/systm.h>
41 #include <sys/sysproto.h>
42 #include <sys/kern_syscall.h>
45 #include <sys/malloc.h>
46 #include <sys/sysctl.h>
47 #include <sys/vkernel.h>
48 #include <sys/vmspace.h>
50 #include <vm/vm_extern.h>
54 #include <machine/vmparam.h>
56 #include <sys/spinlock2.h>
57 #include <sys/sysref2.h>
/*
 * Forward declarations for the vkernel vmspace lookup/teardown helpers,
 * plus the malloc zone (M_VKERNEL) used for all vkernel bookkeeping
 * allocations in this file.
 *
 * NOTE(review): this excerpt is a garbled extraction -- original source
 * line numbers are fused into the text and statements are split across
 * lines.  The vkernel_find_vmspace() prototype below is visibly truncated
 * after the ',' (its second parameter, presumably the 'void *id' seen in
 * the definition near the bottom of the file, is missing).  Restore this
 * file from revision history rather than editing the fragments in place.
 */
static struct vmspace_entry
*vkernel_find_vmspace(struct vkernel_common
*vc
,
61 static void vmspace_entry_delete(struct vmspace_entry
*ve
,
62 struct vkernel_common
*vc
);
64 static MALLOC_DEFINE(M_VKERNEL
, "vkernel", "VKernel structures");
/*
 * sys_vmspace_create(): syscall creating a new, empty VMSPACE keyed by a
 * caller-supplied id under the current process's vkernel side-structure.
 * Visible steps: lazily allocate the per-process vkernel (vk) and common
 * area (vc) with kmalloc(M_WAITOK|M_ZERO), init its spinlock and RB root,
 * reject duplicate ids via vkernel_find_vmspace(), then allocate the
 * vmspace_entry, vmspace_alloc() the full user address range, pmap_pinit2()
 * its pmap, and RB_INSERT it into vc->vc_root.
 *
 * NOTE(review): garbled extraction -- error-return statements, braces and
 * several lines (original lines 85-87, 99-102, etc.) are missing, and the
 * 'vk' variable's declaration is not visible in this excerpt.
 */
67 * vmspace_create (void *id, int type, void *data)
69 * Create a VMSPACE under the control of the caller with the specified id.
70 * An id of NULL cannot be used. The type and data fields must currently
73 * The vmspace starts out completely empty. Memory may be mapped into the
74 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
75 * with vmspace_mcontrol().
78 sys_vmspace_create(struct vmspace_create_args
*uap
)
80 struct vkernel_common
*vc
;
81 struct vmspace_entry
*ve
;
/* Bail out early when the vkernel feature is administratively disabled. */
84 if (vkernel_enable
== 0)
88 * Create a virtual kernel side-structure for the process if one
91 if ((vk
= curproc
->p_vkernel
) == NULL
) {
92 vk
= kmalloc(sizeof(*vk
), M_VKERNEL
, M_WAITOK
|M_ZERO
);
93 vc
= kmalloc(sizeof(*vc
), M_VKERNEL
, M_WAITOK
|M_ZERO
);
95 spin_init(&vc
->vc_spin
);
96 RB_INIT(&vc
->vc_root
);
98 curproc
->p_vkernel
= vk
;
103 * Create a new VMSPACE
/* Duplicate-id check; the rejection path itself is missing from this excerpt. */
105 if (vkernel_find_vmspace(vc
, uap
->id
))
107 ve
= kmalloc(sizeof(struct vmspace_entry
), M_VKERNEL
, M_WAITOK
|M_ZERO
);
108 ve
->vmspace
= vmspace_alloc(VM_MIN_USER_ADDRESS
, VM_MAX_USER_ADDRESS
);
110 pmap_pinit2(vmspace_pmap(ve
->vmspace
));
111 RB_INSERT(vmspace_rb_tree
, &vc
->vc_root
, ve
);
/*
 * sys_vmspace_destroy(): syscall destroying the VMSPACE identified by
 * uap->id.  Visible steps: fail if the process has no vkernel structure,
 * fail if the id does not resolve via vkernel_find_vmspace(), otherwise
 * tear the entry down with vmspace_entry_delete().
 *
 * NOTE(review): garbled extraction -- the error-return statements after
 * both guard 'if's, and the assignment populating 'vc', are missing.
 */
116 * vmspace_destroy (void *id)
121 sys_vmspace_destroy(struct vmspace_destroy_args
*uap
)
123 struct vkernel_common
*vc
;
124 struct vmspace_entry
*ve
;
127 if ((vk
= curproc
->p_vkernel
) == NULL
)
130 if ((ve
= vkernel_find_vmspace(vc
, uap
->id
)) == NULL
)
134 vmspace_entry_delete(ve
, vc
);
/*
 * sys_vmspace_ctl(): syscall transferring control of the current LWP into
 * a VMSPACE.  For VMSPACE_CTL_RUN the visible sequence is: save the
 * caller's trapframe and TLS into the vkernel save area, copyin() the
 * user-supplied trapframe and TLS, sanitize both with
 * cpu_sanitize_frame()/cpu_sanitize_tls() (restoring the saved context on
 * failure), then switch the LWP's pmap to the target vmspace with
 * pmap_setlwpvm() and install the FP state via set_vkernel_fp().
 * Returns EJUSTRETURN per the comment so the syscall path does not adjust
 * the freshly-installed context.
 *
 * NOTE(review): garbled extraction -- the switch statement head, error
 * branches between the copyin/sanitize calls, the P_MAILBOX restart
 * handling body, and the declarations of 'vk', 'p', 'lwp', 'framesz' and
 * 'error' are missing from this excerpt.
 */
139 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
140 * struct vextframe *vframe);
142 * Transfer control to a VMSPACE. Control is returned after the specified
143 * number of microseconds or if a page fault, signal, trap, or system call
144 * occurs. The context is updated as appropriate.
147 sys_vmspace_ctl(struct vmspace_ctl_args
*uap
)
149 struct vkernel_common
*vc
;
150 struct vmspace_entry
*ve
;
157 lwp
= curthread
->td_lwp
;
160 if ((vk
= p
->p_vkernel
) == NULL
)
163 if ((ve
= vkernel_find_vmspace(vc
, uap
->id
)) == NULL
)
167 * Signal mailbox interlock
/* Clear the pending-mailbox flag before entering the guest context. */
169 if (p
->p_flag
& P_MAILBOX
) {
170 p
->p_flag
&= ~P_MAILBOX
;
175 case VMSPACE_CTL_RUN
:
177 * Save the caller's register context, swap VM spaces, and
178 * install the passed register context. Return with
179 * EJUSTRETURN so the syscall code doesn't adjust the context.
182 framesz
= sizeof(struct trapframe
);
183 vk
->vk_user_trapframe
= uap
->tframe
;
184 vk
->vk_user_vextframe
= uap
->vframe
;
185 bcopy(uap
->sysmsg_frame
, &vk
->vk_save_trapframe
, framesz
);
186 bcopy(&curthread
->td_tls
, &vk
->vk_save_vextframe
.vx_tls
,
187 sizeof(vk
->vk_save_vextframe
.vx_tls
));
188 error
= copyin(uap
->tframe
, uap
->sysmsg_frame
, framesz
);
190 error
= copyin(&uap
->vframe
->vx_tls
, &curthread
->td_tls
, sizeof(struct savetls
));
192 error
= cpu_sanitize_frame(uap
->sysmsg_frame
);
194 error
= cpu_sanitize_tls(&curthread
->td_tls
);
/* On a sanitize/copyin failure the saved context is restored below. */
196 bcopy(&vk
->vk_save_trapframe
, uap
->sysmsg_frame
, framesz
);
197 bcopy(&vk
->vk_save_vextframe
.vx_tls
, &curthread
->td_tls
,
198 sizeof(vk
->vk_save_vextframe
.vx_tls
));
203 pmap_setlwpvm(lwp
, ve
->vmspace
);
205 set_vkernel_fp(uap
->sysmsg_frame
);
/*
 * sys_vmspace_mmap(): mmap() into a guest VMSPACE rather than the caller's
 * own map.  Resolves the entry by id, then forwards all arguments to
 * kern_mmap() against ve->vmspace, returning the mapped address through
 * uap->sysmsg_resultp.
 *
 * NOTE(review): garbled extraction -- error returns after the two guard
 * 'if's, the 'vc' assignment, and the declarations of 'vk'/'error' are
 * missing from this excerpt.
 */
217 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
219 * map memory within a VMSPACE. This function is just like a normal mmap()
220 * but operates on the vmspace's memory map. Most callers use this to create
221 * a MAP_VPAGETABLE mapping.
224 sys_vmspace_mmap(struct vmspace_mmap_args
*uap
)
226 struct vkernel_common
*vc
;
227 struct vmspace_entry
*ve
;
231 if ((vk
= curproc
->p_vkernel
) == NULL
)
234 if ((ve
= vkernel_find_vmspace(vc
, uap
->id
)) == NULL
)
236 error
= kern_mmap(ve
->vmspace
, uap
->addr
, uap
->len
,
237 uap
->prot
, uap
->flags
,
238 uap
->fd
, uap
->offset
, &uap
->sysmsg_resultp
);
/*
 * sys_vmspace_munmap(): munmap() within a guest VMSPACE.  After resolving
 * the entry by id, it replicates sys_munmap()'s validation: page-align the
 * address/length, reject wrap-around (addr + size < addr), bound the range
 * against VM_MIN/MAX_USER_ADDRESS, verify the range is fully mapped via
 * vm_map_check_protection(..., VM_PROT_NONE), then vm_map_remove() it.
 *
 * NOTE(review): garbled extraction -- the 'addr'/'map' declarations, the
 * lines folding 'pageoff' into addr/size (original lines 267-271), and
 * every error-return statement are missing from this excerpt.
 */
243 * vmspace_munmap(id, addr, len)
245 * unmap memory within a VMSPACE.
248 sys_vmspace_munmap(struct vmspace_munmap_args
*uap
)
250 struct vkernel_common
*vc
;
251 struct vmspace_entry
*ve
;
254 vm_size_t size
, pageoff
;
257 if ((vk
= curproc
->p_vkernel
) == NULL
)
260 if ((ve
= vkernel_find_vmspace(vc
, uap
->id
)) == NULL
)
264 * Copied from sys_munmap()
266 addr
= (vm_offset_t
)uap
->addr
;
269 pageoff
= (addr
& PAGE_MASK
);
272 size
= (vm_size_t
)round_page(size
);
/* Reject ranges that wrap the address space. */
273 if (addr
+ size
< addr
)
278 if (VM_MAX_USER_ADDRESS
> 0 && addr
+ size
> VM_MAX_USER_ADDRESS
)
280 if (VM_MIN_USER_ADDRESS
> 0 && addr
< VM_MIN_USER_ADDRESS
)
282 map
= &ve
->vmspace
->vm_map
;
283 if (!vm_map_check_protection(map
, addr
, addr
+ size
, VM_PROT_NONE
))
285 vm_map_remove(map
, addr
, addr
+ size
);
/*
 * sys_vmspace_pread(): read bytes out of a guest VMSPACE.  Only the id
 * resolution guards are visible here; the actual transfer logic (original
 * lines after 307) is missing from this excerpt.  Per the header comment,
 * a short read indicates an unresolved page fault in the target VMSPACE.
 *
 * NOTE(review): garbled extraction -- error returns, the 'vc' assignment
 * and the function body past the lookup are missing.
 */
290 * vmspace_pread(id, buf, nbyte, flags, offset)
292 * Read data from a vmspace. The number of bytes read is returned or
293 * -1 if an unrecoverable error occured. If the number of bytes read is
294 * less then the request size, a page fault occured in the VMSPACE which
295 * the caller must resolve in order to proceed.
298 sys_vmspace_pread(struct vmspace_pread_args
*uap
)
300 struct vkernel_common
*vc
;
301 struct vmspace_entry
*ve
;
304 if ((vk
= curproc
->p_vkernel
) == NULL
)
307 if ((ve
= vkernel_find_vmspace(vc
, uap
->id
)) == NULL
)
/*
 * sys_vmspace_pwrite(): write bytes into a guest VMSPACE; mirror image of
 * sys_vmspace_pread() above.  Only the id resolution guards are visible
 * here; the transfer logic (original lines after 330) is missing from
 * this excerpt.
 *
 * NOTE(review): garbled extraction -- error returns, the 'vc' assignment
 * and the function body past the lookup are missing.
 */
313 * vmspace_pwrite(id, buf, nbyte, flags, offset)
315 * Write data to a vmspace. The number of bytes written is returned or
316 * -1 if an unrecoverable error occured. If the number of bytes written is
317 * less then the request size, a page fault occured in the VMSPACE which
318 * the caller must resolve in order to proceed.
321 sys_vmspace_pwrite(struct vmspace_pwrite_args
*uap
)
323 struct vkernel_common
*vc
;
324 struct vmspace_entry
*ve
;
327 if ((vk
= curproc
->p_vkernel
) == NULL
)
330 if ((ve
= vkernel_find_vmspace(vc
, uap
->id
)) == NULL
)
/*
 * sys_vmspace_mcontrol(): madvise/mcontrol against a guest VMSPACE.
 * After resolving the entry by id it replicates sys_mcontrol()'s
 * validation -- behav within [0, MADV_CONTROL_END], range inside
 * VM_MIN/MAX_USER_ADDRESS, no wrap-around -- then page-aligns the range
 * with trunc_page()/round_page() and dispatches to vm_map_madvise() on
 * ve->vmspace->vm_map with uap->behav and uap->value.
 *
 * NOTE(review): garbled extraction -- the error-return statements after
 * each validation 'if', the 'vc' assignment, and the 'vk' declaration are
 * missing from this excerpt.
 */
336 * vmspace_mcontrol(id, addr, len, behav, value)
338 * madvise/mcontrol support for a vmspace.
341 sys_vmspace_mcontrol(struct vmspace_mcontrol_args
*uap
)
343 struct vkernel_common
*vc
;
344 struct vmspace_entry
*ve
;
346 vm_offset_t start
, end
;
348 if ((vk
= curproc
->p_vkernel
) == NULL
)
351 if ((ve
= vkernel_find_vmspace(vc
, uap
->id
)) == NULL
)
355 * This code is basically copied from sys_mcontrol()
357 if (uap
->behav
< 0 || uap
->behav
> MADV_CONTROL_END
)
360 if (VM_MAX_USER_ADDRESS
> 0 &&
361 ((vm_offset_t
) uap
->addr
+ uap
->len
) > VM_MAX_USER_ADDRESS
)
363 if (VM_MIN_USER_ADDRESS
> 0 && uap
->addr
< VM_MIN_USER_ADDRESS
)
/* Reject ranges that wrap the address space. */
365 if (((vm_offset_t
) uap
->addr
+ uap
->len
) < (vm_offset_t
) uap
->addr
)
368 start
= trunc_page((vm_offset_t
) uap
->addr
);
369 end
= round_page((vm_offset_t
) uap
->addr
+ uap
->len
);
371 return (vm_map_madvise(&ve
->vmspace
->vm_map
, start
, end
,
372 uap
->behav
, uap
->value
));
/*
 * Red-black tree instantiation: vmspace_entry nodes are linked through
 * their rb_entry field and ordered by rb_vmspace_compare() (id-based
 * ordering, defined below).  RB_GENERATE emits the tree operations used
 * elsewhere in this file (RB_INSERT/RB_REMOVE/RB_FIND/RB_SCAN).
 */
376 * Red black tree functions
378 static int rb_vmspace_compare(struct vmspace_entry
*, struct vmspace_entry
*);
379 RB_GENERATE(vmspace_rb_tree
, vmspace_entry
, rb_entry
, rb_vmspace_compare
);
/*
 * rb_vmspace_compare(): tree comparator ordering vmspace_entry nodes by
 * the raw pointer value of their 'id' field (compared as char *).
 *
 * NOTE(review): garbled extraction -- the 'static int' return line, the
 * braces and all three return statements (presumably -1 / 1 / 0 given the
 * comparator convention) are missing from this excerpt.
 */
381 /* a->start is address, and the only field has to be initialized */
383 rb_vmspace_compare(struct vmspace_entry
*a
, struct vmspace_entry
*b
)
385 if ((char *)a
->id
< (char *)b
->id
)
387 else if ((char *)a
->id
> (char *)b
->id
)
/*
 * rb_vmspace_delete(): RB_SCAN callback used by vkernel_exit() to destroy
 * every remaining vmspace_entry.  Asserts the entry is unreferenced
 * (ve->refs == 0), then delegates to vmspace_entry_delete().  The opaque
 * 'data' argument is the owning vkernel_common.
 *
 * NOTE(review): garbled extraction -- the return type line, braces and
 * the callback's return statement are missing from this excerpt.
 */
394 rb_vmspace_delete(struct vmspace_entry
*ve
, void *data
)
396 struct vkernel_common
*vc
= data
;
398 KKASSERT(ve
->refs
== 0);
399 vmspace_entry_delete(ve
, vc
);
/*
 * vmspace_entry_delete(): unlink a vmspace_entry from vc->vc_root and
 * destroy it.  Teardown order matters: pmap_remove_pages() over the whole
 * user range first, then vm_map_remove() on the map, then drop the
 * vmspace's sysref (sysref_put) before kfree()ing the entry itself.
 */
404 * Remove a vmspace_entry from the RB tree and destroy it. We have to clean
405 * up the pmap, the vm_map, then destroy the vmspace.
409 vmspace_entry_delete(struct vmspace_entry
*ve
, struct vkernel_common
*vc
)
411 RB_REMOVE(vmspace_rb_tree
, &vc
->vc_root
, ve
);
413 pmap_remove_pages(vmspace_pmap(ve
->vmspace
),
414 VM_MIN_USER_ADDRESS
, VM_MAX_USER_ADDRESS
);
415 vm_map_remove(&ve
->vmspace
->vm_map
,
416 VM_MIN_USER_ADDRESS
, VM_MAX_USER_ADDRESS
);
417 sysref_put(&ve
->vmspace
->vm_sysref
);
418 kfree(ve
, M_VKERNEL
);
/*
 * vkernel_find_vmspace(): look up a vmspace_entry by id in vc->vc_root
 * using a stack 'key' entry and RB_FIND.
 *
 * NOTE(review): garbled extraction -- the 'static' qualifier, braces, the
 * line populating key (presumably key.id = id; see the comparator above),
 * and the 'return (ve);' are missing from this excerpt.
 */
423 struct vmspace_entry
*
424 vkernel_find_vmspace(struct vkernel_common
*vc
, void *id
)
426 struct vmspace_entry
*ve
;
427 struct vmspace_entry key
;
430 ve
= RB_FIND(vmspace_rb_tree
, &vc
->vc_root
, &key
);
/*
 * vkernel_inherit(): fork() support -- give the child process (p2) its own
 * vkernel structure sharing the parent's (p1) common area.  Visible steps:
 * assert and bump vc->vc_refs atomically, then kmalloc a zeroed vkernel
 * for the child.
 *
 * NOTE(review): garbled extraction -- the 'vc' assignment from p1, the
 * lines linking vk to vc and to p2, and the 'vk' declaration are missing
 * from this excerpt.
 */
435 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
439 vkernel_inherit(struct proc
*p1
, struct proc
*p2
)
441 struct vkernel_common
*vc
;
446 KKASSERT(vc
->vc_refs
> 0);
447 atomic_add_int(&vc
->vc_refs
, 1);
448 vk
= kmalloc(sizeof(*vk
), M_VKERNEL
, M_WAITOK
|M_ZERO
);
/*
 * vkernel_exit(): exit() support -- detach and destroy the process's
 * vkernel state.  Visible steps: clear vk->vk_common; for every LWP still
 * running inside a guest vmspace (lp->lwp_ve != NULL) print a warning
 * (this path is not supposed to be reached -- the LWP should have gone
 * through vkernel_trap() first), dump a backtrace, switch the LWP's pmap
 * back to the process's own vmspace and assert the entry was referenced;
 * finally drop vc->vc_refs under vc_spin and, on the last reference,
 * RB_SCAN-delete every remaining vmspace_entry via rb_vmspace_delete()
 * and kfree both the common area and the vkernel structure.
 *
 * NOTE(review): garbled extraction -- the 'vk'/'lp' declarations, the
 * kprintf argument list (only the format string survives), ref-count
 * release on ve, and several braces/else paths are missing.
 */
454 vkernel_exit(struct proc
*p
)
456 struct vkernel_common
*vc
;
457 struct vmspace_entry
*ve
;
465 vk
->vk_common
= NULL
;
468 * Restore the original VM context if we are killed while running
471 * This isn't supposed to happen. What is supposed to happen is
472 * that the process should enter vkernel_trap() before the handling
475 LIST_FOREACH(lp
, &p
->p_lwps
, lwp_list
) {
476 if ((ve
= lp
->lwp_ve
) != NULL
) {
477 kprintf("Warning, pid %d killed with active VC!\n",
480 db_print_backtrace();
483 pmap_setlwpvm(lp
, p
->p_vmspace
);
484 KKASSERT(ve
->refs
> 0);
490 * Dereference the common area
492 KKASSERT(vc
->vc_refs
> 0);
493 spin_lock_wr(&vc
->vc_spin
);
494 if (--vc
->vc_refs
== 0)
496 spin_unlock_wr(&vc
->vc_spin
);
/* Last reference: destroy every remaining guest vmspace, then the common area. */
499 RB_SCAN(vmspace_rb_tree
, &vc
->vc_root
, NULL
,
500 rb_vmspace_delete
, vc
);
501 kfree(vc
, M_VKERNEL
);
503 kfree(vk
, M_VKERNEL
);
/*
 * vkernel_trap(): return control from an emulated (guest) context to the
 * virtual kernel process after a trap/syscall/fault.  Visible steps:
 * assert an active vmspace_entry, switch the LWP's pmap back to the
 * virtual kernel's own vmspace (pmap_setlwpvm), copyout the emulated
 * frame to the user-supplied vk_user_trapframe, then restore the saved
 * trapframe and TLS so the virtual kernel process resumes where it
 * called vmspace_ctl.
 *
 * NOTE(review): garbled extraction AND truncated -- this function
 * continues past the end of the visible excerpt (no closing brace or
 * return is shown), and the 'vk'/'error' declarations and the 've'
 * assignment are missing.  Do not edit in place; restore from history.
 */
507 * A VM space under virtual kernel control trapped out or made a system call
508 * or otherwise needs to return control to the virtual kernel context.
511 vkernel_trap(struct lwp
*lp
, struct trapframe
*frame
)
513 struct proc
*p
= lp
->lwp_proc
;
514 struct vmspace_entry
*ve
;
519 * Which vmspace entry was running?
523 KKASSERT(ve
!= NULL
);
526 * Switch the LWP vmspace back to the virtual kernel's VM space.
529 pmap_setlwpvm(lp
, p
->p_vmspace
);
530 KKASSERT(ve
->refs
> 0);
534 * Copy the emulated process frame to the virtual kernel process.
535 * The emulated process cannot change TLS descriptors so don't
536 * bother saving them, we already have a copy.
538 * Restore the virtual kernel's saved context so the virtual kernel
539 * process can resume.
541 error
= copyout(frame
, vk
->vk_user_trapframe
, sizeof(*frame
));
542 bcopy(&vk
->vk_save_trapframe
, frame
, sizeof(*frame
));
543 bcopy(&vk
->vk_save_vextframe
.vx_tls
, &curthread
->td_tls
,
544 sizeof(vk
->vk_save_vextframe
.vx_tls
));