/*	$NetBSD: sysv_shm.c,v 1.116 2009/03/06 20:31:54 joerg Exp $	*/

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles M.
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.116 2009/03/06 20:31:54 joerg Exp $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

int			shm_nused;
struct shmid_ds		*shmsegs;

struct shmmap_entry {
	SLIST_ENTRY(shmmap_entry) next;
	vaddr_t		va;
	int		shmid;
};

static kmutex_t		shm_lock;
static kcondvar_t *	shm_cv;
static struct pool	shmmap_entry_pool;
static int		shm_last_free, shm_use_phys;
static size_t		shm_committed;

static kcondvar_t	shm_realloc_cv;
static bool		shm_realloc_state;
static u_int		shm_realloc_disable;
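
/*
 * Locking notes (descriptive): shm_lock protects shmsegs[], shm_cv[] and
 * the counters above.  shmrealloc() sets shm_realloc_state while it swaps
 * the arrays, and every entry point that finds it set waits on
 * shm_realloc_cv.  shm_realloc_disable counts operations (e.g. shmat())
 * that dropped shm_lock mid-way and therefore must block a reallocation.
 */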

struct shmmap_state {
	unsigned int nitems;
	unsigned int nrefs;
	SLIST_HEAD(, shmmap_entry) entries;
};
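
/*
 * Each vmspace carries at most one struct shmmap_state (vm_shm); it may be
 * shared between parent and child after fork() (nrefs > 1) and is made
 * private by shmmap_getprivate() before it is modified.
 */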

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the shared memory segment by the identifier.
 *  => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shminfo.shmmni)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
		return NULL;
	if ((shmseg->shm_perm.mode &
	    (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
		return NULL;
	if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
		return NULL;

	return shmseg;
}

/*
 * Free memory segment.
 *  => must be called with shm_lock held;
 */
static void
shm_free_segment(int segnum)
{
	struct shmid_ds *shmseg;
	size_t size;
	bool wanted;

	KASSERT(mutex_owned(&shm_lock));

	shmseg = &shmsegs[segnum];
	SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
	    shmseg->shm_perm._key, shmseg->shm_perm._seq));

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

	shmseg->_shm_internal = NULL;
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
	shm_last_free = segnum;
	if (wanted == true)
		cv_broadcast(&shm_cv[segnum]);
}

/*
 * Delete entry from the shm map.
 *  => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for that vmspace.  Note that memory
 * allocation may be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1, used by ourselves, thus return */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map, copy to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}

/*
 * Lock/unlock the memory.
 *  => must be called with shm_lock held;
 *  => called from one place, thus, inline;
 */
static int
shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd)
{
	struct proc *p = l->l_proc;
	struct shmmap_entry *shmmap_se;
	struct shmmap_state *shmmap_s;
	size_t size;
	int error;

	KASSERT(mutex_owned(&shm_lock));
	shmmap_s = shmmap_getprivate(p);

	/* Find our shared memory address by shmid */
	SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
		if (shmmap_se->shmid != shmid)
			continue;

		size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;

		if (cmd == SHM_LOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
			/* Wire the object and map, then tag it */
			error = uobj_wirepages(shmseg->_shm_internal, 0, size);
			if (error)
				return error;
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, false, 0);
			if (error) {
				uobj_unwirepages(shmseg->_shm_internal, 0, size);
				return error;
			}
			shmseg->shm_perm.mode |= SHMSEG_WIRED;

		} else if (cmd == SHM_UNLOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
			/* Unwire the object and map, then untag it */
			uobj_unwirepages(shmseg->_shm_internal, 0, size);
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, true, 0);
			if (error)
				return error;
			shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
		}
	}

	return 0;
}

/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) shmaddr;
	} */
	struct proc *p = l->l_proc;
	struct shmmap_state *shmmap_s1, *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct shmid_ds *shmseg;
	size_t size;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s1 == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	/* Find the map entry */
	SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
		if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
			break;
	if (shmmap_se == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	shmmap_s = shmmap_getprivate(p);
	if (shmmap_s != shmmap_s1) {
		/* Map has been copied, lookup entry in new map */
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
				break;
		if (shmmap_se == NULL) {
			mutex_exit(&shm_lock);
			return EINVAL;
		}
	}

	SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

	/* Delete the entry from shm map */
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	mutex_exit(&shm_lock);

	uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
	if (uobj != NULL)
		uao_detach(uobj);
	pool_put(&shmmap_entry_pool, shmmap_se);

	return 0;
}
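
/*
 * Attach shared memory to the calling process' address space (shmat(2)).
 */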
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */
	int error, flags = 0;
	struct proc *p = l->l_proc;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct vmspace *vm;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/* Allocate a new map entry and set it */
	shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK);
	shmmap_se->shmid = SCARG(uap, shmid);

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL) {
		error = EINVAL;
		goto err;
	}
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto err;

	vm = p->p_vmspace;
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
		error = EMFILE;
		goto err;
	}

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	prot = VM_PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else {
			error = EINVAL;
			goto err;
		}
	} else {
		/* This is just a hint to uvm_map() about where to put it. */
		attach_va = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)vm->vm_daddr, size);
	}

	/*
	 * Create a map entry, add it to the list and increase the counters.
	 * The lock will be dropped before the mapping, disable reallocation.
	 */
	shmmap_s = shmmap_getprivate(p);
	SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	shmmap_s->nitems++;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_nattch++;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/*
	 * Add a reference to the memory object, map it to the
	 * address space, and lock the memory, if needed.
	 */
	uobj = shmseg->_shm_internal;
	uao_reference(uobj);
	error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
	if (error)
		goto err_detach;
	if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) {
		error = uvm_map_pageable(&vm->vm_map, attach_va,
		    attach_va + size, false, 0);
		if (error) {
			uvm_deallocate(&vm->vm_map, attach_va, size);
			goto err_detach;
		}
	}

	/* Set the new address, and update the time */
	mutex_enter(&shm_lock);
	shmmap_se->va = attach_va;
	shmseg->shm_atime = time_second;
	shm_realloc_disable--;
	retval[0] = attach_va;
	SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, attach_va));
err:
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (error && shmmap_se)
		pool_put(&shmmap_entry_pool, shmmap_se);
	return error;

err_detach:
	uao_detach(uobj);
	mutex_enter(&shm_lock);
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	pool_put(&shmmap_entry_pool, shmmap_se);
	return error;
}
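
/*
 * Userland usage sketch (illustrative only, not kernel code): the
 * attach/detach pair above is normally driven by
 *
 *	int id = shmget(IPC_PRIVATE, len, IPC_CREAT | 0600);
 *	void *p = shmat(id, NULL, 0);	-> sys_shmat(), kernel picks the VA
 *	...use p...
 *	shmdt(p);			-> sys_shmdt()
 *	shmctl(id, IPC_RMID, NULL);	-> sys___shmctl50() / shmctl1()
 */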

/*
 * Shared memory control operations.
 */
int
sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */
	struct shmid_ds shmbuf;
	int cmd, error;

	cmd = SCARG(uap, cmd);
	if (cmd == IPC_SET) {
		error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
		if (error)
			return error;
	}

	error = shmctl1(l, SCARG(uap, shmid), cmd,
	    (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

	if (error == 0 && cmd == IPC_STAT)
		error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

	return error;
}

int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
	struct uvm_object *uobj = NULL;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	int error = 0;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			break;
		memcpy(shmbuf, shmseg, sizeof(struct shmid_ds));
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
		shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmbuf->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm._key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			uobj = shmseg->_shm_internal;
			shm_free_segment(IPCID_TO_IX(shmid));
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
		if ((error = kauth_authorize_generic(cred,
		    KAUTH_GENERIC_ISSUSER, NULL)) != 0)
			break;
		error = shm_memlock(l, shmseg, shmid, cmd);
		break;
	default:
		error = EINVAL;
	}

	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	return error;
}

/*
 * Try to take an already existing segment.
 *  => must be called with shm_lock held;
 *  => called from one place, thus, inline;
 */
static int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	int segnum, error;
again:
	KASSERT(mutex_owned(&shm_lock));

	/* Find segment by key */
	for (segnum = 0; segnum < shminfo.shmmni; segnum++)
		if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
			break;
	if (segnum == shminfo.shmmni) {
		/* Not found: let the caller create a new segment. */
		return -1;
	}

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/*
	 * First check the flags, to generate a useful error when a
	 * segment already exists.
	 */
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	/* Check the permission and segment size. */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%x shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check for the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = (size + PGOFSET) & ~PGOFSET;
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, thus mark the
	 * segment present but removed, so that no other thread can take it.
	 * Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uobj_wirepages(shmseg->_shm_internal, 0, size);
		if (error) {
			uao_detach(shmseg->_shm_internal);
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Note: while the segment is marked, there is no need to hold the
	 * lock while setting it up (except for shm_perm.mode).
	 */
	shmseg->shm_perm._key = SCARG(uap, key);
	shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = l->l_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;

	/*
	 * Segment is initialized.
	 * Enter the lock, mark as allocated, and notify waiters (if any).
	 * Also, unmark the state of reallocation.
	 */
	mutex_enter(&shm_lock);
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
	    SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		cv_broadcast(&shm_cv[segnum]);
	}
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	return error;
}
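
/*
 * Inherit the shm map across fork(): the child shares the parent's
 * shmmap_state and every attached segment gains one attach reference.
 */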
void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
	mutex_enter(&shm_lock);
	vm2->vm_shm = vm1->vm_shm;
	if (vm1->vm_shm) {
		shmmap_s = (struct shmmap_state *)vm1->vm_shm;
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
		shmmap_s->nrefs++;
	}
	mutex_exit(&shm_lock);
}
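
/*
 * Release the shm map when a vmspace is torn down (exit/exec): drop the
 * reference, and if it was the last one, detach every remaining segment.
 */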
void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	mutex_enter(&shm_lock);
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s == NULL) {
		mutex_exit(&shm_lock);
		return;
	}
	vm->vm_shm = NULL;

	if (--shmmap_s->nrefs > 0) {
		SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
		    vm, shmmap_s->nitems, shmmap_s->nrefs));
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
		}
		mutex_exit(&shm_lock);
		return;
	}

	SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
	if (shmmap_s->nitems == 0) {
		mutex_exit(&shm_lock);
		kmem_free(shmmap_s, sizeof(struct shmmap_state));
		return;
	}

	/*
	 * Delete the entries from the shm map.
	 */
	for (;;) {
		struct shmid_ds *shmseg;
		struct uvm_object *uobj;
		size_t sz;

		shmmap_se = SLIST_FIRST(&shmmap_s->entries);
		KASSERT(shmmap_se != NULL);

		shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
		sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
		/* shm_delete_mapping() removes from the list. */
		uobj = shm_delete_mapping(shmmap_s, shmmap_se);
		mutex_exit(&shm_lock);

		uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
		if (uobj != NULL)
			uao_detach(uobj);
		pool_put(&shmmap_entry_pool, shmmap_se);

		if (SLIST_EMPTY(&shmmap_s->entries))
			break;
		mutex_enter(&shm_lock);
		KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
	}
	kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

static int
shmrealloc(int newshmni)
{
	vaddr_t v;
	struct shmid_ds *oldshmsegs, *newshmsegs;
	kcondvar_t *newshm_cv, *oldshm_cv;
	size_t sz;
	int i, lsegid, oldshmni;

	/* Allocate new memory area */
	sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
	    ALIGN(newshmni * sizeof(kcondvar_t));
	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;

	mutex_enter(&shm_lock);
	while (shm_realloc_state || shm_realloc_disable)
		cv_wait(&shm_realloc_cv, &shm_lock);

	/*
	 * Get the number of the last used segment.  Fail if we are trying
	 * to reallocate less memory than is in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)((uintptr_t)newshmsegs +
	    ALIGN(newshmni * sizeof(struct shmid_ds)));

	/* Copy all memory to the new area */
	for (i = 0; i < shm_nused; i++)
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));

	/* Mark as free all new segments, if there are any */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	oldshmni = shminfo.shmmni;
	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	/* Release now unused resources. */
	oldshm_cv = (void *)((uintptr_t)oldshmsegs +
	    ALIGN(oldshmni * sizeof(struct shmid_ds)));
	for (i = 0; i < oldshmni; i++)
		cv_destroy(&oldshm_cv[i]);

	sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
	    ALIGN(oldshmni * sizeof(kcondvar_t));
	uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);

	return 0;
}
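
/*
 * Initialize the SysV shared memory subsystem at boot time.
 */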
void
shminit(void)
{
	vaddr_t v;
	size_t sz;
	int i;

	mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
	pool_init(&shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0,
	    "shmmp", &pool_allocator_nointr, IPL_NONE);
	cv_init(&shm_realloc_cv, "shmrealc");

	/* Allocate the wired memory for our structures */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		panic("sysv_shm: cannot allocate memory");
	shmsegs = (void *)v;
	shm_cv = (void *)((uintptr_t)shmsegs +
	    ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));

	if (shminfo.shmmax == 0)
		shminfo.shmmax = max(physmem / 4, 1024) * PAGE_SIZE;
	else
		shminfo.shmmax *= PAGE_SIZE;
	shminfo.shmall = shminfo.shmmax / PAGE_SIZE;

	for (i = 0; i < shminfo.shmmni; i++) {
		cv_init(&shm_cv[i], "shmwait");
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm._seq = 0;
	}

	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shm_realloc_disable = 0;
	shm_realloc_state = false;
}
)
985 struct sysctlnode node
;
987 node
.sysctl_data
= &newsize
;
989 newsize
= shminfo
.shmmni
;
990 error
= sysctl_lookup(SYSCTLFN_CALL(&node
));
991 if (error
|| newp
== NULL
)
995 error
= shmrealloc(newsize
);
1001 sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS
)
1005 struct sysctlnode node
;
1007 node
.sysctl_data
= &newsize
;
1009 newsize
= shminfo
.shmall
;
1010 error
= sysctl_lookup(SYSCTLFN_CALL(&node
));
1011 if (error
|| newp
== NULL
)
1017 shminfo
.shmall
= newsize
;
1018 shminfo
.shmmax
= (uint64_t)shminfo
.shmall
* PAGE_SIZE
;

static int
sysctl_ipc_shmmax(SYSCTLFN_ARGS)
{
	uint64_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmax;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < PAGE_SIZE)
		return EINVAL;

	shminfo.shmmax = round_page(newsize);
	shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;

	return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "ipc",
		SYSCTL_DESCR("SysV IPC options"),
		NULL, 0, NULL, 0,
		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_QUAD, "shmmax",
		SYSCTL_DESCR("Max shared memory segment size in bytes"),
		sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmmni",
		SYSCTL_DESCR("Max number of shared memory identifiers"),
		sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmseg",
		SYSCTL_DESCR("Max shared memory segments per process"),
		NULL, 0, &shminfo.shmseg, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmmaxpgs",
		SYSCTL_DESCR("Max amount of shared memory in pages"),
		sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shm_use_phys",
		SYSCTL_DESCR("Enable/disable locking of shared memory in "
		    "physical memory"), NULL, 0, &shm_use_phys, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}
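
/*
 * Example (userland, illustrative): the nodes registered above appear as
 *
 *	sysctl -w kern.ipc.shmmni=256
 *	sysctl -w kern.ipc.shmmax=134217728
 *
 * Writing shmmni runs shmrealloc(); writing shmmax rounds the value to a
 * page boundary and recomputes shmmaxpgs accordingly.
 */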