Try to fixup the mess of mdoc(7)/man(7) mixture as created by the merge.
[netbsd-mini2440.git] / sys / kern / sysv_msg.c
bloba31fdb68cb36665bc99d7ef52b624544cd4d74fd
1 /* $NetBSD: sysv_msg.c,v 1.60 2009/01/26 13:08:48 njoly Exp $ */
3 /*-
4 * Copyright (c) 1999, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
34 * Implementation of SVID messages
36 * Author: Daniel Boulet
38 * Copyright 1993 Daniel Boulet and RTMX Inc.
40 * This system call was implemented by Daniel Boulet under contract from RTMX.
42 * Redistribution and use in source forms, with and without modification,
43 * are permitted provided that this entire comment appears intact.
45 * Redistribution in binary form may occur without any restrictions.
46 * Obviously, it would be nice if you gave credit where credit is due
47 * but requiring it would be too onerous.
49 * This software is provided ``AS IS'' without any warranties of any kind.
52 #include <sys/cdefs.h>
53 __KERNEL_RCSID(0, "$NetBSD: sysv_msg.c,v 1.60 2009/01/26 13:08:48 njoly Exp $");
55 #define SYSVMSG
57 #include <sys/param.h>
58 #include <sys/kernel.h>
59 #include <sys/msg.h>
60 #include <sys/sysctl.h>
61 #include <sys/mount.h> /* XXX for <sys/syscallargs.h> */
62 #include <sys/syscallargs.h>
63 #include <sys/kauth.h>
65 #define MSG_DEBUG
66 #undef MSG_DEBUG_OK
68 #ifdef MSG_DEBUG_OK
69 #define MSG_PRINTF(a) printf a
70 #else
71 #define MSG_PRINTF(a)
72 #endif
74 static int nfree_msgmaps; /* # of free map entries */
75 static short free_msgmaps; /* head of linked list of free map entries */
76 static struct __msg *free_msghdrs; /* list of free msg headers */
77 static char *msgpool; /* MSGMAX byte long msg buffer pool */
78 static struct msgmap *msgmaps; /* MSGSEG msgmap structures */
79 static struct __msg *msghdrs; /* MSGTQL msg headers */
81 kmsq_t *msqs; /* MSGMNI msqid_ds struct's */
82 kmutex_t msgmutex; /* subsystem lock */
84 static u_int msg_waiters = 0; /* total number of msgrcv waiters */
85 static bool msg_realloc_state;
86 static kcondvar_t msg_realloc_cv;
88 static void msg_freehdr(struct __msg *);
90 void
91 msginit(void)
93 int i, sz;
94 vaddr_t v;
97 * msginfo.msgssz should be a power of two for efficiency reasons.
98 * It is also pretty silly if msginfo.msgssz is less than 8
99 * or greater than about 256 so ...
102 i = 8;
103 while (i < 1024 && i != msginfo.msgssz)
104 i <<= 1;
105 if (i != msginfo.msgssz) {
106 panic("msginfo.msgssz = %d, not a small power of 2",
107 msginfo.msgssz);
110 if (msginfo.msgseg > 32767) {
111 panic("msginfo.msgseg = %d > 32767", msginfo.msgseg);
114 /* Allocate the wired memory for our structures */
115 sz = ALIGN(msginfo.msgmax) +
116 ALIGN(msginfo.msgseg * sizeof(struct msgmap)) +
117 ALIGN(msginfo.msgtql * sizeof(struct __msg)) +
118 ALIGN(msginfo.msgmni * sizeof(kmsq_t));
119 v = uvm_km_alloc(kernel_map, round_page(sz), 0,
120 UVM_KMF_WIRED|UVM_KMF_ZERO);
121 if (v == 0)
122 panic("sysv_msg: cannot allocate memory");
123 msgpool = (void *)v;
124 msgmaps = (void *)((uintptr_t)msgpool + ALIGN(msginfo.msgmax));
125 msghdrs = (void *)((uintptr_t)msgmaps +
126 ALIGN(msginfo.msgseg * sizeof(struct msgmap)));
127 msqs = (void *)((uintptr_t)msghdrs +
128 ALIGN(msginfo.msgtql * sizeof(struct __msg)));
130 for (i = 0; i < (msginfo.msgseg - 1); i++)
131 msgmaps[i].next = i + 1;
132 msgmaps[msginfo.msgseg - 1].next = -1;
134 free_msgmaps = 0;
135 nfree_msgmaps = msginfo.msgseg;
137 for (i = 0; i < (msginfo.msgtql - 1); i++) {
138 msghdrs[i].msg_type = 0;
139 msghdrs[i].msg_next = &msghdrs[i + 1];
141 i = msginfo.msgtql - 1;
142 msghdrs[i].msg_type = 0;
143 msghdrs[i].msg_next = NULL;
144 free_msghdrs = &msghdrs[0];
146 for (i = 0; i < msginfo.msgmni; i++) {
147 cv_init(&msqs[i].msq_cv, "msgwait");
148 /* Implies entry is available */
149 msqs[i].msq_u.msg_qbytes = 0;
150 /* Reset to a known value */
151 msqs[i].msq_u.msg_perm._seq = 0;
154 mutex_init(&msgmutex, MUTEX_DEFAULT, IPL_NONE);
155 cv_init(&msg_realloc_cv, "msgrealc");
156 msg_realloc_state = false;
159 static int
160 msgrealloc(int newmsgmni, int newmsgseg)
162 struct msgmap *new_msgmaps;
163 struct __msg *new_msghdrs, *new_free_msghdrs;
164 char *old_msgpool, *new_msgpool;
165 kmsq_t *new_msqs;
166 vaddr_t v;
167 int i, sz, msqid, newmsgmax, new_nfree_msgmaps;
168 short new_free_msgmaps;
170 if (newmsgmni < 1 || newmsgseg < 1)
171 return EINVAL;
173 /* Allocate the wired memory for our structures */
174 newmsgmax = msginfo.msgssz * newmsgseg;
175 sz = ALIGN(newmsgmax) +
176 ALIGN(newmsgseg * sizeof(struct msgmap)) +
177 ALIGN(msginfo.msgtql * sizeof(struct __msg)) +
178 ALIGN(newmsgmni * sizeof(kmsq_t));
179 v = uvm_km_alloc(kernel_map, round_page(sz), 0,
180 UVM_KMF_WIRED|UVM_KMF_ZERO);
181 if (v == 0)
182 return ENOMEM;
184 mutex_enter(&msgmutex);
185 if (msg_realloc_state) {
186 mutex_exit(&msgmutex);
187 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
188 return EBUSY;
190 msg_realloc_state = true;
191 if (msg_waiters) {
193 * Mark reallocation state, wake-up all waiters,
194 * and wait while they will all exit.
196 for (i = 0; i < msginfo.msgmni; i++)
197 cv_broadcast(&msqs[i].msq_cv);
198 while (msg_waiters)
199 cv_wait(&msg_realloc_cv, &msgmutex);
201 old_msgpool = msgpool;
203 /* We cannot reallocate less memory than we use */
204 i = 0;
205 for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
206 struct msqid_ds *mptr;
207 kmsq_t *msq;
209 msq = &msqs[msqid];
210 mptr = &msq->msq_u;
211 if (mptr->msg_qbytes || (mptr->msg_perm.mode & MSG_LOCKED))
212 i = msqid;
214 if (i >= newmsgmni || (msginfo.msgseg - nfree_msgmaps) > newmsgseg) {
215 mutex_exit(&msgmutex);
216 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
217 return EBUSY;
220 new_msgpool = (void *)v;
221 new_msgmaps = (void *)((uintptr_t)new_msgpool + ALIGN(newmsgmax));
222 new_msghdrs = (void *)((uintptr_t)new_msgmaps +
223 ALIGN(newmsgseg * sizeof(struct msgmap)));
224 new_msqs = (void *)((uintptr_t)new_msghdrs +
225 ALIGN(msginfo.msgtql * sizeof(struct __msg)));
227 /* Initialize the structures */
228 for (i = 0; i < (newmsgseg - 1); i++)
229 new_msgmaps[i].next = i + 1;
230 new_msgmaps[newmsgseg - 1].next = -1;
231 new_free_msgmaps = 0;
232 new_nfree_msgmaps = newmsgseg;
234 for (i = 0; i < (msginfo.msgtql - 1); i++) {
235 new_msghdrs[i].msg_type = 0;
236 new_msghdrs[i].msg_next = &new_msghdrs[i + 1];
238 i = msginfo.msgtql - 1;
239 new_msghdrs[i].msg_type = 0;
240 new_msghdrs[i].msg_next = NULL;
241 new_free_msghdrs = &new_msghdrs[0];
243 for (i = 0; i < newmsgmni; i++) {
244 new_msqs[i].msq_u.msg_qbytes = 0;
245 new_msqs[i].msq_u.msg_perm._seq = 0;
246 cv_init(&new_msqs[i].msq_cv, "msgwait");
250 * Copy all message queue identifiers, mesage headers and buffer
251 * pools to the new memory location.
253 for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
254 struct __msg *nmsghdr, *msghdr, *pmsghdr;
255 struct msqid_ds *nmptr, *mptr;
256 kmsq_t *nmsq, *msq;
258 msq = &msqs[msqid];
259 mptr = &msq->msq_u;
261 if (mptr->msg_qbytes == 0 &&
262 (mptr->msg_perm.mode & MSG_LOCKED) == 0)
263 continue;
265 nmsq = &new_msqs[msqid];
266 nmptr = &nmsq->msq_u;
267 memcpy(nmptr, mptr, sizeof(struct msqid_ds));
270 * Go through the message headers, and and copy each
271 * one by taking the new ones, and thus defragmenting.
273 nmsghdr = pmsghdr = NULL;
274 msghdr = mptr->_msg_first;
275 while (msghdr) {
276 short nnext = 0, next;
277 u_short msgsz, segcnt;
279 /* Take an entry from the new list of free msghdrs */
280 nmsghdr = new_free_msghdrs;
281 KASSERT(nmsghdr != NULL);
282 new_free_msghdrs = nmsghdr->msg_next;
284 nmsghdr->msg_next = NULL;
285 if (pmsghdr) {
286 pmsghdr->msg_next = nmsghdr;
287 } else {
288 nmptr->_msg_first = nmsghdr;
289 pmsghdr = nmsghdr;
291 nmsghdr->msg_ts = msghdr->msg_ts;
292 nmsghdr->msg_spot = -1;
294 /* Compute the amount of segments and reserve them */
295 msgsz = msghdr->msg_ts;
296 segcnt = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
297 if (segcnt == 0)
298 continue;
299 while (segcnt--) {
300 nnext = new_free_msgmaps;
301 new_free_msgmaps = new_msgmaps[nnext].next;
302 new_nfree_msgmaps--;
303 new_msgmaps[nnext].next = nmsghdr->msg_spot;
304 nmsghdr->msg_spot = nnext;
307 /* Copy all segments */
308 KASSERT(nnext == nmsghdr->msg_spot);
309 next = msghdr->msg_spot;
310 while (msgsz > 0) {
311 size_t tlen;
313 if (msgsz >= msginfo.msgssz) {
314 tlen = msginfo.msgssz;
315 msgsz -= msginfo.msgssz;
316 } else {
317 tlen = msgsz;
318 msgsz = 0;
321 /* Copy the message buffer */
322 memcpy(&new_msgpool[nnext * msginfo.msgssz],
323 &msgpool[next * msginfo.msgssz], tlen);
325 /* Next entry of the map */
326 nnext = msgmaps[nnext].next;
327 next = msgmaps[next].next;
330 /* Next message header */
331 msghdr = msghdr->msg_next;
333 nmptr->_msg_last = nmsghdr;
335 KASSERT((msginfo.msgseg - nfree_msgmaps) ==
336 (newmsgseg - new_nfree_msgmaps));
338 sz = ALIGN(msginfo.msgmax) +
339 ALIGN(msginfo.msgseg * sizeof(struct msgmap)) +
340 ALIGN(msginfo.msgtql * sizeof(struct __msg)) +
341 ALIGN(msginfo.msgmni * sizeof(kmsq_t));
343 for (i = 0; i < msginfo.msgmni; i++)
344 cv_destroy(&msqs[i].msq_cv);
346 /* Set the pointers and update the new values */
347 msgpool = new_msgpool;
348 msgmaps = new_msgmaps;
349 msghdrs = new_msghdrs;
350 msqs = new_msqs;
352 free_msghdrs = new_free_msghdrs;
353 free_msgmaps = new_free_msgmaps;
354 nfree_msgmaps = new_nfree_msgmaps;
355 msginfo.msgmni = newmsgmni;
356 msginfo.msgseg = newmsgseg;
357 msginfo.msgmax = newmsgmax;
359 /* Reallocation completed - notify all waiters, if any */
360 msg_realloc_state = false;
361 cv_broadcast(&msg_realloc_cv);
362 mutex_exit(&msgmutex);
364 uvm_km_free(kernel_map, (vaddr_t)old_msgpool, sz, UVM_KMF_WIRED);
365 return 0;
368 static void
369 msg_freehdr(struct __msg *msghdr)
372 KASSERT(mutex_owned(&msgmutex));
374 while (msghdr->msg_ts > 0) {
375 short next;
376 KASSERT(msghdr->msg_spot >= 0);
377 KASSERT(msghdr->msg_spot < msginfo.msgseg);
379 next = msgmaps[msghdr->msg_spot].next;
380 msgmaps[msghdr->msg_spot].next = free_msgmaps;
381 free_msgmaps = msghdr->msg_spot;
382 nfree_msgmaps++;
383 msghdr->msg_spot = next;
384 if (msghdr->msg_ts >= msginfo.msgssz)
385 msghdr->msg_ts -= msginfo.msgssz;
386 else
387 msghdr->msg_ts = 0;
389 KASSERT(msghdr->msg_spot == -1);
390 msghdr->msg_next = free_msghdrs;
391 free_msghdrs = msghdr;
395 sys___msgctl50(struct lwp *l, const struct sys___msgctl50_args *uap,
396 register_t *retval)
398 /* {
399 syscallarg(int) msqid;
400 syscallarg(int) cmd;
401 syscallarg(struct msqid_ds *) buf;
402 } */
403 struct msqid_ds msqbuf;
404 int cmd, error;
406 cmd = SCARG(uap, cmd);
408 if (cmd == IPC_SET) {
409 error = copyin(SCARG(uap, buf), &msqbuf, sizeof(msqbuf));
410 if (error)
411 return (error);
414 error = msgctl1(l, SCARG(uap, msqid), cmd,
415 (cmd == IPC_SET || cmd == IPC_STAT) ? &msqbuf : NULL);
417 if (error == 0 && cmd == IPC_STAT)
418 error = copyout(&msqbuf, SCARG(uap, buf), sizeof(msqbuf));
420 return (error);
424 msgctl1(struct lwp *l, int msqid, int cmd, struct msqid_ds *msqbuf)
426 kauth_cred_t cred = l->l_cred;
427 struct msqid_ds *msqptr;
428 kmsq_t *msq;
429 int error = 0, ix;
431 MSG_PRINTF(("call to msgctl1(%d, %d)\n", msqid, cmd));
433 ix = IPCID_TO_IX(msqid);
435 mutex_enter(&msgmutex);
437 if (ix < 0 || ix >= msginfo.msgmni) {
438 MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", ix,
439 msginfo.msgmni));
440 error = EINVAL;
441 goto unlock;
444 msq = &msqs[ix];
445 msqptr = &msq->msq_u;
447 if (msqptr->msg_qbytes == 0) {
448 MSG_PRINTF(("no such msqid\n"));
449 error = EINVAL;
450 goto unlock;
452 if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqid)) {
453 MSG_PRINTF(("wrong sequence number\n"));
454 error = EINVAL;
455 goto unlock;
458 switch (cmd) {
459 case IPC_RMID:
461 struct __msg *msghdr;
462 if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)) != 0)
463 break;
464 /* Free the message headers */
465 msghdr = msqptr->_msg_first;
466 while (msghdr != NULL) {
467 struct __msg *msghdr_tmp;
469 /* Free the segments of each message */
470 msqptr->_msg_cbytes -= msghdr->msg_ts;
471 msqptr->msg_qnum--;
472 msghdr_tmp = msghdr;
473 msghdr = msghdr->msg_next;
474 msg_freehdr(msghdr_tmp);
476 KASSERT(msqptr->_msg_cbytes == 0);
477 KASSERT(msqptr->msg_qnum == 0);
479 /* Mark it as free */
480 msqptr->msg_qbytes = 0;
481 cv_broadcast(&msq->msq_cv);
483 break;
485 case IPC_SET:
486 if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)))
487 break;
488 if (msqbuf->msg_qbytes > msqptr->msg_qbytes &&
489 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER,
490 NULL) != 0) {
491 error = EPERM;
492 break;
494 if (msqbuf->msg_qbytes > msginfo.msgmnb) {
495 MSG_PRINTF(("can't increase msg_qbytes beyond %d "
496 "(truncating)\n", msginfo.msgmnb));
497 /* silently restrict qbytes to system limit */
498 msqbuf->msg_qbytes = msginfo.msgmnb;
500 if (msqbuf->msg_qbytes == 0) {
501 MSG_PRINTF(("can't reduce msg_qbytes to 0\n"));
502 error = EINVAL; /* XXX non-standard errno! */
503 break;
505 msqptr->msg_perm.uid = msqbuf->msg_perm.uid;
506 msqptr->msg_perm.gid = msqbuf->msg_perm.gid;
507 msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
508 (msqbuf->msg_perm.mode & 0777);
509 msqptr->msg_qbytes = msqbuf->msg_qbytes;
510 msqptr->msg_ctime = time_second;
511 break;
513 case IPC_STAT:
514 if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
515 MSG_PRINTF(("requester doesn't have read access\n"));
516 break;
518 memcpy(msqbuf, msqptr, sizeof(struct msqid_ds));
519 break;
521 default:
522 MSG_PRINTF(("invalid command %d\n", cmd));
523 error = EINVAL;
524 break;
527 unlock:
528 mutex_exit(&msgmutex);
529 return (error);
533 sys_msgget(struct lwp *l, const struct sys_msgget_args *uap, register_t *retval)
535 /* {
536 syscallarg(key_t) key;
537 syscallarg(int) msgflg;
538 } */
539 int msqid, error = 0;
540 int key = SCARG(uap, key);
541 int msgflg = SCARG(uap, msgflg);
542 kauth_cred_t cred = l->l_cred;
543 struct msqid_ds *msqptr = NULL;
544 kmsq_t *msq;
546 mutex_enter(&msgmutex);
548 MSG_PRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
550 if (key != IPC_PRIVATE) {
551 for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
552 msq = &msqs[msqid];
553 msqptr = &msq->msq_u;
554 if (msqptr->msg_qbytes != 0 &&
555 msqptr->msg_perm._key == key)
556 break;
558 if (msqid < msginfo.msgmni) {
559 MSG_PRINTF(("found public key\n"));
560 if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
561 MSG_PRINTF(("not exclusive\n"));
562 error = EEXIST;
563 goto unlock;
565 if ((error = ipcperm(cred, &msqptr->msg_perm,
566 msgflg & 0700 ))) {
567 MSG_PRINTF(("requester doesn't have 0%o access\n",
568 msgflg & 0700));
569 goto unlock;
571 goto found;
575 MSG_PRINTF(("need to allocate the msqid_ds\n"));
576 if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
577 for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
579 * Look for an unallocated and unlocked msqid_ds.
580 * msqid_ds's can be locked by msgsnd or msgrcv while
581 * they are copying the message in/out. We can't
582 * re-use the entry until they release it.
584 msq = &msqs[msqid];
585 msqptr = &msq->msq_u;
586 if (msqptr->msg_qbytes == 0 &&
587 (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
588 break;
590 if (msqid == msginfo.msgmni) {
591 MSG_PRINTF(("no more msqid_ds's available\n"));
592 error = ENOSPC;
593 goto unlock;
595 MSG_PRINTF(("msqid %d is available\n", msqid));
596 msqptr->msg_perm._key = key;
597 msqptr->msg_perm.cuid = kauth_cred_geteuid(cred);
598 msqptr->msg_perm.uid = kauth_cred_geteuid(cred);
599 msqptr->msg_perm.cgid = kauth_cred_getegid(cred);
600 msqptr->msg_perm.gid = kauth_cred_getegid(cred);
601 msqptr->msg_perm.mode = (msgflg & 0777);
602 /* Make sure that the returned msqid is unique */
603 msqptr->msg_perm._seq++;
604 msqptr->_msg_first = NULL;
605 msqptr->_msg_last = NULL;
606 msqptr->_msg_cbytes = 0;
607 msqptr->msg_qnum = 0;
608 msqptr->msg_qbytes = msginfo.msgmnb;
609 msqptr->msg_lspid = 0;
610 msqptr->msg_lrpid = 0;
611 msqptr->msg_stime = 0;
612 msqptr->msg_rtime = 0;
613 msqptr->msg_ctime = time_second;
614 } else {
615 MSG_PRINTF(("didn't find it and wasn't asked to create it\n"));
616 error = ENOENT;
617 goto unlock;
620 found:
621 /* Construct the unique msqid */
622 *retval = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
624 unlock:
625 mutex_exit(&msgmutex);
626 return (error);
630 sys_msgsnd(struct lwp *l, const struct sys_msgsnd_args *uap, register_t *retval)
632 /* {
633 syscallarg(int) msqid;
634 syscallarg(const void *) msgp;
635 syscallarg(size_t) msgsz;
636 syscallarg(int) msgflg;
637 } */
639 return msgsnd1(l, SCARG(uap, msqid), SCARG(uap, msgp),
640 SCARG(uap, msgsz), SCARG(uap, msgflg), sizeof(long), copyin);
644 msgsnd1(struct lwp *l, int msqidr, const char *user_msgp, size_t msgsz,
645 int msgflg, size_t typesz, copyin_t fetch_type)
647 int segs_needed, error = 0, msqid;
648 kauth_cred_t cred = l->l_cred;
649 struct msqid_ds *msqptr;
650 struct __msg *msghdr;
651 kmsq_t *msq;
652 short next;
654 MSG_PRINTF(("call to msgsnd(%d, %p, %lld, %d)\n", msqid, user_msgp,
655 (long long)msgsz, msgflg));
657 if ((ssize_t)msgsz < 0)
658 return EINVAL;
660 restart:
661 msqid = IPCID_TO_IX(msqidr);
663 mutex_enter(&msgmutex);
664 /* In case of reallocation, we will wait for completion */
665 while (__predict_false(msg_realloc_state))
666 cv_wait(&msg_realloc_cv, &msgmutex);
668 if (msqid < 0 || msqid >= msginfo.msgmni) {
669 MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
670 msginfo.msgmni));
671 error = EINVAL;
672 goto unlock;
675 msq = &msqs[msqid];
676 msqptr = &msq->msq_u;
678 if (msqptr->msg_qbytes == 0) {
679 MSG_PRINTF(("no such message queue id\n"));
680 error = EINVAL;
681 goto unlock;
683 if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
684 MSG_PRINTF(("wrong sequence number\n"));
685 error = EINVAL;
686 goto unlock;
689 if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_W))) {
690 MSG_PRINTF(("requester doesn't have write access\n"));
691 goto unlock;
694 segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
695 MSG_PRINTF(("msgsz=%lld, msgssz=%d, segs_needed=%d\n",
696 (long long)msgsz, msginfo.msgssz, segs_needed));
697 for (;;) {
698 int need_more_resources = 0;
701 * check msgsz [cannot be negative since it is unsigned]
702 * (inside this loop in case msg_qbytes changes while we sleep)
705 if (msgsz > msqptr->msg_qbytes) {
706 MSG_PRINTF(("msgsz > msqptr->msg_qbytes\n"));
707 error = EINVAL;
708 goto unlock;
711 if (msqptr->msg_perm.mode & MSG_LOCKED) {
712 MSG_PRINTF(("msqid is locked\n"));
713 need_more_resources = 1;
715 if (msgsz + msqptr->_msg_cbytes > msqptr->msg_qbytes) {
716 MSG_PRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
717 need_more_resources = 1;
719 if (segs_needed > nfree_msgmaps) {
720 MSG_PRINTF(("segs_needed > nfree_msgmaps\n"));
721 need_more_resources = 1;
723 if (free_msghdrs == NULL) {
724 MSG_PRINTF(("no more msghdrs\n"));
725 need_more_resources = 1;
728 if (need_more_resources) {
729 int we_own_it;
731 if ((msgflg & IPC_NOWAIT) != 0) {
732 MSG_PRINTF(("need more resources but caller "
733 "doesn't want to wait\n"));
734 error = EAGAIN;
735 goto unlock;
738 if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
739 MSG_PRINTF(("we don't own the msqid_ds\n"));
740 we_own_it = 0;
741 } else {
742 /* Force later arrivals to wait for our
743 request */
744 MSG_PRINTF(("we own the msqid_ds\n"));
745 msqptr->msg_perm.mode |= MSG_LOCKED;
746 we_own_it = 1;
749 msg_waiters++;
750 MSG_PRINTF(("goodnight\n"));
751 error = cv_wait_sig(&msq->msq_cv, &msgmutex);
752 MSG_PRINTF(("good morning, error=%d\n", error));
753 msg_waiters--;
755 if (we_own_it)
756 msqptr->msg_perm.mode &= ~MSG_LOCKED;
759 * In case of such state, notify reallocator and
760 * restart the call.
762 if (msg_realloc_state) {
763 cv_broadcast(&msg_realloc_cv);
764 mutex_exit(&msgmutex);
765 goto restart;
768 if (error != 0) {
769 MSG_PRINTF(("msgsnd: interrupted system "
770 "call\n"));
771 error = EINTR;
772 goto unlock;
776 * Make sure that the msq queue still exists
779 if (msqptr->msg_qbytes == 0) {
780 MSG_PRINTF(("msqid deleted\n"));
781 error = EIDRM;
782 goto unlock;
784 } else {
785 MSG_PRINTF(("got all the resources that we need\n"));
786 break;
791 * We have the resources that we need.
792 * Make sure!
795 KASSERT((msqptr->msg_perm.mode & MSG_LOCKED) == 0);
796 KASSERT(segs_needed <= nfree_msgmaps);
797 KASSERT(msgsz + msqptr->_msg_cbytes <= msqptr->msg_qbytes);
798 KASSERT(free_msghdrs != NULL);
801 * Re-lock the msqid_ds in case we page-fault when copying in the
802 * message
805 KASSERT((msqptr->msg_perm.mode & MSG_LOCKED) == 0);
806 msqptr->msg_perm.mode |= MSG_LOCKED;
809 * Allocate a message header
812 msghdr = free_msghdrs;
813 free_msghdrs = msghdr->msg_next;
814 msghdr->msg_spot = -1;
815 msghdr->msg_ts = msgsz;
818 * Allocate space for the message
821 while (segs_needed > 0) {
822 KASSERT(nfree_msgmaps > 0);
823 KASSERT(free_msgmaps != -1);
824 KASSERT(free_msgmaps < msginfo.msgseg);
826 next = free_msgmaps;
827 MSG_PRINTF(("allocating segment %d to message\n", next));
828 free_msgmaps = msgmaps[next].next;
829 nfree_msgmaps--;
830 msgmaps[next].next = msghdr->msg_spot;
831 msghdr->msg_spot = next;
832 segs_needed--;
836 * Copy in the message type
838 mutex_exit(&msgmutex);
839 error = (*fetch_type)(user_msgp, &msghdr->msg_type, typesz);
840 mutex_enter(&msgmutex);
841 if (error != 0) {
842 MSG_PRINTF(("error %d copying the message type\n", error));
843 msg_freehdr(msghdr);
844 msqptr->msg_perm.mode &= ~MSG_LOCKED;
845 cv_broadcast(&msq->msq_cv);
846 goto unlock;
848 user_msgp += typesz;
851 * Validate the message type
854 if (msghdr->msg_type < 1) {
855 msg_freehdr(msghdr);
856 msqptr->msg_perm.mode &= ~MSG_LOCKED;
857 cv_broadcast(&msq->msq_cv);
858 MSG_PRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
859 error = EINVAL;
860 goto unlock;
864 * Copy in the message body
867 next = msghdr->msg_spot;
868 while (msgsz > 0) {
869 size_t tlen;
870 KASSERT(next > -1);
871 KASSERT(next < msginfo.msgseg);
873 if (msgsz > msginfo.msgssz)
874 tlen = msginfo.msgssz;
875 else
876 tlen = msgsz;
877 mutex_exit(&msgmutex);
878 error = copyin(user_msgp, &msgpool[next * msginfo.msgssz], tlen);
879 mutex_enter(&msgmutex);
880 if (error != 0) {
881 MSG_PRINTF(("error %d copying in message segment\n",
882 error));
883 msg_freehdr(msghdr);
884 msqptr->msg_perm.mode &= ~MSG_LOCKED;
885 cv_broadcast(&msq->msq_cv);
886 goto unlock;
888 msgsz -= tlen;
889 user_msgp += tlen;
890 next = msgmaps[next].next;
892 KASSERT(next == -1);
895 * We've got the message. Unlock the msqid_ds.
898 msqptr->msg_perm.mode &= ~MSG_LOCKED;
901 * Make sure that the msqid_ds is still allocated.
904 if (msqptr->msg_qbytes == 0) {
905 msg_freehdr(msghdr);
906 cv_broadcast(&msq->msq_cv);
907 error = EIDRM;
908 goto unlock;
912 * Put the message into the queue
915 if (msqptr->_msg_first == NULL) {
916 msqptr->_msg_first = msghdr;
917 msqptr->_msg_last = msghdr;
918 } else {
919 msqptr->_msg_last->msg_next = msghdr;
920 msqptr->_msg_last = msghdr;
922 msqptr->_msg_last->msg_next = NULL;
924 msqptr->_msg_cbytes += msghdr->msg_ts;
925 msqptr->msg_qnum++;
926 msqptr->msg_lspid = l->l_proc->p_pid;
927 msqptr->msg_stime = time_second;
929 cv_broadcast(&msq->msq_cv);
931 unlock:
932 mutex_exit(&msgmutex);
933 return error;
937 sys_msgrcv(struct lwp *l, const struct sys_msgrcv_args *uap, register_t *retval)
939 /* {
940 syscallarg(int) msqid;
941 syscallarg(void *) msgp;
942 syscallarg(size_t) msgsz;
943 syscallarg(long) msgtyp;
944 syscallarg(int) msgflg;
945 } */
947 return msgrcv1(l, SCARG(uap, msqid), SCARG(uap, msgp),
948 SCARG(uap, msgsz), SCARG(uap, msgtyp), SCARG(uap, msgflg),
949 sizeof(long), copyout, retval);
953 msgrcv1(struct lwp *l, int msqidr, char *user_msgp, size_t msgsz, long msgtyp,
954 int msgflg, size_t typesz, copyout_t put_type, register_t *retval)
956 size_t len;
957 kauth_cred_t cred = l->l_cred;
958 struct msqid_ds *msqptr;
959 struct __msg *msghdr;
960 int error = 0, msqid;
961 kmsq_t *msq;
962 short next;
964 MSG_PRINTF(("call to msgrcv(%d, %p, %lld, %ld, %d)\n", msqid,
965 user_msgp, (long long)msgsz, msgtyp, msgflg));
967 if ((ssize_t)msgsz < 0)
968 return EINVAL;
970 restart:
971 msqid = IPCID_TO_IX(msqidr);
973 mutex_enter(&msgmutex);
974 /* In case of reallocation, we will wait for completion */
975 while (__predict_false(msg_realloc_state))
976 cv_wait(&msg_realloc_cv, &msgmutex);
978 if (msqid < 0 || msqid >= msginfo.msgmni) {
979 MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
980 msginfo.msgmni));
981 error = EINVAL;
982 goto unlock;
985 msq = &msqs[msqid];
986 msqptr = &msq->msq_u;
988 if (msqptr->msg_qbytes == 0) {
989 MSG_PRINTF(("no such message queue id\n"));
990 error = EINVAL;
991 goto unlock;
993 if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
994 MSG_PRINTF(("wrong sequence number\n"));
995 error = EINVAL;
996 goto unlock;
999 if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
1000 MSG_PRINTF(("requester doesn't have read access\n"));
1001 goto unlock;
1004 msghdr = NULL;
1005 while (msghdr == NULL) {
1006 if (msgtyp == 0) {
1007 msghdr = msqptr->_msg_first;
1008 if (msghdr != NULL) {
1009 if (msgsz < msghdr->msg_ts &&
1010 (msgflg & MSG_NOERROR) == 0) {
1011 MSG_PRINTF(("first msg on the queue "
1012 "is too big (want %lld, got %d)\n",
1013 (long long)msgsz, msghdr->msg_ts));
1014 error = E2BIG;
1015 goto unlock;
1017 if (msqptr->_msg_first == msqptr->_msg_last) {
1018 msqptr->_msg_first = NULL;
1019 msqptr->_msg_last = NULL;
1020 } else {
1021 msqptr->_msg_first = msghdr->msg_next;
1022 KASSERT(msqptr->_msg_first != NULL);
1025 } else {
1026 struct __msg *previous;
1027 struct __msg **prev;
1029 for (previous = NULL, prev = &msqptr->_msg_first;
1030 (msghdr = *prev) != NULL;
1031 previous = msghdr, prev = &msghdr->msg_next) {
1033 * Is this message's type an exact match or is
1034 * this message's type less than or equal to
1035 * the absolute value of a negative msgtyp?
1036 * Note that the second half of this test can
1037 * NEVER be true if msgtyp is positive since
1038 * msg_type is always positive!
1041 if (msgtyp != msghdr->msg_type &&
1042 msghdr->msg_type > -msgtyp)
1043 continue;
1045 MSG_PRINTF(("found message type %ld, requested %ld\n",
1046 msghdr->msg_type, msgtyp));
1047 if (msgsz < msghdr->msg_ts &&
1048 (msgflg & MSG_NOERROR) == 0) {
1049 MSG_PRINTF(("requested message on the queue "
1050 "is too big (want %lld, got %d)\n",
1051 (long long)msgsz, msghdr->msg_ts));
1052 error = E2BIG;
1053 goto unlock;
1055 *prev = msghdr->msg_next;
1056 if (msghdr != msqptr->_msg_last)
1057 break;
1058 if (previous == NULL) {
1059 KASSERT(prev == &msqptr->_msg_first);
1060 msqptr->_msg_first = NULL;
1061 msqptr->_msg_last = NULL;
1062 } else {
1063 KASSERT(prev != &msqptr->_msg_first);
1064 msqptr->_msg_last = previous;
1066 break;
1071 * We've either extracted the msghdr for the appropriate
1072 * message or there isn't one.
1073 * If there is one then bail out of this loop.
1075 if (msghdr != NULL)
1076 break;
1079 * Hmph! No message found. Does the user want to wait?
1082 if ((msgflg & IPC_NOWAIT) != 0) {
1083 MSG_PRINTF(("no appropriate message found (msgtyp=%ld)\n",
1084 msgtyp));
1085 error = ENOMSG;
1086 goto unlock;
1090 * Wait for something to happen
1093 msg_waiters++;
1094 MSG_PRINTF(("msgrcv: goodnight\n"));
1095 error = cv_wait_sig(&msq->msq_cv, &msgmutex);
1096 MSG_PRINTF(("msgrcv: good morning (error=%d)\n", error));
1097 msg_waiters--;
1100 * In case of such state, notify reallocator and
1101 * restart the call.
1103 if (msg_realloc_state) {
1104 cv_broadcast(&msg_realloc_cv);
1105 mutex_exit(&msgmutex);
1106 goto restart;
1109 if (error != 0) {
1110 MSG_PRINTF(("msgsnd: interrupted system call\n"));
1111 error = EINTR;
1112 goto unlock;
1116 * Make sure that the msq queue still exists
1119 if (msqptr->msg_qbytes == 0 ||
1120 msqptr->msg_perm._seq != IPCID_TO_SEQ(msqidr)) {
1121 MSG_PRINTF(("msqid deleted\n"));
1122 error = EIDRM;
1123 goto unlock;
1128 * Return the message to the user.
1130 * First, do the bookkeeping (before we risk being interrupted).
1133 msqptr->_msg_cbytes -= msghdr->msg_ts;
1134 msqptr->msg_qnum--;
1135 msqptr->msg_lrpid = l->l_proc->p_pid;
1136 msqptr->msg_rtime = time_second;
1139 * Make msgsz the actual amount that we'll be returning.
1140 * Note that this effectively truncates the message if it is too long
1141 * (since msgsz is never increased).
1144 MSG_PRINTF(("found a message, msgsz=%lld, msg_ts=%d\n",
1145 (long long)msgsz, msghdr->msg_ts));
1146 if (msgsz > msghdr->msg_ts)
1147 msgsz = msghdr->msg_ts;
1150 * Return the type to the user.
1152 mutex_exit(&msgmutex);
1153 error = (*put_type)(&msghdr->msg_type, user_msgp, typesz);
1154 mutex_enter(&msgmutex);
1155 if (error != 0) {
1156 MSG_PRINTF(("error (%d) copying out message type\n", error));
1157 msg_freehdr(msghdr);
1158 cv_broadcast(&msq->msq_cv);
1159 goto unlock;
1161 user_msgp += typesz;
1164 * Return the segments to the user
1167 next = msghdr->msg_spot;
1168 for (len = 0; len < msgsz; len += msginfo.msgssz) {
1169 size_t tlen;
1170 KASSERT(next > -1);
1171 KASSERT(next < msginfo.msgseg);
1173 if (msgsz - len > msginfo.msgssz)
1174 tlen = msginfo.msgssz;
1175 else
1176 tlen = msgsz - len;
1177 mutex_exit(&msgmutex);
1178 error = copyout(&msgpool[next * msginfo.msgssz],
1179 user_msgp, tlen);
1180 mutex_enter(&msgmutex);
1181 if (error != 0) {
1182 MSG_PRINTF(("error (%d) copying out message segment\n",
1183 error));
1184 msg_freehdr(msghdr);
1185 cv_broadcast(&msq->msq_cv);
1186 goto unlock;
1188 user_msgp += tlen;
1189 next = msgmaps[next].next;
1193 * Done, return the actual number of bytes copied out.
1196 msg_freehdr(msghdr);
1197 cv_broadcast(&msq->msq_cv);
1198 *retval = msgsz;
1200 unlock:
1201 mutex_exit(&msgmutex);
1202 return error;
/*
 * Sysctl initialization and nodes.
 */

1209 static int
1210 sysctl_ipc_msgmni(SYSCTLFN_ARGS)
1212 int newsize, error;
1213 struct sysctlnode node;
1214 node = *rnode;
1215 node.sysctl_data = &newsize;
1217 newsize = msginfo.msgmni;
1218 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1219 if (error || newp == NULL)
1220 return error;
1222 sysctl_unlock();
1223 error = msgrealloc(newsize, msginfo.msgseg);
1224 sysctl_relock();
1225 return error;
1228 static int
1229 sysctl_ipc_msgseg(SYSCTLFN_ARGS)
1231 int newsize, error;
1232 struct sysctlnode node;
1233 node = *rnode;
1234 node.sysctl_data = &newsize;
1236 newsize = msginfo.msgseg;
1237 error = sysctl_lookup(SYSCTLFN_CALL(&node));
1238 if (error || newp == NULL)
1239 return error;
1241 sysctl_unlock();
1242 error = msgrealloc(msginfo.msgmni, newsize);
1243 sysctl_relock();
1244 return error;
1247 SYSCTL_SETUP(sysctl_ipc_msg_setup, "sysctl kern.ipc subtree setup")
1249 const struct sysctlnode *node = NULL;
1251 sysctl_createv(clog, 0, NULL, NULL,
1252 CTLFLAG_PERMANENT,
1253 CTLTYPE_NODE, "kern", NULL,
1254 NULL, 0, NULL, 0,
1255 CTL_KERN, CTL_EOL);
1256 sysctl_createv(clog, 0, NULL, &node,
1257 CTLFLAG_PERMANENT,
1258 CTLTYPE_NODE, "ipc",
1259 SYSCTL_DESCR("SysV IPC options"),
1260 NULL, 0, NULL, 0,
1261 CTL_KERN, KERN_SYSVIPC, CTL_EOL);
1263 if (node == NULL)
1264 return;
1266 sysctl_createv(clog, 0, &node, NULL,
1267 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1268 CTLTYPE_INT, "msgmni",
1269 SYSCTL_DESCR("Max number of message queue identifiers"),
1270 sysctl_ipc_msgmni, 0, &msginfo.msgmni, 0,
1271 CTL_CREATE, CTL_EOL);
1272 sysctl_createv(clog, 0, &node, NULL,
1273 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1274 CTLTYPE_INT, "msgseg",
1275 SYSCTL_DESCR("Max number of number of message segments"),
1276 sysctl_ipc_msgseg, 0, &msginfo.msgseg, 0,
1277 CTL_CREATE, CTL_EOL);