dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / sockfs / sockcommon_sops.c
bloba63297e7cd6fe912c946f017bfbfa980b4880fbe
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/debug.h>
35 #include <sys/cmn_err.h>
37 #include <sys/stropts.h>
38 #include <sys/socket.h>
39 #include <sys/socketvar.h>
41 #define _SUN_TPI_VERSION 2
42 #include <sys/tihdr.h>
43 #include <sys/sockio.h>
44 #include <sys/kmem_impl.h>
46 #include <sys/strsubr.h>
47 #include <sys/strsun.h>
48 #include <sys/ddi.h>
49 #include <netinet/in.h>
50 #include <inet/ip.h>
52 #include "sockcommon.h"
53 #include "sockfilter_impl.h"
55 #include <sys/socket_proto.h>
57 #include "socktpi_impl.h"
58 #include "sodirect.h"
59 #include <sys/tihdr.h>
61 extern int xnet_skip_checks;
62 extern int xnet_check_print;
64 static void so_queue_oob(struct sonode *, mblk_t *, size_t);
67 /*ARGSUSED*/
68 int
69 so_accept_notsupp(struct sonode *lso, int fflag,
70 struct cred *cr, struct sonode **nsop)
72 return (EOPNOTSUPP);
75 /*ARGSUSED*/
76 int
77 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
79 return (EOPNOTSUPP);
82 /*ARGSUSED*/
83 int
84 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
85 socklen_t *len, struct cred *cr)
87 return (EOPNOTSUPP);
90 /*ARGSUSED*/
91 int
92 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
93 socklen_t *addrlen, boolean_t accept, struct cred *cr)
95 return (EOPNOTSUPP);
98 /*ARGSUSED*/
99 int
100 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
102 return (EOPNOTSUPP);
105 /*ARGSUSED*/
107 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
108 struct cred *cr, mblk_t **mpp)
110 return (EOPNOTSUPP);
/*
 * Generic Socket Ops
 */

/*
 * Default init: delegate to the common socket initialization path.
 */
/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}
125 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
126 int flags, struct cred *cr)
128 int error;
130 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
132 /* X/Open requires this check */
133 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
134 if (xnet_check_print) {
135 printf("sockfs: X/Open bind state check "
136 "caused EINVAL\n");
138 error = EINVAL;
139 goto done;
143 * a bind to a NULL address is interpreted as unbind. So just
144 * do the downcall.
146 if (name == NULL)
147 goto dobind;
149 switch (so->so_family) {
150 case AF_INET:
151 if ((size_t)namelen != sizeof (sin_t)) {
152 error = name->sa_family != so->so_family ?
153 EAFNOSUPPORT : EINVAL;
154 eprintsoline(so, error);
155 goto done;
158 if (name->sa_family != so->so_family) {
159 error = EAFNOSUPPORT;
160 eprintsoline(so, error);
161 goto done;
164 * Force a zero sa_family to match so_family.
166 * Some programs like inetd(1M) don't set the
167 * family field. Other programs leave
168 * sin_family set to garbage - SunOS 4.X does
169 * not check the family field on a bind.
170 * We use the family field that
171 * was passed in to the socket() call.
173 name->sa_family = so->so_family;
174 break;
176 case AF_INET6: {
177 #ifdef DEBUG
178 sin6_t *sin6 = (sin6_t *)name;
179 #endif
180 if ((size_t)namelen != sizeof (sin6_t)) {
181 error = name->sa_family != so->so_family ?
182 EAFNOSUPPORT : EINVAL;
183 eprintsoline(so, error);
184 goto done;
187 if (name->sa_family != so->so_family) {
189 * With IPv6 we require the family to match
190 * unlike in IPv4.
192 error = EAFNOSUPPORT;
193 eprintsoline(so, error);
194 goto done;
196 #ifdef DEBUG
198 * Verify that apps don't forget to clear
199 * sin6_scope_id etc
201 if (sin6->sin6_scope_id != 0 &&
202 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
203 zcmn_err(getzoneid(), CE_WARN,
204 "bind with uninitialized sin6_scope_id "
205 "(%d) on socket. Pid = %d\n",
206 (int)sin6->sin6_scope_id,
207 (int)curproc->p_pid);
209 if (sin6->__sin6_src_id != 0) {
210 zcmn_err(getzoneid(), CE_WARN,
211 "bind with uninitialized __sin6_src_id "
212 "(%d) on socket. Pid = %d\n",
213 (int)sin6->__sin6_src_id,
214 (int)curproc->p_pid);
216 #endif /* DEBUG */
218 break;
220 default:
221 /* Just pass the request to the protocol */
222 goto dobind;
225 dobind:
226 if (so->so_filter_active == 0 ||
227 (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
228 error = (*so->so_downcalls->sd_bind)
229 (so->so_proto_handle, name, namelen, cr);
231 done:
232 SO_UNBLOCK_FALLBACK(so);
234 return (error);
238 so_listen(struct sonode *so, int backlog, struct cred *cr)
240 int error = 0;
242 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
243 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
245 if ((so)->so_filter_active == 0 ||
246 (error = sof_filter_listen(so, &backlog, cr)) < 0)
247 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
248 backlog, cr);
250 SO_UNBLOCK_FALLBACK(so);
252 return (error);
257 so_connect(struct sonode *so, struct sockaddr *name,
258 socklen_t namelen, int fflag, int flags, struct cred *cr)
260 int error = 0;
261 sock_connid_t id;
263 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
264 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
267 * If there is a pending error, return error
268 * This can happen if a non blocking operation caused an error.
271 if (so->so_error != 0) {
272 mutex_enter(&so->so_lock);
273 error = sogeterr(so, B_TRUE);
274 mutex_exit(&so->so_lock);
275 if (error != 0)
276 goto done;
279 if (so->so_filter_active == 0 ||
280 (error = sof_filter_connect(so, (struct sockaddr *)name,
281 &namelen, cr)) < 0) {
282 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
283 name, namelen, &id, cr);
285 if (error == EINPROGRESS)
286 error = so_wait_connected(so,
287 fflag & (FNONBLOCK|FNDELAY), id);
289 done:
290 SO_UNBLOCK_FALLBACK(so);
291 return (error);
294 /*ARGSUSED*/
296 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
298 int error = 0;
299 struct sonode *nso;
301 *nsop = NULL;
303 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
304 if ((so->so_state & SS_ACCEPTCONN) == 0) {
305 SO_UNBLOCK_FALLBACK(so);
306 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
307 EOPNOTSUPP : EINVAL);
310 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
311 &nso)) == 0) {
312 ASSERT(nso != NULL);
314 /* finish the accept */
315 if ((so->so_filter_active > 0 &&
316 (error = sof_filter_accept(nso, cr)) > 0) ||
317 (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
318 nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
319 (void) socket_close(nso, 0, cr);
320 socket_destroy(nso);
321 } else {
322 *nsop = nso;
326 SO_UNBLOCK_FALLBACK(so);
327 return (error);
331 so_sendmsg(struct sonode *so, struct msghdr *msg, struct uio *uiop,
332 struct cred *cr)
334 int error, flags;
335 boolean_t dontblock;
336 ssize_t orig_resid;
337 mblk_t *mp;
339 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
341 flags = msg->msg_flags;
342 error = 0;
343 dontblock = (flags & MSG_DONTWAIT) ||
344 (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
346 if ((so->so_mode & SM_ATOMIC) &&
347 uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
348 so->so_proto_props.sopp_maxpsz != -1) {
349 SO_UNBLOCK_FALLBACK(so);
350 return (EMSGSIZE);
354 * For atomic sends we will only do one iteration.
356 do {
357 if (so->so_state & SS_CANTSENDMORE) {
358 error = EPIPE;
359 break;
362 if (so->so_error != 0) {
363 mutex_enter(&so->so_lock);
364 error = sogeterr(so, B_TRUE);
365 mutex_exit(&so->so_lock);
366 if (error != 0)
367 break;
371 * Send down OOB messages even if the send path is being
372 * flow controlled (assuming the protocol supports OOB data).
374 if (flags & MSG_OOB) {
375 if ((so->so_mode & SM_EXDATA) == 0) {
376 error = EOPNOTSUPP;
377 break;
379 } else if (SO_SND_FLOWCTRLD(so)) {
381 * Need to wait until the protocol is ready to receive
382 * more data for transmission.
384 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
385 break;
389 * Time to send data to the protocol. We either copy the
390 * data into mblks or pass the uio directly to the protocol.
391 * We decide what to do based on the available down calls.
393 if (so->so_downcalls->sd_send_uio != NULL) {
394 error = (*so->so_downcalls->sd_send_uio)
395 (so->so_proto_handle, uiop, msg, cr);
396 if (error != 0)
397 break;
398 } else {
399 /* save the resid in case of failure */
400 orig_resid = uiop->uio_resid;
402 if ((mp = socopyinuio(uiop,
403 so->so_proto_props.sopp_maxpsz,
404 so->so_proto_props.sopp_wroff,
405 so->so_proto_props.sopp_maxblk,
406 so->so_proto_props.sopp_tail, &error)) == NULL) {
407 break;
409 ASSERT(uiop->uio_resid >= 0);
411 if (so->so_filter_active > 0 &&
412 ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
413 &error)) == NULL)) {
414 if (error != 0)
415 break;
416 continue;
418 error = (*so->so_downcalls->sd_send)
419 (so->so_proto_handle, mp, msg, cr);
420 if (error != 0) {
422 * The send failed. We do not have to free the
423 * mblks, because that is the protocol's
424 * responsibility. However, uio_resid must
425 * remain accurate, so adjust that here.
427 uiop->uio_resid = orig_resid;
428 break;
431 } while (uiop->uio_resid > 0);
433 SO_UNBLOCK_FALLBACK(so);
435 return (error);
439 so_sendmblk_impl(struct sonode *so, struct msghdr *msg, int fflag,
440 struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
441 boolean_t fil_inject)
443 int error;
444 boolean_t dontblock;
445 size_t size;
446 mblk_t *mp = *mpp;
448 if (so->so_downcalls->sd_send == NULL)
449 return (EOPNOTSUPP);
451 error = 0;
452 dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
453 (fflag & (FNONBLOCK|FNDELAY));
454 size = msgdsize(mp);
456 if ((so->so_mode & SM_ATOMIC) &&
457 size > so->so_proto_props.sopp_maxpsz &&
458 so->so_proto_props.sopp_maxpsz != -1) {
459 SO_UNBLOCK_FALLBACK(so);
460 return (EMSGSIZE);
463 while (mp != NULL) {
464 mblk_t *nmp, *last_mblk;
465 size_t mlen;
467 if (so->so_state & SS_CANTSENDMORE) {
468 error = EPIPE;
469 break;
471 if (so->so_error != 0) {
472 mutex_enter(&so->so_lock);
473 error = sogeterr(so, B_TRUE);
474 mutex_exit(&so->so_lock);
475 if (error != 0)
476 break;
478 /* Socket filters are not flow controlled */
479 if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
481 * Need to wait until the protocol is ready to receive
482 * more data for transmission.
484 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
485 break;
489 * We only allow so_maxpsz of data to be sent down to
490 * the protocol at time.
492 mlen = MBLKL(mp);
493 nmp = mp->b_cont;
494 last_mblk = mp;
495 while (nmp != NULL) {
496 mlen += MBLKL(nmp);
497 if (mlen > so->so_proto_props.sopp_maxpsz) {
498 last_mblk->b_cont = NULL;
499 break;
501 last_mblk = nmp;
502 nmp = nmp->b_cont;
505 if (so->so_filter_active > 0 &&
506 (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
507 cr, &error)) == NULL) {
508 *mpp = mp = nmp;
509 if (error != 0)
510 break;
511 continue;
513 error = (*so->so_downcalls->sd_send)
514 (so->so_proto_handle, mp, msg, cr);
515 if (error != 0) {
517 * The send failed. The protocol will free the mblks
518 * that were sent down. Let the caller deal with the
519 * rest.
521 *mpp = nmp;
522 break;
525 *mpp = mp = nmp;
527 /* Let the filter know whether the protocol is flow controlled */
528 if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
529 error = ENOSPC;
531 return (error);
534 #pragma inline(so_sendmblk_impl)
537 so_sendmblk(struct sonode *so, struct msghdr *msg, int fflag,
538 struct cred *cr, mblk_t **mpp)
540 int error;
542 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
544 error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
545 B_FALSE);
547 SO_UNBLOCK_FALLBACK(so);
549 return (error);
553 so_shutdown(struct sonode *so, int how, struct cred *cr)
555 int error;
557 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
560 * SunOS 4.X has no check for datagram sockets.
561 * 5.X checks that it is connected (ENOTCONN)
562 * X/Open requires that we check the connected state.
564 if (!(so->so_state & SS_ISCONNECTED)) {
565 if (!xnet_skip_checks) {
566 error = ENOTCONN;
567 if (xnet_check_print) {
568 printf("sockfs: X/Open shutdown check "
569 "caused ENOTCONN\n");
572 goto done;
575 if (so->so_filter_active == 0 ||
576 (error = sof_filter_shutdown(so, &how, cr)) < 0)
577 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
578 how, cr));
581 * Protocol agreed to shutdown. We need to flush the
582 * receive buffer if the receive side is being shutdown.
584 if (error == 0 && how != SHUT_WR) {
585 mutex_enter(&so->so_lock);
586 /* wait for active reader to finish */
587 (void) so_lock_read(so, 0);
589 so_rcv_flush(so);
591 so_unlock_read(so);
592 mutex_exit(&so->so_lock);
595 done:
596 SO_UNBLOCK_FALLBACK(so);
597 return (error);
601 so_getsockname(struct sonode *so, struct sockaddr *addr,
602 socklen_t *addrlen, struct cred *cr)
604 int error;
606 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
608 if (so->so_filter_active == 0 ||
609 (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
610 error = (*so->so_downcalls->sd_getsockname)
611 (so->so_proto_handle, addr, addrlen, cr);
613 SO_UNBLOCK_FALLBACK(so);
614 return (error);
618 so_getpeername(struct sonode *so, struct sockaddr *addr,
619 socklen_t *addrlen, boolean_t accept, struct cred *cr)
621 int error;
623 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
625 if (accept) {
626 error = (*so->so_downcalls->sd_getpeername)
627 (so->so_proto_handle, addr, addrlen, cr);
628 } else if (!(so->so_state & SS_ISCONNECTED)) {
629 error = ENOTCONN;
630 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
631 /* Added this check for X/Open */
632 error = EINVAL;
633 if (xnet_check_print) {
634 printf("sockfs: X/Open getpeername check => EINVAL\n");
636 } else if (so->so_filter_active == 0 ||
637 (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
638 error = (*so->so_downcalls->sd_getpeername)
639 (so->so_proto_handle, addr, addrlen, cr);
642 SO_UNBLOCK_FALLBACK(so);
643 return (error);
647 so_getsockopt(struct sonode *so, int level, int option_name,
648 void *optval, socklen_t *optlenp, int flags, struct cred *cr)
650 int error = 0;
652 if (level == SOL_FILTER)
653 return (sof_getsockopt(so, option_name, optval, optlenp, cr));
655 SO_BLOCK_FALLBACK(so,
656 SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
658 if ((so->so_filter_active == 0 ||
659 (error = sof_filter_getsockopt(so, level, option_name, optval,
660 optlenp, cr)) < 0) &&
661 (error = socket_getopt_common(so, level, option_name, optval,
662 optlenp, flags)) < 0) {
663 error = (*so->so_downcalls->sd_getsockopt)
664 (so->so_proto_handle, level, option_name, optval, optlenp,
665 cr);
666 if (error == ENOPROTOOPT) {
667 if (level == SOL_SOCKET) {
669 * If a protocol does not support a particular
670 * socket option, set can fail (not allowed)
671 * but get can not fail. This is the previous
672 * sockfs bahvior.
674 switch (option_name) {
675 case SO_LINGER:
676 if (*optlenp < (t_uscalar_t)
677 sizeof (struct linger)) {
678 error = EINVAL;
679 break;
681 error = 0;
682 bzero(optval, sizeof (struct linger));
683 *optlenp = sizeof (struct linger);
684 break;
685 case SO_RCVTIMEO:
686 case SO_SNDTIMEO:
687 if (*optlenp < (t_uscalar_t)
688 sizeof (struct timeval)) {
689 error = EINVAL;
690 break;
692 error = 0;
693 bzero(optval, sizeof (struct timeval));
694 *optlenp = sizeof (struct timeval);
695 break;
696 case SO_SND_BUFINFO:
697 if (*optlenp < (t_uscalar_t)
698 sizeof (struct so_snd_bufinfo)) {
699 error = EINVAL;
700 break;
702 error = 0;
703 bzero(optval,
704 sizeof (struct so_snd_bufinfo));
705 *optlenp =
706 sizeof (struct so_snd_bufinfo);
707 break;
708 case SO_DEBUG:
709 case SO_REUSEADDR:
710 case SO_KEEPALIVE:
711 case SO_DONTROUTE:
712 case SO_BROADCAST:
713 case SO_USELOOPBACK:
714 case SO_OOBINLINE:
715 case SO_DGRAM_ERRIND:
716 case SO_SNDBUF:
717 case SO_RCVBUF:
718 error = 0;
719 *((int32_t *)optval) = 0;
720 *optlenp = sizeof (int32_t);
721 break;
722 default:
723 break;
729 SO_UNBLOCK_FALLBACK(so);
730 return (error);
734 so_setsockopt(struct sonode *so, int level, int option_name,
735 const void *optval, socklen_t optlen, struct cred *cr)
737 int error = 0;
738 struct timeval tl;
739 const void *opt = optval;
741 if (level == SOL_FILTER)
742 return (sof_setsockopt(so, option_name, optval, optlen, cr));
744 SO_BLOCK_FALLBACK(so,
745 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
747 /* X/Open requires this check */
748 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
749 SO_UNBLOCK_FALLBACK(so);
750 if (xnet_check_print)
751 printf("sockfs: X/Open setsockopt check => EINVAL\n");
752 return (EINVAL);
755 if (so->so_filter_active > 0 &&
756 (error = sof_filter_setsockopt(so, level, option_name,
757 (void *)optval, &optlen, cr)) >= 0)
758 goto done;
760 if (level == SOL_SOCKET) {
761 switch (option_name) {
762 case SO_RCVTIMEO:
763 case SO_SNDTIMEO: {
765 * We pass down these two options to protocol in order
766 * to support some third part protocols which need to
767 * know them. For those protocols which don't care
768 * these two options, simply return 0.
770 clock_t t_usec;
772 if (get_udatamodel() == DATAMODEL_NONE ||
773 get_udatamodel() == DATAMODEL_NATIVE) {
774 if (optlen != sizeof (struct timeval)) {
775 error = EINVAL;
776 goto done;
778 bcopy((struct timeval *)optval, &tl,
779 sizeof (struct timeval));
780 } else {
781 if (optlen != sizeof (struct timeval32)) {
782 error = EINVAL;
783 goto done;
785 TIMEVAL32_TO_TIMEVAL(&tl,
786 (struct timeval32 *)optval);
788 opt = &tl;
789 optlen = sizeof (tl);
790 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
791 mutex_enter(&so->so_lock);
792 if (option_name == SO_RCVTIMEO)
793 so->so_rcvtimeo = drv_usectohz(t_usec);
794 else
795 so->so_sndtimeo = drv_usectohz(t_usec);
796 mutex_exit(&so->so_lock);
797 break;
801 error = (*so->so_downcalls->sd_setsockopt)
802 (so->so_proto_handle, level, option_name, opt, optlen, cr);
803 done:
804 SO_UNBLOCK_FALLBACK(so);
805 return (error);
809 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
810 struct cred *cr, int32_t *rvalp)
812 int error = 0;
814 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
817 * If there is a pending error, return error
818 * This can happen if a non blocking operation caused an error.
820 if (so->so_error != 0) {
821 mutex_enter(&so->so_lock);
822 error = sogeterr(so, B_TRUE);
823 mutex_exit(&so->so_lock);
824 if (error != 0)
825 goto done;
829 * calling strioc can result in the socket falling back to TPI,
830 * if that is supported.
832 if ((so->so_filter_active == 0 ||
833 (error = sof_filter_ioctl(so, cmd, arg, mode,
834 rvalp, cr)) < 0) &&
835 (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
836 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
837 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
838 cmd, arg, mode, rvalp, cr);
841 done:
842 SO_UNBLOCK_FALLBACK(so);
844 return (error);
848 so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
849 struct pollhead **phpp)
851 int state = so->so_state, mask;
852 *reventsp = 0;
855 * In sockets the errors are represented as input/output events
857 if (so->so_error != 0 &&
858 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
859 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
860 return (0);
864 * If the socket is in a state where it can send data
865 * turn on POLLWRBAND and POLLOUT events.
867 if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
869 * out of band data is allowed even if the connection
870 * is flow controlled
872 *reventsp |= POLLWRBAND & events;
873 if (!SO_SND_FLOWCTRLD(so)) {
875 * As long as there is buffer to send data
876 * turn on POLLOUT events
878 *reventsp |= POLLOUT & events;
883 * Turn on POLLIN whenever there is data on the receive queue,
884 * or the socket is in a state where no more data will be received.
885 * Also, if the socket is accepting connections, flip the bit if
886 * there is something on the queue.
888 * We do an initial check for events without holding locks. However,
889 * if there are no event available, then we redo the check for POLLIN
890 * events under the lock.
893 /* Pending connections */
894 if (!list_is_empty(&so->so_acceptq_list))
895 *reventsp |= (POLLIN|POLLRDNORM) & events;
898 * If we're looking for POLLRDHUP, indicate it if we have sent the
899 * last rx signal for the socket.
901 if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
902 *reventsp |= POLLRDHUP;
904 /* Data */
905 /* so_downcalls is null for sctp */
906 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
907 *reventsp |= (*so->so_downcalls->sd_poll)
908 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
909 CRED()) & events;
910 ASSERT((*reventsp & ~events) == 0);
911 /* do not recheck events */
912 events &= ~SO_PROTO_POLLEV;
913 } else {
914 if (SO_HAVE_DATA(so))
915 *reventsp |= (POLLIN|POLLRDNORM) & events;
917 /* Urgent data */
918 if ((state & SS_OOBPEND) != 0) {
919 *reventsp |= (POLLRDBAND | POLLPRI) & events;
923 * If the socket has become disconnected, we set POLLHUP.
924 * Note that if we are in this state, we will have set POLLIN
925 * (SO_HAVE_DATA() is true on a disconnected socket), but not
926 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with
927 * the semantics of POLLHUP, which is defined to be mutually
928 * exclusive with respect to POLLOUT but not POLLIN. We are
929 * therefore setting POLLHUP primarily for the benefit of
930 * those not polling on POLLIN, as they have no other way of
931 * knowing that the socket has been disconnected.
933 mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;
935 if ((state & (mask | SS_ISCONNECTED)) == mask)
936 *reventsp |= POLLHUP;
939 if ((!*reventsp && !anyyet) || (events & POLLET)) {
940 /* Check for read events again, but this time under lock */
941 if (events & (POLLIN|POLLRDNORM)) {
942 mutex_enter(&so->so_lock);
943 if (SO_HAVE_DATA(so) ||
944 !list_is_empty(&so->so_acceptq_list)) {
945 if (events & POLLET) {
946 so->so_pollev |= SO_POLLEV_IN;
947 *phpp = &so->so_poll_list;
950 mutex_exit(&so->so_lock);
951 *reventsp |= (POLLIN|POLLRDNORM) & events;
953 return (0);
954 } else {
955 so->so_pollev |= SO_POLLEV_IN;
956 mutex_exit(&so->so_lock);
959 *phpp = &so->so_poll_list;
961 return (0);
965 * Generic Upcalls
967 void
968 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
969 cred_t *peer_cred, pid_t peer_cpid)
971 struct sonode *so = (struct sonode *)sock_handle;
973 mutex_enter(&so->so_lock);
974 ASSERT(so->so_proto_handle != NULL);
976 if (peer_cred != NULL) {
977 if (so->so_peercred != NULL)
978 crfree(so->so_peercred);
979 crhold(peer_cred);
980 so->so_peercred = peer_cred;
981 so->so_cpid = peer_cpid;
984 so->so_proto_connid = id;
985 soisconnected(so);
987 * Wake ones who're waiting for conn to become established.
989 so_notify_connected(so);
993 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
995 struct sonode *so = (struct sonode *)sock_handle;
996 boolean_t connect_failed;
998 mutex_enter(&so->so_lock);
1001 * If we aren't currently connected, then this isn't a disconnect but
1002 * rather a failure to connect.
1004 connect_failed = !(so->so_state & SS_ISCONNECTED);
1006 so->so_proto_connid = id;
1007 soisdisconnected(so, error);
1008 so_notify_disconnected(so, connect_failed, error);
1010 return (0);
1013 void
1014 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
1015 uintptr_t arg)
1017 struct sonode *so = (struct sonode *)sock_handle;
1019 switch (action) {
1020 case SOCK_OPCTL_SHUT_SEND:
1021 mutex_enter(&so->so_lock);
1022 socantsendmore(so);
1023 so_notify_disconnecting(so);
1024 break;
1025 case SOCK_OPCTL_SHUT_RECV: {
1026 mutex_enter(&so->so_lock);
1027 socantrcvmore(so);
1028 so_notify_eof(so);
1029 break;
1031 case SOCK_OPCTL_ENAB_ACCEPT:
1032 mutex_enter(&so->so_lock);
1033 so->so_state |= SS_ACCEPTCONN;
1034 so->so_backlog = (unsigned int)arg;
1036 * The protocol can stop generating newconn upcalls when
1037 * the backlog is full, so to make sure the listener does
1038 * not end up with a queue full of deferred connections
1039 * we reduce the backlog by one. Thus the listener will
1040 * start closing deferred connections before the backlog
1041 * is full.
1043 if (so->so_filter_active > 0)
1044 so->so_backlog = MAX(1, so->so_backlog - 1);
1045 mutex_exit(&so->so_lock);
1046 break;
1047 default:
1048 ASSERT(0);
1049 break;
1053 void
1054 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1056 struct sonode *so = (struct sonode *)sock_handle;
1058 if (qfull) {
1059 so_snd_qfull(so);
1060 } else {
1061 so_snd_qnotfull(so);
1062 mutex_enter(&so->so_lock);
1063 /* so_notify_writable drops so_lock */
1064 so_notify_writable(so);
1068 sock_upper_handle_t
1069 so_newconn(sock_upper_handle_t parenthandle,
1070 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
1071 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1073 struct sonode *so = (struct sonode *)parenthandle;
1074 struct sonode *nso;
1075 int error;
1077 ASSERT(proto_handle != NULL);
1079 if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1080 (so->so_acceptq_len >= so->so_backlog &&
1081 (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
1082 return (NULL);
1085 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1086 &error);
1087 if (nso == NULL)
1088 return (NULL);
1090 if (peer_cred != NULL) {
1091 crhold(peer_cred);
1092 nso->so_peercred = peer_cred;
1093 nso->so_cpid = peer_cpid;
1095 nso->so_listener = so;
1098 * The new socket (nso), proto_handle and sock_upcallsp are all
1099 * valid at this point. But as soon as nso is placed in the accept
1100 * queue that can no longer be assumed (since an accept() thread may
1101 * pull it off the queue and close the socket).
1103 *sock_upcallsp = &so_upcalls;
1105 mutex_enter(&so->so_acceptq_lock);
1106 if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
1107 mutex_exit(&so->so_acceptq_lock);
1108 ASSERT(nso->so_count == 1);
1109 nso->so_count--;
1110 nso->so_listener = NULL;
1111 /* drop proto ref */
1112 VN_RELE(SOTOV(nso));
1113 socket_destroy(nso);
1114 return (NULL);
1115 } else {
1116 so->so_acceptq_len++;
1117 if (nso->so_state & SS_FIL_DEFER) {
1118 list_insert_tail(&so->so_acceptq_defer, nso);
1119 mutex_exit(&so->so_acceptq_lock);
1120 } else {
1121 list_insert_tail(&so->so_acceptq_list, nso);
1122 cv_signal(&so->so_acceptq_cv);
1123 mutex_exit(&so->so_acceptq_lock);
1124 mutex_enter(&so->so_lock);
1125 so_notify_newconn(so);
1128 return ((sock_upper_handle_t)nso);
1132 void
1133 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1135 struct sonode *so;
1137 so = (struct sonode *)sock_handle;
1139 mutex_enter(&so->so_lock);
1141 if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1142 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1143 if (soppp->sopp_flags & SOCKOPT_WROFF)
1144 so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1145 if (soppp->sopp_flags & SOCKOPT_TAIL)
1146 so->so_proto_props.sopp_tail = soppp->sopp_tail;
1147 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1148 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1149 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1150 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1151 if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1152 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1153 if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1154 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1155 if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
1156 if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1157 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1158 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1159 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1160 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1161 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1164 if (soppp->sopp_zcopyflag & COPYCACHED) {
1165 so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1168 if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1169 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1170 if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1171 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1172 if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1173 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1174 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1175 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1176 if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
1177 so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
1179 mutex_exit(&so->so_lock);
1181 if (so->so_filter_active > 0) {
1182 sof_instance_t *inst;
1183 ssize_t maxblk;
1184 ushort_t wroff, tail;
1185 maxblk = so->so_proto_props.sopp_maxblk;
1186 wroff = so->so_proto_props.sopp_wroff;
1187 tail = so->so_proto_props.sopp_tail;
1188 for (inst = so->so_filter_bottom; inst != NULL;
1189 inst = inst->sofi_prev) {
1190 if (SOF_INTERESTED(inst, mblk_prop)) {
1191 (*inst->sofi_ops->sofop_mblk_prop)(
1192 (sof_handle_t)inst, inst->sofi_cookie,
1193 &maxblk, &wroff, &tail);
1196 mutex_enter(&so->so_lock);
1197 so->so_proto_props.sopp_maxblk = maxblk;
1198 so->so_proto_props.sopp_wroff = wroff;
1199 so->so_proto_props.sopp_tail = tail;
1200 mutex_exit(&so->so_lock);
1202 #ifdef DEBUG
1203 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1204 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1205 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1206 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
1207 SOCKOPT_LOOPBACK);
1208 ASSERT(soppp->sopp_flags == 0);
1209 #endif
/*
 * so_queue_msg_impl: deliver inbound data (mp, of total size msg_size) to
 * the socket's receive queue, running it through any attached socket
 * filters starting at `filter` and walking toward the top of the stack
 * (sofi_prev).
 *
 * Returns the remaining receive-buffer space (so_rcvbuf - so_rcv_queued),
 * or -1 with *errorp set (ENOSPC when flow-controlled, EOPNOTSUPP during
 * a fallback) — the protocol uses a negative return as a back-pressure
 * signal.  A NULL mp is a pure notification (data is held by the
 * protocol, sd_recv_uio path).
 *
 * Locking: entered without so_lock.  The so_notify_*() routines are
 * called with so_lock held and drop it before returning (see the
 * comment on the mp == NULL path below).
 */
/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		/*
		 * No mblk: the protocol either copied the data itself
		 * (sd_recv_uio) and only wants the wakeup, or is just
		 * probing for buffer space.
		 */
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		/*
		 * Give each interested filter a shot at the data; a
		 * filter may replace, consume, or drop the message
		 * (msg_size updated accordingly).
		 */
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);

			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	if (flags & MSG_OOB) {
		/* OOB data is kept/signalled separately, then space check */
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		/* Socket is (becoming) a STREAMS socket; reject new data */
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		/* Nobody will read this; silently discard */
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		/*
		 * A filter may have handed back a b_next chain; enqueue
		 * each message separately with its own size.
		 */
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		/* Receive buffer full: tell the protocol to back off */
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	/* Entered with so_lock held; report remaining buffer space only */
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}
#pragma	inline(so_queue_msg_impl)
1361 ssize_t
1362 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1363 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
1365 struct sonode *so = (struct sonode *)sock_handle;
1367 return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1368 so->so_filter_bottom));
/*
 * Set the offset of where the oob data is relative to the bytes in
 * queued. Also generate SIGURG
 *
 * so_oobmark is expressed relative to the data currently queued
 * (so_rcv_queued + offset); an offset of zero means the stream is
 * already at the urgent mark (SS_RCVATMARK).  Any previously pending
 * OOB state and saved OOB message are discarded first.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * set the offset where the urgent byte is
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	/*
	 * Generate the urgent-data signal.  NOTE(review): no
	 * mutex_exit() here — presumably so_notify_oobsig() drops
	 * so_lock, like the other notify routines in this file.
	 */
	so_notify_oobsig(so);
}
/*
 * Queue the OOB byte
 *
 * If the socket is not in OOB-inline mode the urgent byte is parked in
 * so_oobmsg (retrieved later via MSG_OOB recv); otherwise it is placed
 * in the normal receive queue.  In both cases waiters are notified.
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		/* Stash aside; readers must ask for it with MSG_OOB */
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		/* Inline mode: OOB byte flows with the normal data */
		so_enqueue_msg(so, mp, len);
	}

	/* the notify functions will drop the lock */
	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}
/*
 * so_close: close down the socket.  Marks the socket SS_CLOSING (which
 * stops further enqueueing of data), flushes pending receive data and
 * any accept queue, then hands the close down to the protocol.
 *
 * Returns 0 on success or an error from the protocol's sd_close
 * downcall.  The protocol's reference (vnode hold) is released here on
 * a synchronous close; on EINPROGRESS the protocol releases it later
 * via the so_closed() upcall.
 */
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);
		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * Protocol is in the process of closing, it will make a
		 * 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}
1486 * Upcall made by the protocol when it's doing an asynchronous close. It
1487 * will drop the protocol's reference on the socket.
1489 void
1490 so_closed(sock_upper_handle_t sock_handle)
1492 struct sonode *so = (struct sonode *)sock_handle;
1494 VN_RELE(SOTOV(so));
1497 void
1498 so_zcopy_notify(sock_upper_handle_t sock_handle)
1500 struct sonode *so = (struct sonode *)sock_handle;
1502 mutex_enter(&so->so_lock);
1503 so->so_copyflag |= STZCNOTIFY;
1504 cv_broadcast(&so->so_copy_cv);
1505 mutex_exit(&so->so_lock);
1508 void
1509 so_set_error(sock_upper_handle_t sock_handle, int error)
1511 struct sonode *so = (struct sonode *)sock_handle;
1513 mutex_enter(&so->so_lock);
1515 soseterror(so, error);
1517 so_notify_error(so);
/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 *
 * When reading from the receive queue, control (mctlp) messages are
 * TPI primitives (T_DATA_IND, T_UNITDATA_IND, T_OPTDATA_IND) whose
 * address/option payloads are converted into msg_name/msg_control.
 * Returns 0 or an errno; the caller's msghdr is updated in place.
 */
int
so_recvmsg(struct sonode *so, struct msghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	/* MSG_WAITALL makes no sense when only peeking */
	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		/* Urgent data: handled by the protocol or sorecvoob() */
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr,
				    &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen);
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen, control,
			    controllen);
			if (error) {
				/* Unwind the name allocation made above */
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen);
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen, control,
			    controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
		/* FALLTHROUGH */
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	/* Finish any sodirect (I/OAT) receive state */
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
1865 sonodeops_t so_sonodeops = {
1866 so_init, /* sop_init */
1867 so_accept, /* sop_accept */
1868 so_bind, /* sop_bind */
1869 so_listen, /* sop_listen */
1870 so_connect, /* sop_connect */
1871 so_recvmsg, /* sop_recvmsg */
1872 so_sendmsg, /* sop_sendmsg */
1873 so_sendmblk, /* sop_sendmblk */
1874 so_getpeername, /* sop_getpeername */
1875 so_getsockname, /* sop_getsockname */
1876 so_shutdown, /* sop_shutdown */
1877 so_getsockopt, /* sop_getsockopt */
1878 so_setsockopt, /* sop_setsockopt */
1879 so_ioctl, /* sop_ioctl */
1880 so_poll, /* sop_poll */
1881 so_close, /* sop_close */
1884 sock_upcalls_t so_upcalls = {
1885 so_newconn,
1886 so_connected,
1887 so_disconnected,
1888 so_opctl,
1889 so_queue_msg,
1890 so_set_prop,
1891 so_txq_full,
1892 so_signal_oob,
1893 so_zcopy_notify,
1894 so_set_error,
1895 so_closed