4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015, Joyent, Inc.
25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28 #include <sys/types.h>
29 #include <sys/t_lock.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
36 #include <sys/kmem_impl.h>
37 #include <sys/sysmacros.h>
39 #include <sys/vnode.h>
40 #include <sys/debug.h>
41 #include <sys/errno.h>
46 #include <sys/termios.h>
47 #include <sys/stream.h>
48 #include <sys/strsubr.h>
49 #include <sys/strsun.h>
50 #include <sys/suntpi.h>
52 #include <sys/esunddi.h>
53 #include <sys/flock.h>
54 #include <sys/modctl.h>
55 #include <sys/vtrace.h>
56 #include <sys/cmn_err.h>
57 #include <sys/pathname.h>
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/sockio.h>
62 #include <netinet/in.h>
64 #include <sys/strsun.h>
66 #include <sys/tiuser.h>
67 #define _SUN_TPI_VERSION 2
68 #include <sys/tihdr.h>
69 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */
73 #include <inet/common.h>
77 #include <inet/udp_impl.h>
82 #include "sockcommon.h"
84 #include "socktpi_impl.h"
87 * Possible failures when memory can't be allocated. The documented behavior:
90 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/
92 * (4.X does not document EINTR but returns it)
93 * bind: ENOSR - ENOBUFS/ENOSR
94 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR
95 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
96 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR
97 * (4.X getpeername and getsockname do not fail in practice)
98 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR
100 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/
102 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/
104 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
105 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR
106 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR
107 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR
109 * Resolution. When allocation fails:
112 * connect, accept: EINTR
113 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep
114 * socket, socketpair: ENOBUFS
115 * getpeername, getsockname: sleep
116 * getsockopt, setsockopt: sleep
121 * Variables that make sockfs do something other than the standard TPI
122 * for the AF_INET transports.
125 * TCP can handle a O_T_BIND_REQ with an increased backlog even though
126 * the transport is already bound. This is needed to avoid losing the
127 * port number should listen() do a T_UNBIND_REQ followed by a
131 * UDP and ICMP can handle a T_CONN_REQ.
132 * This is needed to make the sequence of connect(), getsockname()
133 * return the local IP address used to send packets to the connected to
137 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ.
138 * Set this to non-zero to send TPI conformant messages to TCP in this
139 * respect. This is a performance optimization.
142 * TCP can handle a T_CONN_REQ without the acceptor being bound.
143 * This is a performance optimization that has been picked up in XTI.
145 * soaccept_tpi_multioptions:
146 * When inheriting SOL_SOCKET options from the listener to the accepting
147 * socket send them as a single message for AF_INET{,6}.
149 int solisten_tpi_tcp
= 0;
150 int soconnect_tpi_udp
= 0;
151 int soconnect_tpi_tcp
= 0;
152 int soaccept_tpi_tcp
= 0;
153 int soaccept_tpi_multioptions
= 1;
154 #else /* SOCK_TEST */
155 #define soconnect_tpi_tcp 0
156 #define soconnect_tpi_udp 0
157 #define solisten_tpi_tcp 0
158 #define soaccept_tpi_tcp 0
159 #define soaccept_tpi_multioptions 1
160 #endif /* SOCK_TEST */
163 extern int do_useracc
;
164 extern clock_t sock_test_timelimit
;
165 #endif /* SOCK_TEST */
167 extern uint32_t ucredsize
;
170 * Some X/Open added checks might have to be backed out to keep SunOS 4.X
171 * applications working. Turn on this flag to disable these checks.
173 int xnet_skip_checks
= 0;
174 int xnet_check_print
= 0;
175 int xnet_truncate_print
= 0;
177 static void sotpi_destroy(struct sonode
*);
178 static struct sonode
*sotpi_create(struct sockparams
*, int, int, int, int,
181 static boolean_t
sotpi_info_create(struct sonode
*, int);
182 static void sotpi_info_init(struct sonode
*);
183 static void sotpi_info_fini(struct sonode
*);
184 static void sotpi_info_destroy(struct sonode
*);
187 * Do direct function call to the transport layer below; this would
188 * also allow the transport to utilize read-side synchronous stream
189 * interface if necessary. This is a /etc/system tunable that must
190 * not be modified on a running system. By default this is enabled
191 * for performance reasons and may be disabled for debugging purposes.
193 boolean_t socktpi_direct
= B_TRUE
;
195 static struct kmem_cache
*socktpi_cache
, *socktpi_unix_cache
;
197 extern void sigintr(k_sigset_t
*, int);
198 extern void sigunintr(k_sigset_t
*);
200 static int sotpi_unbind(struct sonode
*, int);
202 /* TPI sockfs sonode operations */
203 int sotpi_init(struct sonode
*, struct sonode
*, struct cred
*,
205 static int sotpi_accept(struct sonode
*, int, struct cred
*,
207 static int sotpi_bind(struct sonode
*, struct sockaddr
*, socklen_t
,
209 static int sotpi_listen(struct sonode
*, int, struct cred
*);
210 static int sotpi_connect(struct sonode
*, struct sockaddr
*,
211 socklen_t
, int, int, struct cred
*);
212 extern int sotpi_recvmsg(struct sonode
*, struct msghdr
*,
213 struct uio
*, struct cred
*);
214 static int sotpi_sendmsg(struct sonode
*, struct msghdr
*,
215 struct uio
*, struct cred
*);
216 static int sotpi_sendmblk(struct sonode
*, struct msghdr
*, int,
217 struct cred
*, mblk_t
**);
218 static int sosend_dgramcmsg(struct sonode
*, struct sockaddr
*, socklen_t
,
219 struct uio
*, void *, t_uscalar_t
, int);
220 static int sodgram_direct(struct sonode
*, struct sockaddr
*,
221 socklen_t
, struct uio
*, int);
222 extern int sotpi_getpeername(struct sonode
*, struct sockaddr
*,
223 socklen_t
*, boolean_t
, struct cred
*);
224 static int sotpi_getsockname(struct sonode
*, struct sockaddr
*,
225 socklen_t
*, struct cred
*);
226 static int sotpi_shutdown(struct sonode
*, int, struct cred
*);
227 extern int sotpi_getsockopt(struct sonode
*, int, int, void *,
228 socklen_t
*, int, struct cred
*);
229 extern int sotpi_setsockopt(struct sonode
*, int, int, const void *,
230 socklen_t
, struct cred
*);
231 static int sotpi_ioctl(struct sonode
*, int, intptr_t, int, struct cred
*,
233 static int socktpi_plumbioctl(struct vnode
*, int, intptr_t, int,
234 struct cred
*, int32_t *);
235 static int sotpi_poll(struct sonode
*, short, int, short *,
237 static int sotpi_close(struct sonode
*, int, struct cred
*);
239 static int i_sotpi_info_constructor(sotpi_info_t
*);
240 static void i_sotpi_info_destructor(sotpi_info_t
*);
242 sonodeops_t sotpi_sonodeops
= {
243 sotpi_init
, /* sop_init */
244 sotpi_accept
, /* sop_accept */
245 sotpi_bind
, /* sop_bind */
246 sotpi_listen
, /* sop_listen */
247 sotpi_connect
, /* sop_connect */
248 sotpi_recvmsg
, /* sop_recvmsg */
249 sotpi_sendmsg
, /* sop_sendmsg */
250 sotpi_sendmblk
, /* sop_sendmblk */
251 sotpi_getpeername
, /* sop_getpeername */
252 sotpi_getsockname
, /* sop_getsockname */
253 sotpi_shutdown
, /* sop_shutdown */
254 sotpi_getsockopt
, /* sop_getsockopt */
255 sotpi_setsockopt
, /* sop_setsockopt */
256 sotpi_ioctl
, /* sop_ioctl */
257 sotpi_poll
, /* sop_poll */
258 sotpi_close
, /* sop_close */
262 * Return a TPI socket vnode.
264 * Note that sockets assume that the driver will clone (either itself
265 * or by using the clone driver) i.e. a socket() call will always
266 * result in a new vnode being created.
270 * Common create code for socket and accept. If tso is set the values
271 * from that node are used instead of issuing a T_INFO_REQ.
275 static struct sonode
*
276 sotpi_create(struct sockparams
*sp
, int family
, int type
, int protocol
,
277 int sflags
, int *errorp
, cred_t
*cr
)
281 int sfamily
= family
;
283 ASSERT(sp
->sp_sdev_info
.sd_vnode
!= NULL
);
286 * to be compatible with old tpi socket implementation ignore
287 * sleep flag (sflags) passed in
289 cp
= (family
== AF_UNIX
) ? socktpi_unix_cache
: socktpi_cache
;
290 so
= kmem_cache_alloc(cp
, KM_SLEEP
);
296 sonode_init(so
, sp
, family
, type
, protocol
, &sotpi_sonodeops
);
299 so
->so_is_stream
= false;
306 sotpi_destroy(struct sonode
*so
)
309 struct sockparams
*origsp
;
312 * If there is a new dealloc function (ie. smod_destroy_func),
313 * then it should check the correctness of the ops.
316 ASSERT(so
->so_ops
== &sotpi_sonodeops
);
318 origsp
= SOTOTPI(so
)->sti_orig_sp
;
322 if (so
->so_state
& SS_FALLBACK_COMP
) {
324 * A fallback happened, which means that a sotpi_info_t struct
325 * was allocated (as opposed to being allocated from the TPI
326 * sonode cache). Therefore we explicitly free the struct
329 sotpi_info_destroy(so
);
330 ASSERT(origsp
!= NULL
);
332 origsp
->sp_smod_info
->smod_sock_destroy_func(so
);
333 SOCKPARAMS_DEC_REF(origsp
);
336 cp
= (so
->so_family
== AF_UNIX
) ? socktpi_unix_cache
:
338 kmem_cache_free(cp
, so
);
344 sotpi_init(struct sonode
*so
, struct sonode
*tso
, struct cred
*cr
, int flags
)
352 sotpi_info_t
*sti
= SOTOTPI(so
);
354 dprint(1, ("sotpi_init()\n"));
357 * overwrite the sleep flag passed in but that is ok
358 * as tpi socket does not honor sleep flag.
360 flags
|= FREAD
|FWRITE
;
363 * Record in so_flag that it is a clone.
365 if (getmajor(sti
->sti_dev
) == clone_major
)
366 so
->so_flag
|= SOCLONE
;
368 if ((so
->so_type
== SOCK_STREAM
|| so
->so_type
== SOCK_DGRAM
) &&
369 (so
->so_family
== AF_INET
|| so
->so_family
== AF_INET6
) &&
370 (so
->so_protocol
== IPPROTO_TCP
|| so
->so_protocol
== IPPROTO_UDP
||
371 so
->so_protocol
== IPPROTO_IP
)) {
372 /* Tell tcp or udp that it's talking to sockets */
376 * Here we indicate to socktpi_open() our attempt to
377 * make direct calls between sockfs and transport.
378 * The final decision is left to socktpi_open().
382 ASSERT(so
->so_type
!= SOCK_DGRAM
|| tso
== NULL
);
383 if (so
->so_type
== SOCK_STREAM
&& tso
!= NULL
) {
384 if (SOTOTPI(tso
)->sti_direct
) {
386 * Inherit sti_direct from listener and pass
387 * SO_ACCEPTOR open flag to tcp, indicating
388 * that this is an accept fast-path instance.
390 flags
|= SO_ACCEPTOR
;
393 * sti_direct is not set on listener, meaning
394 * that the listener has been converted from
395 * a socket to a stream. Ensure that the
396 * acceptor inherits these settings.
399 flags
&= ~SO_SOCKSTR
;
405 * Tell local transport that it is talking to sockets.
407 if (so
->so_family
== AF_UNIX
) {
413 maj
= getmajor(newdev
);
414 ASSERT(STREAMSTAB(maj
));
416 error
= stropen(vp
, &newdev
, flags
, cr
);
420 if (so
->so_flag
& SOCLONE
)
421 ASSERT(newdev
!= vp
->v_rdev
);
422 mutex_enter(&so
->so_lock
);
423 sti
->sti_dev
= newdev
;
425 mutex_exit(&so
->so_lock
);
427 if (stp
->sd_flag
& STRISTTY
) {
429 * this is a post SVR4 tty driver - a socket can not
430 * be a controlling terminal. Fail the open.
432 (void) sotpi_close(so
, flags
, cr
);
433 return (ENOTTY
); /* XXX */
436 ASSERT(stp
->sd_wrq
!= NULL
);
437 sti
->sti_provinfo
= tpi_findprov(stp
->sd_wrq
);
440 * If caller is interested in doing direct function call
441 * interface to/from transport module, probe the module
442 * directly beneath the streamhead to see if it qualifies.
444 * We turn off the direct interface when qualifications fail.
445 * In the acceptor case, we simply turn off the sti_direct
446 * flag on the socket. We do the fallback after the accept
447 * has completed, before the new socket is returned to the
450 if (sti
->sti_direct
) {
451 queue_t
*tq
= stp
->sd_wrq
->q_next
;
454 * sti_direct is currently supported and tested
455 * only for tcp/udp; this is the main reason to
456 * have the following assertions.
458 ASSERT(so
->so_family
== AF_INET
||
459 so
->so_family
== AF_INET6
);
460 ASSERT(so
->so_protocol
== IPPROTO_UDP
||
461 so
->so_protocol
== IPPROTO_TCP
||
462 so
->so_protocol
== IPPROTO_IP
);
463 ASSERT(so
->so_type
== SOCK_DGRAM
||
464 so
->so_type
== SOCK_STREAM
);
467 * Abort direct call interface if the module directly
468 * underneath the stream head is not defined with the
469 * _D_DIRECT flag. This could happen in the tcp or
470 * udp case, when some other module is autopushed
471 * above it, or for some reasons the expected module
472 * isn't purely D_MP (which is the main requirement).
474 if (!socktpi_direct
|| !(tq
->q_flag
& _QDIRECT
) ||
475 !(_OTHERQ(tq
)->q_flag
& _QDIRECT
)) {
478 /* Continue on without direct calls */
482 * Cannot issue ioctl on fallback socket since
483 * there is no conn associated with the queue.
484 * The fallback downcall will notify the proto
487 if (!(flags
& SO_ACCEPTOR
) &&
488 !(flags
& SO_FALLBACK
)) {
489 if ((error
= strioctl(vp
,
490 _SIOCSOCKFALLBACK
, 0, 0, K_TO_K
,
492 (void) sotpi_close(so
, flags
,
500 if (flags
& SO_FALLBACK
) {
502 * The stream created does not have a conn.
503 * do stream set up after conn has been assigned
507 if (error
= so_strinit(so
, tso
)) {
508 (void) sotpi_close(so
, flags
, cr
);
512 /* Enable sendfile() on AF_UNIX streams */
513 if (so
->so_family
== AF_UNIX
&& so
->so_type
== SOCK_STREAM
) {
514 mutex_enter(&so
->so_lock
);
515 so
->so_mode
|= SM_SENDFILESUPP
;
516 mutex_exit(&so
->so_lock
);
520 if (so
->so_protocol
!= so
->so_sockparams
->sp_protocol
) {
521 int protocol
= so
->so_protocol
;
523 * Issue SO_PROTOTYPE setsockopt.
525 error
= sotpi_setsockopt(so
, SOL_SOCKET
, SO_PROTOTYPE
,
526 &protocol
, (t_uscalar_t
)sizeof (protocol
), cr
);
528 (void) sotpi_close(so
, flags
, cr
);
530 * Setsockopt often fails with ENOPROTOOPT but
531 * socket() should fail with
532 * EPROTONOSUPPORT/EPROTOTYPE.
534 return (EPROTONOSUPPORT
);
540 * While the same socket can not be reopened (unlike specfs)
541 * the stream head sets STREOPENFAIL when the autopush fails.
544 (stp
->sd_flag
& STREOPENFAIL
)) {
546 * Open failed part way through.
548 mutex_enter(&stp
->sd_lock
);
549 stp
->sd_flag
&= ~STREOPENFAIL
;
550 mutex_exit(&stp
->sd_lock
);
551 (void) sotpi_close(so
, flags
, cr
);
557 TRACE_4(TR_FAC_SOCKFS
, TR_SOCKFS_OPEN
,
558 "sockfs open:maj %d vp %p so %p error %d",
564 * Bind the socket to an unspecified address in sockfs only.
565 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't
566 * required in all cases.
569 so_automatic_bind(struct sonode
*so
)
571 sotpi_info_t
*sti
= SOTOTPI(so
);
572 ASSERT(so
->so_family
== AF_INET
|| so
->so_family
== AF_INET6
);
574 ASSERT(MUTEX_HELD(&so
->so_lock
));
575 ASSERT(!(so
->so_state
& SS_ISBOUND
));
576 ASSERT(sti
->sti_unbind_mp
);
578 ASSERT(sti
->sti_laddr_len
<= sti
->sti_laddr_maxlen
);
579 bzero(sti
->sti_laddr_sa
, sti
->sti_laddr_len
);
580 sti
->sti_laddr_sa
->sa_family
= so
->so_family
;
581 so
->so_state
|= SS_ISBOUND
;
588 * A null "name" can be used to unbind the socket if:
589 * - it is a SOCK_DGRAM, or
590 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected
591 * and no listen() has been done.
595 sotpi_bindlisten(struct sonode
*so
, struct sockaddr
*name
,
596 socklen_t namelen
, int backlog
, int flags
, struct cred
*cr
)
598 struct T_bind_req bind_req
;
599 struct T_bind_ack
*bind_ack
;
604 int unbind_on_err
= 1;
605 boolean_t clear_acceptconn_on_err
= B_FALSE
;
606 boolean_t restore_backlog_on_err
= B_FALSE
;
608 t_scalar_t PRIM_type
= O_T_BIND_REQ
;
609 boolean_t tcp_udp_xport
;
610 sotpi_info_t
*sti
= SOTOTPI(so
);
612 dprintso(so
, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n",
613 (void *)so
, (void *)name
, namelen
, backlog
, flags
,
614 pr_state(so
->so_state
, so
->so_mode
)));
616 tcp_udp_xport
= so
->so_type
== SOCK_STREAM
|| so
->so_type
== SOCK_DGRAM
;
618 if (!(flags
& _SOBIND_LOCK_HELD
)) {
619 mutex_enter(&so
->so_lock
);
620 so_lock_single(so
); /* Set SOLOCKED */
622 ASSERT(MUTEX_HELD(&so
->so_lock
));
623 ASSERT(so
->so_flag
& SOLOCKED
);
627 * Make sure that there is a preallocated unbind_req message
628 * before binding. This message is allocated when the socket is
629 * created but it might have been consumed.
631 if (sti
->sti_unbind_mp
== NULL
) {
632 dprintso(so
, 1, ("sobind: allocating unbind_req\n"));
633 /* NOTE: holding so_lock while sleeping */
635 soallocproto(sizeof (struct T_unbind_req
), _ALLOC_SLEEP
,
639 if (flags
& _SOBIND_REBIND
) {
641 * Called from solisten after doing an sotpi_unbind() or
642 * potentially without the unbind (latter for AF_INET{,6}).
644 ASSERT(name
== NULL
&& namelen
== 0);
646 if (so
->so_family
== AF_UNIX
) {
647 ASSERT(sti
->sti_ux_bound_vp
);
648 addr
= &sti
->sti_ux_laddr
;
649 addrlen
= (t_uscalar_t
)sizeof (sti
->sti_ux_laddr
);
650 dprintso(so
, 1, ("sobind rebind UNIX: addrlen %d, "
651 "addr 0x%p, vp %p\n",
653 (void *)((struct so_ux_addr
*)addr
)->soua_vp
,
654 (void *)sti
->sti_ux_bound_vp
));
656 addr
= sti
->sti_laddr_sa
;
657 addrlen
= (t_uscalar_t
)sti
->sti_laddr_len
;
659 } else if (flags
& _SOBIND_UNSPEC
) {
660 ASSERT(name
== NULL
&& namelen
== 0);
663 * The caller checked SS_ISBOUND but not necessarily
666 if (so
->so_state
& SS_ISBOUND
) {
671 /* Set an initial local address */
672 switch (so
->so_family
) {
675 * Use an address with same size as struct sockaddr
679 (socklen_t
)sizeof (struct sockaddr
);
680 ASSERT(sti
->sti_laddr_len
<= sti
->sti_laddr_maxlen
);
681 bzero(sti
->sti_laddr_sa
, sti
->sti_laddr_len
);
682 sti
->sti_laddr_sa
->sa_family
= so
->so_family
;
685 * Pass down an address with the implicit bind
686 * magic number and the rest all zeros.
687 * The transport will return a unique address.
689 sti
->sti_ux_laddr
.soua_vp
= NULL
;
690 sti
->sti_ux_laddr
.soua_magic
= SOU_MAGIC_IMPLICIT
;
691 addr
= &sti
->sti_ux_laddr
;
692 addrlen
= (t_uscalar_t
)sizeof (sti
->sti_ux_laddr
);
698 * An unspecified bind in TPI has a NULL address.
699 * Set the address in sockfs to have the sa_family.
701 sti
->sti_laddr_len
= (so
->so_family
== AF_INET
) ?
702 (socklen_t
)sizeof (sin_t
) :
703 (socklen_t
)sizeof (sin6_t
);
704 ASSERT(sti
->sti_laddr_len
<= sti
->sti_laddr_maxlen
);
705 bzero(sti
->sti_laddr_sa
, sti
->sti_laddr_len
);
706 sti
->sti_laddr_sa
->sa_family
= so
->so_family
;
713 * An unspecified bind in TPI has a NULL address.
714 * Set the address in sockfs to be zero length.
716 * Can not assume there is a sa_family for all
717 * protocol families. For example, AF_X25 does not
718 * have a family field.
720 bzero(sti
->sti_laddr_sa
, sti
->sti_laddr_len
);
721 sti
->sti_laddr_len
= 0; /* XXX correct? */
728 if (so
->so_state
& SS_ISBOUND
) {
731 eprintsoline(so
, error
);
735 /* X/Open requires this check */
736 if ((so
->so_state
& SS_CANTSENDMORE
) && !xnet_skip_checks
) {
737 if (xnet_check_print
) {
738 printf("sockfs: X/Open bind state check "
745 switch (so
->so_family
) {
748 * All AF_UNIX addresses are nul terminated
749 * when copied (copyin_name) in so the minimum
753 (ssize_t
)namelen
<= sizeof (short) + 1) {
755 eprintsoline(so
, error
);
759 * Verify so_family matches the bound family.
760 * BSD does not check this for AF_UNIX resulting
763 if (name
->sa_family
!= so
->so_family
) {
764 error
= EAFNOSUPPORT
;
771 eprintsoline(so
, error
);
774 if ((size_t)namelen
!= sizeof (sin_t
)) {
775 error
= name
->sa_family
!= so
->so_family
?
776 EAFNOSUPPORT
: EINVAL
;
777 eprintsoline(so
, error
);
780 if ((name
->sa_family
!= so
->so_family
)) {
781 error
= EAFNOSUPPORT
;
782 eprintsoline(so
, error
);
786 * Force a zero sa_family to match so_family.
788 * Some programs like inetd(1M) don't set the
789 * family field. Other programs leave
790 * sin_family set to garbage - SunOS 4.X does
791 * not check the family field on a bind.
792 * We use the family field that
793 * was passed in to the socket() call.
795 name
->sa_family
= so
->so_family
;
800 sin6_t
*sin6
= (sin6_t
*)name
;
805 eprintsoline(so
, error
);
808 if ((size_t)namelen
!= sizeof (sin6_t
)) {
809 error
= name
->sa_family
!= so
->so_family
?
810 EAFNOSUPPORT
: EINVAL
;
811 eprintsoline(so
, error
);
814 if (name
->sa_family
!= so
->so_family
) {
816 * With IPv6 we require the family to match
819 error
= EAFNOSUPPORT
;
820 eprintsoline(so
, error
);
825 * Verify that apps don't forget to clear
828 if (sin6
->sin6_scope_id
!= 0 &&
829 !IN6_IS_ADDR_LINKSCOPE(&sin6
->sin6_addr
)) {
830 zcmn_err(getzoneid(), CE_WARN
,
831 "bind with uninitialized sin6_scope_id "
832 "(%d) on socket. Pid = %d\n",
833 (int)sin6
->sin6_scope_id
,
834 (int)curproc
->p_pid
);
836 if (sin6
->__sin6_src_id
!= 0) {
837 zcmn_err(getzoneid(), CE_WARN
,
838 "bind with uninitialized __sin6_src_id "
839 "(%d) on socket. Pid = %d\n",
840 (int)sin6
->__sin6_src_id
,
841 (int)curproc
->p_pid
);
848 * Don't do any length or sa_family check to allow
849 * non-sockaddr style addresses.
853 eprintsoline(so
, error
);
859 if (namelen
> (t_uscalar_t
)sti
->sti_laddr_maxlen
) {
860 error
= ENAMETOOLONG
;
861 eprintsoline(so
, error
);
865 * Save local address.
867 sti
->sti_laddr_len
= (socklen_t
)namelen
;
868 ASSERT(sti
->sti_laddr_len
<= sti
->sti_laddr_maxlen
);
869 bcopy(name
, sti
->sti_laddr_sa
, namelen
);
871 addr
= sti
->sti_laddr_sa
;
872 addrlen
= (t_uscalar_t
)sti
->sti_laddr_len
;
873 switch (so
->so_family
) {
878 struct sockaddr_un
*soun
=
879 (struct sockaddr_un
*)sti
->sti_laddr_sa
;
880 struct vnode
*vp
, *rvp
;
883 ASSERT(sti
->sti_ux_bound_vp
== NULL
);
885 * Create vnode for the specified path name.
886 * Keep vnode held with a reference in sti_ux_bound_vp.
887 * Use the vnode pointer as the address used in the
888 * bind with the transport.
890 * Use the same mode as in BSD: 0777 with the process umask
891 * (u_cmask) masked off, as computed for va_mode below.
893 /* MAXPATHLEN + soun_family + nul termination */
894 if (sti
->sti_laddr_len
>
895 (socklen_t
)(MAXPATHLEN
+ sizeof (short) + 1)) {
896 error
= ENAMETOOLONG
;
897 eprintsoline(so
, error
);
900 vattr
.va_type
= VSOCK
;
901 vattr
.va_mode
= 0777 & ~PTOU(curproc
)->u_cmask
;
902 vattr
.va_mask
= AT_TYPE
|AT_MODE
;
903 /* NOTE: holding so_lock */
904 error
= vn_create(soun
->sun_path
, UIO_SYSSPACE
, &vattr
,
905 EXCL
, 0, &vp
, CRMKNOD
, 0, 0);
909 eprintsoline(so
, error
);
913 * Establish pointer from the underlying filesystem
914 * vnode to the socket node.
915 * sti_ux_bound_vp and v_stream->sd_vnode form the
916 * cross-linkage between the underlying filesystem
917 * node and the socket node.
920 if ((fop_realvp(vp
, &rvp
, NULL
) == 0) && (vp
!= rvp
)) {
926 ASSERT(SOTOV(so
)->v_stream
);
927 mutex_enter(&vp
->v_lock
);
928 vp
->v_stream
= SOTOV(so
)->v_stream
;
929 sti
->sti_ux_bound_vp
= vp
;
930 mutex_exit(&vp
->v_lock
);
933 * Use the vnode pointer value as a unique address
934 * (together with the magic number to avoid conflicts
935 * with implicit binds) in the transport provider.
937 sti
->sti_ux_laddr
.soua_vp
=
938 (void *)sti
->sti_ux_bound_vp
;
939 sti
->sti_ux_laddr
.soua_magic
= SOU_MAGIC_EXPLICIT
;
940 addr
= &sti
->sti_ux_laddr
;
941 addrlen
= (t_uscalar_t
)sizeof (sti
->sti_ux_laddr
);
942 dprintso(so
, 1, ("sobind UNIX: addrlen %d, addr %p\n",
944 (void *)((struct so_ux_addr
*)addr
)->soua_vp
));
947 } /* end switch (so->so_family) */
951 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since
952 * the transport can start passing up T_CONN_IND messages
953 * as soon as it receives the bind req and strsock_proto()
954 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs.
956 if (flags
& _SOBIND_LISTEN
) {
957 if ((so
->so_state
& SS_ACCEPTCONN
) == 0)
958 clear_acceptconn_on_err
= B_TRUE
;
959 save_so_backlog
= so
->so_backlog
;
960 restore_backlog_on_err
= B_TRUE
;
961 so
->so_state
|= SS_ACCEPTCONN
;
962 so
->so_backlog
= backlog
;
966 * We send a T_BIND_REQ for TCP/UDP since we know it supports it,
967 * for other transports we will send in a O_T_BIND_REQ.
970 (so
->so_family
== AF_INET
|| so
->so_family
== AF_INET6
))
971 PRIM_type
= T_BIND_REQ
;
973 bind_req
.PRIM_type
= PRIM_type
;
974 bind_req
.ADDR_length
= addrlen
;
975 bind_req
.ADDR_offset
= (t_scalar_t
)sizeof (bind_req
);
976 bind_req
.CONIND_number
= backlog
;
977 /* NOTE: holding so_lock while sleeping */
978 mp
= soallocproto2(&bind_req
, sizeof (bind_req
),
979 addr
, addrlen
, 0, _ALLOC_SLEEP
, cr
);
980 sti
->sti_laddr_valid
= 0;
982 /* Done using sti_laddr_sa - can drop the lock */
983 mutex_exit(&so
->so_lock
);
985 error
= kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
986 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
, 0);
988 eprintsoline(so
, error
);
989 mutex_enter(&so
->so_lock
);
993 mutex_enter(&so
->so_lock
);
994 error
= sowaitprim(so
, PRIM_type
, T_BIND_ACK
,
995 (t_uscalar_t
)sizeof (*bind_ack
), &mp
, 0);
997 eprintsoline(so
, error
);
1002 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1003 * strsock_proto while the lock was dropped above, the bind
1004 * is allowed to complete.
1007 /* Mark as bound. This will be undone if we detect errors below. */
1008 if (flags
& _SOBIND_NOXLATE
) {
1009 ASSERT(so
->so_family
== AF_UNIX
);
1010 sti
->sti_faddr_noxlate
= 1;
1012 ASSERT(!(so
->so_state
& SS_ISBOUND
) || (flags
& _SOBIND_REBIND
));
1013 so
->so_state
|= SS_ISBOUND
;
1014 ASSERT(sti
->sti_unbind_mp
);
1016 /* note that we've already set SS_ACCEPTCONN above */
1019 * Recompute addrlen - an unspecified bind sent down an
1020 * address of length zero but we expect the appropriate length
1023 addrlen
= (t_uscalar_t
)(so
->so_family
== AF_UNIX
?
1024 sizeof (sti
->sti_ux_laddr
) : sti
->sti_laddr_len
);
1026 bind_ack
= (struct T_bind_ack
*)mp
->b_rptr
;
1028 * The alignment restriction is really too strict but
1029 * we want enough alignment to inspect the fields of
1032 addr
= sogetoff(mp
, bind_ack
->ADDR_offset
,
1033 bind_ack
->ADDR_length
,
1038 eprintsoline(so
, error
);
1041 if (!(flags
& _SOBIND_UNSPEC
)) {
1043 * Verify that the transport didn't return something we
1044 * did not want e.g. an address other than what we asked for.
1046 * NOTE: These checks would go away if/when we switch to
1047 * using the new TPI (in which the transport would fail
1048 * the request instead of assigning a different address).
1050 * NOTE2: For protocols that we don't know (i.e. any
1051 * other than AF_INET6, AF_INET and AF_UNIX), we
1052 * cannot know if the transport should be expected to
1053 * return the same address as that requested.
1055 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send
1056 * down a T_BIND_REQ. We use O_T_BIND_REQ for others.
1058 * For example, in the case of netatalk it may be
1059 * inappropriate for the transport to return the
1060 * requested address (as it may have allocated a local
1061 * port number in behaviour similar to that of an
1062 * AF_INET bind request with a port number of zero).
1064 * Given the definition of O_T_BIND_REQ, where the
1065 * transport may bind to an address other than the
1066 * requested address, it's not possible to determine
1067 * whether a returned address that differs from the
1068 * requested address is a reason to fail (because the
1069 * requested address was not available) or succeed
1070 * (because the transport allocated an appropriate
1071 * address and/or port).
1073 * sockfs currently requires that the transport return
1074 * the requested address in the T_BIND_ACK, unless
1075 * there is code here to allow for any discrepancy.
1076 * Such code exists for AF_INET and AF_INET6.
1078 * Netatalk chooses to return the requested address
1079 * rather than the (correct) allocated address. This
1080 * means that netatalk violates the TPI specification
1081 * (and would not function correctly if used from a
1082 * TLI application), but it does mean that it works
1085 * As noted above, using the newer XTI bind primitive
1086 * (T_BIND_REQ) in preference to O_T_BIND_REQ would
1087 * allow sockfs to be more sure about whether or not
1088 * the bind request had succeeded (as transports are
1089 * not permitted to bind to a different address than
1090 * that requested - they must return failure).
1091 * Unfortunately, support for T_BIND_REQ may not be
1092 * present in all transport implementations (netatalk,
1093 * for example, doesn't have it), making the
1094 * transition difficult.
1096 if (bind_ack
->ADDR_length
!= addrlen
) {
1097 /* Assumes that the requested address was in use */
1100 eprintsoline(so
, error
);
1104 switch (so
->so_family
) {
1107 sin_t
*rname
, *aname
;
1109 rname
= (sin_t
*)addr
;
1110 aname
= (sin_t
*)sti
->sti_laddr_sa
;
1113 * Take advantage of the alignment
1114 * of sin_port and sin6_port which fall
1115 * in the same place in their data structures.
1116 * Just use sin_port for either address family.
1118 * This may become a problem if (heaven forbid)
1119 * there's a separate ipv6port_reserved... :-P
1121 * Binding to port 0 has the semantics of letting
1122 * the transport bind to any port.
1124 * If the transport is TCP or UDP since we had sent
1125 * a T_BIND_REQ we would not get a port other than
1126 * what we asked for.
1128 if (tcp_udp_xport
) {
1130 * Pick up the new port number if we bound to
1133 if (aname
->sin_port
== 0)
1134 aname
->sin_port
= rname
->sin_port
;
1135 sti
->sti_laddr_valid
= 1;
1138 if (aname
->sin_port
!= 0 &&
1139 aname
->sin_port
!= rname
->sin_port
) {
1142 eprintsoline(so
, error
);
1146 * Pick up the new port number if we bound to port 0.
1148 aname
->sin_port
= rname
->sin_port
;
1151 * Unfortunately, addresses aren't _quite_ the same.
1153 if (so
->so_family
== AF_INET
) {
1154 if (aname
->sin_addr
.s_addr
!=
1155 rname
->sin_addr
.s_addr
) {
1157 error
= EADDRNOTAVAIL
;
1158 eprintsoline(so
, error
);
1162 sin6_t
*rname6
= (sin6_t
*)rname
;
1163 sin6_t
*aname6
= (sin6_t
*)aname
;
1165 if (!IN6_ARE_ADDR_EQUAL(&aname6
->sin6_addr
,
1166 &rname6
->sin6_addr
)) {
1168 error
= EADDRNOTAVAIL
;
1169 eprintsoline(so
, error
);
1176 if (bcmp(addr
, &sti
->sti_ux_laddr
, addrlen
) != 0) {
1179 eprintsoline(so
, error
);
1181 ("addrlen %d, addr 0x%x, vp %p\n",
1182 addrlen
, *((int *)addr
),
1183 (void *)sti
->sti_ux_bound_vp
));
1186 sti
->sti_laddr_valid
= 1;
1190 * NOTE: This assumes that addresses can be
1191 * byte-compared for equivalence.
1193 if (bcmp(addr
, sti
->sti_laddr_sa
, addrlen
) != 0) {
1196 eprintsoline(so
, error
);
1200 * Don't mark sti_laddr_valid, as we cannot be
1201 * sure that the returned address is the real
1202 * bound address when talking to an unknown
1209 * Save the returned address for getsockname.
1210 * Needed for unspecific bind unless transport supports
1211 * the TI_GETMYNAME ioctl.
1212 * Do this for AF_INET{,6} even though they do, as
1213 * caching info here is much better performance than
1214 * a TPI/STREAMS trip to the transport for getsockname.
1215 * Any which can't for some reason _must_ _not_ set
1216 * sti_laddr_valid here for the caching version of
1217 * getsockname to not break;
1219 switch (so
->so_family
) {
1222 * Record the address bound with the transport
1223 * for use by socketpair.
1225 bcopy(addr
, &sti
->sti_ux_laddr
, addrlen
);
1226 sti
->sti_laddr_valid
= 1;
1230 ASSERT(sti
->sti_laddr_len
<= sti
->sti_laddr_maxlen
);
1231 bcopy(addr
, sti
->sti_laddr_sa
, sti
->sti_laddr_len
);
1232 sti
->sti_laddr_valid
= 1;
1236 * Don't mark sti_laddr_valid, as we cannot be
1237 * sure that the returned address is the real
1238 * bound address when talking to an unknown
1249 /* reset state & backlog to values held on entry */
1250 if (clear_acceptconn_on_err
== B_TRUE
)
1251 so
->so_state
&= ~SS_ACCEPTCONN
;
1252 if (restore_backlog_on_err
== B_TRUE
)
1253 so
->so_backlog
= save_so_backlog
;
1255 if (unbind_on_err
&& so
->so_state
& SS_ISBOUND
) {
1258 err
= sotpi_unbind(so
, 0);
1259 /* LINTED - statement has no consequent: if */
1261 eprintsoline(so
, error
);
1263 ASSERT(!(so
->so_state
& SS_ISBOUND
));
1267 if (!(flags
& _SOBIND_LOCK_HELD
)) {
1268 so_unlock_single(so
, SOLOCKED
);
1269 mutex_exit(&so
->so_lock
);
1271 ASSERT(MUTEX_HELD(&so
->so_lock
));
1272 ASSERT(so
->so_flag
& SOLOCKED
);
1277 /* bind the socket */
1279 sotpi_bind(struct sonode
*so
, struct sockaddr
*name
, socklen_t namelen
,
1280 int flags
, struct cred
*cr
)
1282 if ((flags
& _SOBIND_SOCKETPAIR
) == 0)
1283 return (sotpi_bindlisten(so
, name
, namelen
, 0, flags
, cr
));
1285 flags
&= ~_SOBIND_SOCKETPAIR
;
1286 return (sotpi_bindlisten(so
, name
, namelen
, 1, flags
, cr
));
/*
 * sotpi_unbind(so, flags): ask the transport to unbind this socket.
 *
 * so     socket to unbind; the ASSERTs in the body require so_lock to be
 *        held and SOLOCKED to be set, both near the top and at the end.
 * flags  _SOUNBIND_REBIND keeps sti_ux_bound_vp and sti_laddr_len as they
 *        are; without it sti_ux_bound_vp is set to NULL and sti_laddr_len
 *        is set to 0.
 *
 * Visible steps: an error is logged with eprintsoline() when SS_ISBOUND
 * is not set; so_lock is dropped; an M_FLUSH (FLUSHRW) is sent
 * downstream; a T_UNBIND_REQ is sent with kstrputmsg(); so_lock is
 * retaken; the ack is awaited with sowaitokack(); SS_ISBOUND and
 * SS_ACCEPTCONN are cleared and sti_laddr_valid is reset.
 *
 * NOTE(review): several lines of this function (declarations, the error
 * tests after kstrputmsg() and sowaitokack(), braces, the return) are
 * not present in this copy, so whether the state is also cleared on the
 * failure paths cannot be confirmed here.  Callers in this file store
 * the result in an error variable - presumably an errno-style int,
 * TODO confirm against the full source.
 */
1290 * Unbind a socket - used when bind() fails, when bind() specifies a NULL
1291 * address, or when listen needs to unbind and bind.
1292 * If the _SOUNBIND_REBIND flag is specified the addresses are retained
1293 * so that a sobind can pick them up.
1296 sotpi_unbind(struct sonode
*so
, int flags
)
1298 struct T_unbind_req unbind_req
;
1301 sotpi_info_t
*sti
= SOTOTPI(so
);
1303 dprintso(so
, 1, ("sotpi_unbind(%p, 0x%x) %s\n",
1304 (void *)so
, flags
, pr_state(so
->so_state
, so
->so_mode
)));
1306 ASSERT(MUTEX_HELD(&so
->so_lock
));
1307 ASSERT(so
->so_flag
& SOLOCKED
);
1309 if (!(so
->so_state
& SS_ISBOUND
)) {
1311 eprintsoline(so
, error
);
1315 mutex_exit(&so
->so_lock
);
1318 * Flush the read and write side (except stream head read queue)
1319 * and send down T_UNBIND_REQ.
1321 (void) putnextctl1(strvp2wq(SOTOV(so
)), M_FLUSH
, FLUSHRW
);
1323 unbind_req
.PRIM_type
= T_UNBIND_REQ
;
1324 mp
= soallocproto1(&unbind_req
, sizeof (unbind_req
),
1325 0, _ALLOC_SLEEP
, CRED());
1326 error
= kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
1327 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
, 0);
1328 mutex_enter(&so
->so_lock
);
1330 eprintsoline(so
, error
);
1334 error
= sowaitokack(so
, T_UNBIND_REQ
);
1336 eprintsoline(so
, error
);
1341 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1342 * strsock_proto while the lock was dropped above, the unbind
1343 * is allowed to complete.
1345 if (!(flags
& _SOUNBIND_REBIND
)) {
1347 * Clear out bound address.
1351 if ((vp
= sti
->sti_ux_bound_vp
) != NULL
) {
1352 sti
->sti_ux_bound_vp
= NULL
;
1355 /* Clear out address */
1356 sti
->sti_laddr_len
= 0;
1358 so
->so_state
&= ~(SS_ISBOUND
|SS_ACCEPTCONN
);
1359 sti
->sti_laddr_valid
= 0;
1363 /* If the caller held the lock don't release it here */
1364 ASSERT(MUTEX_HELD(&so
->so_lock
));
1365 ASSERT(so
->so_flag
& SOLOCKED
);
/*
 * sotpi_listen(so, backlog, cr): put the socket into the listening state.
 *
 * so       socket to listen on
 * backlog  requested connection backlog
 * cr       credentials handed to sotpi_bindlisten()
 *
 * Visible behaviour:
 *  - returns EOPNOTSUPP when sti_serv_type is T_CLTS;
 *  - before so_lock is taken there are two early tests: one combining
 *    SS_ACCEPTCONN with a negated AF_INET/AF_INET6 test, and one on
 *    SS_ISCONNECTED;
 *  - so_lock and SOLOCKED are then taken, and both are released at the
 *    end of the function;
 *  - an already listening socket with an unchanged backlog is detected;
 *  - when SS_ISBOUND is not set, AF_UNIX is tested first, and
 *    sotpi_bindlisten() is called with
 *    _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN;
 *  - in the backlog > 0 branch sotpi_unbind() is called with
 *    _SOUNBIND_REBIND unless the family is AF_INET/AF_INET6 and
 *    solisten_tpi_tcp is zero (one line of that condition is missing
 *    from this copy), followed by sotpi_bindlisten() with
 *    _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN;
 *  - one branch sets SS_ACCEPTCONN and stores backlog in so_backlog
 *    directly; the line that selects that branch is not visible.
 *
 * NOTE(review): the statements executed by several of the tests above
 * (returns, gotos, error assignments) are missing from this copy; the
 * outcome of those branches should be confirmed against the full source.
 */
1371 * listen on the socket.
1372 * For TPI conforming transports this has to first unbind with the transport
1373 * and then bind again using the new backlog.
1377 sotpi_listen(struct sonode
*so
, int backlog
, struct cred
*cr
)
1380 sotpi_info_t
*sti
= SOTOTPI(so
);
1382 dprintso(so
, 1, ("sotpi_listen(%p, %d) %s\n",
1383 (void *)so
, backlog
, pr_state(so
->so_state
, so
->so_mode
)));
1385 if (sti
->sti_serv_type
== T_CLTS
)
1386 return (EOPNOTSUPP
);
1389 * If the socket is ready to accept connections already, then
1390 * return without doing anything. This avoids a problem where
1391 * a second listen() call fails if a connection is pending and
1392 * leaves the socket unbound. Only when we are not unbinding
1393 * with the transport can we safely increase the backlog.
1395 if (so
->so_state
& SS_ACCEPTCONN
&&
1396 !((so
->so_family
== AF_INET
|| so
->so_family
== AF_INET6
) &&
1401 if (so
->so_state
& SS_ISCONNECTED
)
1404 mutex_enter(&so
->so_lock
);
1405 so_lock_single(so
); /* Set SOLOCKED */
1408 * If the listen doesn't change the backlog we do nothing.
1409 * This avoids an EPROTO error from the transport.
1411 if ((so
->so_state
& SS_ACCEPTCONN
) &&
1412 so
->so_backlog
== backlog
)
1415 if (!(so
->so_state
& SS_ISBOUND
)) {
1417 * Must have been explicitly bound in the UNIX domain.
1419 if (so
->so_family
== AF_UNIX
) {
1423 error
= sotpi_bindlisten(so
, NULL
, 0, backlog
,
1424 _SOBIND_UNSPEC
|_SOBIND_LOCK_HELD
|_SOBIND_LISTEN
, cr
);
1425 } else if (backlog
> 0) {
1427 * AF_INET{,6} hack to avoid losing the port.
1428 * Assumes that all AF_INET{,6} transports can handle a
1429 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI
1430 * has already bound thus it is possible to avoid the unbind.
1432 if (!((so
->so_family
== AF_INET
|| so
->so_family
== AF_INET6
) &&
1434 !solisten_tpi_tcp
)) {
1435 error
= sotpi_unbind(so
, _SOUNBIND_REBIND
);
1439 error
= sotpi_bindlisten(so
, NULL
, 0, backlog
,
1440 _SOBIND_REBIND
|_SOBIND_LOCK_HELD
|_SOBIND_LISTEN
, cr
);
1442 so
->so_state
|= SS_ACCEPTCONN
;
1443 so
->so_backlog
= backlog
;
1447 ASSERT(so
->so_state
& SS_ACCEPTCONN
);
1449 so_unlock_single(so
, SOLOCKED
);
1450 mutex_exit(&so
->so_lock
);
/*
 * sodisconnect(so, seqno, flags): send a T_DISCON_REQ to the transport.
 *
 * so     socket to disconnect
 * seqno  value placed in the SEQ_number field of the T_DISCON_REQ (per
 *        the original description that follows: a specific sequence
 *        number on a listener, or -1 for all)
 * flags  _SODISCONNECT_LOCK_HELD: the caller already holds so_lock and
 *        SOLOCKED.  Without it this function takes both on entry and
 *        releases both before finishing.
 *
 * Visible steps: an error is logged when none of SS_ISCONNECTED,
 * SS_ISCONNECTING or SS_ACCEPTCONN is set; so_lock is dropped; the write
 * side is flushed (M_FLUSH, FLUSHW) unless SS_ACCEPTCONN is set; the
 * T_DISCON_REQ is sent with kstrputmsg(); so_lock is retaken and the ack
 * is awaited with sowaitokack(); SS_ISCONNECTED and SS_ISCONNECTING are
 * cleared and sti_laddr_valid and sti_faddr_valid are reset to 0.
 *
 * NOTE(review): declarations, the error tests after kstrputmsg() and
 * sowaitokack(), braces and the return are missing from this copy, so
 * the handling of the failure paths cannot be confirmed here.
 */
1455 * Disconnect either a specified seqno or all (-1).
1456 * The former is used on listening sockets only.
1458 * When seqno == -1 sodisconnect could call sotpi_unbind. However,
1459 * the current use of sodisconnect(seqno == -1) is only for shutdown
1460 * so there is no point (and potentially incorrect) to unbind.
1463 sodisconnect(struct sonode
*so
, t_scalar_t seqno
, int flags
)
1465 struct T_discon_req discon_req
;
1469 dprintso(so
, 1, ("sodisconnect(%p, %d, 0x%x) %s\n",
1470 (void *)so
, seqno
, flags
, pr_state(so
->so_state
, so
->so_mode
)));
1472 if (!(flags
& _SODISCONNECT_LOCK_HELD
)) {
1473 mutex_enter(&so
->so_lock
);
1474 so_lock_single(so
); /* Set SOLOCKED */
1476 ASSERT(MUTEX_HELD(&so
->so_lock
));
1477 ASSERT(so
->so_flag
& SOLOCKED
);
1480 if (!(so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
|SS_ACCEPTCONN
))) {
1482 eprintsoline(so
, error
);
1486 mutex_exit(&so
->so_lock
);
1488 * Flush the write side (unless this is a listener)
1489 * and then send down a T_DISCON_REQ.
1490 * (Don't flush on listener since it could flush {O_}T_CONN_RES
1491 * and other messages.)
1493 if (!(so
->so_state
& SS_ACCEPTCONN
))
1494 (void) putnextctl1(strvp2wq(SOTOV(so
)), M_FLUSH
, FLUSHW
);
1496 discon_req
.PRIM_type
= T_DISCON_REQ
;
1497 discon_req
.SEQ_number
= seqno
;
1498 mp
= soallocproto1(&discon_req
, sizeof (discon_req
),
1499 0, _ALLOC_SLEEP
, CRED());
1500 error
= kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
1501 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
, 0);
1502 mutex_enter(&so
->so_lock
);
1504 eprintsoline(so
, error
);
1508 error
= sowaitokack(so
, T_DISCON_REQ
);
1510 eprintsoline(so
, error
);
1514 * Even if some TPI message (e.g. T_DISCON_IND) was received in
1515 * strsock_proto while the lock was dropped above, the disconnect
1516 * is allowed to complete. However, it is not possible to
1517 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set.
1519 so
->so_state
&= ~(SS_ISCONNECTED
|SS_ISCONNECTING
);
1520 SOTOTPI(so
)->sti_laddr_valid
= 0;
1521 SOTOTPI(so
)->sti_faddr_valid
= 0;
1523 if (!(flags
& _SODISCONNECT_LOCK_HELD
)) {
1524 so_unlock_single(so
, SOLOCKED
);
1525 mutex_exit(&so
->so_lock
);
1527 /* If the caller held the lock don't release it here */
1528 ASSERT(MUTEX_HELD(&so
->so_lock
));
1529 ASSERT(so
->so_flag
& SOLOCKED
);
/*
 * sotpi_accept(so, fflag, cr, nsop): accept one pending connection on a
 * listening TPI socket.
 *
 * so     listening socket (SS_ACCEPTCONN is tested first)
 * fflag  file flags passed to sowaitconnind()
 * cr     credentials used for the ioctl, bind and close calls
 * nsop   presumably receives the new socket; the store itself is on a
 *        line that is not present in this copy
 *
 * Outline of the visible code:
 *  1. sowaitconnind() supplies the T_CONN_IND; SEQ_number is saved for
 *     the error paths and the source address is located with sogetoff().
 *  2. The option part is examined per address family.  An option of
 *     sizeof (intptr_t) with sti_direct set is copied into opt (the
 *     accept fast path).  Where the fast path is not usable and
 *     sti_direct is set, sti_direct is cleared and _SIOCSOCKFALLBACK is
 *     issued on the listener.  For AF_UNIX so_getopt_srcaddr() may
 *     replace the source address with one taken from the options.
 *  3. socket_newconn() creates the new sonode; SO_DEBUG is inherited
 *     early and the peer address, peer cred and pid are recorded.
 *  4. The DB_REF/MBLKSIZE test decides between allocating a fresh mblk
 *     for the T_CONN_RES (shared or too small) and resetting the
 *     T_CONN_IND mblk to DB_BASE for reuse.
 *  5. The new socket is bound: so_automatic_bind() for AF_INET/AF_INET6
 *     SOCK_STREAM when soaccept_tpi_tcp is zero, and a NULL sotpi_bind()
 *     is the other visible option.  so_pgrp, SS_ASYNC, the listed socket
 *     options, buffer sizes and linger are inherited from the listener.
 *  6. With sti_direct the T_CONN_RES goes down the new stream and the
 *     T_OK_ACK is awaited on nso.  Otherwise an {O_}T_CONN_RES goes down
 *     the listener after SOLOCKED is taken and the ack is awaited on so.
 *  7. The local address is filled in from the T_OK_ACK (or from the
 *     listener for AF_UNIX) and SS_ISCONNECTED is set on the new socket.
 *  The error labels close the new vnode with fop_close() and call
 *  sodisconnect() with the saved SEQ_number; conn_bad maps SOCK_DGRAM
 *  and SOCK_RAW to EOPNOTSUPP and every other type to EINVAL.
 *
 * NOTE(review): in the non-direct path the test
 *     (so_family == AF_INET) || (so_family == AF_INET6) && MBLKL(ack_mp) == ...
 * parses as A || (B && C) because && binds tighter than ||.  For AF_INET
 * the length of the T_OK_ACK is therefore not checked before sinlen
 * bytes are copied out of it.  The description in front of that test
 * talks about an address being present for both families, so the two
 * family tests probably need their own enclosing parentheses - confirm
 * and fix in the full source.
 *
 * NOTE(review): right after nsti->sti_faddr_len is assigned, the ASSERT
 * compares sti->sti_faddr_len with sti->sti_faddr_maxlen (the listener)
 * rather than the nsti field that was just written - looks like a typo,
 * confirm.
 *
 * NOTE(review): many lines (declarations, braces, case labels, returns,
 * comment delimiters) are missing from this copy, so the code is left
 * exactly as found.
 */
1536 sotpi_accept(struct sonode
*so
, int fflag
, struct cred
*cr
,
1537 struct sonode
**nsop
)
1539 struct T_conn_ind
*conn_ind
;
1540 struct T_conn_res
*conn_res
;
1542 mblk_t
*mp
, *ack_mp
;
1549 t_scalar_t PRIM_type
;
1550 t_scalar_t SEQ_number
;
1552 sotpi_info_t
*sti
= SOTOTPI(so
);
1555 dprintso(so
, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n",
1556 (void *)so
, fflag
, (void *)nsop
,
1557 pr_state(so
->so_state
, so
->so_mode
)));
1560 * Defer single-threading the accepting socket until
1561 * the T_CONN_IND has been received and parsed and the
1562 * new sonode has been opened.
1565 /* Check that we are not already connected */
1566 if ((so
->so_state
& SS_ACCEPTCONN
) == 0)
1569 if ((error
= sowaitconnind(so
, fflag
, &mp
)) != 0)
1573 conn_ind
= (struct T_conn_ind
*)mp
->b_rptr
;
1576 * Save SEQ_number for error paths.
1578 SEQ_number
= conn_ind
->SEQ_number
;
1580 srclen
= conn_ind
->SRC_length
;
1581 src
= sogetoff(mp
, conn_ind
->SRC_offset
, srclen
, 1);
1585 eprintsoline(so
, error
);
1586 goto disconnect_unlocked
;
1588 optlen
= conn_ind
->OPT_length
;
1589 switch (so
->so_family
) {
1592 if ((optlen
== sizeof (intptr_t)) && (sti
->sti_direct
!= 0)) {
1593 bcopy(mp
->b_rptr
+ conn_ind
->OPT_offset
,
1594 &opt
, conn_ind
->OPT_length
);
1597 * The transport (in this case TCP) hasn't sent up
1598 * a pointer to an instance for the accept fast-path.
1599 * Disable fast-path completely because the call to
1600 * sotpi_create() below would otherwise create an
1601 * incomplete TCP instance, which would lead to
1602 * problems when sockfs sends a normal T_CONN_RES
1603 * message down the new stream.
1605 if (sti
->sti_direct
) {
1608 * For consistency we inform tcp to disable
1609 * direct interface on the listener, though
1610 * we can certainly live without doing this
1611 * because no data will ever travel upstream
1612 * on the listening socket.
1614 sti
->sti_direct
= 0;
1615 (void) strioctl(SOTOV(so
), _SIOCSOCKFALLBACK
,
1616 0, 0, K_TO_K
, cr
, &rval
);
1625 opt
= sogetoff(mp
, conn_ind
->OPT_offset
, optlen
,
1630 eprintsoline(so
, error
);
1631 goto disconnect_unlocked
;
1634 if (so
->so_family
== AF_UNIX
) {
1635 if (!sti
->sti_faddr_noxlate
) {
1639 /* Extract src address from options */
1641 so_getopt_srcaddr(opt
, optlen
, &src
, &srclen
);
1647 * Create the new socket.
1649 nso
= socket_newconn(so
, NULL
, NULL
, SOCKET_SLEEP
, &error
);
1653 * Accept can not fail with ENOBUFS. sotpi_create
1654 * sleeps waiting for memory until a signal is caught
1658 if (error
== ENOBUFS
)
1663 nsti
= SOTOTPI(nso
);
1667 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus
1668 * it's inherited early to allow debugging of the accept code itself.
1670 nso
->so_options
|= so
->so_options
& SO_DEBUG
;
1674 * Save the SRC address from the T_CONN_IND
1675 * for getpeername to work on AF_UNIX and on transports that do not
1676 * support TI_GETPEERNAME.
1678 * NOTE: AF_UNIX NUL termination is ensured by the sender's
1681 if (srclen
> (t_uscalar_t
)nsti
->sti_faddr_maxlen
) {
1684 eprintsoline(so
, error
);
1685 goto disconnect_vp_unlocked
;
1687 nsti
->sti_faddr_len
= (socklen_t
)srclen
;
1688 ASSERT(sti
->sti_faddr_len
<= sti
->sti_faddr_maxlen
);
1689 bcopy(src
, nsti
->sti_faddr_sa
, srclen
);
1690 nsti
->sti_faddr_valid
= 1;
1693 * Record so_peercred and so_cpid from a cred in the T_CONN_IND.
1695 if ((DB_REF(mp
) > 1) || MBLKSIZE(mp
) <
1696 (sizeof (struct T_conn_res
) + sizeof (intptr_t))) {
1700 cr
= msg_getcred(mp
, &cpid
);
1703 nso
->so_peercred
= cr
;
1704 nso
->so_cpid
= cpid
;
1708 mp
= soallocproto1(NULL
, sizeof (struct T_conn_res
) +
1709 sizeof (intptr_t), 0, _ALLOC_INTR
, cr
);
1712 * Accept can not fail with ENOBUFS.
1713 * A signal was caught so return EINTR.
1716 eprintsoline(so
, error
);
1717 goto disconnect_vp_unlocked
;
1719 conn_res
= (struct T_conn_res
*)mp
->b_rptr
;
1722 * For efficency reasons we use msg_extractcred; no crhold
1723 * needed since db_credp is cleared (i.e., we move the cred
1724 * from the message to so_peercred.
1726 nso
->so_peercred
= msg_extractcred(mp
, &nso
->so_cpid
);
1728 mp
->b_rptr
= DB_BASE(mp
);
1729 conn_res
= (struct T_conn_res
*)mp
->b_rptr
;
1730 mp
->b_wptr
= mp
->b_rptr
+ sizeof (struct T_conn_res
);
1732 mblk_setcred(mp
, cr
, curproc
->p_pid
);
1736 * New socket must be bound at least in sockfs and, except for AF_INET,
1737 * (or AF_INET6) it also has to be bound in the transport provider.
1738 * We set the local address in the sonode from the T_OK_ACK of the
1739 * T_CONN_RES. For this reason the address we bind to here isn't
1742 if ((nso
->so_family
== AF_INET
|| nso
->so_family
== AF_INET6
) &&
1744 nso
->so_type
== SOCK_STREAM
&& !soaccept_tpi_tcp
) {
1746 * Optimization for AF_INET{,6} transports
1747 * that can handle a T_CONN_RES without being bound.
1749 mutex_enter(&nso
->so_lock
);
1750 so_automatic_bind(nso
);
1751 mutex_exit(&nso
->so_lock
);
1753 /* Perform NULL bind with the transport provider. */
1754 if ((error
= sotpi_bind(nso
, NULL
, 0, _SOBIND_UNSPEC
,
1756 ASSERT(error
!= ENOBUFS
);
1758 eprintsoline(nso
, error
);
1759 goto disconnect_vp_unlocked
;
1764 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES
1765 * so that any data arriving on the new socket will cause the
1766 * appropriate signals to be delivered for the new socket.
1768 * No other thread (except strsock_proto and strsock_misc)
1769 * can access the new socket thus we relax the locking.
1771 nso
->so_pgrp
= so
->so_pgrp
;
1772 nso
->so_state
|= so
->so_state
& SS_ASYNC
;
1773 nsti
->sti_faddr_noxlate
= sti
->sti_faddr_noxlate
;
1775 if (nso
->so_pgrp
!= 0) {
1776 if ((error
= so_set_events(nso
, nvp
, cr
)) != 0) {
1777 eprintsoline(nso
, error
);
1784 * Make note of the socket level options. TCP and IP level options
1785 * are already inherited. We could do all this after accept is
1786 * successful but doing it here simplifies code and no harm done
1789 nso
->so_options
= so
->so_options
& (SO_DEBUG
|SO_REUSEADDR
|SO_KEEPALIVE
|
1790 SO_DONTROUTE
|SO_BROADCAST
|SO_USELOOPBACK
|
1791 SO_OOBINLINE
|SO_DGRAM_ERRIND
|SO_LINGER
);
1792 nso
->so_sndbuf
= so
->so_sndbuf
;
1793 nso
->so_rcvbuf
= so
->so_rcvbuf
;
1794 if (nso
->so_options
& SO_LINGER
)
1795 nso
->so_linger
= so
->so_linger
;
1798 * Note that the following sti_direct code path should be
1799 * removed once we are confident that the direct sockets
1800 * do not result in any degradation.
1802 if (sti
->sti_direct
) {
1804 ASSERT(opt
!= NULL
);
1806 conn_res
->OPT_length
= optlen
;
1807 conn_res
->OPT_offset
= MBLKL(mp
);
1808 bcopy(&opt
, mp
->b_wptr
, optlen
);
1809 mp
->b_wptr
+= optlen
;
1810 conn_res
->PRIM_type
= T_CONN_RES
;
1811 conn_res
->ACCEPTOR_id
= 0;
1812 PRIM_type
= T_CONN_RES
;
1814 /* Send down the T_CONN_RES on acceptor STREAM */
1815 error
= kstrputmsg(SOTOV(nso
), mp
, NULL
,
1816 0, 0, MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
, 0);
1818 mutex_enter(&so
->so_lock
);
1820 eprintsoline(so
, error
);
1823 mutex_enter(&nso
->so_lock
);
1824 error
= sowaitprim(nso
, T_CONN_RES
, T_OK_ACK
,
1825 (t_uscalar_t
)sizeof (struct T_ok_ack
), &ack_mp
, 0);
1827 mutex_exit(&nso
->so_lock
);
1828 mutex_enter(&so
->so_lock
);
1830 eprintsoline(so
, error
);
1833 if (nso
->so_family
== AF_INET
) {
1836 sin
= (sin_t
*)(ack_mp
->b_rptr
+
1837 sizeof (struct T_ok_ack
));
1838 bcopy(sin
, nsti
->sti_laddr_sa
, sizeof (sin_t
));
1839 nsti
->sti_laddr_len
= sizeof (sin_t
);
1843 sin6
= (sin6_t
*)(ack_mp
->b_rptr
+
1844 sizeof (struct T_ok_ack
));
1845 bcopy(sin6
, nsti
->sti_laddr_sa
, sizeof (sin6_t
));
1846 nsti
->sti_laddr_len
= sizeof (sin6_t
);
1850 nso
->so_state
|= SS_ISCONNECTED
;
1851 nso
->so_proto_handle
= (sock_lower_handle_t
)opt
;
1852 nsti
->sti_laddr_valid
= 1;
1854 mutex_exit(&nso
->so_lock
);
1857 * It's possible, through the use of autopush for example,
1858 * that the acceptor stream may not support sti_direct
1859 * semantics. If the new socket does not support sti_direct
1860 * we issue a _SIOCSOCKFALLBACK to inform the transport
1861 * as we would in the I_PUSH case.
1863 if (nsti
->sti_direct
== 0) {
1866 if ((error
= strioctl(SOTOV(nso
), _SIOCSOCKFALLBACK
,
1867 0, 0, K_TO_K
, cr
, &rval
)) != 0) {
1868 mutex_enter(&so
->so_lock
);
1870 eprintsoline(so
, error
);
1876 * Pass out new socket.
1885 * This is the non-performance case for sockets (e.g. AF_UNIX sockets)
1886 * which don't support the FireEngine accept fast-path. It is also
1887 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd
1888 * again. Neither sockfs nor TCP attempt to find out if some other
1889 * random module has been inserted in between (in which case we
1890 * should follow TLI accept behaviour). We blindly assume the worst
1891 * case and revert back to old behaviour i.e. TCP will not send us
1892 * any option (eager) and the accept should happen on the listener
1893 * queue. Any queued T_conn_ind have already got their options removed
1894 * by so_sock2_stream() when "sockmod" was I_POP'd.
1897 * Fill in the {O_}T_CONN_RES before getting SOLOCKED.
1899 if ((nso
->so_mode
& SM_ACCEPTOR_ID
) == 0) {
1904 * Find read queue in driver
1905 * Can safely do this since we "own" nso/nvp.
1907 q
= strvp2wq(nvp
)->q_next
;
1911 conn_res
->ACCEPTOR_id
= (t_uscalar_t
)q
;
1913 conn_res
->ACCEPTOR_id
= (t_uscalar_t
)getminor(nvp
->v_rdev
);
1915 conn_res
->PRIM_type
= O_T_CONN_RES
;
1916 PRIM_type
= O_T_CONN_RES
;
1918 conn_res
->ACCEPTOR_id
= nsti
->sti_acceptor_id
;
1919 conn_res
->PRIM_type
= T_CONN_RES
;
1920 PRIM_type
= T_CONN_RES
;
1922 conn_res
->SEQ_number
= SEQ_number
;
1923 conn_res
->OPT_length
= 0;
1924 conn_res
->OPT_offset
= 0;
1926 mutex_enter(&so
->so_lock
);
1927 so_lock_single(so
); /* Set SOLOCKED */
1928 mutex_exit(&so
->so_lock
);
1930 error
= kstrputmsg(SOTOV(so
), mp
, NULL
,
1931 0, 0, MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
, 0);
1932 mutex_enter(&so
->so_lock
);
1934 eprintsoline(so
, error
);
1937 error
= sowaitprim(so
, PRIM_type
, T_OK_ACK
,
1938 (t_uscalar_t
)sizeof (struct T_ok_ack
), &ack_mp
, 0);
1940 eprintsoline(so
, error
);
1943 mutex_exit(&so
->so_lock
);
1945 * If there is a sin/sin6 appended onto the T_OK_ACK use
1946 * that to set the local address. If this is not present
1947 * then we zero out the address and don't set the
1948 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over
1949 * the pathname from the listening socket.
1950 * In the case where this is TCP or an AF_UNIX socket the
1951 * client side may have queued data or a T_ORDREL in the
1952 * transport. Having now sent the T_CONN_RES we may receive
1953 * those queued messages at any time. Hold the acceptor
1954 * so_lock until its state and laddr are finalized.
1956 mutex_enter(&nso
->so_lock
);
1957 sinlen
= (nso
->so_family
== AF_INET
) ? sizeof (sin_t
) : sizeof (sin6_t
);
1958 if ((nso
->so_family
== AF_INET
) || (nso
->so_family
== AF_INET6
) &&
1959 MBLKL(ack_mp
) == (sizeof (struct T_ok_ack
) + sinlen
)) {
1960 ack_mp
->b_rptr
+= sizeof (struct T_ok_ack
);
1961 bcopy(ack_mp
->b_rptr
, nsti
->sti_laddr_sa
, sinlen
);
1962 nsti
->sti_laddr_len
= sinlen
;
1963 nsti
->sti_laddr_valid
= 1;
1964 } else if (nso
->so_family
== AF_UNIX
) {
1965 ASSERT(so
->so_family
== AF_UNIX
);
1966 nsti
->sti_laddr_len
= sti
->sti_laddr_len
;
1967 ASSERT(nsti
->sti_laddr_len
<= nsti
->sti_laddr_maxlen
);
1968 bcopy(sti
->sti_laddr_sa
, nsti
->sti_laddr_sa
,
1969 nsti
->sti_laddr_len
);
1970 nsti
->sti_laddr_valid
= 1;
1972 nsti
->sti_laddr_len
= sti
->sti_laddr_len
;
1973 ASSERT(nsti
->sti_laddr_len
<= nsti
->sti_laddr_maxlen
);
1974 bzero(nsti
->sti_laddr_sa
, nsti
->sti_addr_size
);
1975 nsti
->sti_laddr_sa
->sa_family
= nso
->so_family
;
1977 nso
->so_state
|= SS_ISCONNECTED
;
1978 mutex_exit(&nso
->so_lock
);
1982 mutex_enter(&so
->so_lock
);
1983 so_unlock_single(so
, SOLOCKED
);
1984 mutex_exit(&so
->so_lock
);
1987 * Pass out new socket.
1998 eprintsoline(so
, error
);
1999 goto disconnect_unlocked
;
2002 eprintsoline(so
, error
);
2003 disconnect_vp_unlocked
:
2004 (void) fop_close(nvp
, 0, 1, 0, cr
, NULL
);
2006 disconnect_unlocked
:
2007 (void) sodisconnect(so
, SEQ_number
, 0);
2011 eprintsoline(so
, error
);
2013 (void) sodisconnect(so
, SEQ_number
, _SODISCONNECT_LOCK_HELD
);
2014 so_unlock_single(so
, SOLOCKED
);
2015 mutex_exit(&so
->so_lock
);
2016 (void) fop_close(nvp
, 0, 1, 0, cr
, NULL
);
2020 conn_bad
: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */
2021 error
= (so
->so_type
== SOCK_DGRAM
|| so
->so_type
== SOCK_RAW
)
2022 ? EOPNOTSUPP
: EINVAL
;
2024 eprintsoline(so
, error
);
/*
 * sotpi_connect(): connect a TPI socket to the address in name.
 *
 * NOTE(review): the end of the parameter list is missing from this copy;
 * the body and the dprintso() call use so, name, namelen, fflag, flags
 * and cr.
 *
 * Outline of the visible code:
 *  - The T_CONN_REQ mblk is allocated up front (soallocproto() with
 *    _ALLOC_INTR) so that no allocation for it sleeps with SOLOCKED
 *    held.
 *  - so_lock and SOLOCKED are taken; need_unlock records whether
 *    SOLOCKED is currently owned.  sti_unbind_mp is allocated if NULL.
 *  - SS_ACCEPTCONN is tested (no connect after listen).
 *  - An unbound socket is bound first: so_automatic_bind() for
 *    AF_INET/AF_INET6 SOCK_STREAM when soconnect_tpi_tcp is zero, and
 *    sotpi_bind() with _SOBIND_UNSPEC|_SOBIND_LOCK_HELD is the other
 *    visible option.  _SOCONNECT_DID_BIND is then set in flags so that
 *    the fatal error path can undo the bind.
 *  - A name whose sa_family is AF_UNSPEC is treated like a null address.
 *  - If already connected or connecting: SM_CONNREQUIRED sockets get an
 *    error; connectionless sockets are unconnected (sodisconnect() for
 *    AF_INET/AF_INET6 datagram or raw sockets when soconnect_tpi_udp is
 *    zero, the cached peer address is dropped, and SO_DGRAM_ERRIND is
 *    changed through sotpi_setsockopt() with the locks released around
 *    that call).
 *  - _SOCONNECT_NOXLATE (asserted to be AF_UNIX only) unwraps a struct
 *    sockaddr_ux and sets sti_faddr_noxlate.
 *  - so_addr_verify() checks the name, which is then cached in
 *    sti_faddr_sa.  For AF_UNIX without noxlate the destination is
 *    translated with so_ux_addr_xlate() and the local address becomes
 *    the source carried in a SO_SRCADDR option.
 *  - Connectionless sockets have SO_DGRAM_ERRIND set through
 *    sotpi_setsockopt(); the cached local address is invalidated where
 *    the transport may change it.
 *  - The T_CONN_REQ is built, passed to audit_sock(), sent with
 *    kstrputmsg() and acknowledged through sowaitokack(); SOLOCKED is
 *    then released and sowaitconnected() waits for the outcome.
 *  - On fatal errors SS_ISCONNECTING is cleared, sti_laddr_valid is
 *    reset and a bind done here is undone with sotpi_unbind().
 *
 * NOTE(review): many lines (declarations, braces, case labels, gotos,
 * returns, comment delimiters) are missing from this copy, so the exact
 * branch structure cannot be confirmed and the code is left as found.
 */
2031 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to
2032 * unconnect (by specifying a null address).
2035 sotpi_connect(struct sonode
*so
,
2036 struct sockaddr
*name
,
2042 struct T_conn_req conn_req
;
2049 boolean_t need_unlock
;
2050 sotpi_info_t
*sti
= SOTOTPI(so
);
2052 dprintso(so
, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n",
2053 (void *)so
, (void *)name
, namelen
, fflag
, flags
,
2054 pr_state(so
->so_state
, so
->so_mode
)));
2057 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to
2058 * avoid sleeping for memory with SOLOCKED held.
2059 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen
2060 * + sizeof (struct T_opthdr).
2061 * (the AF_UNIX so_ux_addr_xlate() does not make the address
2062 * exceed sti_faddr_maxlen).
2064 mp
= soallocproto(sizeof (struct T_conn_req
) +
2065 2 * sti
->sti_faddr_maxlen
+ sizeof (struct T_opthdr
), _ALLOC_INTR
,
2069 * Connect can not fail with ENOBUFS. A signal was
2070 * caught so return EINTR.
2073 eprintsoline(so
, error
);
2077 mutex_enter(&so
->so_lock
);
2079 * Make sure there is a preallocated T_unbind_req message
2080 * before any binding. This message is allocated when the
2081 * socket is created. Since another thread can consume
2082 * so_unbind_mp by the time we return from so_lock_single(),
2083 * we should check the availability of so_unbind_mp after
2084 * we return from so_lock_single().
2087 so_lock_single(so
); /* Set SOLOCKED */
2088 need_unlock
= B_TRUE
;
2090 if (sti
->sti_unbind_mp
== NULL
) {
2091 dprintso(so
, 1, ("sotpi_connect: allocating unbind_req\n"));
2092 /* NOTE: holding so_lock while sleeping */
2093 sti
->sti_unbind_mp
=
2094 soallocproto(sizeof (struct T_unbind_req
), _ALLOC_INTR
, cr
);
2095 if (sti
->sti_unbind_mp
== NULL
) {
2102 * Can't have done a listen before connecting.
2104 if (so
->so_state
& SS_ACCEPTCONN
) {
2110 * Must be bound with the transport
2112 if (!(so
->so_state
& SS_ISBOUND
)) {
2113 if ((so
->so_family
== AF_INET
|| so
->so_family
== AF_INET6
) &&
2115 so
->so_type
== SOCK_STREAM
&& !soconnect_tpi_tcp
) {
2117 * Optimization for AF_INET{,6} transports
2118 * that can handle a T_CONN_REQ without being bound.
2120 so_automatic_bind(so
);
2122 error
= sotpi_bind(so
, NULL
, 0,
2123 _SOBIND_UNSPEC
|_SOBIND_LOCK_HELD
, cr
);
2127 ASSERT(so
->so_state
& SS_ISBOUND
);
2128 flags
|= _SOCONNECT_DID_BIND
;
2132 * Handle a connect to a name parameter of type AF_UNSPEC like a
2133 * connect to a null address. This is the portable method to
2134 * unconnect a socket.
2136 if ((namelen
>= sizeof (sa_family_t
)) &&
2137 (name
->sa_family
== AF_UNSPEC
)) {
2143 * Check that we are not already connected.
2144 * A connection-oriented socket cannot be reconnected.
2145 * A connected connection-less socket can be
2146 * - connected to a different address by a subsequent connect
2147 * - "unconnected" by a connect to the NULL address
2149 if (so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) {
2150 ASSERT(!(flags
& _SOCONNECT_DID_BIND
));
2151 if (so
->so_mode
& SM_CONNREQUIRED
) {
2152 /* Connection-oriented socket */
2153 error
= so
->so_state
& SS_ISCONNECTED
?
2157 /* Connection-less socket */
2160 * Remove the connected state and clear SO_DGRAM_ERRIND
2161 * since it was set when the socket was connected.
2162 * If this is UDP also send down a T_DISCON_REQ.
2166 if ((so
->so_family
== AF_INET
||
2167 so
->so_family
== AF_INET6
) &&
2168 (so
->so_type
== SOCK_DGRAM
||
2169 so
->so_type
== SOCK_RAW
) &&
2171 !soconnect_tpi_udp
) {
2172 /* XXX What about implicitly unbinding here? */
2173 error
= sodisconnect(so
, -1,
2174 _SODISCONNECT_LOCK_HELD
);
2177 ~(SS_ISCONNECTED
| SS_ISCONNECTING
);
2178 sti
->sti_faddr_valid
= 0;
2179 sti
->sti_faddr_len
= 0;
2182 /* Remove SOLOCKED since setsockopt will grab it */
2183 so_unlock_single(so
, SOLOCKED
);
2184 mutex_exit(&so
->so_lock
);
2187 (void) sotpi_setsockopt(so
, SOL_SOCKET
,
2188 SO_DGRAM_ERRIND
, &val
, (t_uscalar_t
)sizeof (val
),
2191 mutex_enter(&so
->so_lock
);
2192 so_lock_single(so
); /* Set SOLOCKED */
2196 ASSERT(so
->so_state
& SS_ISBOUND
);
2198 if (name
== NULL
|| namelen
== 0) {
2203 * Mark the socket if sti_faddr_sa represents the transport level
2206 if (flags
& _SOCONNECT_NOXLATE
) {
2207 struct sockaddr_ux
*soaddr_ux
;
2209 ASSERT(so
->so_family
== AF_UNIX
);
2210 if (namelen
!= sizeof (struct sockaddr_ux
)) {
2214 soaddr_ux
= (struct sockaddr_ux
*)name
;
2215 name
= (struct sockaddr
*)&soaddr_ux
->sou_addr
;
2216 namelen
= sizeof (soaddr_ux
->sou_addr
);
2217 sti
->sti_faddr_noxlate
= 1;
2221 * Length and family checks.
2223 error
= so_addr_verify(so
, name
, namelen
);
2228 * Save foreign address. Needed for AF_UNIX as well as
2229 * transport providers that do not support TI_GETPEERNAME.
2230 * Also used for cached foreign address for TCP and UDP.
2232 if (namelen
> (t_uscalar_t
)sti
->sti_faddr_maxlen
) {
2236 sti
->sti_faddr_len
= (socklen_t
)namelen
;
2237 ASSERT(sti
->sti_faddr_len
<= sti
->sti_faddr_maxlen
);
2238 bcopy(name
, sti
->sti_faddr_sa
, namelen
);
2239 sti
->sti_faddr_valid
= 1;
2241 if (so
->so_family
== AF_UNIX
) {
2242 if (sti
->sti_faddr_noxlate
) {
2244 * sti_faddr is a transport-level address, so
2245 * don't pass it as an option. Do save it in
2246 * sti_ux_faddr, used for connected DG send.
2250 addr
= sti
->sti_faddr_sa
;
2251 addrlen
= (t_uscalar_t
)sti
->sti_faddr_len
;
2252 bcopy(addr
, &sti
->sti_ux_faddr
,
2253 sizeof (sti
->sti_ux_faddr
));
2256 * Pass the sockaddr_un source address as an option
2257 * and translate the remote address.
2258 * Holding so_lock thus sti_laddr_sa can not change.
2260 src
= sti
->sti_laddr_sa
;
2261 srclen
= (t_uscalar_t
)sti
->sti_laddr_len
;
2263 ("sotpi_connect UNIX: srclen %d, src %p\n",
2266 * Translate the destination address into our
2267 * internal form, and save it in sti_ux_faddr.
2268 * After this call, addr==&sti->sti_ux_taddr,
2269 * and we copy that to sti->sti_ux_faddr so
2270 * we save the connected peer address.
2272 error
= so_ux_addr_xlate(so
,
2273 sti
->sti_faddr_sa
, (socklen_t
)sti
->sti_faddr_len
,
2277 bcopy(&sti
->sti_ux_taddr
, &sti
->sti_ux_faddr
,
2278 sizeof (sti
->sti_ux_faddr
));
2281 addr
= sti
->sti_faddr_sa
;
2282 addrlen
= (t_uscalar_t
)sti
->sti_faddr_len
;
2287 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND
2288 * option which asks the transport provider to send T_UDERR_IND
2289 * messages. These T_UDERR_IND messages are used to return connected
2290 * style errors (e.g. ECONNRESET) for connected datagram sockets.
2292 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets)
2293 * we send down a T_CONN_REQ. This is needed to let the
2294 * transport assign a local address that is consistent with
2295 * the remote address. Applications depend on a getsockname()
2296 * after a connect() to retrieve the "source" IP address for
2297 * the connected socket. Invalidate the cached local address
2298 * to force getsockname() to enquire of the transport.
2300 if (!(so
->so_mode
& SM_CONNREQUIRED
)) {
2306 so_unlock_single(so
, SOLOCKED
);
2307 mutex_exit(&so
->so_lock
);
2310 (void) sotpi_setsockopt(so
, SOL_SOCKET
, SO_DGRAM_ERRIND
,
2311 &val
, (t_uscalar_t
)sizeof (val
), cr
);
2313 mutex_enter(&so
->so_lock
);
2314 so_lock_single(so
); /* Set SOLOCKED */
2315 if ((so
->so_family
!= AF_INET
&& so
->so_family
!= AF_INET6
) ||
2316 (so
->so_type
!= SOCK_DGRAM
&& so
->so_type
!= SOCK_RAW
) ||
2317 soconnect_tpi_udp
) {
2322 * Send down T_CONN_REQ etc.
2323 * Clear fflag to avoid returning EWOULDBLOCK.
2326 ASSERT(so
->so_family
!= AF_UNIX
);
2327 sti
->sti_laddr_valid
= 0;
2328 } else if (sti
->sti_laddr_len
!= 0) {
2330 * If the local address or port was "any" then it may be
2331 * changed by the transport as a result of the
2332 * connect. Invalidate the cached version if we have one.
2334 switch (so
->so_family
) {
2336 ASSERT(sti
->sti_laddr_len
== (socklen_t
)sizeof (sin_t
));
2337 if (((sin_t
*)sti
->sti_laddr_sa
)->sin_addr
.s_addr
==
2339 ((sin_t
*)sti
->sti_laddr_sa
)->sin_port
== 0)
2340 sti
->sti_laddr_valid
= 0;
2344 ASSERT(sti
->sti_laddr_len
==
2345 (socklen_t
)sizeof (sin6_t
));
2346 if (IN6_IS_ADDR_UNSPECIFIED(
2347 &((sin6_t
*)sti
->sti_laddr_sa
) ->sin6_addr
) ||
2348 IN6_IS_ADDR_V4MAPPED_ANY(
2349 &((sin6_t
*)sti
->sti_laddr_sa
)->sin6_addr
) ||
2350 ((sin6_t
*)sti
->sti_laddr_sa
)->sin6_port
== 0)
2351 sti
->sti_laddr_valid
= 0;
2360 * Check for failure of an earlier call
2362 if (so
->so_error
!= 0)
2366 * Send down T_CONN_REQ. Message was allocated above.
2368 conn_req
.PRIM_type
= T_CONN_REQ
;
2369 conn_req
.DEST_length
= addrlen
;
2370 conn_req
.DEST_offset
= (t_scalar_t
)sizeof (conn_req
);
2372 conn_req
.OPT_length
= 0;
2373 conn_req
.OPT_offset
= 0;
2374 soappendmsg(mp
, &conn_req
, sizeof (conn_req
));
2375 soappendmsg(mp
, addr
, addrlen
);
2378 * There is a AF_UNIX sockaddr_un to include as a source
2381 struct T_opthdr toh
;
2383 toh
.level
= SOL_SOCKET
;
2384 toh
.name
= SO_SRCADDR
;
2385 toh
.len
= (t_uscalar_t
)(srclen
+ sizeof (struct T_opthdr
));
2387 conn_req
.OPT_length
=
2388 (t_scalar_t
)(sizeof (toh
) + _TPI_ALIGN_TOPT(srclen
));
2389 conn_req
.OPT_offset
= (t_scalar_t
)(sizeof (conn_req
) +
2390 _TPI_ALIGN_TOPT(addrlen
));
2392 soappendmsg(mp
, &conn_req
, sizeof (conn_req
));
2393 soappendmsg(mp
, addr
, addrlen
);
2394 mp
->b_wptr
+= _TPI_ALIGN_TOPT(addrlen
) - addrlen
;
2395 soappendmsg(mp
, &toh
, sizeof (toh
));
2396 soappendmsg(mp
, src
, srclen
);
2397 mp
->b_wptr
+= _TPI_ALIGN_TOPT(srclen
) - srclen
;
2398 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
2401 * Set SS_ISCONNECTING before sending down the T_CONN_REQ
2402 * in order to have the right state when the T_CONN_CON shows up.
2405 mutex_exit(&so
->so_lock
);
2408 audit_sock(T_CONN_REQ
, strvp2wq(SOTOV(so
)), mp
, 0);
2410 error
= kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
2411 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
, 0);
2413 mutex_enter(&so
->so_lock
);
2417 if ((error
= sowaitokack(so
, T_CONN_REQ
)) != 0)
2420 /* Allow other threads to access the socket */
2421 so_unlock_single(so
, SOLOCKED
);
2422 need_unlock
= B_FALSE
;
2425 * Wait until we get a T_CONN_CON or an error
2427 if ((error
= sowaitconnected(so
, fflag
, 0)) != 0) {
2428 so_lock_single(so
); /* Set SOLOCKED */
2429 need_unlock
= B_TRUE
;
2439 /* Non-fatal errors */
2440 sti
->sti_laddr_valid
= 0;
2445 ASSERT(need_unlock
);
2447 * Fatal errors: clear SS_ISCONNECTING in case it was set,
2448 * and invalidate local-address cache
2450 so
->so_state
&= ~SS_ISCONNECTING
;
2451 sti
->sti_laddr_valid
= 0;
2452 /* A discon_ind might have already unbound us */
2453 if ((flags
& _SOCONNECT_DID_BIND
) &&
2454 (so
->so_state
& SS_ISBOUND
)) {
2457 err
= sotpi_unbind(so
, 0);
2458 /* LINTED - statement has no conseq */
2460 eprintsoline(so
, err
);
2466 so_unlock_single(so
, SOLOCKED
);
2467 mutex_exit(&so
->so_lock
);
2470 so_bad
: error
= sogeterr(so
, B_TRUE
);
2471 bad
: eprintsoline(so
, error
);
/*
 * sotpi_shutdown: shut down the receive and/or send side of a TPI socket.
 *
 * so  - socket node; so_lock is entered and SOLOCKED is set via
 *       so_lock_single() for the duration of the call.
 * how - direction requested by the caller (also printed by dprintso).
 * cr  - credentials handed to soallocproto1() for the T_ORDREL_REQ.
 *
 * NOTE(review): this listing omits a number of original lines (return
 * type, braces, the code that applies "how" to so_state, the return
 * statements), so only the visible steps are summarized here.
 *
 * Visible flow:
 *  - an unconnected socket is subject to the X/Open check unless
 *    xnet_skip_checks is set (the printf text names ENOTCONN);
 *  - so_state is snapshotted into old_state and the newly set
 *    SS_CANTRCVMORE/SS_CANTSENDMORE bits are derived as state_change;
 *  - depending on state_change the stream head is told about EOF
 *    (strseteof), write errors (strsetwerror) and the read queue is
 *    flushed (strflushrq) while SOREADLOCKED is held;
 *  - with both SS_CANT* bits set, T_COTS transports get sodisconnect();
 *  - a new SS_CANTSENDMORE on T_COTS_ORD sends a T_ORDREL_REQ with
 *    kstrputmsg(), ignoring flow control.
 */
2477 sotpi_shutdown(struct sonode
*so
, int how
, struct cred
*cr
)
2479 struct T_ordrel_req ordrel_req
;
2481 uint_t old_state
, state_change
;
2483 sotpi_info_t
*sti
= SOTOTPI(so
);
2485 dprintso(so
, 1, ("sotpi_shutdown(%p, %d) %s\n",
2486 (void *)so
, how
, pr_state(so
->so_state
, so
->so_mode
)));
2488 mutex_enter(&so
->so_lock
);
2489 so_lock_single(so
); /* Set SOLOCKED */
2492 * SunOS 4.X has no check for datagram sockets.
2493 * 5.X checks that it is connected (ENOTCONN)
2494 * X/Open requires that we check the connected state.
2496 if (!(so
->so_state
& SS_ISCONNECTED
)) {
2497 if (!xnet_skip_checks
) {
2499 if (xnet_check_print
) {
2500 printf("sockfs: X/Open shutdown check "
2501 "caused ENOTCONN\n");
2507 * Record the current state and then perform any state changes.
2508 * Then use the difference between the old and new states to
2509 * determine which messages need to be sent.
2510 * This prevents e.g. duplicate T_ORDREL_REQ when there are
2511 * duplicate calls to shutdown().
2513 old_state
= so
->so_state
;
2532 * Assumes that the SS_CANT* flags are never cleared in the above code.
/*
 * Subtracting the masked old state from the masked new state leaves only
 * the SS_CANT* bits that were newly set; the ASSERT that follows checks
 * that nothing outside those two bits remains.
 */
2534 state_change
= (so
->so_state
& (SS_CANTRCVMORE
|SS_CANTSENDMORE
)) -
2535 (old_state
& (SS_CANTRCVMORE
|SS_CANTSENDMORE
));
2536 ASSERT((state_change
& ~(SS_CANTRCVMORE
|SS_CANTSENDMORE
)) == 0);
2538 switch (state_change
) {
2541 ("sotpi_shutdown: nothing to send in state 0x%x\n",
2545 case SS_CANTRCVMORE
:
2546 mutex_exit(&so
->so_lock
);
2547 strseteof(SOTOV(so
), 1);
2549 * strseteof takes care of read side wakeups,
2550 * pollwakeups, and signals.
2553 * Get the read lock before flushing data to avoid problems
2554 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2556 mutex_enter(&so
->so_lock
);
2557 (void) so_lock_read(so
, 0); /* Set SOREADLOCKED */
2558 mutex_exit(&so
->so_lock
);
2560 /* Flush read side queue */
2561 strflushrq(SOTOV(so
), FLUSHALL
);
2563 mutex_enter(&so
->so_lock
);
2564 so_unlock_read(so
); /* Clear SOREADLOCKED */
2567 case SS_CANTSENDMORE
:
2568 mutex_exit(&so
->so_lock
);
2569 strsetwerror(SOTOV(so
), 0, 0, sogetwrerr
);
2570 mutex_enter(&so
->so_lock
);
2573 case SS_CANTSENDMORE
|SS_CANTRCVMORE
:
2574 mutex_exit(&so
->so_lock
);
2575 strsetwerror(SOTOV(so
), 0, 0, sogetwrerr
);
2576 strseteof(SOTOV(so
), 1);
2578 * strseteof takes care of read side wakeups,
2579 * pollwakeups, and signals.
2582 * Get the read lock before flushing data to avoid problems
2583 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg.
2585 mutex_enter(&so
->so_lock
);
2586 (void) so_lock_read(so
, 0); /* Set SOREADLOCKED */
2587 mutex_exit(&so
->so_lock
);
2589 /* Flush read side queue */
2590 strflushrq(SOTOV(so
), FLUSHALL
);
2592 mutex_enter(&so
->so_lock
);
2593 so_unlock_read(so
); /* Clear SOREADLOCKED */
2597 ASSERT(MUTEX_HELD(&so
->so_lock
));
2600 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them
2601 * was set due to this call and the new state has both of them set:
2602 * Send the AF_UNIX close indication
2603 * For T_COTS send a discon_ind
2605 * If cantsend was set due to this call:
2606 * For T_COTSORD send an ordrel_ind
2608 * Note that for T_CLTS there is no message sent here.
2610 if ((so
->so_state
& (SS_CANTRCVMORE
|SS_CANTSENDMORE
)) ==
2611 (SS_CANTRCVMORE
|SS_CANTSENDMORE
)) {
2613 * For SunOS 4.X compatibility we tell the other end
2614 * that we are unable to receive at this point.
2616 if (so
->so_family
== AF_UNIX
&& sti
->sti_serv_type
!= T_CLTS
)
2619 if (sti
->sti_serv_type
== T_COTS
)
2620 error
= sodisconnect(so
, -1, _SODISCONNECT_LOCK_HELD
);
2622 if ((state_change
& SS_CANTSENDMORE
) &&
2623 (sti
->sti_serv_type
== T_COTS_ORD
)) {
2624 /* Send an orderly release */
2625 ordrel_req
.PRIM_type
= T_ORDREL_REQ
;
/*
 * so_lock is released around the allocation and kstrputmsg() and taken
 * again afterwards; SOLOCKED, set by so_lock_single() near the top, is
 * only cleared by the so_unlock_single() call further down.
 */
2627 mutex_exit(&so
->so_lock
);
2628 mp
= soallocproto1(&ordrel_req
, sizeof (ordrel_req
),
2629 0, _ALLOC_SLEEP
, cr
);
2631 * Send down the T_ORDREL_REQ even if there is flow control.
2632 * This prevents shutdown from blocking.
2633 * Note that there is no T_OK_ACK for ordrel_req.
2635 error
= kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
2636 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
|MSG_IGNFLOW
, 0);
2637 mutex_enter(&so
->so_lock
);
2639 eprintsoline(so
, error
);
2645 so_unlock_single(so
, SOLOCKED
);
2646 mutex_exit(&so
->so_lock
);
2651 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send
2652 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer
2653 * that we have closed.
2654 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length
2655 * T_UNITDATA_REQ containing the same option.
2657 * For SOCK_DGRAM half-connections (somebody connected to this end
2658 * but this end is not connected) we don't know where to send any
2661 * We have to ignore stream head errors just in case there has been
2662 * a shutdown(output).
2663 * Ignore any flow control to try to get the message more quickly to the peer.
2664 * While locally ignoring flow control solves the problem when there
2665 * is only the loopback transport on the stream it would not provide
2666 * the correct AF_UNIX socket semantics when one or more modules have
/*
 * so_unix_close: tell the peer of an AF_UNIX socket that this end closed.
 *
 * The caller must hold so_lock and the socket family must be AF_UNIX
 * (both are asserted). so_lock is dropped around the final kstrputmsg()
 * and re-entered before the visible end of the function.
 *
 * NOTE(review): several original lines are absent from this listing
 * (return type, braces, some declarations, the statement guarded by the
 * connected/bound test, and the else arms), so this summary is limited
 * to the visible steps.
 *
 * Visible flow:
 *  - a T_opthdr is filled in with level SOL_SOCKET, name SO_UNIX_CLOSE
 *    and a length that covers only the header;
 *  - SOCK_STREAM and SOCK_SEQPACKET sockets wrap it in a T_OPTDATA_REQ;
 *  - otherwise a T_UNITDATA_REQ is built for the saved peer address,
 *    with an additional SO_SRCADDR option carrying sti_laddr_sa when a
 *    source address is included;
 *  - the message is sent with MSG_IGNERROR|MSG_IGNFLOW and the result of
 *    kstrputmsg() is discarded.
 */
2670 so_unix_close(struct sonode
*so
)
2672 struct T_opthdr toh
;
2674 sotpi_info_t
*sti
= SOTOTPI(so
);
2676 ASSERT(MUTEX_HELD(&so
->so_lock
));
2678 ASSERT(so
->so_family
== AF_UNIX
);
/*
 * Both SS_ISCONNECTED and SS_ISBOUND must be set to pass this test.
 * NOTE(review): the statement it guards is not visible in this listing.
 */
2680 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISBOUND
)) !=
2681 (SS_ISCONNECTED
|SS_ISBOUND
))
2684 dprintso(so
, 1, ("so_unix_close(%p) %s\n",
2685 (void *)so
, pr_state(so
->so_state
, so
->so_mode
)));
2687 toh
.level
= SOL_SOCKET
;
2688 toh
.name
= SO_UNIX_CLOSE
;
2690 /* zero length + header */
2691 toh
.len
= (t_uscalar_t
)sizeof (struct T_opthdr
);
2694 if (so
->so_type
== SOCK_STREAM
|| so
->so_type
== SOCK_SEQPACKET
) {
2695 struct T_optdata_req tdr
;
2697 tdr
.PRIM_type
= T_OPTDATA_REQ
;
2700 tdr
.OPT_length
= (t_scalar_t
)sizeof (toh
);
2701 tdr
.OPT_offset
= (t_scalar_t
)sizeof (tdr
);
2703 /* NOTE: holding so_lock while sleeping */
2704 mp
= soallocproto2(&tdr
, sizeof (tdr
),
2705 &toh
, sizeof (toh
), 0, _ALLOC_SLEEP
, CRED());
2707 struct T_unitdata_req tudr
;
2712 struct T_opthdr toh2
;
2716 * We know this is an AF_UNIX connected DGRAM socket.
2717 * We therefore already have the destination address
2718 * in the internal form needed for this send. This is
2719 * similar to the sosend_dgram call later in this file
2720 * when there's no user-specified destination address.
2722 if (sti
->sti_faddr_noxlate
) {
2724 * Already have a transport internal address. Do not
2725 * pass any (transport internal) source address.
2727 addr
= sti
->sti_faddr_sa
;
2728 addrlen
= (t_uscalar_t
)sti
->sti_faddr_len
;
2733 * Pass the sockaddr_un source address as an option
2734 * and translate the remote address.
2735 * Holding so_lock thus sti_laddr_sa can not change.
2737 src
= sti
->sti_laddr_sa
;
2738 srclen
= (socklen_t
)sti
->sti_laddr_len
;
2740 ("so_ux_close: srclen %d, src %p\n",
2743 * Use the destination address saved in connect.
2745 addr
= &sti
->sti_ux_faddr
;
2746 addrlen
= sizeof (sti
->sti_ux_faddr
);
2748 tudr
.PRIM_type
= T_UNITDATA_REQ
;
2749 tudr
.DEST_length
= addrlen
;
2750 tudr
.DEST_offset
= (t_scalar_t
)sizeof (tudr
);
2752 tudr
.OPT_length
= (t_scalar_t
)sizeof (toh
);
2753 tudr
.OPT_offset
= (t_scalar_t
)(sizeof (tudr
) +
2754 _TPI_ALIGN_TOPT(addrlen
));
2756 size
= tudr
.OPT_offset
+ tudr
.OPT_length
;
2757 /* NOTE: holding so_lock while sleeping */
2758 mp
= soallocproto2(&tudr
, sizeof (tudr
),
2759 addr
, addrlen
, size
, _ALLOC_SLEEP
, CRED());
2760 mp
->b_wptr
+= (_TPI_ALIGN_TOPT(addrlen
) - addrlen
);
2761 soappendmsg(mp
, &toh
, sizeof (toh
));
2764 * There is a AF_UNIX sockaddr_un to include as a
2765 * source address option.
2767 tudr
.OPT_length
= (t_scalar_t
)(2 * sizeof (toh
) +
2768 _TPI_ALIGN_TOPT(srclen
));
2769 tudr
.OPT_offset
= (t_scalar_t
)(sizeof (tudr
) +
2770 _TPI_ALIGN_TOPT(addrlen
));
2772 toh2
.level
= SOL_SOCKET
;
2773 toh2
.name
= SO_SRCADDR
;
2774 toh2
.len
= (t_uscalar_t
)(srclen
+
2775 sizeof (struct T_opthdr
));
2778 size
= tudr
.OPT_offset
+ tudr
.OPT_length
;
2780 /* NOTE: holding so_lock while sleeping */
2781 mp
= soallocproto2(&tudr
, sizeof (tudr
),
2782 addr
, addrlen
, size
, _ALLOC_SLEEP
, CRED());
2783 mp
->b_wptr
+= _TPI_ALIGN_TOPT(addrlen
) - addrlen
;
2784 soappendmsg(mp
, &toh
, sizeof (toh
));
2785 soappendmsg(mp
, &toh2
, sizeof (toh2
));
2786 soappendmsg(mp
, src
, srclen
);
2787 mp
->b_wptr
+= _TPI_ALIGN_TOPT(srclen
) - srclen
;
2789 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
2791 mutex_exit(&so
->so_lock
);
2792 (void) kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
2793 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
|MSG_IGNFLOW
, 0);
2794 mutex_enter(&so
->so_lock
);
2798 * Called by sotpi_recvmsg when reading a non-zero amount of data.
2799 * In addition, the caller typically verifies that there is some
2800 * potential state to clear by checking
2801 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK))
2802 * before calling this routine.
2803 * Note that such a check can be made without holding so_lock since
2804 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg
2805 * decrements sti_oobsigcnt.
2807 * When data is read *after* the point that all pending
2808 * oob data has been consumed the oob indication is cleared.
2810 * This logic keeps select/poll returning POLLRDBAND and
2811 * SIOCATMARK returning true until we have read past
/*
 * sorecv_update_oobstate: drop the out-of-band indications on a socket
 * once no OOB signals remain pending.
 *
 * so - socket node; so_lock is entered at the top and exited at the
 *      bottom, with so_verify_oobstate() asserted on both sides.
 *
 * NOTE(review): the return type, braces and part of the dprintso call
 * are on lines absent from this listing.
 */
2815 sorecv_update_oobstate(struct sonode
*so
)
2817 sotpi_info_t
*sti
= SOTOTPI(so
);
2819 mutex_enter(&so
->so_lock
);
2820 ASSERT(so_verify_oobstate(so
));
2822 ("sorecv_update_oobstate: counts %d/%d state %s\n",
2824 sti
->sti_oobcnt
, pr_state(so
->so_state
, so
->so_mode
)));
/*
 * Only when sti_oobsigcnt is zero are the three OOB state bits cleared
 * and the saved so_oobmsg freed and reset to NULL.
 */
2825 if (sti
->sti_oobsigcnt
== 0) {
2826 /* No more pending oob indications */
2827 so
->so_state
&= ~(SS_OOBPEND
|SS_HAVEOOBDATA
|SS_RCVATMARK
);
2828 freemsg(so
->so_oobmsg
);
2829 so
->so_oobmsg
= NULL
;
2831 ASSERT(so_verify_oobstate(so
));
2832 mutex_exit(&so
->so_lock
);
2836 * Receive the next message on the queue.
2837 * If msg_controllen is non-zero when called the caller is interested in
2838 * any received control info (options).
2839 * If msg_namelen is non-zero when called the caller is interested in
2840 * any received source address.
2841 * The routine returns with msg_control and msg_name pointing to
2842 * kmem_alloc'ed memory which the caller has to free.
/*
 * sotpi_recvmsg: receive the next message from the stream head of a TPI
 * socket into uiop, filling in msg with flags, name and control data.
 *
 * NOTE(review): this listing is an extract in which many original lines
 * are absent (return type, the rest of the parameter list, braces,
 * labels, several case labels, gotos and returns). Only the visible
 * steps are summarized:
 *  - a socket with so_is_stream set is handed straight to strread();
 *  - a socket that requires a connection (SM_CONNREQUIRED) and has
 *    neither SS_ISCONNECTED nor SS_CANTRCVMORE in the state snapshot is
 *    treated specially (the action is on a line not shown);
 *  - MSG_OOB requests return EOPNOTSUPP unless so_mode has SM_EXDATA and
 *    are otherwise served by sorecvoob();
 *  - the caller-supplied msg_controllen and msg_namelen are saved in
 *    locals and then zeroed in msg before anything is read;
 *  - SOREADLOCKED is taken with so_lock_read_intr() and cleared with
 *    so_unlock_read() at the visible end of the function;
 *  - kstrgetmsg() fetches the message; the T_primitives type then
 *    selects the handling, with T_UNITDATA_IND, T_OPTDATA_IND and
 *    T_EXDATA_IND being the case labels visible here;
 *  - the OOB state is refreshed through sorecv_update_oobstate() when
 *    data was consumed without MSG_PEEK;
 *  - MSG_WAITALL causes a further read (pflag = opflag | MSG_NOMARK)
 *    while data was received, MSG_EOR is not set and uio_resid is still
 *    positive.
 * The name and control buffers stored into msg are allocated with
 * kmem_alloc()/kmem_zalloc() in this function.
 */
2846 sotpi_recvmsg(struct sonode
*so
, struct msghdr
*msg
, struct uio
*uiop
,
2849 union T_primitives
*tpr
;
2854 t_uscalar_t controllen
;
2855 t_uscalar_t namelen
;
2856 int so_state
= so
->so_state
; /* Snapshot */
2857 ssize_t saved_resid
;
2862 sotpi_info_t
*sti
= SOTOTPI(so
);
2864 flags
= msg
->msg_flags
;
2867 dprintso(so
, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n",
2868 (void *)so
, (void *)msg
, flags
,
2869 pr_state(so
->so_state
, so
->so_mode
), so
->so_error
));
2871 if (so
->so_is_stream
) {
2872 so_update_attrs(so
, SOACC
);
2873 /* The imaginary "sockmod" has been popped - act as a stream */
2874 return (strread(SOTOV(so
), uiop
, cr
));
2878 * If we are not connected because we have never been connected
2879 * we return ENOTCONN. If we have been connected (but are no longer
2880 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return
2883 * An alternative would be to post an ENOTCONN error in stream head
2884 * (read+write) and clear it when we're connected. However, that error
2885 * would cause incorrect poll/select behavior!
2887 if ((so_state
& (SS_ISCONNECTED
|SS_CANTRCVMORE
)) == 0 &&
2888 (so
->so_mode
& SM_CONNREQUIRED
)) {
2893 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but
2894 * after checking that the read queue is empty) and returns zero.
2895 * This implementation will sleep (in kstrgetmsg) even if uio_resid
2899 if (flags
& MSG_OOB
) {
2900 /* Check that the transport supports OOB */
2901 if (!(so
->so_mode
& SM_EXDATA
))
2902 return (EOPNOTSUPP
);
2903 so_update_attrs(so
, SOACC
);
2904 return (sorecvoob(so
, msg
, uiop
, flags
,
2905 (so
->so_options
& SO_OOBINLINE
)));
2908 so_update_attrs(so
, SOACC
);
2911 * Set msg_controllen and msg_namelen to zero here to make it
2912 * simpler in the cases that no control or name is returned.
2914 controllen
= msg
->msg_controllen
;
2915 namelen
= msg
->msg_namelen
;
2916 msg
->msg_controllen
= 0;
2917 msg
->msg_namelen
= 0;
2919 dprintso(so
, 1, ("sotpi_recvmsg: namelen %d controllen %d\n",
2920 namelen
, controllen
));
2922 mutex_enter(&so
->so_lock
);
2924 * Only one reader is allowed at any given time. This is needed
2925 * for T_EXDATA handling and, in the future, MSG_WAITALL.
2927 * This is slightly different that BSD behavior in that it fails with
2928 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access
2929 * is single-threaded using sblock(), which is dropped while waiting
2930 * for data to appear. The difference shows up e.g. if one
2931 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor
2932 * does use nonblocking io and different threads are reading each
2933 * file descriptor. In BSD there would never be an EWOULDBLOCK error
2934 * in this case as long as the read queue doesn't get empty.
2935 * In this implementation the thread using nonblocking io can
2936 * get an EWOULDBLOCK error due to the blocking thread executing
2937 * e.g. in the uiomove in kstrgetmsg.
2938 * This difference is not believed to be significant.
2940 /* Set SOREADLOCKED */
2941 error
= so_lock_read_intr(so
,
2942 uiop
->uio_fmode
| ((flags
& MSG_DONTWAIT
) ? FNONBLOCK
: 0));
2943 mutex_exit(&so
->so_lock
);
2948 * Tell kstrgetmsg to not inspect the stream head errors until all
2949 * queued data has been consumed.
2950 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set.
2951 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block.
2953 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and
2954 * to T_OPTDATA_IND that do not contain any user-visible control msg.
2955 * Note that MSG_WAITALL set with MSG_PEEK is a noop.
2957 pflag
= MSG_ANY
| MSG_DELAYERROR
;
2958 if (flags
& MSG_PEEK
) {
2960 flags
&= ~MSG_WAITALL
;
2962 if (so
->so_mode
& SM_ATOMIC
)
2963 pflag
|= MSG_DISCARDTAIL
;
2965 if (flags
& MSG_DONTWAIT
)
2967 else if (so
->so_rcvtimeo
!= 0)
2968 timout
= TICK_TO_MSEC(so
->so_rcvtimeo
);
2973 saved_resid
= uiop
->uio_resid
;
2976 error
= kstrgetmsg(SOTOV(so
), &mp
, uiop
, &pri
, &pflag
, timout
, &rval
);
2978 /* kstrgetmsg returns ETIME when timeout expires */
2980 error
= EWOULDBLOCK
;
2984 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
2985 * For non-datagrams MOREDATA is used to set MSG_EOR.
2987 ASSERT(!(rval
.r_val1
& MORECTL
));
2988 if ((rval
.r_val1
& MOREDATA
) && (so
->so_mode
& SM_ATOMIC
))
2989 msg
->msg_flags
|= MSG_TRUNC
;
2992 dprintso(so
, 1, ("sotpi_recvmsg: got M_DATA\n"));
2994 * 4.3BSD and 4.4BSD clears the mark when peeking across it.
2995 * The draft Posix socket spec states that the mark should
2996 * not be cleared when peeking. We follow the latter.
2999 (SS_OOBPEND
|SS_HAVEOOBDATA
|SS_RCVATMARK
)) &&
3000 (uiop
->uio_resid
!= saved_resid
) &&
3001 !(flags
& MSG_PEEK
)) {
3002 sorecv_update_oobstate(so
);
3005 mutex_enter(&so
->so_lock
);
3006 /* Set MSG_EOR based on MOREDATA */
3007 if (!(rval
.r_val1
& MOREDATA
)) {
3008 if (so
->so_state
& SS_SAVEDEOR
) {
3009 msg
->msg_flags
|= MSG_EOR
;
3010 so
->so_state
&= ~SS_SAVEDEOR
;
3014 * If some data was received (i.e. not EOF) and the
3015 * read/recv* has not been satisfied wait for some more.
3017 if ((flags
& MSG_WAITALL
) && !(msg
->msg_flags
& MSG_EOR
) &&
3018 uiop
->uio_resid
!= saved_resid
&& uiop
->uio_resid
> 0) {
3019 mutex_exit(&so
->so_lock
);
3020 pflag
= opflag
| MSG_NOMARK
;
3026 /* strsock_proto has already verified length and alignment */
3027 tpr
= (union T_primitives
*)mp
->b_rptr
;
3028 dprintso(so
, 1, ("sotpi_recvmsg: type %d\n", tpr
->type
));
3030 switch (tpr
->type
) {
3033 (SS_OOBPEND
|SS_HAVEOOBDATA
|SS_RCVATMARK
)) &&
3034 (uiop
->uio_resid
!= saved_resid
) &&
3035 !(flags
& MSG_PEEK
)) {
3036 sorecv_update_oobstate(so
);
3040 * Set msg_flags to MSG_EOR based on
3041 * MORE_flag and MOREDATA.
3043 mutex_enter(&so
->so_lock
);
3044 so
->so_state
&= ~SS_SAVEDEOR
;
3045 if (!(tpr
->data_ind
.MORE_flag
& 1)) {
3046 if (!(rval
.r_val1
& MOREDATA
))
3047 msg
->msg_flags
|= MSG_EOR
;
3049 so
->so_state
|= SS_SAVEDEOR
;
3053 * If some data was received (i.e. not EOF) and the
3054 * read/recv* has not been satisfied wait for some more.
3056 if ((flags
& MSG_WAITALL
) && !(msg
->msg_flags
& MSG_EOR
) &&
3057 uiop
->uio_resid
!= saved_resid
&& uiop
->uio_resid
> 0) {
3058 mutex_exit(&so
->so_lock
);
3059 pflag
= opflag
| MSG_NOMARK
;
3064 case T_UNITDATA_IND
: {
3066 t_uscalar_t addrlen
;
3072 (SS_OOBPEND
|SS_HAVEOOBDATA
|SS_RCVATMARK
)) &&
3073 (uiop
->uio_resid
!= saved_resid
) &&
3074 !(flags
& MSG_PEEK
)) {
3075 sorecv_update_oobstate(so
);
3079 /* Caller wants source address */
3080 addrlen
= tpr
->unitdata_ind
.SRC_length
;
3082 tpr
->unitdata_ind
.SRC_offset
,
3087 eprintsoline(so
, error
);
3090 if (so
->so_family
== AF_UNIX
) {
3092 * Can not use the transport level address.
3093 * If there is a SO_SRCADDR option carrying
3094 * the socket level address it will be
3101 optlen
= tpr
->unitdata_ind
.OPT_length
;
3103 t_uscalar_t ncontrollen
;
3106 * Extract any source address option.
3107 * Determine how large cmsg buffer is needed.
3110 tpr
->unitdata_ind
.OPT_offset
,
3111 optlen
, __TPI_ALIGN_SIZE
);
3116 eprintsoline(so
, error
);
3119 if (so
->so_family
== AF_UNIX
)
3120 so_getopt_srcaddr(opt
, optlen
, &addr
, &addrlen
);
3121 ncontrollen
= so_cmsglen(mp
, opt
, optlen
);
3122 if (controllen
!= 0)
3123 controllen
= ncontrollen
;
3124 else if (ncontrollen
!= 0)
3125 msg
->msg_flags
|= MSG_CTRUNC
;
3132 * Return address to caller.
3133 * Caller handles truncation if length
3134 * exceeds msg_namelen.
3135 * NOTE: AF_UNIX NUL termination is ensured by
3136 * the sender's copyin_name().
3138 abuf
= kmem_alloc(addrlen
, KM_SLEEP
);
3140 bcopy(addr
, abuf
, addrlen
);
3141 msg
->msg_name
= abuf
;
3142 msg
->msg_namelen
= addrlen
;
3145 if (controllen
!= 0) {
3147 * Return control msg to caller.
3148 * Caller handles truncation if length
3149 * exceeds msg_controllen.
3151 control
= kmem_zalloc(controllen
, KM_SLEEP
);
3153 error
= so_opt2cmsg(mp
, opt
, optlen
, control
,
3157 if (msg
->msg_namelen
!= 0)
3158 kmem_free(msg
->msg_name
,
3160 kmem_free(control
, controllen
);
3161 eprintsoline(so
, error
);
3164 msg
->msg_control
= control
;
3165 msg
->msg_controllen
= controllen
;
3171 case T_OPTDATA_IND
: {
3172 struct T_optdata_req
*tdr
;
3177 (SS_OOBPEND
|SS_HAVEOOBDATA
|SS_RCVATMARK
)) &&
3178 (uiop
->uio_resid
!= saved_resid
) &&
3179 !(flags
& MSG_PEEK
)) {
3180 sorecv_update_oobstate(so
);
3183 tdr
= (struct T_optdata_req
*)mp
->b_rptr
;
3184 optlen
= tdr
->OPT_length
;
3186 t_uscalar_t ncontrollen
;
3188 * Determine how large cmsg buffer is needed.
3191 tpr
->optdata_ind
.OPT_offset
,
3192 optlen
, __TPI_ALIGN_SIZE
);
3197 eprintsoline(so
, error
);
3201 ncontrollen
= so_cmsglen(mp
, opt
, optlen
);
3202 if (controllen
!= 0)
3203 controllen
= ncontrollen
;
3204 else if (ncontrollen
!= 0)
3205 msg
->msg_flags
|= MSG_CTRUNC
;
3210 if (controllen
!= 0) {
3212 * Return control msg to caller.
3213 * Caller handles truncation if length
3214 * exceeds msg_controllen.
3216 control
= kmem_zalloc(controllen
, KM_SLEEP
);
3218 error
= so_opt2cmsg(mp
, opt
, optlen
, control
,
3222 kmem_free(control
, controllen
);
3223 eprintsoline(so
, error
);
3226 msg
->msg_control
= control
;
3227 msg
->msg_controllen
= controllen
;
3231 * Set msg_flags to MSG_EOR based on
3232 * DATA_flag and MOREDATA.
3234 mutex_enter(&so
->so_lock
);
3235 so
->so_state
&= ~SS_SAVEDEOR
;
3236 if (!(tpr
->data_ind
.MORE_flag
& 1)) {
3237 if (!(rval
.r_val1
& MOREDATA
))
3238 msg
->msg_flags
|= MSG_EOR
;
3240 so
->so_state
|= SS_SAVEDEOR
;
3244 * If some data was received (i.e. not EOF) and the
3245 * read/recv* has not been satisfied wait for some more.
3246 * Not possible to wait if control info was received.
3248 if ((flags
& MSG_WAITALL
) && !(msg
->msg_flags
& MSG_EOR
) &&
3250 uiop
->uio_resid
!= saved_resid
&& uiop
->uio_resid
> 0) {
3251 mutex_exit(&so
->so_lock
);
3252 pflag
= opflag
| MSG_NOMARK
;
3257 case T_EXDATA_IND
: {
3259 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld "
3261 sti
->sti_oobsigcnt
, sti
->sti_oobcnt
,
3262 saved_resid
- uiop
->uio_resid
,
3263 pr_state(so
->so_state
, so
->so_mode
)));
3265 * kstrgetmsg handles MSGMARK so there is nothing to
3266 * inspect in the T_EXDATA_IND.
3267 * strsock_proto makes the stream head queue the T_EXDATA_IND
3268 * as a separate message with no M_DATA component. Furthermore,
3269 * the stream head does not consolidate M_DATA messages onto
3270 * an MSGMARK'ed message ensuring that the T_EXDATA_IND
3271 * remains a message by itself. This is needed since MSGMARK
3272 * marks both the whole message as well as the last byte
3276 ASSERT(uiop
->uio_resid
== saved_resid
); /* No data */
3277 if (flags
& MSG_PEEK
) {
3279 * Even though we are peeking we consume the
3280 * T_EXDATA_IND thereby moving the mark information
3281 * to SS_RCVATMARK. Then the oob code below will
3282 * retry the peeking kstrgetmsg.
3283 * Note that the stream head read queue is
3284 * never flushed without holding SOREADLOCKED
3285 * thus the T_EXDATA_IND can not disappear
3289 ("sotpi_recvmsg: consume EXDATA_IND "
3290 "counts %d/%d state %s\n",
3293 pr_state(so
->so_state
, so
->so_mode
)));
3295 pflag
= MSG_ANY
| MSG_DELAYERROR
;
3296 if (so
->so_mode
& SM_ATOMIC
)
3297 pflag
|= MSG_DISCARDTAIL
;
3302 error
= kstrgetmsg(SOTOV(so
), &mp
, uiop
,
3303 &pri
, &pflag
, (clock_t)-1, &rval
);
3304 ASSERT(uiop
->uio_resid
== saved_resid
);
3308 if (error
!= EWOULDBLOCK
&& error
!= EINTR
) {
3309 eprintsoline(so
, error
);
3311 #endif /* SOCK_DEBUG */
3315 tpr
= (union T_primitives
*)mp
->b_rptr
;
3316 ASSERT(tpr
->type
== T_EXDATA_IND
);
3318 } /* end "if (flags & MSG_PEEK)" */
3321 * Decrement the number of queued and pending oob.
3323 * SS_RCVATMARK is cleared when we read past a mark.
3324 * SS_HAVEOOBDATA is cleared when we've read past the
3326 * SS_OOBPEND is cleared if we've read past the last
3327 * mark and no (new) SIGURG has been posted.
3329 mutex_enter(&so
->so_lock
);
3330 ASSERT(so_verify_oobstate(so
));
3331 ASSERT(sti
->sti_oobsigcnt
>= sti
->sti_oobcnt
);
3332 ASSERT(sti
->sti_oobsigcnt
> 0);
3333 sti
->sti_oobsigcnt
--;
3334 ASSERT(sti
->sti_oobcnt
> 0);
3337 * Since the T_EXDATA_IND has been removed from the stream
3338 * head, but we have not read data past the mark,
3339 * sockfs needs to track that the socket is still at the mark.
3341 * Since no data was received call kstrgetmsg again to wait
3344 so
->so_state
|= SS_RCVATMARK
;
3345 mutex_exit(&so
->so_lock
);
3347 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n",
3348 sti
->sti_oobsigcnt
, sti
->sti_oobcnt
,
3349 pr_state(so
->so_state
, so
->so_mode
)));
3354 cmn_err(CE_CONT
, "sotpi_recvmsg: so %p prim %d mp %p\n",
3355 (void *)so
, tpr
->type
, (void *)mp
);
3359 eprintsoline(so
, error
);
3364 mutex_enter(&so
->so_lock
);
3366 so_unlock_read(so
); /* Clear SOREADLOCKED */
3367 mutex_exit(&so
->so_lock
);
3372 * Sending data with options on a datagram socket.
3373 * Assumes caller has verified that SS_ISBOUND etc. are set.
3375 * For AF_UNIX the destination address may be already in
3376 * internal form, as indicated by sti->sti_faddr_noxlate
3377 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to
3378 * translate the destination address to internal form.
3380 * The source address is passed as an option. If passing
3381 * file descriptors, those are passed as file pointers in
/*
 * sosend_dgramcmsg: send one datagram together with control data.
 *
 * so, uiop            - socket and the user data to be sent.
 * name, namelen       - destination address; asserted to be present.
 * control, controllen - control message buffer; asserted to be present.
 * flags               - tested here for MSG_SENDTO_NOXLATE.
 *
 * NOTE(review): several original lines are absent from this listing
 * (return type, braces, some declarations, conditions and returns), so
 * this summary is limited to the visible steps.
 *
 * Visible flow:
 *  - uio_resid is compared with sti_tidu_size;
 *  - so_addr_verify() runs unless the destination is already in
 *    internal form (sti_faddr_noxlate or MSG_SENDTO_NOXLATE);
 *  - for AF_UNIX the source address is taken from sti_laddr_sa and the
 *    destination may be translated with so_ux_addr_xlate();
 *  - a T_UNITDATA_REQ is sized from so_optlen() plus any source option;
 *  - so_getfdopt() extracts descriptors; EOPNOTSUPP is returned when
 *    so_mode lacks SM_FDPASSING (presumably only if descriptors were
 *    found; the guarding condition is not shown);
 *  - the message is assembled with soappendmsg()/so_cmsg2opt(), audited
 *    with audit_sock() and sent with kstrputmsg().
 */
3385 sosend_dgramcmsg(struct sonode
*so
, struct sockaddr
*name
, socklen_t namelen
,
3386 struct uio
*uiop
, void *control
, t_uscalar_t controllen
, int flags
)
3388 struct T_unitdata_req tudr
;
3397 struct T_opthdr toh
;
3398 struct fdbuf
*fdbuf
;
3402 sotpi_info_t
*sti
= SOTOTPI(so
);
3404 ASSERT(name
&& namelen
);
3405 ASSERT(control
&& controllen
);
3407 len
= uiop
->uio_resid
;
/*
 * NOTE(review): the action taken when the data is larger than
 * sti_tidu_size is on a line absent from this listing.
 */
3408 if (len
> (ssize_t
)sti
->sti_tidu_size
) {
3412 if (sti
->sti_faddr_noxlate
== 0 &&
3413 (flags
& MSG_SENDTO_NOXLATE
) == 0) {
3415 * Length and family checks.
3416 * Don't verify internal form.
3418 error
= so_addr_verify(so
, name
, namelen
);
3420 eprintsoline(so
, error
);
3425 if (so
->so_family
== AF_UNIX
) {
3426 if (sti
->sti_faddr_noxlate
) {
3428 * Already have a transport internal address. Do not
3429 * pass any (transport internal) source address.
3435 } else if (flags
& MSG_SENDTO_NOXLATE
) {
3437 * Have an internal form dest. address.
3438 * Pass the source address as usual.
3442 src
= sti
->sti_laddr_sa
;
3443 srclen
= (socklen_t
)sti
->sti_laddr_len
;
3446 * Pass the sockaddr_un source address as an option
3447 * and translate the remote address.
3449 * Note that this code does not prevent sti_laddr_sa
3450 * from changing while it is being used. Thus
3451 * if an unbind+bind occurs concurrently with this
3452 * send the peer might see a partially new and a
3453 * partially old "from" address.
3455 src
= sti
->sti_laddr_sa
;
3456 srclen
= (socklen_t
)sti
->sti_laddr_len
;
3458 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n",
3461 * The sendmsg caller specified a destination
3462 * address, which we must translate into our
3463 * internal form. addr = &sti->sti_ux_taddr
3465 error
= so_ux_addr_xlate(so
, name
, namelen
,
3468 eprintsoline(so
, error
);
3478 optlen
= so_optlen(control
, controllen
);
3479 tudr
.PRIM_type
= T_UNITDATA_REQ
;
3480 tudr
.DEST_length
= addrlen
;
3481 tudr
.DEST_offset
= (t_scalar_t
)sizeof (tudr
);
3483 tudr
.OPT_length
= (t_scalar_t
)(optlen
+ sizeof (toh
) +
3484 _TPI_ALIGN_TOPT(srclen
));
3486 tudr
.OPT_length
= optlen
;
3487 tudr
.OPT_offset
= (t_scalar_t
)(sizeof (tudr
) +
3488 _TPI_ALIGN_TOPT(addrlen
));
3490 size
= tudr
.OPT_offset
+ tudr
.OPT_length
;
3493 * File descriptors only when SM_FDPASSING set.
3495 error
= so_getfdopt(control
, controllen
, &fds
, &fdlen
);
3499 if (!(so
->so_mode
& SM_FDPASSING
))
3500 return (EOPNOTSUPP
);
3502 error
= fdbuf_create(fds
, fdlen
, &fdbuf
);
3507 * Pre-allocate enough additional space for lower level modules
3508 * to append an option (e.g. see tl_unitdata). The following
3509 * is enough extra space for the largest option we might append.
3511 size
+= sizeof (struct T_opthdr
) + ucredsize
;
3512 mp
= fdbuf_allocmsg(size
, fdbuf
);
3514 mp
= soallocproto(size
, _ALLOC_INTR
, CRED());
3517 * Caught a signal waiting for memory.
3518 * Let send* return EINTR.
3523 soappendmsg(mp
, &tudr
, sizeof (tudr
));
3524 soappendmsg(mp
, addr
, addrlen
);
3525 mp
->b_wptr
+= _TPI_ALIGN_TOPT(addrlen
) - addrlen
;
3528 ASSERT(fdbuf
!= NULL
);
3529 toh
.level
= SOL_SOCKET
;
3530 toh
.name
= SO_FILEP
;
3531 toh
.len
= fdbuf
->fd_size
+
3532 (t_uscalar_t
)sizeof (struct T_opthdr
);
3534 soappendmsg(mp
, &toh
, sizeof (toh
));
3535 soappendmsg(mp
, fdbuf
, fdbuf
->fd_size
);
3536 ASSERT(__TPI_TOPT_ISALIGNED(mp
->b_wptr
));
3540 * There is a AF_UNIX sockaddr_un to include as a source
3543 toh
.level
= SOL_SOCKET
;
3544 toh
.name
= SO_SRCADDR
;
3545 toh
.len
= (t_uscalar_t
)(srclen
+ sizeof (struct T_opthdr
));
3547 soappendmsg(mp
, &toh
, sizeof (toh
));
3548 soappendmsg(mp
, src
, srclen
);
3549 mp
->b_wptr
+= _TPI_ALIGN_TOPT(srclen
) - srclen
;
3550 ASSERT(__TPI_TOPT_ISALIGNED(mp
->b_wptr
));
3552 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
3553 so_cmsg2opt(control
, controllen
, mp
);
3555 * Normally at most 3 bytes left in the message, but we might have
3556 * allowed for extra space if we're passing fd's through.
3558 ASSERT(MBLKL(mp
) <= (ssize_t
)size
);
3560 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
3562 audit_sock(T_UNITDATA_REQ
, strvp2wq(SOTOV(so
)), mp
, 0);
3564 error
= kstrputmsg(SOTOV(so
), mp
, uiop
, len
, 0, MSG_BAND
, 0);
3567 eprintsoline(so
, error
);
3569 #endif /* SOCK_DEBUG */
3574 * Sending data with options on a connected stream socket.
3575 * Assumes caller has verified that SS_ISCONNECTED is set.
/*
 * sosend_svccmsg: send data with control options on a connected socket.
 *
 * so, uiop            - socket and the user data to be sent.
 * more                - caller hint that more data follows (see the
 *                       MORE flag comment in the body).
 * control, controllen - control message buffer turned into TPI options.
 * flags               - send flags (no use is visible in this listing).
 *
 * Returns EOPNOTSUPP when so_mode lacks SM_OPTDATA, and also when
 * so_mode lacks SM_FDPASSING at the descriptor check (presumably only
 * if so_getfdopt() found descriptors; that condition is not shown).
 *
 * NOTE(review): several original lines are absent from this listing
 * (return type, braces, declarations, the opening of the do loop and the
 * returns), so this summary is limited to the visible steps.
 *
 * Visible flow: a T_OPTDATA_REQ is sized from so_optlen(), the message
 * is allocated with fdbuf_allocmsg() or soallocproto(), filled with
 * soappendmsg()/so_cmsg2opt() and sent with kstrputmsg(); this repeats
 * while uio_resid is positive, rechecking SS_CANTSENDMORE and so_error
 * between iterations.
 */
3578 sosend_svccmsg(struct sonode
*so
, struct uio
*uiop
, int more
, void *control
,
3579 t_uscalar_t controllen
, int flags
)
3581 struct T_optdata_req tdr
;
3586 struct fdbuf
*fdbuf
;
3590 struct T_opthdr toh
;
3591 sotpi_info_t
*sti
= SOTOTPI(so
);
3594 ("sosend_svccmsg: resid %ld bytes\n", uiop
->uio_resid
));
3597 * Has to be bound and connected. However, since no locks are
3598 * held the state could have changed after sotpi_sendmsg checked it
3599 * thus it is not possible to ASSERT on the state.
3602 /* Options on connection-oriented only when SM_OPTDATA set. */
3603 if (!(so
->so_mode
& SM_OPTDATA
))
3604 return (EOPNOTSUPP
);
3608 * Set the MORE flag if uio_resid does not fit in this
3609 * message or if the caller passed in "more".
3610 * Error for transports with zero tidu_size.
3612 tdr
.PRIM_type
= T_OPTDATA_REQ
;
3613 iosize
= sti
->sti_tidu_size
;
3616 if (uiop
->uio_resid
> iosize
) {
3623 iosize
= uiop
->uio_resid
;
3625 dprintso(so
, 1, ("sosend_svccmsg: sending %d, %ld bytes\n",
3626 tdr
.DATA_flag
, iosize
));
3628 optlen
= so_optlen(control
, controllen
);
3629 tdr
.OPT_length
= optlen
;
3630 tdr
.OPT_offset
= (t_scalar_t
)sizeof (tdr
);
3632 size
= (int)sizeof (tdr
) + optlen
;
3634 * File descriptors only when SM_FDPASSING set.
3636 error
= so_getfdopt(control
, controllen
, &fds
, &fdlen
);
3640 if (!(so
->so_mode
& SM_FDPASSING
))
3641 return (EOPNOTSUPP
);
3643 error
= fdbuf_create(fds
, fdlen
, &fdbuf
);
3648 * Pre-allocate enough additional space for lower level
3649 * modules to append an option (e.g. see tl_unitdata).
3650 * The following is enough extra space for the largest
3651 * option we might append.
3653 size
+= sizeof (struct T_opthdr
) + ucredsize
;
3654 mp
= fdbuf_allocmsg(size
, fdbuf
);
3656 mp
= soallocproto(size
, _ALLOC_INTR
, CRED());
3659 * Caught a signal waiting for memory.
3660 * Let send* return EINTR.
3665 soappendmsg(mp
, &tdr
, sizeof (tdr
));
3668 ASSERT(fdbuf
!= NULL
);
3669 toh
.level
= SOL_SOCKET
;
3670 toh
.name
= SO_FILEP
;
3671 toh
.len
= fdbuf
->fd_size
+
3672 (t_uscalar_t
)sizeof (struct T_opthdr
);
3674 soappendmsg(mp
, &toh
, sizeof (toh
));
3675 soappendmsg(mp
, fdbuf
, fdbuf
->fd_size
);
3676 ASSERT(__TPI_TOPT_ISALIGNED(mp
->b_wptr
));
3678 so_cmsg2opt(control
, controllen
, mp
);
3680 * Normally at most 3 bytes left in the message, but we might
3681 * have allowed for extra space if we're passing fd's through.
3683 ASSERT(MBLKL(mp
) <= (ssize_t
)size
);
3685 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
3687 error
= kstrputmsg(SOTOV(so
), mp
, uiop
, iosize
,
3690 eprintsoline(so
, error
);
3694 if (uiop
->uio_resid
> 0) {
3696 * Recheck for fatal errors. Fail write even though
3697 * some data have been written. This is consistent
3698 * with strwrite semantics and BSD sockets semantics.
3700 if (so
->so_state
& SS_CANTSENDMORE
) {
3701 eprintsoline(so
, error
);
3704 if (so
->so_error
!= 0) {
3705 mutex_enter(&so
->so_lock
);
3706 error
= sogeterr(so
, B_TRUE
);
3707 mutex_exit(&so
->so_lock
);
3709 eprintsoline(so
, error
);
3714 } while (uiop
->uio_resid
> 0);
3719 * Sending data on a datagram socket.
3720 * Assumes caller has verified that SS_ISBOUND etc. are set.
3722 * For AF_UNIX the destination address may be already in
3723 * internal form, as indicated by sti->sti_faddr_noxlate
3724 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to
3725 * translate the destination address to internal form.
3727 * The source address is passed as an option.
3730 sosend_dgram(struct sonode
*so
, struct sockaddr
*name
, socklen_t namelen
,
3731 struct uio
*uiop
, int flags
)
3733 struct T_unitdata_req tudr
;
3741 sotpi_info_t
*sti
= SOTOTPI(so
);
3743 ASSERT(name
!= NULL
&& namelen
!= 0);
3745 len
= uiop
->uio_resid
;
3746 if (len
> sti
->sti_tidu_size
) {
3751 if (sti
->sti_faddr_noxlate
== 0 &&
3752 (flags
& MSG_SENDTO_NOXLATE
) == 0) {
3754 * Length and family checks.
3755 * Don't verify internal form.
3757 error
= so_addr_verify(so
, name
, namelen
);
3762 if (sti
->sti_direct
) /* Never on AF_UNIX */
3763 return (sodgram_direct(so
, name
, namelen
, uiop
, flags
));
3765 if (so
->so_family
== AF_UNIX
) {
3766 if (sti
->sti_faddr_noxlate
) {
3768 * Already have a transport internal address. Do not
3769 * pass any (transport internal) source address.
3775 } else if (flags
& MSG_SENDTO_NOXLATE
) {
3777 * Have an internal form dest. address.
3778 * Pass the source address as usual.
3782 src
= sti
->sti_laddr_sa
;
3783 srclen
= (socklen_t
)sti
->sti_laddr_len
;
3786 * Pass the sockaddr_un source address as an option
3787 * and translate the remote address.
3789 * Note that this code does not prevent sti_laddr_sa
3790 * from changing while it is being used. Thus
3791 * if an unbind+bind occurs concurrently with this
3792 * send the peer might see a partially new and a
3793 * partially old "from" address.
3795 src
= sti
->sti_laddr_sa
;
3796 srclen
= (socklen_t
)sti
->sti_laddr_len
;
3798 ("sosend_dgram UNIX: srclen %d, src %p\n",
3801 * The sendmsg caller specified a destination
3802 * address, which we must translate into our
3803 * internal form. addr = &sti->sti_ux_taddr
3805 error
= so_ux_addr_xlate(so
, name
, namelen
,
3808 eprintsoline(so
, error
);
3818 tudr
.PRIM_type
= T_UNITDATA_REQ
;
3819 tudr
.DEST_length
= addrlen
;
3820 tudr
.DEST_offset
= (t_scalar_t
)sizeof (tudr
);
3822 tudr
.OPT_length
= 0;
3823 tudr
.OPT_offset
= 0;
3825 mp
= soallocproto2(&tudr
, sizeof (tudr
),
3826 addr
, addrlen
, 0, _ALLOC_INTR
, CRED());
3829 * Caught a signal waiting for memory.
3830 * Let send* return EINTR.
3837 * There is an AF_UNIX sockaddr_un to include as a source
3840 struct T_opthdr toh
;
3843 tudr
.OPT_length
= (t_scalar_t
)(sizeof (toh
) +
3844 _TPI_ALIGN_TOPT(srclen
));
3845 tudr
.OPT_offset
= (t_scalar_t
)(sizeof (tudr
) +
3846 _TPI_ALIGN_TOPT(addrlen
));
3848 toh
.level
= SOL_SOCKET
;
3849 toh
.name
= SO_SRCADDR
;
3850 toh
.len
= (t_uscalar_t
)(srclen
+ sizeof (struct T_opthdr
));
3853 size
= tudr
.OPT_offset
+ tudr
.OPT_length
;
3854 mp
= soallocproto2(&tudr
, sizeof (tudr
),
3855 addr
, addrlen
, size
, _ALLOC_INTR
, CRED());
3858 * Caught a signal waiting for memory.
3859 * Let send* return EINTR.
3864 mp
->b_wptr
+= _TPI_ALIGN_TOPT(addrlen
) - addrlen
;
3865 soappendmsg(mp
, &toh
, sizeof (toh
));
3866 soappendmsg(mp
, src
, srclen
);
3867 mp
->b_wptr
+= _TPI_ALIGN_TOPT(srclen
) - srclen
;
3868 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
3872 audit_sock(T_UNITDATA_REQ
, strvp2wq(SOTOV(so
)), mp
, 0);
3874 error
= kstrputmsg(SOTOV(so
), mp
, uiop
, len
, 0, MSG_BAND
, 0);
3878 eprintsoline(so
, error
);
3880 #endif /* SOCK_DEBUG */
3885 * Sending data on a connected stream socket.
3886 * Assumes caller has verified that SS_ISCONNECTED is set.
3889 sosend_svc(struct sonode
*so
, struct uio
*uiop
, t_scalar_t prim
, int more
,
3892 struct T_data_req tdr
;
3896 sotpi_info_t
*sti
= SOTOTPI(so
);
3899 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n",
3900 (void *)so
, uiop
->uio_resid
, prim
, sflag
));
3903 * Has to be bound and connected. However, since no locks are
3904 * held the state could have changed after sotpi_sendmsg checked it
3905 * thus it is not possible to ASSERT on the state.
3910 * Set the MORE flag if uio_resid does not fit in this
3911 * message or if the caller passed in "more".
3912 * Error for transports with zero tidu_size.
3914 tdr
.PRIM_type
= prim
;
3915 iosize
= sti
->sti_tidu_size
;
3918 if (uiop
->uio_resid
> iosize
) {
3925 iosize
= uiop
->uio_resid
;
3927 dprintso(so
, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n",
3928 prim
, tdr
.MORE_flag
, iosize
));
3929 mp
= soallocproto1(&tdr
, sizeof (tdr
), 0, _ALLOC_INTR
, CRED());
3932 * Caught a signal waiting for memory.
3933 * Let send* return EINTR.
3938 error
= kstrputmsg(SOTOV(so
), mp
, uiop
, iosize
,
3939 0, sflag
| MSG_BAND
, 0);
3941 eprintsoline(so
, error
);
3944 if (uiop
->uio_resid
> 0) {
3946 * Recheck for fatal errors. Fail write even though
3947 * some data have been written. This is consistent
3948 * with strwrite semantics and BSD sockets semantics.
3950 if (so
->so_state
& SS_CANTSENDMORE
) {
3951 eprintsoline(so
, error
);
3954 if (so
->so_error
!= 0) {
3955 mutex_enter(&so
->so_lock
);
3956 error
= sogeterr(so
, B_TRUE
);
3957 mutex_exit(&so
->so_lock
);
3959 eprintsoline(so
, error
);
3964 } while (uiop
->uio_resid
> 0);
3969 * Check the state for errors and call the appropriate send function.
3971 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set)
3972 * this function issues a setsockopt to toggle SO_DONTROUTE before and
3973 * after sending the message.
3975 * The caller may optionally specify a destination address, for either
3976 * stream or datagram sockets. This table summarizes the cases:
3978 * Socket type Dest. given Connected Result
3979 * ----------- ----------- --------- --------------
3980 * Stream * Yes send to conn. addr.
3981 * Stream * No error ENOTCONN
3982 * Dgram yes * send to given addr.
3983 * Dgram no yes send to conn. addr.
3984 * Dgram no no error EDESTADDRREQ
3986 * There are subtleties around the destination address when using
3987 * AF_UNIX datagram sockets. When the sendmsg call specifies the
3988 * destination address, it's in (struct sockaddr_un) form and we
3989 * need to translate it to our internal form (struct so_ux_addr).
3991 * When the sendmsg call does not specify a destination address
3992 * we're using the peer address saved during sotpi_connect, and
3993 * that address is already in internal form. In this case, the
3994 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
3995 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
3996 * those functions should skip translation to internal form.
3997 * Avoiding that translation is not only more efficient, but it's
3998 * also necessary when a process does a connect on an AF_UNIX
3999 * datagram socket and then drops privileges. After the process
4000 * has dropped privileges, it may no longer be able to look up the
4001 * external name in the filesystem, but it should still be
4002 * able to send messages on the connected socket by leaving the
4003 * destination name unspecified.
4005 * Yet more subtleties arise with sockets connected by socketpair(),
4006 * which puts internal form addresses in the fields where normally
4007 * the external form is found, and sets sti_faddr_noxlate=1, which
4008 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
4009 * to skip translation of destination addresses to internal form.
4010 * However, beware that the flag sti_faddr_noxlate=1 also triggers
4011 * different behaviour almost everywhere AF_UNIX addresses appear.
4014 sotpi_sendmsg(struct sonode
*so
, struct msghdr
*msg
, struct uio
*uiop
,
4020 struct sockaddr
*name
;
4021 t_uscalar_t namelen
;
4024 sotpi_info_t
*sti
= SOTOTPI(so
);
4026 dprintso(so
, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n",
4027 (void *)so
, (void *)msg
, msg
->msg_flags
,
4028 pr_state(so
->so_state
, so
->so_mode
), so
->so_error
));
4030 if (so
->so_is_stream
) {
4031 /* The imaginary "sockmod" has been popped - act as a stream */
4032 so_update_attrs(so
, SOMOD
);
4033 return (strwrite(SOTOV(so
), uiop
, cr
));
4036 mutex_enter(&so
->so_lock
);
4037 so_state
= so
->so_state
;
4039 if (so_state
& SS_CANTSENDMORE
) {
4040 mutex_exit(&so
->so_lock
);
4044 if (so
->so_error
!= 0) {
4045 error
= sogeterr(so
, B_TRUE
);
4047 mutex_exit(&so
->so_lock
);
4052 name
= (struct sockaddr
*)msg
->msg_name
;
4053 namelen
= msg
->msg_namelen
;
4054 flags
= msg
->msg_flags
;
4057 * Historically, this function does not validate the flags
4058 * passed in, and any errant bits are ignored. However,
4059 * we would not want any such errant flag bits accidentally
4060 * being treated as one of the internal-only flags, so
4061 * clear the internal-only flag bits.
4063 flags
&= ~MSG_SENDTO_NOXLATE
;
4065 so_mode
= so
->so_mode
;
4068 if (!(so_state
& SS_ISCONNECTED
)) {
4069 mutex_exit(&so
->so_lock
);
4070 if (so_mode
& SM_CONNREQUIRED
)
4073 return (EDESTADDRREQ
);
4076 * This is a connected socket.
4078 if (so_mode
& SM_CONNREQUIRED
) {
4080 * This is a connected STREAM socket,
4081 * destination not specified.
4087 * Datagram send on connected socket with
4088 * the destination name not specified.
4089 * Use the peer address from connect.
4091 if (so
->so_family
== AF_UNIX
) {
4093 * Use the (internal form) address saved
4094 * in sotpi_connect. See above.
4096 name
= (void *)&sti
->sti_ux_faddr
;
4097 namelen
= sizeof (sti
->sti_ux_faddr
);
4098 flags
|= MSG_SENDTO_NOXLATE
;
4100 ASSERT(sti
->sti_faddr_sa
);
4101 name
= sti
->sti_faddr_sa
;
4102 namelen
= (t_uscalar_t
)sti
->sti_faddr_len
;
4107 * Sendmsg specifies a destination name
4109 if (!(so_state
& SS_ISCONNECTED
) &&
4110 (so_mode
& SM_CONNREQUIRED
)) {
4111 /* i.e. TCP not connected */
4112 mutex_exit(&so
->so_lock
);
4116 * Ignore the address on connection-oriented sockets.
4117 * Just like BSD this code does not generate an error for
4118 * TCP (a CONNREQUIRED socket) when sending to an address
4119 * passed in with sendto/sendmsg. Instead the data is
4120 * delivered on the connection as if no address had been
4123 if ((so_state
& SS_ISCONNECTED
) &&
4124 !(so_mode
& SM_CONNREQUIRED
)) {
4125 mutex_exit(&so
->so_lock
);
4128 if (!(so_state
& SS_ISBOUND
)) {
4129 so_lock_single(so
); /* Set SOLOCKED */
4130 error
= sotpi_bind(so
, NULL
, 0,
4131 _SOBIND_UNSPEC
|_SOBIND_LOCK_HELD
, cr
);
4132 so_unlock_single(so
, SOLOCKED
);
4134 mutex_exit(&so
->so_lock
);
4135 eprintsoline(so
, error
);
4140 * Handle delayed datagram errors. These are only queued
4141 * when the application sets SO_DGRAM_ERRIND.
4142 * Return the error if we are sending to the address
4143 * that was returned in the last T_UDERROR_IND.
4144 * If sending to some other address discard the delayed
4147 if (sti
->sti_delayed_error
) {
4148 struct T_uderror_ind
*tudi
;
4150 t_uscalar_t addrlen
;
4151 boolean_t match
= B_FALSE
;
4153 ASSERT(sti
->sti_eaddr_mp
);
4154 error
= sti
->sti_delayed_error
;
4155 sti
->sti_delayed_error
= 0;
4157 (struct T_uderror_ind
*)sti
->sti_eaddr_mp
->b_rptr
;
4158 addrlen
= tudi
->DEST_length
;
4159 addr
= sogetoff(sti
->sti_eaddr_mp
,
4160 tudi
->DEST_offset
, addrlen
, 1);
4161 ASSERT(addr
); /* Checked by strsock_proto */
4162 switch (so
->so_family
) {
4164 /* Compare just IP address and port */
4165 sin_t
*sin1
= (sin_t
*)name
;
4166 sin_t
*sin2
= (sin_t
*)addr
;
4168 if (addrlen
== sizeof (sin_t
) &&
4169 namelen
== addrlen
&&
4170 sin1
->sin_port
== sin2
->sin_port
&&
4171 sin1
->sin_addr
.s_addr
==
4172 sin2
->sin_addr
.s_addr
)
4177 /* Compare just IP address and port. Not flow */
4178 sin6_t
*sin1
= (sin6_t
*)name
;
4179 sin6_t
*sin2
= (sin6_t
*)addr
;
4181 if (addrlen
== sizeof (sin6_t
) &&
4182 namelen
== addrlen
&&
4183 sin1
->sin6_port
== sin2
->sin6_port
&&
4184 IN6_ARE_ADDR_EQUAL(&sin1
->sin6_addr
,
4191 if (namelen
== addrlen
&&
4192 bcmp(name
, addr
, namelen
) == 0)
4196 freemsg(sti
->sti_eaddr_mp
);
4197 sti
->sti_eaddr_mp
= NULL
;
4198 mutex_exit(&so
->so_lock
);
4201 ("sockfs delayed error %d for %s\n",
4203 pr_addr(so
->so_family
, name
, namelen
)));
4207 freemsg(sti
->sti_eaddr_mp
);
4208 sti
->sti_eaddr_mp
= NULL
;
4211 mutex_exit(&so
->so_lock
);
4214 if ((flags
& MSG_DONTROUTE
) && !(so
->so_options
& SO_DONTROUTE
)) {
4218 error
= sotpi_setsockopt(so
, SOL_SOCKET
, SO_DONTROUTE
,
4219 &val
, (t_uscalar_t
)sizeof (val
), cr
);
4225 if ((flags
& MSG_OOB
) && !(so_mode
& SM_EXDATA
)) {
4229 if (msg
->msg_controllen
!= 0) {
4230 if (!(so_mode
& SM_CONNREQUIRED
)) {
4231 so_update_attrs(so
, SOMOD
);
4232 error
= sosend_dgramcmsg(so
, name
, namelen
, uiop
,
4233 msg
->msg_control
, msg
->msg_controllen
, flags
);
4235 if (flags
& MSG_OOB
) {
4236 /* Can't generate T_EXDATA_REQ with options */
4240 so_update_attrs(so
, SOMOD
);
4241 error
= sosend_svccmsg(so
, uiop
,
4243 msg
->msg_control
, msg
->msg_controllen
,
4249 so_update_attrs(so
, SOMOD
);
4250 if (!(so_mode
& SM_CONNREQUIRED
)) {
4252 * If there is no SO_DONTROUTE to turn off return immediately
4253 * from send_dgram. This can allow tail-call optimizations.
4256 return (sosend_dgram(so
, name
, namelen
, uiop
, flags
));
4258 error
= sosend_dgram(so
, name
, namelen
, uiop
, flags
);
4263 /* Ignore msg_name in the connected state */
4264 if (flags
& MSG_OOB
) {
4265 prim
= T_EXDATA_REQ
;
4267 * Send down T_EXDATA_REQ even if there is flow
4270 sflag
= MSG_IGNFLOW
;
4272 if (so_mode
& SM_BYTESTREAM
) {
4273 /* Byte stream transport - use write */
4274 dprintso(so
, 1, ("sotpi_sendmsg: write\n"));
4277 * If there is no SO_DONTROUTE to turn off,
4278 * sti_direct is on, and there is no flow
4279 * control, we can take the fast path.
4281 if (!dontroute
&& sti
->sti_direct
!= 0 &&
4282 canputnext(SOTOV(so
)->v_stream
->sd_wrq
)) {
4283 return (sostream_direct(so
, uiop
,
4286 error
= strwrite(SOTOV(so
), uiop
, cr
);
4293 * If there is no SO_DONTROUTE to turn off return immediately
4294 * from sosend_svc. This can allow tail-call optimizations.
4297 return (sosend_svc(so
, uiop
, prim
,
4298 !(flags
& MSG_EOR
), sflag
));
4299 error
= sosend_svc(so
, uiop
, prim
,
4300 !(flags
& MSG_EOR
), sflag
);
4308 (void) sotpi_setsockopt(so
, SOL_SOCKET
, SO_DONTROUTE
,
4309 &val
, (t_uscalar_t
)sizeof (val
), cr
);
4315 * kstrwritemp() has semantics very similar to those of strwrite().
4316 * The main difference is it obtains mblks from the caller and also
4317 * does not do any copy as done in strwrite() from user buffers to
4320 * Currently, this routine is used by sendfile to send data allocated
4321 * within the kernel without any copying. This interface does not use the
4322 * synchronous stream interface as synch. stream interface implies
4326 kstrwritemp(struct vnode
*vp
, mblk_t
*mp
, ushort_t fmode
)
4338 ASSERT(vp
->v_stream
);
4342 direct
= _SOTOTPI(so
)->sti_direct
;
4345 * This is the sockfs direct fast path. canputnext() need
4346 * not be accurate so we don't grab the sd_lock here. If
4347 * we get flow-controlled, we grab sd_lock just before the
4348 * do..while loop below to emulate what strwrite() does.
4351 if (canputnext(wqp
) && direct
&&
4352 !(stp
->sd_flag
& (STWRERR
|STRHUP
|STPLEX
))) {
4353 return (sostream_direct(so
, NULL
, mp
, CRED()));
4354 } else if (stp
->sd_flag
& (STWRERR
|STRHUP
|STPLEX
)) {
4355 /* Fast check of flags before acquiring the lock */
4356 mutex_enter(&stp
->sd_lock
);
4357 error
= strgeterr(stp
, STWRERR
|STRHUP
|STPLEX
, 0);
4358 mutex_exit(&stp
->sd_lock
);
4360 if (!(stp
->sd_flag
& STPLEX
) &&
4361 (stp
->sd_wput_opt
& SW_SIGPIPE
)) {
4368 waitflag
= WRITEWAIT
;
4369 if (stp
->sd_flag
& OLDNDELAY
)
4370 tempmode
= fmode
& ~FNDELAY
;
4374 mutex_enter(&stp
->sd_lock
);
4376 if (canputnext(wqp
)) {
4377 mutex_exit(&stp
->sd_lock
);
4378 if (stp
->sd_wputdatafunc
!= NULL
) {
4379 newmp
= (stp
->sd_wputdatafunc
)(vp
, mp
, NULL
,
4381 if (newmp
== NULL
) {
4382 /* The caller will free mp */
4390 error
= strwaitq(stp
, waitflag
, (ssize_t
)0, tempmode
, -1,
4392 } while (error
== 0 && !done
);
4394 mutex_exit(&stp
->sd_lock
);
4396 * EAGAIN tells the application to try again. ENOMEM
4397 * is returned only if the memory allocation size
4398 * exceeds the physical limits of the system. ENOMEM
4399 * can't be true here.
4401 if (error
== ENOMEM
)
4408 sotpi_sendmblk(struct sonode
*so
, struct msghdr
*msg
, int fflag
,
4409 struct cred
*cr
, mblk_t
**mpp
)
4413 switch (so
->so_family
) {
4419 return (EAFNOSUPPORT
);
4423 if (so
->so_state
& SS_CANTSENDMORE
)
4426 if (so
->so_type
!= SOCK_STREAM
)
4427 return (EOPNOTSUPP
);
4429 if ((so
->so_state
& SS_ISCONNECTED
) == 0)
4432 error
= kstrwritemp(so
->so_vnode
, *mpp
, fflag
);
4439 * Sending data on a datagram socket.
4440 * Assumes caller has verified that SS_ISBOUND etc. are set.
4444 sodgram_direct(struct sonode
*so
, struct sockaddr
*name
,
4445 socklen_t namelen
, struct uio
*uiop
, int flags
)
4447 struct T_unitdata_req tudr
;
4453 struct stdata
*stp
= SOTOV(so
)->v_stream
;
4456 boolean_t connected
;
4457 mblk_t
*mpdata
= NULL
;
4458 sotpi_info_t
*sti
= SOTOTPI(so
);
4459 uint32_t auditing
= AU_AUDITING();
4461 ASSERT(name
!= NULL
&& namelen
!= 0);
4462 ASSERT(!(so
->so_mode
& SM_CONNREQUIRED
));
4463 ASSERT(!(so
->so_mode
& SM_EXDATA
));
4464 ASSERT(so
->so_family
== AF_INET
|| so
->so_family
== AF_INET6
);
4465 ASSERT(SOTOV(so
)->v_type
== VSOCK
);
4467 /* Caller checked for proper length */
4468 len
= uiop
->uio_resid
;
4469 ASSERT(len
<= sti
->sti_tidu_size
);
4471 /* Length and family checks have been done by caller */
4472 ASSERT(name
->sa_family
== so
->so_family
);
4473 ASSERT(so
->so_family
== AF_INET
||
4474 (namelen
== (socklen_t
)sizeof (struct sockaddr_in6
)));
4475 ASSERT(so
->so_family
== AF_INET6
||
4476 (namelen
== (socklen_t
)sizeof (struct sockaddr_in
)));
4481 if (stp
->sd_sidp
!= NULL
&&
4482 (error
= straccess(stp
, JCWRITE
)) != 0)
4485 so_state
= so
->so_state
;
4487 connected
= so_state
& SS_ISCONNECTED
;
4489 tudr
.PRIM_type
= T_UNITDATA_REQ
;
4490 tudr
.DEST_length
= addrlen
;
4491 tudr
.DEST_offset
= (t_scalar_t
)sizeof (tudr
);
4492 tudr
.OPT_length
= 0;
4493 tudr
.OPT_offset
= 0;
4495 mp
= soallocproto2(&tudr
, sizeof (tudr
), addr
, addrlen
, 0,
4496 _ALLOC_INTR
, CRED());
4499 * Caught a signal waiting for memory.
4500 * Let send* return EINTR.
4508 * For UDP we don't break up the copyin into smaller pieces
4509 * as in the TCP case. That means if ENOMEM is returned by
4510 * mcopyinuio() then the uio vector has not been modified at
4511 * all and we fallback to either strwrite() or kstrputmsg()
4512 * below. Note also that we never generate priority messages
4515 udp_wq
= stp
->sd_wrq
->q_next
;
4516 if (canput(udp_wq
) &&
4517 (mpdata
= mcopyinuio(stp
, uiop
, -1, -1, &error
)) != NULL
) {
4518 ASSERT(DB_TYPE(mpdata
) == M_DATA
);
4519 ASSERT(uiop
->uio_resid
== 0);
4525 audit_sock(T_UNITDATA_REQ
, strvp2wq(SOTOV(so
)), mp
, 0);
4527 udp_wput(udp_wq
, mp
);
4531 ASSERT(mpdata
== NULL
);
4532 if (error
!= 0 && error
!= ENOMEM
) {
4538 * For connected, let strwrite() handle the blocking case.
4539 * Otherwise we fall thru and use kstrputmsg().
4542 return (strwrite(SOTOV(so
), uiop
, CRED()));
4545 audit_sock(T_UNITDATA_REQ
, strvp2wq(SOTOV(so
)), mp
, 0);
4547 error
= kstrputmsg(SOTOV(so
), mp
, uiop
, len
, 0, MSG_BAND
, 0);
4551 eprintsoline(so
, error
);
4553 #endif /* SOCK_DEBUG */
4558 sostream_direct(struct sonode
*so
, struct uio
*uiop
, mblk_t
*mp
, cred_t
*cr
)
4560 struct stdata
*stp
= SOTOV(so
)->v_stream
;
4561 ssize_t iosize
, rmax
, maxblk
;
4562 queue_t
*tcp_wq
= stp
->sd_wrq
->q_next
;
4564 int error
= 0, wflag
= 0;
4566 ASSERT(so
->so_mode
& SM_BYTESTREAM
);
4567 ASSERT(SOTOV(so
)->v_type
== VSOCK
);
4569 if (stp
->sd_sidp
!= NULL
&&
4570 (error
= straccess(stp
, JCWRITE
)) != 0)
4575 * kstrwritemp() should have checked sd_flag and
4576 * flow-control before coming here. If we end up
4577 * here it means that we can simply pass down the
4581 if (stp
->sd_wputdatafunc
!= NULL
) {
4582 newmp
= (stp
->sd_wputdatafunc
)(SOTOV(so
), mp
, NULL
,
4584 if (newmp
== NULL
) {
4585 /* The caller will free mp */
4590 tcp_wput(tcp_wq
, mp
);
4594 /* Fallback to strwrite() to do proper error handling */
4595 if (stp
->sd_flag
& (STWRERR
|STRHUP
|STPLEX
|STRDELIM
|OLDNDELAY
))
4596 return (strwrite(SOTOV(so
), uiop
, cr
));
4598 rmax
= stp
->sd_qn_maxpsz
;
4599 ASSERT(rmax
>= 0 || rmax
== INFPSZ
);
4600 if (rmax
== 0 || uiop
->uio_resid
<= 0)
4604 rmax
= uiop
->uio_resid
;
4606 maxblk
= stp
->sd_maxblk
;
4609 iosize
= MIN(uiop
->uio_resid
, rmax
);
4611 mp
= mcopyinuio(stp
, uiop
, iosize
, maxblk
, &error
);
4614 * Fallback to strwrite() for ENOMEM; if this
4615 * is our first time in this routine and the uio
4616 * vector has not been modified, we will end up
4617 * calling strwrite() without any flag set.
4619 if (error
== ENOMEM
)
4624 ASSERT(uiop
->uio_resid
>= 0);
4626 * If mp is non-NULL and ENOMEM is set, it means that
4627 * mcopyinuio() was able to break down some of the user
4628 * data into one or more mblks. Send the partial data
4629 * to tcp and let the rest be handled in strwrite().
4631 ASSERT(error
== 0 || error
== ENOMEM
);
4632 if (stp
->sd_wputdatafunc
!= NULL
) {
4633 newmp
= (stp
->sd_wputdatafunc
)(SOTOV(so
), mp
, NULL
,
4635 if (newmp
== NULL
) {
4636 /* The caller will free mp */
4641 tcp_wput(tcp_wq
, mp
);
4645 if (uiop
->uio_resid
== 0) { /* No more data; we're done */
4648 } else if (error
== ENOMEM
|| !canput(tcp_wq
) || (stp
->sd_flag
&
4649 (STWRERR
|STRHUP
|STPLEX
|STRDELIM
|OLDNDELAY
))) {
4652 * We were able to send down partial data using
4653 * the direct call interface, but are now relying
4654 * on strwrite() to handle the non-fastpath cases.
4655 * If the socket is blocking we will sleep in
4656 * strwaitq() until write is permitted, otherwise,
4657 * we will need to return the amount of bytes
4658 * written so far back to the app. This is the
4659 * reason why we pass NOINTR flag to strwrite()
4660 * for non-blocking socket, because we don't want
4661 * to return EAGAIN when a portion of the user data
4662 * has actually been sent down.
4664 return (strwrite_common(SOTOV(so
), uiop
, cr
, wflag
));
4671 * Update sti_faddr by asking the transport (unless AF_UNIX).
4675 sotpi_getpeername(struct sonode
*so
, struct sockaddr
*name
, socklen_t
*namelen
,
4676 boolean_t accept
, struct cred
*cr
)
4678 struct strbuf strbuf
;
4681 t_uscalar_t addrlen
;
4683 sotpi_info_t
*sti
= SOTOTPI(so
);
4685 dprintso(so
, 1, ("sotpi_getpeername(%p) %s\n",
4686 (void *)so
, pr_state(so
->so_state
, so
->so_mode
)));
4688 ASSERT(*namelen
> 0);
4689 mutex_enter(&so
->so_lock
);
4690 so_lock_single(so
); /* Set SOLOCKED */
4693 bcopy(sti
->sti_faddr_sa
, name
,
4694 MIN(*namelen
, sti
->sti_faddr_len
));
4695 *namelen
= sti
->sti_faddr_noxlate
? 0: sti
->sti_faddr_len
;
4699 if (!(so
->so_state
& SS_ISCONNECTED
)) {
4703 /* Added this check for X/Open */
4704 if ((so
->so_state
& SS_CANTSENDMORE
) && !xnet_skip_checks
) {
4706 if (xnet_check_print
) {
4707 printf("sockfs: X/Open getpeername check => EINVAL\n");
4712 if (sti
->sti_faddr_valid
) {
4713 bcopy(sti
->sti_faddr_sa
, name
,
4714 MIN(*namelen
, sti
->sti_faddr_len
));
4715 *namelen
= sti
->sti_faddr_noxlate
? 0: sti
->sti_faddr_len
;
4720 dprintso(so
, 1, ("sotpi_getpeername (local): %s\n",
4721 pr_addr(so
->so_family
, sti
->sti_faddr_sa
,
4722 (t_uscalar_t
)sti
->sti_faddr_len
)));
4725 if (so
->so_family
== AF_UNIX
) {
4726 /* Transport has different name space - return local info */
4727 if (sti
->sti_faddr_noxlate
)
4733 ASSERT(so
->so_family
!= AF_UNIX
&& sti
->sti_faddr_noxlate
== 0);
4735 ASSERT(sti
->sti_faddr_sa
);
4736 /* Allocate local buffer to use with ioctl */
4737 addrlen
= (t_uscalar_t
)sti
->sti_faddr_maxlen
;
4738 mutex_exit(&so
->so_lock
);
4739 addr
= kmem_alloc(addrlen
, KM_SLEEP
);
4742 * Issue TI_GETPEERNAME with signals masked.
4743 * Put the result in sti_faddr_sa so that getpeername works after
4744 * a shutdown(output).
4745 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
4746 * back to the socket.
4749 strbuf
.maxlen
= addrlen
;
4755 error
= strioctl(SOTOV(so
), TI_GETPEERNAME
, (intptr_t)&strbuf
,
4756 0, K_TO_K
, cr
, &res
);
4759 mutex_enter(&so
->so_lock
);
4761 * If there is an error record the error in so_error but don't fail
4762 * the getpeername. Instead fallback on the recorded
4763 * sti->sti_faddr_sa.
4767 * Various stream head errors can be returned to the ioctl.
4768 * However, it is impossible to determine which ones of
4769 * these are really socket level errors that were incorrectly
4770 * consumed by the ioctl. Thus this code silently ignores the
4771 * error - the code explicitly does not reinstate the error
4772 * using soseterror().
4773 * Experiments have shown that at least this set of
4774 * errors are reported and should not be reinstated on the
4776 * EINVAL E.g. if an I_LINK was in effect when
4777 * getpeername was called.
4778 * EPIPE The ioctl error semantics prefer the write
4779 * side error over the read side error.
4780 * ENOTCONN The transport just got disconnected but
4781 * sockfs had not yet seen the T_DISCON_IND
4782 * when issuing the ioctl.
4785 } else if (res
== 0 && strbuf
.len
> 0 &&
4786 (so
->so_state
& SS_ISCONNECTED
)) {
4787 ASSERT(strbuf
.len
<= (int)sti
->sti_faddr_maxlen
);
4788 sti
->sti_faddr_len
= (socklen_t
)strbuf
.len
;
4789 bcopy(addr
, sti
->sti_faddr_sa
, sti
->sti_faddr_len
);
4790 sti
->sti_faddr_valid
= 1;
4792 bcopy(addr
, name
, MIN(*namelen
, sti
->sti_faddr_len
));
4793 *namelen
= sti
->sti_faddr_len
;
4795 kmem_free(addr
, addrlen
);
4797 dprintso(so
, 1, ("sotpi_getpeername (tp): %s\n",
4798 pr_addr(so
->so_family
, sti
->sti_faddr_sa
,
4799 (t_uscalar_t
)sti
->sti_faddr_len
)));
4802 so_unlock_single(so
, SOLOCKED
);
4803 mutex_exit(&so
->so_lock
);
4808 * Update sti_laddr by asking the transport (unless AF_UNIX).
4811 sotpi_getsockname(struct sonode
*so
, struct sockaddr
*name
, socklen_t
*namelen
,
4814 struct strbuf strbuf
;
4817 t_uscalar_t addrlen
;
4819 sotpi_info_t
*sti
= SOTOTPI(so
);
4821 dprintso(so
, 1, ("sotpi_getsockname(%p) %s\n",
4822 (void *)so
, pr_state(so
->so_state
, so
->so_mode
)));
4824 ASSERT(*namelen
> 0);
4825 mutex_enter(&so
->so_lock
);
4826 so_lock_single(so
); /* Set SOLOCKED */
4830 dprintso(so
, 1, ("sotpi_getsockname (local): %s\n",
4831 pr_addr(so
->so_family
, sti
->sti_laddr_sa
,
4832 (t_uscalar_t
)sti
->sti_laddr_len
)));
4834 if (sti
->sti_laddr_valid
) {
4835 bcopy(sti
->sti_laddr_sa
, name
,
4836 MIN(*namelen
, sti
->sti_laddr_len
));
4837 *namelen
= sti
->sti_laddr_len
;
4841 if (so
->so_family
== AF_UNIX
) {
4843 * Transport has different name space - return local info. If we
4844 * have enough space, let consumers know the family.
4846 if (*namelen
>= sizeof (sa_family_t
)) {
4847 name
->sa_family
= AF_UNIX
;
4848 *namelen
= sizeof (sa_family_t
);
4855 if (!(so
->so_state
& SS_ISBOUND
)) {
4856 /* If not bound, then nothing to return. */
4861 /* Allocate local buffer to use with ioctl */
4862 addrlen
= (t_uscalar_t
)sti
->sti_laddr_maxlen
;
4863 mutex_exit(&so
->so_lock
);
4864 addr
= kmem_alloc(addrlen
, KM_SLEEP
);
4867 * Issue TI_GETMYNAME with signals masked.
4868 * Put the result in sti_laddr_sa so that getsockname works after
4869 * a shutdown(output).
4870 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted
4871 * back to the socket.
4874 strbuf
.maxlen
= addrlen
;
4880 error
= strioctl(SOTOV(so
), TI_GETMYNAME
, (intptr_t)&strbuf
,
4881 0, K_TO_K
, cr
, &res
);
4884 mutex_enter(&so
->so_lock
);
4886 * If there is an error record the error in so_error but don't fail
4887 * the getsockname. Instead fallback on the recorded
4888 * sti->sti_laddr_sa.
4892 * Various stream head errors can be returned to the ioctl.
4893 * However, it is impossible to determine which ones of
4894 * these are really socket level errors that were incorrectly
4895 * consumed by the ioctl. Thus this code silently ignores the
4896 * error - the code explicitly does not reinstate the error
4897 * using soseterror().
4898 * Experiments have shown that at least this set of
4899 * errors are reported and should not be reinstated on the
4901 * EINVAL E.g. if an I_LINK was in effect when
4902 * getsockname was called.
4903 * EPIPE The ioctl error semantics prefer the write
4904 * side error over the read side error.
4907 } else if (res
== 0 && strbuf
.len
> 0 &&
4908 (so
->so_state
& SS_ISBOUND
)) {
4909 ASSERT(strbuf
.len
<= (int)sti
->sti_laddr_maxlen
);
4910 sti
->sti_laddr_len
= (socklen_t
)strbuf
.len
;
4911 bcopy(addr
, sti
->sti_laddr_sa
, sti
->sti_laddr_len
);
4912 sti
->sti_laddr_valid
= 1;
4914 bcopy(addr
, name
, MIN(sti
->sti_laddr_len
, *namelen
));
4915 *namelen
= sti
->sti_laddr_len
;
4917 kmem_free(addr
, addrlen
);
4919 dprintso(so
, 1, ("sotpi_getsockname (tp): %s\n",
4920 pr_addr(so
->so_family
, sti
->sti_laddr_sa
,
4921 (t_uscalar_t
)sti
->sti_laddr_len
)));
4924 so_unlock_single(so
, SOLOCKED
);
4925 mutex_exit(&so
->so_lock
);
4930 * Get socket options. For SOL_SOCKET options some options are handled
4931 * by the sockfs while others use the value recorded in the sonode as a
4932 * fallback should the T_SVR4_OPTMGMT_REQ fail.
4934 * On return at most *optlenp bytes are copied to optval.
4938 sotpi_getsockopt(struct sonode
*so
, int level
, int option_name
,
4939 void *optval
, socklen_t
*optlenp
, int flags
, struct cred
*cr
)
4941 struct T_optmgmt_req optmgmt_req
;
4942 struct T_optmgmt_ack
*optmgmt_ack
;
4944 struct opthdr
*opt_res
;
4947 void *option
= NULL
; /* Set if fallback value */
4948 t_uscalar_t maxlen
= *optlenp
;
4951 struct timeval tmo_val
; /* used for SO_RCVTIMEO, SO_SNDTIMEO */
4952 struct timeval32 tmo_val32
;
4953 struct so_snd_bufinfo snd_bufinfo
; /* used for zero copy */
4955 dprintso(so
, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n",
4956 (void *)so
, level
, option_name
, optval
, (void *)optlenp
,
4957 pr_state(so
->so_state
, so
->so_mode
)));
4959 mutex_enter(&so
->so_lock
);
4960 so_lock_single(so
); /* Set SOLOCKED */
4963 * Check for SOL_SOCKET options.
4964 * Certain SOL_SOCKET options are returned directly whereas
4965 * others only provide a default (fallback) value should
4966 * the T_SVR4_OPTMGMT_REQ fail.
4968 if (level
== SOL_SOCKET
) {
4969 /* Check parameters */
4970 switch (option_name
) {
4979 case SO_USELOOPBACK
:
4988 case SO_DGRAM_ERRIND
:
4989 if (maxlen
< (t_uscalar_t
)sizeof (int32_t)) {
4991 eprintsoline(so
, error
);
4997 if (get_udatamodel() == DATAMODEL_NONE
||
4998 get_udatamodel() == DATAMODEL_NATIVE
) {
4999 if (maxlen
< sizeof (struct timeval
)) {
5001 eprintsoline(so
, error
);
5005 if (maxlen
< sizeof (struct timeval32
)) {
5007 eprintsoline(so
, error
);
5014 if (maxlen
< (t_uscalar_t
)sizeof (struct linger
)) {
5016 eprintsoline(so
, error
);
5020 case SO_SND_BUFINFO
:
5021 if (maxlen
< (t_uscalar_t
)
5022 sizeof (struct so_snd_bufinfo
)) {
5024 eprintsoline(so
, error
);
5030 len
= (t_uscalar_t
)sizeof (uint32_t); /* Default */
5032 switch (option_name
) {
5034 value
= so
->so_type
;
5036 goto copyout
; /* No need to issue T_SVR4_OPTMGMT_REQ */
5039 value
= sogeterr(so
, B_TRUE
);
5041 goto copyout
; /* No need to issue T_SVR4_OPTMGMT_REQ */
5044 if (so
->so_state
& SS_ACCEPTCONN
)
5045 value
= SO_ACCEPTCONN
;
5051 ("sotpi_getsockopt: 0x%x is set\n",
5055 ("sotpi_getsockopt: 0x%x not set\n",
5060 goto copyout
; /* No need to issue T_SVR4_OPTMGMT_REQ */
5067 case SO_USELOOPBACK
:
5069 case SO_DGRAM_ERRIND
:
5070 value
= (so
->so_options
& option_name
);
5074 ("sotpi_getsockopt: 0x%x is set\n",
5078 ("sotpi_getsockopt: 0x%x not set\n",
5083 goto copyout
; /* No need to issue T_SVR4_OPTMGMT_REQ */
5086 * The following options are only returned by sockfs when the
5087 * T_SVR4_OPTMGMT_REQ fails.
5090 option
= &so
->so_linger
;
5091 len
= (t_uscalar_t
)sizeof (struct linger
);
5097 * If the option has not been set then get a default
5098 * value from the write queue. This value is
5099 * returned if the transport fails
5100 * the T_SVR4_OPTMGMT_REQ.
5102 lvalue
= so
->so_sndbuf
;
5104 mutex_exit(&so
->so_lock
);
5105 (void) strqget(strvp2wq(SOTOV(so
))->q_next
,
5106 QHIWAT
, 0, &lvalue
);
5107 mutex_enter(&so
->so_lock
);
5109 ("got SO_SNDBUF %ld from q\n", lvalue
));
5111 value
= (int)lvalue
;
5113 len
= (t_uscalar_t
)sizeof (so
->so_sndbuf
);
5120 * If the option has not been set then get a default
5121 * value from the read queue. This value is
5122 * returned if the transport fails
5123 * the T_SVR4_OPTMGMT_REQ.
5125 lvalue
= so
->so_rcvbuf
;
5127 mutex_exit(&so
->so_lock
);
5128 (void) strqget(RD(strvp2wq(SOTOV(so
))),
5129 QHIWAT
, 0, &lvalue
);
5130 mutex_enter(&so
->so_lock
);
5132 ("got SO_RCVBUF %ld from q\n", lvalue
));
5134 value
= (int)lvalue
;
5136 len
= (t_uscalar_t
)sizeof (so
->so_rcvbuf
);
5140 value
= so
->so_family
;
5142 goto copyout
; /* No need to issue T_SVR4_OPTMGMT_REQ */
5146 * We do not implement the semantics of these options
5147 * thus we shouldn't implement the options either.
5150 value
= so
->so_sndlowat
;
5154 value
= so
->so_rcvlowat
;
5162 if (option_name
== SO_RCVTIMEO
)
5163 val
= drv_hztousec(so
->so_rcvtimeo
);
5165 val
= drv_hztousec(so
->so_sndtimeo
);
5166 tmo_val
.tv_sec
= val
/ (1000 * 1000);
5167 tmo_val
.tv_usec
= val
% (1000 * 1000);
5168 if (get_udatamodel() == DATAMODEL_NONE
||
5169 get_udatamodel() == DATAMODEL_NATIVE
) {
5171 len
= sizeof (struct timeval
);
5173 TIMEVAL_TO_TIMEVAL32(&tmo_val32
, &tmo_val
);
5174 option
= &tmo_val32
;
5175 len
= sizeof (struct timeval32
);
5179 case SO_SND_BUFINFO
: {
5180 snd_bufinfo
.sbi_wroff
=
5181 (so
->so_proto_props
).sopp_wroff
;
5182 snd_bufinfo
.sbi_maxblk
=
5183 (so
->so_proto_props
).sopp_maxblk
;
5184 snd_bufinfo
.sbi_maxpsz
=
5185 (so
->so_proto_props
).sopp_maxpsz
;
5186 snd_bufinfo
.sbi_tail
=
5187 (so
->so_proto_props
).sopp_tail
;
5188 option
= &snd_bufinfo
;
5189 len
= (t_uscalar_t
)sizeof (struct so_snd_bufinfo
);
5195 mutex_exit(&so
->so_lock
);
5198 optmgmt_req
.PRIM_type
= T_SVR4_OPTMGMT_REQ
;
5199 optmgmt_req
.MGMT_flags
= T_CHECK
;
5200 optmgmt_req
.OPT_length
= (t_scalar_t
)(sizeof (oh
) + maxlen
);
5201 optmgmt_req
.OPT_offset
= (t_scalar_t
)sizeof (optmgmt_req
);
5204 oh
.name
= option_name
;
5207 mp
= soallocproto3(&optmgmt_req
, sizeof (optmgmt_req
),
5208 &oh
, sizeof (oh
), NULL
, maxlen
, 0, _ALLOC_SLEEP
, cr
);
5209 /* Let option management work in the presence of data flow control */
5210 error
= kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
5211 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
|MSG_IGNFLOW
, 0);
5213 mutex_enter(&so
->so_lock
);
5215 eprintsoline(so
, error
);
5218 error
= sowaitprim(so
, T_SVR4_OPTMGMT_REQ
, T_OPTMGMT_ACK
,
5219 (t_uscalar_t
)(sizeof (*optmgmt_ack
) + sizeof (*opt_res
)), &mp
, 0);
5221 if (option
!= NULL
) {
5222 /* We have a fallback value */
5226 eprintsoline(so
, error
);
5230 optmgmt_ack
= (struct T_optmgmt_ack
*)mp
->b_rptr
;
5231 opt_res
= (struct opthdr
*)sogetoff(mp
, optmgmt_ack
->OPT_offset
,
5232 optmgmt_ack
->OPT_length
, __TPI_ALIGN_SIZE
);
5233 if (opt_res
== NULL
) {
5234 if (option
!= NULL
) {
5235 /* We have a fallback value */
5240 eprintsoline(so
, error
);
5243 option
= &opt_res
[1];
5245 /* check to ensure that the option is within bounds */
5246 if (((uintptr_t)option
+ opt_res
->len
< (uintptr_t)option
) ||
5247 (uintptr_t)option
+ opt_res
->len
> (uintptr_t)mp
->b_wptr
) {
5248 if (option
!= NULL
) {
5249 /* We have a fallback value */
5254 eprintsoline(so
, error
);
5261 t_uscalar_t size
= MIN(len
, maxlen
);
5262 bcopy(option
, optval
, size
);
5263 bcopy(&size
, optlenp
, sizeof (size
));
5268 so_unlock_single(so
, SOLOCKED
);
5269 mutex_exit(&so
->so_lock
);
5275 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ.
5276 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for
5277 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails -
5278 * setsockopt has to work even if the transport does not support the option.
/*
 * sotpi_setsockopt: set a socket option on a TPI-based socket.
 *
 * What the visible code does:
 *  - applies the X/Open check on SS_CANTSENDMORE (skipped when
 *    xnet_skip_checks is set; the message printed names EINVAL);
 *  - takes SOLOCKED with so_lock_single() and drops it again with
 *    so_unlock_single() at the end;
 *  - sends a T_SVR4_OPTMGMT_REQ (T_NEGOTIATE) made of the request header,
 *    an opthdr and the caller's optval via kstrputmsg(), then waits for
 *    the T_OPTMGMT_ACK with sowaitprim();
 *  - for level SOL_SOCKET, checks optlen against the size expected for the
 *    option and records the value in the sonode (for example so_options,
 *    so_linger, so_sndbuf, so_rcvbuf, so_rcvtimeo, so_sndtimeo);
 *  - ignores ENOPROTOOPT, EPROTO and EINVAL from the transport when
 *    `handled` is set.
 *
 * NOTE(review): this listing has lost lines (braces, case labels, returns,
 * some declarations); check every statement against the complete file
 * before relying on the flow described here.
 */
5282 sotpi_setsockopt(struct sonode
*so
, int level
, int option_name
,
5283 const void *optval
, t_uscalar_t optlen
, struct cred
*cr
)
5285 struct T_optmgmt_req optmgmt_req
;
5289 boolean_t handled
= B_FALSE
;
5291 dprintso(so
, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n",
5292 (void *)so
, level
, option_name
, optval
, optlen
,
5293 pr_state(so
->so_state
, so
->so_mode
)));
5295 /* X/Open requires this check */
5296 if ((so
->so_state
& SS_CANTSENDMORE
) && !xnet_skip_checks
) {
5297 if (xnet_check_print
)
5298 printf("sockfs: X/Open setsockopt check => EINVAL\n");
/* so_lock is only held long enough to set SOLOCKED; it is dropped again */
/* before the request is built and sent. */
5302 mutex_enter(&so
->so_lock
);
5303 so_lock_single(so
); /* Set SOLOCKED */
5304 mutex_exit(&so
->so_lock
);
/* Request layout: T_optmgmt_req, then the opthdr, then optlen bytes of */
/* option value (OPT_offset/OPT_length below describe exactly that). */
5306 optmgmt_req
.PRIM_type
= T_SVR4_OPTMGMT_REQ
;
5307 optmgmt_req
.MGMT_flags
= T_NEGOTIATE
;
5308 optmgmt_req
.OPT_length
= (t_scalar_t
)sizeof (oh
) + optlen
;
5309 optmgmt_req
.OPT_offset
= (t_scalar_t
)sizeof (optmgmt_req
);
5312 oh
.name
= option_name
;
5315 mp
= soallocproto3(&optmgmt_req
, sizeof (optmgmt_req
),
5316 &oh
, sizeof (oh
), optval
, optlen
, 0, _ALLOC_SLEEP
, cr
);
5317 /* Let option management work in the presence of data flow control */
5318 error
= kstrputmsg(SOTOV(so
), mp
, NULL
, 0, 0,
5319 MSG_BAND
|MSG_HOLDSIG
|MSG_IGNERROR
|MSG_IGNFLOW
, 0);
5321 mutex_enter(&so
->so_lock
);
5323 eprintsoline(so
, error
);
5326 error
= sowaitprim(so
, T_SVR4_OPTMGMT_REQ
, T_OPTMGMT_ACK
,
5327 (t_uscalar_t
)sizeof (struct T_optmgmt_ack
), &mp
, 0);
5329 eprintsoline(so
, error
);
5333 /* No need to verify T_optmgmt_ack */
5337 * Check for SOL_SOCKET options and record their values.
5338 * If we know about a SOL_SOCKET parameter and the transport
5339 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or
5340 * EPROTO) we let the setsockopt succeed.
5342 if (level
== SOL_SOCKET
) {
5343 /* Check parameters */
5344 switch (option_name
) {
5350 case SO_USELOOPBACK
:
5358 case SO_DGRAM_ERRIND
:
5359 if (optlen
!= (t_uscalar_t
)sizeof (int32_t)) {
5361 eprintsoline(so
, error
);
/* The timeout options take a struct timeval whose size depends on the */
/* caller's data model, hence the two length checks below. */
5369 if (get_udatamodel() == DATAMODEL_NONE
||
5370 get_udatamodel() == DATAMODEL_NATIVE
) {
5371 if (optlen
!= sizeof (struct timeval
)) {
5373 eprintsoline(so
, error
);
5377 if (optlen
!= sizeof (struct timeval32
)) {
5379 eprintsoline(so
, error
);
5387 if (optlen
!= (t_uscalar_t
)sizeof (struct linger
)) {
5389 eprintsoline(so
, error
);
/* intvalue reads the first 32 bits of optval; it is only meaningful for */
/* the options whose optlen was checked against sizeof (int32_t) above. */
5397 #define intvalue (*(int32_t *)optval)
5399 switch (option_name
) {
5404 error
= ENOPROTOOPT
;
5407 struct linger
*l
= (struct linger
*)optval
;
5409 so
->so_linger
.l_linger
= l
->l_linger
;
5411 so
->so_linger
.l_onoff
= SO_LINGER
;
5412 so
->so_options
|= SO_LINGER
;
5414 so
->so_linger
.l_onoff
= 0;
5415 so
->so_options
&= ~SO_LINGER
;
5423 sock_test_timelimit
= 10 * hz
;
5425 sock_test_timelimit
= 0;
5431 #endif /* SOCK_TEST */
5437 case SO_USELOOPBACK
:
5439 case SO_DGRAM_ERRIND
:
5440 if (intvalue
!= 0) {
5442 ("socket_setsockopt: setting 0x%x\n",
5444 so
->so_options
|= option_name
;
5447 ("socket_setsockopt: clearing 0x%x\n",
5449 so
->so_options
&= ~option_name
;
5453 * The following options are only returned by us when the
5454 * transport layer fails.
5455 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs
5456 * since the transport might adjust the value and not
5457 * return exactly what was set by the application.
5460 so
->so_sndbuf
= intvalue
;
5463 so
->so_rcvbuf
= intvalue
;
5466 so
->so_rcv_timer_interval
= intvalue
;
5470 * We do not implement the semantics of these options
5471 * thus we shouldn't implement the options either.
5474 so
->so_sndlowat
= intvalue
;
5477 so
->so_rcvlowat
= intvalue
;
/*
 * NOTE(review): bcopy() copies from its first argument to its second (see
 * the other bcopy() calls in this file), so the native-model branch below
 * copies `tl` out over optval rather than loading optval into `tl`.  Yet
 * `tl` is what is read on the following lines, and the
 * TIMEVAL32_TO_TIMEVAL() branch fills `tl` from optval.  The two bcopy()
 * pointer arguments look reversed -- confirm and fix in the full source.
 */
5485 if (get_udatamodel() == DATAMODEL_NONE
||
5486 get_udatamodel() == DATAMODEL_NATIVE
)
5487 bcopy(&tl
, (struct timeval
*)optval
,
5488 sizeof (struct timeval
));
5490 TIMEVAL32_TO_TIMEVAL(&tl
,
5491 (struct timeval32
*)optval
);
/* The timeout is flattened to microseconds and stored in clock ticks. */
5492 val
= tl
.tv_sec
* 1000 * 1000 + tl
.tv_usec
;
5493 if (option_name
== SO_RCVTIMEO
)
5494 so
->so_rcvtimeo
= drv_usectohz(val
);
5496 so
->so_sndtimeo
= drv_usectohz(val
);
/* Transport errors are tolerated only for options sockfs itself handled. */
5503 if ((error
== ENOPROTOOPT
|| error
== EPROTO
||
5504 error
== EINVAL
) && handled
) {
5506 ("setsockopt: ignoring error %d for 0x%x\n",
5507 error
, option_name
));
5513 so_unlock_single(so
, SOLOCKED
);
5514 mutex_exit(&so
->so_lock
);
5519 * sotpi_close() is called when the last open reference goes away.
/*
 * sotpi_close: close the stream underneath a TPI socket.
 *
 * With so_lock held and SOLOCKED set, the visible code checks the
 * out-of-band state (so_verify_oobstate()), and when the vnode still has a
 * stream it drops so_lock, detaches the AF_UNIX bound vnode recorded in
 * sti_ux_bound_vp (vn_rele_stream()), calls strclose(), clears
 * vp->v_stream and re-takes so_lock.  It then flushes a pending
 * T_DISCON_IND via so_flush_discon_ind(), releases SOLOCKED, and, if
 * SOCLONE is set in so_flag, drops the driver hold with ddi_rele_driver().
 *
 * NOTE(review): lines are missing from this listing (for example the body
 * of the AF_UNIX branch and the declaration of `dev`); confirm details
 * against the complete file.
 */
5523 sotpi_close(struct sonode
*so
, int flag
, struct cred
*cr
)
5525 struct vnode
*vp
= SOTOV(so
);
5528 sotpi_info_t
*sti
= SOTOTPI(so
);
5530 dprintso(so
, 1, ("sotpi_close(%p, %x) %s\n",
5531 (void *)vp
, flag
, pr_state(so
->so_state
, so
->so_mode
)));
5535 ASSERT(STREAMSTAB(getmajor(dev
)));
5537 mutex_enter(&so
->so_lock
);
5538 so_lock_single(so
); /* Set SOLOCKED */
5540 ASSERT(so_verify_oobstate(so
));
5542 if (vp
->v_stream
!= NULL
) {
5545 if (so
->so_family
== AF_UNIX
) {
5546 /* Could avoid this when CANTSENDMORE for !dgram */
/* so_lock is dropped around the vnode release and strclose() calls. */
5550 mutex_exit(&so
->so_lock
);
5552 * Disassemble the linkage from the AF_UNIX underlying file
5553 * system vnode to this socket (by atomically clearing
5554 * v_stream in vn_rele_stream) before strclose clears sd_vnode
5555 * and frees the stream head.
5557 if ((ux_vp
= sti
->sti_ux_bound_vp
) != NULL
) {
5558 ASSERT(ux_vp
->v_stream
);
5559 sti
->sti_ux_bound_vp
= NULL
;
5560 vn_rele_stream(ux_vp
);
5562 error
= strclose(vp
, flag
, cr
);
5563 vp
->v_stream
= NULL
;
5564 mutex_enter(&so
->so_lock
);
5568 * Flush the T_DISCON_IND on sti_discon_ind_mp.
5570 so_flush_discon_ind(so
);
5572 so_unlock_single(so
, SOLOCKED
);
5573 mutex_exit(&so
->so_lock
);
5576 * Needed for STREAMs.
5577 * Decrement the device driver's reference count for streams
5578 * opened via the clone dip. The driver was held in clone_open().
5579 * The absence of clone_close() forces this asymmetry.
5581 if (so
->so_flag
& SOCLONE
)
5582 ddi_rele_driver(getmajor(dev
));
/*
 * sotpi_ioctl: ioctl entry point for TPI sockets.
 *
 * As far as this listing shows:
 *  - some commands are refused outright with EOPNOTSUPP;
 *  - module-list (plumbing) commands go to socktpi_plumbioctl() with
 *    sti_plumb_lock held;
 *  - when so_is_stream is set, other commands are passed straight to
 *    strioctl();
 *  - the socket-specific commands copy an int32_t or pid_t in or out with
 *    so_copyin()/so_copyout() and, under so_lock, update SS_NDELAY,
 *    SS_ASYNC (through so_flip_async()) or the signal group (through
 *    so_set_siggrp());
 *  - the at-mark query loops while the result is -1, calling
 *    strwaitmark() when the state bits alone do not decide it;
 *  - _I_GETPEERCRED stores so_peercred and so_cpid into the k_peercred_t
 *    passed in arg and takes a hold on the credential with crhold();
 *  - STREAMS ioctls ((cmd & 0xffffff00U) == STR) are refused with
 *    EOPNOTSUPP while so_is_stream is clear; otherwise the command is
 *    handed to strioctl().
 *
 * NOTE(review): the case labels and several returns are missing from this
 * listing, so the command names for most branches cannot be confirmed here.
 */
5588 sotpi_ioctl(struct sonode
*so
, int cmd
, intptr_t arg
, int mode
,
5589 struct cred
*cr
, int32_t *rvalp
)
5591 struct vnode
*vp
= SOTOV(so
);
5592 sotpi_info_t
*sti
= SOTOTPI(so
);
5595 dprintso(so
, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n",
5596 cmd
, arg
, pr_state(so
->so_state
, so
->so_mode
)));
5601 * SIOCSQPTR is valid only when helper stream is created
5607 * Since there's no compelling reason to support these ioctls
5608 * on sockets, and doing so would increase the complexity
5609 * markedly, prevent it.
5611 return (EOPNOTSUPP
);
5619 * To prevent races and inconsistencies between the actual
5620 * state of the stream and the state according to the sonode,
5621 * we serialize all operations which modify or operate on the
5622 * list of modules on the socket's stream.
5624 mutex_enter(&sti
->sti_plumb_lock
);
5625 error
= socktpi_plumbioctl(vp
, cmd
, arg
, mode
, cr
, rvalp
);
5626 mutex_exit(&sti
->sti_plumb_lock
);
5630 if (!so
->so_is_stream
)
5634 * The imaginary "sockmod" has been popped; act as a stream.
5636 return (strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
));
5639 ASSERT(!so
->so_is_stream
);
5642 * Process socket-specific ioctls.
/* FKIOCTL in mode is forwarded to so_copyin()/so_copyout() as their last */
/* argument throughout this function. */
5648 if (so_copyin((void *)arg
, &value
, sizeof (int32_t),
5649 (mode
& (int)FKIOCTL
)))
5652 mutex_enter(&so
->so_lock
);
5654 so
->so_state
|= SS_NDELAY
;
5656 so
->so_state
&= ~SS_NDELAY
;
5658 mutex_exit(&so
->so_lock
);
5665 if (so_copyin((void *)arg
, &value
, sizeof (int32_t),
5666 (mode
& (int)FKIOCTL
)))
5669 mutex_enter(&so
->so_lock
);
5671 * SS_ASYNC flag not already set correctly?
5672 * (!value != !(so->so_state & SS_ASYNC))
5673 * but some engineers find that too hard to read.
5675 if (value
== 0 && (so
->so_state
& SS_ASYNC
) != 0 ||
5676 value
!= 0 && (so
->so_state
& SS_ASYNC
) == 0)
5677 error
= so_flip_async(so
, vp
, mode
, cr
);
5678 mutex_exit(&so
->so_lock
);
5686 if (so_copyin((void *)arg
, &pgrp
, sizeof (pid_t
),
5687 (mode
& (int)FKIOCTL
)))
5690 mutex_enter(&so
->so_lock
);
5691 dprintso(so
, 1, ("setown: new %d old %d\n", pgrp
, so
->so_pgrp
));
/* so_set_siggrp() is only called when the group actually changes. */
5693 if (pgrp
!= so
->so_pgrp
)
5694 error
= so_set_siggrp(so
, vp
, pgrp
, mode
, cr
);
5695 mutex_exit(&so
->so_lock
);
5700 if (so_copyout(&so
->so_pgrp
, (void *)arg
,
5701 sizeof (pid_t
), (mode
& (int)FKIOCTL
)))
5710 * strwaitmark has a finite timeout after which it
5711 * returns -1 if the mark state is undetermined.
5712 * In order to avoid any race between the mark state
5713 * in sockfs and the mark state in the stream head this
5714 * routine loops until the mark state can be determined
5715 * (or the urgent data indication has been removed by some
/* so_state is sampled under so_lock, then examined after dropping it. */
5719 mutex_enter(&so
->so_lock
);
5720 so_state
= so
->so_state
;
5721 mutex_exit(&so
->so_lock
);
5722 if (so_state
& SS_RCVATMARK
) {
5724 } else if (!(so_state
& SS_OOBPEND
)) {
5726 * No SIGURG has been generated -- there is no
5727 * pending or present urgent data. Thus can't
5728 * possibly be at the mark.
5733 * Have the stream head wait until there is
5734 * either some messages on the read queue, or
5735 * STRATMARK or STRNOTATMARK gets set. The
5736 * STRNOTATMARK flag is used so that the
5737 * transport can send up a MSGNOTMARKNEXT
5738 * M_DATA to indicate that it is not
5739 * at the mark and additional data is not about
5740 * to be send upstream.
5742 * If the mark state is undetermined this will
5743 * return -1 and we will loop rechecking the
5746 retval
= strwaitmark(vp
);
5748 } while (retval
== -1);
5750 if (so_copyout(&retval
, (void *)arg
, sizeof (int),
5751 (mode
& (int)FKIOCTL
)))
5760 case _SIOCSOCKFALLBACK
:
5762 * These ioctls do not apply to sockets. I_FDINSERT can be
5763 * used to send M_PROTO messages without modifying the socket
5764 * state. I_SENDFD/RECVFD should not be used for socket file
5765 * descriptor passing since they assume a twisted stream.
5766 * SIOCATMARK must be used instead of I_ATMARK.
5768 * _SIOCSOCKFALLBACK from an application should never be
5769 * processed. It is only generated by socktpi_open() or
5770 * in response to I_POP or I_PUSH.
5773 zcmn_err(getzoneid(), CE_WARN
,
5774 "Unsupported STREAMS ioctl 0x%x on socket. "
5775 "Pid = %d\n", cmd
, curproc
->p_pid
);
5777 return (EOPNOTSUPP
);
/* arg is dereferenced directly as a kernel pointer below; the FKIOCTL */
/* test that precedes it is visible, its consequence is not in this listing. */
5779 case _I_GETPEERCRED
:
5780 if ((mode
& FKIOCTL
) == 0)
5783 mutex_enter(&so
->so_lock
);
5784 if ((so
->so_mode
& SM_CONNREQUIRED
) == 0) {
5786 } else if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
5788 } else if (so
->so_peercred
!= NULL
) {
5789 k_peercred_t
*kp
= (k_peercred_t
*)arg
;
5790 kp
->pc_cr
= so
->so_peercred
;
5791 kp
->pc_cpid
= so
->so_cpid
;
5792 crhold(so
->so_peercred
);
5796 mutex_exit(&so
->so_lock
);
5801 * Do the higher-order bits of the ioctl cmd indicate
5802 * that it is an I_* streams ioctl?
5804 if ((cmd
& 0xffffff00U
) == STR
&&
5805 !so
->so_is_stream
) {
5807 zcmn_err(getzoneid(), CE_WARN
,
5808 "Unsupported STREAMS ioctl 0x%x on socket. "
5809 "Pid = %d\n", cmd
, curproc
->p_pid
);
5811 return (EOPNOTSUPP
);
5813 return (strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
));
5818 * Handle plumbing-related ioctls.
/*
 * socktpi_plumbioctl: handle the ioctls that inspect or change the module
 * list of the socket's stream.  The caller must hold sti_plumb_lock
 * (asserted below).
 *
 * Visible behavior: returns EOPNOTSUPP when so_is_stream is clear.  In
 * stream mode an I_PUSH of "sockmod" is recognised by name and everything
 * else is passed to strioctl().  The remaining code emulates an imaginary
 * "sockmod" module: one is added to the module count, "sockmod" is
 * inserted into the module-list result at slot sti_pushcnt, its name is
 * copied out when sti_pushcnt is 0, and "popping" it with sti_pushcnt == 0
 * converts the socket with so_sock2stream().  An unknown cmd panics.
 *
 * NOTE(review): case labels, returns and some statements are missing from
 * this listing; confirm which command each branch belongs to.
 */
5821 socktpi_plumbioctl(struct vnode
*vp
, int cmd
, intptr_t arg
, int mode
,
5822 struct cred
*cr
, int32_t *rvalp
)
5824 static const char sockmod_name
[] = "sockmod";
5825 struct sonode
*so
= VTOSO(vp
);
5826 char mname
[FMNAMESZ
+ 1];
5828 sotpi_info_t
*sti
= SOTOTPI(so
);
5830 ASSERT(MUTEX_HELD(&sti
->sti_plumb_lock
));
5832 if (!so
->so_is_stream
)
5833 return (EOPNOTSUPP
);
/*
 * NOTE(review): after the guard above, so_is_stream is necessarily set
 * when this test is reached.  If the stream-mode branch returns on every
 * path, the socket-mode handling further down cannot be reached -- confirm
 * that this is intended.
 */
5835 if (so
->so_is_stream
) {
5837 * The imaginary "sockmod" has been popped - act as a stream.
5838 * If this is a push of sockmod then change back to a socket.
5840 if (cmd
== I_PUSH
) {
/* Kernel callers (FKIOCTL) pass a kernel string, others a user string. */
5841 error
= ((mode
& FKIOCTL
) ? copystr
: copyinstr
)(
5842 (void *)arg
, mname
, sizeof (mname
), NULL
);
5844 if (error
== 0 && strcmp(mname
, sockmod_name
) == 0) {
5845 dprintso(so
, 0, ("socktpi_ioctl: going to "
5846 "socket version\n"));
5851 return (strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
));
5856 if (sti
->sti_direct
) {
5857 mutex_enter(&so
->so_lock
);
5859 mutex_exit(&so
->so_lock
);
5861 error
= strioctl(vp
, _SIOCSOCKFALLBACK
, 0, 0, K_TO_K
,
5864 mutex_enter(&so
->so_lock
);
5866 sti
->sti_direct
= 0;
5867 so_unlock_single(so
, SOLOCKED
);
5868 mutex_exit(&so
->so_lock
);
5874 error
= strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
);
5880 if (sti
->sti_pushcnt
== 0) {
5881 /* Emulate sockmod being popped */
5883 ("socktpi_ioctl: going to STREAMS version\n"));
5884 return (so_sock2stream(so
));
5887 error
= strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
);
5893 struct str_mlist
*kmlistp
, *umlistp
;
5894 struct str_list kstrlist
;
5895 ssize_t kstrlistsize
;
5898 STRUCT_DECL(str_list
, ustrlist
);
5899 STRUCT_INIT(ustrlist
, mode
);
/* With a NULL arg only the module count is wanted; sockmod adds one. */
5901 if (arg
== (intptr_t)NULL
) {
5902 error
= strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
);
5904 (*rvalp
)++; /* Add one for sockmod */
5908 error
= so_copyin((void *)arg
, STRUCT_BUF(ustrlist
),
5909 STRUCT_SIZE(ustrlist
), mode
& FKIOCTL
);
5913 nmods
= STRUCT_FGET(ustrlist
, sl_nmods
);
5917 * Ceiling nmods at nstrpush to prevent someone from
5918 * maliciously consuming lots of kernel memory.
5920 nmods
= MIN(nmods
, nstrpush
);
/* One extra str_mlist slot is allocated to hold the sockmod entry. */
5922 kstrlistsize
= (nmods
+ 1) * sizeof (struct str_mlist
);
5923 kstrlist
.sl_nmods
= nmods
;
5924 kstrlist
.sl_modlist
= kmem_zalloc(kstrlistsize
, KM_SLEEP
);
5926 error
= strioctl(vp
, cmd
, (intptr_t)&kstrlist
, mode
, K_TO_K
,
5932 * Considering the module list as a 0-based array of sl_nmods
5933 * modules, sockmod should conceptually exist at slot
5934 * sti_pushcnt. Insert sockmod at this location by sliding all
5935 * of the module names after so_pushcnt over by one. We know
5936 * that there will be room to do this since we allocated
5937 * sl_modlist with an additional slot.
5939 for (i
= kstrlist
.sl_nmods
; i
> sti
->sti_pushcnt
; i
--)
5940 kstrlist
.sl_modlist
[i
] = kstrlist
.sl_modlist
[i
- 1];
/* When the loop ends i equals sti_pushcnt (or sl_nmods if that is */
/* smaller), which is the slot that receives the sockmod name. */
5942 (void) strcpy(kstrlist
.sl_modlist
[i
].l_name
, sockmod_name
);
5943 kstrlist
.sl_nmods
++;
5946 * Copy all of the entries out to ustrlist.
5948 kmlistp
= kstrlist
.sl_modlist
;
5949 umlistp
= STRUCT_FGETP(ustrlist
, sl_modlist
);
/* No more than the caller's (clamped) nmods entries are copied out. */
5950 for (i
= 0; i
< nmods
&& i
< kstrlist
.sl_nmods
; i
++) {
5951 error
= so_copyout(kmlistp
++, umlistp
++,
5952 sizeof (struct str_mlist
), mode
& FKIOCTL
);
5957 error
= so_copyout(&i
, (void *)arg
, sizeof (int32_t),
5962 kmem_free(kstrlist
.sl_modlist
, kstrlistsize
);
5966 if (sti
->sti_pushcnt
== 0) {
5967 return (so_copyout(sockmod_name
, (void *)arg
,
5968 sizeof (sockmod_name
), mode
& FKIOCTL
));
5970 return (strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
));
5973 error
= strioctl(vp
, cmd
, arg
, mode
, U_TO_K
, cr
, rvalp
);
5974 if (error
&& error
!= EINVAL
)
5977 /* if not found and string was sockmod return 1 */
5978 if (*rvalp
== 0 || error
== EINVAL
) {
5979 error
= ((mode
& FKIOCTL
) ? copystr
: copyinstr
)(
5980 (void *)arg
, mname
, sizeof (mname
), NULL
);
5981 if (error
== ENAMETOOLONG
)
5984 if (error
== 0 && strcmp(mname
, sockmod_name
) == 0)
5990 panic("socktpi_plumbioctl: unknown ioctl %d", cmd
);
5998 * Wrapper around the streams poll routine that implements socket poll
6000 * The sockfs never calls pollwakeup itself - the stream head takes care
6001 * of all pollwakeups. Since sockfs never holds so_lock when calling the
6002 * stream head there can never be a deadlock due to holding so_lock across
6003 * pollwakeup and acquiring so_lock in this routine.
6005 * However, since the performance of fop_poll is critical we avoid
6006 * acquiring so_lock here. This is based on two assumptions:
6007 * - The poll implementation holds locks to serialize the fop_poll call
6008 * and a pollwakeup for the same pollhead. This ensures that should
6009 * e.g. so_state change during a socktpi_poll call the pollwakeup
6010 * (which strsock_* and strrput conspire to issue) is issued after
6011 * the state change. Thus the pollwakeup will block until fop_poll has
6012 * returned and then wake up poll and have it call fop_poll again.
6013 * - The reading of so_state without holding so_lock does not result in
6014 * stale data that is older than the latest state change that has dropped
6015 * so_lock. This is ensured by the mutex_exit issuing the appropriate
6016 * memory barrier to force the data into the coherency domain.
/*
 * Poll entry point for TPI sockets.  The debug message below calls it
 * socktpi_poll; the line carrying the function name and the first
 * parameters is missing from this listing.
 *
 * so_state is sampled once without taking so_lock.  In stream mode the
 * request is passed to strpoll().  Otherwise the visible code:
 *  - masks off POLLOUT|POLLWRBAND for a connection-oriented socket
 *    (SM_CONNREQUIRED) that is not SS_ISCONNECTED;
 *  - reports a pending so_error as the requested subset of
 *    POLLIN|POLLRDNORM|POLLOUT without calling strpoll();
 *  - calls strpoll() with POLLRDDATA and POLLNOERR added to events;
 *  - then adds POLLIN|POLLRDNORM for a queued T_CONN_IND, POLLRDHUP for
 *    SS_CANTRCVMORE (with POLLHUP when SS_CANTSENDMORE is set too; the
 *    nesting is inferred from the open brace, confirm), and POLLRDBAND for
 *    SS_OOBPEND, each limited to the requested events except POLLHUP.
 */
6024 struct pollhead
**phpp
)
6026 short origevents
= events
;
6027 struct vnode
*vp
= SOTOV(so
);
6029 int so_state
= so
->so_state
; /* snapshot */
6030 sotpi_info_t
*sti
= SOTOTPI(so
);
6032 dprintso(so
, 0, ("socktpi_poll(%p): state %s err %d\n",
6033 (void *)vp
, pr_state(so_state
, so
->so_mode
), so
->so_error
));
6035 ASSERT(vp
->v_type
== VSOCK
);
6036 ASSERT(vp
->v_stream
!= NULL
);
6038 if (so
->so_is_stream
) {
6039 /* The imaginary "sockmod" has been popped - act as a stream */
6040 return (strpoll(vp
->v_stream
, events
, anyyet
,
6044 if (!(so_state
& SS_ISCONNECTED
) &&
6045 (so
->so_mode
& SM_CONNREQUIRED
)) {
6046 /* Not connected yet - turn off write side events */
6047 events
&= ~(POLLOUT
|POLLWRBAND
);
6050 * Check for errors without calling strpoll if the caller wants them.
6051 * In sockets the errors are represented as input/output events
6052 * and there is no need to ask the stream head for this information.
/* origevents is used here so that the write-side masking above does not */
/* hide POLLOUT from the error report. */
6054 if (so
->so_error
!= 0 &&
6055 ((POLLIN
|POLLRDNORM
|POLLOUT
) & origevents
) != 0) {
6056 *reventsp
= (POLLIN
|POLLRDNORM
|POLLOUT
) & origevents
;
6060 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages.
6061 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA
6062 * will not trigger a POLLIN event with POLLRDDATA set.
6063 * The handling of urgent data (causing POLLRDBAND) is done by
6064 * inspecting SS_OOBPEND below.
6066 events
|= POLLRDDATA
;
6069 * After shutdown(output) a stream head write error is set.
6070 * However, we should not return output events.
6072 events
|= POLLNOERR
;
6073 error
= strpoll(vp
->v_stream
, events
, anyyet
,
6078 ASSERT(!(*reventsp
& POLLERR
));
6081 * Notes on T_CONN_IND handling for sockets.
6083 * If strpoll() returned without events, SR_POLLIN is guaranteed
6084 * to be set, ensuring any subsequent strrput() runs pollwakeup().
6086 * Since the so_lock is not held, soqueueconnind() may have run
6087 * and a T_CONN_IND may be waiting. We now check for any queued
6088 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events
6089 * to ensure poll returns.
6092 * If the T_CONN_IND hasn't arrived by the time strpoll() returns,
6093 * when strrput() does run for an arriving M_PROTO with T_CONN_IND
6094 * the following actions will occur; taken together they ensure the
6095 * syscall will return.
6097 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if
6098 * the accept() was run on a non-blocking socket sowaitconnind()
6099 * may have already returned EWOULDBLOCK, so not be waiting to
6100 * process the message. Additionally socktpi_poll() has probably
6101 * proceeded past the sti_conn_ind_head check below.
6102 * 2. strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake
6103 * this thread, however that could occur before poll_common()
6104 * has entered cv_wait.
6105 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock.
6107 * Before proceeding to cv_wait() in poll_common() for an event,
6108 * poll_common() atomically checks for T_POLLWAKE under the pc_lock,
6109 * and if set, re-calls strpoll() to ensure the late arriving
6110 * T_CONN_IND is recognized, and pollsys() returns.
6113 if (sti
->sti_conn_ind_head
!= NULL
)
6114 *reventsp
|= (POLLIN
|POLLRDNORM
) & events
;
/* These tests re-read so->so_state rather than the earlier snapshot. */
6116 if (so
->so_state
& SS_CANTRCVMORE
) {
6117 *reventsp
|= POLLRDHUP
& events
;
6119 if (so
->so_state
& SS_CANTSENDMORE
)
6120 *reventsp
|= POLLHUP
;
6123 if (so
->so_state
& SS_OOBPEND
)
6124 *reventsp
|= POLLRDBAND
& events
;
/*
 * socktpi_constructor: kmem cache constructor for a sotpi_sonode_t (it is
 * passed to kmem_cache_create() for socktpi_cache in this file).
 *
 * Runs sonode_constructor() on the buffer, then i_sotpi_info_constructor()
 * on the embedded st_info, and points the sonode's so_priv at st_info.
 * sonode_destructor() is called on one path; the conditions guarding it
 * and the returns are not visible in this listing (presumably the error
 * path of the second constructor -- confirm).
 */
6131 socktpi_constructor(void *buf
, void *cdrarg
, int kmflags
)
6133 sotpi_sonode_t
*st
= (sotpi_sonode_t
*)buf
;
6136 error
= sonode_constructor(buf
, cdrarg
, kmflags
);
6140 error
= i_sotpi_info_constructor(&st
->st_info
);
6142 sonode_destructor(buf
, cdrarg
);
6144 st
->st_sonode
.so_priv
= &st
->st_info
;
/*
 * socktpi_destructor: kmem cache destructor matching socktpi_constructor().
 *
 * Asserts that so_priv still points at the embedded st_info, clears it,
 * then tears down the TPI info (i_sotpi_info_destructor()) followed by the
 * generic sonode part (sonode_destructor()), i.e. the reverse of the
 * construction order.
 */
6151 socktpi_destructor(void *buf
, void *cdrarg
)
6153 sotpi_sonode_t
*st
= (sotpi_sonode_t
*)buf
;
6155 ASSERT(st
->st_sonode
.so_priv
== &st
->st_info
);
6156 st
->st_sonode
.so_priv
= NULL
;
6158 i_sotpi_info_destructor(&st
->st_info
);
6159 sonode_destructor(buf
, cdrarg
);
/*
 * socktpi_unix_constructor: constructor for the AF_UNIX sonode cache.
 *
 * Calls socktpi_constructor(); when that returns 0 the new sonode is
 * pushed onto the head of the doubly linked socklist.sl_list while holding
 * socklist.sl_lock.
 */
6163 socktpi_unix_constructor(void *buf
, void *cdrarg
, int kmflags
)
6167 if ((retval
= socktpi_constructor(buf
, cdrarg
, kmflags
)) == 0) {
6168 struct sonode
*so
= (struct sonode
*)buf
;
6169 sotpi_info_t
*sti
= SOTOTPI(so
);
6171 mutex_enter(&socklist
.sl_lock
);
/* Head insertion: the old head, if any, gets its back pointer updated. */
6173 sti
->sti_next_so
= socklist
.sl_list
;
6174 sti
->sti_prev_so
= NULL
;
6175 if (sti
->sti_next_so
!= NULL
)
6176 SOTOTPI(sti
->sti_next_so
)->sti_prev_so
= so
;
6177 socklist
.sl_list
= so
;
6179 mutex_exit(&socklist
.sl_lock
);
/*
 * socktpi_unix_destructor: destructor for the AF_UNIX sonode cache.
 *
 * Unlinks the sonode from socklist.sl_list under socklist.sl_lock and then
 * calls socktpi_destructor().
 *
 * NOTE(review): as listed, socklist.sl_list is assigned directly after the
 * sti_prev_so != NULL branch.  The embedded numbering skips one line there,
 * so a line (probably an `else`) appears to be missing -- confirm against
 * the complete file.
 */
6186 socktpi_unix_destructor(void *buf
, void *cdrarg
)
6188 struct sonode
*so
= (struct sonode
*)buf
;
6189 sotpi_info_t
*sti
= SOTOTPI(so
);
6191 mutex_enter(&socklist
.sl_lock
);
6193 if (sti
->sti_next_so
!= NULL
)
6194 SOTOTPI(sti
->sti_next_so
)->sti_prev_so
= sti
->sti_prev_so
;
6195 if (sti
->sti_prev_so
!= NULL
)
6196 SOTOTPI(sti
->sti_prev_so
)->sti_next_so
= sti
->sti_next_so
;
6198 socklist
.sl_list
= sti
->sti_next_so
;
6200 mutex_exit(&socklist
.sl_lock
);
6202 socktpi_destructor(buf
, cdrarg
);
6209 * Create sonode caches. We create a special one for AF_UNIX so
6210 * that we can track them for netstat(1m).
/*
 * Both caches hold objects of sizeof (struct sotpi_sonode); they differ
 * only in the constructor/destructor pair: the generic pair for
 * socktpi_cache, the AF_UNIX pair (which also maintains socklist) for
 * socktpi_unix_cache.  The enclosing function's signature is missing from
 * this listing.
 */
6212 socktpi_cache
= kmem_cache_create("socktpi_cache",
6213 sizeof (struct sotpi_sonode
), 0, socktpi_constructor
,
6214 socktpi_destructor
, NULL
, NULL
, NULL
, 0);
6216 socktpi_unix_cache
= kmem_cache_create("socktpi_unix_cache",
6217 sizeof (struct sotpi_sonode
), 0, socktpi_unix_constructor
,
6218 socktpi_unix_destructor
, NULL
, NULL
, NULL
, 0);
6224 * Given a non-TPI sonode, allocate and prep it to be ready for TPI.
6226 * Caller must still update state and mode using sotpi_update_state().
/*
 * sotpi_convert_sonode: turn a non-STREAMS sonode into a TPI one as part
 * of fallback (asserted: SS_FALLBACK_PENDING set, SS_FALLBACK_COMP clear,
 * SOCK_IS_NONSTR(so)).
 *
 * Visible steps: switch so_sockparams to newsp; create and initialise the
 * TPI info (sotpi_info_create(), sotpi_info_init()); run sotpi_init() with
 * SO_FALLBACK, undoing the TPI info on failure; remember the original
 * sockparams in sti_orig_sp; run so_basic_strinit() and so_alloc_addr();
 * re-register signal events with so_set_events() when so_pgrp is set; walk
 * down the write side of the stream to pick a queue; and finally clear
 * so_not_str.
 *
 * NOTE(review): the uses of `direct`, `qp` and the return statements are
 * not visible in this listing.
 */
6229 sotpi_convert_sonode(struct sonode
*so
, struct sockparams
*newsp
,
6230 boolean_t
*direct
, queue_t
**qp
, struct cred
*cr
)
6233 struct sockparams
*origsp
= so
->so_sockparams
;
6234 sock_lower_handle_t handle
= so
->so_proto_handle
;
6240 ASSERT((so
->so_state
& (SS_FALLBACK_PENDING
|SS_FALLBACK_COMP
)) ==
6241 SS_FALLBACK_PENDING
);
6242 ASSERT(SOCK_IS_NONSTR(so
));
6246 so
->so_sockparams
= newsp
;
6248 * Allocate and initalize fields required by TPI.
/* KM_SLEEP is passed, and the return value is deliberately discarded. */
6250 (void) sotpi_info_create(so
, KM_SLEEP
);
6251 sotpi_info_init(so
);
6253 if ((error
= sotpi_init(so
, NULL
, cr
, SO_FALLBACK
)) != 0) {
6254 sotpi_info_fini(so
);
6255 sotpi_info_destroy(so
);
/* The protocol handle captured on entry must be unchanged at this point. */
6258 ASSERT(handle
== so
->so_proto_handle
);
6260 if (sti
->sti_direct
!= 0)
6264 * Keep the original sp around so we can properly dispose of the
6265 * sonode when the socket is being closed.
6267 sti
->sti_orig_sp
= origsp
;
6269 so_basic_strinit(so
); /* skips the T_CAPABILITY_REQ */
6270 so_alloc_addr(so
, so
->so_max_addr_len
);
6273 * If the application has done a SIOCSPGRP, make sure the
6274 * STREAM head is aware. This needs to take place before
6275 * the protocol start sending up messages. Otherwise we
6276 * might miss to generate SIGPOLL.
6278 * It is possible that the application will receive duplicate
6279 * signals if some were already generated for either data or
6280 * connection indications.
6282 if (so
->so_pgrp
!= 0) {
6283 if (so_set_events(so
, so
->so_vnode
, cr
) != 0)
6288 * Determine which queue to use.
6292 ASSERT(stp
!= NULL
);
6293 q
= stp
->sd_wrq
->q_next
;
6296 * Skip any modules that may have been auto pushed when the device
6299 while (q
->q_next
!= NULL
)
6303 /* This is now a STREAMS socket */
6304 so
->so_not_str
= B_FALSE
;
6310 * Revert a TPI sonode. It is only allowed to revert the sonode during
6311 * the fallback process.
/*
 * sotpi_revert_sonode: undo sotpi_convert_sonode() while fallback is still
 * pending (asserted: SS_FALLBACK_PENDING set, SS_FALLBACK_COMP clear, the
 * sonode is a STREAMS one and has a stream).
 *
 * Closes the stream with strclose() (result ignored), restores the
 * sockparams saved in sti_orig_sp, frees the TPI info
 * (sotpi_info_fini() then sotpi_info_destroy()) and marks the sonode as
 * non-STREAMS again by setting so_not_str.
 */
6314 sotpi_revert_sonode(struct sonode
*so
, struct cred
*cr
)
6316 vnode_t
*vp
= SOTOV(so
);
6318 ASSERT((so
->so_state
& (SS_FALLBACK_PENDING
|SS_FALLBACK_COMP
)) ==
6319 SS_FALLBACK_PENDING
);
6320 ASSERT(!SOCK_IS_NONSTR(so
));
6321 ASSERT(vp
->v_stream
!= NULL
);
6324 (void) strclose(vp
, FREAD
|FWRITE
|SO_FALLBACK
, cr
);
6327 * Restore the original sockparams. The caller is responsible for
6328 * dropping the ref to the new sp.
/* sti_orig_sp must be read before the TPI info is destroyed below. */
6330 so
->so_sockparams
= SOTOTPI(so
)->sti_orig_sp
;
6332 sotpi_info_fini(so
);
6333 sotpi_info_destroy(so
);
6335 /* This is no longer a STREAMS socket */
6336 so
->so_not_str
= B_TRUE
;
/*
 * sotpi_update_state: bring the TPI state of a sonode up to date.
 *
 * Processes the capability ack with so_proc_tcapability_ack(), ORs opts
 * into so_options, and caches the local and foreign addresses when their
 * lengths are non-zero.  An address is flagged valid only if the socket is
 * SS_ISBOUND (local) or SS_ISCONNECTED (foreign).
 *
 * The address buffers must already be allocated and large enough; this is
 * only checked with ASSERT against sti_laddr_maxlen/sti_faddr_maxlen.
 */
6340 sotpi_update_state(struct sonode
*so
, struct T_capability_ack
*tcap
,
6341 struct sockaddr
*laddr
, socklen_t laddrlen
, struct sockaddr
*faddr
,
6342 socklen_t faddrlen
, short opts
)
6344 sotpi_info_t
*sti
= SOTOTPI(so
);
6346 so_proc_tcapability_ack(so
, tcap
);
6348 so
->so_options
|= opts
;
6351 * Determine whether the foreign and local address are valid
6353 if (laddrlen
!= 0) {
6354 ASSERT(laddrlen
<= sti
->sti_laddr_maxlen
);
6355 sti
->sti_laddr_len
= laddrlen
;
6356 bcopy(laddr
, sti
->sti_laddr_sa
, laddrlen
);
6357 sti
->sti_laddr_valid
= (so
->so_state
& SS_ISBOUND
);
6360 if (faddrlen
!= 0) {
6361 ASSERT(faddrlen
<= sti
->sti_faddr_maxlen
);
6362 sti
->sti_faddr_len
= faddrlen
;
6363 bcopy(faddr
, sti
->sti_faddr_sa
, faddrlen
);
6364 sti
->sti_faddr_valid
= (so
->so_state
& SS_ISCONNECTED
);
6370 * Allocate enough space to cache the local and foreign addresses.
/*
 * so_alloc_addr: allocate the buffers that cache the local and foreign
 * addresses of a TPI socket.
 *
 * maxlen is rounded up to KMEM_ALIGN and stored as both maximum lengths
 * (and as so_max_addr_len).  A single KM_SLEEP allocation of twice that
 * size backs both addresses: sti_laddr_sa is the first half, sti_faddr_sa
 * the second.  For AF_UNIX sockets sti_ux_laddr and sti_ux_faddr are
 * zeroed as well.
 *
 * Must not be called when the buffers already exist (asserted).
 */
6373 so_alloc_addr(struct sonode
*so
, t_uscalar_t maxlen
)
6375 sotpi_info_t
*sti
= SOTOTPI(so
);
6377 ASSERT(sti
->sti_laddr_sa
== NULL
&& sti
->sti_faddr_sa
== NULL
);
6378 ASSERT(sti
->sti_laddr_len
== 0 && sti
->sti_faddr_len
== 0);
6379 sti
->sti_laddr_maxlen
= sti
->sti_faddr_maxlen
=
6380 P2ROUNDUP(maxlen
, KMEM_ALIGN
);
6381 so
->so_max_addr_len
= sti
->sti_laddr_maxlen
;
/* One allocation, two halves: local address first, foreign address after. */
6382 sti
->sti_laddr_sa
= kmem_alloc(sti
->sti_laddr_maxlen
* 2, KM_SLEEP
);
6383 sti
->sti_faddr_sa
= (struct sockaddr
*)((caddr_t
)sti
->sti_laddr_sa
6384 + sti
->sti_laddr_maxlen
);
6386 if (so
->so_family
== AF_UNIX
) {
6388 * Initialize AF_UNIX related fields.
6390 bzero(&sti
->sti_ux_laddr
, sizeof (sti
->sti_ux_laddr
));
6391 bzero(&sti
->sti_ux_faddr
, sizeof (sti
->sti_ux_faddr
));
/*
 * sotpi_sototpi: fetch the sotpi_info_t stored in so->so_priv, asserting
 * that it is non-NULL and carries SOTPI_INFO_MAGIC.  The return statement
 * is missing from this listing; presumably it returns sti -- confirm.
 */
6397 sotpi_sototpi(struct sonode
*so
)
6403 sti
= (sotpi_info_t
*)so
->so_priv
;
6405 ASSERT(sti
!= NULL
);
6406 ASSERT(sti
->sti_magic
== SOTPI_INFO_MAGIC
);
/*
 * i_sotpi_info_constructor: put a sotpi_info_t into its constructed state.
 *
 * Stamps the magic number, sets the message, vnode, connection-indication
 * and address pointers to NULL, and initialises sti_plumb_lock and
 * sti_ack_cv.  i_sotpi_info_destructor() asserts this same set of NULL
 * fields before destroying the lock and condition variable.
 *
 * Callers in this file test the result against 0; the return statement is
 * not visible in this listing.
 */
6412 i_sotpi_info_constructor(sotpi_info_t
*sti
)
6414 sti
->sti_magic
= SOTPI_INFO_MAGIC
;
6415 sti
->sti_ack_mp
= NULL
;
6416 sti
->sti_discon_ind_mp
= NULL
;
6417 sti
->sti_ux_bound_vp
= NULL
;
6418 sti
->sti_unbind_mp
= NULL
;
6420 sti
->sti_conn_ind_head
= NULL
;
6421 sti
->sti_conn_ind_tail
= NULL
;
6423 sti
->sti_laddr_sa
= NULL
;
6424 sti
->sti_faddr_sa
= NULL
;
6426 mutex_init(&sti
->sti_plumb_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
6427 cv_init(&sti
->sti_ack_cv
, NULL
, CV_DEFAULT
, NULL
);
/*
 * i_sotpi_info_destructor: counterpart of i_sotpi_info_constructor().
 *
 * Asserts that the structure is back in its constructed state (magic
 * intact, all message, vnode, connection-indication and address pointers
 * NULL) and then destroys sti_plumb_lock and sti_ack_cv.  Nothing is freed
 * here; the assertions require callers to have released everything first.
 */
6433 i_sotpi_info_destructor(sotpi_info_t
*sti
)
6435 ASSERT(sti
->sti_magic
== SOTPI_INFO_MAGIC
);
6436 ASSERT(sti
->sti_ack_mp
== NULL
);
6437 ASSERT(sti
->sti_discon_ind_mp
== NULL
);
6438 ASSERT(sti
->sti_ux_bound_vp
== NULL
);
6439 ASSERT(sti
->sti_unbind_mp
== NULL
);
6441 ASSERT(sti
->sti_conn_ind_head
== NULL
);
6442 ASSERT(sti
->sti_conn_ind_tail
== NULL
);
6444 ASSERT(sti
->sti_laddr_sa
== NULL
);
6445 ASSERT(sti
->sti_faddr_sa
== NULL
);
6447 mutex_destroy(&sti
->sti_plumb_lock
);
6448 cv_destroy(&sti
->sti_ack_cv
);
6452 * Creates and attaches TPI information to the given sonode
/*
 * sotpi_info_create: allocate a zeroed sotpi_info_t with the given kmem
 * flags, construct it with i_sotpi_info_constructor() and attach it to the
 * sonode through so_priv.  The sonode must not already have private data
 * (asserted).  If construction fails the allocation is freed again.
 *
 * NOTE(review): the return statements are missing from this listing, so
 * the exact error values cannot be confirmed here.
 */
6455 sotpi_info_create(struct sonode
*so
, int kmflags
)
6459 ASSERT(so
->so_priv
== NULL
);
6461 if ((sti
= kmem_zalloc(sizeof (*sti
), kmflags
)) == NULL
)
6464 if (i_sotpi_info_constructor(sti
) != 0) {
6465 kmem_free(sti
, sizeof (*sti
));
6469 so
->so_priv
= (void *)sti
;
6474 * Initializes the TPI information.
/*
 * sotpi_info_init: reset the per-socket TPI fields to their initial values.
 *
 * Copies the device number from the sockparams' device vnode into sti_dev
 * and the socket vnode's v_rdev, stamps the three time fields with the
 * current time, and zeroes the counters, flags, lengths and cached-address
 * bookkeeping.  Fields owned by the constructor (ack, unbind and
 * connection-indication messages, bound vnode) are only asserted to be
 * NULL, not written.
 */
6477 sotpi_info_init(struct sonode
*so
)
6479 struct vnode
*vp
= SOTOV(so
);
6480 sotpi_info_t
*sti
= SOTOTPI(so
);
6483 sti
->sti_dev
= so
->so_sockparams
->sp_sdev_info
.sd_vnode
->v_rdev
;
6484 vp
->v_rdev
= sti
->sti_dev
;
6486 sti
->sti_orig_sp
= NULL
;
6488 sti
->sti_pushcnt
= 0;
6490 now
= gethrestime_sec();
6491 sti
->sti_atime
= now
;
6492 sti
->sti_mtime
= now
;
6493 sti
->sti_ctime
= now
;
6495 sti
->sti_eaddr_mp
= NULL
;
6496 sti
->sti_delayed_error
= 0;
6498 sti
->sti_provinfo
= NULL
;
6500 sti
->sti_oobcnt
= 0;
6501 sti
->sti_oobsigcnt
= 0;
6503 ASSERT(sti
->sti_laddr_sa
== NULL
&& sti
->sti_faddr_sa
== NULL
);
/* The two pointers were just asserted NULL; the stores below restate it. */
6505 sti
->sti_laddr_sa
= 0;
6506 sti
->sti_faddr_sa
= 0;
6507 sti
->sti_laddr_maxlen
= sti
->sti_faddr_maxlen
= 0;
6508 sti
->sti_laddr_len
= sti
->sti_faddr_len
= 0;
6510 sti
->sti_laddr_valid
= 0;
6511 sti
->sti_faddr_valid
= 0;
6512 sti
->sti_faddr_noxlate
= 0;
6514 sti
->sti_direct
= 0;
6516 ASSERT(sti
->sti_ack_mp
== NULL
);
6517 ASSERT(sti
->sti_ux_bound_vp
== NULL
);
6518 ASSERT(sti
->sti_unbind_mp
== NULL
);
6520 ASSERT(sti
->sti_conn_ind_head
== NULL
);
6521 ASSERT(sti
->sti_conn_ind_tail
== NULL
);
6525 * Given a sonode, grab the TPI info and free any data.
/*
 * sotpi_info_fini: release whatever the TPI info of a sonode still holds,
 * leaving it in the state i_sotpi_info_destructor() asserts.
 *
 * Visible effects: the connection-indication list, sti_eaddr_mp,
 * sti_ack_mp and sti_unbind_mp are reset to NULL when non-NULL, and the
 * combined local/foreign address buffer is freed under so_lock with all
 * related lengths and valid flags cleared.
 *
 * NOTE(review): the statements that dispose of the messages themselves are
 * missing from this listing; only the pointer resets are visible.
 */
6528 sotpi_info_fini(struct sonode
*so
)
6530 sotpi_info_t
*sti
= SOTOTPI(so
);
6533 ASSERT(sti
->sti_discon_ind_mp
== NULL
);
6535 if ((mp
= sti
->sti_conn_ind_head
) != NULL
) {
6544 sti
->sti_conn_ind_head
= sti
->sti_conn_ind_tail
= NULL
;
6548 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely
6549 * indirect them. It also uses so_count as a validity test.
6551 mutex_enter(&so
->so_lock
);
6553 if (sti
->sti_laddr_sa
) {
6554 ASSERT((caddr_t
)sti
->sti_faddr_sa
==
6555 (caddr_t
)sti
->sti_laddr_sa
+ sti
->sti_laddr_maxlen
);
6556 ASSERT(sti
->sti_faddr_maxlen
== sti
->sti_laddr_maxlen
);
6557 sti
->sti_laddr_valid
= 0;
6558 sti
->sti_faddr_valid
= 0;
/* Both addresses live in one allocation of 2 * sti_laddr_maxlen bytes. */
6559 kmem_free(sti
->sti_laddr_sa
, sti
->sti_laddr_maxlen
* 2);
6560 sti
->sti_laddr_sa
= NULL
;
6561 sti
->sti_laddr_len
= sti
->sti_laddr_maxlen
= 0;
6562 sti
->sti_faddr_sa
= NULL
;
6563 sti
->sti_faddr_len
= sti
->sti_faddr_maxlen
= 0;
6566 mutex_exit(&so
->so_lock
);
6568 if ((mp
= sti
->sti_eaddr_mp
) != NULL
) {
6570 sti
->sti_eaddr_mp
= NULL
;
6571 sti
->sti_delayed_error
= 0;
6574 if ((mp
= sti
->sti_ack_mp
) != NULL
) {
6576 sti
->sti_ack_mp
= NULL
;
6579 ASSERT(sti
->sti_ux_bound_vp
== NULL
);
6580 if ((mp
= sti
->sti_unbind_mp
) != NULL
) {
6582 sti
->sti_unbind_mp
= NULL
;
6587 * Destroys the TPI information attached to a sonode.
/*
 * sotpi_info_destroy: destroy and free the sotpi_info_t attached to a
 * sonode.  i_sotpi_info_destructor() asserts that every resource has
 * already been released, which is what sotpi_info_fini() is called for
 * before this function elsewhere in this file.
 *
 * NOTE(review): the embedded numbering shows a line missing after the
 * kmem_free() call; check the complete file for what follows it.
 */
6590 sotpi_info_destroy(struct sonode
*so
)
6592 sotpi_info_t
*sti
= SOTOTPI(so
);
6594 i_sotpi_info_destructor(sti
);
6595 kmem_free(sti
, sizeof (*sti
));
6601 * Create the global sotpi socket module entry. It will never be freed.
6604 sotpi_smod_create(void)
6608 smodp
= kmem_zalloc(sizeof (*smodp
), KM_SLEEP
);
6609 smodp
->smod_name
= kmem_alloc(sizeof (SOTPI_SMOD_NAME
), KM_SLEEP
);
6610 (void) strcpy(smodp
->smod_name
, SOTPI_SMOD_NAME
);
6612 * Initialize the smod_refcnt to 1 so it will never be freed.
6614 smodp
->smod_refcnt
= 1;
6615 smodp
->smod_uc_version
= SOCK_UC_VERSION
;
6616 smodp
->smod_dc_version
= SOCK_DC_VERSION
;
6617 smodp
->smod_sock_create_func
= &sotpi_create
;
6618 smodp
->smod_sock_destroy_func
= &sotpi_destroy
;