4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017 Sebastian Wiedenroth
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/sysmacros.h>
31 #include <sys/debug.h>
32 #include <sys/cmn_err.h>
34 #include <sys/policy.h>
35 #include <sys/modctl.h>
37 #include <sys/sunddi.h>
39 #include <sys/strsun.h>
40 #include <sys/stropts.h>
41 #include <sys/strsubr.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
46 #include <inet/ipclassifier.h>
47 #include "sockcommon.h"
48 #include "sockfilter_impl.h"
53 extern int xnet_skip_checks
, xnet_check_print
, xnet_truncate_print
;
/*
 * Common socket access functions.
 *
 * Instead of accessing the sonode switch directly (i.e., SOP_xxx()),
 * the socket_xxx() functions should be used.
 */
63 * Try to create a new sonode of the requested <family, type, protocol>.
67 socket_create(int family
, int type
, int protocol
, char *devpath
, char *mod
,
68 int flags
, struct cred
*cr
, int *errorp
)
71 struct sockparams
*sp
= NULL
;
75 * Look for a sockparams entry that match the given criteria.
76 * solookup() returns with the entry held.
78 *errorp
= solookup(family
, type
, protocol
, &sp
);
79 saved_error
= *errorp
;
81 int kmflags
= (flags
== SOCKET_SLEEP
) ? KM_SLEEP
: KM_NOSLEEP
;
83 * There is no matching sockparams entry. An ephemeral entry is
84 * created if the caller specifies a device or a socket module.
86 if (devpath
!= NULL
) {
88 sp
= sockparams_hold_ephemeral_bydev(family
, type
,
89 protocol
, devpath
, kmflags
, errorp
);
90 } else if (mod
!= NULL
) {
92 sp
= sockparams_hold_ephemeral_bymod(family
, type
,
93 protocol
, mod
, kmflags
, errorp
);
95 *errorp
= solookup(family
, type
, 0, &sp
);
99 if (saved_error
&& (*errorp
== EPROTONOSUPPORT
||
100 *errorp
== EPROTOTYPE
|| *errorp
== ENOPROTOOPT
))
101 *errorp
= saved_error
;
106 ASSERT(sp
->sp_smod_info
!= NULL
);
107 ASSERT(flags
== SOCKET_SLEEP
|| flags
== SOCKET_NOSLEEP
);
108 sp
->sp_stats
.sps_ncreate
.value
.ui64
++;
109 so
= sp
->sp_smod_info
->smod_sock_create_func(sp
, family
, type
,
110 protocol
, flags
, errorp
, cr
);
112 SOCKPARAMS_DEC_REF(sp
);
114 if ((*errorp
= SOP_INIT(so
, NULL
, cr
, flags
)) == 0) {
115 /* Cannot fail, only bumps so_count */
116 (void) fop_open(&SOTOV(so
), FREAD
|FWRITE
, cr
, NULL
);
118 if (saved_error
&& (*errorp
== EPROTONOSUPPORT
||
119 *errorp
== EPROTOTYPE
|| *errorp
== ENOPROTOOPT
))
120 *errorp
= saved_error
;
129 socket_newconn(struct sonode
*parent
, sock_lower_handle_t lh
,
130 sock_downcalls_t
*dc
, int flags
, int *errorp
)
133 struct sockparams
*sp
;
136 if ((cr
= CRED()) == NULL
)
139 sp
= parent
->so_sockparams
;
142 sp
->sp_stats
.sps_ncreate
.value
.ui64
++;
143 so
= sp
->sp_smod_info
->smod_sock_create_func(sp
, parent
->so_family
,
144 parent
->so_type
, parent
->so_protocol
, flags
, errorp
, cr
);
146 SOCKPARAMS_INC_REF(sp
);
148 so
->so_proto_handle
= lh
;
149 so
->so_downcalls
= dc
;
151 * This function may be called in interrupt context, and CRED()
152 * will be NULL. In this case, pass in kcred.
154 if ((*errorp
= SOP_INIT(so
, parent
, cr
, flags
)) == 0) {
155 /* Cannot fail, only bumps so_count */
156 (void) fop_open(&SOTOV(so
), FREAD
|FWRITE
, cr
, NULL
);
167 * Bind local endpoint.
170 socket_bind(struct sonode
*so
, struct sockaddr
*name
, socklen_t namelen
,
171 int flags
, cred_t
*cr
)
173 return (SOP_BIND(so
, name
, namelen
, flags
, cr
));
177 * Turn socket into a listen socket.
180 socket_listen(struct sonode
*so
, int backlog
, cred_t
*cr
)
187 * Use the same qlimit as in BSD. BSD checks the qlimit
188 * before queuing the next connection implying that a
189 * listen(sock, 0) allows one connection to be queued.
190 * BSD also uses 1.5 times the requested backlog.
192 * XNS Issue 4 required a strict interpretation of the backlog.
193 * This has been waived subsequently for Issue 4 and the change
194 * incorporated in XNS Issue 5. So we aren't required to do
195 * anything special for XPG apps.
197 if (backlog
>= (INT_MAX
- 1) / 3)
200 backlog
= backlog
* 3 / 2 + 1;
202 return (SOP_LISTEN(so
, backlog
, cr
));
206 * Accept incoming connection.
209 socket_accept(struct sonode
*lso
, int fflag
, cred_t
*cr
, struct sonode
**nsop
)
211 return (SOP_ACCEPT(lso
, fflag
, cr
, nsop
));
218 socket_connect(struct sonode
*so
, struct sockaddr
*name
,
219 socklen_t namelen
, int fflag
, int flags
, cred_t
*cr
)
224 * Handle a connect to a name parameter of type AF_UNSPEC like a
225 * connect to a null address. This is the portable method to
226 * unconnect a socket.
228 if ((namelen
>= sizeof (sa_family_t
)) &&
229 (name
->sa_family
== AF_UNSPEC
)) {
234 error
= SOP_CONNECT(so
, name
, namelen
, fflag
, flags
, cr
);
240 * Get address of remote node.
243 socket_getpeername(struct sonode
*so
, struct sockaddr
*addr
,
244 socklen_t
*addrlen
, boolean_t accept
, cred_t
*cr
)
246 ASSERT(*addrlen
> 0);
247 return (SOP_GETPEERNAME(so
, addr
, addrlen
, accept
, cr
));
255 socket_getsockname(struct sonode
*so
, struct sockaddr
*addr
,
256 socklen_t
*addrlen
, cred_t
*cr
)
258 return (SOP_GETSOCKNAME(so
, addr
, addrlen
, cr
));
263 * Called from shutdown().
266 socket_shutdown(struct sonode
*so
, int how
, cred_t
*cr
)
268 return (SOP_SHUTDOWN(so
, how
, cr
));
272 * Get socket options.
276 socket_getsockopt(struct sonode
*so
, int level
, int option_name
,
277 void *optval
, socklen_t
*optlenp
, int flags
, cred_t
*cr
)
279 return (SOP_GETSOCKOPT(so
, level
, option_name
, optval
,
280 optlenp
, flags
, cr
));
287 socket_setsockopt(struct sonode
*so
, int level
, int option_name
,
288 const void *optval
, t_uscalar_t optlen
, cred_t
*cr
)
291 /* Caller allocates aligned optval, or passes null */
292 ASSERT(((uintptr_t)optval
& (sizeof (t_scalar_t
) - 1)) == 0);
293 /* If optval is null optlen is 0, and vice-versa */
294 ASSERT(optval
!= NULL
|| optlen
== 0);
295 ASSERT(optlen
!= 0 || optval
== NULL
);
297 if (optval
== NULL
&& optlen
== 0)
300 return (SOP_SETSOCKOPT(so
, level
, option_name
, optval
, optlen
, cr
));
304 socket_sendmsg(struct sonode
*so
, struct msghdr
*msg
, struct uio
*uiop
,
308 ssize_t orig_resid
= uiop
->uio_resid
;
311 * Do not bypass the cache if we are doing a local (AF_UNIX) write.
313 if (so
->so_family
== AF_UNIX
)
314 uiop
->uio_extflg
|= UIO_COPY_CACHED
;
316 uiop
->uio_extflg
&= ~UIO_COPY_CACHED
;
318 error
= SOP_SENDMSG(so
, msg
, uiop
, cr
);
324 /* EAGAIN is EWOULDBLOCK */
326 /* We did a partial send */
327 if (uiop
->uio_resid
!= orig_resid
)
331 if (((so
->so_mode
& SM_KERNEL
) == 0) &&
332 ((msg
->msg_flags
& MSG_NOSIGNAL
) == 0)) {
333 tsignal(curthread
, SIGPIPE
);
342 socket_sendmblk(struct sonode
*so
, struct msghdr
*msg
, int fflag
,
343 struct cred
*cr
, mblk_t
**mpp
)
347 error
= SOP_SENDMBLK(so
, msg
, fflag
, cr
, mpp
);
348 if (error
== EPIPE
) {
349 tsignal(curthread
, SIGPIPE
);
355 socket_recvmsg(struct sonode
*so
, struct msghdr
*msg
, struct uio
*uiop
,
359 ssize_t orig_resid
= uiop
->uio_resid
;
362 * Do not bypass the cache when reading data, as the application
363 * is likely to access the data shortly.
365 uiop
->uio_extflg
|= UIO_COPY_CACHED
;
367 error
= SOP_RECVMSG(so
, msg
, uiop
, cr
);
371 /* EAGAIN is EWOULDBLOCK */
373 /* We did a partial read */
374 if (uiop
->uio_resid
!= orig_resid
)
/*
 * Forward an ioctl request to the socket's own operation through
 * SOP_IOCTL() and return that operation's result. rvalp is handed to the
 * operation untouched.
 */
int
socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error;

	error = SOP_IOCTL(so, cmd, arg, mode, cr, rvalp);
	return (error);
}
/*
 * Forward a poll request to the socket's own operation through SOP_POLL()
 * and return that operation's result. reventsp and phpp are handed to the
 * operation untouched.
 */
int
socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int error;

	error = SOP_POLL(so, events, anyyet, reventsp, phpp);
	return (error);
}
398 socket_close(struct sonode
*so
, int flag
, struct cred
*cr
)
400 return (fop_close(SOTOV(so
), flag
, 1, 0, cr
, NULL
));
404 socket_close_internal(struct sonode
*so
, int flag
, cred_t
*cr
)
406 ASSERT(so
->so_count
== 0);
408 return (SOP_CLOSE(so
, flag
, cr
));
412 socket_destroy(struct sonode
*so
)
414 vn_invalid(SOTOV(so
));
420 socket_destroy_internal(struct sonode
*so
, cred_t
*cr
)
422 struct sockparams
*sp
= so
->so_sockparams
;
423 ASSERT(so
->so_count
== 0 && sp
!= NULL
);
425 sp
->sp_smod_info
->smod_sock_destroy_func(so
);
427 SOCKPARAMS_DEC_REF(sp
);
431 * TODO Once the common vnode ops is available, then the vnops argument
436 sonode_constructor(void *buf
, void *cdrarg
, int kmflags
)
438 struct sonode
*so
= buf
;
441 vp
= so
->so_vnode
= vn_alloc(kmflags
);
446 vn_setops(vp
, &socket_vnodeops
);
449 so
->so_oobmsg
= NULL
;
451 so
->so_proto_handle
= NULL
;
453 so
->so_peercred
= NULL
;
455 so
->so_rcv_queued
= 0;
456 so
->so_rcv_q_head
= NULL
;
457 so
->so_rcv_q_last_head
= NULL
;
458 so
->so_rcv_head
= NULL
;
459 so
->so_rcv_last_head
= NULL
;
460 so
->so_rcv_wanted
= 0;
461 so
->so_rcv_timer_interval
= SOCKET_NO_RCVTIMER
;
462 so
->so_rcv_timer_tid
= 0;
463 so
->so_rcv_thresh
= 0;
465 list_create(&so
->so_acceptq_list
, sizeof (struct sonode
),
466 offsetof(struct sonode
, so_acceptq_node
));
467 list_create(&so
->so_acceptq_defer
, sizeof (struct sonode
),
468 offsetof(struct sonode
, so_acceptq_node
));
469 list_link_init(&so
->so_acceptq_node
);
470 so
->so_acceptq_len
= 0;
472 so
->so_listener
= NULL
;
474 so
->so_snd_qfull
= B_FALSE
;
476 so
->so_filter_active
= 0;
477 so
->so_filter_tx
= 0;
478 so
->so_filter_defertime
= 0;
479 so
->so_filter_top
= NULL
;
480 so
->so_filter_bottom
= NULL
;
482 mutex_init(&so
->so_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
483 mutex_init(&so
->so_acceptq_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
484 rw_init(&so
->so_fallback_rwlock
, NULL
, RW_DEFAULT
, NULL
);
485 cv_init(&so
->so_state_cv
, NULL
, CV_DEFAULT
, NULL
);
486 cv_init(&so
->so_single_cv
, NULL
, CV_DEFAULT
, NULL
);
487 cv_init(&so
->so_read_cv
, NULL
, CV_DEFAULT
, NULL
);
489 cv_init(&so
->so_acceptq_cv
, NULL
, CV_DEFAULT
, NULL
);
490 cv_init(&so
->so_snd_cv
, NULL
, CV_DEFAULT
, NULL
);
491 cv_init(&so
->so_rcv_cv
, NULL
, CV_DEFAULT
, NULL
);
492 cv_init(&so
->so_copy_cv
, NULL
, CV_DEFAULT
, NULL
);
493 cv_init(&so
->so_closing_cv
, NULL
, CV_DEFAULT
, NULL
);
500 sonode_destructor(void *buf
, void *cdrarg
)
502 struct sonode
*so
= buf
;
503 struct vnode
*vp
= SOTOV(so
);
505 ASSERT(so
->so_priv
== NULL
);
506 ASSERT(so
->so_peercred
== NULL
);
508 ASSERT(so
->so_oobmsg
== NULL
);
510 ASSERT(so
->so_rcv_q_head
== NULL
);
512 list_destroy(&so
->so_acceptq_list
);
513 list_destroy(&so
->so_acceptq_defer
);
514 ASSERT(!list_link_active(&so
->so_acceptq_node
));
515 ASSERT(so
->so_listener
== NULL
);
517 ASSERT(so
->so_filter_active
== 0);
518 ASSERT(so
->so_filter_tx
== 0);
519 ASSERT(so
->so_filter_top
== NULL
);
520 ASSERT(so
->so_filter_bottom
== NULL
);
522 ASSERT(vp
->v_data
== so
);
523 ASSERT(vn_matchops(vp
, &socket_vnodeops
));
527 mutex_destroy(&so
->so_lock
);
528 mutex_destroy(&so
->so_acceptq_lock
);
529 rw_destroy(&so
->so_fallback_rwlock
);
531 cv_destroy(&so
->so_state_cv
);
532 cv_destroy(&so
->so_single_cv
);
533 cv_destroy(&so
->so_read_cv
);
534 cv_destroy(&so
->so_acceptq_cv
);
535 cv_destroy(&so
->so_snd_cv
);
536 cv_destroy(&so
->so_rcv_cv
);
537 cv_destroy(&so
->so_closing_cv
);
541 sonode_init(struct sonode
*so
, struct sockparams
*sp
, int family
,
542 int type
, int protocol
, sonodeops_t
*sops
)
555 so
->so_family
= family
;
557 so
->so_protocol
= protocol
;
559 SOCK_CONNID_INIT(so
->so_proto_connid
);
562 so
->so_linger
.l_onoff
= 0;
563 so
->so_linger
.l_linger
= 0;
569 ASSERT(so
->so_oobmsg
== NULL
);
573 ASSERT(so
->so_peercred
== NULL
);
575 so
->so_zoneid
= getzoneid();
577 so
->so_sockparams
= sp
;
581 so
->so_not_str
= (sops
!= &sotpi_sonodeops
);
583 so
->so_proto_handle
= NULL
;
585 so
->so_downcalls
= NULL
;
590 vp
->v_vfsp
= rootvfs
;
592 vp
->v_rdev
= sockdev
;
594 so
->so_snd_qfull
= B_FALSE
;
597 so
->so_rcv_wakeup
= B_FALSE
;
598 so
->so_snd_wakeup
= B_FALSE
;
599 so
->so_flowctrld
= B_FALSE
;
602 bzero(&so
->so_poll_list
, sizeof (so
->so_poll_list
));
603 bzero(&so
->so_proto_props
, sizeof (struct sock_proto_props
));
605 bzero(&(so
->so_ksock_callbacks
), sizeof (ksocket_callbacks_t
));
606 so
->so_ksock_cb_arg
= NULL
;
608 so
->so_max_addr_len
= sizeof (struct sockaddr_storage
);
610 so
->so_direct
= NULL
;
616 sonode_fini(struct sonode
*so
)
620 ASSERT(so
->so_count
== 0);
622 if (so
->so_rcv_timer_tid
) {
623 ASSERT(MUTEX_NOT_HELD(&so
->so_lock
));
624 (void) untimeout(so
->so_rcv_timer_tid
);
625 so
->so_rcv_timer_tid
= 0;
628 if (so
->so_poll_list
.ph_list
!= NULL
) {
629 pollwakeup(&so
->so_poll_list
, POLLERR
);
630 pollhead_clean(&so
->so_poll_list
);
633 if (so
->so_direct
!= NULL
)
639 if (so
->so_peercred
!= NULL
) {
640 crfree(so
->so_peercred
);
641 so
->so_peercred
= NULL
;
643 /* Detach and destroy filters */
644 if (so
->so_filter_top
!= NULL
)
645 sof_sonode_cleanup(so
);
647 ASSERT(list_is_empty(&so
->so_acceptq_list
));
648 ASSERT(list_is_empty(&so
->so_acceptq_defer
));
649 ASSERT(!list_link_active(&so
->so_acceptq_node
));
651 ASSERT(so
->so_rcv_queued
== 0);
652 ASSERT(so
->so_rcv_q_head
== NULL
);
653 ASSERT(so
->so_rcv_q_last_head
== NULL
);
654 ASSERT(so
->so_rcv_head
== NULL
);
655 ASSERT(so
->so_rcv_last_head
== NULL
);