dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / sockfs / sockcommon.c
blob8038a0bac9663dafbddbc9c319d9d0e52e89a5f2
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017 Sebastian Wiedenroth
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/sysmacros.h>
31 #include <sys/debug.h>
32 #include <sys/cmn_err.h>
33 #include <sys/vfs.h>
34 #include <sys/policy.h>
35 #include <sys/modctl.h>
37 #include <sys/sunddi.h>
39 #include <sys/strsun.h>
40 #include <sys/stropts.h>
41 #include <sys/strsubr.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/uio.h>
46 #include <inet/ipclassifier.h>
47 #include "sockcommon.h"
48 #include "sockfilter_impl.h"
49 #include "socktpi.h"
50 #include "sodirect.h"
51 #include <inet/ip.h>
53 extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print;
/*
 * Common socket access functions.
 *
 * Instead of accessing the sonode switch directly (i.e., via SOP_xxx()),
 * callers should use the corresponding socket_xxx() function.
 */
63 * Try to create a new sonode of the requested <family, type, protocol>.
65 /* ARGSUSED */
66 struct sonode *
67 socket_create(int family, int type, int protocol, char *devpath, char *mod,
68 int flags, struct cred *cr, int *errorp)
70 struct sonode *so;
71 struct sockparams *sp = NULL;
72 int saved_error;
75 * Look for a sockparams entry that match the given criteria.
76 * solookup() returns with the entry held.
78 *errorp = solookup(family, type, protocol, &sp);
79 saved_error = *errorp;
80 if (sp == NULL) {
81 int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
83 * There is no matching sockparams entry. An ephemeral entry is
84 * created if the caller specifies a device or a socket module.
86 if (devpath != NULL) {
87 saved_error = 0;
88 sp = sockparams_hold_ephemeral_bydev(family, type,
89 protocol, devpath, kmflags, errorp);
90 } else if (mod != NULL) {
91 saved_error = 0;
92 sp = sockparams_hold_ephemeral_bymod(family, type,
93 protocol, mod, kmflags, errorp);
94 } else {
95 *errorp = solookup(family, type, 0, &sp);
98 if (sp == NULL) {
99 if (saved_error && (*errorp == EPROTONOSUPPORT ||
100 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
101 *errorp = saved_error;
102 return (NULL);
106 ASSERT(sp->sp_smod_info != NULL);
107 ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP);
108 sp->sp_stats.sps_ncreate.value.ui64++;
109 so = sp->sp_smod_info->smod_sock_create_func(sp, family, type,
110 protocol, flags, errorp, cr);
111 if (so == NULL) {
112 SOCKPARAMS_DEC_REF(sp);
113 } else {
114 if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) {
115 /* Cannot fail, only bumps so_count */
116 (void) fop_open(&SOTOV(so), FREAD|FWRITE, cr, NULL);
117 } else {
118 if (saved_error && (*errorp == EPROTONOSUPPORT ||
119 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT))
120 *errorp = saved_error;
121 socket_destroy(so);
122 so = NULL;
125 return (so);
128 struct sonode *
129 socket_newconn(struct sonode *parent, sock_lower_handle_t lh,
130 sock_downcalls_t *dc, int flags, int *errorp)
132 struct sonode *so;
133 struct sockparams *sp;
134 struct cred *cr;
136 if ((cr = CRED()) == NULL)
137 cr = kcred;
139 sp = parent->so_sockparams;
140 ASSERT(sp != NULL);
142 sp->sp_stats.sps_ncreate.value.ui64++;
143 so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family,
144 parent->so_type, parent->so_protocol, flags, errorp, cr);
145 if (so != NULL) {
146 SOCKPARAMS_INC_REF(sp);
148 so->so_proto_handle = lh;
149 so->so_downcalls = dc;
151 * This function may be called in interrupt context, and CRED()
152 * will be NULL. In this case, pass in kcred.
154 if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) {
155 /* Cannot fail, only bumps so_count */
156 (void) fop_open(&SOTOV(so), FREAD|FWRITE, cr, NULL);
157 } else {
158 socket_destroy(so);
159 so = NULL;
163 return (so);
167 * Bind local endpoint.
170 socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
171 int flags, cred_t *cr)
173 return (SOP_BIND(so, name, namelen, flags, cr));
177 * Turn socket into a listen socket.
180 socket_listen(struct sonode *so, int backlog, cred_t *cr)
182 if (backlog < 0) {
183 backlog = 0;
187 * Use the same qlimit as in BSD. BSD checks the qlimit
188 * before queuing the next connection implying that a
189 * listen(sock, 0) allows one connection to be queued.
190 * BSD also uses 1.5 times the requested backlog.
192 * XNS Issue 4 required a strict interpretation of the backlog.
193 * This has been waived subsequently for Issue 4 and the change
194 * incorporated in XNS Issue 5. So we aren't required to do
195 * anything special for XPG apps.
197 if (backlog >= (INT_MAX - 1) / 3)
198 backlog = INT_MAX;
199 else
200 backlog = backlog * 3 / 2 + 1;
202 return (SOP_LISTEN(so, backlog, cr));
206 * Accept incoming connection.
209 socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop)
211 return (SOP_ACCEPT(lso, fflag, cr, nsop));
215 * Active open.
218 socket_connect(struct sonode *so, struct sockaddr *name,
219 socklen_t namelen, int fflag, int flags, cred_t *cr)
221 int error;
224 * Handle a connect to a name parameter of type AF_UNSPEC like a
225 * connect to a null address. This is the portable method to
226 * unconnect a socket.
228 if ((namelen >= sizeof (sa_family_t)) &&
229 (name->sa_family == AF_UNSPEC)) {
230 name = NULL;
231 namelen = 0;
234 error = SOP_CONNECT(so, name, namelen, fflag, flags, cr);
236 return (error);
240 * Get address of remote node.
243 socket_getpeername(struct sonode *so, struct sockaddr *addr,
244 socklen_t *addrlen, boolean_t accept, cred_t *cr)
246 ASSERT(*addrlen > 0);
247 return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
252 * Get local address.
255 socket_getsockname(struct sonode *so, struct sockaddr *addr,
256 socklen_t *addrlen, cred_t *cr)
258 return (SOP_GETSOCKNAME(so, addr, addrlen, cr));
263 * Called from shutdown().
266 socket_shutdown(struct sonode *so, int how, cred_t *cr)
268 return (SOP_SHUTDOWN(so, how, cr));
272 * Get socket options.
274 /*ARGSUSED*/
276 socket_getsockopt(struct sonode *so, int level, int option_name,
277 void *optval, socklen_t *optlenp, int flags, cred_t *cr)
279 return (SOP_GETSOCKOPT(so, level, option_name, optval,
280 optlenp, flags, cr));
284 * Set socket options
287 socket_setsockopt(struct sonode *so, int level, int option_name,
288 const void *optval, t_uscalar_t optlen, cred_t *cr)
290 int val = 1;
291 /* Caller allocates aligned optval, or passes null */
292 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0);
293 /* If optval is null optlen is 0, and vice-versa */
294 ASSERT(optval != NULL || optlen == 0);
295 ASSERT(optlen != 0 || optval == NULL);
297 if (optval == NULL && optlen == 0)
298 optval = &val;
300 return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
304 socket_sendmsg(struct sonode *so, struct msghdr *msg, struct uio *uiop,
305 cred_t *cr)
307 int error = 0;
308 ssize_t orig_resid = uiop->uio_resid;
311 * Do not bypass the cache if we are doing a local (AF_UNIX) write.
313 if (so->so_family == AF_UNIX)
314 uiop->uio_extflg |= UIO_COPY_CACHED;
315 else
316 uiop->uio_extflg &= ~UIO_COPY_CACHED;
318 error = SOP_SENDMSG(so, msg, uiop, cr);
319 switch (error) {
320 default:
321 break;
322 case EINTR:
323 case ENOMEM:
324 /* EAGAIN is EWOULDBLOCK */
325 case EWOULDBLOCK:
326 /* We did a partial send */
327 if (uiop->uio_resid != orig_resid)
328 error = 0;
329 break;
330 case EPIPE:
331 if (((so->so_mode & SM_KERNEL) == 0) &&
332 ((msg->msg_flags & MSG_NOSIGNAL) == 0)) {
333 tsignal(curthread, SIGPIPE);
335 break;
338 return (error);
342 socket_sendmblk(struct sonode *so, struct msghdr *msg, int fflag,
343 struct cred *cr, mblk_t **mpp)
345 int error = 0;
347 error = SOP_SENDMBLK(so, msg, fflag, cr, mpp);
348 if (error == EPIPE) {
349 tsignal(curthread, SIGPIPE);
351 return (error);
355 socket_recvmsg(struct sonode *so, struct msghdr *msg, struct uio *uiop,
356 cred_t *cr)
358 int error;
359 ssize_t orig_resid = uiop->uio_resid;
362 * Do not bypass the cache when reading data, as the application
363 * is likely to access the data shortly.
365 uiop->uio_extflg |= UIO_COPY_CACHED;
367 error = SOP_RECVMSG(so, msg, uiop, cr);
369 switch (error) {
370 case EINTR:
371 /* EAGAIN is EWOULDBLOCK */
372 case EWOULDBLOCK:
373 /* We did a partial read */
374 if (uiop->uio_resid != orig_resid)
375 error = 0;
376 break;
377 default:
378 break;
380 return (error);
384 socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
385 struct cred *cr, int32_t *rvalp)
387 return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
/*
 * Poll a socket.
 *
 * Forwards every argument unchanged to the socket's poll operation through
 * SOP_POLL() and returns its result.
 */
int
socket_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int error;

	error = SOP_POLL(so, events, anyyet, reventsp, phpp);
	return (error);
}
398 socket_close(struct sonode *so, int flag, struct cred *cr)
400 return (fop_close(SOTOV(so), flag, 1, 0, cr, NULL));
404 socket_close_internal(struct sonode *so, int flag, cred_t *cr)
406 ASSERT(so->so_count == 0);
408 return (SOP_CLOSE(so, flag, cr));
/*
 * Destroy a socket: mark its vnode invalid with vn_invalid() and then
 * release a hold on that vnode with VN_RELE().
 */
void
socket_destroy(struct sonode *so)
{
	struct vnode *vp = SOTOV(so);

	vn_invalid(vp);
	VN_RELE(vp);
}
418 /* ARGSUSED */
419 void
420 socket_destroy_internal(struct sonode *so, cred_t *cr)
422 struct sockparams *sp = so->so_sockparams;
423 ASSERT(so->so_count == 0 && sp != NULL);
425 sp->sp_smod_info->smod_sock_destroy_func(so);
427 SOCKPARAMS_DEC_REF(sp);
431 * TODO Once the common vnode ops is available, then the vnops argument
432 * should be removed.
434 /*ARGSUSED*/
436 sonode_constructor(void *buf, void *cdrarg, int kmflags)
438 struct sonode *so = buf;
439 struct vnode *vp;
441 vp = so->so_vnode = vn_alloc(kmflags);
442 if (vp == NULL) {
443 return (-1);
445 vp->v_data = so;
446 vn_setops(vp, &socket_vnodeops);
448 so->so_priv = NULL;
449 so->so_oobmsg = NULL;
451 so->so_proto_handle = NULL;
453 so->so_peercred = NULL;
455 so->so_rcv_queued = 0;
456 so->so_rcv_q_head = NULL;
457 so->so_rcv_q_last_head = NULL;
458 so->so_rcv_head = NULL;
459 so->so_rcv_last_head = NULL;
460 so->so_rcv_wanted = 0;
461 so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER;
462 so->so_rcv_timer_tid = 0;
463 so->so_rcv_thresh = 0;
465 list_create(&so->so_acceptq_list, sizeof (struct sonode),
466 offsetof(struct sonode, so_acceptq_node));
467 list_create(&so->so_acceptq_defer, sizeof (struct sonode),
468 offsetof(struct sonode, so_acceptq_node));
469 list_link_init(&so->so_acceptq_node);
470 so->so_acceptq_len = 0;
471 so->so_backlog = 0;
472 so->so_listener = NULL;
474 so->so_snd_qfull = B_FALSE;
476 so->so_filter_active = 0;
477 so->so_filter_tx = 0;
478 so->so_filter_defertime = 0;
479 so->so_filter_top = NULL;
480 so->so_filter_bottom = NULL;
482 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL);
483 mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL);
484 rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL);
485 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL);
486 cv_init(&so->so_single_cv, NULL, CV_DEFAULT, NULL);
487 cv_init(&so->so_read_cv, NULL, CV_DEFAULT, NULL);
489 cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL);
490 cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL);
491 cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL);
492 cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL);
493 cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL);
495 return (0);
498 /*ARGSUSED*/
499 void
500 sonode_destructor(void *buf, void *cdrarg)
502 struct sonode *so = buf;
503 struct vnode *vp = SOTOV(so);
505 ASSERT(so->so_priv == NULL);
506 ASSERT(so->so_peercred == NULL);
508 ASSERT(so->so_oobmsg == NULL);
510 ASSERT(so->so_rcv_q_head == NULL);
512 list_destroy(&so->so_acceptq_list);
513 list_destroy(&so->so_acceptq_defer);
514 ASSERT(!list_link_active(&so->so_acceptq_node));
515 ASSERT(so->so_listener == NULL);
517 ASSERT(so->so_filter_active == 0);
518 ASSERT(so->so_filter_tx == 0);
519 ASSERT(so->so_filter_top == NULL);
520 ASSERT(so->so_filter_bottom == NULL);
522 ASSERT(vp->v_data == so);
523 ASSERT(vn_matchops(vp, &socket_vnodeops));
525 vn_free(vp);
527 mutex_destroy(&so->so_lock);
528 mutex_destroy(&so->so_acceptq_lock);
529 rw_destroy(&so->so_fallback_rwlock);
531 cv_destroy(&so->so_state_cv);
532 cv_destroy(&so->so_single_cv);
533 cv_destroy(&so->so_read_cv);
534 cv_destroy(&so->so_acceptq_cv);
535 cv_destroy(&so->so_snd_cv);
536 cv_destroy(&so->so_rcv_cv);
537 cv_destroy(&so->so_closing_cv);
540 void
541 sonode_init(struct sonode *so, struct sockparams *sp, int family,
542 int type, int protocol, sonodeops_t *sops)
544 vnode_t *vp;
546 vp = SOTOV(so);
548 so->so_flag = 0;
550 so->so_state = 0;
551 so->so_mode = 0;
553 so->so_count = 0;
555 so->so_family = family;
556 so->so_type = type;
557 so->so_protocol = protocol;
559 SOCK_CONNID_INIT(so->so_proto_connid);
561 so->so_options = 0;
562 so->so_linger.l_onoff = 0;
563 so->so_linger.l_linger = 0;
564 so->so_sndbuf = 0;
565 so->so_error = 0;
566 so->so_rcvtimeo = 0;
567 so->so_sndtimeo = 0;
569 ASSERT(so->so_oobmsg == NULL);
570 so->so_oobmark = 0;
571 so->so_pgrp = 0;
573 ASSERT(so->so_peercred == NULL);
575 so->so_zoneid = getzoneid();
577 so->so_sockparams = sp;
579 so->so_ops = sops;
581 so->so_not_str = (sops != &sotpi_sonodeops);
583 so->so_proto_handle = NULL;
585 so->so_downcalls = NULL;
587 so->so_copyflag = 0;
589 vn_reinit(vp);
590 vp->v_vfsp = rootvfs;
591 vp->v_type = VSOCK;
592 vp->v_rdev = sockdev;
594 so->so_snd_qfull = B_FALSE;
595 so->so_minpsz = 0;
597 so->so_rcv_wakeup = B_FALSE;
598 so->so_snd_wakeup = B_FALSE;
599 so->so_flowctrld = B_FALSE;
601 so->so_pollev = 0;
602 bzero(&so->so_poll_list, sizeof (so->so_poll_list));
603 bzero(&so->so_proto_props, sizeof (struct sock_proto_props));
605 bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t));
606 so->so_ksock_cb_arg = NULL;
608 so->so_max_addr_len = sizeof (struct sockaddr_storage);
610 so->so_direct = NULL;
612 vn_exists(vp);
615 void
616 sonode_fini(struct sonode *so)
618 vnode_t *vp;
620 ASSERT(so->so_count == 0);
622 if (so->so_rcv_timer_tid) {
623 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
624 (void) untimeout(so->so_rcv_timer_tid);
625 so->so_rcv_timer_tid = 0;
628 if (so->so_poll_list.ph_list != NULL) {
629 pollwakeup(&so->so_poll_list, POLLERR);
630 pollhead_clean(&so->so_poll_list);
633 if (so->so_direct != NULL)
634 sod_sock_fini(so);
636 vp = SOTOV(so);
637 vn_invalid(vp);
639 if (so->so_peercred != NULL) {
640 crfree(so->so_peercred);
641 so->so_peercred = NULL;
643 /* Detach and destroy filters */
644 if (so->so_filter_top != NULL)
645 sof_sonode_cleanup(so);
647 ASSERT(list_is_empty(&so->so_acceptq_list));
648 ASSERT(list_is_empty(&so->so_acceptq_defer));
649 ASSERT(!list_link_active(&so->so_acceptq_node));
651 ASSERT(so->so_rcv_queued == 0);
652 ASSERT(so->so_rcv_q_head == NULL);
653 ASSERT(so->so_rcv_q_last_head == NULL);
654 ASSERT(so->so_rcv_head == NULL);
655 ASSERT(so->so_rcv_last_head == NULL);