4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015, Joyent, Inc. All rights reserved.
25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28 #include <sys/types.h>
29 #include <sys/t_lock.h>
30 #include <sys/param.h>
31 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
38 #include <sys/vnode.h>
39 #include <sys/debug.h>
40 #include <sys/errno.h>
45 #include <sys/termios.h>
46 #include <sys/stream.h>
47 #include <sys/strsubr.h>
48 #include <sys/strsun.h>
49 #include <sys/esunddi.h>
50 #include <sys/flock.h>
51 #include <sys/modctl.h>
52 #include <sys/cmn_err.h>
53 #include <sys/mkdev.h>
54 #include <sys/pathname.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <netinet/in.h>
65 #include <sys/ucred.h>
67 #include <sys/tiuser.h>
68 #define _SUN_TPI_VERSION 2
69 #include <sys/tihdr.h>
73 #include "sockcommon.h"
74 #include "sockfilter_impl.h"
76 #include "socktpi_impl.h"
80 * Macros that operate on struct cmsghdr.
81 * The CMSG_VALID macro does not assume that the last option buffer is padded.
83 #define CMSG_CONTENT(cmsg) (&((cmsg)[1]))
84 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr))
85 #define CMSG_VALID(cmsg, start, end) \
86 (ISALIGNED_cmsghdr(cmsg) && \
87 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \
88 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \
89 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \
90 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
91 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */
93 dev_t sockdev
; /* For fsid in getattr */
95 struct socklist socklist
;
97 struct kmem_cache
*socket_cache
;
100 * sockconf_lock protects the socket configuration (socket types and
101 * socket filters) which is changed via the sockconfig system call.
103 krwlock_t sockconf_lock
;
105 static int sockfs_update(kstat_t
*, int);
106 static int sockfs_snapshot(kstat_t
*, void *, int);
107 extern smod_info_t
*sotpi_smod_create(void);
109 extern void sendfile_init();
111 extern int modrootloaded
;
113 #define ADRSTRLEN (2 * sizeof (void *) + 1)
115 * Kernel structure for passing the sockinfo data back up to the user.
116 * The strings array allows us to convert AF_UNIX addresses into strings
117 * with a common method regardless of which n-bit kernel we're running.
120 struct sockinfo ks_si
;
121 char ks_straddr
[3][ADRSTRLEN
];
125 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
126 * Returns with the vnode held.
129 sogetvp(char *devpath
, vnode_t
**vpp
, int uioflag
)
136 ASSERT(uioflag
== UIO_SYSSPACE
|| uioflag
== UIO_USERSPACE
);
139 * Lookup the underlying filesystem vnode.
141 error
= lookupname(devpath
, uioflag
, FOLLOW
, NULLVPP
, &vp
);
145 /* Check that it is the correct vnode */
146 if (vp
->v_type
!= VCHR
) {
152 * If devpath went through devfs, the device should already
153 * be configured. If devpath is a mknod file, however, we
154 * need to make sure the device is properly configured.
155 * To do this, we do something similar to spec_open()
156 * except that we resolve to the minor/leaf level since
157 * we need to return a vnode.
159 csp
= VTOS(VTOS(vp
)->s_commonvp
);
160 if (!(csp
->s_flag
& SDIPSET
)) {
161 char *pathname
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
162 error
= ddi_dev_pathname(vp
->v_rdev
, S_IFCHR
, pathname
);
164 error
= devfs_lookupname(pathname
, NULLVPP
, &dvp
);
166 kmem_free(pathname
, MAXPATHLEN
);
169 vp
= dvp
; /* use the devfs vp */
172 /* device is configured at this point */
173 maj
= getmajor(vp
->v_rdev
);
174 if (!STREAMSTAB(maj
)) {
184 * Update the accessed, updated, or changed times in an sonode
185 * with the current time.
187 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
188 * attributes in a fstat call. (They return the current time and 0 for
189 * all timestamps, respectively.) We maintain the current timestamps
190 * here primarily so that should sockmod be popped the resulting
191 * file descriptor will behave like a stream w.r.t. the timestamps.
194 so_update_attrs(struct sonode
*so
, int flag
)
196 time_t now
= gethrestime_sec();
198 if (SOCK_IS_NONSTR(so
))
201 mutex_enter(&so
->so_lock
);
204 SOTOTPI(so
)->sti_atime
= now
;
206 SOTOTPI(so
)->sti_mtime
= now
;
207 mutex_exit(&so
->so_lock
);
210 extern so_create_func_t sock_comm_create_function
;
211 extern so_destroy_func_t sock_comm_destroy_function
;
213 /* yes, we want all defaults */
214 static const struct vfsops sock_vfsops
;
217 * Init function called when sockfs is loaded.
220 sockinit(int fstype
, char *name
)
226 error
= vfs_setfsops(fstype
, &sock_vfsops
);
228 zcmn_err(GLOBAL_ZONEID
, CE_WARN
,
229 "sockinit: bad fstype");
233 socket_cache
= kmem_cache_create("socket_cache",
234 sizeof (struct sonode
), 0, sonode_constructor
,
235 sonode_destructor
, NULL
, NULL
, NULL
, 0);
237 rw_init(&sockconf_lock
, NULL
, RW_DEFAULT
, NULL
);
239 error
= socktpi_init();
252 * Set up the default create and destroy functions
254 sock_comm_create_function
= socket_sonode_create
;
255 sock_comm_destroy_function
= socket_sonode_destroy
;
258 * Build initial list mapping socket parameters to vnode.
261 smod_add(sotpi_smod_create());
266 * If sockets are needed before init runs /sbin/soconfig
267 * it is possible to preload the sockparams list here using
269 * sockconfig(1,2,3, "/dev/tcp", 0);
273 * Create a unique dev_t for use in so_fsid.
276 if ((dev
= getudev()) == (major_t
)-1)
278 sockdev
= makedevice(dev
, 0);
280 mutex_init(&socklist
.sl_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
282 /* Initialize socket filters */
288 (void) vfs_freevfsops_by_type(fstype
);
290 zcmn_err(GLOBAL_ZONEID
, CE_WARN
, err_str
);
295 * Caller must hold the mutex. Used to set SOLOCKED.
298 so_lock_single(struct sonode
*so
)
300 ASSERT(MUTEX_HELD(&so
->so_lock
));
302 while (so
->so_flag
& (SOLOCKED
| SOASYNC_UNBIND
)) {
303 cv_wait_stop(&so
->so_single_cv
, &so
->so_lock
,
304 SO_LOCK_WAKEUP_TIME
);
306 so
->so_flag
|= SOLOCKED
;
310 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
311 * Used to clear SOLOCKED or SOASYNC_UNBIND.
314 so_unlock_single(struct sonode
*so
, int flag
)
316 ASSERT(MUTEX_HELD(&so
->so_lock
));
317 ASSERT(flag
& (SOLOCKED
|SOASYNC_UNBIND
));
318 ASSERT((flag
& ~(SOLOCKED
|SOASYNC_UNBIND
)) == 0);
319 ASSERT(so
->so_flag
& flag
);
321 * Process the T_DISCON_IND on sti_discon_ind_mp.
323 * Call to so_drain_discon_ind will result in so_lock
324 * being dropped and re-acquired later.
326 if (!SOCK_IS_NONSTR(so
)) {
327 sotpi_info_t
*sti
= SOTOTPI(so
);
329 if (sti
->sti_discon_ind_mp
!= NULL
)
330 so_drain_discon_ind(so
);
333 cv_signal(&so
->so_single_cv
);
334 so
->so_flag
&= ~flag
;
338 * Caller must hold the mutex. Used to set SOREADLOCKED.
339 * If the caller wants nonblocking behavior it should set fmode.
342 so_lock_read(struct sonode
*so
, int fmode
)
344 ASSERT(MUTEX_HELD(&so
->so_lock
));
346 while (so
->so_flag
& SOREADLOCKED
) {
347 if (fmode
& (FNDELAY
|FNONBLOCK
))
348 return (EWOULDBLOCK
);
349 cv_wait_stop(&so
->so_read_cv
, &so
->so_lock
,
350 SO_LOCK_WAKEUP_TIME
);
352 so
->so_flag
|= SOREADLOCKED
;
357 * Like so_lock_read above but allows signals.
360 so_lock_read_intr(struct sonode
*so
, int fmode
)
362 ASSERT(MUTEX_HELD(&so
->so_lock
));
364 while (so
->so_flag
& SOREADLOCKED
) {
365 if (fmode
& (FNDELAY
|FNONBLOCK
))
366 return (EWOULDBLOCK
);
367 if (!cv_wait_sig(&so
->so_read_cv
, &so
->so_lock
))
370 so
->so_flag
|= SOREADLOCKED
;
375 * Caller must hold the mutex. Used to clear SOREADLOCKED,
376 * set in so_lock_read() or so_lock_read_intr().
379 so_unlock_read(struct sonode
*so
)
381 ASSERT(MUTEX_HELD(&so
->so_lock
));
382 ASSERT(so
->so_flag
& SOREADLOCKED
);
384 cv_signal(&so
->so_read_cv
);
385 so
->so_flag
&= ~SOREADLOCKED
;
389 * Verify that the specified offset falls within the mblk and
390 * that the resulting pointer is aligned.
391 * Returns NULL if not.
394 sogetoff(mblk_t
*mp
, t_uscalar_t offset
,
395 t_uscalar_t length
, uint_t align_size
)
397 uintptr_t ptr1
, ptr2
;
399 ASSERT(mp
&& mp
->b_wptr
>= mp
->b_rptr
);
400 ptr1
= (uintptr_t)mp
->b_rptr
+ offset
;
401 ptr2
= (uintptr_t)ptr1
+ length
;
402 if (ptr1
< (uintptr_t)mp
->b_rptr
|| ptr2
> (uintptr_t)mp
->b_wptr
) {
406 if ((ptr1
& (align_size
- 1)) != 0) {
410 return ((void *)ptr1
);
414 * Return the AF_UNIX underlying filesystem vnode matching a given name.
415 * Makes sure the sending and the destination sonodes are compatible.
416 * The vnode is returned held.
418 * The underlying filesystem VSOCK vnode has a v_stream pointer that
419 * references the actual stream head (hence indirectly the actual sonode).
422 so_ux_lookup(struct sonode
*so
, struct sockaddr_un
*soun
, vnode_t
**vpp
)
424 vnode_t
*vp
; /* Underlying filesystem vnode */
425 vnode_t
*rvp
; /* real vnode */
426 vnode_t
*svp
; /* sockfs vnode */
430 dprintso(so
, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so
,
433 error
= lookupname(soun
->sun_path
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &vp
);
435 eprintsoline(so
, error
);
440 * Traverse lofs mounts to get the real vnode
442 if (fop_realvp(vp
, &rvp
, NULL
) == 0) {
443 VN_HOLD(rvp
); /* hold the real vnode */
444 VN_RELE(vp
); /* release hold from lookup */
448 if (vp
->v_type
!= VSOCK
) {
450 eprintsoline(so
, error
);
455 * Check that we have permissions to access the destination
458 if (error
= fop_access(vp
, VREAD
|VWRITE
, 0, CRED(), NULL
)) {
459 eprintsoline(so
, error
);
464 * Check if the remote socket has been closed.
466 * Synchronize with vn_rele_stream by holding v_lock while traversing
467 * v_stream->sd_vnode.
469 mutex_enter(&vp
->v_lock
);
470 if (vp
->v_stream
== NULL
) {
471 mutex_exit(&vp
->v_lock
);
472 if (so
->so_type
== SOCK_DGRAM
)
473 error
= EDESTADDRREQ
;
475 error
= ECONNREFUSED
;
477 eprintsoline(so
, error
);
480 ASSERT(vp
->v_stream
->sd_vnode
);
481 svp
= vp
->v_stream
->sd_vnode
;
483 * holding v_lock on underlying filesystem vnode and acquiring
484 * it on sockfs vnode. Assumes that no code ever attempts to
485 * acquire these locks in the reverse order.
488 mutex_exit(&vp
->v_lock
);
490 if (svp
->v_type
!= VSOCK
) {
492 eprintsoline(so
, error
);
498 if (so
->so_type
!= so2
->so_type
) {
500 eprintsoline(so
, error
);
516 * Verify peer address for connect and sendto/sendmsg.
517 * Since sendto/sendmsg would not get synchronous errors from the transport
518 * provider we have to do these ugly checks in the socket layer to
519 * preserve compatibility with SunOS 4.X.
522 so_addr_verify(struct sonode
*so
, const struct sockaddr
*name
,
527 dprintso(so
, 1, ("so_addr_verify(%p, %p, %d)\n",
528 (void *)so
, (void *)name
, namelen
));
530 ASSERT(name
!= NULL
);
532 family
= so
->so_family
;
535 if (name
->sa_family
!= family
) {
536 eprintsoline(so
, EAFNOSUPPORT
);
537 return (EAFNOSUPPORT
);
539 if (namelen
!= (socklen_t
)sizeof (struct sockaddr_in
)) {
540 eprintsoline(so
, EINVAL
);
546 struct sockaddr_in6
*sin6
;
549 if (name
->sa_family
!= family
) {
550 eprintsoline(so
, EAFNOSUPPORT
);
551 return (EAFNOSUPPORT
);
553 if (namelen
!= (socklen_t
)sizeof (struct sockaddr_in6
)) {
554 eprintsoline(so
, EINVAL
);
558 /* Verify that apps don't forget to clear sin6_scope_id etc */
559 sin6
= (struct sockaddr_in6
*)name
;
560 if (sin6
->sin6_scope_id
!= 0 &&
561 !IN6_IS_ADDR_LINKSCOPE(&sin6
->sin6_addr
)) {
562 zcmn_err(getzoneid(), CE_WARN
,
563 "connect/send* with uninitialized sin6_scope_id "
564 "(%d) on socket. Pid = %d\n",
565 (int)sin6
->sin6_scope_id
, (int)curproc
->p_pid
);
571 if (SOTOTPI(so
)->sti_faddr_noxlate
) {
574 if (namelen
< (socklen_t
)sizeof (short)) {
575 eprintsoline(so
, ENOENT
);
578 if (name
->sa_family
!= family
) {
579 eprintsoline(so
, EAFNOSUPPORT
);
580 return (EAFNOSUPPORT
);
582 /* MAXPATHLEN + soun_family + nul termination */
583 if (namelen
> (socklen_t
)(MAXPATHLEN
+ sizeof (short) + 1)) {
584 eprintsoline(so
, ENAMETOOLONG
);
585 return (ENAMETOOLONG
);
592 * Default is don't do any length or sa_family check
593 * to allow non-sockaddr style addresses.
603 * Translate an AF_UNIX sockaddr_un to the transport internal name.
604 * Assumes caller has called so_addr_verify first. The translated
605 * (internal form) address is stored in sti->sti_ux_taddr.
609 so_ux_addr_xlate(struct sonode
*so
, struct sockaddr
*name
,
610 socklen_t namelen
, void **addrp
, socklen_t
*addrlenp
)
613 struct sockaddr_un
*soun
;
617 sotpi_info_t
*sti
= SOTOTPI(so
);
619 dprintso(so
, 1, ("so_ux_addr_xlate(%p, %p, %d)\n",
620 (void *)so
, (void *)name
, namelen
));
622 ASSERT(name
!= NULL
);
623 ASSERT(so
->so_family
== AF_UNIX
);
624 ASSERT(!sti
->sti_faddr_noxlate
);
625 ASSERT(namelen
>= (socklen_t
)sizeof (short));
626 ASSERT(name
->sa_family
== AF_UNIX
);
627 soun
= (struct sockaddr_un
*)name
;
629 * Lookup vnode for the specified path name and verify that
632 error
= so_ux_lookup(so
, soun
, &vp
);
634 eprintsoline(so
, error
);
638 * Use the address of the peer vnode as the address to send
639 * to. We release the peer vnode here. In case it has been
640 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
641 * transport the message will get an error or be dropped.
642 * Note that soua_vp is never dereferenced; it's just a
643 * convenient value by which we can identify the peer.
645 sti
->sti_ux_taddr
.soua_vp
= vp
;
646 sti
->sti_ux_taddr
.soua_magic
= SOU_MAGIC_EXPLICIT
;
647 addr
= &sti
->sti_ux_taddr
;
648 addrlen
= (socklen_t
)sizeof (sti
->sti_ux_taddr
);
649 dprintso(so
, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
650 addrlen
, (void *)vp
));
653 *addrlenp
= (socklen_t
)addrlen
;
658 * Esballoc free function for messages that contain SO_FILEP option.
659 * Decrement the reference count on the file pointers using closef.
662 fdbuf_free(struct fdbuf
*fdbuf
)
667 dprint(1, ("fdbuf_free: %d fds\n", fdbuf
->fd_numfd
));
668 for (i
= 0; i
< fdbuf
->fd_numfd
; i
++) {
670 * We need pointer size alignment for fd_fds. On a LP64
671 * kernel, the required alignment is 8 bytes while
672 * the option headers and values are only 4 bytes
673 * aligned. So it's safer to do a bcopy compared to
674 * assigning fdbuf->fd_fds[i] to fp.
676 bcopy((char *)&fdbuf
->fd_fds
[i
], (char *)&fp
, sizeof (fp
));
677 dprint(1, ("fdbuf_free: [%d] = %p\n", i
, (void *)fp
));
680 if (fdbuf
->fd_ebuf
!= NULL
)
681 kmem_free(fdbuf
->fd_ebuf
, fdbuf
->fd_ebuflen
);
682 kmem_free(fdbuf
, fdbuf
->fd_size
);
686 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
687 * Waits if memory is not available.
690 fdbuf_allocmsg(int size
, struct fdbuf
*fdbuf
)
695 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size
, fdbuf
->fd_numfd
));
696 buf
= kmem_alloc(size
, KM_SLEEP
);
697 fdbuf
->fd_ebuf
= (caddr_t
)buf
;
698 fdbuf
->fd_ebuflen
= size
;
699 fdbuf
->fd_frtn
.free_func
= fdbuf_free
;
700 fdbuf
->fd_frtn
.free_arg
= (caddr_t
)fdbuf
;
702 mp
= esballoc_wait(buf
, size
, BPRI_MED
, &fdbuf
->fd_frtn
);
703 mp
->b_datap
->db_type
= M_PROTO
;
708 * Extract file descriptors from a fdbuf.
709 * Return list in rights/rightslen.
713 fdbuf_extract(struct fdbuf
*fdbuf
, void *rights
, int rightslen
)
720 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
721 fdbuf
->fd_numfd
, rightslen
));
723 numfd
= fdbuf
->fd_numfd
;
724 ASSERT(rightslen
== numfd
* (int)sizeof (int));
727 * Allocate a file descriptor and increment the f_count.
728 * The latter is needed since we always call fdbuf_free
729 * which performs a closef.
732 for (i
= 0; i
< numfd
; i
++) {
733 if ((fd
= ufalloc(0)) == -1)
736 * We need pointer size alignment for fd_fds. On a LP64
737 * kernel, the required alignment is 8 bytes while
738 * the option headers and values are only 4 bytes
739 * aligned. So it's safer to do a bcopy compared to
740 * assigning fdbuf->fd_fds[i] to fp.
742 bcopy((char *)&fdbuf
->fd_fds
[i
], (char *)&fp
, sizeof (fp
));
743 mutex_enter(&fp
->f_tlock
);
745 mutex_exit(&fp
->f_tlock
);
749 audit_fdrecv(fd
, fp
);
750 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
751 i
, fd
, (void *)fp
, fp
->f_count
));
757 * Undo whatever partial work the loop above has done.
763 for (j
= 0; j
< i
; j
++) {
765 ("fdbuf_extract: cleanup[%d] = %d\n", j
, *rp
));
766 (void) closeandsetf(*rp
++, NULL
);
774 * Insert file descriptors into an fdbuf.
775 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
776 * by calling fdbuf_free().
779 fdbuf_create(void *rights
, int rightslen
, struct fdbuf
**fdbufp
)
787 dprint(1, ("fdbuf_create: len %d\n", rightslen
));
789 numfd
= rightslen
/ (int)sizeof (int);
791 fdbufsize
= (int)FDBUF_HDRSIZE
+ (numfd
* (int)sizeof (struct file
*));
792 fdbuf
= kmem_alloc(fdbufsize
, KM_SLEEP
);
793 fdbuf
->fd_size
= fdbufsize
;
795 fdbuf
->fd_ebuf
= NULL
;
796 fdbuf
->fd_ebuflen
= 0;
798 for (i
= 0; i
< numfd
; i
++) {
799 if ((fp
= getf(fds
[i
])) == NULL
) {
803 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
804 i
, fds
[i
], (void *)fp
, fp
->f_count
));
805 mutex_enter(&fp
->f_tlock
);
807 mutex_exit(&fp
->f_tlock
);
809 * The maximum alignment for fdbuf (or any option header
810 * and its value) is 4 bytes. On a LP64 kernel, the alignment
811 * is not sufficient for pointers (fd_fds in this case). Since
812 * we just did a kmem_alloc (we get a double word alignment),
813 * we don't need to do anything on the send side (we lose
814 * the double word alignment because fdbuf goes after an
815 * option header (eg T_unitdata_req) which is only 4 byte
816 * aligned). We take care of this when we extract the file
817 * descriptor in fdbuf_extract or fdbuf_free.
819 fdbuf
->fd_fds
[i
] = fp
;
823 audit_fdsend(fds
[i
], fp
, 0);
830 fdbuf_optlen(int rightslen
)
834 numfd
= rightslen
/ (int)sizeof (int);
836 return ((int)FDBUF_HDRSIZE
+ (numfd
* (int)sizeof (struct file
*)));
840 fdbuf_cmsglen(int fdbuflen
)
842 return (t_uscalar_t
)((fdbuflen
- FDBUF_HDRSIZE
) /
843 (int)sizeof (struct file
*) * (int)sizeof (int));
848 * Return non-zero if the mblk and fdbuf are consistent.
851 fdbuf_verify(mblk_t
*mp
, struct fdbuf
*fdbuf
, int fdbuflen
)
853 if (fdbuflen
>= FDBUF_HDRSIZE
&&
854 fdbuflen
== fdbuf
->fd_size
) {
855 frtn_t
*frp
= mp
->b_datap
->db_frtnp
;
857 * Check that the SO_FILEP portion of the
858 * message has not been modified by
859 * the loopback transport. The sending sockfs generates
860 * a message that is esballoc'ed with the free function
861 * being fdbuf_free() and where free_arg contains the
862 * identical information as the SO_FILEP content.
864 * If any of these constraints are not satisfied we
865 * silently ignore the option.
869 frp
->free_func
== fdbuf_free
&&
870 frp
->free_arg
!= NULL
&&
871 bcmp(frp
->free_arg
, fdbuf
, fdbuflen
) == 0) {
872 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
873 (void *)fdbuf
, fdbuflen
));
876 zcmn_err(getzoneid(), CE_WARN
,
877 "sockfs: mismatched fdbuf content (%p)",
882 zcmn_err(getzoneid(), CE_WARN
,
883 "sockfs: mismatched fdbuf len %d, %d\n",
884 fdbuflen
, fdbuf
->fd_size
);
890 * When the file descriptors returned by sorecvmsg can not be passed
891 * to the application this routine will cleanup the references on
892 * the files. Start at startoff bytes into the buffer.
895 close_fds(void *fdbuf
, int fdbuflen
, int startoff
)
897 int *fds
= (int *)fdbuf
;
898 int numfd
= fdbuflen
/ (int)sizeof (int);
901 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf
, fdbuflen
, startoff
));
903 for (i
= 0; i
< numfd
; i
++) {
906 if (startoff
< (int)sizeof (int)) {
908 * This file descriptor is partially or fully after
912 ("close_fds: cleanup[%d] = %d\n", i
, fds
[i
]));
913 (void) closeandsetf(fds
[i
], NULL
);
915 startoff
-= (int)sizeof (int);
920 * Close all file descriptors contained in the control part starting at
924 so_closefds(void *control
, t_uscalar_t controllen
, int startoff
)
926 struct cmsghdr
*cmsg
;
931 /* Scan control part for file descriptors. */
932 for (cmsg
= (struct cmsghdr
*)control
;
933 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
934 cmsg
= CMSG_NEXT(cmsg
)) {
935 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
936 cmsg
->cmsg_type
== SCM_RIGHTS
) {
937 close_fds(CMSG_CONTENT(cmsg
),
938 (int)CMSG_CONTENTLEN(cmsg
),
939 startoff
- (int)sizeof (struct cmsghdr
));
941 startoff
-= cmsg
->cmsg_len
;
946 * Returns a pointer/length for the file descriptors contained
947 * in the control buffer. Returns with *fdlenp == -1 if there are no
948 * file descriptor options present. This is different than there being
949 * a zero-length file descriptor option.
950 * Fail if there are multiple SCM_RIGHTS cmsgs.
953 so_getfdopt(void *control
, t_uscalar_t controllen
, void **fdsp
, int *fdlenp
)
955 struct cmsghdr
*cmsg
;
959 if (control
== NULL
) {
968 for (cmsg
= (struct cmsghdr
*)control
;
969 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
970 cmsg
= CMSG_NEXT(cmsg
)) {
971 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
972 cmsg
->cmsg_type
== SCM_RIGHTS
) {
975 fds
= CMSG_CONTENT(cmsg
);
976 fdlen
= (int)CMSG_CONTENTLEN(cmsg
);
977 dprint(1, ("so_getfdopt: new %lu\n",
978 (size_t)CMSG_CONTENTLEN(cmsg
)));
982 dprint(1, ("so_getfdopt: NONE\n"));
991 * Return the length of the options including any file descriptor options.
994 so_optlen(void *control
, t_uscalar_t controllen
)
996 struct cmsghdr
*cmsg
;
997 t_uscalar_t optlen
= 0;
1000 if (control
== NULL
)
1003 for (cmsg
= (struct cmsghdr
*)control
;
1004 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
1005 cmsg
= CMSG_NEXT(cmsg
)) {
1006 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1007 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1008 len
= fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg
));
1010 len
= (t_uscalar_t
)CMSG_CONTENTLEN(cmsg
);
1012 optlen
+= (t_uscalar_t
)(_TPI_ALIGN_TOPT(len
) +
1013 sizeof (struct T_opthdr
));
1015 dprint(1, ("so_optlen: controllen %d -> optlen %d\n",
1016 controllen
, optlen
));
1021 * Copy options from control to the mblk. Skip any file descriptor options.
1024 so_cmsg2opt(void *control
, t_uscalar_t controllen
, mblk_t
*mp
)
1026 struct T_opthdr toh
;
1027 struct cmsghdr
*cmsg
;
1029 if (control
== NULL
)
1032 for (cmsg
= (struct cmsghdr
*)control
;
1033 CMSG_VALID(cmsg
, control
, (uintptr_t)control
+ controllen
);
1034 cmsg
= CMSG_NEXT(cmsg
)) {
1036 * Note: The caller handles file descriptors prior
1037 * to calling this function.
1041 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1042 cmsg
->cmsg_type
== SCM_RIGHTS
)
1045 len
= (t_uscalar_t
)CMSG_CONTENTLEN(cmsg
);
1046 toh
.level
= cmsg
->cmsg_level
;
1047 toh
.name
= cmsg
->cmsg_type
;
1048 toh
.len
= len
+ (t_uscalar_t
)sizeof (struct T_opthdr
);
1051 soappendmsg(mp
, &toh
, sizeof (toh
));
1052 soappendmsg(mp
, CMSG_CONTENT(cmsg
), len
);
1053 mp
->b_wptr
+= _TPI_ALIGN_TOPT(len
) - len
;
1054 ASSERT(mp
->b_wptr
<= mp
->b_datap
->db_lim
);
1059 * Return the length of the control message derived from the options.
1060 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1061 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1062 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1063 * also be checked for any possible impacts.
1066 so_cmsglen(mblk_t
*mp
, void *opt
, t_uscalar_t optlen
)
1068 t_uscalar_t cmsglen
= 0;
1069 struct T_opthdr
*tohp
;
1071 t_uscalar_t last_roundup
= 0;
1073 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1075 for (tohp
= (struct T_opthdr
*)opt
;
1076 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1077 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1078 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
1079 tohp
->level
, tohp
->name
, tohp
->len
));
1080 if (tohp
->level
== SOL_SOCKET
&&
1081 (tohp
->name
== SO_SRCADDR
||
1082 tohp
->name
== SO_UNIX_CLOSE
)) {
1085 if (tohp
->level
== SOL_SOCKET
&& tohp
->name
== SO_FILEP
) {
1086 struct fdbuf
*fdbuf
;
1089 fdbuf
= (struct fdbuf
*)_TPI_TOPT_DATA(tohp
);
1090 fdbuflen
= (int)_TPI_TOPT_DATALEN(tohp
);
1092 if (!fdbuf_verify(mp
, fdbuf
, fdbuflen
))
1094 len
= fdbuf_cmsglen(fdbuflen
);
1095 } else if (tohp
->level
== SOL_SOCKET
&&
1096 tohp
->name
== SCM_TIMESTAMP
) {
1098 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1099 len
= sizeof (struct timeval
);
1101 len
= sizeof (struct timeval32
);
1104 len
= (t_uscalar_t
)_TPI_TOPT_DATALEN(tohp
);
1107 * Exclude roundup for last option to not set
1108 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1110 last_roundup
= (t_uscalar_t
)
1111 (ROUNDUP_cmsglen(len
+ (int)sizeof (struct cmsghdr
)) -
1112 (len
+ (int)sizeof (struct cmsghdr
)));
1113 cmsglen
+= (t_uscalar_t
)(len
+ (int)sizeof (struct cmsghdr
)) +
1116 cmsglen
-= last_roundup
;
1117 dprint(1, ("so_cmsglen: optlen %d -> cmsglen %d\n",
1123 * Copy options from options to the control. Convert SO_FILEP to
1125 * Returns errno or zero.
1126 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1127 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1128 * also be checked for any possible impacts.
1131 so_opt2cmsg(mblk_t
*mp
, void *opt
, t_uscalar_t optlen
, void *control
,
1132 t_uscalar_t controllen
)
1134 struct T_opthdr
*tohp
;
1135 struct cmsghdr
*cmsg
;
1136 struct fdbuf
*fdbuf
;
1139 #if defined(DEBUG) || defined(__lint)
1140 struct cmsghdr
*cend
= (struct cmsghdr
*)
1141 (((uint8_t *)control
) + ROUNDUP_cmsglen(controllen
));
1143 cmsg
= (struct cmsghdr
*)control
;
1145 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1147 for (tohp
= (struct T_opthdr
*)opt
;
1148 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1149 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1150 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
1151 tohp
->level
, tohp
->name
, tohp
->len
));
1153 if (tohp
->level
== SOL_SOCKET
&&
1154 (tohp
->name
== SO_SRCADDR
||
1155 tohp
->name
== SO_UNIX_CLOSE
)) {
1158 ASSERT((uintptr_t)cmsg
<= (uintptr_t)control
+ controllen
);
1159 if (tohp
->level
== SOL_SOCKET
&& tohp
->name
== SO_FILEP
) {
1160 fdbuf
= (struct fdbuf
*)_TPI_TOPT_DATA(tohp
);
1161 fdbuflen
= (int)_TPI_TOPT_DATALEN(tohp
);
1163 if (!fdbuf_verify(mp
, fdbuf
, fdbuflen
))
1167 fdlen
= (int)fdbuf_cmsglen(
1168 (int)_TPI_TOPT_DATALEN(tohp
));
1170 cmsg
->cmsg_level
= tohp
->level
;
1171 cmsg
->cmsg_type
= SCM_RIGHTS
;
1172 cmsg
->cmsg_len
= (socklen_t
)(fdlen
+
1173 sizeof (struct cmsghdr
));
1175 error
= fdbuf_extract(fdbuf
,
1176 CMSG_CONTENT(cmsg
), fdlen
);
1179 } else if (tohp
->level
== SOL_SOCKET
&&
1180 tohp
->name
== SCM_TIMESTAMP
) {
1181 timestruc_t
*timestamp
;
1183 cmsg
->cmsg_level
= tohp
->level
;
1184 cmsg
->cmsg_type
= tohp
->name
;
1187 (timestruc_t
*)P2ROUNDUP((intptr_t)&tohp
[1],
1190 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1193 cmsg
->cmsg_len
= sizeof (struct timeval
) +
1194 sizeof (struct cmsghdr
);
1195 tv
.tv_sec
= timestamp
->tv_sec
;
1196 tv
.tv_usec
= timestamp
->tv_nsec
/
1197 (NANOSEC
/ MICROSEC
);
1199 * on LP64 systems, the struct timeval in
1200 * the destination will not be 8-byte aligned,
1201 * so use bcopy to avoid alignment trouble
1203 bcopy(&tv
, CMSG_CONTENT(cmsg
), sizeof (tv
));
1205 struct timeval32
*time32
;
1207 cmsg
->cmsg_len
= sizeof (struct timeval32
) +
1208 sizeof (struct cmsghdr
);
1209 time32
= (struct timeval32
*)CMSG_CONTENT(cmsg
);
1210 time32
->tv_sec
= (time32_t
)timestamp
->tv_sec
;
1212 (int32_t)(timestamp
->tv_nsec
/
1213 (NANOSEC
/ MICROSEC
));
1217 cmsg
->cmsg_level
= tohp
->level
;
1218 cmsg
->cmsg_type
= tohp
->name
;
1219 cmsg
->cmsg_len
= (socklen_t
)(_TPI_TOPT_DATALEN(tohp
) +
1220 sizeof (struct cmsghdr
));
1222 /* copy content to control data part */
1223 bcopy(&tohp
[1], CMSG_CONTENT(cmsg
),
1224 CMSG_CONTENTLEN(cmsg
));
1226 /* move to next CMSG structure! */
1227 cmsg
= CMSG_NEXT(cmsg
);
1229 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
1230 control
, controllen
, (void *)cend
, (void *)cmsg
));
1231 ASSERT(cmsg
<= cend
);
1236 * Extract the SO_SRCADDR option value if present.
1239 so_getopt_srcaddr(void *opt
, t_uscalar_t optlen
, void **srcp
,
1240 t_uscalar_t
*srclenp
)
1242 struct T_opthdr
*tohp
;
1244 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1246 ASSERT(srcp
!= NULL
&& srclenp
!= NULL
);
1250 for (tohp
= (struct T_opthdr
*)opt
;
1251 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1252 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1253 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
1254 tohp
->level
, tohp
->name
, tohp
->len
));
1255 if (tohp
->level
== SOL_SOCKET
&&
1256 tohp
->name
== SO_SRCADDR
) {
1257 *srcp
= _TPI_TOPT_DATA(tohp
);
1258 *srclenp
= (t_uscalar_t
)_TPI_TOPT_DATALEN(tohp
);
1264 * Verify if the SO_UNIX_CLOSE option is present.
1267 so_getopt_unix_close(void *opt
, t_uscalar_t optlen
)
1269 struct T_opthdr
*tohp
;
1271 ASSERT(__TPI_TOPT_ISALIGNED(opt
));
1273 for (tohp
= (struct T_opthdr
*)opt
;
1274 tohp
&& _TPI_TOPT_VALID(tohp
, opt
, (uintptr_t)opt
+ optlen
);
1275 tohp
= _TPI_TOPT_NEXTHDR(opt
, optlen
, tohp
)) {
1277 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1278 tohp
->level
, tohp
->name
, tohp
->len
));
1279 if (tohp
->level
== SOL_SOCKET
&&
1280 tohp
->name
== SO_UNIX_CLOSE
)
1287 * Allocate an M_PROTO message.
1289 * If allocation fails the behavior depends on sleepflg:
1290 * _ALLOC_NOSLEEP fail immediately
1291 * _ALLOC_INTR sleep for memory until a signal is caught
1292 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1295 soallocproto(size_t size
, int sleepflg
, cred_t
*cr
)
1299 /* Round up size for reuse */
1300 size
= MAX(size
, 64);
1302 mp
= allocb_cred(size
, cr
, curproc
->p_pid
);
1304 mp
= allocb(size
, BPRI_MED
);
1307 int error
; /* Dummy - error not returned to caller */
1312 mp
= allocb_cred_wait(size
, STR_NOSIG
, &error
,
1313 cr
, curproc
->p_pid
);
1315 mp
= allocb_wait(size
, BPRI_MED
, STR_NOSIG
,
1322 mp
= allocb_cred_wait(size
, 0, &error
, cr
,
1325 mp
= allocb_wait(size
, BPRI_MED
, 0, &error
);
1328 /* Caught signal while sleeping for memory */
1329 eprintline(ENOBUFS
);
1333 case _ALLOC_NOSLEEP
:
1335 eprintline(ENOBUFS
);
1339 DB_TYPE(mp
) = M_PROTO
;
1344 * Allocate an M_PROTO message with a single component.
1345 * len is the length of buf. size is the amount to allocate.
1347 * buf can be NULL with a non-zero len.
1348 * This results in a bzero'ed chunk being placed in the message.
1351 soallocproto1(const void *buf
, ssize_t len
, ssize_t size
, int sleepflg
,
1359 ASSERT(size
>= len
);
1360 /* Round up size for reuse */
1361 size
= MAX(size
, 64);
1362 mp
= soallocproto(size
, sleepflg
, cr
);
1365 mp
->b_datap
->db_type
= M_PROTO
;
1368 bcopy(buf
, mp
->b_wptr
, len
);
1370 bzero(mp
->b_wptr
, len
);
1377 * Append buf/len to mp.
1378 * The caller has to ensure that there is enough room in the mblk.
1380 * buf can be NULL with a non-zero len.
1381 * This results in a bzero'ed chunk being placed in the message.
1384 soappendmsg(mblk_t
*mp
, const void *buf
, ssize_t len
)
1389 /* Assert for room left */
1390 ASSERT(mp
->b_datap
->db_lim
- mp
->b_wptr
>= len
);
1392 bcopy(buf
, mp
->b_wptr
, len
);
1394 bzero(mp
->b_wptr
, len
);
1400 * Create a message using two kernel buffers.
1401 * If size is set that will determine the allocation size (e.g. for future
1402 * soappendmsg calls). If size is zero it is derived from the buffer
1406 soallocproto2(const void *buf1
, ssize_t len1
, const void *buf2
, ssize_t len2
,
1407 ssize_t size
, int sleepflg
, cred_t
*cr
)
1413 ASSERT(size
>= len1
+ len2
);
1415 mp
= soallocproto1(buf1
, len1
, size
, sleepflg
, cr
);
1417 soappendmsg(mp
, buf2
, len2
);
1422 * Create a message using three kernel buffers.
1423 * If size is set that will determine the allocation size (for future
1424 * soappendmsg calls). If size is zero it is derived from the buffer
1428 soallocproto3(const void *buf1
, ssize_t len1
, const void *buf2
, ssize_t len2
,
1429 const void *buf3
, ssize_t len3
, ssize_t size
, int sleepflg
, cred_t
*cr
)
1434 size
= len1
+ len2
+len3
;
1435 ASSERT(size
>= len1
+ len2
+ len3
);
1437 mp
= soallocproto1(buf1
, len1
, size
, sleepflg
, cr
);
1439 soappendmsg(mp
, buf2
, len2
);
1440 soappendmsg(mp
, buf3
, len3
);
1447 pr_state(uint_t state
, uint_t mode
)
1449 static char buf
[1024];
1452 if (state
& SS_ISCONNECTED
)
1453 (void) strcat(buf
, "ISCONNECTED ");
1454 if (state
& SS_ISCONNECTING
)
1455 (void) strcat(buf
, "ISCONNECTING ");
1456 if (state
& SS_ISDISCONNECTING
)
1457 (void) strcat(buf
, "ISDISCONNECTING ");
1458 if (state
& SS_CANTSENDMORE
)
1459 (void) strcat(buf
, "CANTSENDMORE ");
1461 if (state
& SS_CANTRCVMORE
)
1462 (void) strcat(buf
, "CANTRCVMORE ");
1463 if (state
& SS_ISBOUND
)
1464 (void) strcat(buf
, "ISBOUND ");
1465 if (state
& SS_NDELAY
)
1466 (void) strcat(buf
, "NDELAY ");
1467 if (state
& SS_NONBLOCK
)
1468 (void) strcat(buf
, "NONBLOCK ");
1470 if (state
& SS_ASYNC
)
1471 (void) strcat(buf
, "ASYNC ");
1472 if (state
& SS_ACCEPTCONN
)
1473 (void) strcat(buf
, "ACCEPTCONN ");
1474 if (state
& SS_SAVEDEOR
)
1475 (void) strcat(buf
, "SAVEDEOR ");
1477 if (state
& SS_RCVATMARK
)
1478 (void) strcat(buf
, "RCVATMARK ");
1479 if (state
& SS_OOBPEND
)
1480 (void) strcat(buf
, "OOBPEND ");
1481 if (state
& SS_HAVEOOBDATA
)
1482 (void) strcat(buf
, "HAVEOOBDATA ");
1483 if (state
& SS_HADOOBDATA
)
1484 (void) strcat(buf
, "HADOOBDATA ");
1487 (void) strcat(buf
, "PRIV ");
1488 if (mode
& SM_ATOMIC
)
1489 (void) strcat(buf
, "ATOMIC ");
1491 (void) strcat(buf
, "ADDR ");
1492 if (mode
& SM_CONNREQUIRED
)
1493 (void) strcat(buf
, "CONNREQUIRED ");
1495 if (mode
& SM_FDPASSING
)
1496 (void) strcat(buf
, "FDPASSING ");
1497 if (mode
& SM_EXDATA
)
1498 (void) strcat(buf
, "EXDATA ");
1499 if (mode
& SM_OPTDATA
)
1500 (void) strcat(buf
, "OPTDATA ");
1501 if (mode
& SM_BYTESTREAM
)
1502 (void) strcat(buf
, "BYTESTREAM ");
1507 pr_addr(int family
, struct sockaddr
*addr
, t_uscalar_t addrlen
)
1509 static char buf
[1024];
1511 if (addr
== NULL
|| addrlen
== 0) {
1512 (void) sprintf(buf
, "(len %d) %p", addrlen
, (void *)addr
);
1517 struct sockaddr_in sin
;
1519 bcopy(addr
, &sin
, sizeof (sin
));
1521 (void) sprintf(buf
, "(len %d) %x/%d",
1522 addrlen
, ntohl(sin
.sin_addr
.s_addr
), ntohs(sin
.sin_port
));
1526 struct sockaddr_in6 sin6
;
1527 uint16_t *piece
= (uint16_t *)&sin6
.sin6_addr
;
1529 bcopy((char *)addr
, (char *)&sin6
, sizeof (sin6
));
1530 (void) sprintf(buf
, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
1532 ntohs(piece
[0]), ntohs(piece
[1]),
1533 ntohs(piece
[2]), ntohs(piece
[3]),
1534 ntohs(piece
[4]), ntohs(piece
[5]),
1535 ntohs(piece
[6]), ntohs(piece
[7]),
1536 ntohs(sin6
.sin6_port
));
1540 struct sockaddr_un
*soun
= (struct sockaddr_un
*)addr
;
1542 (void) sprintf(buf
, "(len %d) %s", addrlen
,
1543 (soun
== NULL
) ? "(none)" : soun
->sun_path
);
1547 (void) sprintf(buf
, "(unknown af %d)", family
);
/*
 * The logical equivalence operator (a if-and-only-if b).
 * Both operands are reduced to truth values and compared, so each argument
 * is evaluated exactly once.
 */
#define	EQUIVALENT(a, b)	(!(a) == !(b))
1557 * Verify limitations and invariants on oob state.
1558 * Return 1 if OK, otherwise 0 so that it can be used as
1559 * ASSERT(verify_oobstate(so));
1562 so_verify_oobstate(struct sonode
*so
)
1566 ASSERT(MUTEX_HELD(&so
->so_lock
));
1569 * The possible state combinations are:
1572 * SS_OOBPEND|SS_HAVEOOBDATA
1573 * SS_OOBPEND|SS_HADOOBDATA
1576 switch (so
->so_state
& (SS_OOBPEND
|SS_HAVEOOBDATA
|SS_HADOOBDATA
)) {
1579 case SS_OOBPEND
|SS_HAVEOOBDATA
:
1580 case SS_OOBPEND
|SS_HADOOBDATA
:
1584 printf("Bad oob state 1 (%p): state %s\n",
1585 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1589 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1590 if ((so
->so_state
& (SS_RCVATMARK
|SS_OOBPEND
)) == SS_RCVATMARK
) {
1591 printf("Bad oob state 2 (%p): state %s\n",
1592 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1597 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1598 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
1600 havemark
= (SOCK_IS_NONSTR(so
)) ? so
->so_oobmark
> 0 :
1601 SOTOTPI(so
)->sti_oobsigcnt
> 0;
1603 if (!EQUIVALENT(havemark
|| (so
->so_state
& SS_RCVATMARK
),
1604 so
->so_state
& SS_OOBPEND
)) {
1605 printf("Bad oob state 3 (%p): state %s\n",
1606 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1611 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1613 if (!(so
->so_options
& SO_OOBINLINE
) &&
1614 !EQUIVALENT(so
->so_oobmsg
!= NULL
, so
->so_state
& SS_HAVEOOBDATA
)) {
1615 printf("Bad oob state 4 (%p): state %s\n",
1616 (void *)so
, pr_state(so
->so_state
, so
->so_mode
));
1620 if (!SOCK_IS_NONSTR(so
) &&
1621 SOTOTPI(so
)->sti_oobsigcnt
< SOTOTPI(so
)->sti_oobcnt
) {
1622 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
1623 (void *)so
, SOTOTPI(so
)->sti_oobsigcnt
,
1624 SOTOTPI(so
)->sti_oobcnt
,
1625 pr_state(so
->so_state
, so
->so_mode
));
1634 /* initialize sockfs zone specific kstat related items */
1636 sock_kstat_init(zoneid_t zoneid
)
1640 ksp
= kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1641 KSTAT_TYPE_RAW
, 0, KSTAT_FLAG_VAR_SIZE
|KSTAT_FLAG_VIRTUAL
, zoneid
);
1644 ksp
->ks_update
= sockfs_update
;
1645 ksp
->ks_snapshot
= sockfs_snapshot
;
1646 ksp
->ks_lock
= &socklist
.sl_lock
;
1647 ksp
->ks_private
= (void *)(uintptr_t)zoneid
;
1654 /* tear down sockfs zone specific kstat related items */
1657 sock_kstat_fini(zoneid_t zoneid
, void *arg
)
1659 kstat_t
*ksp
= (kstat_t
*)arg
;
1662 ASSERT(zoneid
== (zoneid_t
)(uintptr_t)ksp
->ks_private
);
1669 * Note that nactive is going to be different for each zone.
1670 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1671 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1672 * buffer. This is safe, but if the buffer is too small, user will not be
1673 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1674 * driver will keep it locked between the update and the snapshot, so no
1675 * other process (zone) can currently get inbetween resulting in a wrong size
1676 * buffer allocation.
1679 sockfs_update(kstat_t
*ksp
, int rw
)
1681 uint_t nactive
= 0; /* # of active AF_UNIX sockets */
1682 struct sonode
*so
; /* current sonode on socklist */
1683 zoneid_t myzoneid
= (zoneid_t
)(uintptr_t)ksp
->ks_private
;
1685 ASSERT((zoneid_t
)(uintptr_t)ksp
->ks_private
== getzoneid());
1687 if (rw
== KSTAT_WRITE
) { /* bounce all writes */
1691 for (so
= socklist
.sl_list
; so
!= NULL
; so
= SOTOTPI(so
)->sti_next_so
) {
1692 if (so
->so_count
!= 0 && so
->so_zoneid
== myzoneid
) {
1696 ksp
->ks_ndata
= nactive
;
1697 ksp
->ks_data_size
= nactive
* sizeof (struct k_sockinfo
);
1703 sockfs_snapshot(kstat_t
*ksp
, void *buf
, int rw
)
1705 int ns
; /* # of sonodes we've copied */
1706 struct sonode
*so
; /* current sonode on socklist */
1707 struct k_sockinfo
*pksi
; /* where we put sockinfo data */
1708 t_uscalar_t sn_len
; /* soa_len */
1709 zoneid_t myzoneid
= (zoneid_t
)(uintptr_t)ksp
->ks_private
;
1712 ASSERT((zoneid_t
)(uintptr_t)ksp
->ks_private
== getzoneid());
1714 ksp
->ks_snaptime
= gethrtime();
1716 if (rw
== KSTAT_WRITE
) { /* bounce all writes */
1721 * for each sonode on the socklist, we massage the important
1722 * info into buf, in k_sockinfo format.
1724 pksi
= (struct k_sockinfo
*)buf
;
1726 for (so
= socklist
.sl_list
; so
!= NULL
; so
= SOTOTPI(so
)->sti_next_so
) {
1727 /* only stuff active sonodes and the same zone: */
1728 if (so
->so_count
== 0 || so
->so_zoneid
!= myzoneid
) {
1733 * If the sonode was activated between the update and the
1734 * snapshot, we're done - as this is only a snapshot.
1736 if ((caddr_t
)(pksi
) >= (caddr_t
)buf
+ ksp
->ks_data_size
) {
1741 /* copy important info into buf: */
1742 pksi
->ks_si
.si_size
= sizeof (struct k_sockinfo
);
1743 pksi
->ks_si
.si_family
= so
->so_family
;
1744 pksi
->ks_si
.si_type
= so
->so_type
;
1745 pksi
->ks_si
.si_flag
= so
->so_flag
;
1746 pksi
->ks_si
.si_state
= so
->so_state
;
1747 pksi
->ks_si
.si_serv_type
= sti
->sti_serv_type
;
1748 pksi
->ks_si
.si_ux_laddr_sou_magic
=
1749 sti
->sti_ux_laddr
.soua_magic
;
1750 pksi
->ks_si
.si_ux_faddr_sou_magic
=
1751 sti
->sti_ux_faddr
.soua_magic
;
1752 pksi
->ks_si
.si_laddr_soa_len
= sti
->sti_laddr
.soa_len
;
1753 pksi
->ks_si
.si_faddr_soa_len
= sti
->sti_faddr
.soa_len
;
1754 pksi
->ks_si
.si_szoneid
= so
->so_zoneid
;
1755 pksi
->ks_si
.si_faddr_noxlate
= sti
->sti_faddr_noxlate
;
1757 mutex_enter(&so
->so_lock
);
1759 if (sti
->sti_laddr_sa
!= NULL
) {
1760 ASSERT(sti
->sti_laddr_sa
->sa_data
!= NULL
);
1761 sn_len
= sti
->sti_laddr_len
;
1762 ASSERT(sn_len
<= sizeof (short) +
1763 sizeof (pksi
->ks_si
.si_laddr_sun_path
));
1765 pksi
->ks_si
.si_laddr_family
=
1766 sti
->sti_laddr_sa
->sa_family
;
1768 /* AF_UNIX socket names are NULL terminated */
1769 (void) strncpy(pksi
->ks_si
.si_laddr_sun_path
,
1770 sti
->sti_laddr_sa
->sa_data
,
1771 sizeof (pksi
->ks_si
.si_laddr_sun_path
));
1772 sn_len
= strlen(pksi
->ks_si
.si_laddr_sun_path
);
1774 pksi
->ks_si
.si_laddr_sun_path
[sn_len
] = 0;
1777 if (sti
->sti_faddr_sa
!= NULL
) {
1778 ASSERT(sti
->sti_faddr_sa
->sa_data
!= NULL
);
1779 sn_len
= sti
->sti_faddr_len
;
1780 ASSERT(sn_len
<= sizeof (short) +
1781 sizeof (pksi
->ks_si
.si_faddr_sun_path
));
1783 pksi
->ks_si
.si_faddr_family
=
1784 sti
->sti_faddr_sa
->sa_family
;
1786 (void) strncpy(pksi
->ks_si
.si_faddr_sun_path
,
1787 sti
->sti_faddr_sa
->sa_data
,
1788 sizeof (pksi
->ks_si
.si_faddr_sun_path
));
1789 sn_len
= strlen(pksi
->ks_si
.si_faddr_sun_path
);
1791 pksi
->ks_si
.si_faddr_sun_path
[sn_len
] = 0;
1794 mutex_exit(&so
->so_lock
);
1796 (void) sprintf(pksi
->ks_straddr
[0], "%p", (void *)so
);
1797 (void) sprintf(pksi
->ks_straddr
[1], "%p",
1798 (void *)sti
->sti_ux_laddr
.soua_vp
);
1799 (void) sprintf(pksi
->ks_straddr
[2], "%p",
1800 (void *)sti
->sti_ux_faddr
.soua_vp
);
1811 soreadfile(file_t
*fp
, uchar_t
*buf
, uoff_t fileoff
, int *err
, size_t size
)
1814 struct iovec aiov
[1];
1815 register vnode_t
*vp
;
1826 aiov
[0].iov_base
= (caddr_t
)buf
;
1827 aiov
[0].iov_len
= size
;
1829 cnt
= (ssize_t
)size
;
1830 (void) fop_rwlock(vp
, rwflag
, NULL
);
1832 auio
.uio_loffset
= fileoff
;
1833 auio
.uio_iov
= aiov
;
1834 auio
.uio_iovcnt
= iovcnt
;
1835 auio
.uio_resid
= cnt
;
1836 auio
.uio_segflg
= UIO_SYSSPACE
;
1837 auio
.uio_llimit
= MAXOFFSET_T
;
1838 auio
.uio_fmode
= fflag
;
1839 auio
.uio_extflg
= UIO_COPY_CACHED
;
1841 ioflag
= auio
.uio_fmode
& (FAPPEND
|FSYNC
|FDSYNC
|FRSYNC
);
1843 /* If read sync is not asked for, filter sync flags */
1844 if ((ioflag
& FRSYNC
) == 0)
1845 ioflag
&= ~(FSYNC
|FDSYNC
);
1846 error
= fop_read(vp
, &auio
, ioflag
, fp
->f_cred
, NULL
);
1847 cnt
-= auio
.uio_resid
;
1849 fop_rwunlock(vp
, rwflag
, NULL
);
1851 if (error
== EINTR
&& cnt
!= 0)
/*
 * Copy `size' bytes from `from' into the kernel buffer `to'.
 *
 * When fromkernel is non-zero the source is a kernel address and a plain
 * bcopy() is used, which always succeeds (returns 0).  Otherwise the copy
 * goes through xcopyin() and its result is returned.
 */
int
so_copyin(const void *from, void *to, size_t size, int fromkernel)
{
	if (!fromkernel)
		return (xcopyin(from, to, size));

	bcopy(from, to, size);
	return (0);
}
/*
 * Copy `size' bytes from the kernel buffer `from' to `to'.
 *
 * When tokernel is non-zero the destination is a kernel address and a plain
 * bcopy() is used, which always succeeds (returns 0).  Otherwise the copy
 * goes through xcopyout() and its result is returned.
 */
int
so_copyout(const void *from, void *to, size_t size, int tokernel)
{
	if (!tokernel)
		return (xcopyout(from, to, size));

	bcopy(from, to, size);
	return (0);
}