// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					BSD.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *		Christoph Rohland:	Elegant non-blocking accept/connect algorithm.
 *		Alexey Kuznetsov:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *		Andrea Arcangeli:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *		Alexey Kuznetsov:	Full scale SMP. Lots of bugs are introduced 8)
 *		Malcolm Beattie	:	Set peercred for socketpair
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
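/*
 * Illustrative user-space sketch of the abstract namespace mentioned above
 * (not part of this file's build; the name "example-abstract" is a made-up
 * placeholder).  The first byte of sun_path is 0 and the name is *not*
 * NUL terminated: its length is conveyed purely through the addrlen argument.
 *
 *	#include <stddef.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int main(void)
 *	{
 *		struct sockaddr_un addr;
 *		int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *		socklen_t len;
 *
 *		memset(&addr, 0, sizeof(addr));
 *		addr.sun_family = AF_UNIX;
 *		addr.sun_path[0] = '\0';		/* abstract namespace marker */
 *		memcpy(addr.sun_path + 1, "example-abstract", 16);
 *		len = offsetof(struct sockaddr_un, sun_path) + 1 + 16;
 *		if (bind(fd, (struct sockaddr *)&addr, len) == 0)
 *			puts("bound in the abstract namespace");
 *		return 0;
 *	}
 */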
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;

static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}
#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}
/*
 *  Check unix socket name:
 *		- should not be zero length.
 *		- if it does not start with a zero byte, it should be NUL terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
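/*
 * Example of the two name forms unix_mkname() accepts (illustrative only):
 *
 *   filesystem name:  sun_family = AF_UNIX, sun_path = "/tmp/sock" (NUL terminated)
 *                     -> matched later via the bound inode, *hashp left at 0
 *   abstract name:    sun_family = AF_UNIX, sun_path = "\0name" (no NUL terminator)
 *                     -> *hashp = unix_hash_fold(csum_partial(...)) over the bytes
 */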
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
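/*
 * User-space view of the mechanism described above (illustrative sketch, not
 * part of this file; fd is assumed to be a connected AF_UNIX SOCK_DGRAM
 * socket whose peer is a /dev/log-style receiver).  A sender that hits the
 * peer's receive-queue limit sees EAGAIN and can wait in poll() for POLLOUT,
 * which is delivered via the peer_wait relay once the receiver drains:
 *
 *	#include <errno.h>
 *	#include <poll.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	int send_with_backpressure(int fd, const char *msg)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *		while (send(fd, msg, strlen(msg), MSG_DONTWAIT) < 0) {
 *			if (errno != EAGAIN && errno != EWOULDBLOCK)
 *				return -1;
 *			// blocked on the receiver's queue: wait for the relayed wake up
 *			if (poll(&pfd, 1, -1) < 0)
 *				return -1;
 *		}
 *		return 0;
 *	}
 */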
354 static int unix_dgram_peer_wake_relay(wait_queue_entry_t
*q
, unsigned mode
, int flags
,
358 wait_queue_head_t
*u_sleep
;
360 u
= container_of(q
, struct unix_sock
, peer_wake
);
362 __remove_wait_queue(&unix_sk(u
->peer_wake
.private)->peer_wait
,
364 u
->peer_wake
.private = NULL
;
366 /* relaying can only happen while the wq still exists */
367 u_sleep
= sk_sleep(&u
->sk
);
369 wake_up_interruptible_poll(u_sleep
, key_to_poll(key
));
374 static int unix_dgram_peer_wake_connect(struct sock
*sk
, struct sock
*other
)
376 struct unix_sock
*u
, *u_other
;
380 u_other
= unix_sk(other
);
382 spin_lock(&u_other
->peer_wait
.lock
);
384 if (!u
->peer_wake
.private) {
385 u
->peer_wake
.private = other
;
386 __add_wait_queue(&u_other
->peer_wait
, &u
->peer_wake
);
391 spin_unlock(&u_other
->peer_wait
.lock
);
395 static void unix_dgram_peer_wake_disconnect(struct sock
*sk
,
398 struct unix_sock
*u
, *u_other
;
401 u_other
= unix_sk(other
);
402 spin_lock(&u_other
->peer_wait
.lock
);
404 if (u
->peer_wake
.private == other
) {
405 __remove_wait_queue(&u_other
->peer_wait
, &u
->peer_wake
);
406 u
->peer_wake
.private = NULL
;
409 spin_unlock(&u_other
->peer_wait
.lock
);
412 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock
*sk
,
415 unix_dgram_peer_wake_disconnect(sk
, other
);
416 wake_up_interruptible_poll(sk_sleep(sk
),
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
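/* A unix socket counts as writable while it is not listening and less than a
 * quarter of its sk_sndbuf is consumed by queued write memory
 * (wmem_alloc << 2 <= sk_sndbuf); see unix_writable() below.
 */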
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
452 static void unix_write_space(struct sock
*sk
)
454 struct socket_wq
*wq
;
457 if (unix_writable(sk
)) {
458 wq
= rcu_dereference(sk
->sk_wq
);
459 if (skwq_has_sleeper(wq
))
460 wake_up_interruptible_sync_poll(&wq
->wait
,
461 EPOLLOUT
| EPOLLWRNORM
| EPOLLWRBAND
);
462 sk_wake_async(sk
, SOCK_WAKE_SPACE
, POLL_OUT
);
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is
		 * disconnected, we signal an error. Messages are lost.
		 * Do not do this when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
488 static void unix_sock_destructor(struct sock
*sk
)
490 struct unix_sock
*u
= unix_sk(sk
);
492 skb_queue_purge(&sk
->sk_receive_queue
);
494 WARN_ON(refcount_read(&sk
->sk_wmem_alloc
));
495 WARN_ON(!sk_unhashed(sk
));
496 WARN_ON(sk
->sk_socket
);
497 if (!sock_flag(sk
, SOCK_DEAD
)) {
498 pr_info("Attempt to release alive unix socket: %p\n", sk
);
503 unix_release_addr(u
->addr
);
505 atomic_long_dec(&unix_nr_socks
);
507 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, -1);
509 #ifdef UNIX_REFCNT_DEBUG
510 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk
,
511 atomic_long_read(&unix_nr_socks
));
515 static void unix_release_sock(struct sock
*sk
, int embrion
)
517 struct unix_sock
*u
= unix_sk(sk
);
523 unix_remove_socket(sk
);
528 sk
->sk_shutdown
= SHUTDOWN_MASK
;
530 u
->path
.dentry
= NULL
;
532 state
= sk
->sk_state
;
533 sk
->sk_state
= TCP_CLOSE
;
534 unix_state_unlock(sk
);
536 wake_up_interruptible_all(&u
->peer_wait
);
538 skpair
= unix_peer(sk
);
540 if (skpair
!= NULL
) {
541 if (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) {
542 unix_state_lock(skpair
);
544 skpair
->sk_shutdown
= SHUTDOWN_MASK
;
545 if (!skb_queue_empty(&sk
->sk_receive_queue
) || embrion
)
546 skpair
->sk_err
= ECONNRESET
;
547 unix_state_unlock(skpair
);
548 skpair
->sk_state_change(skpair
);
549 sk_wake_async(skpair
, SOCK_WAKE_WAITD
, POLL_HUP
);
552 unix_dgram_peer_wake_disconnect(sk
, skpair
);
553 sock_put(skpair
); /* It may now die */
554 unix_peer(sk
) = NULL
;
557 /* Try to flush out this socket. Throw out buffers at least */
559 while ((skb
= skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
560 if (state
== TCP_LISTEN
)
561 unix_release_sock(skb
->sk
, 1);
562 /* passed fds are erased in the kfree_skb hook */
563 UNIXCB(skb
).consumed
= skb
->len
;
	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
589 static void init_peercred(struct sock
*sk
)
591 put_pid(sk
->sk_peer_pid
);
592 if (sk
->sk_peer_cred
)
593 put_cred(sk
->sk_peer_cred
);
594 sk
->sk_peer_pid
= get_pid(task_tgid(current
));
595 sk
->sk_peer_cred
= get_current_cred();
598 static void copy_peercred(struct sock
*sk
, struct sock
*peersk
)
600 put_pid(sk
->sk_peer_pid
);
601 if (sk
->sk_peer_cred
)
602 put_cred(sk
->sk_peer_cred
);
603 sk
->sk_peer_pid
= get_pid(peersk
->sk_peer_pid
);
604 sk
->sk_peer_cred
= get_cred(peersk
->sk_peer_cred
);
607 static int unix_listen(struct socket
*sock
, int backlog
)
610 struct sock
*sk
= sock
->sk
;
611 struct unix_sock
*u
= unix_sk(sk
);
612 struct pid
*old_pid
= NULL
;
615 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
616 goto out
; /* Only stream/seqpacket sockets accept */
619 goto out
; /* No listens on an unbound socket */
621 if (sk
->sk_state
!= TCP_CLOSE
&& sk
->sk_state
!= TCP_LISTEN
)
623 if (backlog
> sk
->sk_max_ack_backlog
)
624 wake_up_interruptible_all(&u
->peer_wait
);
625 sk
->sk_max_ack_backlog
= backlog
;
626 sk
->sk_state
= TCP_LISTEN
;
627 /* set credentials so connect can copy them */
632 unix_state_unlock(sk
);
638 static int unix_release(struct socket
*);
639 static int unix_bind(struct socket
*, struct sockaddr
*, int);
640 static int unix_stream_connect(struct socket
*, struct sockaddr
*,
641 int addr_len
, int flags
);
642 static int unix_socketpair(struct socket
*, struct socket
*);
643 static int unix_accept(struct socket
*, struct socket
*, int, bool);
644 static int unix_getname(struct socket
*, struct sockaddr
*, int);
645 static __poll_t
unix_poll(struct file
*, struct socket
*, poll_table
*);
646 static __poll_t
unix_dgram_poll(struct file
*, struct socket
*,
648 static int unix_ioctl(struct socket
*, unsigned int, unsigned long);
650 static int unix_compat_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
);
652 static int unix_shutdown(struct socket
*, int);
653 static int unix_stream_sendmsg(struct socket
*, struct msghdr
*, size_t);
654 static int unix_stream_recvmsg(struct socket
*, struct msghdr
*, size_t, int);
655 static ssize_t
unix_stream_sendpage(struct socket
*, struct page
*, int offset
,
656 size_t size
, int flags
);
657 static ssize_t
unix_stream_splice_read(struct socket
*, loff_t
*ppos
,
658 struct pipe_inode_info
*, size_t size
,
660 static int unix_dgram_sendmsg(struct socket
*, struct msghdr
*, size_t);
661 static int unix_dgram_recvmsg(struct socket
*, struct msghdr
*, size_t, int);
662 static int unix_dgram_connect(struct socket
*, struct sockaddr
*,
664 static int unix_seqpacket_sendmsg(struct socket
*, struct msghdr
*, size_t);
665 static int unix_seqpacket_recvmsg(struct socket
*, struct msghdr
*, size_t,
668 static int unix_set_peek_off(struct sock
*sk
, int val
)
670 struct unix_sock
*u
= unix_sk(sk
);
672 if (mutex_lock_interruptible(&u
->iolock
))
675 sk
->sk_peek_off
= val
;
676 mutex_unlock(&u
->iolock
);
682 static const struct proto_ops unix_stream_ops
= {
684 .owner
= THIS_MODULE
,
685 .release
= unix_release
,
687 .connect
= unix_stream_connect
,
688 .socketpair
= unix_socketpair
,
689 .accept
= unix_accept
,
690 .getname
= unix_getname
,
694 .compat_ioctl
= unix_compat_ioctl
,
696 .listen
= unix_listen
,
697 .shutdown
= unix_shutdown
,
698 .setsockopt
= sock_no_setsockopt
,
699 .getsockopt
= sock_no_getsockopt
,
700 .sendmsg
= unix_stream_sendmsg
,
701 .recvmsg
= unix_stream_recvmsg
,
702 .mmap
= sock_no_mmap
,
703 .sendpage
= unix_stream_sendpage
,
704 .splice_read
= unix_stream_splice_read
,
705 .set_peek_off
= unix_set_peek_off
,
708 static const struct proto_ops unix_dgram_ops
= {
710 .owner
= THIS_MODULE
,
711 .release
= unix_release
,
713 .connect
= unix_dgram_connect
,
714 .socketpair
= unix_socketpair
,
715 .accept
= sock_no_accept
,
716 .getname
= unix_getname
,
717 .poll
= unix_dgram_poll
,
720 .compat_ioctl
= unix_compat_ioctl
,
722 .listen
= sock_no_listen
,
723 .shutdown
= unix_shutdown
,
724 .setsockopt
= sock_no_setsockopt
,
725 .getsockopt
= sock_no_getsockopt
,
726 .sendmsg
= unix_dgram_sendmsg
,
727 .recvmsg
= unix_dgram_recvmsg
,
728 .mmap
= sock_no_mmap
,
729 .sendpage
= sock_no_sendpage
,
730 .set_peek_off
= unix_set_peek_off
,
733 static const struct proto_ops unix_seqpacket_ops
= {
735 .owner
= THIS_MODULE
,
736 .release
= unix_release
,
738 .connect
= unix_stream_connect
,
739 .socketpair
= unix_socketpair
,
740 .accept
= unix_accept
,
741 .getname
= unix_getname
,
742 .poll
= unix_dgram_poll
,
745 .compat_ioctl
= unix_compat_ioctl
,
747 .listen
= unix_listen
,
748 .shutdown
= unix_shutdown
,
749 .setsockopt
= sock_no_setsockopt
,
750 .getsockopt
= sock_no_getsockopt
,
751 .sendmsg
= unix_seqpacket_sendmsg
,
752 .recvmsg
= unix_seqpacket_recvmsg
,
753 .mmap
= sock_no_mmap
,
754 .sendpage
= sock_no_sendpage
,
755 .set_peek_off
= unix_set_peek_off
,
758 static struct proto unix_proto
= {
760 .owner
= THIS_MODULE
,
761 .obj_size
= sizeof(struct unix_sock
),
764 static struct sock
*unix_create1(struct net
*net
, struct socket
*sock
, int kern
)
766 struct sock
*sk
= NULL
;
769 atomic_long_inc(&unix_nr_socks
);
770 if (atomic_long_read(&unix_nr_socks
) > 2 * get_max_files())
773 sk
= sk_alloc(net
, PF_UNIX
, GFP_KERNEL
, &unix_proto
, kern
);
777 sock_init_data(sock
, sk
);
779 sk
->sk_allocation
= GFP_KERNEL_ACCOUNT
;
780 sk
->sk_write_space
= unix_write_space
;
781 sk
->sk_max_ack_backlog
= net
->unx
.sysctl_max_dgram_qlen
;
782 sk
->sk_destruct
= unix_sock_destructor
;
784 u
->path
.dentry
= NULL
;
786 spin_lock_init(&u
->lock
);
787 atomic_long_set(&u
->inflight
, 0);
788 INIT_LIST_HEAD(&u
->link
);
789 mutex_init(&u
->iolock
); /* single task reading lock */
790 mutex_init(&u
->bindlock
); /* single task binding lock */
791 init_waitqueue_head(&u
->peer_wait
);
792 init_waitqueue_func_entry(&u
->peer_wake
, unix_dgram_peer_wake_relay
);
793 unix_insert_socket(unix_sockets_unbound(sk
), sk
);
796 atomic_long_dec(&unix_nr_socks
);
799 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, 1);
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
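/*
 * Illustrative user-space consequence of the SOCK_RAW case above (sketch, not
 * part of this file): socket(AF_UNIX, SOCK_RAW, 0) succeeds for BSD
 * compatibility and the descriptor behaves like an AF_UNIX datagram socket.
 *
 *	#include <assert.h>
 *	#include <sys/socket.h>
 *
 *	int main(void)
 *	{
 *		int fd = socket(AF_UNIX, SOCK_RAW, 0);
 *		int type;
 *		socklen_t len = sizeof(type);
 *
 *		assert(fd >= 0);
 *		assert(getsockopt(fd, SOL_SOCKET, SO_TYPE, &type, &len) == 0);
 *		assert(type == SOCK_DGRAM);	// the kernel rewrote the type
 *		return 0;
 *	}
 */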
837 static int unix_release(struct socket
*sock
)
839 struct sock
*sk
= sock
->sk
;
844 unix_release_sock(sk
, 0);
850 static int unix_autobind(struct socket
*sock
)
852 struct sock
*sk
= sock
->sk
;
853 struct net
*net
= sock_net(sk
);
854 struct unix_sock
*u
= unix_sk(sk
);
855 static u32 ordernum
= 1;
856 struct unix_address
*addr
;
858 unsigned int retries
= 0;
860 err
= mutex_lock_interruptible(&u
->bindlock
);
869 addr
= kzalloc(sizeof(*addr
) + sizeof(short) + 16, GFP_KERNEL
);
873 addr
->name
->sun_family
= AF_UNIX
;
874 refcount_set(&addr
->refcnt
, 1);
877 addr
->len
= sprintf(addr
->name
->sun_path
+1, "%05x", ordernum
) + 1 + sizeof(short);
878 addr
->hash
= unix_hash_fold(csum_partial(addr
->name
, addr
->len
, 0));
880 spin_lock(&unix_table_lock
);
881 ordernum
= (ordernum
+1)&0xFFFFF;
883 if (__unix_find_socket_byname(net
, addr
->name
, addr
->len
, sock
->type
,
885 spin_unlock(&unix_table_lock
);
		 * __unix_find_socket_byname() may take a long time if many names
		 * are already in use.
		 */
		/* Give up if all names seem to be in use. */
892 if (retries
++ == 0xFFFFF) {
899 addr
->hash
^= sk
->sk_type
;
901 __unix_remove_socket(sk
);
902 smp_store_release(&u
->addr
, addr
);
903 __unix_insert_socket(&unix_socket_table
[addr
->hash
], sk
);
904 spin_unlock(&unix_table_lock
);
out:	mutex_unlock(&u->bindlock);
	return err;
}
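/*
 * Illustrative user-space sketch of autobind (not part of this file): binding
 * with only the address family makes the kernel pick a unique 5-hex-digit
 * abstract name, visible through getsockname().
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int main(void)
 *	{
 *		struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *		socklen_t alen = sizeof(addr);
 *		int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *		// an address length of just sizeof(sa_family_t) requests autobind
 *		if (bind(fd, (struct sockaddr *)&addr, sizeof(sa_family_t)) == 0 &&
 *		    getsockname(fd, (struct sockaddr *)&addr, &alen) == 0)
 *			printf("autobound to \"\\0%.*s\" (%u byte address)\n",
 *			       (int)(alen - sizeof(sa_family_t) - 1),
 *			       addr.sun_path + 1, (unsigned)alen);
 *		return 0;
 *	}
 */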
911 static struct sock
*unix_find_other(struct net
*net
,
912 struct sockaddr_un
*sunname
, int len
,
913 int type
, unsigned int hash
, int *error
)
919 if (sunname
->sun_path
[0]) {
921 err
= kern_path(sunname
->sun_path
, LOOKUP_FOLLOW
, &path
);
924 inode
= d_backing_inode(path
.dentry
);
925 err
= inode_permission(inode
, MAY_WRITE
);
930 if (!S_ISSOCK(inode
->i_mode
))
932 u
= unix_find_socket_byinode(inode
);
936 if (u
->sk_type
== type
)
942 if (u
->sk_type
!= type
) {
948 u
= unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
950 struct dentry
*dentry
;
951 dentry
= unix_sk(u
)->path
.dentry
;
953 touch_atime(&unix_sk(u
)->path
);
966 static int unix_mknod(const char *sun_path
, umode_t mode
, struct path
*res
)
968 struct dentry
*dentry
;
972 * Get the parent directory, calculate the hash for last
975 dentry
= kern_path_create(AT_FDCWD
, sun_path
, &path
, 0);
976 err
= PTR_ERR(dentry
);
981 * All right, let's create it.
983 err
= security_path_mknod(&path
, dentry
, mode
, 0);
985 err
= vfs_mknod(d_inode(path
.dentry
), dentry
, mode
, 0);
987 res
->mnt
= mntget(path
.mnt
);
988 res
->dentry
= dget(dentry
);
991 done_path_create(&path
, dentry
);
995 static int unix_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
997 struct sock
*sk
= sock
->sk
;
998 struct net
*net
= sock_net(sk
);
999 struct unix_sock
*u
= unix_sk(sk
);
1000 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
1001 char *sun_path
= sunaddr
->sun_path
;
1004 struct unix_address
*addr
;
1005 struct hlist_head
*list
;
1006 struct path path
= { };
1009 if (addr_len
< offsetofend(struct sockaddr_un
, sun_family
) ||
1010 sunaddr
->sun_family
!= AF_UNIX
)
1013 if (addr_len
== sizeof(short)) {
1014 err
= unix_autobind(sock
);
1018 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
1024 umode_t mode
= S_IFSOCK
|
1025 (SOCK_INODE(sock
)->i_mode
& ~current_umask());
1026 err
= unix_mknod(sun_path
, mode
, &path
);
1034 err
= mutex_lock_interruptible(&u
->bindlock
);
1043 addr
= kmalloc(sizeof(*addr
)+addr_len
, GFP_KERNEL
);
1047 memcpy(addr
->name
, sunaddr
, addr_len
);
1048 addr
->len
= addr_len
;
1049 addr
->hash
= hash
^ sk
->sk_type
;
1050 refcount_set(&addr
->refcnt
, 1);
1053 addr
->hash
= UNIX_HASH_SIZE
;
1054 hash
= d_backing_inode(path
.dentry
)->i_ino
& (UNIX_HASH_SIZE
- 1);
1055 spin_lock(&unix_table_lock
);
1057 list
= &unix_socket_table
[hash
];
1059 spin_lock(&unix_table_lock
);
1061 if (__unix_find_socket_byname(net
, sunaddr
, addr_len
,
1062 sk
->sk_type
, hash
)) {
1063 unix_release_addr(addr
);
1067 list
= &unix_socket_table
[addr
->hash
];
1071 __unix_remove_socket(sk
);
1072 smp_store_release(&u
->addr
, addr
);
1073 __unix_insert_socket(list
, sk
);
1076 spin_unlock(&unix_table_lock
);
1078 mutex_unlock(&u
->bindlock
);
1086 static void unix_state_double_lock(struct sock
*sk1
, struct sock
*sk2
)
1088 if (unlikely(sk1
== sk2
) || !sk2
) {
1089 unix_state_lock(sk1
);
1093 unix_state_lock(sk1
);
1094 unix_state_lock_nested(sk2
);
1096 unix_state_lock(sk2
);
1097 unix_state_lock_nested(sk1
);
1101 static void unix_state_double_unlock(struct sock
*sk1
, struct sock
*sk2
)
1103 if (unlikely(sk1
== sk2
) || !sk2
) {
1104 unix_state_unlock(sk1
);
1107 unix_state_unlock(sk1
);
1108 unix_state_unlock(sk2
);
1111 static int unix_dgram_connect(struct socket
*sock
, struct sockaddr
*addr
,
1112 int alen
, int flags
)
1114 struct sock
*sk
= sock
->sk
;
1115 struct net
*net
= sock_net(sk
);
1116 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)addr
;
1122 if (alen
< offsetofend(struct sockaddr
, sa_family
))
1125 if (addr
->sa_family
!= AF_UNSPEC
) {
1126 err
= unix_mkname(sunaddr
, alen
, &hash
);
1131 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) &&
1132 !unix_sk(sk
)->addr
&& (err
= unix_autobind(sock
)) != 0)
1136 other
= unix_find_other(net
, sunaddr
, alen
, sock
->type
, hash
, &err
);
1140 unix_state_double_lock(sk
, other
);
1142 /* Apparently VFS overslept socket death. Retry. */
1143 if (sock_flag(other
, SOCK_DEAD
)) {
1144 unix_state_double_unlock(sk
, other
);
1150 if (!unix_may_send(sk
, other
))
1153 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
1159 * 1003.1g breaking connected state with AF_UNSPEC
1162 unix_state_double_lock(sk
, other
);
1166 * If it was connected, reconnect.
1168 if (unix_peer(sk
)) {
1169 struct sock
*old_peer
= unix_peer(sk
);
1170 unix_peer(sk
) = other
;
1171 unix_dgram_peer_wake_disconnect_wakeup(sk
, old_peer
);
1173 unix_state_double_unlock(sk
, other
);
1175 if (other
!= old_peer
)
1176 unix_dgram_disconnected(sk
, old_peer
);
1179 unix_peer(sk
) = other
;
1180 unix_state_double_unlock(sk
, other
);
1185 unix_state_double_unlock(sk
, other
);
1191 static long unix_wait_for_peer(struct sock
*other
, long timeo
)
1193 struct unix_sock
*u
= unix_sk(other
);
1197 prepare_to_wait_exclusive(&u
->peer_wait
, &wait
, TASK_INTERRUPTIBLE
);
1199 sched
= !sock_flag(other
, SOCK_DEAD
) &&
1200 !(other
->sk_shutdown
& RCV_SHUTDOWN
) &&
1201 unix_recvq_full(other
);
1203 unix_state_unlock(other
);
1206 timeo
= schedule_timeout(timeo
);
1208 finish_wait(&u
->peer_wait
, &wait
);
1212 static int unix_stream_connect(struct socket
*sock
, struct sockaddr
*uaddr
,
1213 int addr_len
, int flags
)
1215 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
1216 struct sock
*sk
= sock
->sk
;
1217 struct net
*net
= sock_net(sk
);
1218 struct unix_sock
*u
= unix_sk(sk
), *newu
, *otheru
;
1219 struct sock
*newsk
= NULL
;
1220 struct sock
*other
= NULL
;
1221 struct sk_buff
*skb
= NULL
;
1227 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
1232 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
&&
1233 (err
= unix_autobind(sock
)) != 0)
1236 timeo
= sock_sndtimeo(sk
, flags
& O_NONBLOCK
);
	/* First of all allocate resources.
	   If we only allocate after the state is locked,
	   we will have to recheck everything again in any case.
	 */
1245 /* create new sock for complete connection */
1246 newsk
= unix_create1(sock_net(sk
), NULL
, 0);
1250 /* Allocate skb for sending to listening sock */
1251 skb
= sock_wmalloc(newsk
, 1, 0, GFP_KERNEL
);
1256 /* Find listening sock. */
1257 other
= unix_find_other(net
, sunaddr
, addr_len
, sk
->sk_type
, hash
, &err
);
1261 /* Latch state of peer */
1262 unix_state_lock(other
);
1264 /* Apparently VFS overslept socket death. Retry. */
1265 if (sock_flag(other
, SOCK_DEAD
)) {
1266 unix_state_unlock(other
);
1271 err
= -ECONNREFUSED
;
1272 if (other
->sk_state
!= TCP_LISTEN
)
1274 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1277 if (unix_recvq_full(other
)) {
1282 timeo
= unix_wait_for_peer(other
, timeo
);
1284 err
= sock_intr_errno(timeo
);
1285 if (signal_pending(current
))
	/* This is a tricky place. We need to grab our state lock but cannot
	   drop the lock on the peer. It is dangerous because a deadlock is
	   possible. The connect-to-self case and simultaneous
	   attempts to connect are eliminated by checking the socket
	   state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we still have to recheck the state after the socket is locked.
	 */
1306 /* This is ok... continue with connect */
1308 case TCP_ESTABLISHED
:
1309 /* Socket is already connected */
1317 unix_state_lock_nested(sk
);
1319 if (sk
->sk_state
!= st
) {
1320 unix_state_unlock(sk
);
1321 unix_state_unlock(other
);
1326 err
= security_unix_stream_connect(sk
, other
, newsk
);
1328 unix_state_unlock(sk
);
1332 /* The way is open! Fastly set all the necessary fields... */
1335 unix_peer(newsk
) = sk
;
1336 newsk
->sk_state
= TCP_ESTABLISHED
;
1337 newsk
->sk_type
= sk
->sk_type
;
1338 init_peercred(newsk
);
1339 newu
= unix_sk(newsk
);
1340 RCU_INIT_POINTER(newsk
->sk_wq
, &newu
->peer_wq
);
1341 otheru
= unix_sk(other
);
	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
1360 if (otheru
->path
.dentry
) {
1361 path_get(&otheru
->path
);
1362 newu
->path
= otheru
->path
;
1364 refcount_inc(&otheru
->addr
->refcnt
);
1365 smp_store_release(&newu
->addr
, otheru
->addr
);
1367 /* Set credentials */
1368 copy_peercred(sk
, other
);
1370 sock
->state
= SS_CONNECTED
;
1371 sk
->sk_state
= TCP_ESTABLISHED
;
1374 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1375 unix_peer(sk
) = newsk
;
1377 unix_state_unlock(sk
);
	/* take ten and send info to listening sock */
1380 spin_lock(&other
->sk_receive_queue
.lock
);
1381 __skb_queue_tail(&other
->sk_receive_queue
, skb
);
1382 spin_unlock(&other
->sk_receive_queue
.lock
);
1383 unix_state_unlock(other
);
1384 other
->sk_data_ready(other
);
1390 unix_state_unlock(other
);
1395 unix_release_sock(newsk
, 0);
1401 static int unix_socketpair(struct socket
*socka
, struct socket
*sockb
)
1403 struct sock
*ska
= socka
->sk
, *skb
= sockb
->sk
;
1405 /* Join our sockets back to back */
1408 unix_peer(ska
) = skb
;
1409 unix_peer(skb
) = ska
;
1413 if (ska
->sk_type
!= SOCK_DGRAM
) {
1414 ska
->sk_state
= TCP_ESTABLISHED
;
1415 skb
->sk_state
= TCP_ESTABLISHED
;
1416 socka
->state
= SS_CONNECTED
;
1417 sockb
->state
= SS_CONNECTED
;
1422 static void unix_sock_inherit_flags(const struct socket
*old
,
1425 if (test_bit(SOCK_PASSCRED
, &old
->flags
))
1426 set_bit(SOCK_PASSCRED
, &new->flags
);
1427 if (test_bit(SOCK_PASSSEC
, &old
->flags
))
1428 set_bit(SOCK_PASSSEC
, &new->flags
);
1431 static int unix_accept(struct socket
*sock
, struct socket
*newsock
, int flags
,
1434 struct sock
*sk
= sock
->sk
;
1436 struct sk_buff
*skb
;
1440 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
1444 if (sk
->sk_state
!= TCP_LISTEN
)
1447 /* If socket state is TCP_LISTEN it cannot change (for now...),
1448 * so that no locks are necessary.
1451 skb
= skb_recv_datagram(sk
, 0, flags
&O_NONBLOCK
, &err
);
1453 /* This means receive shutdown. */
1460 skb_free_datagram(sk
, skb
);
1461 wake_up_interruptible(&unix_sk(sk
)->peer_wait
);
1463 /* attach accepted sock to socket */
1464 unix_state_lock(tsk
);
1465 newsock
->state
= SS_CONNECTED
;
1466 unix_sock_inherit_flags(sock
, newsock
);
1467 sock_graft(tsk
, newsock
);
1468 unix_state_unlock(tsk
);
1476 static int unix_getname(struct socket
*sock
, struct sockaddr
*uaddr
, int peer
)
1478 struct sock
*sk
= sock
->sk
;
1479 struct unix_address
*addr
;
1480 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
, uaddr
);
1484 sk
= unix_peer_get(sk
);
1494 addr
= smp_load_acquire(&unix_sk(sk
)->addr
);
1496 sunaddr
->sun_family
= AF_UNIX
;
1497 sunaddr
->sun_path
[0] = 0;
1498 err
= sizeof(short);
1501 memcpy(sunaddr
, addr
->name
, addr
->len
);
1508 static int unix_scm_to_skb(struct scm_cookie
*scm
, struct sk_buff
*skb
, bool send_fds
)
1512 UNIXCB(skb
).pid
= get_pid(scm
->pid
);
1513 UNIXCB(skb
).uid
= scm
->creds
.uid
;
1514 UNIXCB(skb
).gid
= scm
->creds
.gid
;
1515 UNIXCB(skb
).fp
= NULL
;
1516 unix_get_secdata(scm
, skb
);
1517 if (scm
->fp
&& send_fds
)
1518 err
= unix_attach_fds(scm
, skb
);
1520 skb
->destructor
= unix_destruct_scm
;
1524 static bool unix_passcred_enabled(const struct socket
*sock
,
1525 const struct sock
*other
)
1527 return test_bit(SOCK_PASSCRED
, &sock
->flags
) ||
1528 !other
->sk_socket
||
1529 test_bit(SOCK_PASSCRED
, &other
->sk_socket
->flags
);
/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
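/*
 * Illustrative user-space sketch of the behaviour described above (not part
 * of this file; with glibc, struct ucred needs _GNU_SOURCE): once the
 * receiver sets SO_PASSCRED, every message it reads carries an
 * SCM_CREDENTIALS ancillary block even if the sender never attached one.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	void read_creds(int fd)
 *	{
 *		int on = 1;
 *		char data[256], cbuf[CMSG_SPACE(sizeof(struct ucred))];
 *		struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *		struct msghdr msg = {
 *			.msg_iov = &iov, .msg_iovlen = 1,
 *			.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *		};
 *		struct cmsghdr *cmsg;
 *
 *		setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *		if (recvmsg(fd, &msg, 0) < 0)
 *			return;
 *		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *			if (cmsg->cmsg_level == SOL_SOCKET &&
 *			    cmsg->cmsg_type == SCM_CREDENTIALS) {
 *				struct ucred uc;
 *
 *				memcpy(&uc, CMSG_DATA(cmsg), sizeof(uc));
 *				printf("pid=%d uid=%u gid=%u\n", uc.pid, uc.uid, uc.gid);
 *			}
 *	}
 */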
1548 static int maybe_init_creds(struct scm_cookie
*scm
,
1549 struct socket
*socket
,
1550 const struct sock
*other
)
1553 struct msghdr msg
= { .msg_controllen
= 0 };
1555 err
= scm_send(socket
, &msg
, scm
, false);
1559 if (unix_passcred_enabled(socket
, other
)) {
1560 scm
->pid
= get_pid(task_tgid(current
));
1561 current_uid_gid(&scm
->creds
.uid
, &scm
->creds
.gid
);
1566 static bool unix_skb_scm_eq(struct sk_buff
*skb
,
1567 struct scm_cookie
*scm
)
1569 const struct unix_skb_parms
*u
= &UNIXCB(skb
);
1571 return u
->pid
== scm
->pid
&&
1572 uid_eq(u
->uid
, scm
->creds
.uid
) &&
1573 gid_eq(u
->gid
, scm
->creds
.gid
) &&
1574 unix_secdata_eq(scm
, skb
);
1578 * Send AF_UNIX data.
1581 static int unix_dgram_sendmsg(struct socket
*sock
, struct msghdr
*msg
,
1584 struct sock
*sk
= sock
->sk
;
1585 struct net
*net
= sock_net(sk
);
1586 struct unix_sock
*u
= unix_sk(sk
);
1587 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
, msg
->msg_name
);
1588 struct sock
*other
= NULL
;
1589 int namelen
= 0; /* fake GCC */
1592 struct sk_buff
*skb
;
1594 struct scm_cookie scm
;
1599 err
= scm_send(sock
, msg
, &scm
, false);
1604 if (msg
->msg_flags
&MSG_OOB
)
1607 if (msg
->msg_namelen
) {
1608 err
= unix_mkname(sunaddr
, msg
->msg_namelen
, &hash
);
1615 other
= unix_peer_get(sk
);
1620 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
1621 && (err
= unix_autobind(sock
)) != 0)
1625 if (len
> sk
->sk_sndbuf
- 32)
1628 if (len
> SKB_MAX_ALLOC
) {
1629 data_len
= min_t(size_t,
1630 len
- SKB_MAX_ALLOC
,
1631 MAX_SKB_FRAGS
* PAGE_SIZE
);
1632 data_len
= PAGE_ALIGN(data_len
);
1634 BUILD_BUG_ON(SKB_MAX_ALLOC
< PAGE_SIZE
);
1637 skb
= sock_alloc_send_pskb(sk
, len
- data_len
, data_len
,
1638 msg
->msg_flags
& MSG_DONTWAIT
, &err
,
1639 PAGE_ALLOC_COSTLY_ORDER
);
1643 err
= unix_scm_to_skb(&scm
, skb
, true);
1647 skb_put(skb
, len
- data_len
);
1648 skb
->data_len
= data_len
;
1650 err
= skb_copy_datagram_from_iter(skb
, 0, &msg
->msg_iter
, len
);
1654 timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
1659 if (sunaddr
== NULL
)
1662 other
= unix_find_other(net
, sunaddr
, namelen
, sk
->sk_type
,
1668 if (sk_filter(other
, skb
) < 0) {
1669 /* Toss the packet but do not return any error to the sender */
1675 unix_state_lock(other
);
1678 if (!unix_may_send(sk
, other
))
1681 if (unlikely(sock_flag(other
, SOCK_DEAD
))) {
1683 * Check with 1003.1g - what should
1686 unix_state_unlock(other
);
1690 unix_state_lock(sk
);
1693 if (unix_peer(sk
) == other
) {
1694 unix_peer(sk
) = NULL
;
1695 unix_dgram_peer_wake_disconnect_wakeup(sk
, other
);
1697 unix_state_unlock(sk
);
1699 unix_dgram_disconnected(sk
, other
);
1701 err
= -ECONNREFUSED
;
1703 unix_state_unlock(sk
);
1713 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1716 if (sk
->sk_type
!= SOCK_SEQPACKET
) {
1717 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
1722 /* other == sk && unix_peer(other) != sk if
1723 * - unix_peer(sk) == NULL, destination address bound to sk
1724 * - unix_peer(sk) == sk by time of get but disconnected before lock
1727 unlikely(unix_peer(other
) != sk
&& unix_recvq_full(other
))) {
1729 timeo
= unix_wait_for_peer(other
, timeo
);
1731 err
= sock_intr_errno(timeo
);
1732 if (signal_pending(current
))
1739 unix_state_unlock(other
);
1740 unix_state_double_lock(sk
, other
);
1743 if (unix_peer(sk
) != other
||
1744 unix_dgram_peer_wake_me(sk
, other
)) {
1752 goto restart_locked
;
1756 if (unlikely(sk_locked
))
1757 unix_state_unlock(sk
);
1759 if (sock_flag(other
, SOCK_RCVTSTAMP
))
1760 __net_timestamp(skb
);
1761 maybe_add_creds(skb
, sock
, other
);
1762 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1763 unix_state_unlock(other
);
1764 other
->sk_data_ready(other
);
1771 unix_state_unlock(sk
);
1772 unix_state_unlock(other
);
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1787 static int unix_stream_sendmsg(struct socket
*sock
, struct msghdr
*msg
,
1790 struct sock
*sk
= sock
->sk
;
1791 struct sock
*other
= NULL
;
1793 struct sk_buff
*skb
;
1795 struct scm_cookie scm
;
1796 bool fds_sent
= false;
1800 err
= scm_send(sock
, msg
, &scm
, false);
1805 if (msg
->msg_flags
&MSG_OOB
)
1808 if (msg
->msg_namelen
) {
1809 err
= sk
->sk_state
== TCP_ESTABLISHED
? -EISCONN
: -EOPNOTSUPP
;
1813 other
= unix_peer(sk
);
1818 if (sk
->sk_shutdown
& SEND_SHUTDOWN
)
1821 while (sent
< len
) {
1824 /* Keep two messages in the pipe so it schedules better */
1825 size
= min_t(int, size
, (sk
->sk_sndbuf
>> 1) - 64);
1827 /* allow fallback to order-0 allocations */
1828 size
= min_t(int, size
, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ
);
1830 data_len
= max_t(int, 0, size
- SKB_MAX_HEAD(0));
1832 data_len
= min_t(size_t, size
, PAGE_ALIGN(data_len
));
1834 skb
= sock_alloc_send_pskb(sk
, size
- data_len
, data_len
,
1835 msg
->msg_flags
& MSG_DONTWAIT
, &err
,
1836 get_order(UNIX_SKB_FRAGS_SZ
));
1840 /* Only send the fds in the first buffer */
1841 err
= unix_scm_to_skb(&scm
, skb
, !fds_sent
);
1848 skb_put(skb
, size
- data_len
);
1849 skb
->data_len
= data_len
;
1851 err
= skb_copy_datagram_from_iter(skb
, 0, &msg
->msg_iter
, size
);
1857 unix_state_lock(other
);
1859 if (sock_flag(other
, SOCK_DEAD
) ||
1860 (other
->sk_shutdown
& RCV_SHUTDOWN
))
1863 maybe_add_creds(skb
, sock
, other
);
1864 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1865 unix_state_unlock(other
);
1866 other
->sk_data_ready(other
);
1875 unix_state_unlock(other
);
1878 if (sent
== 0 && !(msg
->msg_flags
&MSG_NOSIGNAL
))
1879 send_sig(SIGPIPE
, current
, 0);
1883 return sent
? : err
;
1886 static ssize_t
unix_stream_sendpage(struct socket
*socket
, struct page
*page
,
1887 int offset
, size_t size
, int flags
)
1890 bool send_sigpipe
= false;
1891 bool init_scm
= true;
1892 struct scm_cookie scm
;
1893 struct sock
*other
, *sk
= socket
->sk
;
1894 struct sk_buff
*skb
, *newskb
= NULL
, *tail
= NULL
;
1896 if (flags
& MSG_OOB
)
1899 other
= unix_peer(sk
);
1900 if (!other
|| sk
->sk_state
!= TCP_ESTABLISHED
)
1905 unix_state_unlock(other
);
1906 mutex_unlock(&unix_sk(other
)->iolock
);
1907 newskb
= sock_alloc_send_pskb(sk
, 0, 0, flags
& MSG_DONTWAIT
,
1913 /* we must acquire iolock as we modify already present
1914 * skbs in the sk_receive_queue and mess with skb->len
1916 err
= mutex_lock_interruptible(&unix_sk(other
)->iolock
);
1918 err
= flags
& MSG_DONTWAIT
? -EAGAIN
: -ERESTARTSYS
;
1922 if (sk
->sk_shutdown
& SEND_SHUTDOWN
) {
1924 send_sigpipe
= true;
1928 unix_state_lock(other
);
1930 if (sock_flag(other
, SOCK_DEAD
) ||
1931 other
->sk_shutdown
& RCV_SHUTDOWN
) {
1933 send_sigpipe
= true;
1934 goto err_state_unlock
;
1938 err
= maybe_init_creds(&scm
, socket
, other
);
1940 goto err_state_unlock
;
1944 skb
= skb_peek_tail(&other
->sk_receive_queue
);
1945 if (tail
&& tail
== skb
) {
1947 } else if (!skb
|| !unix_skb_scm_eq(skb
, &scm
)) {
1954 } else if (newskb
) {
1955 /* this is fast path, we don't necessarily need to
1956 * call to kfree_skb even though with newskb == NULL
1957 * this - does no harm
1959 consume_skb(newskb
);
1963 if (skb_append_pagefrags(skb
, page
, offset
, size
)) {
1969 skb
->data_len
+= size
;
1970 skb
->truesize
+= size
;
1971 refcount_add(size
, &sk
->sk_wmem_alloc
);
1974 err
= unix_scm_to_skb(&scm
, skb
, false);
1976 goto err_state_unlock
;
1977 spin_lock(&other
->sk_receive_queue
.lock
);
1978 __skb_queue_tail(&other
->sk_receive_queue
, newskb
);
1979 spin_unlock(&other
->sk_receive_queue
.lock
);
1982 unix_state_unlock(other
);
1983 mutex_unlock(&unix_sk(other
)->iolock
);
1985 other
->sk_data_ready(other
);
1990 unix_state_unlock(other
);
1992 mutex_unlock(&unix_sk(other
)->iolock
);
1995 if (send_sigpipe
&& !(flags
& MSG_NOSIGNAL
))
1996 send_sig(SIGPIPE
, current
, 0);
2002 static int unix_seqpacket_sendmsg(struct socket
*sock
, struct msghdr
*msg
,
2006 struct sock
*sk
= sock
->sk
;
2008 err
= sock_error(sk
);
2012 if (sk
->sk_state
!= TCP_ESTABLISHED
)
2015 if (msg
->msg_namelen
)
2016 msg
->msg_namelen
= 0;
2018 return unix_dgram_sendmsg(sock
, msg
, len
);
2021 static int unix_seqpacket_recvmsg(struct socket
*sock
, struct msghdr
*msg
,
2022 size_t size
, int flags
)
2024 struct sock
*sk
= sock
->sk
;
2026 if (sk
->sk_state
!= TCP_ESTABLISHED
)
2029 return unix_dgram_recvmsg(sock
, msg
, size
, flags
);
2032 static void unix_copy_addr(struct msghdr
*msg
, struct sock
*sk
)
2034 struct unix_address
*addr
= smp_load_acquire(&unix_sk(sk
)->addr
);
2037 msg
->msg_namelen
= addr
->len
;
2038 memcpy(msg
->msg_name
, addr
->name
, addr
->len
);
2042 static int unix_dgram_recvmsg(struct socket
*sock
, struct msghdr
*msg
,
2043 size_t size
, int flags
)
2045 struct scm_cookie scm
;
2046 struct sock
*sk
= sock
->sk
;
2047 struct unix_sock
*u
= unix_sk(sk
);
2048 struct sk_buff
*skb
, *last
;
2057 timeo
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
2060 mutex_lock(&u
->iolock
);
2062 skip
= sk_peek_offset(sk
, flags
);
2063 skb
= __skb_try_recv_datagram(sk
, flags
, NULL
, &skip
, &err
,
2068 mutex_unlock(&u
->iolock
);
2073 !__skb_wait_for_more_packets(sk
, &err
, &timeo
, last
));
2075 if (!skb
) { /* implies iolock unlocked */
2076 unix_state_lock(sk
);
2077 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2078 if (sk
->sk_type
== SOCK_SEQPACKET
&& err
== -EAGAIN
&&
2079 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
2081 unix_state_unlock(sk
);
2085 if (wq_has_sleeper(&u
->peer_wait
))
2086 wake_up_interruptible_sync_poll(&u
->peer_wait
,
2087 EPOLLOUT
| EPOLLWRNORM
|
2091 unix_copy_addr(msg
, skb
->sk
);
2093 if (size
> skb
->len
- skip
)
2094 size
= skb
->len
- skip
;
2095 else if (size
< skb
->len
- skip
)
2096 msg
->msg_flags
|= MSG_TRUNC
;
2098 err
= skb_copy_datagram_msg(skb
, skip
, msg
, size
);
2102 if (sock_flag(sk
, SOCK_RCVTSTAMP
))
2103 __sock_recv_timestamp(msg
, sk
, skb
);
2105 memset(&scm
, 0, sizeof(scm
));
2107 scm_set_cred(&scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).uid
, UNIXCB(skb
).gid
);
2108 unix_set_secdata(&scm
, skb
);
2110 if (!(flags
& MSG_PEEK
)) {
2112 unix_detach_fds(&scm
, skb
);
2114 sk_peek_offset_bwd(sk
, skb
->len
);
		/* It is questionable: on PEEK we could:
		   - not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!
		*/

		sk_peek_offset_fwd(sk, size);
2132 scm
.fp
= scm_fp_dup(UNIXCB(skb
).fp
);
2134 err
= (flags
& MSG_TRUNC
) ? skb
->len
- skip
: size
;
2136 scm_recv(sock
, msg
, &scm
, flags
);
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
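/*
 * Illustrative consequence of the PEEK policy chosen above (user-space
 * sketch, schematic only): peeking a datagram that carries SCM_RIGHTS
 * installs a fresh duplicate of each passed file descriptor on every peek,
 * and again on the final read, so the caller owns (and must close) each one.
 *
 *	n = recvmsg(fd, &msg, MSG_PEEK);	// fds duplicated into this process
 *	n = recvmsg(fd, &msg, 0);		// same payload, another fresh set of fds
 */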
2146 * Sleep until more data has arrived. But check for races..
2148 static long unix_stream_data_wait(struct sock
*sk
, long timeo
,
2149 struct sk_buff
*last
, unsigned int last_len
,
2152 struct sk_buff
*tail
;
2155 unix_state_lock(sk
);
2158 prepare_to_wait(sk_sleep(sk
), &wait
, TASK_INTERRUPTIBLE
);
2160 tail
= skb_peek_tail(&sk
->sk_receive_queue
);
2162 (tail
&& tail
->len
!= last_len
) ||
2164 (sk
->sk_shutdown
& RCV_SHUTDOWN
) ||
2165 signal_pending(current
) ||
2169 sk_set_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
2170 unix_state_unlock(sk
);
2172 timeo
= freezable_schedule_timeout(timeo
);
2174 timeo
= schedule_timeout(timeo
);
2175 unix_state_lock(sk
);
2177 if (sock_flag(sk
, SOCK_DEAD
))
2180 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
2183 finish_wait(sk_sleep(sk
), &wait
);
2184 unix_state_unlock(sk
);
2188 static unsigned int unix_skb_len(const struct sk_buff
*skb
)
2190 return skb
->len
- UNIXCB(skb
).consumed
;
2193 struct unix_stream_read_state
{
2194 int (*recv_actor
)(struct sk_buff
*, int, int,
2195 struct unix_stream_read_state
*);
2196 struct socket
*socket
;
2198 struct pipe_inode_info
*pipe
;
2201 unsigned int splice_flags
;
2204 static int unix_stream_read_generic(struct unix_stream_read_state
*state
,
2207 struct scm_cookie scm
;
2208 struct socket
*sock
= state
->socket
;
2209 struct sock
*sk
= sock
->sk
;
2210 struct unix_sock
*u
= unix_sk(sk
);
2212 int flags
= state
->flags
;
2213 int noblock
= flags
& MSG_DONTWAIT
;
2214 bool check_creds
= false;
2219 size_t size
= state
->size
;
2220 unsigned int last_len
;
2222 if (unlikely(sk
->sk_state
!= TCP_ESTABLISHED
)) {
2227 if (unlikely(flags
& MSG_OOB
)) {
2232 target
= sock_rcvlowat(sk
, flags
& MSG_WAITALL
, size
);
2233 timeo
= sock_rcvtimeo(sk
, noblock
);
2235 memset(&scm
, 0, sizeof(scm
));
2237 /* Lock the socket to prevent queue disordering
2238 * while sleeps in memcpy_tomsg
2240 mutex_lock(&u
->iolock
);
2242 skip
= max(sk_peek_offset(sk
, flags
), 0);
2247 struct sk_buff
*skb
, *last
;
2250 unix_state_lock(sk
);
2251 if (sock_flag(sk
, SOCK_DEAD
)) {
2255 last
= skb
= skb_peek(&sk
->sk_receive_queue
);
2256 last_len
= last
? last
->len
: 0;
2259 if (copied
>= target
)
2263 * POSIX 1003.1g mandates this order.
2266 err
= sock_error(sk
);
2269 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2272 unix_state_unlock(sk
);
2278 mutex_unlock(&u
->iolock
);
2280 timeo
= unix_stream_data_wait(sk
, timeo
, last
,
2281 last_len
, freezable
);
2283 if (signal_pending(current
)) {
2284 err
= sock_intr_errno(timeo
);
2289 mutex_lock(&u
->iolock
);
2292 unix_state_unlock(sk
);
2296 while (skip
>= unix_skb_len(skb
)) {
2297 skip
-= unix_skb_len(skb
);
2299 last_len
= skb
->len
;
2300 skb
= skb_peek_next(skb
, &sk
->sk_receive_queue
);
2305 unix_state_unlock(sk
);
2308 /* Never glue messages from different writers */
2309 if (!unix_skb_scm_eq(skb
, &scm
))
2311 } else if (test_bit(SOCK_PASSCRED
, &sock
->flags
)) {
2312 /* Copy credentials */
2313 scm_set_cred(&scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).uid
, UNIXCB(skb
).gid
);
2314 unix_set_secdata(&scm
, skb
);
2318 /* Copy address just once */
2319 if (state
->msg
&& state
->msg
->msg_name
) {
2320 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
,
2321 state
->msg
->msg_name
);
2322 unix_copy_addr(state
->msg
, skb
->sk
);
2326 chunk
= min_t(unsigned int, unix_skb_len(skb
) - skip
, size
);
2328 chunk
= state
->recv_actor(skb
, skip
, chunk
, state
);
2329 drop_skb
= !unix_skb_len(skb
);
2330 /* skb is only safe to use if !drop_skb */
2341 /* the skb was touched by a concurrent reader;
2342 * we should not expect anything from this skb
2343 * anymore and assume it invalid - we can be
2344 * sure it was dropped from the socket queue
2346 * let's report a short read
2352 /* Mark read part of skb as used */
2353 if (!(flags
& MSG_PEEK
)) {
2354 UNIXCB(skb
).consumed
+= chunk
;
2356 sk_peek_offset_bwd(sk
, chunk
);
2359 unix_detach_fds(&scm
, skb
);
2361 if (unix_skb_len(skb
))
2364 skb_unlink(skb
, &sk
->sk_receive_queue
);
2370 /* It is questionable, see note in unix_dgram_recvmsg.
2373 scm
.fp
= scm_fp_dup(UNIXCB(skb
).fp
);
2375 sk_peek_offset_fwd(sk
, chunk
);
2382 last_len
= skb
->len
;
2383 unix_state_lock(sk
);
2384 skb
= skb_peek_next(skb
, &sk
->sk_receive_queue
);
2387 unix_state_unlock(sk
);
2392 mutex_unlock(&u
->iolock
);
2394 scm_recv(sock
, state
->msg
, &scm
, flags
);
2398 return copied
? : err
;
2401 static int unix_stream_read_actor(struct sk_buff
*skb
,
2402 int skip
, int chunk
,
2403 struct unix_stream_read_state
*state
)
2407 ret
= skb_copy_datagram_msg(skb
, UNIXCB(skb
).consumed
+ skip
,
2409 return ret
?: chunk
;
2412 static int unix_stream_recvmsg(struct socket
*sock
, struct msghdr
*msg
,
2413 size_t size
, int flags
)
2415 struct unix_stream_read_state state
= {
2416 .recv_actor
= unix_stream_read_actor
,
2423 return unix_stream_read_generic(&state
, true);
2426 static int unix_stream_splice_actor(struct sk_buff
*skb
,
2427 int skip
, int chunk
,
2428 struct unix_stream_read_state
*state
)
2430 return skb_splice_bits(skb
, state
->socket
->sk
,
2431 UNIXCB(skb
).consumed
+ skip
,
2432 state
->pipe
, chunk
, state
->splice_flags
);
2435 static ssize_t
unix_stream_splice_read(struct socket
*sock
, loff_t
*ppos
,
2436 struct pipe_inode_info
*pipe
,
2437 size_t size
, unsigned int flags
)
2439 struct unix_stream_read_state state
= {
2440 .recv_actor
= unix_stream_splice_actor
,
2444 .splice_flags
= flags
,
2447 if (unlikely(*ppos
))
2450 if (sock
->file
->f_flags
& O_NONBLOCK
||
2451 flags
& SPLICE_F_NONBLOCK
)
2452 state
.flags
= MSG_DONTWAIT
;
2454 return unix_stream_read_generic(&state
, false);
2457 static int unix_shutdown(struct socket
*sock
, int mode
)
2459 struct sock
*sk
= sock
->sk
;
2462 if (mode
< SHUT_RD
|| mode
> SHUT_RDWR
)
	/* SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
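	/* Note the table above is simply mode + 1 restricted to the two
	 * shutdown bits, e.g. SHUT_RDWR: (2 + 1) & SHUTDOWN_MASK == SHUTDOWN_MASK.
	 */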
2471 unix_state_lock(sk
);
2472 sk
->sk_shutdown
|= mode
;
2473 other
= unix_peer(sk
);
2476 unix_state_unlock(sk
);
2477 sk
->sk_state_change(sk
);
2480 (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
)) {
2484 if (mode
&RCV_SHUTDOWN
)
2485 peer_mode
|= SEND_SHUTDOWN
;
2486 if (mode
&SEND_SHUTDOWN
)
2487 peer_mode
|= RCV_SHUTDOWN
;
2488 unix_state_lock(other
);
2489 other
->sk_shutdown
|= peer_mode
;
2490 unix_state_unlock(other
);
2491 other
->sk_state_change(other
);
2492 if (peer_mode
== SHUTDOWN_MASK
)
2493 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_HUP
);
2494 else if (peer_mode
& RCV_SHUTDOWN
)
2495 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_IN
);
2503 long unix_inq_len(struct sock
*sk
)
2505 struct sk_buff
*skb
;
2508 if (sk
->sk_state
== TCP_LISTEN
)
2511 spin_lock(&sk
->sk_receive_queue
.lock
);
2512 if (sk
->sk_type
== SOCK_STREAM
||
2513 sk
->sk_type
== SOCK_SEQPACKET
) {
2514 skb_queue_walk(&sk
->sk_receive_queue
, skb
)
2515 amount
+= unix_skb_len(skb
);
2517 skb
= skb_peek(&sk
->sk_receive_queue
);
2521 spin_unlock(&sk
->sk_receive_queue
.lock
);
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
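/*
 * Illustrative user-space use of the two queue-length helpers above (sketch,
 * not part of this file): unix_inq_len() backs SIOCINQ and unix_outq_len()
 * backs SIOCOUTQ for AF_UNIX sockets, as handled in unix_ioctl() below.
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	void report_queues(int fd)
 *	{
 *		int inq = 0, outq = 0;
 *
 *		if (ioctl(fd, SIOCINQ, &inq) == 0 && ioctl(fd, SIOCOUTQ, &outq) == 0)
 *			printf("unread %d bytes, unsent %d bytes\n", inq, outq);
 *	}
 */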
2533 static int unix_open_file(struct sock
*sk
)
2539 if (!ns_capable(sock_net(sk
)->user_ns
, CAP_NET_ADMIN
))
2542 if (!smp_load_acquire(&unix_sk(sk
)->addr
))
2545 path
= unix_sk(sk
)->path
;
2551 fd
= get_unused_fd_flags(O_CLOEXEC
);
2555 f
= dentry_open(&path
, O_PATH
, current_cred());
2569 static int unix_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
)
2571 struct sock
*sk
= sock
->sk
;
2577 amount
= unix_outq_len(sk
);
2578 err
= put_user(amount
, (int __user
*)arg
);
2581 amount
= unix_inq_len(sk
);
2585 err
= put_user(amount
, (int __user
*)arg
);
2588 err
= unix_open_file(sk
);
2597 #ifdef CONFIG_COMPAT
2598 static int unix_compat_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
)
2600 return unix_ioctl(sock
, cmd
, (unsigned long)compat_ptr(arg
));
2604 static __poll_t
unix_poll(struct file
*file
, struct socket
*sock
, poll_table
*wait
)
2606 struct sock
*sk
= sock
->sk
;
2609 sock_poll_wait(file
, sock
, wait
);
2612 /* exceptional events? */
2615 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2617 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2618 mask
|= EPOLLRDHUP
| EPOLLIN
| EPOLLRDNORM
;
2621 if (!skb_queue_empty_lockless(&sk
->sk_receive_queue
))
2622 mask
|= EPOLLIN
| EPOLLRDNORM
;
2624 /* Connection-based need to check for termination and startup */
2625 if ((sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) &&
2626 sk
->sk_state
== TCP_CLOSE
)
2630 * we set writable also when the other side has shut down the
2631 * connection. This prevents stuck sockets.
2633 if (unix_writable(sk
))
2634 mask
|= EPOLLOUT
| EPOLLWRNORM
| EPOLLWRBAND
;
2639 static __poll_t
unix_dgram_poll(struct file
*file
, struct socket
*sock
,
2642 struct sock
*sk
= sock
->sk
, *other
;
2643 unsigned int writable
;
2646 sock_poll_wait(file
, sock
, wait
);
2649 /* exceptional events? */
2650 if (sk
->sk_err
|| !skb_queue_empty_lockless(&sk
->sk_error_queue
))
2652 (sock_flag(sk
, SOCK_SELECT_ERR_QUEUE
) ? EPOLLPRI
: 0);
2654 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2655 mask
|= EPOLLRDHUP
| EPOLLIN
| EPOLLRDNORM
;
2656 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2660 if (!skb_queue_empty_lockless(&sk
->sk_receive_queue
))
2661 mask
|= EPOLLIN
| EPOLLRDNORM
;
2663 /* Connection-based need to check for termination and startup */
2664 if (sk
->sk_type
== SOCK_SEQPACKET
) {
2665 if (sk
->sk_state
== TCP_CLOSE
)
2667 /* connection hasn't started yet? */
2668 if (sk
->sk_state
== TCP_SYN_SENT
)
2672 /* No write status requested, avoid expensive OUT tests. */
2673 if (!(poll_requested_events(wait
) & (EPOLLWRBAND
|EPOLLWRNORM
|EPOLLOUT
)))
2676 writable
= unix_writable(sk
);
2678 unix_state_lock(sk
);
2680 other
= unix_peer(sk
);
2681 if (other
&& unix_peer(other
) != sk
&&
2682 unix_recvq_full(other
) &&
2683 unix_dgram_peer_wake_me(sk
, other
))
2686 unix_state_unlock(sk
);
2690 mask
|= EPOLLOUT
| EPOLLWRNORM
| EPOLLWRBAND
;
2692 sk_set_bit(SOCKWQ_ASYNC_NOSPACE
, sk
);
2697 #ifdef CONFIG_PROC_FS
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
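/* The seq_file position packs the hash bucket into the high bits and a
 * 1-based offset within that bucket into the low BUCKET_SPACE bits, so a
 * single loff_t can resume the /proc/net/unix walk at an exact socket.
 */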
2705 static struct sock
*unix_from_bucket(struct seq_file
*seq
, loff_t
*pos
)
2707 unsigned long offset
= get_offset(*pos
);
2708 unsigned long bucket
= get_bucket(*pos
);
2710 unsigned long count
= 0;
2712 for (sk
= sk_head(&unix_socket_table
[bucket
]); sk
; sk
= sk_next(sk
)) {
2713 if (sock_net(sk
) != seq_file_net(seq
))
2715 if (++count
== offset
)
2722 static struct sock
*unix_next_socket(struct seq_file
*seq
,
2726 unsigned long bucket
;
2728 while (sk
> (struct sock
*)SEQ_START_TOKEN
) {
2732 if (sock_net(sk
) == seq_file_net(seq
))
2737 sk
= unix_from_bucket(seq
, pos
);
2742 bucket
= get_bucket(*pos
) + 1;
2743 *pos
= set_bucket_offset(bucket
, 1);
2744 } while (bucket
< ARRAY_SIZE(unix_socket_table
));
2749 static void *unix_seq_start(struct seq_file
*seq
, loff_t
*pos
)
2750 __acquires(unix_table_lock
)
2752 spin_lock(&unix_table_lock
);
2755 return SEQ_START_TOKEN
;
2757 if (get_bucket(*pos
) >= ARRAY_SIZE(unix_socket_table
))
2760 return unix_next_socket(seq
, NULL
, pos
);
2763 static void *unix_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2766 return unix_next_socket(seq
, v
, pos
);
2769 static void unix_seq_stop(struct seq_file
*seq
, void *v
)
2770 __releases(unix_table_lock
)
2772 spin_unlock(&unix_table_lock
);
2775 static int unix_seq_show(struct seq_file
*seq
, void *v
)
2778 if (v
== SEQ_START_TOKEN
)
2779 seq_puts(seq
, "Num RefCount Protocol Flags Type St "
2783 struct unix_sock
*u
= unix_sk(s
);
2786 seq_printf(seq
, "%pK: %08X %08X %08X %04X %02X %5lu",
2788 refcount_read(&s
->sk_refcnt
),
2790 s
->sk_state
== TCP_LISTEN
? __SO_ACCEPTCON
: 0,
2793 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTED
: SS_UNCONNECTED
) :
2794 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTING
: SS_DISCONNECTING
),
2797 if (u
->addr
) { // under unix_table_lock here
2802 len
= u
->addr
->len
- sizeof(short);
2803 if (!UNIX_ABSTRACT(s
))
2809 for ( ; i
< len
; i
++)
2810 seq_putc(seq
, u
->addr
->name
->sun_path
[i
] ?:
2813 unix_state_unlock(s
);
2814 seq_putc(seq
, '\n');
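	/* A resulting /proc/net/unix line looks roughly like (illustrative):
	 *   0000000000000000: 00000002 00000000 00010000 0001 01 28271 /run/example.sock
	 * i.e. kernel address, refcount, protocol, flags, type, state, inode, path.
	 */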
2820 static const struct seq_operations unix_seq_ops
= {
2821 .start
= unix_seq_start
,
2822 .next
= unix_seq_next
,
2823 .stop
= unix_seq_stop
,
2824 .show
= unix_seq_show
,
2828 static const struct net_proto_family unix_family_ops
= {
2830 .create
= unix_create
,
2831 .owner
= THIS_MODULE
,
2835 static int __net_init
unix_net_init(struct net
*net
)
2837 int error
= -ENOMEM
;
2839 net
->unx
.sysctl_max_dgram_qlen
= 10;
2840 if (unix_sysctl_register(net
))
2843 #ifdef CONFIG_PROC_FS
2844 if (!proc_create_net("unix", 0, net
->proc_net
, &unix_seq_ops
,
2845 sizeof(struct seq_net_private
))) {
2846 unix_sysctl_unregister(net
);
2855 static void __net_exit
unix_net_exit(struct net
*net
)
2857 unix_sysctl_unregister(net
);
2858 remove_proc_entry("unix", net
->proc_net
);
2861 static struct pernet_operations unix_net_ops
= {
2862 .init
= unix_net_init
,
2863 .exit
= unix_net_exit
,
2866 static int __init
af_unix_init(void)
2870 BUILD_BUG_ON(sizeof(struct unix_skb_parms
) > FIELD_SIZEOF(struct sk_buff
, cb
));
2872 rc
= proto_register(&unix_proto
, 1);
2874 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__
);
2878 sock_register(&unix_family_ops
);
2879 register_pernet_subsys(&unix_net_ops
);
2884 static void __exit
af_unix_exit(void)
2886 sock_unregister(PF_UNIX
);
2887 proto_unregister(&unix_proto
);
2888 unregister_pernet_subsys(&unix_net_ops
);
/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */

fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);