/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *		Christoph Rohland:	Elegant non-blocking accept/connect
 *					algorithm.
 *		Alexey Kuznetsov:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *		Andrea Arcangeli:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations.
 *		Alexey Kuznetsov:	Full scale SMP. Lots of bugs are introduced 8)
 *		Malcolm Beattie	:	Set peercred for socketpair.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT;
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+).
 *
 * Known differences from reference BSD that was tested:
 *
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
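/* For illustration, a minimal userspace sketch of binding to an abstract
 * name as described above: the name starts with a NUL byte and its length
 * is conveyed through addrlen rather than by NUL termination (the name
 * "\0example" is only an example; error handling omitted):
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memcpy(sun.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 8);
 */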
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;

static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
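/* Bound sockets live in the first UNIX_HASH_SIZE buckets: abstract and
 * autobound names are hashed by name (see unix_hash_fold()), while
 * filesystem-bound sockets are hashed by inode number and keep
 * addr->hash == UNIX_HASH_SIZE, which is what UNIX_ABSTRACT() tests.
 * Sockets that are not yet bound go into the upper UNIX_HASH_SIZE buckets,
 * hashed by their kernel address via unix_sockets_unbound().
 */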
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}
/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it doesn't start with a zero byte, it should be NUL
 *		  terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
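/* Wake function installed on the client's peer_wake entry: when the server
 * socket wakes its peer_wait queue, detach this entry and relay the wake-up
 * to the ordinary wait queue of the client socket, so a sleeping or polling
 * would-be writer notices that the receive queue has drained.
 */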
344 static int unix_dgram_peer_wake_relay(wait_queue_t
*q
, unsigned mode
, int flags
,
348 wait_queue_head_t
*u_sleep
;
350 u
= container_of(q
, struct unix_sock
, peer_wake
);
352 __remove_wait_queue(&unix_sk(u
->peer_wake
.private)->peer_wait
,
354 u
->peer_wake
.private = NULL
;
356 /* relaying can only happen while the wq still exists */
357 u_sleep
= sk_sleep(&u
->sk
);
359 wake_up_interruptible_poll(u_sleep
, key
);
364 static int unix_dgram_peer_wake_connect(struct sock
*sk
, struct sock
*other
)
366 struct unix_sock
*u
, *u_other
;
370 u_other
= unix_sk(other
);
372 spin_lock(&u_other
->peer_wait
.lock
);
374 if (!u
->peer_wake
.private) {
375 u
->peer_wake
.private = other
;
376 __add_wait_queue(&u_other
->peer_wait
, &u
->peer_wake
);
381 spin_unlock(&u_other
->peer_wait
.lock
);
385 static void unix_dgram_peer_wake_disconnect(struct sock
*sk
,
388 struct unix_sock
*u
, *u_other
;
391 u_other
= unix_sk(other
);
392 spin_lock(&u_other
->peer_wait
.lock
);
394 if (u
->peer_wake
.private == other
) {
395 __remove_wait_queue(&u_other
->peer_wait
, &u
->peer_wake
);
396 u
->peer_wake
.private = NULL
;
399 spin_unlock(&u_other
->peer_wait
.lock
);
402 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock
*sk
,
405 unix_dgram_peer_wake_disconnect(sk
, other
);
406 wake_up_interruptible_poll(sk_sleep(sk
),
413 * - unix_peer(sk) == other
414 * - association is stable
416 static int unix_dgram_peer_wake_me(struct sock
*sk
, struct sock
*other
)
420 connected
= unix_dgram_peer_wake_connect(sk
, other
);
422 if (unix_recvq_full(other
))
426 unix_dgram_peer_wake_disconnect(sk
, other
);
431 static inline int unix_writable(struct sock
*sk
)
433 return (atomic_read(&sk
->sk_wmem_alloc
) << 2) <= sk
->sk_sndbuf
;
436 static void unix_write_space(struct sock
*sk
)
438 struct socket_wq
*wq
;
441 if (unix_writable(sk
)) {
442 wq
= rcu_dereference(sk
->sk_wq
);
443 if (wq_has_sleeper(wq
))
444 wake_up_interruptible_sync_poll(&wq
->wait
,
445 POLLOUT
| POLLWRNORM
| POLLWRBAND
);
446 sk_wake_async(sk
, SOCK_WAKE_SPACE
, POLL_OUT
);
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
455 static void unix_dgram_disconnected(struct sock
*sk
, struct sock
*other
)
457 if (!skb_queue_empty(&sk
->sk_receive_queue
)) {
458 skb_queue_purge(&sk
->sk_receive_queue
);
459 wake_up_interruptible_all(&unix_sk(sk
)->peer_wait
);
	/* If one link of a bidirectional dgram pipe is disconnected,
	 * we signal an error. Messages are lost. Do not do this
	 * when the peer was not connected to us.
465 if (!sock_flag(other
, SOCK_DEAD
) && unix_peer(other
) == sk
) {
466 other
->sk_err
= ECONNRESET
;
467 other
->sk_error_report(other
);
472 static void unix_sock_destructor(struct sock
*sk
)
474 struct unix_sock
*u
= unix_sk(sk
);
476 skb_queue_purge(&sk
->sk_receive_queue
);
478 WARN_ON(atomic_read(&sk
->sk_wmem_alloc
));
479 WARN_ON(!sk_unhashed(sk
));
480 WARN_ON(sk
->sk_socket
);
481 if (!sock_flag(sk
, SOCK_DEAD
)) {
482 pr_info("Attempt to release alive unix socket: %p\n", sk
);
487 unix_release_addr(u
->addr
);
489 atomic_long_dec(&unix_nr_socks
);
491 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, -1);
493 #ifdef UNIX_REFCNT_DEBUG
494 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk
,
495 atomic_long_read(&unix_nr_socks
));
499 static void unix_release_sock(struct sock
*sk
, int embrion
)
501 struct unix_sock
*u
= unix_sk(sk
);
507 unix_remove_socket(sk
);
512 sk
->sk_shutdown
= SHUTDOWN_MASK
;
514 u
->path
.dentry
= NULL
;
516 state
= sk
->sk_state
;
517 sk
->sk_state
= TCP_CLOSE
;
518 unix_state_unlock(sk
);
520 wake_up_interruptible_all(&u
->peer_wait
);
522 skpair
= unix_peer(sk
);
524 if (skpair
!= NULL
) {
525 if (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) {
526 unix_state_lock(skpair
);
528 skpair
->sk_shutdown
= SHUTDOWN_MASK
;
529 if (!skb_queue_empty(&sk
->sk_receive_queue
) || embrion
)
530 skpair
->sk_err
= ECONNRESET
;
531 unix_state_unlock(skpair
);
532 skpair
->sk_state_change(skpair
);
533 sk_wake_async(skpair
, SOCK_WAKE_WAITD
, POLL_HUP
);
536 unix_dgram_peer_wake_disconnect(sk
, skpair
);
537 sock_put(skpair
); /* It may now die */
538 unix_peer(sk
) = NULL
;
541 /* Try to flush out this socket. Throw out buffers at least */
543 while ((skb
= skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
544 if (state
== TCP_LISTEN
)
545 unix_release_sock(skb
->sk
, 1);
546 /* passed fds are erased in the kfree_skb hook */
	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */
568 if (unix_tot_inflight
)
569 unix_gc(); /* Garbage collect fds */
572 static void init_peercred(struct sock
*sk
)
574 put_pid(sk
->sk_peer_pid
);
575 if (sk
->sk_peer_cred
)
576 put_cred(sk
->sk_peer_cred
);
577 sk
->sk_peer_pid
= get_pid(task_tgid(current
));
578 sk
->sk_peer_cred
= get_current_cred();
581 static void copy_peercred(struct sock
*sk
, struct sock
*peersk
)
583 put_pid(sk
->sk_peer_pid
);
584 if (sk
->sk_peer_cred
)
585 put_cred(sk
->sk_peer_cred
);
586 sk
->sk_peer_pid
= get_pid(peersk
->sk_peer_pid
);
587 sk
->sk_peer_cred
= get_cred(peersk
->sk_peer_cred
);
590 static int unix_listen(struct socket
*sock
, int backlog
)
593 struct sock
*sk
= sock
->sk
;
594 struct unix_sock
*u
= unix_sk(sk
);
595 struct pid
*old_pid
= NULL
;
598 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
599 goto out
; /* Only stream/seqpacket sockets accept */
602 goto out
; /* No listens on an unbound socket */
604 if (sk
->sk_state
!= TCP_CLOSE
&& sk
->sk_state
!= TCP_LISTEN
)
606 if (backlog
> sk
->sk_max_ack_backlog
)
607 wake_up_interruptible_all(&u
->peer_wait
);
608 sk
->sk_max_ack_backlog
= backlog
;
609 sk
->sk_state
= TCP_LISTEN
;
610 /* set credentials so connect can copy them */
615 unix_state_unlock(sk
);
621 static int unix_release(struct socket
*);
622 static int unix_bind(struct socket
*, struct sockaddr
*, int);
623 static int unix_stream_connect(struct socket
*, struct sockaddr
*,
624 int addr_len
, int flags
);
625 static int unix_socketpair(struct socket
*, struct socket
*);
626 static int unix_accept(struct socket
*, struct socket
*, int);
627 static int unix_getname(struct socket
*, struct sockaddr
*, int *, int);
628 static unsigned int unix_poll(struct file
*, struct socket
*, poll_table
*);
629 static unsigned int unix_dgram_poll(struct file
*, struct socket
*,
631 static int unix_ioctl(struct socket
*, unsigned int, unsigned long);
632 static int unix_shutdown(struct socket
*, int);
633 static int unix_stream_sendmsg(struct kiocb
*, struct socket
*,
634 struct msghdr
*, size_t);
635 static int unix_stream_recvmsg(struct kiocb
*, struct socket
*,
636 struct msghdr
*, size_t, int);
637 static int unix_dgram_sendmsg(struct kiocb
*, struct socket
*,
638 struct msghdr
*, size_t);
639 static int unix_dgram_recvmsg(struct kiocb
*, struct socket
*,
640 struct msghdr
*, size_t, int);
641 static int unix_dgram_connect(struct socket
*, struct sockaddr
*,
643 static int unix_seqpacket_sendmsg(struct kiocb
*, struct socket
*,
644 struct msghdr
*, size_t);
645 static int unix_seqpacket_recvmsg(struct kiocb
*, struct socket
*,
646 struct msghdr
*, size_t, int);
648 static int unix_set_peek_off(struct sock
*sk
, int val
)
650 struct unix_sock
*u
= unix_sk(sk
);
652 if (mutex_lock_interruptible(&u
->readlock
))
655 sk
->sk_peek_off
= val
;
656 mutex_unlock(&u
->readlock
);
662 static const struct proto_ops unix_stream_ops
= {
664 .owner
= THIS_MODULE
,
665 .release
= unix_release
,
667 .connect
= unix_stream_connect
,
668 .socketpair
= unix_socketpair
,
669 .accept
= unix_accept
,
670 .getname
= unix_getname
,
673 .listen
= unix_listen
,
674 .shutdown
= unix_shutdown
,
675 .setsockopt
= sock_no_setsockopt
,
676 .getsockopt
= sock_no_getsockopt
,
677 .sendmsg
= unix_stream_sendmsg
,
678 .recvmsg
= unix_stream_recvmsg
,
679 .mmap
= sock_no_mmap
,
680 .sendpage
= sock_no_sendpage
,
681 .set_peek_off
= unix_set_peek_off
,
684 static const struct proto_ops unix_dgram_ops
= {
686 .owner
= THIS_MODULE
,
687 .release
= unix_release
,
689 .connect
= unix_dgram_connect
,
690 .socketpair
= unix_socketpair
,
691 .accept
= sock_no_accept
,
692 .getname
= unix_getname
,
693 .poll
= unix_dgram_poll
,
695 .listen
= sock_no_listen
,
696 .shutdown
= unix_shutdown
,
697 .setsockopt
= sock_no_setsockopt
,
698 .getsockopt
= sock_no_getsockopt
,
699 .sendmsg
= unix_dgram_sendmsg
,
700 .recvmsg
= unix_dgram_recvmsg
,
701 .mmap
= sock_no_mmap
,
702 .sendpage
= sock_no_sendpage
,
703 .set_peek_off
= unix_set_peek_off
,
706 static const struct proto_ops unix_seqpacket_ops
= {
708 .owner
= THIS_MODULE
,
709 .release
= unix_release
,
711 .connect
= unix_stream_connect
,
712 .socketpair
= unix_socketpair
,
713 .accept
= unix_accept
,
714 .getname
= unix_getname
,
715 .poll
= unix_dgram_poll
,
717 .listen
= unix_listen
,
718 .shutdown
= unix_shutdown
,
719 .setsockopt
= sock_no_setsockopt
,
720 .getsockopt
= sock_no_getsockopt
,
721 .sendmsg
= unix_seqpacket_sendmsg
,
722 .recvmsg
= unix_seqpacket_recvmsg
,
723 .mmap
= sock_no_mmap
,
724 .sendpage
= sock_no_sendpage
,
725 .set_peek_off
= unix_set_peek_off
,
728 static struct proto unix_proto
= {
730 .owner
= THIS_MODULE
,
731 .obj_size
= sizeof(struct unix_sock
),
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
740 static struct lock_class_key af_unix_sk_receive_queue_lock_key
;
742 static struct sock
*unix_create1(struct net
*net
, struct socket
*sock
)
744 struct sock
*sk
= NULL
;
747 atomic_long_inc(&unix_nr_socks
);
748 if (atomic_long_read(&unix_nr_socks
) > 2 * get_max_files())
751 sk
= sk_alloc(net
, PF_UNIX
, GFP_KERNEL
, &unix_proto
);
755 sock_init_data(sock
, sk
);
756 lockdep_set_class(&sk
->sk_receive_queue
.lock
,
757 &af_unix_sk_receive_queue_lock_key
);
759 sk
->sk_write_space
= unix_write_space
;
760 sk
->sk_max_ack_backlog
= net
->unx
.sysctl_max_dgram_qlen
;
761 sk
->sk_destruct
= unix_sock_destructor
;
763 u
->path
.dentry
= NULL
;
765 spin_lock_init(&u
->lock
);
766 atomic_long_set(&u
->inflight
, 0);
767 INIT_LIST_HEAD(&u
->link
);
768 mutex_init(&u
->readlock
); /* single task reading lock */
769 init_waitqueue_head(&u
->peer_wait
);
770 init_waitqueue_func_entry(&u
->peer_wake
, unix_dgram_peer_wake_relay
);
771 unix_insert_socket(unix_sockets_unbound(sk
), sk
);
774 atomic_long_dec(&unix_nr_socks
);
777 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, 1);
783 static int unix_create(struct net
*net
, struct socket
*sock
, int protocol
,
786 if (protocol
&& protocol
!= PF_UNIX
)
787 return -EPROTONOSUPPORT
;
789 sock
->state
= SS_UNCONNECTED
;
791 switch (sock
->type
) {
793 sock
->ops
= &unix_stream_ops
;
796 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
800 sock
->type
= SOCK_DGRAM
;
802 sock
->ops
= &unix_dgram_ops
;
805 sock
->ops
= &unix_seqpacket_ops
;
808 return -ESOCKTNOSUPPORT
;
811 return unix_create1(net
, sock
) ? 0 : -ENOMEM
;
814 static int unix_release(struct socket
*sock
)
816 struct sock
*sk
= sock
->sk
;
821 unix_release_sock(sk
, 0);
827 static int unix_autobind(struct socket
*sock
)
829 struct sock
*sk
= sock
->sk
;
830 struct net
*net
= sock_net(sk
);
831 struct unix_sock
*u
= unix_sk(sk
);
832 static u32 ordernum
= 1;
833 struct unix_address
*addr
;
835 unsigned int retries
= 0;
837 err
= mutex_lock_interruptible(&u
->readlock
);
846 addr
= kzalloc(sizeof(*addr
) + sizeof(short) + 16, GFP_KERNEL
);
850 addr
->name
->sun_family
= AF_UNIX
;
851 atomic_set(&addr
->refcnt
, 1);
854 addr
->len
= sprintf(addr
->name
->sun_path
+1, "%05x", ordernum
) + 1 + sizeof(short);
855 addr
->hash
= unix_hash_fold(csum_partial(addr
->name
, addr
->len
, 0));
857 spin_lock(&unix_table_lock
);
858 ordernum
= (ordernum
+1)&0xFFFFF;
860 if (__unix_find_socket_byname(net
, addr
->name
, addr
->len
, sock
->type
,
862 spin_unlock(&unix_table_lock
);
864 * __unix_find_socket_byname() may take long time if many names
865 * are already in use.
868 /* Give up if all names seems to be in use. */
869 if (retries
++ == 0xFFFFF) {
876 addr
->hash
^= sk
->sk_type
;
878 __unix_remove_socket(sk
);
880 __unix_insert_socket(&unix_socket_table
[addr
->hash
], sk
);
881 spin_unlock(&unix_table_lock
);
884 out
: mutex_unlock(&u
->readlock
);
888 static struct sock
*unix_find_other(struct net
*net
,
889 struct sockaddr_un
*sunname
, int len
,
890 int type
, unsigned int hash
, int *error
)
896 if (sunname
->sun_path
[0]) {
898 err
= kern_path(sunname
->sun_path
, LOOKUP_FOLLOW
, &path
);
901 inode
= d_backing_inode(path
.dentry
);
902 err
= inode_permission(inode
, MAY_WRITE
);
907 if (!S_ISSOCK(inode
->i_mode
))
909 u
= unix_find_socket_byinode(inode
);
913 if (u
->sk_type
== type
)
919 if (u
->sk_type
!= type
) {
925 u
= unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
927 struct dentry
*dentry
;
928 dentry
= unix_sk(u
)->path
.dentry
;
930 touch_atime(&unix_sk(u
)->path
);
943 static int unix_mknod(const char *sun_path
, umode_t mode
, struct path
*res
)
945 struct dentry
*dentry
;
949 * Get the parent directory, calculate the hash for last
952 dentry
= kern_path_create(AT_FDCWD
, sun_path
, &path
, 0);
953 err
= PTR_ERR(dentry
);
958 * All right, let's create it.
960 err
= security_path_mknod(&path
, dentry
, mode
, 0);
962 err
= vfs_mknod(d_inode(path
.dentry
), dentry
, mode
, 0);
964 res
->mnt
= mntget(path
.mnt
);
965 res
->dentry
= dget(dentry
);
968 done_path_create(&path
, dentry
);
972 static int unix_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
974 struct sock
*sk
= sock
->sk
;
975 struct net
*net
= sock_net(sk
);
976 struct unix_sock
*u
= unix_sk(sk
);
977 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
978 char *sun_path
= sunaddr
->sun_path
;
981 struct unix_address
*addr
;
982 struct hlist_head
*list
;
985 if (sunaddr
->sun_family
!= AF_UNIX
)
988 if (addr_len
== sizeof(short)) {
989 err
= unix_autobind(sock
);
993 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
998 err
= mutex_lock_interruptible(&u
->readlock
);
1007 addr
= kmalloc(sizeof(*addr
)+addr_len
, GFP_KERNEL
);
1011 memcpy(addr
->name
, sunaddr
, addr_len
);
1012 addr
->len
= addr_len
;
1013 addr
->hash
= hash
^ sk
->sk_type
;
1014 atomic_set(&addr
->refcnt
, 1);
1018 umode_t mode
= S_IFSOCK
|
1019 (SOCK_INODE(sock
)->i_mode
& ~current_umask());
1020 err
= unix_mknod(sun_path
, mode
, &path
);
1024 unix_release_addr(addr
);
1027 addr
->hash
= UNIX_HASH_SIZE
;
1028 hash
= d_backing_inode(path
.dentry
)->i_ino
& (UNIX_HASH_SIZE
-1);
1029 spin_lock(&unix_table_lock
);
1031 list
= &unix_socket_table
[hash
];
1033 spin_lock(&unix_table_lock
);
1035 if (__unix_find_socket_byname(net
, sunaddr
, addr_len
,
1036 sk
->sk_type
, hash
)) {
1037 unix_release_addr(addr
);
1041 list
= &unix_socket_table
[addr
->hash
];
1045 __unix_remove_socket(sk
);
1047 __unix_insert_socket(list
, sk
);
1050 spin_unlock(&unix_table_lock
);
1052 mutex_unlock(&u
->readlock
);
1057 static void unix_state_double_lock(struct sock
*sk1
, struct sock
*sk2
)
1059 if (unlikely(sk1
== sk2
) || !sk2
) {
1060 unix_state_lock(sk1
);
1064 unix_state_lock(sk1
);
1065 unix_state_lock_nested(sk2
);
1067 unix_state_lock(sk2
);
1068 unix_state_lock_nested(sk1
);
1072 static void unix_state_double_unlock(struct sock
*sk1
, struct sock
*sk2
)
1074 if (unlikely(sk1
== sk2
) || !sk2
) {
1075 unix_state_unlock(sk1
);
1078 unix_state_unlock(sk1
);
1079 unix_state_unlock(sk2
);
1082 static int unix_dgram_connect(struct socket
*sock
, struct sockaddr
*addr
,
1083 int alen
, int flags
)
1085 struct sock
*sk
= sock
->sk
;
1086 struct net
*net
= sock_net(sk
);
1087 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)addr
;
1092 if (addr
->sa_family
!= AF_UNSPEC
) {
1093 err
= unix_mkname(sunaddr
, alen
, &hash
);
1098 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) &&
1099 !unix_sk(sk
)->addr
&& (err
= unix_autobind(sock
)) != 0)
1103 other
= unix_find_other(net
, sunaddr
, alen
, sock
->type
, hash
, &err
);
1107 unix_state_double_lock(sk
, other
);
1109 /* Apparently VFS overslept socket death. Retry. */
1110 if (sock_flag(other
, SOCK_DEAD
)) {
1111 unix_state_double_unlock(sk
, other
);
1117 if (!unix_may_send(sk
, other
))
1120 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
1126 * 1003.1g breaking connected state with AF_UNSPEC
1129 unix_state_double_lock(sk
, other
);
1133 * If it was connected, reconnect.
1135 if (unix_peer(sk
)) {
1136 struct sock
*old_peer
= unix_peer(sk
);
1137 unix_peer(sk
) = other
;
1138 unix_dgram_peer_wake_disconnect_wakeup(sk
, old_peer
);
1140 unix_state_double_unlock(sk
, other
);
1142 if (other
!= old_peer
)
1143 unix_dgram_disconnected(sk
, old_peer
);
1146 unix_peer(sk
) = other
;
1147 unix_state_double_unlock(sk
, other
);
1152 unix_state_double_unlock(sk
, other
);
1158 static long unix_wait_for_peer(struct sock
*other
, long timeo
)
1160 struct unix_sock
*u
= unix_sk(other
);
1164 prepare_to_wait_exclusive(&u
->peer_wait
, &wait
, TASK_INTERRUPTIBLE
);
1166 sched
= !sock_flag(other
, SOCK_DEAD
) &&
1167 !(other
->sk_shutdown
& RCV_SHUTDOWN
) &&
1168 unix_recvq_full(other
);
1170 unix_state_unlock(other
);
1173 timeo
= schedule_timeout(timeo
);
1175 finish_wait(&u
->peer_wait
, &wait
);
1179 static int unix_stream_connect(struct socket
*sock
, struct sockaddr
*uaddr
,
1180 int addr_len
, int flags
)
1182 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
1183 struct sock
*sk
= sock
->sk
;
1184 struct net
*net
= sock_net(sk
);
1185 struct unix_sock
*u
= unix_sk(sk
), *newu
, *otheru
;
1186 struct sock
*newsk
= NULL
;
1187 struct sock
*other
= NULL
;
1188 struct sk_buff
*skb
= NULL
;
1194 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
1199 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
&&
1200 (err
= unix_autobind(sock
)) != 0)
1203 timeo
= sock_sndtimeo(sk
, flags
& O_NONBLOCK
);
	/* First of all allocate resources.
	   If we allocated them only after the state is locked,
	   we would have to recheck everything again in any case.
1212 /* create new sock for complete connection */
1213 newsk
= unix_create1(sock_net(sk
), NULL
);
1217 /* Allocate skb for sending to listening sock */
1218 skb
= sock_wmalloc(newsk
, 1, 0, GFP_KERNEL
);
1223 /* Find listening sock. */
1224 other
= unix_find_other(net
, sunaddr
, addr_len
, sk
->sk_type
, hash
, &err
);
1228 /* Latch state of peer */
1229 unix_state_lock(other
);
1231 /* Apparently VFS overslept socket death. Retry. */
1232 if (sock_flag(other
, SOCK_DEAD
)) {
1233 unix_state_unlock(other
);
1238 err
= -ECONNREFUSED
;
1239 if (other
->sk_state
!= TCP_LISTEN
)
1241 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1244 if (unix_recvq_full(other
)) {
1249 timeo
= unix_wait_for_peer(other
, timeo
);
1251 err
= sock_intr_errno(timeo
);
1252 if (signal_pending(current
))
	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. The connect-to-self case and simultaneous
	   attempts to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, and if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
1273 /* This is ok... continue with connect */
1275 case TCP_ESTABLISHED
:
1276 /* Socket is already connected */
1284 unix_state_lock_nested(sk
);
1286 if (sk
->sk_state
!= st
) {
1287 unix_state_unlock(sk
);
1288 unix_state_unlock(other
);
1293 err
= security_unix_stream_connect(sk
, other
, newsk
);
1295 unix_state_unlock(sk
);
	/* The way is open! Quickly set all the necessary fields... */
1302 unix_peer(newsk
) = sk
;
1303 newsk
->sk_state
= TCP_ESTABLISHED
;
1304 newsk
->sk_type
= sk
->sk_type
;
1305 init_peercred(newsk
);
1306 newu
= unix_sk(newsk
);
1307 RCU_INIT_POINTER(newsk
->sk_wq
, &newu
->peer_wq
);
1308 otheru
= unix_sk(other
);
1310 /* copy address information from listening to new sock*/
1312 atomic_inc(&otheru
->addr
->refcnt
);
1313 newu
->addr
= otheru
->addr
;
1315 if (otheru
->path
.dentry
) {
1316 path_get(&otheru
->path
);
1317 newu
->path
= otheru
->path
;
1320 /* Set credentials */
1321 copy_peercred(sk
, other
);
1323 sock
->state
= SS_CONNECTED
;
1324 sk
->sk_state
= TCP_ESTABLISHED
;
1327 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1328 unix_peer(sk
) = newsk
;
1330 unix_state_unlock(sk
);
	/* take ten and send info to listening sock */
1333 spin_lock(&other
->sk_receive_queue
.lock
);
1334 __skb_queue_tail(&other
->sk_receive_queue
, skb
);
1335 spin_unlock(&other
->sk_receive_queue
.lock
);
1336 unix_state_unlock(other
);
1337 other
->sk_data_ready(other
);
1343 unix_state_unlock(other
);
1348 unix_release_sock(newsk
, 0);
1354 static int unix_socketpair(struct socket
*socka
, struct socket
*sockb
)
1356 struct sock
*ska
= socka
->sk
, *skb
= sockb
->sk
;
1358 /* Join our sockets back to back */
1361 unix_peer(ska
) = skb
;
1362 unix_peer(skb
) = ska
;
1366 if (ska
->sk_type
!= SOCK_DGRAM
) {
1367 ska
->sk_state
= TCP_ESTABLISHED
;
1368 skb
->sk_state
= TCP_ESTABLISHED
;
1369 socka
->state
= SS_CONNECTED
;
1370 sockb
->state
= SS_CONNECTED
;
1375 static void unix_sock_inherit_flags(const struct socket
*old
,
1378 if (test_bit(SOCK_PASSCRED
, &old
->flags
))
1379 set_bit(SOCK_PASSCRED
, &new->flags
);
1380 if (test_bit(SOCK_PASSSEC
, &old
->flags
))
1381 set_bit(SOCK_PASSSEC
, &new->flags
);
1384 static int unix_accept(struct socket
*sock
, struct socket
*newsock
, int flags
)
1386 struct sock
*sk
= sock
->sk
;
1388 struct sk_buff
*skb
;
1392 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
1396 if (sk
->sk_state
!= TCP_LISTEN
)
1399 /* If socket state is TCP_LISTEN it cannot change (for now...),
1400 * so that no locks are necessary.
1403 skb
= skb_recv_datagram(sk
, 0, flags
&O_NONBLOCK
, &err
);
1405 /* This means receive shutdown. */
1412 skb_free_datagram(sk
, skb
);
1413 wake_up_interruptible(&unix_sk(sk
)->peer_wait
);
1415 /* attach accepted sock to socket */
1416 unix_state_lock(tsk
);
1417 newsock
->state
= SS_CONNECTED
;
1418 unix_sock_inherit_flags(sock
, newsock
);
1419 sock_graft(tsk
, newsock
);
1420 unix_state_unlock(tsk
);
1428 static int unix_getname(struct socket
*sock
, struct sockaddr
*uaddr
, int *uaddr_len
, int peer
)
1430 struct sock
*sk
= sock
->sk
;
1431 struct unix_sock
*u
;
1432 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
, uaddr
);
1436 sk
= unix_peer_get(sk
);
1447 unix_state_lock(sk
);
1449 sunaddr
->sun_family
= AF_UNIX
;
1450 sunaddr
->sun_path
[0] = 0;
1451 *uaddr_len
= sizeof(short);
1453 struct unix_address
*addr
= u
->addr
;
1455 *uaddr_len
= addr
->len
;
1456 memcpy(sunaddr
, addr
->name
, *uaddr_len
);
1458 unix_state_unlock(sk
);
1464 static void unix_detach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1468 scm
->fp
= UNIXCB(skb
).fp
;
1469 UNIXCB(skb
).fp
= NULL
;
1471 for (i
= scm
->fp
->count
-1; i
>= 0; i
--)
1472 unix_notinflight(scm
->fp
->fp
[i
]);
1475 static void unix_destruct_scm(struct sk_buff
*skb
)
1477 struct scm_cookie scm
;
1478 memset(&scm
, 0, sizeof(scm
));
1479 scm
.pid
= UNIXCB(skb
).pid
;
1481 unix_detach_fds(&scm
, skb
);
1483 /* Alas, it calls VFS */
1484 /* So fscking what? fput() had been SMP-safe since the last Summer */
1490 * The "user->unix_inflight" variable is protected by the garbage
1491 * collection lock, and we just read it locklessly here. If you go
1492 * over the limit, there might be a tiny race in actually noticing
1493 * it across threads. Tough.
1495 static inline bool too_many_unix_fds(struct task_struct
*p
)
1497 struct user_struct
*user
= current_user();
1499 if (unlikely(user
->unix_inflight
> task_rlimit(p
, RLIMIT_NOFILE
)))
1500 return !capable(CAP_SYS_RESOURCE
) && !capable(CAP_SYS_ADMIN
);
1504 #define MAX_RECURSION_LEVEL 4
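/* MAX_RECURSION_LEVEL bounds how deeply AF_UNIX sockets may be nested via
 * SCM_RIGHTS (a socket carrying an fd of a socket that itself carries
 * socket fds, and so on); deeper chains are refused with -ETOOMANYREFS
 * in unix_attach_fds() below.
 */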
1506 static int unix_attach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1509 unsigned char max_level
= 0;
1510 int unix_sock_count
= 0;
1512 if (too_many_unix_fds(current
))
1513 return -ETOOMANYREFS
;
1515 for (i
= scm
->fp
->count
- 1; i
>= 0; i
--) {
1516 struct sock
*sk
= unix_get_socket(scm
->fp
->fp
[i
]);
1520 max_level
= max(max_level
,
1521 unix_sk(sk
)->recursion_level
);
1524 if (unlikely(max_level
> MAX_RECURSION_LEVEL
))
1525 return -ETOOMANYREFS
;
1528 * Need to duplicate file references for the sake of garbage
1529 * collection. Otherwise a socket in the fps might become a
1530 * candidate for GC while the skb is not yet queued.
1532 UNIXCB(skb
).fp
= scm_fp_dup(scm
->fp
);
1533 if (!UNIXCB(skb
).fp
)
1536 for (i
= scm
->fp
->count
- 1; i
>= 0; i
--)
1537 unix_inflight(scm
->fp
->fp
[i
]);
1541 static int unix_scm_to_skb(struct scm_cookie
*scm
, struct sk_buff
*skb
, bool send_fds
)
1545 UNIXCB(skb
).pid
= get_pid(scm
->pid
);
1546 UNIXCB(skb
).uid
= scm
->creds
.uid
;
1547 UNIXCB(skb
).gid
= scm
->creds
.gid
;
1548 UNIXCB(skb
).fp
= NULL
;
1549 if (scm
->fp
&& send_fds
)
1550 err
= unix_attach_fds(scm
, skb
);
1552 skb
->destructor
= unix_destruct_scm
;
1557 * Some apps rely on write() giving SCM_CREDENTIALS
1558 * We include credentials if source or destination socket
1559 * asserted SOCK_PASSCRED.
1561 static void maybe_add_creds(struct sk_buff
*skb
, const struct socket
*sock
,
1562 const struct sock
*other
)
1564 if (UNIXCB(skb
).pid
)
1566 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) ||
1567 !other
->sk_socket
||
1568 test_bit(SOCK_PASSCRED
, &other
->sk_socket
->flags
)) {
1569 UNIXCB(skb
).pid
= get_pid(task_tgid(current
));
1570 current_uid_gid(&UNIXCB(skb
).uid
, &UNIXCB(skb
).gid
);
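/* For illustration, a minimal userspace sketch of how a receiver asks for
 * and reads the credentials attached above (assuming a connected AF_UNIX
 * socket on fd "sock"; error handling omitted):
 *
 *	int on = 1;
 *	struct ucred cred;
 *	char buf[1], ctl[CMSG_SPACE(sizeof(cred))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = ctl, .msg_controllen = sizeof(ctl) };
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(sock, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS)
 *			memcpy(&cred, CMSG_DATA(cmsg), sizeof(cred));
 */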
1575 * Send AF_UNIX data.
1578 static int unix_dgram_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1579 struct msghdr
*msg
, size_t len
)
1581 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1582 struct sock
*sk
= sock
->sk
;
1583 struct net
*net
= sock_net(sk
);
1584 struct unix_sock
*u
= unix_sk(sk
);
1585 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
, msg
->msg_name
);
1586 struct sock
*other
= NULL
;
1587 int namelen
= 0; /* fake GCC */
1590 struct sk_buff
*skb
;
1592 struct scm_cookie tmp_scm
;
1597 if (NULL
== siocb
->scm
)
1598 siocb
->scm
= &tmp_scm
;
1600 err
= scm_send(sock
, msg
, siocb
->scm
, false);
1605 if (msg
->msg_flags
&MSG_OOB
)
1608 if (msg
->msg_namelen
) {
1609 err
= unix_mkname(sunaddr
, msg
->msg_namelen
, &hash
);
1616 other
= unix_peer_get(sk
);
1621 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
1622 && (err
= unix_autobind(sock
)) != 0)
1626 if (len
> sk
->sk_sndbuf
- 32)
1629 if (len
> SKB_MAX_ALLOC
) {
1630 data_len
= min_t(size_t,
1631 len
- SKB_MAX_ALLOC
,
1632 MAX_SKB_FRAGS
* PAGE_SIZE
);
1633 data_len
= PAGE_ALIGN(data_len
);
1635 BUILD_BUG_ON(SKB_MAX_ALLOC
< PAGE_SIZE
);
1638 skb
= sock_alloc_send_pskb(sk
, len
- data_len
, data_len
,
1639 msg
->msg_flags
& MSG_DONTWAIT
, &err
,
1640 PAGE_ALLOC_COSTLY_ORDER
);
1644 err
= unix_scm_to_skb(siocb
->scm
, skb
, true);
1647 max_level
= err
+ 1;
1648 unix_get_secdata(siocb
->scm
, skb
);
1650 skb_put(skb
, len
- data_len
);
1651 skb
->data_len
= data_len
;
1653 err
= skb_copy_datagram_from_iovec(skb
, 0, msg
->msg_iov
, 0, len
);
1657 timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
1662 if (sunaddr
== NULL
)
1665 other
= unix_find_other(net
, sunaddr
, namelen
, sk
->sk_type
,
1671 if (sk_filter(other
, skb
) < 0) {
1672 /* Toss the packet but do not return any error to the sender */
1678 unix_state_lock(other
);
1681 if (!unix_may_send(sk
, other
))
1684 if (unlikely(sock_flag(other
, SOCK_DEAD
))) {
1686 * Check with 1003.1g - what should
1689 unix_state_unlock(other
);
1693 unix_state_lock(sk
);
1696 if (unix_peer(sk
) == other
) {
1697 unix_peer(sk
) = NULL
;
1698 unix_dgram_peer_wake_disconnect_wakeup(sk
, other
);
1700 unix_state_unlock(sk
);
1702 unix_dgram_disconnected(sk
, other
);
1704 err
= -ECONNREFUSED
;
1706 unix_state_unlock(sk
);
1716 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1719 if (sk
->sk_type
!= SOCK_SEQPACKET
) {
1720 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
1725 /* other == sk && unix_peer(other) != sk if
1726 * - unix_peer(sk) == NULL, destination address bound to sk
1727 * - unix_peer(sk) == sk by time of get but disconnected before lock
1730 unlikely(unix_peer(other
) != sk
&& unix_recvq_full(other
))) {
1732 timeo
= unix_wait_for_peer(other
, timeo
);
1734 err
= sock_intr_errno(timeo
);
1735 if (signal_pending(current
))
1742 unix_state_unlock(other
);
1743 unix_state_double_lock(sk
, other
);
1746 if (unix_peer(sk
) != other
||
1747 unix_dgram_peer_wake_me(sk
, other
)) {
1755 goto restart_locked
;
1759 if (unlikely(sk_locked
))
1760 unix_state_unlock(sk
);
1762 if (sock_flag(other
, SOCK_RCVTSTAMP
))
1763 __net_timestamp(skb
);
1764 maybe_add_creds(skb
, sock
, other
);
1765 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1766 if (max_level
> unix_sk(other
)->recursion_level
)
1767 unix_sk(other
)->recursion_level
= max_level
;
1768 unix_state_unlock(other
);
1769 other
->sk_data_ready(other
);
1771 scm_destroy(siocb
->scm
);
1776 unix_state_unlock(sk
);
1777 unix_state_unlock(other
);
1783 scm_destroy(siocb
->scm
);
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
1790 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
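/* For example, with 4 KiB pages get_order(32768) is 3, so UNIX_SKB_FRAGS_SZ
 * is 4096 << 3 = 32768 bytes; with 64 KiB pages get_order(32768) is 0 and the
 * limit becomes a single 64 KiB page.
 */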
1792 static int unix_stream_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1793 struct msghdr
*msg
, size_t len
)
1795 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1796 struct sock
*sk
= sock
->sk
;
1797 struct sock
*other
= NULL
;
1799 struct sk_buff
*skb
;
1801 struct scm_cookie tmp_scm
;
1802 bool fds_sent
= false;
1806 if (NULL
== siocb
->scm
)
1807 siocb
->scm
= &tmp_scm
;
1809 err
= scm_send(sock
, msg
, siocb
->scm
, false);
1814 if (msg
->msg_flags
&MSG_OOB
)
1817 if (msg
->msg_namelen
) {
1818 err
= sk
->sk_state
== TCP_ESTABLISHED
? -EISCONN
: -EOPNOTSUPP
;
1822 other
= unix_peer(sk
);
1827 if (sk
->sk_shutdown
& SEND_SHUTDOWN
)
1830 while (sent
< len
) {
1833 /* Keep two messages in the pipe so it schedules better */
1834 size
= min_t(int, size
, (sk
->sk_sndbuf
>> 1) - 64);
1836 /* allow fallback to order-0 allocations */
1837 size
= min_t(int, size
, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ
);
1839 data_len
= max_t(int, 0, size
- SKB_MAX_HEAD(0));
1841 data_len
= min_t(size_t, size
, PAGE_ALIGN(data_len
));
1843 skb
= sock_alloc_send_pskb(sk
, size
- data_len
, data_len
,
1844 msg
->msg_flags
& MSG_DONTWAIT
, &err
,
1845 get_order(UNIX_SKB_FRAGS_SZ
));
1849 /* Only send the fds in the first buffer */
1850 err
= unix_scm_to_skb(siocb
->scm
, skb
, !fds_sent
);
1855 max_level
= err
+ 1;
1858 skb_put(skb
, size
- data_len
);
1859 skb
->data_len
= data_len
;
1861 err
= skb_copy_datagram_from_iovec(skb
, 0, msg
->msg_iov
,
1868 unix_state_lock(other
);
1870 if (sock_flag(other
, SOCK_DEAD
) ||
1871 (other
->sk_shutdown
& RCV_SHUTDOWN
))
1874 maybe_add_creds(skb
, sock
, other
);
1875 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1876 if (max_level
> unix_sk(other
)->recursion_level
)
1877 unix_sk(other
)->recursion_level
= max_level
;
1878 unix_state_unlock(other
);
1879 other
->sk_data_ready(other
);
1883 scm_destroy(siocb
->scm
);
1889 unix_state_unlock(other
);
1892 if (sent
== 0 && !(msg
->msg_flags
&MSG_NOSIGNAL
))
1893 send_sig(SIGPIPE
, current
, 0);
1896 scm_destroy(siocb
->scm
);
1898 return sent
? : err
;
1901 static int unix_seqpacket_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1902 struct msghdr
*msg
, size_t len
)
1905 struct sock
*sk
= sock
->sk
;
1907 err
= sock_error(sk
);
1911 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1914 if (msg
->msg_namelen
)
1915 msg
->msg_namelen
= 0;
1917 return unix_dgram_sendmsg(kiocb
, sock
, msg
, len
);
1920 static int unix_seqpacket_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1921 struct msghdr
*msg
, size_t size
,
1924 struct sock
*sk
= sock
->sk
;
1926 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1929 return unix_dgram_recvmsg(iocb
, sock
, msg
, size
, flags
);
1932 static void unix_copy_addr(struct msghdr
*msg
, struct sock
*sk
)
1934 struct unix_sock
*u
= unix_sk(sk
);
1937 msg
->msg_namelen
= u
->addr
->len
;
1938 memcpy(msg
->msg_name
, u
->addr
->name
, u
->addr
->len
);
1942 static int unix_dgram_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1943 struct msghdr
*msg
, size_t size
,
1946 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
1947 struct scm_cookie tmp_scm
;
1948 struct sock
*sk
= sock
->sk
;
1949 struct unix_sock
*u
= unix_sk(sk
);
1950 int noblock
= flags
& MSG_DONTWAIT
;
1951 struct sk_buff
*skb
;
1959 err
= mutex_lock_interruptible(&u
->readlock
);
1960 if (unlikely(err
)) {
1961 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1962 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1964 err
= noblock
? -EAGAIN
: -ERESTARTSYS
;
1968 skip
= sk_peek_offset(sk
, flags
);
1970 skb
= __skb_recv_datagram(sk
, flags
, &peeked
, &skip
, &err
);
1972 unix_state_lock(sk
);
1973 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1974 if (sk
->sk_type
== SOCK_SEQPACKET
&& err
== -EAGAIN
&&
1975 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
1977 unix_state_unlock(sk
);
1981 wake_up_interruptible_sync_poll(&u
->peer_wait
,
1982 POLLOUT
| POLLWRNORM
| POLLWRBAND
);
1985 unix_copy_addr(msg
, skb
->sk
);
1987 if (size
> skb
->len
- skip
)
1988 size
= skb
->len
- skip
;
1989 else if (size
< skb
->len
- skip
)
1990 msg
->msg_flags
|= MSG_TRUNC
;
1992 err
= skb_copy_datagram_iovec(skb
, skip
, msg
->msg_iov
, size
);
1996 if (sock_flag(sk
, SOCK_RCVTSTAMP
))
1997 __sock_recv_timestamp(msg
, sk
, skb
);
2000 siocb
->scm
= &tmp_scm
;
2001 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
2003 scm_set_cred(siocb
->scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).uid
, UNIXCB(skb
).gid
);
2004 unix_set_secdata(siocb
->scm
, skb
);
2006 if (!(flags
& MSG_PEEK
)) {
2008 unix_detach_fds(siocb
->scm
, skb
);
2010 sk_peek_offset_bwd(sk
, skb
->len
);
2012 /* It is questionable: on PEEK we could:
2013 - do not return fds - good, but too simple 8)
2014 - return fds, and do not return them on read (old strategy,
2016 - clone fds (I chose it for now, it is the most universal
2019 POSIX 1003.1g does not actually define this clearly
2020 at all. POSIX 1003.1g doesn't define a lot of things
2025 sk_peek_offset_fwd(sk
, size
);
2028 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
2030 err
= (flags
& MSG_TRUNC
) ? skb
->len
- skip
: size
;
2032 scm_recv(sock
, msg
, siocb
->scm
, flags
);
2035 skb_free_datagram(sk
, skb
);
2037 mutex_unlock(&u
->readlock
);
2043 * Sleep until more data has arrived. But check for races..
2045 static long unix_stream_data_wait(struct sock
*sk
, long timeo
,
2046 struct sk_buff
*last
)
2050 unix_state_lock(sk
);
2053 prepare_to_wait(sk_sleep(sk
), &wait
, TASK_INTERRUPTIBLE
);
2055 if (skb_peek_tail(&sk
->sk_receive_queue
) != last
||
2057 (sk
->sk_shutdown
& RCV_SHUTDOWN
) ||
2058 signal_pending(current
) ||
2062 set_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
2063 unix_state_unlock(sk
);
2064 timeo
= freezable_schedule_timeout(timeo
);
2065 unix_state_lock(sk
);
2067 if (sock_flag(sk
, SOCK_DEAD
))
2070 clear_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
2073 finish_wait(sk_sleep(sk
), &wait
);
2074 unix_state_unlock(sk
);
2078 static unsigned int unix_skb_len(const struct sk_buff
*skb
)
2080 return skb
->len
- UNIXCB(skb
).consumed
;
2083 static int unix_stream_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
2084 struct msghdr
*msg
, size_t size
,
2087 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
2088 struct scm_cookie tmp_scm
;
2089 struct sock
*sk
= sock
->sk
;
2090 struct unix_sock
*u
= unix_sk(sk
);
2091 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
, msg
->msg_name
);
2093 int noblock
= flags
& MSG_DONTWAIT
;
2094 int check_creds
= 0;
2101 if (sk
->sk_state
!= TCP_ESTABLISHED
)
2108 target
= sock_rcvlowat(sk
, flags
&MSG_WAITALL
, size
);
2109 timeo
= sock_rcvtimeo(sk
, noblock
);
2111 /* Lock the socket to prevent queue disordering
2112 * while sleeps in memcpy_tomsg
2116 siocb
->scm
= &tmp_scm
;
2117 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
2120 mutex_lock(&u
->readlock
);
2122 if (flags
& MSG_PEEK
)
2123 skip
= sk_peek_offset(sk
, flags
);
2129 struct sk_buff
*skb
, *last
;
2131 unix_state_lock(sk
);
2132 if (sock_flag(sk
, SOCK_DEAD
)) {
2136 last
= skb
= skb_peek(&sk
->sk_receive_queue
);
2139 unix_sk(sk
)->recursion_level
= 0;
2140 if (copied
>= target
)
2144 * POSIX 1003.1g mandates this order.
2147 err
= sock_error(sk
);
2150 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2153 unix_state_unlock(sk
);
2157 mutex_unlock(&u
->readlock
);
2159 timeo
= unix_stream_data_wait(sk
, timeo
, last
);
2161 if (signal_pending(current
)) {
2162 err
= sock_intr_errno(timeo
);
2166 mutex_lock(&u
->readlock
);
2169 unix_state_unlock(sk
);
2173 while (skip
>= unix_skb_len(skb
)) {
2174 skip
-= unix_skb_len(skb
);
2176 skb
= skb_peek_next(skb
, &sk
->sk_receive_queue
);
2181 unix_state_unlock(sk
);
2184 /* Never glue messages from different writers */
2185 if ((UNIXCB(skb
).pid
!= siocb
->scm
->pid
) ||
2186 !uid_eq(UNIXCB(skb
).uid
, siocb
->scm
->creds
.uid
) ||
2187 !gid_eq(UNIXCB(skb
).gid
, siocb
->scm
->creds
.gid
))
2189 } else if (test_bit(SOCK_PASSCRED
, &sock
->flags
)) {
2190 /* Copy credentials */
2191 scm_set_cred(siocb
->scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).uid
, UNIXCB(skb
).gid
);
2195 /* Copy address just once */
2197 unix_copy_addr(msg
, skb
->sk
);
2201 chunk
= min_t(unsigned int, unix_skb_len(skb
) - skip
, size
);
2202 if (skb_copy_datagram_iovec(skb
, UNIXCB(skb
).consumed
+ skip
,
2203 msg
->msg_iov
, chunk
)) {
2211 /* Mark read part of skb as used */
2212 if (!(flags
& MSG_PEEK
)) {
2213 UNIXCB(skb
).consumed
+= chunk
;
2215 sk_peek_offset_bwd(sk
, chunk
);
2218 unix_detach_fds(siocb
->scm
, skb
);
2220 if (unix_skb_len(skb
))
2223 skb_unlink(skb
, &sk
->sk_receive_queue
);
2229 /* It is questionable, see note in unix_dgram_recvmsg.
2232 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
2234 sk_peek_offset_fwd(sk
, chunk
);
2241 unix_state_lock(sk
);
2242 skb
= skb_peek_next(skb
, &sk
->sk_receive_queue
);
2245 unix_state_unlock(sk
);
2250 mutex_unlock(&u
->readlock
);
2251 scm_recv(sock
, msg
, siocb
->scm
, flags
);
2253 return copied
? : err
;
2256 static int unix_shutdown(struct socket
*sock
, int mode
)
2258 struct sock
*sk
= sock
->sk
;
2261 if (mode
< SHUT_RD
|| mode
> SHUT_RDWR
)
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2270 unix_state_lock(sk
);
2271 sk
->sk_shutdown
|= mode
;
2272 other
= unix_peer(sk
);
2275 unix_state_unlock(sk
);
2276 sk
->sk_state_change(sk
);
2279 (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
)) {
2283 if (mode
&RCV_SHUTDOWN
)
2284 peer_mode
|= SEND_SHUTDOWN
;
2285 if (mode
&SEND_SHUTDOWN
)
2286 peer_mode
|= RCV_SHUTDOWN
;
2287 unix_state_lock(other
);
2288 other
->sk_shutdown
|= peer_mode
;
2289 unix_state_unlock(other
);
2290 other
->sk_state_change(other
);
2291 if (peer_mode
== SHUTDOWN_MASK
)
2292 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_HUP
);
2293 else if (peer_mode
& RCV_SHUTDOWN
)
2294 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_IN
);
2302 long unix_inq_len(struct sock
*sk
)
2304 struct sk_buff
*skb
;
2307 if (sk
->sk_state
== TCP_LISTEN
)
2310 spin_lock(&sk
->sk_receive_queue
.lock
);
2311 if (sk
->sk_type
== SOCK_STREAM
||
2312 sk
->sk_type
== SOCK_SEQPACKET
) {
2313 skb_queue_walk(&sk
->sk_receive_queue
, skb
)
2314 amount
+= unix_skb_len(skb
);
2316 skb
= skb_peek(&sk
->sk_receive_queue
);
2320 spin_unlock(&sk
->sk_receive_queue
.lock
);
2324 EXPORT_SYMBOL_GPL(unix_inq_len
);
2326 long unix_outq_len(struct sock
*sk
)
2328 return sk_wmem_alloc_get(sk
);
2330 EXPORT_SYMBOL_GPL(unix_outq_len
);
2332 static int unix_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
)
2334 struct sock
*sk
= sock
->sk
;
2340 amount
= unix_outq_len(sk
);
2341 err
= put_user(amount
, (int __user
*)arg
);
2344 amount
= unix_inq_len(sk
);
2348 err
= put_user(amount
, (int __user
*)arg
);
2357 static unsigned int unix_poll(struct file
*file
, struct socket
*sock
, poll_table
*wait
)
2359 struct sock
*sk
= sock
->sk
;
2362 sock_poll_wait(file
, sk_sleep(sk
), wait
);
2365 /* exceptional events? */
2368 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2370 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2371 mask
|= POLLRDHUP
| POLLIN
| POLLRDNORM
;
2374 if (!skb_queue_empty(&sk
->sk_receive_queue
))
2375 mask
|= POLLIN
| POLLRDNORM
;
2377 /* Connection-based need to check for termination and startup */
2378 if ((sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) &&
2379 sk
->sk_state
== TCP_CLOSE
)
2383 * we set writable also when the other side has shut down the
2384 * connection. This prevents stuck sockets.
2386 if (unix_writable(sk
))
2387 mask
|= POLLOUT
| POLLWRNORM
| POLLWRBAND
;
2392 static unsigned int unix_dgram_poll(struct file
*file
, struct socket
*sock
,
2395 struct sock
*sk
= sock
->sk
, *other
;
2396 unsigned int mask
, writable
;
2398 sock_poll_wait(file
, sk_sleep(sk
), wait
);
2401 /* exceptional events? */
2402 if (sk
->sk_err
|| !skb_queue_empty(&sk
->sk_error_queue
))
2404 (sock_flag(sk
, SOCK_SELECT_ERR_QUEUE
) ? POLLPRI
: 0);
2406 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2407 mask
|= POLLRDHUP
| POLLIN
| POLLRDNORM
;
2408 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2412 if (!skb_queue_empty(&sk
->sk_receive_queue
))
2413 mask
|= POLLIN
| POLLRDNORM
;
2415 /* Connection-based need to check for termination and startup */
2416 if (sk
->sk_type
== SOCK_SEQPACKET
) {
2417 if (sk
->sk_state
== TCP_CLOSE
)
2419 /* connection hasn't started yet? */
2420 if (sk
->sk_state
== TCP_SYN_SENT
)
2424 /* No write status requested, avoid expensive OUT tests. */
2425 if (!(poll_requested_events(wait
) & (POLLWRBAND
|POLLWRNORM
|POLLOUT
)))
2428 writable
= unix_writable(sk
);
2430 unix_state_lock(sk
);
2432 other
= unix_peer(sk
);
2433 if (other
&& unix_peer(other
) != sk
&&
2434 unix_recvq_full(other
) &&
2435 unix_dgram_peer_wake_me(sk
, other
))
2438 unix_state_unlock(sk
);
2442 mask
|= POLLOUT
| POLLWRNORM
| POLLWRBAND
;
2444 set_bit(SOCK_ASYNC_NOSPACE
, &sk
->sk_socket
->flags
);
2449 #ifdef CONFIG_PROC_FS
2451 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2453 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2454 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2455 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
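/* The /proc/net/unix iterator encodes its position as a single loff_t:
 * the bucket index lives in the high bits and the 1-based offset of the
 * socket within that bucket in the low BUCKET_SPACE bits, so resuming a
 * read only has to rescan one hash chain rather than the whole table.
 */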
2457 static struct sock
*unix_from_bucket(struct seq_file
*seq
, loff_t
*pos
)
2459 unsigned long offset
= get_offset(*pos
);
2460 unsigned long bucket
= get_bucket(*pos
);
2462 unsigned long count
= 0;
2464 for (sk
= sk_head(&unix_socket_table
[bucket
]); sk
; sk
= sk_next(sk
)) {
2465 if (sock_net(sk
) != seq_file_net(seq
))
2467 if (++count
== offset
)
2474 static struct sock
*unix_next_socket(struct seq_file
*seq
,
2478 unsigned long bucket
;
2480 while (sk
> (struct sock
*)SEQ_START_TOKEN
) {
2484 if (sock_net(sk
) == seq_file_net(seq
))
2489 sk
= unix_from_bucket(seq
, pos
);
2494 bucket
= get_bucket(*pos
) + 1;
2495 *pos
= set_bucket_offset(bucket
, 1);
2496 } while (bucket
< ARRAY_SIZE(unix_socket_table
));
2501 static void *unix_seq_start(struct seq_file
*seq
, loff_t
*pos
)
2502 __acquires(unix_table_lock
)
2504 spin_lock(&unix_table_lock
);
2507 return SEQ_START_TOKEN
;
2509 if (get_bucket(*pos
) >= ARRAY_SIZE(unix_socket_table
))
2512 return unix_next_socket(seq
, NULL
, pos
);
2515 static void *unix_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2518 return unix_next_socket(seq
, v
, pos
);
2521 static void unix_seq_stop(struct seq_file
*seq
, void *v
)
2522 __releases(unix_table_lock
)
2524 spin_unlock(&unix_table_lock
);
2527 static int unix_seq_show(struct seq_file
*seq
, void *v
)
2530 if (v
== SEQ_START_TOKEN
)
2531 seq_puts(seq
, "Num RefCount Protocol Flags Type St "
2535 struct unix_sock
*u
= unix_sk(s
);
2538 seq_printf(seq
, "%pK: %08X %08X %08X %04X %02X %5lu",
2540 atomic_read(&s
->sk_refcnt
),
2542 s
->sk_state
== TCP_LISTEN
? __SO_ACCEPTCON
: 0,
2545 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTED
: SS_UNCONNECTED
) :
2546 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTING
: SS_DISCONNECTING
),
2554 len
= u
->addr
->len
- sizeof(short);
2555 if (!UNIX_ABSTRACT(s
))
2561 for ( ; i
< len
; i
++)
2562 seq_putc(seq
, u
->addr
->name
->sun_path
[i
]);
2564 unix_state_unlock(s
);
2565 seq_putc(seq
, '\n');
2571 static const struct seq_operations unix_seq_ops
= {
2572 .start
= unix_seq_start
,
2573 .next
= unix_seq_next
,
2574 .stop
= unix_seq_stop
,
2575 .show
= unix_seq_show
,
2578 static int unix_seq_open(struct inode
*inode
, struct file
*file
)
2580 return seq_open_net(inode
, file
, &unix_seq_ops
,
2581 sizeof(struct seq_net_private
));
2584 static const struct file_operations unix_seq_fops
= {
2585 .owner
= THIS_MODULE
,
2586 .open
= unix_seq_open
,
2588 .llseek
= seq_lseek
,
2589 .release
= seq_release_net
,
2594 static const struct net_proto_family unix_family_ops
= {
2596 .create
= unix_create
,
2597 .owner
= THIS_MODULE
,
2601 static int __net_init
unix_net_init(struct net
*net
)
2603 int error
= -ENOMEM
;
2605 net
->unx
.sysctl_max_dgram_qlen
= 10;
2606 if (unix_sysctl_register(net
))
2609 #ifdef CONFIG_PROC_FS
2610 if (!proc_create("unix", 0, net
->proc_net
, &unix_seq_fops
)) {
2611 unix_sysctl_unregister(net
);
2620 static void __net_exit
unix_net_exit(struct net
*net
)
2622 unix_sysctl_unregister(net
);
2623 remove_proc_entry("unix", net
->proc_net
);
2626 static struct pernet_operations unix_net_ops
= {
2627 .init
= unix_net_init
,
2628 .exit
= unix_net_exit
,
2631 static int __init
af_unix_init(void)
2635 BUILD_BUG_ON(sizeof(struct unix_skb_parms
) > FIELD_SIZEOF(struct sk_buff
, cb
));
2637 rc
= proto_register(&unix_proto
, 1);
2639 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__
);
2643 sock_register(&unix_family_ops
);
2644 register_pernet_subsys(&unix_net_ops
);
2649 static void __exit
af_unix_exit(void)
2651 sock_unregister(PF_UNIX
);
2652 proto_unregister(&unix_proto
);
2653 unregister_pernet_subsys(&unix_net_ops
);
2656 /* Earlier than device_initcall() so that other drivers invoking
2657 request_module() don't end up in a loop when modprobe tries
2658 to use a UNIX socket. But later than subsys_initcall() because
2659 we depend on stuff initialised there */
2660 fs_initcall(af_unix_init
);
2661 module_exit(af_unix_exit
);
2663 MODULE_LICENSE("GPL");
2664 MODULE_ALIAS_NETPROTO(PF_UNIX
);