2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan.cox@linux.org>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
34 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
35 * by the above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid a huge number
40 * of hashed socks (this is for unix_gc()
41 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
49 * Michal Ostrowski : Module initialization cleanup.
50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
51 * the core infrastructure is doing that
52 * for all net proto families now (2.5.69+)
55 * Known differences from reference BSD that was tested:
58 * ECONNREFUSED is not returned from one end of a connected() socket to the
59 * other the moment one end closes.
60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
61 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
63 * accept() returns a path name even if the connecting socket has closed
64 * in the meantime (BSD loses the path and gives up).
65 * accept() returns 0 length path for an unbound connector. BSD returns 16
66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 * BSD af_unix apparently has connect forgetting to block properly.
69 * (need to check this with the POSIX spec in detail)
71 * Differences from 2.0.0-11-... (ANK)
72 * Bug fixes and improvements.
73 * - client shutdown killed server socket.
74 * - removed all useless cli/sti pairs.
76 * Semantic changes/extensions.
77 * - generic control message passing.
78 * - SCM_CREDENTIALS control message.
79 * - "Abstract" (not FS based) socket bindings.
80 * Abstract names are sequences of bytes (not zero terminated)
81 * started by 0, so that this name space does not intersect
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
120 static struct hlist_head unix_socket_table
[UNIX_HASH_SIZE
+ 1];
121 static DEFINE_SPINLOCK(unix_table_lock
);
122 static atomic_t unix_nr_socks
= ATOMIC_INIT(0);
124 #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
126 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Store the security ID carried by the SCM cookie into the skb
 * (the UNIXSID() slot), so it travels with the queued message.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/*
 * Load the security ID stored in the skb (the UNIXSID() slot) back
 * into the SCM cookie.
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are empty stubs. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
147 * SMP locking strategy:
148 * hash table is protected with spinlock unix_table_lock
149 * each socket state is protected by separate rwlock.
152 static inline unsigned unix_hash_fold(__wsum n
)
154 unsigned hash
= (__force
unsigned)n
;
157 return hash
&(UNIX_HASH_SIZE
-1);
160 #define unix_peer(sk) (unix_sk(sk)->peer)
/*
 * Return non-zero when @osk's peer pointer refers to @sk, i.e. @osk
 * is connected back to us.
 */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}
167 static inline int unix_may_send(struct sock
*sk
, struct sock
*osk
)
169 return (unix_peer(osk
) == NULL
|| unix_our_peer(sk
, osk
));
172 static struct sock
*unix_peer_get(struct sock
*s
)
180 unix_state_unlock(s
);
184 static inline void unix_release_addr(struct unix_address
*addr
)
186 if (atomic_dec_and_test(&addr
->refcnt
))
191 * Check unix socket name:
192 * - should be not zero length.
193 * - if started by not zero, should be NULL terminated (FS object)
194 * - if started by zero, it is abstract name.
197 static int unix_mkname(struct sockaddr_un
* sunaddr
, int len
, unsigned *hashp
)
199 if (len
<= sizeof(short) || len
> sizeof(*sunaddr
))
201 if (!sunaddr
|| sunaddr
->sun_family
!= AF_UNIX
)
203 if (sunaddr
->sun_path
[0]) {
205 * This may look like an off-by-one error but it is a bit more
206 * subtle. 108 is the longest valid AF_UNIX path for a binding.
207 * sun_path[108] doesn't exist as such. However, in kernel space
208 * we are guaranteed that it is a valid memory location in our
209 * kernel address buffer.
211 ((char *)sunaddr
)[len
]=0;
212 len
= strlen(sunaddr
->sun_path
)+1+sizeof(short);
216 *hashp
= unix_hash_fold(csum_partial((char*)sunaddr
, len
, 0));
/*
 * Unlink @sk from whatever hash chain it is on.  This variant takes no
 * lock itself; unix_remove_socket() is the wrapper that performs the
 * same operation under unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
/*
 * Add @sk to the hash chain @list.  The socket must not be hashed
 * already (checked with BUG_TRAP).  This variant takes no lock itself;
 * unix_insert_socket() is the wrapper that does so under
 * unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}
231 static inline void unix_remove_socket(struct sock
*sk
)
233 spin_lock(&unix_table_lock
);
234 __unix_remove_socket(sk
);
235 spin_unlock(&unix_table_lock
);
238 static inline void unix_insert_socket(struct hlist_head
*list
, struct sock
*sk
)
240 spin_lock(&unix_table_lock
);
241 __unix_insert_socket(list
, sk
);
242 spin_unlock(&unix_table_lock
);
245 static struct sock
*__unix_find_socket_byname(struct net
*net
,
246 struct sockaddr_un
*sunname
,
247 int len
, int type
, unsigned hash
)
250 struct hlist_node
*node
;
252 sk_for_each(s
, node
, &unix_socket_table
[hash
^ type
]) {
253 struct unix_sock
*u
= unix_sk(s
);
255 if (!net_eq(sock_net(s
), net
))
258 if (u
->addr
->len
== len
&&
259 !memcmp(u
->addr
->name
, sunname
, len
))
267 static inline struct sock
*unix_find_socket_byname(struct net
*net
,
268 struct sockaddr_un
*sunname
,
274 spin_lock(&unix_table_lock
);
275 s
= __unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
278 spin_unlock(&unix_table_lock
);
282 static struct sock
*unix_find_socket_byinode(struct net
*net
, struct inode
*i
)
285 struct hlist_node
*node
;
287 spin_lock(&unix_table_lock
);
289 &unix_socket_table
[i
->i_ino
& (UNIX_HASH_SIZE
- 1)]) {
290 struct dentry
*dentry
= unix_sk(s
)->dentry
;
292 if (!net_eq(sock_net(s
), net
))
295 if(dentry
&& dentry
->d_inode
== i
)
303 spin_unlock(&unix_table_lock
);
307 static inline int unix_writable(struct sock
*sk
)
309 return (atomic_read(&sk
->sk_wmem_alloc
) << 2) <= sk
->sk_sndbuf
;
312 static void unix_write_space(struct sock
*sk
)
314 read_lock(&sk
->sk_callback_lock
);
315 if (unix_writable(sk
)) {
316 if (sk
->sk_sleep
&& waitqueue_active(sk
->sk_sleep
))
317 wake_up_interruptible_sync(sk
->sk_sleep
);
318 sk_wake_async(sk
, SOCK_WAKE_SPACE
, POLL_OUT
);
320 read_unlock(&sk
->sk_callback_lock
);
323 /* When a dgram socket disconnects (or changes its peer), we clear its receive
324 * queue of packets that arrived from the previous peer. First, this allows
325 * flow control to be based only on wmem_alloc; second, a socket connected to
326 * a peer may receive messages only from that peer. */
327 static void unix_dgram_disconnected(struct sock
*sk
, struct sock
*other
)
329 if (!skb_queue_empty(&sk
->sk_receive_queue
)) {
330 skb_queue_purge(&sk
->sk_receive_queue
);
331 wake_up_interruptible_all(&unix_sk(sk
)->peer_wait
);
333 /* If one link of a bidirectional dgram pipe is disconnected,
334 * we signal an error. Messages are lost. Do not do this
335 * when the peer was not connected to us.
337 if (!sock_flag(other
, SOCK_DEAD
) && unix_peer(other
) == sk
) {
338 other
->sk_err
= ECONNRESET
;
339 other
->sk_error_report(other
);
344 static void unix_sock_destructor(struct sock
*sk
)
346 struct unix_sock
*u
= unix_sk(sk
);
348 skb_queue_purge(&sk
->sk_receive_queue
);
350 BUG_TRAP(!atomic_read(&sk
->sk_wmem_alloc
));
351 BUG_TRAP(sk_unhashed(sk
));
352 BUG_TRAP(!sk
->sk_socket
);
353 if (!sock_flag(sk
, SOCK_DEAD
)) {
354 printk("Attempt to release alive unix socket: %p\n", sk
);
359 unix_release_addr(u
->addr
);
361 atomic_dec(&unix_nr_socks
);
362 #ifdef UNIX_REFCNT_DEBUG
363 printk(KERN_DEBUG
"UNIX %p is destroyed, %d are still alive.\n", sk
, atomic_read(&unix_nr_socks
));
367 static int unix_release_sock (struct sock
*sk
, int embrion
)
369 struct unix_sock
*u
= unix_sk(sk
);
370 struct dentry
*dentry
;
371 struct vfsmount
*mnt
;
376 unix_remove_socket(sk
);
381 sk
->sk_shutdown
= SHUTDOWN_MASK
;
386 state
= sk
->sk_state
;
387 sk
->sk_state
= TCP_CLOSE
;
388 unix_state_unlock(sk
);
390 wake_up_interruptible_all(&u
->peer_wait
);
392 skpair
=unix_peer(sk
);
395 if (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) {
396 unix_state_lock(skpair
);
398 skpair
->sk_shutdown
= SHUTDOWN_MASK
;
399 if (!skb_queue_empty(&sk
->sk_receive_queue
) || embrion
)
400 skpair
->sk_err
= ECONNRESET
;
401 unix_state_unlock(skpair
);
402 skpair
->sk_state_change(skpair
);
403 read_lock(&skpair
->sk_callback_lock
);
404 sk_wake_async(skpair
, SOCK_WAKE_WAITD
, POLL_HUP
);
405 read_unlock(&skpair
->sk_callback_lock
);
407 sock_put(skpair
); /* It may now die */
408 unix_peer(sk
) = NULL
;
411 /* Try to flush out this socket. Throw out buffers at least */
413 while ((skb
= skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
414 if (state
==TCP_LISTEN
)
415 unix_release_sock(skb
->sk
, 1);
416 /* passed fds are erased in the kfree_skb hook */
427 /* ---- Socket is dead now and most probably destroyed ---- */
430 * Fixme: BSD difference: In BSD all sockets connected to use get
431 * ECONNRESET and we die on the spot. In Linux we behave
432 * like files and pipes do and wait for the last
435 * Can't we simply set sock->err?
437 * What the above comment does talk about? --ANK(980817)
440 if (unix_tot_inflight
)
441 unix_gc(); /* Garbage collect fds */
446 static int unix_listen(struct socket
*sock
, int backlog
)
449 struct sock
*sk
= sock
->sk
;
450 struct unix_sock
*u
= unix_sk(sk
);
453 if (sock
->type
!=SOCK_STREAM
&& sock
->type
!=SOCK_SEQPACKET
)
454 goto out
; /* Only stream/seqpacket sockets accept */
457 goto out
; /* No listens on an unbound socket */
459 if (sk
->sk_state
!= TCP_CLOSE
&& sk
->sk_state
!= TCP_LISTEN
)
461 if (backlog
> sk
->sk_max_ack_backlog
)
462 wake_up_interruptible_all(&u
->peer_wait
);
463 sk
->sk_max_ack_backlog
= backlog
;
464 sk
->sk_state
= TCP_LISTEN
;
465 /* set credentials so connect can copy them */
466 sk
->sk_peercred
.pid
= task_tgid_vnr(current
);
467 sk
->sk_peercred
.uid
= current
->euid
;
468 sk
->sk_peercred
.gid
= current
->egid
;
472 unix_state_unlock(sk
);
477 static int unix_release(struct socket
*);
478 static int unix_bind(struct socket
*, struct sockaddr
*, int);
479 static int unix_stream_connect(struct socket
*, struct sockaddr
*,
480 int addr_len
, int flags
);
481 static int unix_socketpair(struct socket
*, struct socket
*);
482 static int unix_accept(struct socket
*, struct socket
*, int);
483 static int unix_getname(struct socket
*, struct sockaddr
*, int *, int);
484 static unsigned int unix_poll(struct file
*, struct socket
*, poll_table
*);
485 static int unix_ioctl(struct socket
*, unsigned int, unsigned long);
486 static int unix_shutdown(struct socket
*, int);
487 static int unix_stream_sendmsg(struct kiocb
*, struct socket
*,
488 struct msghdr
*, size_t);
489 static int unix_stream_recvmsg(struct kiocb
*, struct socket
*,
490 struct msghdr
*, size_t, int);
491 static int unix_dgram_sendmsg(struct kiocb
*, struct socket
*,
492 struct msghdr
*, size_t);
493 static int unix_dgram_recvmsg(struct kiocb
*, struct socket
*,
494 struct msghdr
*, size_t, int);
495 static int unix_dgram_connect(struct socket
*, struct sockaddr
*,
497 static int unix_seqpacket_sendmsg(struct kiocb
*, struct socket
*,
498 struct msghdr
*, size_t);
500 static const struct proto_ops unix_stream_ops
= {
502 .owner
= THIS_MODULE
,
503 .release
= unix_release
,
505 .connect
= unix_stream_connect
,
506 .socketpair
= unix_socketpair
,
507 .accept
= unix_accept
,
508 .getname
= unix_getname
,
511 .listen
= unix_listen
,
512 .shutdown
= unix_shutdown
,
513 .setsockopt
= sock_no_setsockopt
,
514 .getsockopt
= sock_no_getsockopt
,
515 .sendmsg
= unix_stream_sendmsg
,
516 .recvmsg
= unix_stream_recvmsg
,
517 .mmap
= sock_no_mmap
,
518 .sendpage
= sock_no_sendpage
,
521 static const struct proto_ops unix_dgram_ops
= {
523 .owner
= THIS_MODULE
,
524 .release
= unix_release
,
526 .connect
= unix_dgram_connect
,
527 .socketpair
= unix_socketpair
,
528 .accept
= sock_no_accept
,
529 .getname
= unix_getname
,
530 .poll
= datagram_poll
,
532 .listen
= sock_no_listen
,
533 .shutdown
= unix_shutdown
,
534 .setsockopt
= sock_no_setsockopt
,
535 .getsockopt
= sock_no_getsockopt
,
536 .sendmsg
= unix_dgram_sendmsg
,
537 .recvmsg
= unix_dgram_recvmsg
,
538 .mmap
= sock_no_mmap
,
539 .sendpage
= sock_no_sendpage
,
542 static const struct proto_ops unix_seqpacket_ops
= {
544 .owner
= THIS_MODULE
,
545 .release
= unix_release
,
547 .connect
= unix_stream_connect
,
548 .socketpair
= unix_socketpair
,
549 .accept
= unix_accept
,
550 .getname
= unix_getname
,
551 .poll
= datagram_poll
,
553 .listen
= unix_listen
,
554 .shutdown
= unix_shutdown
,
555 .setsockopt
= sock_no_setsockopt
,
556 .getsockopt
= sock_no_getsockopt
,
557 .sendmsg
= unix_seqpacket_sendmsg
,
558 .recvmsg
= unix_dgram_recvmsg
,
559 .mmap
= sock_no_mmap
,
560 .sendpage
= sock_no_sendpage
,
563 static struct proto unix_proto
= {
565 .owner
= THIS_MODULE
,
566 .obj_size
= sizeof(struct unix_sock
),
570 * AF_UNIX sockets do not interact with hardware, hence they
571 * dont trigger interrupts - so it's safe for them to have
572 * bh-unsafe locking for their sk_receive_queue.lock. Split off
573 * this special lock-class by reinitializing the spinlock key:
575 static struct lock_class_key af_unix_sk_receive_queue_lock_key
;
577 static struct sock
* unix_create1(struct net
*net
, struct socket
*sock
)
579 struct sock
*sk
= NULL
;
582 atomic_inc(&unix_nr_socks
);
583 if (atomic_read(&unix_nr_socks
) > 2 * get_max_files())
586 sk
= sk_alloc(net
, PF_UNIX
, GFP_KERNEL
, &unix_proto
);
590 sock_init_data(sock
,sk
);
591 lockdep_set_class(&sk
->sk_receive_queue
.lock
,
592 &af_unix_sk_receive_queue_lock_key
);
594 sk
->sk_write_space
= unix_write_space
;
595 sk
->sk_max_ack_backlog
= net
->unx
.sysctl_max_dgram_qlen
;
596 sk
->sk_destruct
= unix_sock_destructor
;
600 spin_lock_init(&u
->lock
);
601 atomic_set(&u
->inflight
, 0);
602 INIT_LIST_HEAD(&u
->link
);
603 mutex_init(&u
->readlock
); /* single task reading lock */
604 init_waitqueue_head(&u
->peer_wait
);
605 unix_insert_socket(unix_sockets_unbound
, sk
);
608 atomic_dec(&unix_nr_socks
);
612 static int unix_create(struct net
*net
, struct socket
*sock
, int protocol
)
614 if (protocol
&& protocol
!= PF_UNIX
)
615 return -EPROTONOSUPPORT
;
617 sock
->state
= SS_UNCONNECTED
;
619 switch (sock
->type
) {
621 sock
->ops
= &unix_stream_ops
;
624 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
628 sock
->type
=SOCK_DGRAM
;
630 sock
->ops
= &unix_dgram_ops
;
633 sock
->ops
= &unix_seqpacket_ops
;
636 return -ESOCKTNOSUPPORT
;
639 return unix_create1(net
, sock
) ? 0 : -ENOMEM
;
642 static int unix_release(struct socket
*sock
)
644 struct sock
*sk
= sock
->sk
;
651 return unix_release_sock (sk
, 0);
654 static int unix_autobind(struct socket
*sock
)
656 struct sock
*sk
= sock
->sk
;
657 struct net
*net
= sock_net(sk
);
658 struct unix_sock
*u
= unix_sk(sk
);
659 static u32 ordernum
= 1;
660 struct unix_address
* addr
;
663 mutex_lock(&u
->readlock
);
670 addr
= kzalloc(sizeof(*addr
) + sizeof(short) + 16, GFP_KERNEL
);
674 addr
->name
->sun_family
= AF_UNIX
;
675 atomic_set(&addr
->refcnt
, 1);
678 addr
->len
= sprintf(addr
->name
->sun_path
+1, "%05x", ordernum
) + 1 + sizeof(short);
679 addr
->hash
= unix_hash_fold(csum_partial((void*)addr
->name
, addr
->len
, 0));
681 spin_lock(&unix_table_lock
);
682 ordernum
= (ordernum
+1)&0xFFFFF;
684 if (__unix_find_socket_byname(net
, addr
->name
, addr
->len
, sock
->type
,
686 spin_unlock(&unix_table_lock
);
687 /* Sanity yield. It is unusual case, but yet... */
688 if (!(ordernum
&0xFF))
692 addr
->hash
^= sk
->sk_type
;
694 __unix_remove_socket(sk
);
696 __unix_insert_socket(&unix_socket_table
[addr
->hash
], sk
);
697 spin_unlock(&unix_table_lock
);
700 out
: mutex_unlock(&u
->readlock
);
704 static struct sock
*unix_find_other(struct net
*net
,
705 struct sockaddr_un
*sunname
, int len
,
706 int type
, unsigned hash
, int *error
)
712 if (sunname
->sun_path
[0]) {
713 err
= path_lookup(sunname
->sun_path
, LOOKUP_FOLLOW
, &nd
);
716 err
= vfs_permission(&nd
, MAY_WRITE
);
721 if (!S_ISSOCK(nd
.path
.dentry
->d_inode
->i_mode
))
723 u
= unix_find_socket_byinode(net
, nd
.path
.dentry
->d_inode
);
727 if (u
->sk_type
== type
)
728 touch_atime(nd
.path
.mnt
, nd
.path
.dentry
);
733 if (u
->sk_type
!= type
) {
739 u
=unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
741 struct dentry
*dentry
;
742 dentry
= unix_sk(u
)->dentry
;
744 touch_atime(unix_sk(u
)->mnt
, dentry
);
758 static int unix_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
760 struct sock
*sk
= sock
->sk
;
761 struct net
*net
= sock_net(sk
);
762 struct unix_sock
*u
= unix_sk(sk
);
763 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)uaddr
;
764 struct dentry
* dentry
= NULL
;
768 struct unix_address
*addr
;
769 struct hlist_head
*list
;
772 if (sunaddr
->sun_family
!= AF_UNIX
)
775 if (addr_len
==sizeof(short)) {
776 err
= unix_autobind(sock
);
780 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
785 mutex_lock(&u
->readlock
);
792 addr
= kmalloc(sizeof(*addr
)+addr_len
, GFP_KERNEL
);
796 memcpy(addr
->name
, sunaddr
, addr_len
);
797 addr
->len
= addr_len
;
798 addr
->hash
= hash
^ sk
->sk_type
;
799 atomic_set(&addr
->refcnt
, 1);
801 if (sunaddr
->sun_path
[0]) {
805 * Get the parent directory, calculate the hash for last
808 err
= path_lookup(sunaddr
->sun_path
, LOOKUP_PARENT
, &nd
);
810 goto out_mknod_parent
;
812 dentry
= lookup_create(&nd
, 0);
813 err
= PTR_ERR(dentry
);
815 goto out_mknod_unlock
;
818 * All right, let's create it.
821 (SOCK_INODE(sock
)->i_mode
& ~current
->fs
->umask
);
822 err
= mnt_want_write(nd
.path
.mnt
);
825 err
= vfs_mknod(nd
.path
.dentry
->d_inode
, dentry
, mode
, 0);
826 mnt_drop_write(nd
.path
.mnt
);
829 mutex_unlock(&nd
.path
.dentry
->d_inode
->i_mutex
);
830 dput(nd
.path
.dentry
);
831 nd
.path
.dentry
= dentry
;
833 addr
->hash
= UNIX_HASH_SIZE
;
836 spin_lock(&unix_table_lock
);
838 if (!sunaddr
->sun_path
[0]) {
840 if (__unix_find_socket_byname(net
, sunaddr
, addr_len
,
841 sk
->sk_type
, hash
)) {
842 unix_release_addr(addr
);
846 list
= &unix_socket_table
[addr
->hash
];
848 list
= &unix_socket_table
[dentry
->d_inode
->i_ino
& (UNIX_HASH_SIZE
-1)];
849 u
->dentry
= nd
.path
.dentry
;
850 u
->mnt
= nd
.path
.mnt
;
854 __unix_remove_socket(sk
);
856 __unix_insert_socket(list
, sk
);
859 spin_unlock(&unix_table_lock
);
861 mutex_unlock(&u
->readlock
);
868 mutex_unlock(&nd
.path
.dentry
->d_inode
->i_mutex
);
873 unix_release_addr(addr
);
877 static void unix_state_double_lock(struct sock
*sk1
, struct sock
*sk2
)
879 if (unlikely(sk1
== sk2
) || !sk2
) {
880 unix_state_lock(sk1
);
884 unix_state_lock(sk1
);
885 unix_state_lock_nested(sk2
);
887 unix_state_lock(sk2
);
888 unix_state_lock_nested(sk1
);
/*
 * Release the state locks on @sk1 and @sk2.  When @sk2 is NULL or is
 * the same socket as @sk1, only @sk1's lock is released, and only once.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
902 static int unix_dgram_connect(struct socket
*sock
, struct sockaddr
*addr
,
905 struct sock
*sk
= sock
->sk
;
906 struct net
*net
= sock_net(sk
);
907 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)addr
;
912 if (addr
->sa_family
!= AF_UNSPEC
) {
913 err
= unix_mkname(sunaddr
, alen
, &hash
);
918 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) &&
919 !unix_sk(sk
)->addr
&& (err
= unix_autobind(sock
)) != 0)
923 other
=unix_find_other(net
, sunaddr
, alen
, sock
->type
, hash
, &err
);
927 unix_state_double_lock(sk
, other
);
929 /* Apparently VFS overslept socket death. Retry. */
930 if (sock_flag(other
, SOCK_DEAD
)) {
931 unix_state_double_unlock(sk
, other
);
937 if (!unix_may_send(sk
, other
))
940 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
946 * 1003.1g breaking connected state with AF_UNSPEC
949 unix_state_double_lock(sk
, other
);
953 * If it was connected, reconnect.
956 struct sock
*old_peer
= unix_peer(sk
);
958 unix_state_double_unlock(sk
, other
);
960 if (other
!= old_peer
)
961 unix_dgram_disconnected(sk
, old_peer
);
965 unix_state_double_unlock(sk
, other
);
970 unix_state_double_unlock(sk
, other
);
976 static long unix_wait_for_peer(struct sock
*other
, long timeo
)
978 struct unix_sock
*u
= unix_sk(other
);
982 prepare_to_wait_exclusive(&u
->peer_wait
, &wait
, TASK_INTERRUPTIBLE
);
984 sched
= !sock_flag(other
, SOCK_DEAD
) &&
985 !(other
->sk_shutdown
& RCV_SHUTDOWN
) &&
986 (skb_queue_len(&other
->sk_receive_queue
) >
987 other
->sk_max_ack_backlog
);
989 unix_state_unlock(other
);
992 timeo
= schedule_timeout(timeo
);
994 finish_wait(&u
->peer_wait
, &wait
);
998 static int unix_stream_connect(struct socket
*sock
, struct sockaddr
*uaddr
,
999 int addr_len
, int flags
)
1001 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)uaddr
;
1002 struct sock
*sk
= sock
->sk
;
1003 struct net
*net
= sock_net(sk
);
1004 struct unix_sock
*u
= unix_sk(sk
), *newu
, *otheru
;
1005 struct sock
*newsk
= NULL
;
1006 struct sock
*other
= NULL
;
1007 struct sk_buff
*skb
= NULL
;
1013 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
1018 if (test_bit(SOCK_PASSCRED
, &sock
->flags
)
1019 && !u
->addr
&& (err
= unix_autobind(sock
)) != 0)
1022 timeo
= sock_sndtimeo(sk
, flags
& O_NONBLOCK
);
1024 /* First of all allocate resources.
1025 If we will make it after state is locked,
1026 we will have to recheck all again in any case.
1031 /* create new sock for complete connection */
1032 newsk
= unix_create1(sock_net(sk
), NULL
);
1036 /* Allocate skb for sending to listening sock */
1037 skb
= sock_wmalloc(newsk
, 1, 0, GFP_KERNEL
);
1042 /* Find listening sock. */
1043 other
= unix_find_other(net
, sunaddr
, addr_len
, sk
->sk_type
, hash
, &err
);
1047 /* Latch state of peer */
1048 unix_state_lock(other
);
1050 /* Apparently VFS overslept socket death. Retry. */
1051 if (sock_flag(other
, SOCK_DEAD
)) {
1052 unix_state_unlock(other
);
1057 err
= -ECONNREFUSED
;
1058 if (other
->sk_state
!= TCP_LISTEN
)
1061 if (skb_queue_len(&other
->sk_receive_queue
) >
1062 other
->sk_max_ack_backlog
) {
1067 timeo
= unix_wait_for_peer(other
, timeo
);
1069 err
= sock_intr_errno(timeo
);
1070 if (signal_pending(current
))
1078 It is tricky place. We need to grab write lock and cannot
1079 drop lock on peer. It is dangerous because deadlock is
1080 possible. Connect to self case and simultaneous
1081 attempt to connect are eliminated by checking socket
1082 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1083 check this before attempt to grab lock.
1085 Well, and we have to recheck the state after socket locked.
1091 /* This is ok... continue with connect */
1093 case TCP_ESTABLISHED
:
1094 /* Socket is already connected */
1102 unix_state_lock_nested(sk
);
1104 if (sk
->sk_state
!= st
) {
1105 unix_state_unlock(sk
);
1106 unix_state_unlock(other
);
1111 err
= security_unix_stream_connect(sock
, other
->sk_socket
, newsk
);
1113 unix_state_unlock(sk
);
1117 /* The way is open! Quickly set all the necessary fields... */
1120 unix_peer(newsk
) = sk
;
1121 newsk
->sk_state
= TCP_ESTABLISHED
;
1122 newsk
->sk_type
= sk
->sk_type
;
1123 newsk
->sk_peercred
.pid
= task_tgid_vnr(current
);
1124 newsk
->sk_peercred
.uid
= current
->euid
;
1125 newsk
->sk_peercred
.gid
= current
->egid
;
1126 newu
= unix_sk(newsk
);
1127 newsk
->sk_sleep
= &newu
->peer_wait
;
1128 otheru
= unix_sk(other
);
1130 /* copy address information from listening to new sock*/
1132 atomic_inc(&otheru
->addr
->refcnt
);
1133 newu
->addr
= otheru
->addr
;
1135 if (otheru
->dentry
) {
1136 newu
->dentry
= dget(otheru
->dentry
);
1137 newu
->mnt
= mntget(otheru
->mnt
);
1140 /* Set credentials */
1141 sk
->sk_peercred
= other
->sk_peercred
;
1143 sock
->state
= SS_CONNECTED
;
1144 sk
->sk_state
= TCP_ESTABLISHED
;
1147 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1148 unix_peer(sk
) = newsk
;
1150 unix_state_unlock(sk
);
1152 /* take ten and send info to listening sock */
1153 spin_lock(&other
->sk_receive_queue
.lock
);
1154 __skb_queue_tail(&other
->sk_receive_queue
, skb
);
1155 spin_unlock(&other
->sk_receive_queue
.lock
);
1156 unix_state_unlock(other
);
1157 other
->sk_data_ready(other
, 0);
1163 unix_state_unlock(other
);
1169 unix_release_sock(newsk
, 0);
1175 static int unix_socketpair(struct socket
*socka
, struct socket
*sockb
)
1177 struct sock
*ska
=socka
->sk
, *skb
= sockb
->sk
;
1179 /* Join our sockets back to back */
1184 ska
->sk_peercred
.pid
= skb
->sk_peercred
.pid
= task_tgid_vnr(current
);
1185 ska
->sk_peercred
.uid
= skb
->sk_peercred
.uid
= current
->euid
;
1186 ska
->sk_peercred
.gid
= skb
->sk_peercred
.gid
= current
->egid
;
1188 if (ska
->sk_type
!= SOCK_DGRAM
) {
1189 ska
->sk_state
= TCP_ESTABLISHED
;
1190 skb
->sk_state
= TCP_ESTABLISHED
;
1191 socka
->state
= SS_CONNECTED
;
1192 sockb
->state
= SS_CONNECTED
;
1197 static int unix_accept(struct socket
*sock
, struct socket
*newsock
, int flags
)
1199 struct sock
*sk
= sock
->sk
;
1201 struct sk_buff
*skb
;
1205 if (sock
->type
!=SOCK_STREAM
&& sock
->type
!=SOCK_SEQPACKET
)
1209 if (sk
->sk_state
!= TCP_LISTEN
)
1212 /* If socket state is TCP_LISTEN it cannot change (for now...),
1213 * so that no locks are necessary.
1216 skb
= skb_recv_datagram(sk
, 0, flags
&O_NONBLOCK
, &err
);
1218 /* This means receive shutdown. */
1225 skb_free_datagram(sk
, skb
);
1226 wake_up_interruptible(&unix_sk(sk
)->peer_wait
);
1228 /* attach accepted sock to socket */
1229 unix_state_lock(tsk
);
1230 newsock
->state
= SS_CONNECTED
;
1231 sock_graft(tsk
, newsock
);
1232 unix_state_unlock(tsk
);
1240 static int unix_getname(struct socket
*sock
, struct sockaddr
*uaddr
, int *uaddr_len
, int peer
)
1242 struct sock
*sk
= sock
->sk
;
1243 struct unix_sock
*u
;
1244 struct sockaddr_un
*sunaddr
=(struct sockaddr_un
*)uaddr
;
1248 sk
= unix_peer_get(sk
);
1259 unix_state_lock(sk
);
1261 sunaddr
->sun_family
= AF_UNIX
;
1262 sunaddr
->sun_path
[0] = 0;
1263 *uaddr_len
= sizeof(short);
1265 struct unix_address
*addr
= u
->addr
;
1267 *uaddr_len
= addr
->len
;
1268 memcpy(sunaddr
, addr
->name
, *uaddr_len
);
1270 unix_state_unlock(sk
);
1276 static void unix_detach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1280 scm
->fp
= UNIXCB(skb
).fp
;
1281 skb
->destructor
= sock_wfree
;
1282 UNIXCB(skb
).fp
= NULL
;
1284 for (i
=scm
->fp
->count
-1; i
>=0; i
--)
1285 unix_notinflight(scm
->fp
->fp
[i
]);
1288 static void unix_destruct_fds(struct sk_buff
*skb
)
1290 struct scm_cookie scm
;
1291 memset(&scm
, 0, sizeof(scm
));
1292 unix_detach_fds(&scm
, skb
);
1294 /* Alas, it calls VFS */
1295 /* So fscking what? fput() had been SMP-safe since the last Summer */
1300 static void unix_attach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1303 for (i
=scm
->fp
->count
-1; i
>=0; i
--)
1304 unix_inflight(scm
->fp
->fp
[i
]);
1305 UNIXCB(skb
).fp
= scm
->fp
;
1306 skb
->destructor
= unix_destruct_fds
;
1311 * Send AF_UNIX data.
1314 static int unix_dgram_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1315 struct msghdr
*msg
, size_t len
)
1317 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1318 struct sock
*sk
= sock
->sk
;
1319 struct net
*net
= sock_net(sk
);
1320 struct unix_sock
*u
= unix_sk(sk
);
1321 struct sockaddr_un
*sunaddr
=msg
->msg_name
;
1322 struct sock
*other
= NULL
;
1323 int namelen
= 0; /* fake GCC */
1326 struct sk_buff
*skb
;
1328 struct scm_cookie tmp_scm
;
1330 if (NULL
== siocb
->scm
)
1331 siocb
->scm
= &tmp_scm
;
1332 err
= scm_send(sock
, msg
, siocb
->scm
);
1337 if (msg
->msg_flags
&MSG_OOB
)
1340 if (msg
->msg_namelen
) {
1341 err
= unix_mkname(sunaddr
, msg
->msg_namelen
, &hash
);
1348 other
= unix_peer_get(sk
);
1353 if (test_bit(SOCK_PASSCRED
, &sock
->flags
)
1354 && !u
->addr
&& (err
= unix_autobind(sock
)) != 0)
1358 if (len
> sk
->sk_sndbuf
- 32)
1361 skb
= sock_alloc_send_skb(sk
, len
, msg
->msg_flags
&MSG_DONTWAIT
, &err
);
1365 memcpy(UNIXCREDS(skb
), &siocb
->scm
->creds
, sizeof(struct ucred
));
1367 unix_attach_fds(siocb
->scm
, skb
);
1368 unix_get_secdata(siocb
->scm
, skb
);
1370 skb_reset_transport_header(skb
);
1371 err
= memcpy_fromiovec(skb_put(skb
,len
), msg
->msg_iov
, len
);
1375 timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
1380 if (sunaddr
== NULL
)
1383 other
= unix_find_other(net
, sunaddr
, namelen
, sk
->sk_type
,
1389 unix_state_lock(other
);
1391 if (!unix_may_send(sk
, other
))
1394 if (sock_flag(other
, SOCK_DEAD
)) {
1396 * Check with 1003.1g - what should
1399 unix_state_unlock(other
);
1403 unix_state_lock(sk
);
1404 if (unix_peer(sk
) == other
) {
1406 unix_state_unlock(sk
);
1408 unix_dgram_disconnected(sk
, other
);
1410 err
= -ECONNREFUSED
;
1412 unix_state_unlock(sk
);
1422 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1425 if (sk
->sk_type
!= SOCK_SEQPACKET
) {
1426 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
1431 if (unix_peer(other
) != sk
&&
1432 (skb_queue_len(&other
->sk_receive_queue
) >
1433 other
->sk_max_ack_backlog
)) {
1439 timeo
= unix_wait_for_peer(other
, timeo
);
1441 err
= sock_intr_errno(timeo
);
1442 if (signal_pending(current
))
1448 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1449 unix_state_unlock(other
);
1450 other
->sk_data_ready(other
, len
);
1452 scm_destroy(siocb
->scm
);
1456 unix_state_unlock(other
);
1462 scm_destroy(siocb
->scm
);
/*
 * unix_stream_sendmsg() - sendmsg handler for stream sockets.
 *
 * NOTE(review): this listing is a lossy extract of the function.  Braces,
 * labels, the loop header and several branches are not shown, so only the
 * statements visible below are described.
 *
 * Visible flow:
 *  - fall back to the on-stack tmp_scm when siocb->scm is NULL, then call
 *    scm_send();
 *  - test MSG_OOB (the outcome of that test is not visible here);
 *  - when a destination name is supplied, pick -EISCONN if the socket is
 *    TCP_ESTABLISHED and -EOPNOTSUPP otherwise;
 *  - take the peer from unix_peer(sk) and test SEND_SHUTDOWN on the sender;
 *  - cap each chunk at (sk_sndbuf >> 1) - 64 and at SKB_MAX_ALLOC, allocate
 *    an skb, then clamp the chunk to the tailroom of that skb;
 *  - copy the sender credentials into the skb, attach passed fds and copy
 *    the payload from msg->msg_iov;
 *  - between unix_state_lock(other) and unix_state_unlock(other), test the
 *    peer for SOCK_DEAD / RCV_SHUTDOWN and append the skb to its receive
 *    queue; afterwards call the peer's sk_data_ready().
 *
 * SIGPIPE is sent to current when sent == 0 and MSG_NOSIGNAL is not set.
 * The final return yields 'sent' when it is non-zero, otherwise 'err'.
 */
1467 static int unix_stream_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1468 struct msghdr
*msg
, size_t len
)
1470 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1471 struct sock
*sk
= sock
->sk
;
1472 struct sock
*other
= NULL
;
1473 struct sockaddr_un
*sunaddr
=msg
->msg_name
;
1475 struct sk_buff
*skb
;
1477 struct scm_cookie tmp_scm
;
1479 if (NULL
== siocb
->scm
)
1480 siocb
->scm
= &tmp_scm
;
1481 err
= scm_send(sock
, msg
, siocb
->scm
);
1486 if (msg
->msg_flags
&MSG_OOB
)
1489 if (msg
->msg_namelen
) {
1490 err
= sk
->sk_state
== TCP_ESTABLISHED
? -EISCONN
: -EOPNOTSUPP
;
1495 other
= unix_peer(sk
);
1500 if (sk
->sk_shutdown
& SEND_SHUTDOWN
)
1506 * Optimisation for the fact that under 0.01% of X
1507 * messages typically need breaking up.
1512 /* Keep two messages in the pipe so it schedules better */
1513 if (size
> ((sk
->sk_sndbuf
>> 1) - 64))
1514 size
= (sk
->sk_sndbuf
>> 1) - 64;
1516 if (size
> SKB_MAX_ALLOC
)
1517 size
= SKB_MAX_ALLOC
;
1523 skb
=sock_alloc_send_skb(sk
,size
,msg
->msg_flags
&MSG_DONTWAIT
, &err
);
1529 * If you pass two values to the sock_alloc_send_skb
1530 * it tries to grab the large buffer with GFP_NOFS
1531 * (which can fail easily), and if it fails grab the
1532 * fallback size buffer which is under a page and will
1535 size
= min_t(int, size
, skb_tailroom(skb
));
1537 memcpy(UNIXCREDS(skb
), &siocb
->scm
->creds
, sizeof(struct ucred
));
1539 unix_attach_fds(siocb
->scm
, skb
);
1541 if ((err
= memcpy_fromiovec(skb_put(skb
,size
), msg
->msg_iov
, size
)) != 0) {
1546 unix_state_lock(other
);
1548 if (sock_flag(other
, SOCK_DEAD
) ||
1549 (other
->sk_shutdown
& RCV_SHUTDOWN
))
1552 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1553 unix_state_unlock(other
);
1554 other
->sk_data_ready(other
, size
);
1558 scm_destroy(siocb
->scm
);
1564 unix_state_unlock(other
);
1567 if (sent
==0 && !(msg
->msg_flags
&MSG_NOSIGNAL
))
1568 send_sig(SIGPIPE
,current
,0);
1571 scm_destroy(siocb
->scm
);
1573 return sent
? : err
;
/*
 * unix_seqpacket_sendmsg() - sendmsg handler for SOCK_SEQPACKET sockets.
 *
 * Reads the pending socket error via sock_error(), tests that the socket
 * is TCP_ESTABLISHED, clears any caller-supplied msg_namelen and then
 * delegates to unix_dgram_sendmsg() with the same arguments.
 *
 * NOTE(review): the statements executed when the error / state tests fail
 * are not visible in this extract.
 */
1576 static int unix_seqpacket_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1577 struct msghdr
*msg
, size_t len
)
1580 struct sock
*sk
= sock
->sk
;
1582 err
= sock_error(sk
);
1586 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1589 if (msg
->msg_namelen
)
1590 msg
->msg_namelen
= 0;
1592 return unix_dgram_sendmsg(kiocb
, sock
, msg
, len
);
/*
 * unix_copy_addr() - report the bound address of @sk through @msg.
 *
 * msg->msg_namelen is first reset to 0, then set to u->addr->len and
 * u->addr->len bytes of u->addr->name are copied into msg->msg_name.
 *
 * NOTE(review): presumably the second assignment and the memcpy() are
 * guarded by a check that u->addr is set; that line is not visible in
 * this extract - confirm against the full source.
 */
1595 static void unix_copy_addr(struct msghdr
*msg
, struct sock
*sk
)
1597 struct unix_sock
*u
= unix_sk(sk
);
1599 msg
->msg_namelen
= 0;
1601 msg
->msg_namelen
= u
->addr
->len
;
1602 memcpy(msg
->msg_name
, u
->addr
->name
, u
->addr
->len
);
/*
 * unix_dgram_recvmsg() - recvmsg handler that receives one datagram.
 *
 * NOTE(review): lossy extract; the 'flags' parameter line, braces, labels
 * and error branches are not shown.  Only visible statements are described.
 *
 * Visible flow:
 *  - msg->msg_namelen is cleared and u->readlock is taken;
 *  - one skb is fetched with skb_recv_datagram(), non-blocking when
 *    MSG_DONTWAIT is set in flags;
 *  - under the socket state lock, the SOCK_SEQPACKET / -EAGAIN /
 *    RCV_SHUTDOWN combination is tested (per the existing comment this
 *    signals EOF);
 *  - waiters on u->peer_wait are woken;
 *  - the sender address is reported via unix_copy_addr(msg, skb->sk);
 *  - size is compared with skb->len and MSG_TRUNC is set when the caller's
 *    buffer is smaller than the datagram;
 *  - the payload is copied to msg->msg_iov;
 *  - siocb->scm is pointed at a zeroed on-stack cookie, then filled with
 *    the skb's credentials and security data;
 *  - without MSG_PEEK the fds are detached from the skb, and
 *    scm_fp_dup() is used to duplicate them into the cookie instead;
 *  - scm_recv() delivers the control data, the skb is freed and
 *    u->readlock is released.
 */
1606 static int unix_dgram_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1607 struct msghdr
*msg
, size_t size
,
1610 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
1611 struct scm_cookie tmp_scm
;
1612 struct sock
*sk
= sock
->sk
;
1613 struct unix_sock
*u
= unix_sk(sk
);
1614 int noblock
= flags
& MSG_DONTWAIT
;
1615 struct sk_buff
*skb
;
1622 msg
->msg_namelen
= 0;
1624 mutex_lock(&u
->readlock
);
1626 skb
= skb_recv_datagram(sk
, flags
, noblock
, &err
);
1628 unix_state_lock(sk
);
1629 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1630 if (sk
->sk_type
== SOCK_SEQPACKET
&& err
== -EAGAIN
&&
1631 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
1633 unix_state_unlock(sk
);
1637 wake_up_interruptible_sync(&u
->peer_wait
);
1640 unix_copy_addr(msg
, skb
->sk
);
1642 if (size
> skb
->len
)
1644 else if (size
< skb
->len
)
1645 msg
->msg_flags
|= MSG_TRUNC
;
1647 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, size
);
1652 siocb
->scm
= &tmp_scm
;
1653 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
1655 siocb
->scm
->creds
= *UNIXCREDS(skb
);
1656 unix_set_secdata(siocb
->scm
, skb
);
1658 if (!(flags
& MSG_PEEK
))
1661 unix_detach_fds(siocb
->scm
, skb
);
1665 /* It is questionable: on PEEK we could:
1666 - do not return fds - good, but too simple 8)
1667 - return fds, and do not return them on read (old strategy,
1669 - clone fds (I chose it for now, it is the most universal
1672 POSIX 1003.1g does not actually define this clearly
1673 at all. POSIX 1003.1g doesn't define a lot of things
1678 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
1682 scm_recv(sock
, msg
, siocb
->scm
, flags
);
1685 skb_free_datagram(sk
,skb
);
1687 mutex_unlock(&u
->readlock
);
1693 * Sleep until data has arrived. But check for races..
/*
 * unix_stream_data_wait() - block the caller until a wake-up condition
 * for a stream receive holds.
 *
 * With the socket state lock held, the task is queued on sk->sk_sleep in
 * TASK_INTERRUPTIBLE state.  The visible stop conditions are: the receive
 * queue is non-empty, RCV_SHUTDOWN is set, or a signal is pending
 * (NOTE(review): further conditions and the surrounding loop are not
 * visible in this extract).  Otherwise SOCK_ASYNC_WAITDATA is set, the
 * state lock is dropped around schedule_timeout() and re-taken, and the
 * flag is cleared again.  On exit the wait entry is removed and the state
 * lock released.
 *
 * @timeo is updated with the value returned by schedule_timeout().
 */
1696 static long unix_stream_data_wait(struct sock
* sk
, long timeo
)
1700 unix_state_lock(sk
);
1703 prepare_to_wait(sk
->sk_sleep
, &wait
, TASK_INTERRUPTIBLE
);
1705 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
1707 (sk
->sk_shutdown
& RCV_SHUTDOWN
) ||
1708 signal_pending(current
) ||
1712 set_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
1713 unix_state_unlock(sk
);
1714 timeo
= schedule_timeout(timeo
);
1715 unix_state_lock(sk
);
1716 clear_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
1719 finish_wait(sk
->sk_sleep
, &wait
);
1720 unix_state_unlock(sk
);
/*
 * unix_stream_recvmsg() - recvmsg handler for stream sockets.
 *
 * NOTE(review): lossy extract; the 'flags' parameter line, the loop
 * header, braces, labels and several branches are not shown.  Only the
 * visible statements are described.
 *
 * Visible flow:
 *  - the socket state is tested against TCP_ESTABLISHED;
 *  - 'target' comes from sock_rcvlowat() (honouring MSG_WAITALL) and
 *    'timeo' from sock_rcvtimeo() (honouring MSG_DONTWAIT);
 *  - msg->msg_namelen is cleared, siocb->scm is pointed at a zeroed
 *    on-stack cookie and u->readlock is taken;
 *  - an skb is dequeued under the socket state lock; when the wait path is
 *    taken, 'copied >= target', sock_error() and RCV_SHUTDOWN are tested,
 *    u->readlock is dropped around unix_stream_data_wait(), and a pending
 *    signal yields sock_intr_errno(timeo);
 *  - the skb's credentials are compared with those already in the cookie;
 *    on mismatch the skb is put back at the head of the queue so data from
 *    different writers is not merged, otherwise the credentials are copied;
 *  - the sender address is reported once via unix_copy_addr();
 *  - min(skb->len, size) bytes are copied to msg->msg_iov; on copy failure
 *    the skb is re-queued at the head;
 *  - without MSG_PEEK the consumed bytes are pulled, fds are detached and a
 *    partially consumed skb is re-queued; on the other branch the fds are
 *    duplicated with scm_fp_dup() and the skb is always re-queued;
 *  - finally u->readlock is released and scm_recv() delivers control data.
 *
 * The final return yields 'copied' when it is non-zero, otherwise 'err'.
 */
1726 static int unix_stream_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1727 struct msghdr
*msg
, size_t size
,
1730 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
1731 struct scm_cookie tmp_scm
;
1732 struct sock
*sk
= sock
->sk
;
1733 struct unix_sock
*u
= unix_sk(sk
);
1734 struct sockaddr_un
*sunaddr
=msg
->msg_name
;
1736 int check_creds
= 0;
1742 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1749 target
= sock_rcvlowat(sk
, flags
&MSG_WAITALL
, size
);
1750 timeo
= sock_rcvtimeo(sk
, flags
&MSG_DONTWAIT
);
1752 msg
->msg_namelen
= 0;
1754 /* Lock the socket to prevent queue disordering
1755 * while sleeps in memcpy_tomsg
1759 siocb
->scm
= &tmp_scm
;
1760 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
1763 mutex_lock(&u
->readlock
);
1768 struct sk_buff
*skb
;
1770 unix_state_lock(sk
);
1771 skb
= skb_dequeue(&sk
->sk_receive_queue
);
1774 if (copied
>= target
)
1778 * POSIX 1003.1g mandates this order.
1781 if ((err
= sock_error(sk
)) != 0)
1783 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
1786 unix_state_unlock(sk
);
1790 mutex_unlock(&u
->readlock
);
1792 timeo
= unix_stream_data_wait(sk
, timeo
);
1794 if (signal_pending(current
)) {
1795 err
= sock_intr_errno(timeo
);
1798 mutex_lock(&u
->readlock
);
1801 unix_state_unlock(sk
);
1804 unix_state_unlock(sk
);
1807 /* Never glue messages from different writers */
1808 if (memcmp(UNIXCREDS(skb
), &siocb
->scm
->creds
, sizeof(siocb
->scm
->creds
)) != 0) {
1809 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1813 /* Copy credentials */
1814 siocb
->scm
->creds
= *UNIXCREDS(skb
);
1818 /* Copy address just once */
1821 unix_copy_addr(msg
, skb
->sk
);
1825 chunk
= min_t(unsigned int, skb
->len
, size
);
1826 if (memcpy_toiovec(msg
->msg_iov
, skb
->data
, chunk
)) {
1827 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1835 /* Mark read part of skb as used */
1836 if (!(flags
& MSG_PEEK
))
1838 skb_pull(skb
, chunk
);
1841 unix_detach_fds(siocb
->scm
, skb
);
1843 /* put the skb back if we didn't use it up.. */
1846 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1857 /* It is questionable, see note in unix_dgram_recvmsg.
1860 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
1862 /* put message back and return */
1863 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1868 mutex_unlock(&u
->readlock
);
1869 scm_recv(sock
, msg
, siocb
->scm
, flags
);
1871 return copied
? : err
;
/*
 * unix_shutdown() - shutdown handler for this socket family.
 *
 * @mode is converted with (mode + 1) masked by RCV_SHUTDOWN|SEND_SHUTDOWN,
 * so the incremented value is used directly as the shutdown bit mask.
 * The bits are OR-ed into sk->sk_shutdown under the socket state lock,
 * the peer is read via unix_peer(), and sk_state_change() is invoked.
 *
 * For SOCK_STREAM and SOCK_SEQPACKET the shutdown is mirrored onto the
 * peer: a local RCV_SHUTDOWN becomes SEND_SHUTDOWN on the peer and a local
 * SEND_SHUTDOWN becomes RCV_SHUTDOWN.  The peer's sk_shutdown is updated
 * under its own state lock, its sk_state_change() is called, and under
 * sk_callback_lock an async wake-up is sent: POLL_HUP when both directions
 * are now closed, POLL_IN when only the peer's receive side is.
 *
 * NOTE(review): the remaining conditions guarding the peer branch,
 * reference handling for 'other' and the return value are not visible in
 * this extract.
 */
1874 static int unix_shutdown(struct socket
*sock
, int mode
)
1876 struct sock
*sk
= sock
->sk
;
1879 mode
= (mode
+1)&(RCV_SHUTDOWN
|SEND_SHUTDOWN
);
1882 unix_state_lock(sk
);
1883 sk
->sk_shutdown
|= mode
;
1884 other
=unix_peer(sk
);
1887 unix_state_unlock(sk
);
1888 sk
->sk_state_change(sk
);
1891 (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
)) {
1895 if (mode
&RCV_SHUTDOWN
)
1896 peer_mode
|= SEND_SHUTDOWN
;
1897 if (mode
&SEND_SHUTDOWN
)
1898 peer_mode
|= RCV_SHUTDOWN
;
1899 unix_state_lock(other
);
1900 other
->sk_shutdown
|= peer_mode
;
1901 unix_state_unlock(other
);
1902 other
->sk_state_change(other
);
1903 read_lock(&other
->sk_callback_lock
);
1904 if (peer_mode
== SHUTDOWN_MASK
)
1905 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_HUP
);
1906 else if (peer_mode
& RCV_SHUTDOWN
)
1907 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_IN
);
1908 read_unlock(&other
->sk_callback_lock
);
/*
 * unix_ioctl() - ioctl handler for this socket family.
 *
 * NOTE(review): the switch on @cmd and its case labels are not visible in
 * this extract, so the command values are not named here.
 *
 * Two visible branches each store an int 'amount' to the user pointer in
 * @arg with put_user():
 *  - one reads sk->sk_wmem_alloc atomically;
 *  - the other first tests for TCP_LISTEN, then, holding the receive-queue
 *    spinlock, walks every queued skb for SOCK_STREAM / SOCK_SEQPACKET
 *    sockets or peeks only at the first skb for other socket types.
 */
1916 static int unix_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
)
1918 struct sock
*sk
= sock
->sk
;
1925 amount
= atomic_read(&sk
->sk_wmem_alloc
);
1926 err
= put_user(amount
, (int __user
*)arg
);
1930 struct sk_buff
*skb
;
1932 if (sk
->sk_state
== TCP_LISTEN
) {
1937 spin_lock(&sk
->sk_receive_queue
.lock
);
1938 if (sk
->sk_type
== SOCK_STREAM
||
1939 sk
->sk_type
== SOCK_SEQPACKET
) {
1940 skb_queue_walk(&sk
->sk_receive_queue
, skb
)
1943 skb
= skb_peek(&sk
->sk_receive_queue
);
1947 spin_unlock(&sk
->sk_receive_queue
.lock
);
1948 err
= put_user(amount
, (int __user
*)arg
);
/*
 * unix_poll() - poll handler; builds the event mask for @sock.
 *
 * The caller is registered on sk->sk_sleep through poll_wait().  The
 * visible tests are:
 *  - sk_shutdown == SHUTDOWN_MASK and sk_shutdown & RCV_SHUTDOWN
 *    (NOTE(review): the mask bits they set are not visible here);
 *  - readable (POLLIN | POLLRDNORM) when the receive queue is non-empty or
 *    RCV_SHUTDOWN is set;
 *  - a connection-based socket (SOCK_STREAM / SOCK_SEQPACKET) in TCP_CLOSE
 *    (resulting bits not visible here);
 *  - writable (POLLOUT | POLLWRNORM | POLLWRBAND) when unix_writable()
 *    says so.
 */
1959 static unsigned int unix_poll(struct file
* file
, struct socket
*sock
, poll_table
*wait
)
1961 struct sock
*sk
= sock
->sk
;
1964 poll_wait(file
, sk
->sk_sleep
, wait
);
1967 /* exceptional events? */
1970 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
1972 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
1976 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
1977 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
1978 mask
|= POLLIN
| POLLRDNORM
;
1980 /* Connection-based need to check for termination and startup */
1981 if ((sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) && sk
->sk_state
== TCP_CLOSE
)
1985 * we set writable also when the other side has shut down the
1986 * connection. This prevents stuck sockets.
1988 if (unix_writable(sk
))
1989 mask
|= POLLOUT
| POLLWRNORM
| POLLWRBAND
;
1995 #ifdef CONFIG_PROC_FS
/*
 * first_unix_socket() - find the first socket in unix_socket_table.
 *
 * Scans the chains from index 0 up to and including UNIX_HASH_SIZE,
 * leaving the index of the chain in *i, and returns the head of the first
 * non-empty chain.
 *
 * NOTE(review): the inclusive upper bound implies the table has
 * UNIX_HASH_SIZE + 1 chains - confirm against the table declaration.  The
 * value returned when every chain is empty is not visible in this extract.
 */
1996 static struct sock
*first_unix_socket(int *i
)
1998 for (*i
= 0; *i
<= UNIX_HASH_SIZE
; (*i
)++) {
1999 if (!hlist_empty(&unix_socket_table
[*i
]))
2000 return __sk_head(&unix_socket_table
[*i
]);
/*
 * next_unix_socket() - advance from socket @s to the following socket.
 *
 * First looks at sk_next(s) for another entry in the current chain
 * (NOTE(review): the test and return of 'next' are not visible in this
 * extract).  Otherwise *i is advanced and the remaining chains, up to and
 * including index UNIX_HASH_SIZE, are scanned for the first non-empty one,
 * whose head is returned.
 */
2005 static struct sock
*next_unix_socket(int *i
, struct sock
*s
)
2007 struct sock
*next
= sk_next(s
);
2008 /* More in this chain? */
2011 /* Look for next non-empty chain. */
2012 for ((*i
)++; *i
<= UNIX_HASH_SIZE
; (*i
)++) {
2013 if (!hlist_empty(&unix_socket_table
[*i
]))
2014 return __sk_head(&unix_socket_table
[*i
]);
/*
 * Per-open iterator state for the seq_file walk over unix sockets.
 * NOTE(review): only the leading seq_net_private member is visible in this
 * extract; the seq callbacks in this file also use a member named 'i' as
 * the current hash-chain index.
 */
2019 struct unix_iter_state
{
2020 struct seq_net_private p
;
/*
 * unix_seq_idx() - locate a socket by position for the seq_file walk.
 *
 * Iterates over all sockets with first_unix_socket() / next_unix_socket(),
 * keeping the chain index in iter->i, and compares each socket's network
 * namespace with that of the seq_file.
 *
 * NOTE(review): the position counting against @pos and the return
 * statements are not visible in this extract.
 */
2023 static struct sock
*unix_seq_idx(struct seq_file
*seq
, loff_t pos
)
2025 struct unix_iter_state
*iter
= seq
->private;
2029 for (s
= first_unix_socket(&iter
->i
); s
; s
= next_unix_socket(&iter
->i
, s
)) {
2030 if (sock_net(s
) != seq_file_net(seq
))
/*
 * unix_seq_start() - seq_file .start callback.
 *
 * Takes unix_table_lock (annotated with __acquires) and returns
 * SEQ_START_TOKEN when *pos is 0, otherwise the socket found by
 * unix_seq_idx() at position *pos - 1.
 */
2040 static void *unix_seq_start(struct seq_file
*seq
, loff_t
*pos
)
2041 __acquires(unix_table_lock
)
2043 spin_lock(&unix_table_lock
);
2044 return *pos
? unix_seq_idx(seq
, *pos
- 1) : SEQ_START_TOKEN
;
/*
 * unix_seq_next() - seq_file .next callback.
 *
 * Starting from the first socket when @v is SEQ_START_TOKEN, or from the
 * socket after @v otherwise, skips every socket whose network namespace
 * differs from that of the seq_file.
 *
 * NOTE(review): the update of *pos and the return statement are not
 * visible in this extract.
 */
2047 static void *unix_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2049 struct unix_iter_state
*iter
= seq
->private;
2050 struct sock
*sk
= v
;
2053 if (v
== SEQ_START_TOKEN
)
2054 sk
= first_unix_socket(&iter
->i
);
2056 sk
= next_unix_socket(&iter
->i
, sk
);
2057 while (sk
&& (sock_net(sk
) != seq_file_net(seq
)))
2058 sk
= next_unix_socket(&iter
->i
, sk
);
/*
 * unix_seq_stop() - seq_file .stop callback; releases unix_table_lock
 * (annotated with __releases).
 */
2062 static void unix_seq_stop(struct seq_file
*seq
, void *v
)
2063 __releases(unix_table_lock
)
2065 spin_unlock(&unix_table_lock
);
/*
 * unix_seq_show() - seq_file .show callback; prints one output line.
 *
 * For SEQ_START_TOKEN the column header is written.  For a socket the
 * visible arguments of the formatted line include the reference count,
 * __SO_ACCEPTCON when the socket is in TCP_LISTEN, and a socket-state
 * value derived from sk_state.  The bound name is then emitted byte by
 * byte, with the length taken as u->addr->len minus sizeof(short);
 * UNIX_ABSTRACT() is tested for special handling.  The socket state lock
 * is released and a newline written at the end.
 *
 * NOTE(review): the remaining printf arguments, the matching
 * unix_state_lock(), the u->addr check and the abstract-name handling are
 * not visible in this extract.
 */
2068 static int unix_seq_show(struct seq_file
*seq
, void *v
)
2071 if (v
== SEQ_START_TOKEN
)
2072 seq_puts(seq
, "Num RefCount Protocol Flags Type St "
2076 struct unix_sock
*u
= unix_sk(s
);
2079 seq_printf(seq
, "%p: %08X %08X %08X %04X %02X %5lu",
2081 atomic_read(&s
->sk_refcnt
),
2083 s
->sk_state
== TCP_LISTEN
? __SO_ACCEPTCON
: 0,
2086 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTED
: SS_UNCONNECTED
) :
2087 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTING
: SS_DISCONNECTING
),
2095 len
= u
->addr
->len
- sizeof(short);
2096 if (!UNIX_ABSTRACT(s
))
2102 for ( ; i
< len
; i
++)
2103 seq_putc(seq
, u
->addr
->name
->sun_path
[i
]);
2105 unix_state_unlock(s
);
2106 seq_putc(seq
, '\n');
/* seq_file iterator callbacks used for the unix socket listing. */
2112 static const struct seq_operations unix_seq_ops
= {
2113 .start
= unix_seq_start
,
2114 .next
= unix_seq_next
,
2115 .stop
= unix_seq_stop
,
2116 .show
= unix_seq_show
,
/*
 * unix_seq_open() - open handler for the listing file.
 *
 * Delegates to seq_open_net() with unix_seq_ops, requesting private
 * storage of sizeof(struct unix_iter_state), and returns its result.
 */
2120 static int unix_seq_open(struct inode
*inode
, struct file
*file
)
2122 return seq_open_net(inode
, file
, &unix_seq_ops
,
2123 sizeof(struct unix_iter_state
));
/*
 * File operations for the listing file: opened via unix_seq_open(), seeks
 * with seq_lseek() and is released with seq_release_net().
 * NOTE(review): further members are not visible in this extract.
 */
2126 static const struct file_operations unix_seq_fops
= {
2127 .owner
= THIS_MODULE
,
2128 .open
= unix_seq_open
,
2130 .llseek
= seq_lseek
,
2131 .release
= seq_release_net
,
/*
 * Protocol-family descriptor handed to sock_register(); new sockets are
 * created through unix_create().
 * NOTE(review): the family member is not visible in this extract.
 */
2136 static struct net_proto_family unix_family_ops
= {
2138 .create
= unix_create
,
2139 .owner
= THIS_MODULE
,
/*
 * unix_net_init() - per-network-namespace initialisation.
 *
 * Starts with error = -ENOMEM, sets the namespace's sysctl_max_dgram_qlen
 * to 10 and registers the sysctl table.  With CONFIG_PROC_FS the "unix"
 * proc entry is created using unix_seq_fops; if that fails the sysctl
 * table is unregistered again.
 *
 * NOTE(review): the success path and the return statement are not visible
 * in this extract.
 */
2143 static int unix_net_init(struct net
*net
)
2145 int error
= -ENOMEM
;
2147 net
->unx
.sysctl_max_dgram_qlen
= 10;
2148 if (unix_sysctl_register(net
))
2151 #ifdef CONFIG_PROC_FS
2152 if (!proc_net_fops_create(net
, "unix", 0, &unix_seq_fops
)) {
2153 unix_sysctl_unregister(net
);
/*
 * unix_net_exit() - per-network-namespace teardown: unregisters the sysctl
 * table and removes the "unix" proc entry.
 */
2162 static void unix_net_exit(struct net
*net
)
2164 unix_sysctl_unregister(net
);
2165 proc_net_remove(net
, "unix");
/* Per-namespace init/exit hooks passed to register_pernet_subsys(). */
2168 static struct pernet_operations unix_net_ops
= {
2169 .init
= unix_net_init
,
2170 .exit
= unix_net_exit
,
/*
 * af_unix_init() - module initialisation.
 *
 * Checks at build time that struct unix_skb_parms fits into the skb
 * control buffer (dummy_skb exists only for that sizeof), registers
 * unix_proto, logs a KERN_CRIT message on the failure path, then registers
 * the socket family and the per-namespace operations.
 *
 * NOTE(review): in the visible statements the return values of
 * sock_register() and register_pernet_subsys() are not used.  The failure
 * test on 'rc' and the return statement are not visible in this extract.
 */
2173 static int __init
af_unix_init(void)
2176 struct sk_buff
*dummy_skb
;
2178 BUILD_BUG_ON(sizeof(struct unix_skb_parms
) > sizeof(dummy_skb
->cb
));
2180 rc
= proto_register(&unix_proto
, 1);
2182 printk(KERN_CRIT
"%s: Cannot create unix_sock SLAB cache!\n",
2187 sock_register(&unix_family_ops
);
2188 register_pernet_subsys(&unix_net_ops
);
/*
 * af_unix_exit() - module teardown: unregisters the PF_UNIX socket family,
 * then unix_proto, then the per-namespace operations.
 */
2193 static void __exit
af_unix_exit(void)
2195 sock_unregister(PF_UNIX
);
2196 proto_unregister(&unix_proto
);
2197 unregister_pernet_subsys(&unix_net_ops
);
2200 /* Earlier than device_initcall() so that other drivers invoking
2201 request_module() don't end up in a loop when modprobe tries
2202 to use a UNIX socket. But later than subsys_initcall() because
2203 we depend on stuff initialised there */
2204 fs_initcall(af_unix_init
);
2205 module_exit(af_unix_exit
);
/* Module metadata: licence tag and the PF_UNIX protocol-family alias. */
2207 MODULE_LICENSE("GPL");
2208 MODULE_ALIAS_NETPROTO(PF_UNIX
);