/*
 * NET3:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan.cox@linux.org>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Version:	$Id: af_unix.c,v 1.84 1999/09/08 03:47:18 davem Exp $
 *
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *		Kirk Petersen	:	Made this a module
 *		Christoph Rohland:	Elegant non-blocking accept/connect algorithm.
 *		Alexey Kuznetsov:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *		Andrea Arcangeli:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *		Artur Skawina	:	Hash function optimizations
 *		Alexey Kuznetsov:	Full scale SMP. Lots of bugs are introduced 8)
 *
 * Known differences from reference BSD that was tested:
 *
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=NODEV, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 */
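/*
 * Illustration only (not part of this file): a minimal userspace sketch of
 * binding to an abstract name. The name "\0example" is a made-up example;
 * abstract names begin with a NUL byte, are not NUL terminated, and their
 * length is given by the address length passed to bind().
 *
 *	struct sockaddr_un sun;
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memset(&sun, 0, sizeof(sun));
 *	sun.sun_family = AF_UNIX;
 *	memcpy(sun.sun_path, "\0example", 8);
 *	bind(fd, (struct sockaddr *)&sun, sizeof(short) + 8);
 */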
#include <linux/module.h>
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/malloc.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp_lock.h>
#include <asm/checksum.h>
#define min(a,b) (((a)<(b))?(a):(b))

int sysctl_unix_max_dgram_qlen = 10;

unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

#define unix_sockets_unbound	(unix_socket_table[UNIX_HASH_SIZE])
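/*
 * A socket bound to a filesystem object has its address hash set to the
 * sentinel UNIX_HASH_SIZE (it is hashed by inode number instead), so any
 * other hash value means an abstract (or autobound) name.
 */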
#define UNIX_ABSTRACT(sk)	((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE)
/*
 *	SMP locking strategy.
 *	 hash table is protected with rwlock unix_table_lock
 *	 each socket state is protected by separate rwlock.
 */
extern __inline__ unsigned unix_hash_fold(unsigned hash)
        return hash&(UNIX_HASH_SIZE-1);
#define unix_peer(sk) ((sk)->pair)
extern __inline__ int unix_our_peer(unix_socket *sk, unix_socket *osk)
        return unix_peer(osk) == sk;
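/*
 * A datagram may be sent to 'osk' only while 'osk' is either unconnected
 * or already connected back to the sender.
 */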
extern __inline__ int unix_may_send(unix_socket *sk, unix_socket *osk)
        return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
static __inline__ unix_socket * unix_peer_get(unix_socket *s)
        unix_state_runlock(s);
extern __inline__ void unix_release_addr(struct unix_address *addr)
        if (atomic_dec_and_test(&addr->refcnt))
/*
 *	Check unix socket name:
 *		- should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NULL terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */
static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
        if (len <= sizeof(short) || len > sizeof(*sunaddr))
        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
        if (sunaddr->sun_path[0])
                /*
                 * This may look like an off by one error but it is
                 * a bit more subtle. 108 is the longest valid AF_UNIX
                 * path for a binding. sun_path[108] doesn't as such
                 * exist. However in kernel space we are guaranteed that
                 * it is a valid memory location in our kernel
                 */
                if (len > sizeof(*sunaddr))
                        len = sizeof(*sunaddr);
                ((char *)sunaddr)[len]=0;
                len = strlen(sunaddr->sun_path)+1+sizeof(short);

        *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
static void __unix_remove_socket(unix_socket *sk)
        unix_socket **list = sk->protinfo.af_unix.list;

        sk->next->prev = sk->prev;
        sk->prev->next = sk->next;
        sk->protinfo.af_unix.list = NULL;
static void __unix_insert_socket(unix_socket **list, unix_socket *sk)
        BUG_TRAP(sk->protinfo.af_unix.list==NULL);
        sk->protinfo.af_unix.list = list;
static __inline__ void unix_remove_socket(unix_socket *sk)
        write_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        write_unlock(&unix_table_lock);
static __inline__ void unix_insert_socket(unix_socket **list, unix_socket *sk)
        write_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        write_unlock(&unix_table_lock);
static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname,
                                              int len, int type, unsigned hash)
        for (s=unix_socket_table[hash^type]; s; s=s->next) {
                if(s->protinfo.af_unix.addr->len==len &&
                   memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0)
static __inline__ unix_socket *
unix_find_socket_byname(struct sockaddr_un *sunname,
                        int len, int type, unsigned hash)
        read_lock(&unix_table_lock);
        s = __unix_find_socket_byname(sunname, len, type, hash);
        read_unlock(&unix_table_lock);
static unix_socket *unix_find_socket_byinode(struct inode *i)
        read_lock(&unix_table_lock);
        for (s=unix_socket_table[i->i_ino & (UNIX_HASH_SIZE-1)]; s; s=s->next)
                struct dentry *dentry = s->protinfo.af_unix.dentry;

                if(dentry && dentry->d_inode == i)
        read_unlock(&unix_table_lock);
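/*
 * The socket counts as writable while no more than a quarter of its send
 * buffer is committed to queued skbs.
 */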
static __inline__ int unix_writable(struct sock *sk)
        return ((atomic_read(&sk->wmem_alloc)<<2) <= sk->sndbuf);
static void unix_write_space(struct sock *sk)
        read_lock(&sk->callback_lock);
        if (!sk->dead && unix_writable(sk)) {
                wake_up_interruptible(sk->sleep);
                sock_wake_async(sk->socket, 2, POLL_OUT);
        read_unlock(&sk->callback_lock);
static void unix_sock_destructor(struct sock *sk)
        skb_queue_purge(&sk->receive_queue);

        BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
        BUG_TRAP(sk->protinfo.af_unix.list==NULL);
        BUG_TRAP(sk->socket==NULL);
                printk("Attempt to release alive unix socket: %p\n", sk);

        if (sk->protinfo.af_unix.addr)
                unix_release_addr(sk->protinfo.af_unix.addr);

        atomic_dec(&unix_nr_socks);
#ifdef UNIX_REFCNT_DEBUG
        printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
static int unix_release_sock (unix_socket *sk, int embrion)
        struct dentry *dentry;

        unix_remove_socket(sk);

        unix_state_wlock(sk);
        write_lock(&sk->callback_lock);
        write_unlock(&sk->callback_lock);
        sk->shutdown = SHUTDOWN_MASK;
        dentry = sk->protinfo.af_unix.dentry;
        sk->protinfo.af_unix.dentry=NULL;
        sk->state = TCP_CLOSE;
        unix_state_wunlock(sk);

        wake_up_interruptible(sk->sleep);
        wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);

        skpair=unix_peer(sk);
        if (sk->type==SOCK_STREAM) {
                unix_state_wlock(skpair);
                skpair->shutdown=SHUTDOWN_MASK;	/* No more writes */
                if (!skb_queue_empty(&sk->receive_queue) || embrion)
                        skpair->err = ECONNRESET;
                unix_state_wunlock(skpair);
                sk->data_ready(skpair,0);
        sock_put(skpair); /* It may now die */
        unix_peer(sk) = NULL;

        /* Try to flush out this socket. Throw out buffers at least */

        while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
                if (state==TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook */

        /* ---- Socket is dead now and most probably destroyed ---- */
        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *
         *        Can't we simply set sock->err?
         *
         *        What is the above comment talking about? --ANK(980817)
         */

        if (atomic_read(&unix_tot_inflight))
                unix_gc();		/* Garbage collect fds */
static int unix_listen(struct socket *sock, int backlog)
        struct sock *sk = sock->sk;

        if (sock->type!=SOCK_STREAM)
                goto out;	/* Only stream sockets accept */
        if (!sk->protinfo.af_unix.addr)
                goto out;	/* No listens on an unbound socket */
        unix_state_wlock(sk);
        if (sk->state != TCP_CLOSE && sk->state != TCP_LISTEN)
        if (backlog > sk->max_ack_backlog)
                wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
        sk->max_ack_backlog=backlog;
        sk->state=TCP_LISTEN;
        sock->flags |= SO_ACCEPTCON;
        /* set credentials so connect can copy them */
        sk->peercred.pid = current->pid;
        sk->peercred.uid = current->euid;
        sk->peercred.gid = current->egid;
        unix_state_wunlock(sk);
extern struct proto_ops unix_stream_ops;
extern struct proto_ops unix_dgram_ops;
static struct sock * unix_create1(struct socket *sock)
        if (atomic_read(&unix_nr_socks) >= 2*max_files)

        sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1);

        atomic_inc(&unix_nr_socks);

        sock_init_data(sock,sk);

        sk->write_space = unix_write_space;
        sk->max_ack_backlog = sysctl_unix_max_dgram_qlen;
        sk->destruct = unix_sock_destructor;
        sk->protinfo.af_unix.dentry=NULL;
        sk->protinfo.af_unix.lock = RW_LOCK_UNLOCKED;
        atomic_set(&sk->protinfo.af_unix.inflight, 0);
        init_MUTEX(&sk->protinfo.af_unix.readsem);	/* single task reading lock */
        init_waitqueue_head(&sk->protinfo.af_unix.peer_wait);
        sk->protinfo.af_unix.list=NULL;
        unix_insert_socket(&unix_sockets_unbound, sk);
static int unix_create(struct socket *sock, int protocol)
        if (protocol && protocol != PF_UNIX)
                return -EPROTONOSUPPORT;

        sock->state = SS_UNCONNECTED;

        switch (sock->type) {
                sock->ops = &unix_stream_ops;
                /*
                 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
                 */
                sock->type=SOCK_DGRAM;
                sock->ops = &unix_dgram_ops;
                return -ESOCKTNOSUPPORT;

        return unix_create1(sock) ? 0 : -ENOMEM;
static int unix_release(struct socket *sock)
        unix_socket *sk = sock->sk;

        return unix_release_sock (sk, 0);
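/*
 * Autobind picks a free abstract name of the form "\0xxxxx" (five hex
 * digits taken from a rolling counter), retrying under the table lock
 * until an unused name is found.
 */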
static int unix_autobind(struct socket *sock)
        struct sock *sk = sock->sk;
        static u32 ordernum = 1;
        struct unix_address * addr;

        down(&sk->protinfo.af_unix.readsem);

        if (sk->protinfo.af_unix.addr)

        addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);

        memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
        addr->name->sun_family = AF_UNIX;
        atomic_set(&addr->refcnt, 1);

        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));

        write_lock(&unix_table_lock);
        ordernum = (ordernum+1)&0xFFFFF;

        if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
                write_unlock(&unix_table_lock);
                /* Sanity yield. It is an unusual case, but still... */
                if (!(ordernum&0xFF)) {
                        current->policy |= SCHED_YIELD;

        addr->hash ^= sk->type;

        __unix_remove_socket(sk);
        sk->protinfo.af_unix.addr = addr;
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        write_unlock(&unix_table_lock);

        up(&sk->protinfo.af_unix.readsem);
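/*
 * Look up the target of a connect or sendmsg: filesystem names are
 * resolved through the VFS and matched by inode, abstract names are
 * matched by name hash.
 */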
static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
                                    int type, unsigned hash, int *error)
        if (sunname->sun_path[0])
                struct dentry *dentry;

                /* Do not trust the VFS, grab the kernel lock */
                dentry = open_namei(sunname->sun_path, 2, S_IFSOCK);
                if (IS_ERR(dentry)) {
                        *error = PTR_ERR(dentry);
                u=unix_find_socket_byinode(dentry->d_inode);
                if (u && u->type != type)
                u=unix_find_socket_byname(sunname, len, type, hash);

        *error=-ECONNREFUSED;
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct sock *sk = sock->sk;
        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
        struct dentry * dentry = NULL;
        struct unix_address *addr;

        if (sunaddr->sun_family != AF_UNIX)

        if (addr_len==sizeof(short)) {
                err = unix_autobind(sock);

        err = unix_mkname(sunaddr, addr_len, &hash);

        down(&sk->protinfo.af_unix.readsem);

        if (sk->protinfo.af_unix.addr)

        addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash^sk->type;
        atomic_set(&addr->refcnt, 1);

        if (sunaddr->sun_path[0]) {
                dentry = do_mknod(sunaddr->sun_path, S_IFSOCK|sock->inode->i_mode, 0);
                if (IS_ERR(dentry)) {
                        err = PTR_ERR(dentry);
                        unix_release_addr(addr);
                addr->hash = UNIX_HASH_SIZE;

        write_lock(&unix_table_lock);

        if (!sunaddr->sun_path[0]) {
                if (__unix_find_socket_byname(sunaddr, addr_len,
                        unix_release_addr(addr);

                list = &unix_socket_table[addr->hash];
                list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
                sk->protinfo.af_unix.dentry = dentry;

        __unix_remove_socket(sk);
        sk->protinfo.af_unix.addr = addr;
        __unix_insert_socket(list, sk);

        write_unlock(&unix_table_lock);

        up(&sk->protinfo.af_unix.readsem);
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
        struct sock *sk = sock->sk;
        struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);

                if (sock->passcred && !sk->protinfo.af_unix.addr &&
                    (err = unix_autobind(sock)) != 0)

                other=unix_find_other(sunaddr, alen, sock->type, hash, &err);

                unix_state_wlock(sk);

                if (!unix_may_send(sk, other))
                /*
                 *	1003.1g breaking connected state with AF_UNSPEC
                 */
                unix_state_wlock(sk);

        /*
         *	If it was connected, reconnect.
         */
                sock_put(unix_peer(sk));

        unix_state_wunlock(sk);

        unix_state_wunlock(sk);
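/*
 * Put the caller to sleep on the peer's wait queue while the peer is
 * alive, not shut down for reading, no signal is pending and its receive
 * queue is still at or above its backlog.
 */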
static void unix_wait_for_peer(unix_socket *other)
        DECLARE_WAITQUEUE(wait, current);

        __set_current_state(TASK_INTERRUPTIBLE);
        add_wait_queue(&other->protinfo.af_unix.peer_wait, &wait);

        sched = (!other->dead &&
                 !(other->shutdown&RCV_SHUTDOWN) &&
                 !signal_pending(current) &&
                 skb_queue_len(&other->receive_queue) >= other->max_ack_backlog);

        unix_state_runlock(other);

        __set_current_state(TASK_RUNNING);
        remove_wait_queue(&other->protinfo.af_unix.peer_wait, &wait);
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct sock *newsk = NULL;
        unix_socket *other = NULL;
        struct sk_buff *skb = NULL;

        err = unix_mkname(sunaddr, addr_len, &hash);

        if (sock->passcred && !sk->protinfo.af_unix.addr &&
            (err = unix_autobind(sock)) != 0)

        /* First of all allocate resources.
           If we allocate them after the state is locked,
           we will have to recheck everything again in any case.
         */

        /* create new sock for complete connection */
        newsk = unix_create1(NULL);

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);

        /* Find listening sock. */
        other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err);

        /* Latch state of peer */
        unix_state_rlock(other);

                /* Apparently VFS overslept socket death. Retry. */
                unix_state_runlock(other);

        if (other->state != TCP_LISTEN)

        if (skb_queue_len(&other->receive_queue) >= other->max_ack_backlog) {
                if (flags & O_NONBLOCK)

                unix_wait_for_peer(other);

                if (signal_pending(current))

        /*  This is a tricky place. We need to grab the write lock and cannot
            drop the lock on the peer. It is dangerous because deadlock is
            possible. The connect-to-self case and simultaneous
            attempts to connect are eliminated by checking the socket
            state. other is TCP_LISTEN; if sk is TCP_LISTEN we
            check this before attempting to grab the lock.

            Well, and we have to recheck the state after the socket is locked.
         */

                /* This is ok... continue with connect */
        case TCP_ESTABLISHED:
                /* Socket is already connected */

        unix_state_wlock(sk);

        if (sk->state != st) {
                unix_state_wunlock(sk);
                unix_state_runlock(other);

        /* The way is open! Quickly set all the necessary fields... */

        newsk->state=TCP_ESTABLISHED;
        newsk->type=SOCK_STREAM;
        newsk->peercred.pid = current->pid;
        newsk->peercred.uid = current->euid;
        newsk->peercred.gid = current->egid;
        newsk->sleep = &newsk->protinfo.af_unix.peer_wait;

        /* copy address information from listening to new sock */
        if (other->protinfo.af_unix.addr)
                atomic_inc(&other->protinfo.af_unix.addr->refcnt);
                newsk->protinfo.af_unix.addr=other->protinfo.af_unix.addr;

        if (other->protinfo.af_unix.dentry) {
                /* Damn, even dget is not SMP safe. It becomes ridiculous... */
                newsk->protinfo.af_unix.dentry=dget(other->protinfo.af_unix.dentry);

        /* Set credentials */
        sk->peercred = other->peercred;

        sock->state=SS_CONNECTED;
        sk->state=TCP_ESTABLISHED;

        unix_state_wunlock(sk);

        /* take ten and send info to listening sock */
        skb_queue_tail(&other->receive_queue,skb);
        unix_state_runlock(other);
        other->data_ready(other, 0);

        unix_state_runlock(other);

        unix_release_sock(newsk, 0);
static int unix_socketpair(struct socket *socka, struct socket *sockb)
        struct sock *ska=socka->sk, *skb = sockb->sk;

        /* Join our sockets back to back */

        if (ska->type != SOCK_DGRAM)

        ska->state=TCP_ESTABLISHED;
        skb->state=TCP_ESTABLISHED;
        socka->state=SS_CONNECTED;
        sockb->state=SS_CONNECTED;
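/*
 * Illustration only (not part of this file): the userspace view of the
 * back-to-back join performed above.
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0)
 *		write(sv[0], "ping", 4);	-- the data becomes readable on sv[1]
 */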
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
        unix_socket *sk = sock->sk;

        if (sock->type!=SOCK_STREAM)
        if (sk->state!=TCP_LISTEN)

        /* If socket state is TCP_LISTEN it cannot change,
           so that no locks are necessary.
         */

        skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);

        if (skb_queue_len(&sk->receive_queue) <= sk->max_ack_backlog/2)
                wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
        skb_free_datagram(sk, skb);

        /* attach accepted sock to socket */
        unix_state_wlock(tsk);
        newsock->state = SS_CONNECTED;
        tsk->sleep = &newsock->wait;
        tsk->socket = newsock;
        unix_state_wunlock(tsk);
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
        struct sock *sk = sock->sk;
        struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;

                sk = unix_peer_get(sk);

        unix_state_rlock(sk);
        if (!sk->protinfo.af_unix.addr) {
                sunaddr->sun_family = AF_UNIX;
                sunaddr->sun_path[0] = 0;
                *uaddr_len = sizeof(short);
                struct unix_address *addr = sk->protinfo.af_unix.addr;

                *uaddr_len = addr->len;
                memcpy(sunaddr, addr->name, *uaddr_len);
        unix_state_runlock(sk);
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
        scm->fp = UNIXCB(skb).fp;
        skb->destructor = sock_wfree;
        UNIXCB(skb).fp = NULL;

        for (i=scm->fp->count-1; i>=0; i--)
                unix_notinflight(scm->fp->fp[i]);

static void unix_destruct_fds(struct sk_buff *skb)
        struct scm_cookie scm;
        memset(&scm, 0, sizeof(scm));
        unix_detach_fds(&scm, skb);

        /* Alas, it calls VFS */

static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
        for (i=scm->fp->count-1; i>=0; i--)
                unix_inflight(scm->fp->fp[i]);
        UNIXCB(skb).fp = scm->fp;
        skb->destructor = unix_destruct_fds;
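/*
 * Illustration only (not part of this file): a minimal userspace sketch of
 * passing a file descriptor with SCM_RIGHTS, which is what ends up in
 * UNIXCB(skb).fp via unix_attach_fds() on the send side. 'sock' and
 * 'fd_to_pass' are hypothetical.
 *
 *	struct msghdr msg = {0};
 *	struct iovec iov = { "x", 1 };
 *	char control[CMSG_SPACE(sizeof(int))];
 *	struct cmsghdr *cmsg;
 *
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = control;
 *	msg.msg_controllen = sizeof(control);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(sock, &msg, 0);
 */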
/*
 *	Send AF_UNIX data.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len,
                              struct scm_cookie *scm)
        struct sock *sk = sock->sk;
        struct sockaddr_un *sunaddr=msg->msg_name;
        unix_socket *other = NULL;
        int namelen = 0; /* fake GCC */
        struct sk_buff *skb;

        if (msg->msg_flags&MSG_OOB)

        if (msg->msg_flags&~(MSG_DONTWAIT|MSG_NOSIGNAL))

        if (msg->msg_namelen) {
                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);

                other = unix_peer_get(sk);

        if (sock->passcred && !sk->protinfo.af_unix.addr &&
            (err = unix_autobind(sock)) != 0)

        skb = sock_alloc_send_skb(sk, len, 0, msg->msg_flags&MSG_DONTWAIT, &err);

        memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
                unix_attach_fds(scm, skb);

        skb->h.raw = skb->data;
        err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);

                if (sunaddr == NULL)

                other = unix_find_other(sunaddr, namelen, sk->type, hash, &err);

        unix_state_rlock(other);

        if (!unix_may_send(sk, other))

                /*
                 *	Check with 1003.1g - what should
                 */
                unix_state_runlock(other);

                unix_state_wlock(sk);
                if (unix_peer(sk) == other) {
                        err = -ECONNREFUSED;
                unix_state_wunlock(sk);

        if (other->shutdown&RCV_SHUTDOWN)

        if (0/*other->user_callback &&
              other->user_callback(other->user_data, skb) == 0*/) {
                unix_state_runlock(other);

        if (skb_queue_len(&other->receive_queue) >= other->max_ack_backlog) {
                if (msg->msg_flags & MSG_DONTWAIT) {

                unix_wait_for_peer(other);

                if (signal_pending(current))

        skb_queue_tail(&other->receive_queue, skb);
        unix_state_runlock(other);
        other->data_ready(other, len);

        unix_state_runlock(other);
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
                               struct scm_cookie *scm)
        struct sock *sk = sock->sk;
        unix_socket *other = NULL;
        struct sockaddr_un *sunaddr=msg->msg_name;
        struct sk_buff *skb;

        if (msg->msg_flags&MSG_OOB)

        if (msg->msg_flags&~(MSG_DONTWAIT|MSG_NOSIGNAL))

        if (msg->msg_namelen) {
                err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);

                other = unix_peer_get(sk);

        if (sk->shutdown&SEND_SHUTDOWN)

                /*
                 *	Optimisation for the fact that under 0.01% of X messages typically
                 */

                /* Keep two messages in the pipe so it schedules better */
                if (size > sk->sndbuf/2 - 16)
                        size = sk->sndbuf/2 - 16;

                /*
                 *	Keep to page sized kmalloc()'s as various people
                 *	have suggested. Big mallocs stress the vm too
                 */
                        limit = 4096-16; /* Fall back to a page if we can't grab a big buffer this instant */
                        limit = 0;	/* Otherwise just grab and wait */

                skb=sock_alloc_send_skb(sk,size,limit,msg->msg_flags&MSG_DONTWAIT, &err);

                /*
                 *	If you pass two values to the sock_alloc_send_skb
                 *	it tries to grab the large buffer with GFP_BUFFER
                 *	(which can fail easily), and if it fails grab the
                 *	fallback size buffer which is under a page and will
                 */

                size = min(size, skb_tailroom(skb));

                memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
                        unix_attach_fds(scm, skb);

                if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {

                unix_state_rlock(other);

                if (other->dead || (other->shutdown & RCV_SHUTDOWN))

                skb_queue_tail(&other->receive_queue, skb);
                unix_state_runlock(other);
                other->data_ready(other, size);

        unix_state_runlock(other);

        if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
                send_sig(SIGPIPE,current,0);

        return sent ? : err;
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
        msg->msg_namelen = sizeof(short);
        if (sk->protinfo.af_unix.addr) {
                msg->msg_namelen=sk->protinfo.af_unix.addr->len;
                memcpy(msg->msg_name,
                       sk->protinfo.af_unix.addr->name,
                       sk->protinfo.af_unix.addr->len);
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
                              int flags, struct scm_cookie *scm)
        struct sock *sk = sock->sk;
        int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;

        msg->msg_namelen = 0;

        skb = skb_recv_datagram(sk, flags, noblock, &err);

        if (skb_queue_len(&sk->receive_queue) <= sk->max_ack_backlog/2)
                wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);

        unix_copy_addr(msg, skb->sk);

        if (size > skb->len)
        else if (size < skb->len)
                msg->msg_flags |= MSG_TRUNC;

        err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);

        scm->creds = *UNIXCREDS(skb);

        if (!(flags & MSG_PEEK))
                unix_detach_fds(scm, skb);

                /* It is questionable: on PEEK we could:
                   - do not return fds - good, but too simple 8)
                   - return fds, and do not return them on read (old strategy,
                   - clone fds (I chose it for now, it is the most universal

                   POSIX 1003.1g does not actually define this clearly
                   at all. POSIX 1003.1g doesn't define a lot of things
                 */
                scm->fp = scm_fp_dup(UNIXCB(skb).fp);

        skb_free_datagram(sk,skb);
/*
 *	Sleep until data has arrived. But check for races..
 */

static void unix_stream_data_wait(unix_socket * sk)
        DECLARE_WAITQUEUE(wait, current);

        unix_state_rlock(sk);

        add_wait_queue(sk->sleep, &wait);

                set_current_state(TASK_INTERRUPTIBLE);

                if (skb_queue_len(&sk->receive_queue) ||
                    (sk->shutdown & RCV_SHUTDOWN) ||
                    signal_pending(current))

                sk->socket->flags |= SO_WAITDATA;
                unix_state_runlock(sk);
                unix_state_rlock(sk);
                sk->socket->flags &= ~SO_WAITDATA;

        __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk->sleep, &wait);
        unix_state_runlock(sk);
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
                               int flags, struct scm_cookie *scm)
        struct sock *sk = sock->sk;
        int noblock = flags & MSG_DONTWAIT;
        struct sockaddr_un *sunaddr=msg->msg_name;
        int check_creds = 0;

        if (sk->state != TCP_ESTABLISHED)

        if (flags&MSG_WAITALL)

        msg->msg_namelen = 0;

        /* Lock the socket to prevent queue disordering
         * while sleeps in memcpy_tomsg
         */

        down(&sk->protinfo.af_unix.readsem);

                struct sk_buff *skb;

                skb=skb_dequeue(&sk->receive_queue);

                        if (copied >= target)

                        /*
                         *	POSIX 1003.1g mandates this order.
                         */
                        if ((err = sock_error(sk)) != 0)
                        if (sk->shutdown & RCV_SHUTDOWN)

                        up(&sk->protinfo.af_unix.readsem);

                        unix_stream_data_wait(sk);

                        if (signal_pending(current)) {

                        down(&sk->protinfo.af_unix.readsem);

                /* Never glue messages from different writers */
                if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
                        skb_queue_head(&sk->receive_queue, skb);

                /* Copy credentials */
                scm->creds = *UNIXCREDS(skb);

                /* Copy address just once */
                        unix_copy_addr(msg, skb->sk);

                chunk = min(skb->len, size);
                if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
                        skb_queue_head(&sk->receive_queue, skb);

                /* Mark read part of skb as used */
                if (!(flags & MSG_PEEK))
                        skb_pull(skb, chunk);

                        unix_detach_fds(scm, skb);

                        /* put the skb back if we didn't use it up.. */
                                skb_queue_head(&sk->receive_queue, skb);

                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        scm->fp = scm_fp_dup(UNIXCB(skb).fp);

                        /* put message back and return */
                        skb_queue_head(&sk->receive_queue, skb);

        up(&sk->protinfo.af_unix.readsem);
        return copied ? : err;
static int unix_shutdown(struct socket *sock, int mode)
        struct sock *sk = sock->sk;

        mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
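        /*
         * The +1 above maps the userspace values SHUT_RD (0), SHUT_WR (1)
         * and SHUT_RDWR (2) onto the RCV_SHUTDOWN and SEND_SHUTDOWN bits.
         */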
        unix_state_wlock(sk);
        sk->shutdown |= mode;
        other=unix_peer(sk);
        unix_state_wunlock(sk);
        sk->state_change(sk);

        if (other && sk->type == SOCK_STREAM) {

                if (mode&RCV_SHUTDOWN)
                        peer_mode |= SEND_SHUTDOWN;
                if (mode&SEND_SHUTDOWN)
                        peer_mode |= RCV_SHUTDOWN;
                unix_state_wlock(other);
                other->shutdown |= peer_mode;
                unix_state_wunlock(other);
                if (peer_mode&RCV_SHUTDOWN)
                        other->data_ready(other,0);
                        other->state_change(other);
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        struct sock *sk = sock->sk;

                amount = sk->sndbuf - atomic_read(&sk->wmem_alloc);
                err = put_user(amount, (int *)arg);

                struct sk_buff *skb;
                if (sk->state==TCP_LISTEN) {

                spin_lock(&sk->receive_queue.lock);
                if((skb=skb_peek(&sk->receive_queue))!=NULL)
                spin_unlock(&sk->receive_queue.lock);
                err = put_user(amount, (int *)arg);
static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sleep, wait);

        /* exceptional events? */
        if (sk->shutdown & RCV_SHUTDOWN)

        if (!skb_queue_empty(&sk->receive_queue))
                mask |= POLLIN | POLLRDNORM;

        /* Connection-based need to check for termination and startup */
        if (sk->type == SOCK_STREAM && sk->state==TCP_CLOSE)

        /*
         * we set writable also when the other side has shut down the
         * connection. This prevents stuck sockets.
         */
        if (unix_writable(sk))
                mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
#ifdef CONFIG_PROC_FS
static int unix_read_proc(char *buffer, char **start, off_t offset,
                          int length, int *eof, void *data)
        len += sprintf(buffer,"Num RefCount Protocol Flags Type St "

        read_lock(&unix_table_lock);
        forall_unix_sockets (i,s)
                unix_state_rlock(s);

                len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld",
                        atomic_read(&s->refcnt),
                        s->state == TCP_LISTEN ? SO_ACCEPTCON : 0,
                        (s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
                        (s->state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
                        s->socket ? s->socket->inode->i_ino : 0);

                if (s->protinfo.af_unix.addr)
                        buffer[len++] = ' ';
                        memcpy(buffer+len, s->protinfo.af_unix.addr->name->sun_path,
                               s->protinfo.af_unix.addr->len-sizeof(short));
                        if (!UNIX_ABSTRACT(s))
                        len += s->protinfo.af_unix.addr->len - sizeof(short);

                unix_state_runlock(s);

                if(pos>offset+length)

        read_unlock(&unix_table_lock);
        *start=buffer+(offset-begin);
        len-=(offset-begin);
struct proto_ops unix_stream_ops = {
        unix_stream_connect,
        unix_stream_sendmsg,
        unix_stream_recvmsg,

struct proto_ops unix_dgram_ops = {

struct net_proto_family unix_family_ops = {
#ifdef CONFIG_SYSCTL
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);

int init_module(void)

void __init unix_proto_init(struct net_proto *pro)
        struct sk_buff *dummy_skb;
        struct proc_dir_entry *ent;

        printk(KERN_INFO "NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.\n");
        if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb))
                printk(KERN_CRIT "unix_proto_init: panic\n");
        sock_register(&unix_family_ops);
#ifdef CONFIG_PROC_FS
        ent = create_proc_entry("net/unix", 0, 0);
        ent->read_proc = unix_read_proc;
#ifdef CONFIG_SYSCTL
        unix_sysctl_register();

void cleanup_module(void)
        sock_unregister(PF_UNIX);
#ifdef CONFIG_SYSCTL
        unix_sysctl_unregister();
#ifdef CONFIG_PROC_FS
        remove_proc_entry("net/unix", 0);

/*
 * compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"
 */