// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
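/*
 * Illustrative example (not part of the original source): from userspace,
 * an abstract name is bound by putting a leading NUL byte in sun_path and
 * passing the exact length, e.g. for the 5-byte abstract name "\0demo":
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	memcpy(sun.sun_path, "\0demo", 5);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 5);
 */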
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>

#include "scm.h"
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;

static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}
#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}
/*
 *	Check unix socket name:
 *		- should not be zero length.
 *		- if it starts with a non-zero byte, it should be NUL-terminated (FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
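/*
 * Worked example (illustrative, not part of the original source): for a
 * filesystem bind to "/tmp/x", userspace passes sun_family plus the path,
 * so len >= 2 + 6.  unix_mkname() NUL-terminates the path in place and
 * returns strlen("/tmp/x") + 1 + sizeof(short) == 9, leaving *hashp at 0;
 * such sockets are later hashed by inode (see unix_bind()), not by name.
 */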
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
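/*
 * Note (added for clarity): the shift above means the socket counts as
 * writable only while outstanding write memory stays at or below one
 * quarter of sk_sndbuf, leaving headroom before writers are throttled.
 */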
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows flow
 * control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is
		 * disconnected, we signal error. Messages are lost.
		 * Do not do this when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
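/*
 * Illustrative example (not part of the original source): userspace drives
 * this hook through the generic SO_PEEK_OFF socket option, e.g.
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *
 * after which MSG_PEEK reads advance the peek offset instead of re-reading
 * the same bytes.
 */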
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum + 1) & 0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
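/*
 * Illustrative example (not part of the original source): userspace
 * triggers autobind by binding with only the address family, after which
 * getsockname() reports an abstract name of five hex digits:
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	bind(fd, (struct sockaddr *)&sun, sizeof(sa_family_t));
 *	// getsockname() now yields e.g. "\0" "00001" in sun_path
 */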
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is a tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
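/*
 * Illustrative example (not part of the original source): userspace obtains
 * such a back-to-back pair with
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *
 * Both ends are born connected; no bind/listen/accept is involved.
 */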
static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}
static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
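/*
 * Illustrative example (not part of the original source): a receiver opts
 * in to these credentials from userspace with
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *
 * after which recvmsg() delivers an SCM_CREDENTIALS control message
 * carrying the sender's struct ucred (pid/uid/gid).
 */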
static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
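/*
 * Worked example (added for clarity): with 4 KiB pages, get_order(32768)
 * is 3, so UNIX_SKB_FRAGS_SZ is 32 KiB; with 64 KiB pages get_order(32768)
 * is 0 and the macro evaluates to a single full page, hence the "minimum
 * of a full page" above.
 */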
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* fast path: the tail skb can still be appended to, so the
		 * speculatively allocated newskb is not needed; drop it
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
					      &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!
		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
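/*
 * Illustrative example (not part of the original source): the fds carried
 * in UNIXCB(skb).fp reach userspace as an SCM_RIGHTS control message:
 *
 *	int passed_fd = -1;
 *	char buf[CMSG_SPACE(sizeof(int))];
 *	struct msghdr msg = { .msg_control = buf,
 *			      .msg_controllen = sizeof(buf) };
 *	recvmsg(fd, &msg, 0);
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&msg);
 *	if (c && c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS)
 *		memcpy(&passed_fd, CMSG_DATA(c), sizeof(int));
 */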
/*
 *	Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}

static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}

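/*
 * Illustrative userspace sketch (not part of this file; names are
 * arbitrary): splice() from a connected stream socket into a pipe ends up
 * in the splice actor above. With SPLICE_F_NONBLOCK (or O_NONBLOCK on the
 * socket file) state.flags becomes MSG_DONTWAIT, so an empty queue yields
 * EAGAIN instead of sleeping:
 *
 *	ssize_t n = splice(unix_fd, NULL, pipe_wr_fd, NULL, 4096,
 *			   SPLICE_F_NONBLOCK);
 */
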
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}

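/*
 * Illustrative userspace sketch (not part of this file): because the peer's
 * sk_shutdown is mirrored above, shutting down the write side of one end of
 * a stream pair makes reads on the other end drain and then return 0 (EOF):
 *
 *	int sv[2];
 *	char c;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	shutdown(sv[0], SHUT_WR);
 *	read(sv[1], &c, 1);	(returns 0 once the queue is drained)
 */
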
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);

static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}

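/*
 * Illustrative userspace sketch (not part of this file): SIOCUNIXFILE (see
 * unix_ioctl() below) hands back an O_CLOEXEC, O_PATH descriptor for the
 * filesystem inode a socket is bound to, usable with the *at() family. The
 * caller needs CAP_NET_ADMIN, per the check above; error handling omitted:
 *
 *	int path_fd = ioctl(sock_fd, SIOCUNIXFILE);
 *	struct stat st;
 *
 *	fstatat(path_fd, "", &st, AT_EMPTY_PATH);
 */
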
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}

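/*
 * Illustrative userspace sketch (not part of this file): SIOCINQ reports
 * unread bytes (summed over all queued skbs for stream/seqpacket sockets,
 * the first datagram only for dgram, as unix_inq_len() shows) and SIOCOUTQ
 * reports bytes still charged to the send buffer:
 *
 *	int pending_in, pending_out;
 *
 *	ioctl(fd, SIOCINQ, &pending_in);
 *	ioctl(fd, SIOCOUTQ, &pending_out);
 */
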
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif

static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}

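/*
 * For connected datagram and seqpacket sockets, writability also depends on
 * the peer: unix_dgram_poll() below reports the socket as not writable when
 * the peer's receive queue is full, after registering on the peer's wait
 * queue (unix_dgram_peer_wake_me()) so a later read by the peer wakes this
 * poller again.
 */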
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

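/*
 * The seq_file position packs (hash bucket, 1-based offset within bucket)
 * into a single loff_t: the bucket index occupies the top bits and the
 * offset the low BUCKET_SPACE bits. Assuming a 64-bit build with
 * UNIX_HASH_BITS == 8, BUCKET_SPACE is 64 - 9 - 1 = 54, so, e.g.,
 * set_bucket_offset(3, 2) == (3UL << 54) | 2.
 */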
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}

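/*
 * A /proc/net/unix entry emitted above looks roughly like this
 * (illustrative values only; %pK is typically hashed or zeroed for
 * unprivileged readers):
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 12345 @name
 *
 * An '@' prefix marks an abstract name, and NUL bytes inside the name are
 * rendered as '@' as well.
 */
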
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};

static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};

static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);