net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33  *                                      by the above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      has been reached. This won't break
  37  *                                      old apps and it will avoid a huge number
  38  *                                      of hashed socks (this is for unix_gc()
  39  *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
 122 #include "scm.h"
 123
     /*
      * Hash table of AF_UNIX sockets with 2 * UNIX_HASH_SIZE buckets.
      * unix_sockets_unbound() only hands out buckets from the upper half
      * (index >= UNIX_HASH_SIZE).
      */
 124 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 125 EXPORT_SYMBOL_GPL(unix_socket_table);
     /* Spinlock held around lookups, inserts and removals on unix_socket_table. */
 126 DEFINE_SPINLOCK(unix_table_lock);
 127 EXPORT_SYMBOL_GPL(unix_table_lock);
     /*
      * Number of AF_UNIX socks: incremented in unix_create1() and
      * decremented in unix_sock_destructor().
      */
 128 static atomic_long_t unix_nr_socks;
 129
 130
 131 static struct hlist_head *unix_sockets_unbound(void *addr)
 132 {
 133         unsigned long hash = (unsigned long)addr;
 134
 135         hash ^= hash >> 16;
 136         hash ^= hash >> 8;
 137         hash %= UNIX_HASH_SIZE;
 138         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 139 }
 140
     /*
      * True when the hash stored in the socket's address is below
      * UNIX_HASH_SIZE.  Dereferences ->addr unconditionally, so it must only
      * be used on a socket that has an address.
      * NOTE(review): presumably this distinguishes abstract names from
      * filesystem ones - confirm against the code that sets addr->hash.
      */
 141 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 142
 143 #ifdef CONFIG_SECURITY_NETWORK
 144 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 145 {
 146         UNIXCB(skb).secid = scm->secid;
 147 }
 148
 149 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 150 {
 151         scm->secid = UNIXCB(skb).secid;
 152 }
 153
 154 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 155 {
 156         return (scm->secid == UNIXCB(skb).secid);
 157 }
 158 #else
 159 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 160 { }
 161
 162 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 163 { }
 164
 165 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 166 {
 167         return true;
 168 }
 169 #endif /* CONFIG_SECURITY_NETWORK */
 170
 171 /*
 172  *  SMP locking strategy:
 173  *    hash table is protected with spinlock unix_table_lock
 174  *    each socket state is protected by separate spin lock.
 175  */
 176
 177 static inline unsigned int unix_hash_fold(__wsum n)
 178 {
 179         unsigned int hash = (__force unsigned int)csum_fold(n);
 180
 181         hash ^= hash>>8;
 182         return hash&(UNIX_HASH_SIZE-1);
 183 }
 184
     /*
      * Lvalue naming the peer pointer of @sk; used in this file both to read
      * the peer and to assign it.  The macro itself takes no lock and no
      * reference.
      */
 185 #define unix_peer(sk) (unix_sk(sk)->peer)
 186
 187 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 188 {
 189         return unix_peer(osk) == sk;
 190 }
 191
 192 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 193 {
 194         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 195 }
 196
 197 static inline int unix_recvq_full(struct sock const *sk)
 198 {
 199         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 200 }
 201
 202 struct sock *unix_peer_get(struct sock *s)
 203 {
 204         struct sock *peer;
 205
 206         unix_state_lock(s);
 207         peer = unix_peer(s);
 208         if (peer)
 209                 sock_hold(peer);
 210         unix_state_unlock(s);
 211         return peer;
 212 }
 213 EXPORT_SYMBOL_GPL(unix_peer_get);
 214
 215 static inline void unix_release_addr(struct unix_address *addr)
 216 {
 217         if (refcount_dec_and_test(&addr->refcnt))
 218                 kfree(addr);
 219 }
 220
 221 /*
 222  *      Check unix socket name:
 223  *              - should be not zero length.
 224  *              - if started by not zero, should be NULL terminated (FS object)
 225  *              - if started by zero, it is abstract name.
 226  */
 227
 228 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 229 {
 230         *hashp = 0;
 231
 232         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 233                 return -EINVAL;
 234         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 235                 return -EINVAL;
 236         if (sunaddr->sun_path[0]) {
 237                 /*
 238                  * This may look like an off by one error but it is a bit more
 239                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 240                  * sun_path[108] doesn't as such exist.  However in kernel space
 241                  * we are guaranteed that it is a valid memory location in our
 242                  * kernel address buffer.
 243                  */
 244                 ((char *)sunaddr)[len] = 0;
 245                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 246                 return len;
 247         }
 248
 249         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 250         return len;
 251 }
 252
 253 static void __unix_remove_socket(struct sock *sk)
 254 {
 255         sk_del_node_init(sk);
 256 }
 257
 258 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 259 {
 260         WARN_ON(!sk_unhashed(sk));
 261         sk_add_node(sk, list);
 262 }
 263
 264 static inline void unix_remove_socket(struct sock *sk)
 265 {
 266         spin_lock(&unix_table_lock);
 267         __unix_remove_socket(sk);
 268         spin_unlock(&unix_table_lock);
 269 }
 270
 271 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 272 {
 273         spin_lock(&unix_table_lock);
 274         __unix_insert_socket(list, sk);
 275         spin_unlock(&unix_table_lock);
 276 }
 277
 278 static struct sock *__unix_find_socket_byname(struct net *net,
 279                                               struct sockaddr_un *sunname,
 280                                               int len, int type, unsigned int hash)
 281 {
 282         struct sock *s;
 283
 284         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 285                 struct unix_sock *u = unix_sk(s);
 286
 287                 if (!net_eq(sock_net(s), net))
 288                         continue;
 289
 290                 if (u->addr->len == len &&
 291                     !memcmp(u->addr->name, sunname, len))
 292                         goto found;
 293         }
 294         s = NULL;
 295 found:
 296         return s;
 297 }
 298
 299 static inline struct sock *unix_find_socket_byname(struct net *net,
 300                                                    struct sockaddr_un *sunname,
 301                                                    int len, int type,
 302                                                    unsigned int hash)
 303 {
 304         struct sock *s;
 305
 306         spin_lock(&unix_table_lock);
 307         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 308         if (s)
 309                 sock_hold(s);
 310         spin_unlock(&unix_table_lock);
 311         return s;
 312 }
 313
 314 static struct sock *unix_find_socket_byinode(struct inode *i)
 315 {
 316         struct sock *s;
 317
 318         spin_lock(&unix_table_lock);
 319         sk_for_each(s,
 320                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 321                 struct dentry *dentry = unix_sk(s)->path.dentry;
 322
 323                 if (dentry && d_backing_inode(dentry) == i) {
 324                         sock_hold(s);
 325                         goto found;
 326                 }
 327         }
 328         s = NULL;
 329 found:
 330         spin_unlock(&unix_table_lock);
 331         return s;
 332 }
 333
 334 /* Support code for asymmetrically connected dgram sockets
 335  *
 336  * If a datagram socket is connected to a socket not itself connected
 337  * to the first socket (eg, /dev/log), clients may only enqueue more
 338  * messages if the present receive queue of the server socket is not
 339  * "too large". This means there's a second writeability condition
 340  * poll and sendmsg need to test. The dgram recv code will do a wake
 341  * up on the peer_wait wait queue of a socket upon reception of a
 342  * datagram which needs to be propagated to sleeping would-be writers
 343  * since these might not have sent anything so far. This can't be
 344  * accomplished via poll_wait because the lifetime of the server
 345  * socket might be less than that of its clients if these break their
 346  * association with it or if the server socket is closed while clients
 347  * are still connected to it and there's no way to inform "a polling
 348  * implementation" that it should let go of a certain wait queue
 349  *
 350  * In order to propagate a wake up, a wait_queue_entry_t of the client
 351  * socket is enqueued on the peer_wait queue of the server socket
 352  * whose wake function does a wake_up on the ordinary client socket
 353  * wait queue. This connection is established whenever a write (or
 354  * poll for write) hit the flow control condition and broken when the
 355  * association to the server socket is dissolved or after a wake up
 356  * was relayed.
 357  */
 358
 359 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 360                                       void *key)
 361 {
 362         struct unix_sock *u;
 363         wait_queue_head_t *u_sleep;
 364
 365         u = container_of(q, struct unix_sock, peer_wake);
 366
 367         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 368                             q);
 369         u->peer_wake.private = NULL;
 370
 371         /* relaying can only happen while the wq still exists */
 372         u_sleep = sk_sleep(&u->sk);
 373         if (u_sleep)
 374                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 375
 376         return 0;
 377 }
 378
 379 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 380 {
 381         struct unix_sock *u, *u_other;
 382         int rc;
 383
 384         u = unix_sk(sk);
 385         u_other = unix_sk(other);
 386         rc = 0;
 387         spin_lock(&u_other->peer_wait.lock);
 388
 389         if (!u->peer_wake.private) {
 390                 u->peer_wake.private = other;
 391                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 392
 393                 rc = 1;
 394         }
 395
 396         spin_unlock(&u_other->peer_wait.lock);
 397         return rc;
 398 }
 399
 400 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 401                                             struct sock *other)
 402 {
 403         struct unix_sock *u, *u_other;
 404
 405         u = unix_sk(sk);
 406         u_other = unix_sk(other);
 407         spin_lock(&u_other->peer_wait.lock);
 408
 409         if (u->peer_wake.private == other) {
 410                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 411                 u->peer_wake.private = NULL;
 412         }
 413
 414         spin_unlock(&u_other->peer_wait.lock);
 415 }
 416
 417 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 418                                                    struct sock *other)
 419 {
 420         unix_dgram_peer_wake_disconnect(sk, other);
 421         wake_up_interruptible_poll(sk_sleep(sk),
 422                                    EPOLLOUT |
 423                                    EPOLLWRNORM |
 424                                    EPOLLWRBAND);
 425 }
 426
 427 /* preconditions:
 428  *      - unix_peer(sk) == other
 429  *      - association is stable
 430  */
 431 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 432 {
 433         int connected;
 434
 435         connected = unix_dgram_peer_wake_connect(sk, other);
 436
 437         /* If other is SOCK_DEAD, we want to make sure we signal
 438          * POLLOUT, such that a subsequent write() can get a
 439          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 440          * to other and its full, we will hang waiting for POLLOUT.
 441          */
 442         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 443                 return 1;
 444
 445         if (connected)
 446                 unix_dgram_peer_wake_disconnect(sk, other);
 447
 448         return 0;
 449 }
 450
 451 static int unix_writable(const struct sock *sk)
 452 {
 453         return sk->sk_state != TCP_LISTEN &&
 454                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 455 }
 456
 457 static void unix_write_space(struct sock *sk)
 458 {
 459         struct socket_wq *wq;
 460
 461         rcu_read_lock();
 462         if (unix_writable(sk)) {
 463                 wq = rcu_dereference(sk->sk_wq);
 464                 if (skwq_has_sleeper(wq))
 465                         wake_up_interruptible_sync_poll(&wq->wait,
 466                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 467                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 468         }
 469         rcu_read_unlock();
 470 }
 471
 472 /* When dgram socket disconnects (or changes its peer), we clear its receive
 473  * queue of packets arrived from previous peer. First, it allows to do
 474  * flow control based only on wmem_alloc; second, sk connected to peer
 475  * may receive messages only from that peer. */
 476 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 477 {
 478         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 479                 skb_queue_purge(&sk->sk_receive_queue);
 480                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 481
 482                 /* If one link of bidirectional dgram pipe is disconnected,
 483                  * we signal error. Messages are lost. Do not make this,
 484                  * when peer was not connected to us.
 485                  */
 486                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 487                         other->sk_err = ECONNRESET;
 488                         other->sk_error_report(other);
 489                 }
 490         }
 491 }
 492
 493 static void unix_sock_destructor(struct sock *sk)
 494 {
 495         struct unix_sock *u = unix_sk(sk);
 496
 497         skb_queue_purge(&sk->sk_receive_queue);
 498
 499         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 500         WARN_ON(!sk_unhashed(sk));
 501         WARN_ON(sk->sk_socket);
 502         if (!sock_flag(sk, SOCK_DEAD)) {
 503                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 504                 return;
 505         }
 506
 507         if (u->addr)
 508                 unix_release_addr(u->addr);
 509
 510         atomic_long_dec(&unix_nr_socks);
 511         local_bh_disable();
 512         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 513         local_bh_enable();
 514 #ifdef UNIX_REFCNT_DEBUG
 515         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 516                 atomic_long_read(&unix_nr_socks));
 517 #endif
 518 }
 519
     /*
      * Tear down @sk.
      *
      * In order: the sock is unhashed; under its state lock it is orphaned,
      * fully shut down and moved to TCP_CLOSE while its bound path and its
      * previous state are saved; sleepers on its peer_wait queue are woken;
      * the peer (if any) is notified and released; the receive queue is
      * flushed; the saved path is put; and one reference on @sk is dropped.
      *
      * @embrion: when non-zero, a stream/seqpacket peer gets ECONNRESET even
      * if the receive queue of @sk is empty.  This function passes 1 for the
      * socks it finds queued on a listening socket; unix_release() passes 0.
      */
 520 static void unix_release_sock(struct sock *sk, int embrion)
 521 {
 522         struct unix_sock *u = unix_sk(sk);
 523         struct path path;
 524         struct sock *skpair;
 525         struct sk_buff *skb;
 526         int state;
 527
 528         unix_remove_socket(sk);
 529
 530         /* Clear state */
 531         unix_state_lock(sk);
 532         sock_orphan(sk);
 533         sk->sk_shutdown = SHUTDOWN_MASK;
             /* Take over the path so it can be put after the lock is dropped. */
 534         path         = u->path;
 535         u->path.dentry = NULL;
 536         u->path.mnt = NULL;
             /* Remember the old state: the flush loop below needs to know
              * whether this was a listening socket.
              */
 537         state = sk->sk_state;
 538         sk->sk_state = TCP_CLOSE;
 539         unix_state_unlock(sk);
 540
 541         wake_up_interruptible_all(&u->peer_wait);
 542
 543         skpair = unix_peer(sk);
 544
 545         if (skpair != NULL) {
 546                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 547                         unix_state_lock(skpair);
 548                         /* No more writes */
 549                         skpair->sk_shutdown = SHUTDOWN_MASK;
                             /* Unread data on our side (or an embryo) means
                              * the peer sees a reset.
                              */
 550                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 551                                 skpair->sk_err = ECONNRESET;
 552                         unix_state_unlock(skpair);
 553                         skpair->sk_state_change(skpair);
 554                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 555                 }
 556
                     /* Make sure our peer_wake entry is off the peer's queue
                      * before our reference on the peer is dropped.
                      */
 557                 unix_dgram_peer_wake_disconnect(sk, skpair);
 558                 sock_put(skpair); /* It may now die */
 559                 unix_peer(sk) = NULL;
 560         }
 561
 562         /* Try to flush out this socket. Throw out buffers at least */
 563
 564         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                     /* On a listener, the sock attached to each queued skb
                      * is released recursively as an embryo.
                      */
 565                 if (state == TCP_LISTEN)
 566                         unix_release_sock(skb->sk, 1);
 567                 /* passed fds are erased in the kfree_skb hook        */
 568                 UNIXCB(skb).consumed = skb->len;
 569                 kfree_skb(skb);
 570         }
 571
 572         if (path.dentry)
 573                 path_put(&path);
 574
 575         sock_put(sk);
 576
 577         /* ---- Socket is dead now and most probably destroyed ---- */
 578
 579         /*
 580          * Fixme: BSD difference: In BSD all sockets connected to us get
 581          *        ECONNRESET and we die on the spot. In Linux we behave
 582          *        like files and pipes do and wait for the last
 583          *        dereference.
 584          *
 585          * Can't we simply set sock->err?
 586          *
 587          *        What the above comment does talk about? --ANK(980817)
 588          */
 589
             /* Only run the collector when unix_tot_inflight is non-zero. */
 590         if (unix_tot_inflight)
 591                 unix_gc();              /* Garbage collect fds */
 592 }
 593
 594 static void init_peercred(struct sock *sk)
 595 {
 596         put_pid(sk->sk_peer_pid);
 597         if (sk->sk_peer_cred)
 598                 put_cred(sk->sk_peer_cred);
 599         sk->sk_peer_pid  = get_pid(task_tgid(current));
 600         sk->sk_peer_cred = get_current_cred();
 601 }
 602
 603 static void copy_peercred(struct sock *sk, struct sock *peersk)
 604 {
 605         put_pid(sk->sk_peer_pid);
 606         if (sk->sk_peer_cred)
 607                 put_cred(sk->sk_peer_cred);
 608         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 609         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 610 }
 611
 612 static int unix_listen(struct socket *sock, int backlog)
 613 {
 614         int err;
 615         struct sock *sk = sock->sk;
 616         struct unix_sock *u = unix_sk(sk);
 617         struct pid *old_pid = NULL;
 618
 619         err = -EOPNOTSUPP;
 620         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 621                 goto out;       /* Only stream/seqpacket sockets accept */
 622         err = -EINVAL;
 623         if (!u->addr)
 624                 goto out;       /* No listens on an unbound socket */
 625         unix_state_lock(sk);
 626         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 627                 goto out_unlock;
 628         if (backlog > sk->sk_max_ack_backlog)
 629                 wake_up_interruptible_all(&u->peer_wait);
 630         sk->sk_max_ack_backlog  = backlog;
 631         sk->sk_state            = TCP_LISTEN;
 632         /* set credentials so connect can copy them */
 633         init_peercred(sk);
 634         err = 0;
 635
 636 out_unlock:
 637         unix_state_unlock(sk);
 638         put_pid(old_pid);
 639 out:
 640         return err;
 641 }
 642
 643 static int unix_release(struct socket *);
 644 static int unix_bind(struct socket *, struct sockaddr *, int);
 645 static int unix_stream_connect(struct socket *, struct sockaddr *,
 646                                int addr_len, int flags);
 647 static int unix_socketpair(struct socket *, struct socket *);
 648 static int unix_accept(struct socket *, struct socket *, int, bool);
 649 static int unix_getname(struct socket *, struct sockaddr *, int);
 650 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 651 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 652                                     poll_table *);
 653 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 654 static int unix_shutdown(struct socket *, int);
 655 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 656 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 657 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 658                                     size_t size, int flags);
 659 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 660                                        struct pipe_inode_info *, size_t size,
 661                                        unsigned int flags);
 662 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 663 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 664 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 665                               int, int);
 666 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 667 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 668                                   int);
 669
 670 static int unix_set_peek_off(struct sock *sk, int val)
 671 {
 672         struct unix_sock *u = unix_sk(sk);
 673
 674         if (mutex_lock_interruptible(&u->iolock))
 675                 return -EINTR;
 676
 677         sk->sk_peek_off = val;
 678         mutex_unlock(&u->iolock);
 679
 680         return 0;
 681 }
 682
 683
 684 static const struct proto_ops unix_stream_ops = {
 685         .family =       PF_UNIX,
 686         .owner =        THIS_MODULE,
 687         .release =      unix_release,
 688         .bind =         unix_bind,
 689         .connect =      unix_stream_connect,
 690         .socketpair =   unix_socketpair,
 691         .accept =       unix_accept,
 692         .getname =      unix_getname,
 693         .poll =         unix_poll,
 694         .ioctl =        unix_ioctl,
 695         .listen =       unix_listen,
 696         .shutdown =     unix_shutdown,
 697         .setsockopt =   sock_no_setsockopt,
 698         .getsockopt =   sock_no_getsockopt,
 699         .sendmsg =      unix_stream_sendmsg,
 700         .recvmsg =      unix_stream_recvmsg,
 701         .mmap =         sock_no_mmap,
 702         .sendpage =     unix_stream_sendpage,
 703         .splice_read =  unix_stream_splice_read,
 704         .set_peek_off = unix_set_peek_off,
 705 };
 706
 707 static const struct proto_ops unix_dgram_ops = {
 708         .family =       PF_UNIX,
 709         .owner =        THIS_MODULE,
 710         .release =      unix_release,
 711         .bind =         unix_bind,
 712         .connect =      unix_dgram_connect,
 713         .socketpair =   unix_socketpair,
 714         .accept =       sock_no_accept,
 715         .getname =      unix_getname,
 716         .poll =         unix_dgram_poll,
 717         .ioctl =        unix_ioctl,
 718         .listen =       sock_no_listen,
 719         .shutdown =     unix_shutdown,
 720         .setsockopt =   sock_no_setsockopt,
 721         .getsockopt =   sock_no_getsockopt,
 722         .sendmsg =      unix_dgram_sendmsg,
 723         .recvmsg =      unix_dgram_recvmsg,
 724         .mmap =         sock_no_mmap,
 725         .sendpage =     sock_no_sendpage,
 726         .set_peek_off = unix_set_peek_off,
 727 };
 728
 729 static const struct proto_ops unix_seqpacket_ops = {
 730         .family =       PF_UNIX,
 731         .owner =        THIS_MODULE,
 732         .release =      unix_release,
 733         .bind =         unix_bind,
 734         .connect =      unix_stream_connect,
 735         .socketpair =   unix_socketpair,
 736         .accept =       unix_accept,
 737         .getname =      unix_getname,
 738         .poll =         unix_dgram_poll,
 739         .ioctl =        unix_ioctl,
 740         .listen =       unix_listen,
 741         .shutdown =     unix_shutdown,
 742         .setsockopt =   sock_no_setsockopt,
 743         .getsockopt =   sock_no_getsockopt,
 744         .sendmsg =      unix_seqpacket_sendmsg,
 745         .recvmsg =      unix_seqpacket_recvmsg,
 746         .mmap =         sock_no_mmap,
 747         .sendpage =     sock_no_sendpage,
 748         .set_peek_off = unix_set_peek_off,
 749 };
 750
 751 static struct proto unix_proto = {
 752         .name                   = "UNIX",
 753         .owner                  = THIS_MODULE,
 754         .obj_size               = sizeof(struct unix_sock),
 755 };
 756
 757 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 758 {
 759         struct sock *sk = NULL;
 760         struct unix_sock *u;
 761
 762         atomic_long_inc(&unix_nr_socks);
 763         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 764                 goto out;
 765
 766         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 767         if (!sk)
 768                 goto out;
 769
 770         sock_init_data(sock, sk);
 771
 772         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 773         sk->sk_write_space      = unix_write_space;
 774         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 775         sk->sk_destruct         = unix_sock_destructor;
 776         u         = unix_sk(sk);
 777         u->path.dentry = NULL;
 778         u->path.mnt = NULL;
 779         spin_lock_init(&u->lock);
 780         atomic_long_set(&u->inflight, 0);
 781         INIT_LIST_HEAD(&u->link);
 782         mutex_init(&u->iolock); /* single task reading lock */
 783         mutex_init(&u->bindlock); /* single task binding lock */
 784         init_waitqueue_head(&u->peer_wait);
 785         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 786         unix_insert_socket(unix_sockets_unbound(sk), sk);
 787 out:
 788         if (sk == NULL)
 789                 atomic_long_dec(&unix_nr_socks);
 790         else {
 791                 local_bh_disable();
 792                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 793                 local_bh_enable();
 794         }
 795         return sk;
 796 }
 797
 798 static int unix_create(struct net *net, struct socket *sock, int protocol,
 799                        int kern)
 800 {
 801         if (protocol && protocol != PF_UNIX)
 802                 return -EPROTONOSUPPORT;
 803
 804         sock->state = SS_UNCONNECTED;
 805
 806         switch (sock->type) {
 807         case SOCK_STREAM:
 808                 sock->ops = &unix_stream_ops;
 809                 break;
 810                 /*
 811                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 812                  *      nothing uses it.
 813                  */
 814         case SOCK_RAW:
 815                 sock->type = SOCK_DGRAM;
 816                 /* fall through */
 817         case SOCK_DGRAM:
 818                 sock->ops = &unix_dgram_ops;
 819                 break;
 820         case SOCK_SEQPACKET:
 821                 sock->ops = &unix_seqpacket_ops;
 822                 break;
 823         default:
 824                 return -ESOCKTNOSUPPORT;
 825         }
 826
 827         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 828 }
 829
 830 static int unix_release(struct socket *sock)
 831 {
 832         struct sock *sk = sock->sk;
 833
 834         if (!sk)
 835                 return 0;
 836
 837         unix_release_sock(sk, 0);
 838         sock->sk = NULL;
 839
 840         return 0;
 841 }
 842
 843 static int unix_autobind(struct socket *sock)
 844 {
 845         struct sock *sk = sock->sk;
 846         struct net *net = sock_net(sk);
 847         struct unix_sock *u = unix_sk(sk);
 848         static u32 ordernum = 1;
 849         struct unix_address *addr;
 850         int err;
 851         unsigned int retries = 0;
 852
 853         err = mutex_lock_interruptible(&u->bindlock);
 854         if (err)
 855                 return err;
 856
 857         err = 0;
 858         if (u->addr)
 859                 goto out;
 860
 861         err = -ENOMEM;
 862         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 863         if (!addr)
 864                 goto out;
 865
 866         addr->name->sun_family = AF_UNIX;
 867         refcount_set(&addr->refcnt, 1);
 868
 869 retry:
 870         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 871         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 872
 873         spin_lock(&unix_table_lock);
 874         ordernum = (ordernum+1)&0xFFFFF;
 875
 876         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 877                                       addr->hash)) {
 878                 spin_unlock(&unix_table_lock);
 879                 /*
 880                  * __unix_find_socket_byname() may take long time if many names
 881                  * are already in use.
 882                  */
 883                 cond_resched();
 884                 /* Give up if all names seems to be in use. */
 885                 if (retries++ == 0xFFFFF) {
 886                         err = -ENOSPC;
 887                         kfree(addr);
 888                         goto out;
 889                 }
 890                 goto retry;
 891         }
 892         addr->hash ^= sk->sk_type;
 893
 894         __unix_remove_socket(sk);
 895         smp_store_release(&u->addr, addr);
 896         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 897         spin_unlock(&unix_table_lock);
 898         err = 0;
 899
 900 out:    mutex_unlock(&u->bindlock);
 901         return err;
 902 }
 903
 904 static struct sock *unix_find_other(struct net *net,
 905                                     struct sockaddr_un *sunname, int len,
 906                                     int type, unsigned int hash, int *error)
 907 {
 908         struct sock *u;
 909         struct path path;
 910         int err = 0;
 911
 912         if (sunname->sun_path[0]) {
 913                 struct inode *inode;
 914                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 915                 if (err)
 916                         goto fail;
 917                 inode = d_backing_inode(path.dentry);
 918                 err = inode_permission(inode, MAY_WRITE);
 919                 if (err)
 920                         goto put_fail;
 921
 922                 err = -ECONNREFUSED;
 923                 if (!S_ISSOCK(inode->i_mode))
 924                         goto put_fail;
 925                 u = unix_find_socket_byinode(inode);
 926                 if (!u)
 927                         goto put_fail;
 928
 929                 if (u->sk_type == type)
 930                         touch_atime(&path);
 931
 932                 path_put(&path);
 933
 934                 err = -EPROTOTYPE;
 935                 if (u->sk_type != type) {
 936                         sock_put(u);
 937                         goto fail;
 938                 }
 939         } else {
 940                 err = -ECONNREFUSED;
 941                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 942                 if (u) {
 943                         struct dentry *dentry;
 944                         dentry = unix_sk(u)->path.dentry;
 945                         if (dentry)
 946                                 touch_atime(&unix_sk(u)->path);
 947                 } else
 948                         goto fail;
 949         }
 950         return u;
 951
 952 put_fail:
 953         path_put(&path);
 954 fail:
 955         *error = err;
 956         return NULL;
 957 }
 958
 959 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 960 {
 961         struct dentry *dentry;
 962         struct path path;
 963         int err = 0;
 964         /*
 965          * Get the parent directory, calculate the hash for last
 966          * component.
 967          */
 968         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 969         err = PTR_ERR(dentry);
 970         if (IS_ERR(dentry))
 971                 return err;
 972
 973         /*
 974          * All right, let's create it.
 975          */
 976         err = security_path_mknod(&path, dentry, mode, 0);
 977         if (!err) {
 978                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 979                 if (!err) {
 980                         res->mnt = mntget(path.mnt);
 981                         res->dentry = dget(dentry);
 982                 }
 983         }
 984         done_path_create(&path, dentry);
 985         return err;
 986 }
 987
 988 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 989 {
 990         struct sock *sk = sock->sk;
 991         struct net *net = sock_net(sk);
 992         struct unix_sock *u = unix_sk(sk);
 993         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 994         char *sun_path = sunaddr->sun_path;
 995         int err;
 996         unsigned int hash;
 997         struct unix_address *addr;
 998         struct hlist_head *list;
 999         struct path path = { };
1000
1001         err = -EINVAL;
1002         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1003             sunaddr->sun_family != AF_UNIX)
1004                 goto out;
1005
1006         if (addr_len == sizeof(short)) {
1007                 err = unix_autobind(sock);
1008                 goto out;
1009         }
1010
1011         err = unix_mkname(sunaddr, addr_len, &hash);
1012         if (err < 0)
1013                 goto out;
1014         addr_len = err;
1015
1016         if (sun_path[0]) {
1017                 umode_t mode = S_IFSOCK |
1018                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1019                 err = unix_mknod(sun_path, mode, &path);
1020                 if (err) {
1021                         if (err == -EEXIST)
1022                                 err = -EADDRINUSE;
1023                         goto out;
1024                 }
1025         }
1026
1027         err = mutex_lock_interruptible(&u->bindlock);
1028         if (err)
1029                 goto out_put;
1030
1031         err = -EINVAL;
1032         if (u->addr)
1033                 goto out_up;
1034
1035         err = -ENOMEM;
1036         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1037         if (!addr)
1038                 goto out_up;
1039
1040         memcpy(addr->name, sunaddr, addr_len);
1041         addr->len = addr_len;
1042         addr->hash = hash ^ sk->sk_type;
1043         refcount_set(&addr->refcnt, 1);
1044
1045         if (sun_path[0]) {
1046                 addr->hash = UNIX_HASH_SIZE;
1047                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1048                 spin_lock(&unix_table_lock);
1049                 u->path = path;
1050                 list = &unix_socket_table[hash];
1051         } else {
1052                 spin_lock(&unix_table_lock);
1053                 err = -EADDRINUSE;
1054                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1055                                               sk->sk_type, hash)) {
1056                         unix_release_addr(addr);
1057                         goto out_unlock;
1058                 }
1059
1060                 list = &unix_socket_table[addr->hash];
1061         }
1062
1063         err = 0;
1064         __unix_remove_socket(sk);
1065         smp_store_release(&u->addr, addr);
1066         __unix_insert_socket(list, sk);
1067
1068 out_unlock:
1069         spin_unlock(&unix_table_lock);
1070 out_up:
1071         mutex_unlock(&u->bindlock);
1072 out_put:
1073         if (err)
1074                 path_put(&path);
1075 out:
1076         return err;
1077 }
1078
/*
 * Take the state locks of @sk1 and @sk2.  The lower address is always
 * locked first and the second lock is taken with the _nested variant.
 * Only @sk1 is locked when @sk2 is NULL or both arguments are the same
 * socket.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1093
/*
 * Counterpart of unix_state_double_lock(): release @sk1 and, unless @sk2
 * is NULL or the same socket, @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (unlikely(sk1 == sk2) || !sk2)
                return;

        unix_state_unlock(sk2);
}
1103
1104 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1105                               int alen, int flags)
1106 {
1107         struct sock *sk = sock->sk;
1108         struct net *net = sock_net(sk);
1109         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1110         struct sock *other;
1111         unsigned int hash;
1112         int err;
1113
1114         err = -EINVAL;
1115         if (alen < offsetofend(struct sockaddr, sa_family))
1116                 goto out;
1117
1118         if (addr->sa_family != AF_UNSPEC) {
1119                 err = unix_mkname(sunaddr, alen, &hash);
1120                 if (err < 0)
1121                         goto out;
1122                 alen = err;
1123
1124                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1125                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1126                         goto out;
1127
1128 restart:
1129                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1130                 if (!other)
1131                         goto out;
1132
1133                 unix_state_double_lock(sk, other);
1134
1135                 /* Apparently VFS overslept socket death. Retry. */
1136                 if (sock_flag(other, SOCK_DEAD)) {
1137                         unix_state_double_unlock(sk, other);
1138                         sock_put(other);
1139                         goto restart;
1140                 }
1141
1142                 err = -EPERM;
1143                 if (!unix_may_send(sk, other))
1144                         goto out_unlock;
1145
1146                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1147                 if (err)
1148                         goto out_unlock;
1149
1150         } else {
1151                 /*
1152                  *      1003.1g breaking connected state with AF_UNSPEC
1153                  */
1154                 other = NULL;
1155                 unix_state_double_lock(sk, other);
1156         }
1157
1158         /*
1159          * If it was connected, reconnect.
1160          */
1161         if (unix_peer(sk)) {
1162                 struct sock *old_peer = unix_peer(sk);
1163                 unix_peer(sk) = other;
1164                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1165
1166                 unix_state_double_unlock(sk, other);
1167
1168                 if (other != old_peer)
1169                         unix_dgram_disconnected(sk, old_peer);
1170                 sock_put(old_peer);
1171         } else {
1172                 unix_peer(sk) = other;
1173                 unix_state_double_unlock(sk, other);
1174         }
1175         return 0;
1176
1177 out_unlock:
1178         unix_state_double_unlock(sk, other);
1179         sock_put(other);
1180 out:
1181         return err;
1182 }
1183
1184 static long unix_wait_for_peer(struct sock *other, long timeo)
1185 {
1186         struct unix_sock *u = unix_sk(other);
1187         int sched;
1188         DEFINE_WAIT(wait);
1189
1190         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1191
1192         sched = !sock_flag(other, SOCK_DEAD) &&
1193                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194                 unix_recvq_full(other);
1195
1196         unix_state_unlock(other);
1197
1198         if (sched)
1199                 timeo = schedule_timeout(timeo);
1200
1201         finish_wait(&u->peer_wait, &wait);
1202         return timeo;
1203 }
1204
     /*
      * connect() for connection-oriented AF_UNIX sockets.
      *
      * Outline, as implemented below:
      *  - normalise the address with unix_mkname() and autobind the caller
      *    if SOCK_PASSCRED is set and it has no address yet;
      *  - allocate the new sock (newsk) and a 1-byte skb up front;
      *  - find the listener, lock it, and wait or bail out if its receive
      *    queue is full;
      *  - lock our own sock nested inside the listener's lock, cross-link
      *    sk <-> newsk, and queue the skb on the listener's receive queue
      *    (unix_accept() later picks the new sock up through skb->sk).
      *
      * Returns 0 on success.  Error codes set in this function are -ENOMEM,
      * -ECONNREFUSED, -EAGAIN, -EISCONN and -EINVAL; errors from
      * unix_mkname(), unix_autobind(), unix_find_other(), sock_intr_errno()
      * and security_unix_stream_connect() are passed through.  On failure
      * the preallocated skb and newsk are released.
      */
1205 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1206                                int addr_len, int flags)
1207 {
1208         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1209         struct sock *sk = sock->sk;
1210         struct net *net = sock_net(sk);
1211         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1212         struct sock *newsk = NULL;
1213         struct sock *other = NULL;
1214         struct sk_buff *skb = NULL;
1215         unsigned int hash;
1216         int st;
1217         int err;
1218         long timeo;
1219
             /* A non-negative return value replaces addr_len; hash is filled in. */
1220         err = unix_mkname(sunaddr, addr_len, &hash);
1221         if (err < 0)
1222                 goto out;
1223         addr_len = err;
1224
             /* A SOCK_PASSCRED socket without an address is autobound first. */
1225         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1226             (err = unix_autobind(sock)) != 0)
1227                 goto out;
1228
1229         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1230
1231         /* First of all allocate resources.
1232            If we will make it after state is locked,
1233            we will have to recheck all again in any case.
1234          */
1235
1236         err = -ENOMEM;
1237
1238         /* create new sock for complete connection */
1239         newsk = unix_create1(sock_net(sk), NULL, 0);
1240         if (newsk == NULL)
1241                 goto out;
1242
1243         /* Allocate skb for sending to listening sock */
1244         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1245         if (skb == NULL)
1246                 goto out;
1247
1248 restart:
1249         /*  Find listening sock. */
1250         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1251         if (!other)
1252                 goto out;
1253
1254         /* Latch state of peer */
1255         unix_state_lock(other);
1256
1257         /* Apparently VFS overslept socket death. Retry. */
1258         if (sock_flag(other, SOCK_DEAD)) {
1259                 unix_state_unlock(other);
1260                 sock_put(other);
1261                 goto restart;
1262         }
1263
1264         err = -ECONNREFUSED;
1265         if (other->sk_state != TCP_LISTEN)
1266                 goto out_unlock;
1267         if (other->sk_shutdown & RCV_SHUTDOWN)
1268                 goto out_unlock;
1269
1270         if (unix_recvq_full(other)) {
1271                 err = -EAGAIN;
1272                 if (!timeo)
1273                         goto out_unlock;
1274
1275                 timeo = unix_wait_for_peer(other, timeo);
1276
                     /*
                      * unix_wait_for_peer() has already dropped other's state
                      * lock, so the signal case goes to out, not out_unlock.
                      */
1277                 err = sock_intr_errno(timeo);
1278                 if (signal_pending(current))
1279                         goto out;
1280                 sock_put(other);
1281                 goto restart;
1282         }
1283
1284         /* Latch our state.
1285
1286            It is tricky place. We need to grab our state lock and cannot
1287            drop lock on peer. It is dangerous because deadlock is
1288            possible. Connect to self case and simultaneous
1289            attempt to connect are eliminated by checking socket
1290            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1291            check this before attempt to grab lock.
1292
1293            Well, and we have to recheck the state after socket locked.
1294          */
1295         st = sk->sk_state;
1296
1297         switch (st) {
1298         case TCP_CLOSE:
1299                 /* This is ok... continue with connect */
1300                 break;
1301         case TCP_ESTABLISHED:
1302                 /* Socket is already connected */
1303                 err = -EISCONN;
1304                 goto out_unlock;
1305         default:
1306                 err = -EINVAL;
1307                 goto out_unlock;
1308         }
1309
1310         unix_state_lock_nested(sk);
1311
             /* Our state changed before we got the lock: drop both and retry. */
1312         if (sk->sk_state != st) {
1313                 unix_state_unlock(sk);
1314                 unix_state_unlock(other);
1315                 sock_put(other);
1316                 goto restart;
1317         }
1318
1319         err = security_unix_stream_connect(sk, other, newsk);
1320         if (err) {
                     /* out_unlock only releases other's lock; drop ours here. */
1321                 unix_state_unlock(sk);
1322                 goto out_unlock;
1323         }
1324
1325         /* The way is open! Fastly set all the necessary fields... */
1326
1327         sock_hold(sk);
1328         unix_peer(newsk)        = sk;
1329         newsk->sk_state         = TCP_ESTABLISHED;
1330         newsk->sk_type          = sk->sk_type;
1331         init_peercred(newsk);
1332         newu = unix_sk(newsk);
1333         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1334         otheru = unix_sk(other);
1335
1336         /* copy address information from listening to new sock
1337          *
1338          * The contents of *(otheru->addr) and otheru->path
1339          * are seen fully set up here, since we have found
1340          * otheru in hash under unix_table_lock.  Insertion
1341          * into the hash chain we'd found it in had been done
1342          * in an earlier critical area protected by unix_table_lock,
1343          * the same one where we'd set *(otheru->addr) contents,
1344          * as well as otheru->path and otheru->addr itself.
1345          *
1346          * Using smp_store_release() here to set newu->addr
1347          * is enough to make those stores, as well as stores
1348          * to newu->path visible to anyone who gets newu->addr
1349          * by smp_load_acquire().  IOW, the same warranties
1350          * as for unix_sock instances bound in unix_bind() or
1351          * in unix_autobind().
1352          */
1353         if (otheru->path.dentry) {
1354                 path_get(&otheru->path);
1355                 newu->path = otheru->path;
1356         }
1357         refcount_inc(&otheru->addr->refcnt);
1358         smp_store_release(&newu->addr, otheru->addr);
1359
1360         /* Set credentials */
1361         copy_peercred(sk, other);
1362
1363         sock->state     = SS_CONNECTED;
1364         sk->sk_state    = TCP_ESTABLISHED;
1365         sock_hold(newsk);
1366
1367         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1368         unix_peer(sk)   = newsk;
1369
1370         unix_state_unlock(sk);
1371
1372         /* take ten and send info to listening sock */
1373         spin_lock(&other->sk_receive_queue.lock);
1374         __skb_queue_tail(&other->sk_receive_queue, skb);
1375         spin_unlock(&other->sk_receive_queue.lock);
1376         unix_state_unlock(other);
1377         other->sk_data_ready(other);
1378         sock_put(other);
1379         return 0;
1380
             /* Error exit while other's state lock is still held. */
1381 out_unlock:
1382         if (other)
1383                 unix_state_unlock(other);
1384
             /* Common error exit: release whatever was set up so far. */
1385 out:
1386         kfree_skb(skb);
1387         if (newsk)
1388                 unix_release_sock(newsk, 0);
1389         if (other)
1390                 sock_put(other);
1391         return err;
1392 }
1393
1394 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1395 {
1396         struct sock *ska = socka->sk, *skb = sockb->sk;
1397
1398         /* Join our sockets back to back */
1399         sock_hold(ska);
1400         sock_hold(skb);
1401         unix_peer(ska) = skb;
1402         unix_peer(skb) = ska;
1403         init_peercred(ska);
1404         init_peercred(skb);
1405
1406         if (ska->sk_type != SOCK_DGRAM) {
1407                 ska->sk_state = TCP_ESTABLISHED;
1408                 skb->sk_state = TCP_ESTABLISHED;
1409                 socka->state  = SS_CONNECTED;
1410                 sockb->state  = SS_CONNECTED;
1411         }
1412         return 0;
1413 }
1414
1415 static void unix_sock_inherit_flags(const struct socket *old,
1416                                     struct socket *new)
1417 {
1418         if (test_bit(SOCK_PASSCRED, &old->flags))
1419                 set_bit(SOCK_PASSCRED, &new->flags);
1420         if (test_bit(SOCK_PASSSEC, &old->flags))
1421                 set_bit(SOCK_PASSSEC, &new->flags);
1422 }
1423
1424 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1425                        bool kern)
1426 {
1427         struct sock *sk = sock->sk;
1428         struct sock *tsk;
1429         struct sk_buff *skb;
1430         int err;
1431
1432         err = -EOPNOTSUPP;
1433         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1434                 goto out;
1435
1436         err = -EINVAL;
1437         if (sk->sk_state != TCP_LISTEN)
1438                 goto out;
1439
1440         /* If socket state is TCP_LISTEN it cannot change (for now...),
1441          * so that no locks are necessary.
1442          */
1443
1444         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1445         if (!skb) {
1446                 /* This means receive shutdown. */
1447                 if (err == 0)
1448                         err = -EINVAL;
1449                 goto out;
1450         }
1451
1452         tsk = skb->sk;
1453         skb_free_datagram(sk, skb);
1454         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1455
1456         /* attach accepted sock to socket */
1457         unix_state_lock(tsk);
1458         newsock->state = SS_CONNECTED;
1459         unix_sock_inherit_flags(sock, newsock);
1460         sock_graft(tsk, newsock);
1461         unix_state_unlock(tsk);
1462         return 0;
1463
1464 out:
1465         return err;
1466 }
1467
1468
1469 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1470 {
1471         struct sock *sk = sock->sk;
1472         struct unix_address *addr;
1473         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1474         int err = 0;
1475
1476         if (peer) {
1477                 sk = unix_peer_get(sk);
1478
1479                 err = -ENOTCONN;
1480                 if (!sk)
1481                         goto out;
1482                 err = 0;
1483         } else {
1484                 sock_hold(sk);
1485         }
1486
1487         addr = smp_load_acquire(&unix_sk(sk)->addr);
1488         if (!addr) {
1489                 sunaddr->sun_family = AF_UNIX;
1490                 sunaddr->sun_path[0] = 0;
1491                 err = sizeof(short);
1492         } else {
1493                 err = addr->len;
1494                 memcpy(sunaddr, addr->name, addr->len);
1495         }
1496         sock_put(sk);
1497 out:
1498         return err;
1499 }
1500
1501 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1502 {
1503         int err = 0;
1504
1505         UNIXCB(skb).pid  = get_pid(scm->pid);
1506         UNIXCB(skb).uid = scm->creds.uid;
1507         UNIXCB(skb).gid = scm->creds.gid;
1508         UNIXCB(skb).fp = NULL;
1509         unix_get_secdata(scm, skb);
1510         if (scm->fp && send_fds)
1511                 err = unix_attach_fds(scm, skb);
1512
1513         skb->destructor = unix_destruct_scm;
1514         return err;
1515 }
1516
1517 static bool unix_passcred_enabled(const struct socket *sock,
1518                                   const struct sock *other)
1519 {
1520         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1521                !other->sk_socket ||
1522                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1523 }
1524
1525 /*
1526  * Some apps rely on write() giving SCM_CREDENTIALS
1527  * We include credentials if source or destination socket
1528  * asserted SOCK_PASSCRED.
1529  */
1530 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1531                             const struct sock *other)
1532 {
1533         if (UNIXCB(skb).pid)
1534                 return;
1535         if (unix_passcred_enabled(sock, other)) {
1536                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1537                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1538         }
1539 }
1540
1541 static int maybe_init_creds(struct scm_cookie *scm,
1542                             struct socket *socket,
1543                             const struct sock *other)
1544 {
1545         int err;
1546         struct msghdr msg = { .msg_controllen = 0 };
1547
1548         err = scm_send(socket, &msg, scm, false);
1549         if (err)
1550                 return err;
1551
1552         if (unix_passcred_enabled(socket, other)) {
1553                 scm->pid = get_pid(task_tgid(current));
1554                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1555         }
1556         return err;
1557 }
1558
1559 static bool unix_skb_scm_eq(struct sk_buff *skb,
1560                             struct scm_cookie *scm)
1561 {
1562         const struct unix_skb_parms *u = &UNIXCB(skb);
1563
1564         return u->pid == scm->pid &&
1565                uid_eq(u->uid, scm->creds.uid) &&
1566                gid_eq(u->gid, scm->creds.gid) &&
1567                unix_secdata_eq(scm, skb);
1568 }
1569
1570 /*
1571  *      Send AF_UNIX data.
1572  */
1573
1574 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1575                               size_t len)
1576 {
1577         struct sock *sk = sock->sk;
1578         struct net *net = sock_net(sk);
1579         struct unix_sock *u = unix_sk(sk);
1580         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1581         struct sock *other = NULL;
1582         int namelen = 0; /* fake GCC */
1583         int err;
1584         unsigned int hash;
1585         struct sk_buff *skb;
1586         long timeo;
1587         struct scm_cookie scm;
1588         int data_len = 0;
1589         int sk_locked;
1590
1591         wait_for_unix_gc();
1592         err = scm_send(sock, msg, &scm, false);
1593         if (err < 0)
1594                 return err;
1595
1596         err = -EOPNOTSUPP;
1597         if (msg->msg_flags&MSG_OOB)
1598                 goto out;
1599
1600         if (msg->msg_namelen) {
1601                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1602                 if (err < 0)
1603                         goto out;
1604                 namelen = err;
1605         } else {
1606                 sunaddr = NULL;
1607                 err = -ENOTCONN;
1608                 other = unix_peer_get(sk);
1609                 if (!other)
1610                         goto out;
1611         }
1612
1613         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1614             && (err = unix_autobind(sock)) != 0)
1615                 goto out;
1616
1617         err = -EMSGSIZE;
1618         if (len > sk->sk_sndbuf - 32)
1619                 goto out;
1620
1621         if (len > SKB_MAX_ALLOC) {
1622                 data_len = min_t(size_t,
1623                                  len - SKB_MAX_ALLOC,
1624                                  MAX_SKB_FRAGS * PAGE_SIZE);
1625                 data_len = PAGE_ALIGN(data_len);
1626
1627                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1628         }
1629
1630         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1631                                    msg->msg_flags & MSG_DONTWAIT, &err,
1632                                    PAGE_ALLOC_COSTLY_ORDER);
1633         if (skb == NULL)
1634                 goto out;
1635
1636         err = unix_scm_to_skb(&scm, skb, true);
1637         if (err < 0)
1638                 goto out_free;
1639
1640         skb_put(skb, len - data_len);
1641         skb->data_len = data_len;
1642         skb->len = len;
1643         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1644         if (err)
1645                 goto out_free;
1646
1647         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1648
1649 restart:
1650         if (!other) {
1651                 err = -ECONNRESET;
1652                 if (sunaddr == NULL)
1653                         goto out_free;
1654
1655                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1656                                         hash, &err);
1657                 if (other == NULL)
1658                         goto out_free;
1659         }
1660
1661         if (sk_filter(other, skb) < 0) {
1662                 /* Toss the packet but do not return any error to the sender */
1663                 err = len;
1664                 goto out_free;
1665         }
1666
1667         sk_locked = 0;
1668         unix_state_lock(other);
1669 restart_locked:
1670         err = -EPERM;
1671         if (!unix_may_send(sk, other))
1672                 goto out_unlock;
1673
1674         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1675                 /*
1676                  *      Check with 1003.1g - what should
1677                  *      datagram error
1678                  */
1679                 unix_state_unlock(other);
1680                 sock_put(other);
1681
1682                 if (!sk_locked)
1683                         unix_state_lock(sk);
1684
1685                 err = 0;
1686                 if (unix_peer(sk) == other) {
1687                         unix_peer(sk) = NULL;
1688                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1689
1690                         unix_state_unlock(sk);
1691
1692                         unix_dgram_disconnected(sk, other);
1693                         sock_put(other);
1694                         err = -ECONNREFUSED;
1695                 } else {
1696                         unix_state_unlock(sk);
1697                 }
1698
1699                 other = NULL;
1700                 if (err)
1701                         goto out_free;
1702                 goto restart;
1703         }
1704
1705         err = -EPIPE;
1706         if (other->sk_shutdown & RCV_SHUTDOWN)
1707                 goto out_unlock;
1708
1709         if (sk->sk_type != SOCK_SEQPACKET) {
1710                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1711                 if (err)
1712                         goto out_unlock;
1713         }
1714
1715         /* other == sk && unix_peer(other) != sk if
1716          * - unix_peer(sk) == NULL, destination address bound to sk
1717          * - unix_peer(sk) == sk by time of get but disconnected before lock
1718          */
1719         if (other != sk &&
1720             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1721                 if (timeo) {
1722                         timeo = unix_wait_for_peer(other, timeo);
1723
1724                         err = sock_intr_errno(timeo);
1725                         if (signal_pending(current))
1726                                 goto out_free;
1727
1728                         goto restart;
1729                 }
1730
1731                 if (!sk_locked) {
1732                         unix_state_unlock(other);
1733                         unix_state_double_lock(sk, other);
1734                 }
1735
1736                 if (unix_peer(sk) != other ||
1737                     unix_dgram_peer_wake_me(sk, other)) {
1738                         err = -EAGAIN;
1739                         sk_locked = 1;
1740                         goto out_unlock;
1741                 }
1742
1743                 if (!sk_locked) {
1744                         sk_locked = 1;
1745                         goto restart_locked;
1746                 }
1747         }
1748
1749         if (unlikely(sk_locked))
1750                 unix_state_unlock(sk);
1751
1752         if (sock_flag(other, SOCK_RCVTSTAMP))
1753                 __net_timestamp(skb);
1754         maybe_add_creds(skb, sock, other);
1755         skb_queue_tail(&other->sk_receive_queue, skb);
1756         unix_state_unlock(other);
1757         other->sk_data_ready(other);
1758         sock_put(other);
1759         scm_destroy(&scm);
1760         return len;
1761
1762 out_unlock:
1763         if (sk_locked)
1764                 unix_state_unlock(sk);
1765         unix_state_unlock(other);
1766 out_free:
1767         kfree_skb(skb);
1768 out:
1769         if (other)
1770                 sock_put(other);
1771         scm_destroy(&scm);
1772         return err;
1773 }
1774
1775 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1776  * bytes, and a minimum of a full page.
      *
      * unix_stream_sendmsg() uses this value to cap the size of each skb it
      * builds (SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ) and passes
      * get_order(UNIX_SKB_FRAGS_SZ) as the maximum page order when allocating.
      *
      * NOTE(review): presumably get_order() rounds up to a whole-page order, so
      * with PAGE_SIZE > 32768 this evaluates to exactly one page - confirm.
1777  */
1778 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1779
1780 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1781                                size_t len)
1782 {
1783         struct sock *sk = sock->sk;
1784         struct sock *other = NULL;
1785         int err, size;
1786         struct sk_buff *skb;
1787         int sent = 0;
1788         struct scm_cookie scm;
1789         bool fds_sent = false;
1790         int data_len;
1791
1792         wait_for_unix_gc();
1793         err = scm_send(sock, msg, &scm, false);
1794         if (err < 0)
1795                 return err;
1796
1797         err = -EOPNOTSUPP;
1798         if (msg->msg_flags&MSG_OOB)
1799                 goto out_err;
1800
1801         if (msg->msg_namelen) {
1802                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1803                 goto out_err;
1804         } else {
1805                 err = -ENOTCONN;
1806                 other = unix_peer(sk);
1807                 if (!other)
1808                         goto out_err;
1809         }
1810
1811         if (sk->sk_shutdown & SEND_SHUTDOWN)
1812                 goto pipe_err;
1813
1814         while (sent < len) {
1815                 size = len - sent;
1816
1817                 /* Keep two messages in the pipe so it schedules better */
1818                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1819
1820                 /* allow fallback to order-0 allocations */
1821                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1822
1823                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1824
1825                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1826
1827                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1828                                            msg->msg_flags & MSG_DONTWAIT, &err,
1829                                            get_order(UNIX_SKB_FRAGS_SZ));
1830                 if (!skb)
1831                         goto out_err;
1832
1833                 /* Only send the fds in the first buffer */
1834                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1835                 if (err < 0) {
1836                         kfree_skb(skb);
1837                         goto out_err;
1838                 }
1839                 fds_sent = true;
1840
1841                 skb_put(skb, size - data_len);
1842                 skb->data_len = data_len;
1843                 skb->len = size;
1844                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1845                 if (err) {
1846                         kfree_skb(skb);
1847                         goto out_err;
1848                 }
1849
1850                 unix_state_lock(other);
1851
1852                 if (sock_flag(other, SOCK_DEAD) ||
1853                     (other->sk_shutdown & RCV_SHUTDOWN))
1854                         goto pipe_err_free;
1855
1856                 maybe_add_creds(skb, sock, other);
1857                 skb_queue_tail(&other->sk_receive_queue, skb);
1858                 unix_state_unlock(other);
1859                 other->sk_data_ready(other);
1860                 sent += size;
1861         }
1862
1863         scm_destroy(&scm);
1864
1865         return sent;
1866
1867 pipe_err_free:
1868         unix_state_unlock(other);
1869         kfree_skb(skb);
1870 pipe_err:
1871         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1872                 send_sig(SIGPIPE, current, 0);
1873         err = -EPIPE;
1874 out_err:
1875         scm_destroy(&scm);
1876         return sent ? : err;
1877 }
1878
1879 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1880                                     int offset, size_t size, int flags)
1881 {
1882         int err;
1883         bool send_sigpipe = false;
1884         bool init_scm = true;
1885         struct scm_cookie scm;
1886         struct sock *other, *sk = socket->sk;
1887         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1888
1889         if (flags & MSG_OOB)
1890                 return -EOPNOTSUPP;
1891
1892         other = unix_peer(sk);
1893         if (!other || sk->sk_state != TCP_ESTABLISHED)
1894                 return -ENOTCONN;
1895
1896         if (false) {
1897 alloc_skb:
1898                 unix_state_unlock(other);
1899                 mutex_unlock(&unix_sk(other)->iolock);
1900                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1901                                               &err, 0);
1902                 if (!newskb)
1903                         goto err;
1904         }
1905
1906         /* we must acquire iolock as we modify already present
1907          * skbs in the sk_receive_queue and mess with skb->len
1908          */
1909         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1910         if (err) {
1911                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1912                 goto err;
1913         }
1914
1915         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1916                 err = -EPIPE;
1917                 send_sigpipe = true;
1918                 goto err_unlock;
1919         }
1920
1921         unix_state_lock(other);
1922
1923         if (sock_flag(other, SOCK_DEAD) ||
1924             other->sk_shutdown & RCV_SHUTDOWN) {
1925                 err = -EPIPE;
1926                 send_sigpipe = true;
1927                 goto err_state_unlock;
1928         }
1929
1930         if (init_scm) {
1931                 err = maybe_init_creds(&scm, socket, other);
1932                 if (err)
1933                         goto err_state_unlock;
1934                 init_scm = false;
1935         }
1936
1937         skb = skb_peek_tail(&other->sk_receive_queue);
1938         if (tail && tail == skb) {
1939                 skb = newskb;
1940         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1941                 if (newskb) {
1942                         skb = newskb;
1943                 } else {
1944                         tail = skb;
1945                         goto alloc_skb;
1946                 }
1947         } else if (newskb) {
1948                 /* this is fast path, we don't necessarily need to
1949                  * call to kfree_skb even though with newskb == NULL
1950                  * this - does no harm
1951                  */
1952                 consume_skb(newskb);
1953                 newskb = NULL;
1954         }
1955
1956         if (skb_append_pagefrags(skb, page, offset, size)) {
1957                 tail = skb;
1958                 goto alloc_skb;
1959         }
1960
1961         skb->len += size;
1962         skb->data_len += size;
1963         skb->truesize += size;
1964         refcount_add(size, &sk->sk_wmem_alloc);
1965
1966         if (newskb) {
1967                 err = unix_scm_to_skb(&scm, skb, false);
1968                 if (err)
1969                         goto err_state_unlock;
1970                 spin_lock(&other->sk_receive_queue.lock);
1971                 __skb_queue_tail(&other->sk_receive_queue, newskb);
1972                 spin_unlock(&other->sk_receive_queue.lock);
1973         }
1974
1975         unix_state_unlock(other);
1976         mutex_unlock(&unix_sk(other)->iolock);
1977
1978         other->sk_data_ready(other);
1979         scm_destroy(&scm);
1980         return size;
1981
1982 err_state_unlock:
1983         unix_state_unlock(other);
1984 err_unlock:
1985         mutex_unlock(&unix_sk(other)->iolock);
1986 err:
1987         kfree_skb(newskb);
1988         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1989                 send_sig(SIGPIPE, current, 0);
1990         if (!init_scm)
1991                 scm_destroy(&scm);
1992         return err;
1993 }
1994
1995 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1996                                   size_t len)
1997 {
1998         int err;
1999         struct sock *sk = sock->sk;
2000
2001         err = sock_error(sk);
2002         if (err)
2003                 return err;
2004
2005         if (sk->sk_state != TCP_ESTABLISHED)
2006                 return -ENOTCONN;
2007
2008         if (msg->msg_namelen)
2009                 msg->msg_namelen = 0;
2010
2011         return unix_dgram_sendmsg(sock, msg, len);
2012 }
2013
2014 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2015                                   size_t size, int flags)
2016 {
2017         struct sock *sk = sock->sk;
2018
2019         if (sk->sk_state != TCP_ESTABLISHED)
2020                 return -ENOTCONN;
2021
2022         return unix_dgram_recvmsg(sock, msg, size, flags);
2023 }
2024
2025 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2026 {
2027         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2028
2029         if (addr) {
2030                 msg->msg_namelen = addr->len;
2031                 memcpy(msg->msg_name, addr->name, addr->len);
2032         }
2033 }
2034
2035 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2036                               size_t size, int flags)
2037 {
2038         struct scm_cookie scm;
2039         struct sock *sk = sock->sk;
2040         struct unix_sock *u = unix_sk(sk);
2041         struct sk_buff *skb, *last;
2042         long timeo;
2043         int err;
2044         int peeked, skip;
2045
2046         err = -EOPNOTSUPP;
2047         if (flags&MSG_OOB)
2048                 goto out;
2049
2050         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2051
2052         do {
2053                 mutex_lock(&u->iolock);
2054
2055                 skip = sk_peek_offset(sk, flags);
2056                 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2057                                               &err, &last);
2058                 if (skb)
2059                         break;
2060
2061                 mutex_unlock(&u->iolock);
2062
2063                 if (err != -EAGAIN)
2064                         break;
2065         } while (timeo &&
2066                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2067
2068         if (!skb) { /* implies iolock unlocked */
2069                 unix_state_lock(sk);
2070                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2071                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2072                     (sk->sk_shutdown & RCV_SHUTDOWN))
2073                         err = 0;
2074                 unix_state_unlock(sk);
2075                 goto out;
2076         }
2077
2078         if (wq_has_sleeper(&u->peer_wait))
2079                 wake_up_interruptible_sync_poll(&u->peer_wait,
2080                                                 EPOLLOUT | EPOLLWRNORM |
2081                                                 EPOLLWRBAND);
2082
2083         if (msg->msg_name)
2084                 unix_copy_addr(msg, skb->sk);
2085
2086         if (size > skb->len - skip)
2087                 size = skb->len - skip;
2088         else if (size < skb->len - skip)
2089                 msg->msg_flags |= MSG_TRUNC;
2090
2091         err = skb_copy_datagram_msg(skb, skip, msg, size);
2092         if (err)
2093                 goto out_free;
2094
2095         if (sock_flag(sk, SOCK_RCVTSTAMP))
2096                 __sock_recv_timestamp(msg, sk, skb);
2097
2098         memset(&scm, 0, sizeof(scm));
2099
2100         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2101         unix_set_secdata(&scm, skb);
2102
2103         if (!(flags & MSG_PEEK)) {
2104                 if (UNIXCB(skb).fp)
2105                         unix_detach_fds(&scm, skb);
2106
2107                 sk_peek_offset_bwd(sk, skb->len);
2108         } else {
2109                 /* It is questionable: on PEEK we could:
2110                    - do not return fds - good, but too simple 8)
2111                    - return fds, and do not return them on read (old strategy,
2112                      apparently wrong)
2113                    - clone fds (I chose it for now, it is the most universal
2114                      solution)
2115
2116                    POSIX 1003.1g does not actually define this clearly
2117                    at all. POSIX 1003.1g doesn't define a lot of things
2118                    clearly however!
2119
2120                 */
2121
2122                 sk_peek_offset_fwd(sk, size);
2123
2124                 if (UNIXCB(skb).fp)
2125                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2126         }
2127         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2128
2129         scm_recv(sock, msg, &scm, flags);
2130
2131 out_free:
2132         skb_free_datagram(sk, skb);
2133         mutex_unlock(&u->iolock);
2134 out:
2135         return err;
2136 }
2137
2138 /*
2139  *      Sleep until more data has arrived. But check for races..
2140  */
2141 static long unix_stream_data_wait(struct sock *sk, long timeo,
2142                                   struct sk_buff *last, unsigned int last_len,
2143                                   bool freezable)
2144 {
2145         struct sk_buff *tail;
2146         DEFINE_WAIT(wait);
2147
2148         unix_state_lock(sk);
2149
2150         for (;;) {
2151                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2152
2153                 tail = skb_peek_tail(&sk->sk_receive_queue);
2154                 if (tail != last ||
2155                     (tail && tail->len != last_len) ||
2156                     sk->sk_err ||
2157                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2158                     signal_pending(current) ||
2159                     !timeo)
2160                         break;
2161
2162                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2163                 unix_state_unlock(sk);
2164                 if (freezable)
2165                         timeo = freezable_schedule_timeout(timeo);
2166                 else
2167                         timeo = schedule_timeout(timeo);
2168                 unix_state_lock(sk);
2169
2170                 if (sock_flag(sk, SOCK_DEAD))
2171                         break;
2172
2173                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2174         }
2175
2176         finish_wait(sk_sleep(sk), &wait);
2177         unix_state_unlock(sk);
2178         return timeo;
2179 }
2180
2181 static unsigned int unix_skb_len(const struct sk_buff *skb)
2182 {
2183         return skb->len - UNIXCB(skb).consumed;
2184 }
2185
/*
 * Parameters of one stream read, shared by the recvmsg and splice paths and
 * consumed by unix_stream_read_generic().
 */
2186 struct unix_stream_read_state {
             /* Called as recv_actor(skb, skip, chunk, state) to move data out
              * of an skb; the read loop treats a negative result as failure
              * and any other result as the number of bytes handled.
              */
2187         int (*recv_actor)(struct sk_buff *, int, int,
2188                           struct unix_stream_read_state *);
             /* socket being read */
2189         struct socket *socket;
             /* destination message; left unset (NULL) by the splice path */
2190         struct msghdr *msg;
             /* destination pipe, used by the splice actor */
2191         struct pipe_inode_info *pipe;
             /* maximum number of bytes to read */
2192         size_t size;
             /* MSG_* receive flags */
2193         int flags;
             /* flags handed to skb_splice_bits() by the splice actor */
2194         unsigned int splice_flags;
2195 };
2196
/*
 * unix_stream_read_generic - common receive loop for stream sockets.
 *
 * @state:     describes the read: socket, destination, size, flags and the
 *             recv_actor that moves the data out of each skb
 * @freezable: passed through to unix_stream_data_wait() to select the
 *             freezable variant of the sleep
 *
 * Walks the socket's receive queue under u->iolock, handing each skb (minus
 * the already consumed part and the peek offset) to state->recv_actor until
 * @state->size bytes were handled or one of the loop's stop conditions hits.
 * Without MSG_PEEK the consumed data is accounted in the skb and fully
 * consumed skbs are unlinked; with MSG_PEEK the peek offset is advanced
 * instead.
 *
 * Returns the value of 'copied' when it is non-zero (the byte count, or
 * -EFAULT if the very first actor call failed), otherwise 'err'
 * (e.g. -EINVAL when not connected, -EOPNOTSUPP for MSG_OOB, -EAGAIN,
 * -ECONNRESET, or 0).
 */
2197 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2198                                     bool freezable)
2199 {
2200         struct scm_cookie scm;
2201         struct socket *sock = state->socket;
2202         struct sock *sk = sock->sk;
2203         struct unix_sock *u = unix_sk(sk);
2204         int copied = 0;
2205         int flags = state->flags;
2206         int noblock = flags & MSG_DONTWAIT;
2207         bool check_creds = false;
2208         int target;
2209         int err = 0;
2210         long timeo;
2211         int skip;
2212         size_t size = state->size;
2213         unsigned int last_len;
2214
2215         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2216                 err = -EINVAL;
2217                 goto out;
2218         }
2219
2220         if (unlikely(flags & MSG_OOB)) {
2221                 err = -EOPNOTSUPP;
2222                 goto out;
2223         }
2224
2225         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2226         timeo = sock_rcvtimeo(sk, noblock);
2227
2228         memset(&scm, 0, sizeof(scm));
2229
2230         /* Lock the socket to prevent queue disordering
2231          * while sleeps in memcpy_tomsg
2232          */
2233         mutex_lock(&u->iolock);
2234
             /* Starting offset into the queue; a negative peek offset is
              * clamped to 0.
              */
2235         skip = max(sk_peek_offset(sk, flags), 0);
2236
2237         do {
2238                 int chunk;
2239                 bool drop_skb;
2240                 struct sk_buff *skb, *last;
2241
2242 redo:
                     /* (Re)sample the head of the queue under the state lock. */
2243                 unix_state_lock(sk);
2244                 if (sock_flag(sk, SOCK_DEAD)) {
2245                         err = -ECONNRESET;
2246                         goto unlock;
2247                 }
2248                 last = skb = skb_peek(&sk->sk_receive_queue);
2249                 last_len = last ? last->len : 0;
2250 again:
2251                 if (skb == NULL) {
                             /* Nothing (more) to read: finish if the target
                              * was reached, otherwise report error/shutdown
                              * or wait for data.
                              */
2252                         if (copied >= target)
2253                                 goto unlock;
2254
2255                         /*
2256                          *      POSIX 1003.1g mandates this order.
2257                          */
2258
2259                         err = sock_error(sk);
2260                         if (err)
2261                                 goto unlock;
2262                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2263                                 goto unlock;
2264
2265                         unix_state_unlock(sk);
2266                         if (!timeo) {
2267                                 err = -EAGAIN;
2268                                 break;
2269                         }
2270
                             /* iolock is dropped while sleeping and retaken
                              * before the queue is looked at again.
                              */
2271                         mutex_unlock(&u->iolock);
2272
2273                         timeo = unix_stream_data_wait(sk, timeo, last,
2274                                                       last_len, freezable);
2275
2276                         if (signal_pending(current)) {
                                     /* This exit skips the common tail (iolock
                                      * is not held), so scm is released here.
                                      */
2277                                 err = sock_intr_errno(timeo);
2278                                 scm_destroy(&scm);
2279                                 goto out;
2280                         }
2281
2282                         mutex_lock(&u->iolock);
2283                         goto redo;
2284 unlock:
2285                         unix_state_unlock(sk);
2286                         break;
2287                 }
2288
                     /* Step over skbs that lie entirely before the peek
                      * offset.
                      */
2289                 while (skip >= unix_skb_len(skb)) {
2290                         skip -= unix_skb_len(skb);
2291                         last = skb;
2292                         last_len = skb->len;
2293                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2294                         if (!skb)
2295                                 goto again;
2296                 }
2297
2298                 unix_state_unlock(sk);
2299
2300                 if (check_creds) {
2301                         /* Never glue messages from different writers */
2302                         if (!unix_skb_scm_eq(skb, &scm))
2303                                 break;
2304                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2305                         /* Copy credentials */
2306                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2307                         unix_set_secdata(&scm, skb);
2308                         check_creds = true;
2309                 }
2310
2311                 /* Copy address just once */
2312                 if (state->msg && state->msg->msg_name) {
2313                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2314                                          state->msg->msg_name);
2315                         unix_copy_addr(state->msg, skb->sk);
2316                         sunaddr = NULL;
2317                 }
2318
2319                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                     /* Hold an extra reference across the actor call; it is
                      * dropped again by consume_skb() below.
                      */
2320                 skb_get(skb);
2321                 chunk = state->recv_actor(skb, skip, chunk, state);
2322                 drop_skb = !unix_skb_len(skb);
2323                 /* skb is only safe to use if !drop_skb */
2324                 consume_skb(skb);
2325                 if (chunk < 0) {
                             /* Actor failed: report -EFAULT only when nothing
                              * was copied before.
                              */
2326                         if (copied == 0)
2327                                 copied = -EFAULT;
2328                         break;
2329                 }
2330                 copied += chunk;
2331                 size -= chunk;
2332
2333                 if (drop_skb) {
2334                         /* the skb was touched by a concurrent reader;
2335                          * we should not expect anything from this skb
2336                          * anymore and assume it invalid - we can be
2337                          * sure it was dropped from the socket queue
2338                          *
2339                          * let's report a short read
2340                          */
2341                         err = 0;
2342                         break;
2343                 }
2344
2345                 /* Mark read part of skb as used */
2346                 if (!(flags & MSG_PEEK)) {
2347                         UNIXCB(skb).consumed += chunk;
2348
2349                         sk_peek_offset_bwd(sk, chunk);
2350
2351                         if (UNIXCB(skb).fp)
2352                                 unix_detach_fds(&scm, skb);
2353
                             /* skb still has unread data: stop here */
2354                         if (unix_skb_len(skb))
2355                                 break;
2356
2357                         skb_unlink(skb, &sk->sk_receive_queue);
2358                         consume_skb(skb);
2359
                             /* stop after an skb that carried fds */
2360                         if (scm.fp)
2361                                 break;
2362                 } else {
2363                         /* It is questionable, see note in unix_dgram_recvmsg.
2364                          */
2365                         if (UNIXCB(skb).fp)
2366                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2367
2368                         sk_peek_offset_fwd(sk, chunk);
2369
2370                         if (UNIXCB(skb).fp)
2371                                 break;
2372
                             /* Peeking: continue with the next queued skb, if
                              * there is one.
                              */
2373                         skip = 0;
2374                         last = skb;
2375                         last_len = skb->len;
2376                         unix_state_lock(sk);
2377                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2378                         if (skb)
2379                                 goto again;
2380                         unix_state_unlock(sk);
2381                         break;
2382                 }
2383         } while (size);
2384
2385         mutex_unlock(&u->iolock);
             /* Deliver collected ancillary data, or release it when there is
              * no msghdr (splice path).
              */
2386         if (state->msg)
2387                 scm_recv(sock, state->msg, &scm, flags);
2388         else
2389                 scm_destroy(&scm);
2390 out:
2391         return copied ? : err;
2392 }
2393
2394 static int unix_stream_read_actor(struct sk_buff *skb,
2395                                   int skip, int chunk,
2396                                   struct unix_stream_read_state *state)
2397 {
2398         int ret;
2399
2400         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2401                                     state->msg, chunk);
2402         return ret ?: chunk;
2403 }
2404
2405 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2406                                size_t size, int flags)
2407 {
2408         struct unix_stream_read_state state = {
2409                 .recv_actor = unix_stream_read_actor,
2410                 .socket = sock,
2411                 .msg = msg,
2412                 .size = size,
2413                 .flags = flags
2414         };
2415
2416         return unix_stream_read_generic(&state, true);
2417 }
2418
2419 static int unix_stream_splice_actor(struct sk_buff *skb,
2420                                     int skip, int chunk,
2421                                     struct unix_stream_read_state *state)
2422 {
2423         return skb_splice_bits(skb, state->socket->sk,
2424                                UNIXCB(skb).consumed + skip,
2425                                state->pipe, chunk, state->splice_flags);
2426 }
2427
2428 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2429                                        struct pipe_inode_info *pipe,
2430                                        size_t size, unsigned int flags)
2431 {
2432         struct unix_stream_read_state state = {
2433                 .recv_actor = unix_stream_splice_actor,
2434                 .socket = sock,
2435                 .pipe = pipe,
2436                 .size = size,
2437                 .splice_flags = flags,
2438         };
2439
2440         if (unlikely(*ppos))
2441                 return -ESPIPE;
2442
2443         if (sock->file->f_flags & O_NONBLOCK ||
2444             flags & SPLICE_F_NONBLOCK)
2445                 state.flags = MSG_DONTWAIT;
2446
2447         return unix_stream_read_generic(&state, false);
2448 }
2449
2450 static int unix_shutdown(struct socket *sock, int mode)
2451 {
2452         struct sock *sk = sock->sk;
2453         struct sock *other;
2454
2455         if (mode < SHUT_RD || mode > SHUT_RDWR)
2456                 return -EINVAL;
2457         /* This maps:
2458          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2459          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2460          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2461          */
2462         ++mode;
2463
2464         unix_state_lock(sk);
2465         sk->sk_shutdown |= mode;
2466         other = unix_peer(sk);
2467         if (other)
2468                 sock_hold(other);
2469         unix_state_unlock(sk);
2470         sk->sk_state_change(sk);
2471
2472         if (other &&
2473                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2474
2475                 int peer_mode = 0;
2476
2477                 if (mode&RCV_SHUTDOWN)
2478                         peer_mode |= SEND_SHUTDOWN;
2479                 if (mode&SEND_SHUTDOWN)
2480                         peer_mode |= RCV_SHUTDOWN;
2481                 unix_state_lock(other);
2482                 other->sk_shutdown |= peer_mode;
2483                 unix_state_unlock(other);
2484                 other->sk_state_change(other);
2485                 if (peer_mode == SHUTDOWN_MASK)
2486                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2487                 else if (peer_mode & RCV_SHUTDOWN)
2488                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2489         }
2490         if (other)
2491                 sock_put(other);
2492
2493         return 0;
2494 }
2495
2496 long unix_inq_len(struct sock *sk)
2497 {
2498         struct sk_buff *skb;
2499         long amount = 0;
2500
2501         if (sk->sk_state == TCP_LISTEN)
2502                 return -EINVAL;
2503
2504         spin_lock(&sk->sk_receive_queue.lock);
2505         if (sk->sk_type == SOCK_STREAM ||
2506             sk->sk_type == SOCK_SEQPACKET) {
2507                 skb_queue_walk(&sk->sk_receive_queue, skb)
2508                         amount += unix_skb_len(skb);
2509         } else {
2510                 skb = skb_peek(&sk->sk_receive_queue);
2511                 if (skb)
2512                         amount = skb->len;
2513         }
2514         spin_unlock(&sk->sk_receive_queue.lock);
2515
2516         return amount;
2517 }
2518 EXPORT_SYMBOL_GPL(unix_inq_len);
2519
/* unix_outq_len - the socket's send-memory count, per sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2525
2526 static int unix_open_file(struct sock *sk)
2527 {
2528         struct path path;
2529         struct file *f;
2530         int fd;
2531
2532         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2533                 return -EPERM;
2534
2535         if (!smp_load_acquire(&unix_sk(sk)->addr))
2536                 return -ENOENT;
2537
2538         path = unix_sk(sk)->path;
2539         if (!path.dentry)
2540                 return -ENOENT;
2541
2542         path_get(&path);
2543
2544         fd = get_unused_fd_flags(O_CLOEXEC);
2545         if (fd < 0)
2546                 goto out;
2547
2548         f = dentry_open(&path, O_PATH, current_cred());
2549         if (IS_ERR(f)) {
2550                 put_unused_fd(fd);
2551                 fd = PTR_ERR(f);
2552                 goto out;
2553         }
2554
2555         fd_install(fd, f);
2556 out:
2557         path_put(&path);
2558
2559         return fd;
2560 }
2561
2562 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2563 {
2564         struct sock *sk = sock->sk;
2565         long amount = 0;
2566         int err;
2567
2568         switch (cmd) {
2569         case SIOCOUTQ:
2570                 amount = unix_outq_len(sk);
2571                 err = put_user(amount, (int __user *)arg);
2572                 break;
2573         case SIOCINQ:
2574                 amount = unix_inq_len(sk);
2575                 if (amount < 0)
2576                         err = amount;
2577                 else
2578                         err = put_user(amount, (int __user *)arg);
2579                 break;
2580         case SIOCUNIXFILE:
2581                 err = unix_open_file(sk);
2582                 break;
2583         default:
2584                 err = -ENOIOCTLCMD;
2585                 break;
2586         }
2587         return err;
2588 }
2589
2590 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2591 {
2592         struct sock *sk = sock->sk;
2593         __poll_t mask;
2594
2595         sock_poll_wait(file, sock, wait);
2596         mask = 0;
2597
2598         /* exceptional events? */
2599         if (sk->sk_err)
2600                 mask |= EPOLLERR;
2601         if (sk->sk_shutdown == SHUTDOWN_MASK)
2602                 mask |= EPOLLHUP;
2603         if (sk->sk_shutdown & RCV_SHUTDOWN)
2604                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2605
2606         /* readable? */
2607         if (!skb_queue_empty(&sk->sk_receive_queue))
2608                 mask |= EPOLLIN | EPOLLRDNORM;
2609
2610         /* Connection-based need to check for termination and startup */
2611         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2612             sk->sk_state == TCP_CLOSE)
2613                 mask |= EPOLLHUP;
2614
2615         /*
2616          * we set writable also when the other side has shut down the
2617          * connection. This prevents stuck sockets.
2618          */
2619         if (unix_writable(sk))
2620                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2621
2622         return mask;
2623 }
2624
2625 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2626                                     poll_table *wait)
2627 {
2628         struct sock *sk = sock->sk, *other;
2629         unsigned int writable;
2630         __poll_t mask;
2631
2632         sock_poll_wait(file, sock, wait);
2633         mask = 0;
2634
2635         /* exceptional events? */
2636         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2637                 mask |= EPOLLERR |
2638                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2639
2640         if (sk->sk_shutdown & RCV_SHUTDOWN)
2641                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2642         if (sk->sk_shutdown == SHUTDOWN_MASK)
2643                 mask |= EPOLLHUP;
2644
2645         /* readable? */
2646         if (!skb_queue_empty(&sk->sk_receive_queue))
2647                 mask |= EPOLLIN | EPOLLRDNORM;
2648
2649         /* Connection-based need to check for termination and startup */
2650         if (sk->sk_type == SOCK_SEQPACKET) {
2651                 if (sk->sk_state == TCP_CLOSE)
2652                         mask |= EPOLLHUP;
2653                 /* connection hasn't started yet? */
2654                 if (sk->sk_state == TCP_SYN_SENT)
2655                         return mask;
2656         }
2657
2658         /* No write status requested, avoid expensive OUT tests. */
2659         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2660                 return mask;
2661
2662         writable = unix_writable(sk);
2663         if (writable) {
2664                 unix_state_lock(sk);
2665
2666                 other = unix_peer(sk);
2667                 if (other && unix_peer(other) != sk &&
2668                     unix_recvq_full(other) &&
2669                     unix_dgram_peer_wake_me(sk, other))
2670                         writable = 0;
2671
2672                 unix_state_unlock(sk);
2673         }
2674
2675         if (writable)
2676                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2677         else
2678                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2679
2680         return mask;
2681 }
2682
2683 #ifdef CONFIG_PROC_FS
2684
/*
 * A seq_file position packs two values into one word: the hash bucket
 * index in the bits above BUCKET_SPACE and the offset within that bucket
 * in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE	(BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x)	((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x)	((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket index @b and offset @o. */
#define set_bucket_offset(b, o)	(((b) << BUCKET_SPACE) | (o))
2690
2691 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2692 {
2693         unsigned long offset = get_offset(*pos);
2694         unsigned long bucket = get_bucket(*pos);
2695         struct sock *sk;
2696         unsigned long count = 0;
2697
2698         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2699                 if (sock_net(sk) != seq_file_net(seq))
2700                         continue;
2701                 if (++count == offset)
2702                         break;
2703         }
2704
2705         return sk;
2706 }
2707
2708 static struct sock *unix_next_socket(struct seq_file *seq,
2709                                      struct sock *sk,
2710                                      loff_t *pos)
2711 {
2712         unsigned long bucket;
2713
2714         while (sk > (struct sock *)SEQ_START_TOKEN) {
2715                 sk = sk_next(sk);
2716                 if (!sk)
2717                         goto next_bucket;
2718                 if (sock_net(sk) == seq_file_net(seq))
2719                         return sk;
2720         }
2721
2722         do {
2723                 sk = unix_from_bucket(seq, pos);
2724                 if (sk)
2725                         return sk;
2726
2727 next_bucket:
2728                 bucket = get_bucket(*pos) + 1;
2729                 *pos = set_bucket_offset(bucket, 1);
2730         } while (bucket < ARRAY_SIZE(unix_socket_table));
2731
2732         return NULL;
2733 }
2734
2735 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2736         __acquires(unix_table_lock)
2737 {
2738         spin_lock(&unix_table_lock);
2739
2740         if (!*pos)
2741                 return SEQ_START_TOKEN;
2742
2743         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2744                 return NULL;
2745
2746         return unix_next_socket(seq, NULL, pos);
2747 }
2748
2749 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2750 {
2751         ++*pos;
2752         return unix_next_socket(seq, v, pos);
2753 }
2754
2755 static void unix_seq_stop(struct seq_file *seq, void *v)
2756         __releases(unix_table_lock)
2757 {
2758         spin_unlock(&unix_table_lock);
2759 }
2760
2761 static int unix_seq_show(struct seq_file *seq, void *v)
2762 {
2763
2764         if (v == SEQ_START_TOKEN)
2765                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2766                          "Inode Path\n");
2767         else {
2768                 struct sock *s = v;
2769                 struct unix_sock *u = unix_sk(s);
2770                 unix_state_lock(s);
2771
2772                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2773                         s,
2774                         refcount_read(&s->sk_refcnt),
2775                         0,
2776                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2777                         s->sk_type,
2778                         s->sk_socket ?
2779                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2780                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2781                         sock_i_ino(s));
2782
2783                 if (u->addr) {  // under unix_table_lock here
2784                         int i, len;
2785                         seq_putc(seq, ' ');
2786
2787                         i = 0;
2788                         len = u->addr->len - sizeof(short);
2789                         if (!UNIX_ABSTRACT(s))
2790                                 len--;
2791                         else {
2792                                 seq_putc(seq, '@');
2793                                 i++;
2794                         }
2795                         for ( ; i < len; i++)
2796                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2797                                          '@');
2798                 }
2799                 unix_state_unlock(s);
2800                 seq_putc(seq, '\n');
2801         }
2802
2803         return 0;
2804 }
2805
2806 static const struct seq_operations unix_seq_ops = {
2807         .start  = unix_seq_start,
2808         .next   = unix_seq_next,
2809         .stop   = unix_seq_stop,
2810         .show   = unix_seq_show,
2811 };
2812 #endif
2813
2814 static const struct net_proto_family unix_family_ops = {
2815         .family = PF_UNIX,
2816         .create = unix_create,
2817         .owner  = THIS_MODULE,
2818 };
2819
2820
2821 static int __net_init unix_net_init(struct net *net)
2822 {
2823         int error = -ENOMEM;
2824
2825         net->unx.sysctl_max_dgram_qlen = 10;
2826         if (unix_sysctl_register(net))
2827                 goto out;
2828
2829 #ifdef CONFIG_PROC_FS
2830         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2831                         sizeof(struct seq_net_private))) {
2832                 unix_sysctl_unregister(net);
2833                 goto out;
2834         }
2835 #endif
2836         error = 0;
2837 out:
2838         return error;
2839 }
2840
2841 static void __net_exit unix_net_exit(struct net *net)
2842 {
2843         unix_sysctl_unregister(net);
2844         remove_proc_entry("unix", net->proc_net);
2845 }
2846
2847 static struct pernet_operations unix_net_ops = {
2848         .init = unix_net_init,
2849         .exit = unix_net_exit,
2850 };
2851
2852 static int __init af_unix_init(void)
2853 {
2854         int rc = -1;
2855
2856         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2857
2858         rc = proto_register(&unix_proto, 1);
2859         if (rc != 0) {
2860                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2861                 goto out;
2862         }
2863
2864         sock_register(&unix_family_ops);
2865         register_pernet_subsys(&unix_net_ops);
2866 out:
2867         return rc;
2868 }
2869
2870 static void __exit af_unix_exit(void)
2871 {
2872         sock_unregister(PF_UNIX);
2873         proto_unregister(&unix_proto);
2874         unregister_pernet_subsys(&unix_net_ops);
2875 }
2876
2877 /* Earlier than device_initcall() so that other drivers invoking
2878    request_module() don't end up in a loop when modprobe tries
2879    to use a UNIX socket. But later than subsys_initcall() because
2880    we depend on stuff initialised there */
2881 fs_initcall(af_unix_init);
2882 module_exit(af_unix_exit);
2883
2884 MODULE_LICENSE("GPL");
2885 MODULE_ALIAS_NETPROTO(PF_UNIX);