net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
  28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  29  *                                      by above two patches.
  30  *           Andrea Arcangeli   :       If possible we block in connect(2)
  31  *                                      if the max backlog of the listen socket
  32  *                                      has been reached. This won't break
  33  *                                      old apps and it will avoid a huge number
  34  *                                      of socks hashed (this is for unix_gc()
  35  *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116
 117 #include "scm.h"
 118
/*
 * Global socket hash table with 2 * UNIX_HASH_SIZE buckets.
 * unix_sockets_unbound() hands out buckets from the upper half
 * (index UNIX_HASH_SIZE and above).
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock held by the insert, remove and lookup helpers of the table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/* Socket counter: raised in unix_create1(), lowered in unix_sock_destructor(). */
static atomic_long_t unix_nr_socks;
 124
 125
 126 static struct hlist_head *unix_sockets_unbound(void *addr)
 127 {
 128         unsigned long hash = (unsigned long)addr;
 129
 130         hash ^= hash >> 16;
 131         hash ^= hash >> 8;
 132         hash %= UNIX_HASH_SIZE;
 133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134 }
 135
/*
 * True when the address hash of @sk is below UNIX_HASH_SIZE.
 * unix_sk(sk)->addr is dereferenced unconditionally, so @sk must have an
 * address attached.
 * NOTE(review): presumably only abstract names end up with a hash in that
 * range - confirm against the bind/autobind code.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
 138 #ifdef CONFIG_SECURITY_NETWORK
 139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 140 {
 141         UNIXCB(skb).secid = scm->secid;
 142 }
 143
 144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 145 {
 146         scm->secid = UNIXCB(skb).secid;
 147 }
 148
 149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 150 {
 151         return (scm->secid == UNIXCB(skb).secid);
 152 }
 153 #else
 154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 155 { }
 156
 157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 161 {
 162         return true;
 163 }
 164 #endif /* CONFIG_SECURITY_NETWORK */
 165
 166 /*
 167  *  SMP locking strategy:
 168  *    hash table is protected with spinlock unix_table_lock
 169  *    each socket state is protected by separate spin lock.
 170  */
 171
 172 static inline unsigned int unix_hash_fold(__wsum n)
 173 {
 174         unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176         hash ^= hash>>8;
 177         return hash&(UNIX_HASH_SIZE-1);
 178 }
 179
/*
 * Shorthand for the peer pointer kept in the unix_sock of @sk. It expands
 * to the member itself, so it is also used as an assignment target.
 */
#define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183 {
 184         return unix_peer(osk) == sk;
 185 }
 186
 187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188 {
 189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190 }
 191
 192 static inline int unix_recvq_full(const struct sock *sk)
 193 {
 194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 195 }
 196
 197 static inline int unix_recvq_full_lockless(const struct sock *sk)
 198 {
 199         return skb_queue_len_lockless(&sk->sk_receive_queue) >
 200                 READ_ONCE(sk->sk_max_ack_backlog);
 201 }
 202
 203 struct sock *unix_peer_get(struct sock *s)
 204 {
 205         struct sock *peer;
 206
 207         unix_state_lock(s);
 208         peer = unix_peer(s);
 209         if (peer)
 210                 sock_hold(peer);
 211         unix_state_unlock(s);
 212         return peer;
 213 }
 214 EXPORT_SYMBOL_GPL(unix_peer_get);
 215
 216 static inline void unix_release_addr(struct unix_address *addr)
 217 {
 218         if (refcount_dec_and_test(&addr->refcnt))
 219                 kfree(addr);
 220 }
 221
 222 /*
 223  *      Check unix socket name:
 224  *              - should be not zero length.
 225  *              - if started by not zero, should be NULL terminated (FS object)
 226  *              - if started by zero, it is abstract name.
 227  */
 228
 229 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 230 {
 231         *hashp = 0;
 232
 233         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 234                 return -EINVAL;
 235         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 236                 return -EINVAL;
 237         if (sunaddr->sun_path[0]) {
 238                 /*
 239                  * This may look like an off by one error but it is a bit more
 240                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 241                  * sun_path[108] doesn't as such exist.  However in kernel space
 242                  * we are guaranteed that it is a valid memory location in our
 243                  * kernel address buffer.
 244                  */
 245                 ((char *)sunaddr)[len] = 0;
 246                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 247                 return len;
 248         }
 249
 250         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 251         return len;
 252 }
 253
 254 static void __unix_remove_socket(struct sock *sk)
 255 {
 256         sk_del_node_init(sk);
 257 }
 258
 259 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 260 {
 261         WARN_ON(!sk_unhashed(sk));
 262         sk_add_node(sk, list);
 263 }
 264
 265 static inline void unix_remove_socket(struct sock *sk)
 266 {
 267         spin_lock(&unix_table_lock);
 268         __unix_remove_socket(sk);
 269         spin_unlock(&unix_table_lock);
 270 }
 271
 272 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 273 {
 274         spin_lock(&unix_table_lock);
 275         __unix_insert_socket(list, sk);
 276         spin_unlock(&unix_table_lock);
 277 }
 278
 279 static struct sock *__unix_find_socket_byname(struct net *net,
 280                                               struct sockaddr_un *sunname,
 281                                               int len, int type, unsigned int hash)
 282 {
 283         struct sock *s;
 284
 285         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 286                 struct unix_sock *u = unix_sk(s);
 287
 288                 if (!net_eq(sock_net(s), net))
 289                         continue;
 290
 291                 if (u->addr->len == len &&
 292                     !memcmp(u->addr->name, sunname, len))
 293                         return s;
 294         }
 295         return NULL;
 296 }
 297
 298 static inline struct sock *unix_find_socket_byname(struct net *net,
 299                                                    struct sockaddr_un *sunname,
 300                                                    int len, int type,
 301                                                    unsigned int hash)
 302 {
 303         struct sock *s;
 304
 305         spin_lock(&unix_table_lock);
 306         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 307         if (s)
 308                 sock_hold(s);
 309         spin_unlock(&unix_table_lock);
 310         return s;
 311 }
 312
 313 static struct sock *unix_find_socket_byinode(struct inode *i)
 314 {
 315         struct sock *s;
 316
 317         spin_lock(&unix_table_lock);
 318         sk_for_each(s,
 319                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 320                 struct dentry *dentry = unix_sk(s)->path.dentry;
 321
 322                 if (dentry && d_backing_inode(dentry) == i) {
 323                         sock_hold(s);
 324                         goto found;
 325                 }
 326         }
 327         s = NULL;
 328 found:
 329         spin_unlock(&unix_table_lock);
 330         return s;
 331 }
 332
 333 /* Support code for asymmetrically connected dgram sockets
 334  *
 335  * If a datagram socket is connected to a socket not itself connected
 336  * to the first socket (eg, /dev/log), clients may only enqueue more
 337  * messages if the present receive queue of the server socket is not
 338  * "too large". This means there's a second writeability condition
 339  * poll and sendmsg need to test. The dgram recv code will do a wake
 340  * up on the peer_wait wait queue of a socket upon reception of a
 341  * datagram which needs to be propagated to sleeping would-be writers
 342  * since these might not have sent anything so far. This can't be
 343  * accomplished via poll_wait because the lifetime of the server
 344  * socket might be less than that of its clients if these break their
 345  * association with it or if the server socket is closed while clients
 346  * are still connected to it and there's no way to inform "a polling
 347  * implementation" that it should let go of a certain wait queue
 348  *
 349  * In order to propagate a wake up, a wait_queue_entry_t of the client
 350  * socket is enqueued on the peer_wait queue of the server socket
 351  * whose wake function does a wake_up on the ordinary client socket
 352  * wait queue. This connection is established whenever a write (or
 353  * poll for write) hit the flow control condition and broken when the
 354  * association to the server socket is dissolved or after a wake up
 355  * was relayed.
 356  */
 357
 358 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 359                                       void *key)
 360 {
 361         struct unix_sock *u;
 362         wait_queue_head_t *u_sleep;
 363
 364         u = container_of(q, struct unix_sock, peer_wake);
 365
 366         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 367                             q);
 368         u->peer_wake.private = NULL;
 369
 370         /* relaying can only happen while the wq still exists */
 371         u_sleep = sk_sleep(&u->sk);
 372         if (u_sleep)
 373                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 374
 375         return 0;
 376 }
 377
 378 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 379 {
 380         struct unix_sock *u, *u_other;
 381         int rc;
 382
 383         u = unix_sk(sk);
 384         u_other = unix_sk(other);
 385         rc = 0;
 386         spin_lock(&u_other->peer_wait.lock);
 387
 388         if (!u->peer_wake.private) {
 389                 u->peer_wake.private = other;
 390                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 391
 392                 rc = 1;
 393         }
 394
 395         spin_unlock(&u_other->peer_wait.lock);
 396         return rc;
 397 }
 398
 399 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 400                                             struct sock *other)
 401 {
 402         struct unix_sock *u, *u_other;
 403
 404         u = unix_sk(sk);
 405         u_other = unix_sk(other);
 406         spin_lock(&u_other->peer_wait.lock);
 407
 408         if (u->peer_wake.private == other) {
 409                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 410                 u->peer_wake.private = NULL;
 411         }
 412
 413         spin_unlock(&u_other->peer_wait.lock);
 414 }
 415
 416 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 417                                                    struct sock *other)
 418 {
 419         unix_dgram_peer_wake_disconnect(sk, other);
 420         wake_up_interruptible_poll(sk_sleep(sk),
 421                                    EPOLLOUT |
 422                                    EPOLLWRNORM |
 423                                    EPOLLWRBAND);
 424 }
 425
 426 /* preconditions:
 427  *      - unix_peer(sk) == other
 428  *      - association is stable
 429  */
 430 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 431 {
 432         int connected;
 433
 434         connected = unix_dgram_peer_wake_connect(sk, other);
 435
 436         /* If other is SOCK_DEAD, we want to make sure we signal
 437          * POLLOUT, such that a subsequent write() can get a
 438          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 439          * to other and its full, we will hang waiting for POLLOUT.
 440          */
 441         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 442                 return 1;
 443
 444         if (connected)
 445                 unix_dgram_peer_wake_disconnect(sk, other);
 446
 447         return 0;
 448 }
 449
 450 static int unix_writable(const struct sock *sk)
 451 {
 452         return sk->sk_state != TCP_LISTEN &&
 453                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 454 }
 455
 456 static void unix_write_space(struct sock *sk)
 457 {
 458         struct socket_wq *wq;
 459
 460         rcu_read_lock();
 461         if (unix_writable(sk)) {
 462                 wq = rcu_dereference(sk->sk_wq);
 463                 if (skwq_has_sleeper(wq))
 464                         wake_up_interruptible_sync_poll(&wq->wait,
 465                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 466                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 467         }
 468         rcu_read_unlock();
 469 }
 470
 471 /* When dgram socket disconnects (or changes its peer), we clear its receive
 472  * queue of packets arrived from previous peer. First, it allows to do
 473  * flow control based only on wmem_alloc; second, sk connected to peer
 474  * may receive messages only from that peer. */
 475 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 476 {
 477         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 478                 skb_queue_purge(&sk->sk_receive_queue);
 479                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 480
 481                 /* If one link of bidirectional dgram pipe is disconnected,
 482                  * we signal error. Messages are lost. Do not make this,
 483                  * when peer was not connected to us.
 484                  */
 485                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 486                         other->sk_err = ECONNRESET;
 487                         other->sk_error_report(other);
 488                 }
 489         }
 490 }
 491
 492 static void unix_sock_destructor(struct sock *sk)
 493 {
 494         struct unix_sock *u = unix_sk(sk);
 495
 496         skb_queue_purge(&sk->sk_receive_queue);
 497
 498         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 499         WARN_ON(!sk_unhashed(sk));
 500         WARN_ON(sk->sk_socket);
 501         if (!sock_flag(sk, SOCK_DEAD)) {
 502                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 503                 return;
 504         }
 505
 506         if (u->addr)
 507                 unix_release_addr(u->addr);
 508
 509         atomic_long_dec(&unix_nr_socks);
 510         local_bh_disable();
 511         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 512         local_bh_enable();
 513 #ifdef UNIX_REFCNT_DEBUG
 514         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 515                 atomic_long_read(&unix_nr_socks));
 516 #endif
 517 }
 518
 519 static void unix_release_sock(struct sock *sk, int embrion)
 520 {
 521         struct unix_sock *u = unix_sk(sk);
 522         struct path path;
 523         struct sock *skpair;
 524         struct sk_buff *skb;
 525         int state;
 526
 527         unix_remove_socket(sk);
 528
 529         /* Clear state */
 530         unix_state_lock(sk);
 531         sock_orphan(sk);
 532         sk->sk_shutdown = SHUTDOWN_MASK;
 533         path         = u->path;
 534         u->path.dentry = NULL;
 535         u->path.mnt = NULL;
 536         state = sk->sk_state;
 537         sk->sk_state = TCP_CLOSE;
 538         unix_state_unlock(sk);
 539
 540         wake_up_interruptible_all(&u->peer_wait);
 541
 542         skpair = unix_peer(sk);
 543
 544         if (skpair != NULL) {
 545                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 546                         unix_state_lock(skpair);
 547                         /* No more writes */
 548                         skpair->sk_shutdown = SHUTDOWN_MASK;
 549                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 550                                 skpair->sk_err = ECONNRESET;
 551                         unix_state_unlock(skpair);
 552                         skpair->sk_state_change(skpair);
 553                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 554                 }
 555
 556                 unix_dgram_peer_wake_disconnect(sk, skpair);
 557                 sock_put(skpair); /* It may now die */
 558                 unix_peer(sk) = NULL;
 559         }
 560
 561         /* Try to flush out this socket. Throw out buffers at least */
 562
 563         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 564                 if (state == TCP_LISTEN)
 565                         unix_release_sock(skb->sk, 1);
 566                 /* passed fds are erased in the kfree_skb hook        */
 567                 UNIXCB(skb).consumed = skb->len;
 568                 kfree_skb(skb);
 569         }
 570
 571         if (path.dentry)
 572                 path_put(&path);
 573
 574         sock_put(sk);
 575
 576         /* ---- Socket is dead now and most probably destroyed ---- */
 577
 578         /*
 579          * Fixme: BSD difference: In BSD all sockets connected to us get
 580          *        ECONNRESET and we die on the spot. In Linux we behave
 581          *        like files and pipes do and wait for the last
 582          *        dereference.
 583          *
 584          * Can't we simply set sock->err?
 585          *
 586          *        What the above comment does talk about? --ANK(980817)
 587          */
 588
 589         if (unix_tot_inflight)
 590                 unix_gc();              /* Garbage collect fds */
 591 }
 592
 593 static void init_peercred(struct sock *sk)
 594 {
 595         put_pid(sk->sk_peer_pid);
 596         if (sk->sk_peer_cred)
 597                 put_cred(sk->sk_peer_cred);
 598         sk->sk_peer_pid  = get_pid(task_tgid(current));
 599         sk->sk_peer_cred = get_current_cred();
 600 }
 601
 602 static void copy_peercred(struct sock *sk, struct sock *peersk)
 603 {
 604         put_pid(sk->sk_peer_pid);
 605         if (sk->sk_peer_cred)
 606                 put_cred(sk->sk_peer_cred);
 607         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 608         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 609 }
 610
 611 static int unix_listen(struct socket *sock, int backlog)
 612 {
 613         int err;
 614         struct sock *sk = sock->sk;
 615         struct unix_sock *u = unix_sk(sk);
 616
 617         err = -EOPNOTSUPP;
 618         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 619                 goto out;       /* Only stream/seqpacket sockets accept */
 620         err = -EINVAL;
 621         if (!u->addr)
 622                 goto out;       /* No listens on an unbound socket */
 623         unix_state_lock(sk);
 624         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 625                 goto out_unlock;
 626         if (backlog > sk->sk_max_ack_backlog)
 627                 wake_up_interruptible_all(&u->peer_wait);
 628         sk->sk_max_ack_backlog  = backlog;
 629         sk->sk_state            = TCP_LISTEN;
 630         /* set credentials so connect can copy them */
 631         init_peercred(sk);
 632         err = 0;
 633
 634 out_unlock:
 635         unix_state_unlock(sk);
 636 out:
 637         return err;
 638 }
 639
/*
 * Forward declarations of the socket operation handlers, so that the
 * proto_ops tables below can reference them ahead of their definitions.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
/* the compat ioctl handler only exists when CONFIG_COMPAT is set */
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
 669
 670 static int unix_set_peek_off(struct sock *sk, int val)
 671 {
 672         struct unix_sock *u = unix_sk(sk);
 673
 674         if (mutex_lock_interruptible(&u->iolock))
 675                 return -EINTR;
 676
 677         sk->sk_peek_off = val;
 678         mutex_unlock(&u->iolock);
 679
 680         return 0;
 681 }
 682
 683 #ifdef CONFIG_PROC_FS
 684 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
 685 {
 686         struct sock *sk = sock->sk;
 687         struct unix_sock *u;
 688
 689         if (sk) {
 690                 u = unix_sk(sock->sk);
 691                 seq_printf(m, "scm_fds: %u\n",
 692                            atomic_read(&u->scm_stat.nr_fds));
 693         }
 694 }
 695 #else
 696 #define unix_show_fdinfo NULL
 697 #endif
 698
 699 static const struct proto_ops unix_stream_ops = {
 700         .family =       PF_UNIX,
 701         .owner =        THIS_MODULE,
 702         .release =      unix_release,
 703         .bind =         unix_bind,
 704         .connect =      unix_stream_connect,
 705         .socketpair =   unix_socketpair,
 706         .accept =       unix_accept,
 707         .getname =      unix_getname,
 708         .poll =         unix_poll,
 709         .ioctl =        unix_ioctl,
 710 #ifdef CONFIG_COMPAT
 711         .compat_ioctl = unix_compat_ioctl,
 712 #endif
 713         .listen =       unix_listen,
 714         .shutdown =     unix_shutdown,
 715         .sendmsg =      unix_stream_sendmsg,
 716         .recvmsg =      unix_stream_recvmsg,
 717         .mmap =         sock_no_mmap,
 718         .sendpage =     unix_stream_sendpage,
 719         .splice_read =  unix_stream_splice_read,
 720         .set_peek_off = unix_set_peek_off,
 721         .show_fdinfo =  unix_show_fdinfo,
 722 };
 723
 724 static const struct proto_ops unix_dgram_ops = {
 725         .family =       PF_UNIX,
 726         .owner =        THIS_MODULE,
 727         .release =      unix_release,
 728         .bind =         unix_bind,
 729         .connect =      unix_dgram_connect,
 730         .socketpair =   unix_socketpair,
 731         .accept =       sock_no_accept,
 732         .getname =      unix_getname,
 733         .poll =         unix_dgram_poll,
 734         .ioctl =        unix_ioctl,
 735 #ifdef CONFIG_COMPAT
 736         .compat_ioctl = unix_compat_ioctl,
 737 #endif
 738         .listen =       sock_no_listen,
 739         .shutdown =     unix_shutdown,
 740         .sendmsg =      unix_dgram_sendmsg,
 741         .recvmsg =      unix_dgram_recvmsg,
 742         .mmap =         sock_no_mmap,
 743         .sendpage =     sock_no_sendpage,
 744         .set_peek_off = unix_set_peek_off,
 745         .show_fdinfo =  unix_show_fdinfo,
 746 };
 747
 748 static const struct proto_ops unix_seqpacket_ops = {
 749         .family =       PF_UNIX,
 750         .owner =        THIS_MODULE,
 751         .release =      unix_release,
 752         .bind =         unix_bind,
 753         .connect =      unix_stream_connect,
 754         .socketpair =   unix_socketpair,
 755         .accept =       unix_accept,
 756         .getname =      unix_getname,
 757         .poll =         unix_dgram_poll,
 758         .ioctl =        unix_ioctl,
 759 #ifdef CONFIG_COMPAT
 760         .compat_ioctl = unix_compat_ioctl,
 761 #endif
 762         .listen =       unix_listen,
 763         .shutdown =     unix_shutdown,
 764         .sendmsg =      unix_seqpacket_sendmsg,
 765         .recvmsg =      unix_seqpacket_recvmsg,
 766         .mmap =         sock_no_mmap,
 767         .sendpage =     sock_no_sendpage,
 768         .set_peek_off = unix_set_peek_off,
 769         .show_fdinfo =  unix_show_fdinfo,
 770 };
 771
 772 static struct proto unix_proto = {
 773         .name                   = "UNIX",
 774         .owner                  = THIS_MODULE,
 775         .obj_size               = sizeof(struct unix_sock),
 776 };
 777
 778 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 779 {
             /*
              * Allocate a new AF_UNIX sock in @net, initialise it with
              * sock_init_data(@sock, sk) and put it on the unbound-sockets list.
              * unix_stream_connect() calls this with a NULL @sock.
              *
              * Returns the new sock, or NULL when the global socket count is
              * above 2 * get_max_files() or sk_alloc() fails.
              */
 780         struct sock *sk = NULL;
 781         struct unix_sock *u;
 782
             /*
              * Count the socket first and check the limit afterwards; the
              * "out" path undoes the increment when no sock is returned.
              */
 783         atomic_long_inc(&unix_nr_socks);
 784         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 785                 goto out;
 786
 787         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 788         if (!sk)
 789                 goto out;
 790
 791         sock_init_data(sock, sk);
 792
             /* AF_UNIX specific callbacks and limits on the generic sock. */
 793         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 794         sk->sk_write_space      = unix_write_space;
 795         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 796         sk->sk_destruct         = unix_sock_destructor;
             /* Per-socket unix_sock state: no path, nothing in flight yet. */
 797         u         = unix_sk(sk);
 798         u->path.dentry = NULL;
 799         u->path.mnt = NULL;
 800         spin_lock_init(&u->lock);
 801         atomic_long_set(&u->inflight, 0);
 802         INIT_LIST_HEAD(&u->link);
 803         mutex_init(&u->iolock); /* single task reading lock */
 804         mutex_init(&u->bindlock); /* single task binding lock */
 805         init_waitqueue_head(&u->peer_wait);
 806         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 807         memset(&u->scm_stat, 0, sizeof(struct scm_stat));
 808         unix_insert_socket(unix_sockets_unbound(sk), sk);
 809 out:
             /*
              * Reached on success as well as on failure: either drop the
              * count taken above, or account the new sock as in use for its
              * protocol (done with bottom halves disabled).
              */
 810         if (sk == NULL)
 811                 atomic_long_dec(&unix_nr_socks);
 812         else {
 813                 local_bh_disable();
 814                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 815                 local_bh_enable();
 816         }
 817         return sk;
 818 }
 819
 820 static int unix_create(struct net *net, struct socket *sock, int protocol,
 821                        int kern)
 822 {
 823         /* Only the default protocol (0) or PF_UNIX itself is accepted. */
 824         if (protocol != 0 && protocol != PF_UNIX)
 825                 return -EPROTONOSUPPORT;
 826
 827         sock->state = SS_UNCONNECTED;
 828
 829         /*
 830          * Believe it or not BSD has AF_UNIX, SOCK_RAW though nothing
 831          * uses it; such sockets are turned into SOCK_DGRAM ones here.
 832          */
 833         if (sock->type == SOCK_RAW)
 834                 sock->type = SOCK_DGRAM;
 835
 836         /* Pick the operations table that matches the socket type. */
 837         if (sock->type == SOCK_STREAM)
 838                 sock->ops = &unix_stream_ops;
 839         else if (sock->type == SOCK_DGRAM)
 840                 sock->ops = &unix_dgram_ops;
 841         else if (sock->type == SOCK_SEQPACKET)
 842                 sock->ops = &unix_seqpacket_ops;
 843         else
 844                 return -ESOCKTNOSUPPORT;
 845
 846         /* A NULL sock from unix_create1() is reported as -ENOMEM. */
 847         if (!unix_create1(net, sock, kern))
 848                 return -ENOMEM;
 849         return 0;
 850 }
 851
 852 static int unix_release(struct socket *sock)
 853 {
 854         struct sock *attached = sock->sk;
 855
 856         /* Release the sock, if any, and detach it from the socket. */
 857         if (attached) {
 858                 unix_release_sock(attached, 0);
 859                 sock->sk = NULL;
 860         }
 861
 862         return 0;
 863 }
 864
 865 static int unix_autobind(struct socket *sock)
 866 {
             /*
              * Bind @sock to an automatically chosen name: five hex digits of
              * the static counter "ordernum", written at sun_path + 1 so that
              * sun_path[0] keeps the zero it got from kzalloc().
              *
              * Returns 0 on success or when the socket already has an address,
              * the error from mutex_lock_interruptible(), -ENOMEM, or -ENOSPC
              * once every candidate name was found to be in use.
              */
 867         struct sock *sk = sock->sk;
 868         struct net *net = sock_net(sk);
 869         struct unix_sock *u = unix_sk(sk);
 870         static u32 ordernum = 1;
 871         struct unix_address *addr;
 872         int err;
 873         unsigned int retries = 0;
 874
 875         err = mutex_lock_interruptible(&u->bindlock);
 876         if (err)
 877                 return err;
 878
             /* Already bound: err is still 0 here, so success is reported. */
 879         if (u->addr)
 880                 goto out;
 881
 882         err = -ENOMEM;
             /* Header plus sizeof(short) plus 16 bytes for the generated name. */
 883         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 884         if (!addr)
 885                 goto out;
 886
 887         addr->name->sun_family = AF_UNIX;
 888         refcount_set(&addr->refcnt, 1);
 889
 890 retry:
             /*
              * Length = formatted digits + 1 for the leading zero byte of
              * sun_path + sizeof(short) (presumably the sun_family field).
              * Note that ordernum is read here before unix_table_lock is held.
              */
 891         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 892         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 893
 894         spin_lock(&unix_table_lock);
             /* Advance the counter for the next attempt; it wraps at 20 bits. */
 895         ordernum = (ordernum+1)&0xFFFFF;
 896
 897         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 898                                       addr->hash)) {
 899                 spin_unlock(&unix_table_lock);
 900                 /*
 901                  * __unix_find_socket_byname() may take long time if many names
 902                  * are already in use.
 903                  */
 904                 cond_resched();
 905                 /* Give up if all names seems to be in use. */
 906                 if (retries++ == 0xFFFFF) {
 907                         err = -ENOSPC;
 908                         kfree(addr);
 909                         goto out;
 910                 }
 911                 goto retry;
 912         }
             /*
              * The name is free.  Fold the socket type into the hash, then,
              * still under unix_table_lock, move the sock from its current
              * chain to the one selected by the new hash; u->addr is
              * published with release semantics in between.
              */
 913         addr->hash ^= sk->sk_type;
 914
 915         __unix_remove_socket(sk);
 916         smp_store_release(&u->addr, addr);
 917         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 918         spin_unlock(&unix_table_lock);
 919         err = 0;
 920
 921 out:    mutex_unlock(&u->bindlock);
 922         return err;
 923 }
 924
 925 static struct sock *unix_find_other(struct net *net,
 926                                     struct sockaddr_un *sunname, int len,
 927                                     int type, unsigned int hash, int *error)
 928 {
             /*
              * Find the socket bound to @sunname whose sk_type equals @type.
              *
              * A name whose sun_path[0] is non-zero is resolved as a filesystem
              * path and matched by inode; any other name is looked up with
              * unix_find_socket_byname() using @len and @hash.
              *
              * Returns the sock (callers in this file sock_put() it when they
              * are done), or NULL with a negative errno stored in *@error.
              */
 929         struct sock *u;
 930         struct path path;
 931         int err = 0;
 932
 933         if (sunname->sun_path[0]) {
 934                 struct inode *inode;
 935                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 936                 if (err)
 937                         goto fail;
 938                 inode = d_backing_inode(path.dentry);
                     /* Write permission on the inode is required. */
 939                 err = inode_permission(inode, MAY_WRITE);
 940                 if (err)
 941                         goto put_fail;
 942
                     /* Not a socket inode, or no sock bound to it: refused. */
 943                 err = -ECONNREFUSED;
 944                 if (!S_ISSOCK(inode->i_mode))
 945                         goto put_fail;
 946                 u = unix_find_socket_byinode(inode);
 947                 if (!u)
 948                         goto put_fail;
 949
                     /* The access time is only touched when the type matches. */
 950                 if (u->sk_type == type)
 951                         touch_atime(&path);
 952
 953                 path_put(&path);
 954
                     /* path is already released here, hence "fail", not "put_fail". */
 955                 err = -EPROTOTYPE;
 956                 if (u->sk_type != type) {
 957                         sock_put(u);
 958                         goto fail;
 959                 }
 960         } else {
 961                 err = -ECONNREFUSED;
 962                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 963                 if (u) {
 964                         struct dentry *dentry;
                             /* Touch atime only if the found sock has a path. */
 965                         dentry = unix_sk(u)->path.dentry;
 966                         if (dentry)
 967                                 touch_atime(&unix_sk(u)->path);
 968                 } else
 969                         goto fail;
 970         }
 971         return u;
 972
 973 put_fail:
 974         path_put(&path);
 975 fail:
 976         *error = err;
 977         return NULL;
 978 }
 979
 980 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 981 {
             /*
              * Create the filesystem node for @sun_path with mode @mode.
              *
              * On success @res receives its own references to the mount
              * (mntget) and to the new dentry (dget).  Returns 0, or the
              * negative errno from kern_path_create(), security_path_mknod()
              * or vfs_mknod(); @res is not written on failure.
              */
 982         struct dentry *dentry;
 983         struct path path;
 984         int err = 0;
 985         /*
 986          * Get the parent directory, calculate the hash for last
 987          * component.
 988          */
 989         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 990         err = PTR_ERR(dentry);
 991         if (IS_ERR(dentry))
 992                 return err;
 993
 994         /*
 995          * All right, let's create it.
 996          */
 997         err = security_path_mknod(&path, dentry, mode, 0);
 998         if (!err) {
 999                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1000                 if (!err) {
1001                         res->mnt = mntget(path.mnt);
1002                         res->dentry = dget(dentry);
1003                 }
1004         }
             /* Pairs with kern_path_create(); runs on success and failure. */
1005         done_path_create(&path, dentry);
1006         return err;
1007 }
1008
1009 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1010 {
             /*
              * Bind @sock to the address in @uaddr.
              *
              * - @addr_len == sizeof(short): the name is chosen by
              *   unix_autobind().
              * - sun_path[0] != 0: a filesystem node is created with
              *   unix_mknod() and the sock is hashed by its inode number.
              * - otherwise: the name is checked for duplicates and the sock
              *   is hashed by name, both under unix_table_lock.
              *
              * Returns 0 or a negative errno: -EINVAL (address too short, not
              * AF_UNIX, or socket already bound), -EADDRINUSE (node exists or
              * name taken), -ENOMEM, or whatever unix_mkname(), unix_mknod()
              * or mutex_lock_interruptible() reported.
              *
              * NOTE(review): the filesystem node is created before bindlock
              * is taken and before u->addr is checked.  On the later failure
              * paths only path_put() runs and nothing in this function
              * removes the node - confirm whether a stale socket file can be
              * left behind.
              */
1011         struct sock *sk = sock->sk;
1012         struct net *net = sock_net(sk);
1013         struct unix_sock *u = unix_sk(sk);
1014         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1015         char *sun_path = sunaddr->sun_path;
1016         int err;
1017         unsigned int hash;
1018         struct unix_address *addr;
1019         struct hlist_head *list;
1020         struct path path = { };
1021
1022         err = -EINVAL;
1023         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1024             sunaddr->sun_family != AF_UNIX)
1025                 goto out;
1026
1027         if (addr_len == sizeof(short)) {
1028                 err = unix_autobind(sock);
1029                 goto out;
1030         }
1031
             /* On success unix_mkname() returns the length used from here on. */
1032         err = unix_mkname(sunaddr, addr_len, &hash);
1033         if (err < 0)
1034                 goto out;
1035         addr_len = err;
1036
1037         if (sun_path[0]) {
                     /* Node mode: socket type bits plus the inode mode minus umask. */
1038                 umode_t mode = S_IFSOCK |
1039                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1040                 err = unix_mknod(sun_path, mode, &path);
1041                 if (err) {
1042                         if (err == -EEXIST)
1043                                 err = -EADDRINUSE;
1044                         goto out;
1045                 }
1046         }
1047
1048         err = mutex_lock_interruptible(&u->bindlock);
1049         if (err)
1050                 goto out_put;
1051
             /* A socket that already has an address cannot be bound again. */
1052         err = -EINVAL;
1053         if (u->addr)
1054                 goto out_up;
1055
1056         err = -ENOMEM;
1057         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1058         if (!addr)
1059                 goto out_up;
1060
1061         memcpy(addr->name, sunaddr, addr_len);
1062         addr->len = addr_len;
1063         addr->hash = hash ^ sk->sk_type;
1064         refcount_set(&addr->refcnt, 1);
1065
1066         if (sun_path[0]) {
                     /*
                      * Filesystem name: addr->hash is set to UNIX_HASH_SIZE
                      * and the chain is chosen from the inode number instead.
                      */
1067                 addr->hash = UNIX_HASH_SIZE;
1068                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1069                 spin_lock(&unix_table_lock);
1070                 u->path = path;
1071                 list = &unix_socket_table[hash];
1072         } else {
1073                 spin_lock(&unix_table_lock);
1074                 err = -EADDRINUSE;
1075                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1076                                               sk->sk_type, hash)) {
1077                         unix_release_addr(addr);
1078                         goto out_unlock;
1079                 }
1080
1081                 list = &unix_socket_table[addr->hash];
1082         }
1083
             /*
              * Still under unix_table_lock: move the sock from its current
              * chain to the chosen one, publishing u->addr with release
              * semantics in between.
              */
1084         err = 0;
1085         __unix_remove_socket(sk);
1086         smp_store_release(&u->addr, addr);
1087         __unix_insert_socket(list, sk);
1088
1089 out_unlock:
1090         spin_unlock(&unix_table_lock);
1091 out_up:
1092         mutex_unlock(&u->bindlock);
1093 out_put:
             /* path stays zero-initialised when no filesystem node was made. */
1094         if (err)
1095                 path_put(&path);
1096 out:
1097         return err;
1098 }
1099
1100 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1101 {
1102         struct sock *first, *second;
1103
1104         if (!sk2 || unlikely(sk1 == sk2)) {
1105                 unix_state_lock(sk1);
1106                 return;
1107         }
1108         /* Lock the lower-addressed sock first, the other one nested. */
1109         first = sk1 < sk2 ? sk1 : sk2;
1110         second = sk1 < sk2 ? sk2 : sk1;
1111         unix_state_lock(first);
1112         unix_state_lock_nested(second);
1113 }
1114
1115 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1116 {
1117         /* sk1 is unlocked in every case. */
1118         unix_state_unlock(sk1);
1119
1120         /* sk2 is unlocked only when it is non-NULL and differs from sk1. */
1121         if (sk2 && !unlikely(sk1 == sk2))
1122                 unix_state_unlock(sk2);
1123 }
1124
1125 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1126                               int alen, int flags)
1127 {
             /*
              * Set, replace or (with sa_family == AF_UNSPEC) clear the peer
              * of @sock.
              *
              * Returns 0 on success, or a negative errno: -EINVAL for a too
              * short address, -EPERM when unix_may_send() refuses the peer,
              * or the error from unix_mkname(), unix_autobind(),
              * unix_find_other() or security_unix_may_send().
              */
1128         struct sock *sk = sock->sk;
1129         struct net *net = sock_net(sk);
1130         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1131         struct sock *other;
1132         unsigned int hash;
1133         int err;
1134
1135         err = -EINVAL;
1136         if (alen < offsetofend(struct sockaddr, sa_family))
1137                 goto out;
1138
1139         if (addr->sa_family != AF_UNSPEC) {
1140                 err = unix_mkname(sunaddr, alen, &hash);
1141                 if (err < 0)
1142                         goto out;
1143                 alen = err;
1144
                     /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
1145                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1146                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1147                         goto out;
1148
1149 restart:
1150                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1151                 if (!other)
1152                         goto out;
1153
1154                 unix_state_double_lock(sk, other);
1155
1156                 /* Apparently VFS overslept socket death. Retry. */
1157                 if (sock_flag(other, SOCK_DEAD)) {
1158                         unix_state_double_unlock(sk, other);
1159                         sock_put(other);
1160                         goto restart;
1161                 }
1162
1163                 err = -EPERM;
1164                 if (!unix_may_send(sk, other))
1165                         goto out_unlock;
1166
1167                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1168                 if (err)
1169                         goto out_unlock;
1170
1171         } else {
1172                 /*
1173                  *      1003.1g breaking connected state with AF_UNSPEC
1174                  */
                     /* With a NULL second sock only sk's own lock is taken. */
1175                 other = NULL;
1176                 unix_state_double_lock(sk, other);
1177         }
1178
1179         /*
1180          * If it was connected, reconnect.
1181          */
1182         if (unix_peer(sk)) {
1183                 struct sock *old_peer = unix_peer(sk);
1184                 unix_peer(sk) = other;
1185                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1186
1187                 unix_state_double_unlock(sk, other);
1188
                     /*
                      * Done after the locks are dropped: notify the old peer
                      * unless it was simply re-selected, then drop the
                      * reference that was held on it.
                      */
1189                 if (other != old_peer)
1190                         unix_dgram_disconnected(sk, old_peer);
1191                 sock_put(old_peer);
1192         } else {
1193                 unix_peer(sk) = other;
1194                 unix_state_double_unlock(sk, other);
1195         }
1196         return 0;
1197
1198 out_unlock:
             /* Only reached from the non-AF_UNSPEC branch, so other is set. */
1199         unix_state_double_unlock(sk, other);
1200         sock_put(other);
1201 out:
1202         return err;
1203 }
1204
1205 static long unix_wait_for_peer(struct sock *other, long timeo)
1206         __releases(&unix_sk(other)->lock)
1207 {
             /*
              * Wait on @other's peer_wait queue for at most @timeo.
              *
              * Entered with @other's state lock held (see the __releases
              * annotation); the lock is dropped here before sleeping.
              * Returns the value of schedule_timeout(), or @timeo unchanged
              * when no sleep was needed.
              */
1208         struct unix_sock *u = unix_sk(other);
1209         int sched;
1210         DEFINE_WAIT(wait);
1211
             /* Queue ourselves before evaluating the condition below. */
1212         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1213
             /* Sleep only if the peer is alive, still receiving, and full. */
1214         sched = !sock_flag(other, SOCK_DEAD) &&
1215                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1216                 unix_recvq_full(other);
1217
1218         unix_state_unlock(other);
1219
1220         if (sched)
1221                 timeo = schedule_timeout(timeo);
1222
1223         finish_wait(&u->peer_wait, &wait);
1224         return timeo;
1225 }
1226
1227 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1228                                int addr_len, int flags)
1229 {
             /*
              * Connect @sock to the listening socket bound at @uaddr.
              *
              * A new sock (newsk) and a small skb are allocated up front.  On
              * success newsk becomes the peer of @sock and the skb is queued
              * on the listener's receive queue, which is where unix_accept()
              * reads from.
              *
              * Returns 0 on success or a negative errno, among them -ENOMEM,
              * -ECONNREFUSED (target not listening or receive-shutdown),
              * -EAGAIN (backlog full and no timeout left), -EISCONN and
              * -EINVAL (unexpected state of @sock), plus errors from
              * unix_mkname(), unix_autobind(), unix_find_other(),
              * sock_intr_errno() and security_unix_stream_connect().
              */
1230         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1231         struct sock *sk = sock->sk;
1232         struct net *net = sock_net(sk);
1233         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1234         struct sock *newsk = NULL;
1235         struct sock *other = NULL;
1236         struct sk_buff *skb = NULL;
1237         unsigned int hash;
1238         int st;
1239         int err;
1240         long timeo;
1241
1242         err = unix_mkname(sunaddr, addr_len, &hash);
1243         if (err < 0)
1244                 goto out;
1245         addr_len = err;
1246
             /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
1247         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1248             (err = unix_autobind(sock)) != 0)
1249                 goto out;
1250
1251         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1252
1253         /* First of all allocate resources.
1254            If we will make it after state is locked,
1255            we will have to recheck all again in any case.
1256          */
1257
1258         err = -ENOMEM;
1259
1260         /* create new sock for complete connection */
1261         newsk = unix_create1(sock_net(sk), NULL, 0);
1262         if (newsk == NULL)
1263                 goto out;
1264
1265         /* Allocate skb for sending to listening sock */
1266         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1267         if (skb == NULL)
1268                 goto out;
1269
1270 restart:
1271         /*  Find listening sock. */
1272         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1273         if (!other)
1274                 goto out;
1275
1276         /* Latch state of peer */
1277         unix_state_lock(other);
1278
1279         /* Apparently VFS overslept socket death. Retry. */
1280         if (sock_flag(other, SOCK_DEAD)) {
1281                 unix_state_unlock(other);
1282                 sock_put(other);
1283                 goto restart;
1284         }
1285
1286         err = -ECONNREFUSED;
1287         if (other->sk_state != TCP_LISTEN)
1288                 goto out_unlock;
1289         if (other->sk_shutdown & RCV_SHUTDOWN)
1290                 goto out_unlock;
1291
1292         if (unix_recvq_full(other)) {
1293                 err = -EAGAIN;
1294                 if (!timeo)
1295                         goto out_unlock;
1296
                     /*
                      * unix_wait_for_peer() drops other's state lock, which is
                      * why the signal case below jumps to "out" rather than
                      * "out_unlock".
                      */
1297                 timeo = unix_wait_for_peer(other, timeo);
1298
1299                 err = sock_intr_errno(timeo);
1300                 if (signal_pending(current))
1301                         goto out;
1302                 sock_put(other);
1303                 goto restart;
1304         }
1305
1306         /* Latch our state.
1307
1308            It is tricky place. We need to grab our state lock and cannot
1309            drop lock on peer. It is dangerous because deadlock is
1310            possible. Connect to self case and simultaneous
1311            attempt to connect are eliminated by checking socket
1312            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1313            check this before attempt to grab lock.
1314
1315            Well, and we have to recheck the state after socket locked.
1316          */
1317         st = sk->sk_state;
1318
1319         switch (st) {
1320         case TCP_CLOSE:
1321                 /* This is ok... continue with connect */
1322                 break;
1323         case TCP_ESTABLISHED:
1324                 /* Socket is already connected */
1325                 err = -EISCONN;
1326                 goto out_unlock;
1327         default:
1328                 err = -EINVAL;
1329                 goto out_unlock;
1330         }
1331
1332         unix_state_lock_nested(sk);
1333
             /* State changed while sk was unlocked: start over. */
1334         if (sk->sk_state != st) {
1335                 unix_state_unlock(sk);
1336                 unix_state_unlock(other);
1337                 sock_put(other);
1338                 goto restart;
1339         }
1340
1341         err = security_unix_stream_connect(sk, other, newsk);
1342         if (err) {
1343                 unix_state_unlock(sk);
1344                 goto out_unlock;
1345         }
1346
1347         /* The way is open! Fastly set all the necessary fields... */
1348
             /* newsk's peer pointer gets its own reference on sk. */
1349         sock_hold(sk);
1350         unix_peer(newsk)        = sk;
1351         newsk->sk_state         = TCP_ESTABLISHED;
1352         newsk->sk_type          = sk->sk_type;
1353         init_peercred(newsk);
1354         newu = unix_sk(newsk);
1355         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1356         otheru = unix_sk(other);
1357
1358         /* copy address information from listening to new sock
1359          *
1360          * The contents of *(otheru->addr) and otheru->path
1361          * are seen fully set up here, since we have found
1362          * otheru in hash under unix_table_lock.  Insertion
1363          * into the hash chain we'd found it in had been done
1364          * in an earlier critical area protected by unix_table_lock,
1365          * the same one where we'd set *(otheru->addr) contents,
1366          * as well as otheru->path and otheru->addr itself.
1367          *
1368          * Using smp_store_release() here to set newu->addr
1369          * is enough to make those stores, as well as stores
1370          * to newu->path visible to anyone who gets newu->addr
1371          * by smp_load_acquire().  IOW, the same warranties
1372          * as for unix_sock instances bound in unix_bind() or
1373          * in unix_autobind().
1374          */
1375         if (otheru->path.dentry) {
1376                 path_get(&otheru->path);
1377                 newu->path = otheru->path;
1378         }
1379         refcount_inc(&otheru->addr->refcnt);
1380         smp_store_release(&newu->addr, otheru->addr);
1381
1382         /* Set credentials */
1383         copy_peercred(sk, other);
1384
1385         sock->state     = SS_CONNECTED;
1386         sk->sk_state    = TCP_ESTABLISHED;
             /* sk's peer pointer gets its own reference on newsk. */
1387         sock_hold(newsk);
1388
1389         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1390         unix_peer(sk)   = newsk;
1391
1392         unix_state_unlock(sk);
1393
1394         /* take ten and send info to listening sock */
1395         spin_lock(&other->sk_receive_queue.lock);
1396         __skb_queue_tail(&other->sk_receive_queue, skb);
1397         spin_unlock(&other->sk_receive_queue.lock);
1398         unix_state_unlock(other);
1399         other->sk_data_ready(other);
1400         sock_put(other);
1401         return 0;
1402
1403 out_unlock:
1404         if (other)
1405                 unix_state_unlock(other);
1406
1407 out:
             /* Common failure exit: release whatever was set up so far. */
1408         kfree_skb(skb);
1409         if (newsk)
1410                 unix_release_sock(newsk, 0);
1411         if (other)
1412                 sock_put(other);
1413         return err;
1414 }
1415
1416 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1417 {
1418         struct sock *first = socka->sk, *second = sockb->sk;
1419
1420         /* Take a reference on each sock, then point them at each other. */
1421         sock_hold(first);
1422         sock_hold(second);
1423         unix_peer(first) = second;
1424         unix_peer(second) = first;
1425         init_peercred(first);
1426         init_peercred(second);
1427
1428         if (first->sk_type == SOCK_DGRAM)
1429                 return 0;
1430         first->sk_state = TCP_ESTABLISHED;
1431         second->sk_state = TCP_ESTABLISHED;
1432         socka->state = SS_CONNECTED;
1433         sockb->state = SS_CONNECTED;
1434         return 0;
1435 }
1436
1437 static void unix_sock_inherit_flags(const struct socket *old,
1438                                     struct socket *new)
1439 {
             /*
              * Copy the SOCK_PASSCRED and SOCK_PASSSEC flag bits from @old to
              * @new.  Bits that are clear in @old are left untouched in @new.
              * unix_accept() uses this for the accepted socket.
              */
1440         if (test_bit(SOCK_PASSCRED, &old->flags))
1441                 set_bit(SOCK_PASSCRED, &new->flags);
1442         if (test_bit(SOCK_PASSSEC, &old->flags))
1443                 set_bit(SOCK_PASSSEC, &new->flags);
1444 }
1445
1446 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1447                        bool kern)
1448 {
             /*
              * Accept one pending connection on the listening socket @sock
              * and graft the connected sock onto @newsock.
              *
              * Returns 0 on success, -EOPNOTSUPP for socket types other than
              * SOCK_STREAM and SOCK_SEQPACKET, -EINVAL when @sock is not in
              * TCP_LISTEN, or the error from skb_recv_datagram() (a NULL skb
              * with err == 0 is reported as -EINVAL).  @kern is not used here.
              */
1449         struct sock *sk = sock->sk;
1450         struct sock *tsk;
1451         struct sk_buff *skb;
1452         int err;
1453
1454         err = -EOPNOTSUPP;
1455         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1456                 goto out;
1457
1458         err = -EINVAL;
1459         if (sk->sk_state != TCP_LISTEN)
1460                 goto out;
1461
1462         /* If socket state is TCP_LISTEN it cannot change (for now...),
1463          * so that no locks are necessary.
1464          */
1465
1466         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1467         if (!skb) {
1468                 /* This means receive shutdown. */
1469                 if (err == 0)
1470                         err = -EINVAL;
1471                 goto out;
1472         }
1473
             /*
              * The skb only serves to hand over its owning sock.
              * NOTE(review): presumably skb->sk is the newsk that
              * unix_stream_connect() allocated the skb against - confirm.
              */
1474         tsk = skb->sk;
1475         skb_free_datagram(sk, skb);
             /* Wake tasks sleeping on the listener's peer_wait queue. */
1476         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1477
1478         /* attach accepted sock to socket */
1479         unix_state_lock(tsk);
1480         newsock->state = SS_CONNECTED;
1481         unix_sock_inherit_flags(sock, newsock);
1482         sock_graft(tsk, newsock);
1483         unix_state_unlock(tsk);
1484         return 0;
1485
1486 out:
1487         return err;
1488 }
1489
1490
1491 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1492 {
             /*
              * Copy the address of @sock (or, when @peer is non-zero, of its
              * peer) into @uaddr.
              *
              * Returns the number of address bytes written, or -ENOTCONN when
              * the peer was requested and there is none.  A socket without an
              * address yields AF_UNIX with sun_path[0] set to 0 and a length
              * of sizeof(short).
              */
1493         struct sock *sk = sock->sk;
1494         struct unix_address *addr;
1495         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1496         int err = 0;
1497
             /* Either branch leaves a reference on sk; it is dropped below. */
1498         if (peer) {
1499                 sk = unix_peer_get(sk);
1500
1501                 err = -ENOTCONN;
1502                 if (!sk)
1503                         goto out;
1504                 err = 0;
1505         } else {
1506                 sock_hold(sk);
1507         }
1508
             /* Acquire load; the bind paths store u->addr with release. */
1509         addr = smp_load_acquire(&unix_sk(sk)->addr);
1510         if (!addr) {
1511                 sunaddr->sun_family = AF_UNIX;
1512                 sunaddr->sun_path[0] = 0;
1513                 err = sizeof(short);
1514         } else {
1515                 err = addr->len;
1516                 memcpy(sunaddr, addr->name, addr->len);
1517         }
1518         sock_put(sk);
1519 out:
1520         return err;
1521 }
1522
1523 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1524 {
             /*
              * Copy the credentials held in @scm into the control block of
              * @skb and, when @send_fds is true and @scm carries a file list,
              * attach it with unix_attach_fds().
              *
              * Returns 0, or the error from unix_attach_fds().  The skb
              * destructor is installed in either case.
              */
1525         int err = 0;
1526
             /* get_pid() takes a reference on the pid stored in the skb. */
1527         UNIXCB(skb).pid  = get_pid(scm->pid);
1528         UNIXCB(skb).uid = scm->creds.uid;
1529         UNIXCB(skb).gid = scm->creds.gid;
1530         UNIXCB(skb).fp = NULL;
1531         unix_get_secdata(scm, skb);
1532         if (scm->fp && send_fds)
1533                 err = unix_attach_fds(scm, skb);
1534
1535         skb->destructor = unix_destruct_scm;
1536         return err;
1537 }
1538
1539 static bool unix_passcred_enabled(const struct socket *sock,
1540                                   const struct sock *other)
1541 {
             /* True if @sock has SOCK_PASSCRED set or @other has no socket. */
1542         if (test_bit(SOCK_PASSCRED, &sock->flags) || !other->sk_socket)
1543                 return true;
1544         return test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1545 }
1546
1547 /*
1548  * Some apps rely on write() giving SCM_CREDENTIALS
1549  * We include credentials if source or destination socket
1550  * asserted SOCK_PASSCRED.
1551  */
1552 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1553                             const struct sock *other)
1554 {
1555         /* Keep a pid that is already set; skip if neither end wants creds. */
1556         if (UNIXCB(skb).pid || !unix_passcred_enabled(sock, other))
1557                 return;
1558
1559         UNIXCB(skb).pid = get_pid(task_tgid(current));
1560         current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1561 }
1562
1563 static int maybe_init_creds(struct scm_cookie *scm,
1564                             struct socket *socket,
1565                             const struct sock *other)
1566 {
             /*
              * Initialise @scm through scm_send() with an empty control
              * message, then fill in the current task's tgid, uid and gid
              * when unix_passcred_enabled() says credentials are wanted.
              *
              * Returns 0, or the error from scm_send().
              */
1567         int err;
1568         struct msghdr msg = { .msg_controllen = 0 };
1569
1570         err = scm_send(socket, &msg, scm, false);
1571         if (err)
1572                 return err;
1573
1574         if (unix_passcred_enabled(socket, other)) {
                     /* get_pid() takes a reference on the pid stored in scm. */
1575                 scm->pid = get_pid(task_tgid(current));
1576                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1577         }
1578         return err;
1579 }
1580
1581 static bool unix_skb_scm_eq(struct sk_buff *skb,
1582                             struct scm_cookie *scm)
1583 {
1584         const struct unix_skb_parms *parms = &UNIXCB(skb);
1585
1586         if (parms->pid != scm->pid || !uid_eq(parms->uid, scm->creds.uid))
1587                 return false;
1588
1589         return gid_eq(parms->gid, scm->creds.gid) && unix_secdata_eq(scm, skb);
1590 }
1591
1592 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1593 {
1594         struct scm_fp_list *fds = UNIXCB(skb).fp;
1595
1596         /* Only skbs that carry a non-empty fd list are counted. */
1597         if (unlikely(fds && fds->count))
1598                 atomic_add(fds->count, &unix_sk(sk)->scm_stat.nr_fds);
1599 }
1600
1601 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1602 {
1603         struct scm_fp_list *fds = UNIXCB(skb).fp;
1604
1605         /* Only skbs that carry a non-empty fd list were counted. */
1606         if (unlikely(fds && fds->count))
1607                 atomic_sub(fds->count, &unix_sk(sk)->scm_stat.nr_fds);
1608 }
1609
1610 /*
1611  *      Send AF_UNIX data.
1612  */
1613
1614 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1615                               size_t len)
1616 {
1617         struct sock *sk = sock->sk;
1618         struct net *net = sock_net(sk);
1619         struct unix_sock *u = unix_sk(sk);
1620         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1621         struct sock *other = NULL;
1622         int namelen = 0; /* fake GCC */
1623         int err;
1624         unsigned int hash;
1625         struct sk_buff *skb;
1626         long timeo;
1627         struct scm_cookie scm;
1628         int data_len = 0;
1629         int sk_locked;
1630
1631         wait_for_unix_gc();
1632         err = scm_send(sock, msg, &scm, false);
1633         if (err < 0)
1634                 return err;
1635
1636         err = -EOPNOTSUPP;
1637         if (msg->msg_flags&MSG_OOB)
1638                 goto out;
1639
1640         if (msg->msg_namelen) {
1641                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1642                 if (err < 0)
1643                         goto out;
1644                 namelen = err;
1645         } else {
1646                 sunaddr = NULL;
1647                 err = -ENOTCONN;
1648                 other = unix_peer_get(sk);
1649                 if (!other)
1650                         goto out;
1651         }
1652
1653         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1654             && (err = unix_autobind(sock)) != 0)
1655                 goto out;
1656
1657         err = -EMSGSIZE;
1658         if (len > sk->sk_sndbuf - 32)
1659                 goto out;
1660
1661         if (len > SKB_MAX_ALLOC) {
1662                 data_len = min_t(size_t,
1663                                  len - SKB_MAX_ALLOC,
1664                                  MAX_SKB_FRAGS * PAGE_SIZE);
1665                 data_len = PAGE_ALIGN(data_len);
1666
1667                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1668         }
1669
1670         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1671                                    msg->msg_flags & MSG_DONTWAIT, &err,
1672                                    PAGE_ALLOC_COSTLY_ORDER);
1673         if (skb == NULL)
1674                 goto out;
1675
1676         err = unix_scm_to_skb(&scm, skb, true);
1677         if (err < 0)
1678                 goto out_free;
1679
1680         skb_put(skb, len - data_len);
1681         skb->data_len = data_len;
1682         skb->len = len;
1683         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1684         if (err)
1685                 goto out_free;
1686
1687         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1688
1689 restart:
1690         if (!other) {
1691                 err = -ECONNRESET;
1692                 if (sunaddr == NULL)
1693                         goto out_free;
1694
1695                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1696                                         hash, &err);
1697                 if (other == NULL)
1698                         goto out_free;
1699         }
1700
1701         if (sk_filter(other, skb) < 0) {
1702                 /* Toss the packet but do not return any error to the sender */
1703                 err = len;
1704                 goto out_free;
1705         }
1706
1707         sk_locked = 0;
1708         unix_state_lock(other);
1709 restart_locked:
1710         err = -EPERM;
1711         if (!unix_may_send(sk, other))
1712                 goto out_unlock;
1713
1714         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1715                 /*
1716                  *      Check with 1003.1g - what should
1717                  *      datagram error
1718                  */
1719                 unix_state_unlock(other);
1720                 sock_put(other);
1721
1722                 if (!sk_locked)
1723                         unix_state_lock(sk);
1724
1725                 err = 0;
1726                 if (unix_peer(sk) == other) {
1727                         unix_peer(sk) = NULL;
1728                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1729
1730                         unix_state_unlock(sk);
1731
1732                         unix_dgram_disconnected(sk, other);
1733                         sock_put(other);
1734                         err = -ECONNREFUSED;
1735                 } else {
1736                         unix_state_unlock(sk);
1737                 }
1738
1739                 other = NULL;
1740                 if (err)
1741                         goto out_free;
1742                 goto restart;
1743         }
1744
1745         err = -EPIPE;
1746         if (other->sk_shutdown & RCV_SHUTDOWN)
1747                 goto out_unlock;
1748
1749         if (sk->sk_type != SOCK_SEQPACKET) {
1750                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1751                 if (err)
1752                         goto out_unlock;
1753         }
1754
1755         /* other == sk && unix_peer(other) != sk if
1756          * - unix_peer(sk) == NULL, destination address bound to sk
1757          * - unix_peer(sk) == sk by time of get but disconnected before lock
1758          */
1759         if (other != sk &&
1760             unlikely(unix_peer(other) != sk &&
1761             unix_recvq_full_lockless(other))) {
1762                 if (timeo) {
1763                         timeo = unix_wait_for_peer(other, timeo);
1764
1765                         err = sock_intr_errno(timeo);
1766                         if (signal_pending(current))
1767                                 goto out_free;
1768
1769                         goto restart;
1770                 }
1771
1772                 if (!sk_locked) {
1773                         unix_state_unlock(other);
1774                         unix_state_double_lock(sk, other);
1775                 }
1776
1777                 if (unix_peer(sk) != other ||
1778                     unix_dgram_peer_wake_me(sk, other)) {
1779                         err = -EAGAIN;
1780                         sk_locked = 1;
1781                         goto out_unlock;
1782                 }
1783
1784                 if (!sk_locked) {
1785                         sk_locked = 1;
1786                         goto restart_locked;
1787                 }
1788         }
1789
1790         if (unlikely(sk_locked))
1791                 unix_state_unlock(sk);
1792
1793         if (sock_flag(other, SOCK_RCVTSTAMP))
1794                 __net_timestamp(skb);
1795         maybe_add_creds(skb, sock, other);
1796         scm_stat_add(other, skb);
1797         skb_queue_tail(&other->sk_receive_queue, skb);
1798         unix_state_unlock(other);
1799         other->sk_data_ready(other);
1800         sock_put(other);
1801         scm_destroy(&scm);
1802         return len;
1803
1804 out_unlock:
1805         if (sk_locked)
1806                 unix_state_unlock(sk);
1807         unix_state_unlock(other);
1808 out_free:
1809         kfree_skb(skb);
1810 out:
1811         if (other)
1812                 sock_put(other);
1813         scm_destroy(&scm);
1814         return err;
1815 }
1816
1817 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1818  * bytes, and a minimum of a full page.
      *
      * unix_stream_sendmsg() uses this value to cap the paged part of each
      * skb it builds, and passes get_order(UNIX_SKB_FRAGS_SZ) as the final
      * argument of sock_alloc_send_pskb().
1819  */
1820 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1821
     /*
      * unix_stream_sendmsg - send @len bytes from @msg to the connected peer.
      *
      * The payload is cut into chunks; each chunk is copied into a freshly
      * allocated skb which is appended to the peer's sk_receive_queue under
      * the peer's state lock.  The scm cookie filled in by scm_send() is
      * handed to unix_scm_to_skb() for every skb, but file descriptors are
      * only requested for the first one.
      *
      * Returns the number of bytes queued if at least one chunk was queued.
      * Otherwise returns a negative errno:
      *   -EOPNOTSUPP  MSG_OOB was requested, or an address was supplied on a
      *                socket that is not in TCP_ESTABLISHED state;
      *   -EISCONN     an address was supplied on an established socket;
      *   -ENOTCONN    the socket has no peer;
      *   -EPIPE       this socket has SEND_SHUTDOWN set, or the peer is
      *                SOCK_DEAD or has RCV_SHUTDOWN set (SIGPIPE is raised
      *                as well unless MSG_NOSIGNAL is set);
      *   or the error reported by scm_send(), sock_alloc_send_pskb(),
      *   unix_scm_to_skb() or skb_copy_datagram_from_iter().
      */
1822 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1823                                size_t len)
1824 {
1825         struct sock *sk = sock->sk;
1826         struct sock *other = NULL;
1827         int err, size;
1828         struct sk_buff *skb;
1829         int sent = 0;
1830         struct scm_cookie scm;
1831         bool fds_sent = false;
1832         int data_len;
1833
1834         wait_for_unix_gc();
1835         err = scm_send(sock, msg, &scm, false);
1836         if (err < 0)
1837                 return err;
1838
1839         err = -EOPNOTSUPP;
1840         if (msg->msg_flags&MSG_OOB)
1841                 goto out_err;
1842
             /* A destination address is never accepted on this path. */
1843         if (msg->msg_namelen) {
1844                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1845                 goto out_err;
1846         } else {
1847                 err = -ENOTCONN;
1848                 other = unix_peer(sk);
1849                 if (!other)
1850                         goto out_err;
1851         }
1852
             /* Our own send direction is shut down: -EPIPE, maybe SIGPIPE. */
1853         if (sk->sk_shutdown & SEND_SHUTDOWN)
1854                 goto pipe_err;
1855
1856         while (sent < len) {
1857                 size = len - sent;
1858
1859                 /* Keep two messages in the pipe so it schedules better */
1860                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1861
1862                 /* allow fallback to order-0 allocations */
1863                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1864
                     /* data_len is the part of this chunk that does not go
                      * into the linear head; it becomes skb->data_len below.
                      */
1865                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1866
1867                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1868
1869                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1870                                            msg->msg_flags & MSG_DONTWAIT, &err,
1871                                            get_order(UNIX_SKB_FRAGS_SZ));
1872                 if (!skb)
1873                         goto out_err;
1874
1875                 /* Only send the fds in the first buffer */
1876                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1877                 if (err < 0) {
1878                         kfree_skb(skb);
1879                         goto out_err;
1880                 }
1881                 fds_sent = true;
1882
1883                 skb_put(skb, size - data_len);
1884                 skb->data_len = data_len;
1885                 skb->len = size;
1886                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1887                 if (err) {
1888                         kfree_skb(skb);
1889                         goto out_err;
1890                 }
1891
1892                 unix_state_lock(other);
1893
                     /* Re-checked for every chunk, under the peer's lock. */
1894                 if (sock_flag(other, SOCK_DEAD) ||
1895                     (other->sk_shutdown & RCV_SHUTDOWN))
1896                         goto pipe_err_free;
1897
1898                 maybe_add_creds(skb, sock, other);
1899                 scm_stat_add(other, skb);
1900                 skb_queue_tail(&other->sk_receive_queue, skb);
1901                 unix_state_unlock(other);
1902                 other->sk_data_ready(other);
1903                 sent += size;
1904         }
1905
1906         scm_destroy(&scm);
1907
1908         return sent;
1909
1910 pipe_err_free:
1911         unix_state_unlock(other);
1912         kfree_skb(skb);
1913 pipe_err:
             /* SIGPIPE is only raised when nothing at all was sent. */
1914         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1915                 send_sig(SIGPIPE, current, 0);
1916         err = -EPIPE;
1917 out_err:
1918         scm_destroy(&scm);
             /* Partial progress is reported in preference to the error. */
1919         return sent ? : err;
1920 }
1921
     /*
      * unix_stream_sendpage - queue @size bytes of @page, starting at @offset,
      * on the connected peer's receive queue.
      *
      * The page fragment is appended with skb_append_pagefrags() to the skb at
      * the tail of the peer's receive queue when unix_skb_scm_eq() says that
      * skb matches the sender's scm data.  Otherwise (empty queue, mismatch,
      * or the append fails) control jumps to alloc_skb, where an empty skb is
      * allocated with both the peer's state lock and iolock dropped; the
      * locks are then retaken and all checks are repeated.  @tail records the
      * skb that was rejected so it is not selected again after the retry.
      *
      * Returns @size on success, or a negative errno:
      *   -EOPNOTSUPP            MSG_OOB was requested;
      *   -ENOTCONN              no peer, or the socket is not TCP_ESTABLISHED;
      *   -EAGAIN/-ERESTARTSYS   taking the peer's iolock was interrupted
      *                          (-EAGAIN when MSG_DONTWAIT is set);
      *   -EPIPE                 this socket has SEND_SHUTDOWN set, or the peer
      *                          is SOCK_DEAD or has RCV_SHUTDOWN set (SIGPIPE
      *                          is raised unless MSG_NOSIGNAL is set);
      *   or the error from sock_alloc_send_pskb(), maybe_init_creds() or
      *   unix_scm_to_skb().
      */
1922 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1923                                     int offset, size_t size, int flags)
1924 {
1925         int err;
1926         bool send_sigpipe = false;
1927         bool init_scm = true;
1928         struct scm_cookie scm;
1929         struct sock *other, *sk = socket->sk;
1930         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1931
1932         if (flags & MSG_OOB)
1933                 return -EOPNOTSUPP;
1934
1935         other = unix_peer(sk);
1936         if (!other || sk->sk_state != TCP_ESTABLISHED)
1937                 return -ENOTCONN;
1938
             /* Skipped on entry.  The body is reached only through
              * 'goto alloc_skb', with the peer's state lock and iolock held;
              * both are released here and taken again further down.
              */
1939         if (false) {
1940 alloc_skb:
1941                 unix_state_unlock(other);
1942                 mutex_unlock(&unix_sk(other)->iolock);
1943                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1944                                               &err, 0);
1945                 if (!newskb)
1946                         goto err;
1947         }
1948
1949         /* we must acquire iolock as we modify already present
1950          * skbs in the sk_receive_queue and mess with skb->len
1951          */
1952         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1953         if (err) {
1954                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1955                 goto err;
1956         }
1957
1958         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1959                 err = -EPIPE;
1960                 send_sigpipe = true;
1961                 goto err_unlock;
1962         }
1963
1964         unix_state_lock(other);
1965
1966         if (sock_flag(other, SOCK_DEAD) ||
1967             other->sk_shutdown & RCV_SHUTDOWN) {
1968                 err = -EPIPE;
1969                 send_sigpipe = true;
1970                 goto err_state_unlock;
1971         }
1972
             /* Done once only; init_scm also tells the error path whether
              * scm needs scm_destroy().
              */
1973         if (init_scm) {
1974                 err = maybe_init_creds(&scm, socket, other);
1975                 if (err)
1976                         goto err_state_unlock;
1977                 init_scm = false;
1978         }
1979
             /* Pick the skb to extend:
              *  - the queue tail is still the skb rejected earlier: use newskb;
              *  - the queue is empty or the tail's scm data differs: use newskb
              *    if there is one, otherwise go and allocate it;
              *  - the tail is usable: any newskb allocated meanwhile is freed.
              */
1980         skb = skb_peek_tail(&other->sk_receive_queue);
1981         if (tail && tail == skb) {
1982                 skb = newskb;
1983         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1984                 if (newskb) {
1985                         skb = newskb;
1986                 } else {
1987                         tail = skb;
1988                         goto alloc_skb;
1989                 }
1990         } else if (newskb) {
1991                 /* this is fast path, we don't necessarily need to
1992                  * call to kfree_skb even though with newskb == NULL
1993                  * this - does no harm
1994                  */
1995                 consume_skb(newskb);
1996                 newskb = NULL;
1997         }
1998
             /* The chosen skb could not take the fragment: remember it in
              * tail and retry with a newly allocated skb.
              */
1999         if (skb_append_pagefrags(skb, page, offset, size)) {
2000                 tail = skb;
2001                 goto alloc_skb;
2002         }
2003
             /* Account the added bytes in the skb and in the sender's
              * sk_wmem_alloc.
              */
2004         skb->len += size;
2005         skb->data_len += size;
2006         skb->truesize += size;
2007         refcount_add(size, &sk->sk_wmem_alloc);
2008
             /* A new skb is not on the peer's queue yet: attach the scm data
              * and queue it under the queue's own spinlock.
              */
2009         if (newskb) {
2010                 err = unix_scm_to_skb(&scm, skb, false);
2011                 if (err)
2012                         goto err_state_unlock;
2013                 spin_lock(&other->sk_receive_queue.lock);
2014                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2015                 spin_unlock(&other->sk_receive_queue.lock);
2016         }
2017
2018         unix_state_unlock(other);
2019         mutex_unlock(&unix_sk(other)->iolock);
2020
2021         other->sk_data_ready(other);
2022         scm_destroy(&scm);
2023         return size;
2024
2025 err_state_unlock:
2026         unix_state_unlock(other);
2027 err_unlock:
2028         mutex_unlock(&unix_sk(other)->iolock);
2029 err:
2030         kfree_skb(newskb);
2031         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2032                 send_sig(SIGPIPE, current, 0);
             /* scm is only valid once maybe_init_creds() has succeeded. */
2033         if (!init_scm)
2034                 scm_destroy(&scm);
2035         return err;
2036 }
2037
     /*
      * unix_seqpacket_sendmsg - sendmsg front end that delegates to
      * unix_dgram_sendmsg().
      *
      * Returns a pending socket error reported by sock_error() first, then
      * -ENOTCONN unless the socket is TCP_ESTABLISHED.  msg->msg_namelen is
      * reset to 0 before the message is handed to unix_dgram_sendmsg(), whose
      * result is returned.
      */
2038 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2039                                   size_t len)
2040 {
2041         int err;
2042         struct sock *sk = sock->sk;
2043
2044         err = sock_error(sk);
2045         if (err)
2046                 return err;
2047
2048         if (sk->sk_state != TCP_ESTABLISHED)
2049                 return -ENOTCONN;
2050
             /* Discard the length of any caller-supplied address. */
2051         if (msg->msg_namelen)
2052                 msg->msg_namelen = 0;
2053
2054         return unix_dgram_sendmsg(sock, msg, len);
2055 }
2056
     /*
      * unix_seqpacket_recvmsg - recvmsg front end that delegates to
      * unix_dgram_recvmsg().
      *
      * Returns -ENOTCONN unless the socket is TCP_ESTABLISHED; otherwise
      * returns whatever unix_dgram_recvmsg() returns.
      */
2057 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2058                                   size_t size, int flags)
2059 {
2060         struct sock *sk = sock->sk;
2061
2062         if (sk->sk_state != TCP_ESTABLISHED)
2063                 return -ENOTCONN;
2064
2065         return unix_dgram_recvmsg(sock, msg, size, flags);
2066 }
2067
     /*
      * unix_copy_addr - copy the address of @sk into @msg.
      *
      * If @sk has an address, msg->msg_namelen is set to its length and the
      * name bytes are copied to msg->msg_name.  If it has none, @msg is left
      * untouched.  msg->msg_name is written without a NULL check here; the
      * callers in this file test it before calling.
      */
2068 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2069 {
             /* Acquire-load of the address pointer. */
2070         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2071
2072         if (addr) {
2073                 msg->msg_namelen = addr->len;
2074                 memcpy(msg->msg_name, addr->name, addr->len);
2075         }
2076 }
2077
     /*
      * unix_dgram_recvmsg - receive one datagram from @sock into @msg.
      *
      * Tries to take an skb from sk_receive_queue under u->iolock, waiting
      * between attempts (up to the receive timeout) while the attempt fails
      * with -EAGAIN.  On success the sender's address (if msg->msg_name is
      * set), up to @size bytes of payload, the timestamp (SOCK_RCVTSTAMP) and
      * the scm data are delivered to @msg.  MSG_TRUNC is set in
      * msg->msg_flags when the datagram is longer than @size.
      *
      * Without MSG_PEEK, file descriptors carried by the skb are detached
      * into the scm cookie; with MSG_PEEK they are duplicated instead.
      *
      * Returns the number of bytes copied, or the full remaining datagram
      * length when MSG_TRUNC is set in @flags.  Returns 0 for a SOCK_SEQPACKET
      * socket with RCV_SHUTDOWN set when no datagram is available (-EAGAIN).
      * Otherwise returns a negative errno: -EOPNOTSUPP for MSG_OOB, or the
      * error from the receive/wait/copy helpers.
      */
2078 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2079                               size_t size, int flags)
2080 {
2081         struct scm_cookie scm;
2082         struct sock *sk = sock->sk;
2083         struct unix_sock *u = unix_sk(sk);
2084         struct sk_buff *skb, *last;
2085         long timeo;
2086         int skip;
2087         int err;
2088
2089         err = -EOPNOTSUPP;
2090         if (flags&MSG_OOB)
2091                 goto out;
2092
2093         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2094
2095         do {
2096                 mutex_lock(&u->iolock);
2097
2098                 skip = sk_peek_offset(sk, flags);
2099                 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2100                                               &skip, &err, &last);
                     /* Got a datagram: leave the loop with iolock held. */
2101                 if (skb) {
2102                         if (!(flags & MSG_PEEK))
2103                                 scm_stat_del(sk, skb);
2104                         break;
2105                 }
2106
2107                 mutex_unlock(&u->iolock);
2108
                     /* Only -EAGAIN is worth waiting for. */
2109                 if (err != -EAGAIN)
2110                         break;
2111         } while (timeo &&
2112                  !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2113                                               &err, &timeo, last));
2114
2115         if (!skb) { /* implies iolock unlocked */
2116                 unix_state_lock(sk);
2117                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2118                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2119                     (sk->sk_shutdown & RCV_SHUTDOWN))
2120                         err = 0;
2121                 unix_state_unlock(sk);
2122                 goto out;
2123         }
2124
             /* A datagram left the queue: wake anyone sleeping on
              * u->peer_wait for writability.
              */
2125         if (wq_has_sleeper(&u->peer_wait))
2126                 wake_up_interruptible_sync_poll(&u->peer_wait,
2127                                                 EPOLLOUT | EPOLLWRNORM |
2128                                                 EPOLLWRBAND);
2129
2130         if (msg->msg_name)
2131                 unix_copy_addr(msg, skb->sk);
2132
             /* Clamp the copy to what is left of the datagram after the peek
              * offset; flag truncation when the buffer is too small.
              */
2133         if (size > skb->len - skip)
2134                 size = skb->len - skip;
2135         else if (size < skb->len - skip)
2136                 msg->msg_flags |= MSG_TRUNC;
2137
2138         err = skb_copy_datagram_msg(skb, skip, msg, size);
2139         if (err)
2140                 goto out_free;
2141
2142         if (sock_flag(sk, SOCK_RCVTSTAMP))
2143                 __sock_recv_timestamp(msg, sk, skb);
2144
2145         memset(&scm, 0, sizeof(scm));
2146
2147         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2148         unix_set_secdata(&scm, skb);
2149
2150         if (!(flags & MSG_PEEK)) {
2151                 if (UNIXCB(skb).fp)
2152                         unix_detach_fds(&scm, skb);
2153
2154                 sk_peek_offset_bwd(sk, skb->len);
2155         } else {
2156                 /* It is questionable: on PEEK we could:
2157                    - do not return fds - good, but too simple 8)
2158                    - return fds, and do not return them on read (old strategy,
2159                      apparently wrong)
2160                    - clone fds (I chose it for now, it is the most universal
2161                      solution)
2162
2163                    POSIX 1003.1g does not actually define this clearly
2164                    at all. POSIX 1003.1g doesn't define a lot of things
2165                    clearly however!
2166
2167                 */
2168
2169                 sk_peek_offset_fwd(sk, size);
2170
2171                 if (UNIXCB(skb).fp)
2172                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2173         }
             /* With MSG_TRUNC the caller gets the real remaining length of
              * the datagram rather than the number of bytes copied.
              */
2174         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2175
2176         scm_recv(sock, msg, &scm, flags);
2177
2178 out_free:
2179         skb_free_datagram(sk, skb);
2180         mutex_unlock(&u->iolock);
2181 out:
2182         return err;
2183 }
2184
2185 /*
2186  *      Sleep until more data has arrived. But check for races..
      *
      *      @last and @last_len describe the queue tail (and its length) as
      *      the caller last saw it.  The loop stops as soon as the tail of
      *      sk_receive_queue differs from @last or its length differs from
      *      @last_len, sk_err is set, RCV_SHUTDOWN is set, a signal is
      *      pending, the timeout has run out, or the socket is SOCK_DEAD
      *      after a sleep.  The state lock is held for the checks and dropped
      *      around the sleep.  @freezable selects
      *      freezable_schedule_timeout() over schedule_timeout().
      *
      *      Returns the remaining timeout.
2187  */
2188 static long unix_stream_data_wait(struct sock *sk, long timeo,
2189                                   struct sk_buff *last, unsigned int last_len,
2190                                   bool freezable)
2191 {
2192         struct sk_buff *tail;
2193         DEFINE_WAIT(wait);
2194
2195         unix_state_lock(sk);
2196
2197         for (;;) {
2198                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2199
2200                 tail = skb_peek_tail(&sk->sk_receive_queue);
2201                 if (tail != last ||
2202                     (tail && tail->len != last_len) ||
2203                     sk->sk_err ||
2204                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2205                     signal_pending(current) ||
2206                     !timeo)
2207                         break;
2208
2209                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2210                 unix_state_unlock(sk);
2211                 if (freezable)
2212                         timeo = freezable_schedule_timeout(timeo);
2213                 else
2214                         timeo = schedule_timeout(timeo);
2215                 unix_state_lock(sk);
2216
                     /* Note: this exit leaves SOCKWQ_ASYNC_WAITDATA set. */
2217                 if (sock_flag(sk, SOCK_DEAD))
2218                         break;
2219
2220                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2221         }
2222
2223         finish_wait(sk_sleep(sk), &wait);
2224         unix_state_unlock(sk);
2225         return timeo;
2226 }
2227
     /*
      * unix_skb_len - length of @skb minus the UNIXCB(skb).consumed count,
      * i.e. the bytes of this skb that are still to be read.
      */
2228 static unsigned int unix_skb_len(const struct sk_buff *skb)
2229 {
2230         return skb->len - UNIXCB(skb).consumed;
2231 }
2232
     /*
      * Parameters of one stream read, shared by the recvmsg and splice paths
      * and passed to unix_stream_read_generic().
      */
2233 struct unix_stream_read_state {
             /* Called as recv_actor(skb, skip, chunk, state); a negative
              * return is treated as failure, otherwise it is the number of
              * bytes handled.
              */
2234         int (*recv_actor)(struct sk_buff *, int, int,
2235                           struct unix_stream_read_state *);
2236         struct socket *socket;
             /* Destination for recvmsg; left unset by the splice path. */
2237         struct msghdr *msg;
             /* Destination for splice; left unset by the recvmsg path. */
2238         struct pipe_inode_info *pipe;
             /* Number of bytes requested. */
2239         size_t size;
             /* MSG_* flags for the read. */
2240         int flags;
             /* Flags forwarded to skb_splice_bits() by the splice actor. */
2241         unsigned int splice_flags;
2242 };
2243
     /*
      * unix_stream_read_generic - common body of stream recvmsg and splice.
      *
      * Walks sk_receive_queue under u->iolock and hands each skb, or the part
      * of it not yet consumed, to state->recv_actor until state->size bytes
      * have been handled or a stop condition is hit.  When the queue runs
      * dry before the low-water target is reached, the function sleeps in
      * unix_stream_data_wait() with iolock dropped.  @freezable is passed on
      * to that helper.
      *
      * Without MSG_PEEK the consumed count of each skb is advanced and fully
      * read skbs are unlinked and released.  With MSG_PEEK the queue is left
      * intact and the peek offset is moved forward instead.
      *
      * Returns the number of bytes handled if non-zero, otherwise 0 or a
      * negative errno: -EINVAL when the socket is not TCP_ESTABLISHED,
      * -EOPNOTSUPP for MSG_OOB, -ECONNRESET when the socket is SOCK_DEAD,
      * -EAGAIN when no data is available and the timeout is zero, -EFAULT
      * when the actor fails before anything was copied, the pending socket
      * error, or the sock_intr_errno() value when a signal interrupts the
      * wait.
      */
2244 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2245                                     bool freezable)
2246 {
2247         struct scm_cookie scm;
2248         struct socket *sock = state->socket;
2249         struct sock *sk = sock->sk;
2250         struct unix_sock *u = unix_sk(sk);
2251         int copied = 0;
2252         int flags = state->flags;
2253         int noblock = flags & MSG_DONTWAIT;
2254         bool check_creds = false;
2255         int target;
2256         int err = 0;
2257         long timeo;
2258         int skip;
2259         size_t size = state->size;
2260         unsigned int last_len;
2261
2262         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2263                 err = -EINVAL;
2264                 goto out;
2265         }
2266
2267         if (unlikely(flags & MSG_OOB)) {
2268                 err = -EOPNOTSUPP;
2269                 goto out;
2270         }
2271
             /* target: once this many bytes are copied, an empty queue ends
              * the read instead of causing a wait (see 'copied >= target').
              */
2272         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2273         timeo = sock_rcvtimeo(sk, noblock);
2274
2275         memset(&scm, 0, sizeof(scm));
2276
2277         /* Lock the socket to prevent queue disordering
2278          * while sleeps in memcpy_tomsg
2279          */
2280         mutex_lock(&u->iolock);
2281
2282         skip = max(sk_peek_offset(sk, flags), 0);
2283
2284         do {
2285                 int chunk;
2286                 bool drop_skb;
2287                 struct sk_buff *skb, *last;
2288
2289 redo:
2290                 unix_state_lock(sk);
2291                 if (sock_flag(sk, SOCK_DEAD)) {
2292                         err = -ECONNRESET;
2293                         goto unlock;
2294                 }
2295                 last = skb = skb_peek(&sk->sk_receive_queue);
2296                 last_len = last ? last->len : 0;
2297 again:
                     /* Nothing (more) to read in the queue. */
2298                 if (skb == NULL) {
2299                         if (copied >= target)
2300                                 goto unlock;
2301
2302                         /*
2303                          *      POSIX 1003.1g mandates this order.
2304                          */
2305
2306                         err = sock_error(sk);
2307                         if (err)
2308                                 goto unlock;
2309                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2310                                 goto unlock;
2311
2312                         unix_state_unlock(sk);
2313                         if (!timeo) {
2314                                 err = -EAGAIN;
2315                                 break;
2316                         }
2317
                             /* Sleep without iolock. */
2318                         mutex_unlock(&u->iolock);
2319
2320                         timeo = unix_stream_data_wait(sk, timeo, last,
2321                                                       last_len, freezable);
2322
                             /* iolock is not held here, hence the direct jump
                              * to out after releasing scm.
                              */
2323                         if (signal_pending(current)) {
2324                                 err = sock_intr_errno(timeo);
2325                                 scm_destroy(&scm);
2326                                 goto out;
2327                         }
2328
2329                         mutex_lock(&u->iolock);
2330                         goto redo;
2331 unlock:
2332                         unix_state_unlock(sk);
2333                         break;
2334                 }
2335
                     /* Step over skbs that lie entirely before the peek
                      * offset.
                      */
2336                 while (skip >= unix_skb_len(skb)) {
2337                         skip -= unix_skb_len(skb);
2338                         last = skb;
2339                         last_len = skb->len;
2340                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2341                         if (!skb)
2342                                 goto again;
2343                 }
2344
2345                 unix_state_unlock(sk);
2346
2347                 if (check_creds) {
2348                         /* Never glue messages from different writers */
2349                         if (!unix_skb_scm_eq(skb, &scm))
2350                                 break;
2351                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2352                         /* Copy credentials */
2353                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2354                         unix_set_secdata(&scm, skb);
2355                         check_creds = true;
2356                 }
2357
2358                 /* Copy address just once */
                     /* NOTE(review): sunaddr appears to be a variable local to
                      * this block, so clearing it would not stop the address
                      * being copied again on a later iteration -- confirm
                      * against the DECLARE_SOCKADDR definition.
                      */
2359                 if (state->msg && state->msg->msg_name) {
2360                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2361                                          state->msg->msg_name);
2362                         unix_copy_addr(state->msg, skb->sk);
2363                         sunaddr = NULL;
2364                 }
2365
2366                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                     /* Extra reference so skb can still be inspected after the
                      * actor returns; it is dropped by consume_skb() below.
                      */
2367                 skb_get(skb);
2368                 chunk = state->recv_actor(skb, skip, chunk, state);
2369                 drop_skb = !unix_skb_len(skb);
2370                 /* skb is only safe to use if !drop_skb */
2371                 consume_skb(skb);
                     /* Actor failed: report -EFAULT unless data was already
                      * copied.
                      */
2372                 if (chunk < 0) {
2373                         if (copied == 0)
2374                                 copied = -EFAULT;
2375                         break;
2376                 }
2377                 copied += chunk;
2378                 size -= chunk;
2379
2380                 if (drop_skb) {
2381                         /* the skb was touched by a concurrent reader;
2382                          * we should not expect anything from this skb
2383                          * anymore and assume it invalid - we can be
2384                          * sure it was dropped from the socket queue
2385                          *
2386                          * let's report a short read
2387                          */
2388                         err = 0;
2389                         break;
2390                 }
2391
2392                 /* Mark read part of skb as used */
2393                 if (!(flags & MSG_PEEK)) {
2394                         UNIXCB(skb).consumed += chunk;
2395
2396                         sk_peek_offset_bwd(sk, chunk);
2397
2398                         if (UNIXCB(skb).fp) {
2399                                 scm_stat_del(sk, skb);
2400                                 unix_detach_fds(&scm, skb);
2401                         }
2402
                             /* skb not fully read: the request is satisfied. */
2403                         if (unix_skb_len(skb))
2404                                 break;
2405
2406                         skb_unlink(skb, &sk->sk_receive_queue);
2407                         consume_skb(skb);
2408
                             /* Stop once scm holds file descriptors. */
2409                         if (scm.fp)
2410                                 break;
2411                 } else {
2412                         /* It is questionable, see note in unix_dgram_recvmsg.
2413                          */
2414                         if (UNIXCB(skb).fp)
2415                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2416
2417                         sk_peek_offset_fwd(sk, chunk);
2418
2419                         if (UNIXCB(skb).fp)
2420                                 break;
2421
                             /* Peeking: move on to the next skb in the queue
                              * without unlinking this one.
                              */
2422                         skip = 0;
2423                         last = skb;
2424                         last_len = skb->len;
2425                         unix_state_lock(sk);
2426                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2427                         if (skb)
2428                                 goto again;
2429                         unix_state_unlock(sk);
2430                         break;
2431                 }
2432         } while (size);
2433
2434         mutex_unlock(&u->iolock);
             /* Only the recvmsg path has a msghdr to deliver scm data to. */
2435         if (state->msg)
2436                 scm_recv(sock, state->msg, &scm, flags);
2437         else
2438                 scm_destroy(&scm);
2439 out:
             /* A non-zero copied (byte count or -EFAULT) wins over err. */
2440         return copied ? : err;
2441 }
2442
     /*
      * unix_stream_read_actor - recv_actor used by unix_stream_recvmsg().
      *
      * Copies @chunk bytes of @skb into state->msg, starting @skip bytes past
      * the skb's consumed count.  Returns @chunk on success, otherwise the
      * non-zero result of skb_copy_datagram_msg().
      */
2443 static int unix_stream_read_actor(struct sk_buff *skb,
2444                                   int skip, int chunk,
2445                                   struct unix_stream_read_state *state)
2446 {
2447         int ret;
2448
2449         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2450                                     state->msg, chunk);
2451         return ret ?: chunk;
2452 }
2453
     /*
      * unix_stream_recvmsg - stream recvmsg entry point.
      *
      * Packs its arguments into a unix_stream_read_state that uses
      * unix_stream_read_actor() and runs unix_stream_read_generic() with
      * freezable waits enabled.  Returns that function's result.
      */
2454 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2455                                size_t size, int flags)
2456 {
2457         struct unix_stream_read_state state = {
2458                 .recv_actor = unix_stream_read_actor,
2459                 .socket = sock,
2460                 .msg = msg,
2461                 .size = size,
2462                 .flags = flags
2463         };
2464
2465         return unix_stream_read_generic(&state, true);
2466 }
2467
     /*
      * unix_stream_splice_actor - recv_actor used by unix_stream_splice_read().
      *
      * Passes @chunk bytes of @skb, starting @skip bytes past the skb's
      * consumed count, to skb_splice_bits() with state->pipe as destination,
      * and returns its result.
      */
2468 static int unix_stream_splice_actor(struct sk_buff *skb,
2469                                     int skip, int chunk,
2470                                     struct unix_stream_read_state *state)
2471 {
2472         return skb_splice_bits(skb, state->socket->sk,
2473                                UNIXCB(skb).consumed + skip,
2474                                state->pipe, chunk, state->splice_flags);
2475 }
2476
     /*
      * unix_stream_splice_read - splice up to @size bytes from @sock to @pipe.
      *
      * Returns -ESPIPE when *@ppos is non-zero.  The read is made
      * non-blocking (MSG_DONTWAIT) when the socket's file has O_NONBLOCK set
      * or @flags contains SPLICE_F_NONBLOCK.  The work is done by
      * unix_stream_read_generic() with unix_stream_splice_actor() and with
      * freezable waits disabled; its result is returned.
      */
2477 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2478                                        struct pipe_inode_info *pipe,
2479                                        size_t size, unsigned int flags)
2480 {
             /* .msg and .flags are not named here and so start out zero. */
2481         struct unix_stream_read_state state = {
2482                 .recv_actor = unix_stream_splice_actor,
2483                 .socket = sock,
2484                 .pipe = pipe,
2485                 .size = size,
2486                 .splice_flags = flags,
2487         };
2488
2489         if (unlikely(*ppos))
2490                 return -ESPIPE;
2491
2492         if (sock->file->f_flags & O_NONBLOCK ||
2493             flags & SPLICE_F_NONBLOCK)
2494                 state.flags = MSG_DONTWAIT;
2495
2496         return unix_stream_read_generic(&state, false);
2497 }
2498
     /*
      * unix_shutdown - shut down one or both directions of @sock.
      *
      * @mode must be SHUT_RD, SHUT_WR or SHUT_RDWR, otherwise -EINVAL is
      * returned.  The matching bits are ORed into sk->sk_shutdown under the
      * state lock and sk_state_change() is called.  For SOCK_STREAM and
      * SOCK_SEQPACKET sockets that have a peer, the mirrored bits (our
      * receive side is the peer's send side and vice versa) are ORed into
      * the peer's sk_shutdown, the peer's sk_state_change() is called and an
      * async wake-up is sent (POLL_HUP when both directions are closed,
      * POLL_IN when only the peer's receive side is).
      *
      * Returns 0 on success.
      */
2499 static int unix_shutdown(struct socket *sock, int mode)
2500 {
2501         struct sock *sk = sock->sk;
2502         struct sock *other;
2503
2504         if (mode < SHUT_RD || mode > SHUT_RDWR)
2505                 return -EINVAL;
2506         /* This maps:
2507          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2508          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2509          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2510          */
2511         ++mode;
2512
2513         unix_state_lock(sk);
2514         sk->sk_shutdown |= mode;
2515         other = unix_peer(sk);
             /* Pin the peer so it can be used after our lock is dropped. */
2516         if (other)
2517                 sock_hold(other);
2518         unix_state_unlock(sk);
2519         sk->sk_state_change(sk);
2520
2521         if (other &&
2522                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2523
2524                 int peer_mode = 0;
2525
2526                 if (mode&RCV_SHUTDOWN)
2527                         peer_mode |= SEND_SHUTDOWN;
2528                 if (mode&SEND_SHUTDOWN)
2529                         peer_mode |= RCV_SHUTDOWN;
2530                 unix_state_lock(other);
2531                 other->sk_shutdown |= peer_mode;
2532                 unix_state_unlock(other);
2533                 other->sk_state_change(other);
2534                 if (peer_mode == SHUTDOWN_MASK)
2535                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2536                 else if (peer_mode & RCV_SHUTDOWN)
2537                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2538         }
             /* Drop the reference taken above. */
2539         if (other)
2540                 sock_put(other);
2541
2542         return 0;
2543 }
2544
     /*
      * unix_inq_len - amount of data waiting in the receive queue of @sk.
      *
      * For SOCK_STREAM and SOCK_SEQPACKET sockets this is the sum of
      * unix_skb_len() over every queued skb; for other types it is the
      * length of the first queued skb, or 0 when the queue is empty.  The
      * queue is inspected under its own spinlock.
      *
      * Returns -EINVAL for a socket in TCP_LISTEN state.
      */
2545 long unix_inq_len(struct sock *sk)
2546 {
2547         struct sk_buff *skb;
2548         long amount = 0;
2549
2550         if (sk->sk_state == TCP_LISTEN)
2551                 return -EINVAL;
2552
2553         spin_lock(&sk->sk_receive_queue.lock);
2554         if (sk->sk_type == SOCK_STREAM ||
2555             sk->sk_type == SOCK_SEQPACKET) {
2556                 skb_queue_walk(&sk->sk_receive_queue, skb)
2557                         amount += unix_skb_len(skb);
2558         } else {
2559                 skb = skb_peek(&sk->sk_receive_queue);
2560                 if (skb)
2561                         amount = skb->len;
2562         }
2563         spin_unlock(&sk->sk_receive_queue.lock);
2564
2565         return amount;
2566 }
2567 EXPORT_SYMBOL_GPL(unix_inq_len);
2568
     /*
      * unix_outq_len - value reported for SIOCOUTQ: whatever
      * sk_wmem_alloc_get() returns for @sk.
      */
2569 long unix_outq_len(struct sock *sk)
2570 {
2571         return sk_wmem_alloc_get(sk);
2572 }
2573 EXPORT_SYMBOL_GPL(unix_outq_len);
2574
     /*
      * unix_open_file - backend of the SIOCUNIXFILE ioctl: open the filesystem
      * object @sk is bound to and return a new file descriptor for it.
      *
      * The file is opened with O_PATH using the caller's credentials and the
      * descriptor is allocated with O_CLOEXEC.
      *
      * Returns the new descriptor, or a negative errno:
      *   -EPERM   the caller lacks CAP_NET_ADMIN in the user namespace of the
      *            socket's network namespace;
      *   -ENOENT  the socket has no address, or its path has no dentry;
      *   or the error from get_unused_fd_flags() / dentry_open().
      */
2575 static int unix_open_file(struct sock *sk)
2576 {
2577         struct path path;
2578         struct file *f;
2579         int fd;
2580
2581         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2582                 return -EPERM;
2583
2584         if (!smp_load_acquire(&unix_sk(sk)->addr))
2585                 return -ENOENT;
2586
2587         path = unix_sk(sk)->path;
2588         if (!path.dentry)
2589                 return -ENOENT;
2590
             /* Hold the path for the duration of the open; released at out. */
2591         path_get(&path);
2592
2593         fd = get_unused_fd_flags(O_CLOEXEC);
2594         if (fd < 0)
2595                 goto out;
2596
2597         f = dentry_open(&path, O_PATH, current_cred());
2598         if (IS_ERR(f)) {
                     /* Give the reserved descriptor back and return the
                      * errno instead.
                      */
2599                 put_unused_fd(fd);
2600                 fd = PTR_ERR(f);
2601                 goto out;
2602         }
2603
2604         fd_install(fd, f);
2605 out:
2606         path_put(&path);
2607
2608         return fd;
2609 }
2610
2611 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2612 {
2613         struct sock *sk = sock->sk;
2614         long amount = 0;
2615         int err;
2616
2617         switch (cmd) {
2618         case SIOCOUTQ:
2619                 amount = unix_outq_len(sk);
2620                 err = put_user(amount, (int __user *)arg);
2621                 break;
2622         case SIOCINQ:
2623                 amount = unix_inq_len(sk);
2624                 if (amount < 0)
2625                         err = amount;
2626                 else
2627                         err = put_user(amount, (int __user *)arg);
2628                 break;
2629         case SIOCUNIXFILE:
2630                 err = unix_open_file(sk);
2631                 break;
2632         default:
2633                 err = -ENOIOCTLCMD;
2634                 break;
2635         }
2636         return err;
2637 }
2638
2639 #ifdef CONFIG_COMPAT
/*
 * Compat entry point: convert the compat user pointer with compat_ptr()
 * and hand the request to unix_ioctl().
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        unsigned long native_arg = (unsigned long)compat_ptr(arg);

        return unix_ioctl(sock, cmd, native_arg);
}
2644 #endif
2645
2646 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2647 {
2648         struct sock *sk = sock->sk;
2649         __poll_t mask;
2650
2651         sock_poll_wait(file, sock, wait);
2652         mask = 0;
2653
2654         /* exceptional events? */
2655         if (sk->sk_err)
2656                 mask |= EPOLLERR;
2657         if (sk->sk_shutdown == SHUTDOWN_MASK)
2658                 mask |= EPOLLHUP;
2659         if (sk->sk_shutdown & RCV_SHUTDOWN)
2660                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2661
2662         /* readable? */
2663         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2664                 mask |= EPOLLIN | EPOLLRDNORM;
2665
2666         /* Connection-based need to check for termination and startup */
2667         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2668             sk->sk_state == TCP_CLOSE)
2669                 mask |= EPOLLHUP;
2670
2671         /*
2672          * we set writable also when the other side has shut down the
2673          * connection. This prevents stuck sockets.
2674          */
2675         if (unix_writable(sk))
2676                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2677
2678         return mask;
2679 }
2680
2681 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2682                                     poll_table *wait)
2683 {
2684         struct sock *sk = sock->sk, *other;
2685         unsigned int writable;
2686         __poll_t mask;
2687
2688         sock_poll_wait(file, sock, wait);
2689         mask = 0;
2690
2691         /* exceptional events? */
2692         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2693                 mask |= EPOLLERR |
2694                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2695
2696         if (sk->sk_shutdown & RCV_SHUTDOWN)
2697                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2698         if (sk->sk_shutdown == SHUTDOWN_MASK)
2699                 mask |= EPOLLHUP;
2700
2701         /* readable? */
2702         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2703                 mask |= EPOLLIN | EPOLLRDNORM;
2704
2705         /* Connection-based need to check for termination and startup */
2706         if (sk->sk_type == SOCK_SEQPACKET) {
2707                 if (sk->sk_state == TCP_CLOSE)
2708                         mask |= EPOLLHUP;
2709                 /* connection hasn't started yet? */
2710                 if (sk->sk_state == TCP_SYN_SENT)
2711                         return mask;
2712         }
2713
2714         /* No write status requested, avoid expensive OUT tests. */
2715         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2716                 return mask;
2717
2718         writable = unix_writable(sk);
2719         if (writable) {
2720                 unix_state_lock(sk);
2721
2722                 other = unix_peer(sk);
2723                 if (other && unix_peer(other) != sk &&
2724                     unix_recvq_full(other) &&
2725                     unix_dgram_peer_wake_me(sk, other))
2726                         writable = 0;
2727
2728                 unix_state_unlock(sk);
2729         }
2730
2731         if (writable)
2732                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2733         else
2734                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2735
2736         return mask;
2737 }
2738
2739 #ifdef CONFIG_PROC_FS
2740
/*
 * A seq_file position packs two fields into one value: the hash bucket
 * index in the bits above BUCKET_SPACE and the offset within that bucket
 * in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket index b and offset o. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2746
2747 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2748 {
2749         unsigned long offset = get_offset(*pos);
2750         unsigned long bucket = get_bucket(*pos);
2751         struct sock *sk;
2752         unsigned long count = 0;
2753
2754         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2755                 if (sock_net(sk) != seq_file_net(seq))
2756                         continue;
2757                 if (++count == offset)
2758                         break;
2759         }
2760
2761         return sk;
2762 }
2763
2764 static struct sock *unix_next_socket(struct seq_file *seq,
2765                                      struct sock *sk,
2766                                      loff_t *pos)
2767 {
2768         unsigned long bucket;
2769
2770         while (sk > (struct sock *)SEQ_START_TOKEN) {
2771                 sk = sk_next(sk);
2772                 if (!sk)
2773                         goto next_bucket;
2774                 if (sock_net(sk) == seq_file_net(seq))
2775                         return sk;
2776         }
2777
2778         do {
2779                 sk = unix_from_bucket(seq, pos);
2780                 if (sk)
2781                         return sk;
2782
2783 next_bucket:
2784                 bucket = get_bucket(*pos) + 1;
2785                 *pos = set_bucket_offset(bucket, 1);
2786         } while (bucket < ARRAY_SIZE(unix_socket_table));
2787
2788         return NULL;
2789 }
2790
2791 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2792         __acquires(unix_table_lock)
2793 {
2794         spin_lock(&unix_table_lock);
2795
2796         if (!*pos)
2797                 return SEQ_START_TOKEN;
2798
2799         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2800                 return NULL;
2801
2802         return unix_next_socket(seq, NULL, pos);
2803 }
2804
2805 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2806 {
2807         ++*pos;
2808         return unix_next_socket(seq, v, pos);
2809 }
2810
2811 static void unix_seq_stop(struct seq_file *seq, void *v)
2812         __releases(unix_table_lock)
2813 {
2814         spin_unlock(&unix_table_lock);
2815 }
2816
2817 static int unix_seq_show(struct seq_file *seq, void *v)
2818 {
2819
2820         if (v == SEQ_START_TOKEN)
2821                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2822                          "Inode Path\n");
2823         else {
2824                 struct sock *s = v;
2825                 struct unix_sock *u = unix_sk(s);
2826                 unix_state_lock(s);
2827
2828                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2829                         s,
2830                         refcount_read(&s->sk_refcnt),
2831                         0,
2832                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2833                         s->sk_type,
2834                         s->sk_socket ?
2835                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2836                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2837                         sock_i_ino(s));
2838
2839                 if (u->addr) {  // under unix_table_lock here
2840                         int i, len;
2841                         seq_putc(seq, ' ');
2842
2843                         i = 0;
2844                         len = u->addr->len - sizeof(short);
2845                         if (!UNIX_ABSTRACT(s))
2846                                 len--;
2847                         else {
2848                                 seq_putc(seq, '@');
2849                                 i++;
2850                         }
2851                         for ( ; i < len; i++)
2852                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2853                                          '@');
2854                 }
2855                 unix_state_unlock(s);
2856                 seq_putc(seq, '\n');
2857         }
2858
2859         return 0;
2860 }
2861
2862 static const struct seq_operations unix_seq_ops = {
2863         .start  = unix_seq_start,
2864         .next   = unix_seq_next,
2865         .stop   = unix_seq_stop,
2866         .show   = unix_seq_show,
2867 };
2868 #endif
2869
2870 static const struct net_proto_family unix_family_ops = {
2871         .family = PF_UNIX,
2872         .create = unix_create,
2873         .owner  = THIS_MODULE,
2874 };
2875
2876
2877 static int __net_init unix_net_init(struct net *net)
2878 {
2879         int error = -ENOMEM;
2880
2881         net->unx.sysctl_max_dgram_qlen = 10;
2882         if (unix_sysctl_register(net))
2883                 goto out;
2884
2885 #ifdef CONFIG_PROC_FS
2886         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2887                         sizeof(struct seq_net_private))) {
2888                 unix_sysctl_unregister(net);
2889                 goto out;
2890         }
2891 #endif
2892         error = 0;
2893 out:
2894         return error;
2895 }
2896
2897 static void __net_exit unix_net_exit(struct net *net)
2898 {
2899         unix_sysctl_unregister(net);
2900         remove_proc_entry("unix", net->proc_net);
2901 }
2902
2903 static struct pernet_operations unix_net_ops = {
2904         .init = unix_net_init,
2905         .exit = unix_net_exit,
2906 };
2907
2908 static int __init af_unix_init(void)
2909 {
2910         int rc = -1;
2911
2912         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2913
2914         rc = proto_register(&unix_proto, 1);
2915         if (rc != 0) {
2916                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2917                 goto out;
2918         }
2919
2920         sock_register(&unix_family_ops);
2921         register_pernet_subsys(&unix_net_ops);
2922 out:
2923         return rc;
2924 }
2925
2926 static void __exit af_unix_exit(void)
2927 {
2928         sock_unregister(PF_UNIX);
2929         proto_unregister(&unix_proto);
2930         unregister_pernet_subsys(&unix_net_ops);
2931 }
2932
2933 /* Earlier than device_initcall() so that other drivers invoking
2934    request_module() don't end up in a loop when modprobe tries
2935    to use a UNIX socket. But later than subsys_initcall() because
2936    we depend on stuff initialised there */
2937 fs_initcall(af_unix_init);
2938 module_exit(af_unix_exit);
2939
2940 MODULE_LICENSE("GPL");
2941 MODULE_ALIAS_NETPROTO(PF_UNIX);