/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					BSD.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko EiBfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting.
 *		Kirk Petersen	:	Made this a module.
 *		Christoph Rohland:	Elegant non-blocking accept/connect
 *					algorithm.
 *		Alexey Kuznetsov:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *		Andrea Arcangeli:	If possible, we block in connect(2)
 *					if the max backlog of the listening
 *					socket has been reached. This won't
 *					break old apps and it avoids huge
 *					amounts of hashed sockets (this for
 *					unix_gc() performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations.
 *		Alexey Kuznetsov:	Full scale SMP. Lots of bugs are
 *					introduced 8)
 *		Malcolm Beattie	:	Set peercred for socketpair.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT;
 *					the core infrastructure does that
 *					for all net proto families now
 *					(2.5.69+).
 *
 * Known differences from reference BSD that was tested:
 *
 *	ECONNREFUSED is not returned from one end of a connected() socket to
 *		the other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water
 *		mark and a fake inode identifier (nor the BSD first socket
 *		fstat twice bug).
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns
 *		16 and a null first byte in the path (but not for
 *		gethost/peername - BSD bug??).
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
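/* Illustrative userspace sketch (not part of this file, name is made up):
 * binding in the abstract namespace vs. on the filesystem. An abstract
 * name starts with a NUL byte and its length is conveyed by addrlen, not
 * by NUL termination.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	static int bind_abstract(int fd)
 *	{
 *		struct sockaddr_un sun;
 *
 *		memset(&sun, 0, sizeof(sun));
 *		sun.sun_family = AF_UNIX;
 *		memcpy(sun.sun_path, "\0example", 8); // leading 0 => abstract
 *		return bind(fd, (struct sockaddr *)&sun,
 *			    offsetof(struct sockaddr_un, sun_path) + 8);
 *	}
 */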
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;

/* Unbound sockets live in the second half of the table, hashed by the
 * address of the struct sock itself.
 */
static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}
#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}
/*
 *	Check unix socket name:
 *		- it must not be zero length.
 *		- if it does not start with a NUL byte, it must be NUL
 *		  terminated (FS object)
 *		- if it starts with a NUL byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */
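/* Userspace view of the mechanism described above (illustrative sketch,
 * not part of this file; buffer names are made up): a connected datagram
 * client whose peer's queue is full can wait in poll() for POLLOUT; the
 * relay below is what eventually wakes it when the peer reads.
 *
 *	struct pollfd pfd = { .fd = client_fd, .events = POLLOUT };
 *
 *	for (;;) {
 *		if (send(client_fd, buf, len, MSG_DONTWAIT) >= 0)
 *			break;
 *		if (errno != EAGAIN && errno != EWOULDBLOCK)
 *			break;			// real error
 *		poll(&pfd, 1, -1);		// sleeps until peer drains
 *	}
 */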
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its queue is full, we will hang waiting for
	 * POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
/* A socket counts as writable while its in-flight write memory stays
 * below a quarter of sk_sndbuf.
 */
static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is
		 * disconnected, we signal error. Messages are lost.
		 * Do not do this when the peer wasn't connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
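/* Illustrative use of the peek offset set above (userspace sketch, not
 * part of this file): with SO_PEEK_OFF >= 0, each MSG_PEEK read resumes
 * where the previous peek stopped instead of rereading from the start.
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 0..15
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 16..31
 */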
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum + 1) & 0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}
/* Both state locks are taken in a fixed (address) order to avoid an
 * ABBA deadlock between two concurrent double-locks.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because a deadlock is
	   possible. The connect-to-self case and a simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	   check this before the attempt to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
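/* Userspace counterpart (illustrative sketch, not part of this file):
 * socketpair(2) yields a connected pair without bind/connect, and works
 * for SOCK_STREAM, SOCK_DGRAM and SOCK_SEQPACKET in AF_UNIX.
 *
 *	int sv[2];
 *	char buf[8];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0) {
 *		write(sv[0], "ping", 4);
 *		read(sv[1], buf, sizeof(buf));	// receives "ping"
 *	}
 */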
static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid  = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}
/*
 * The "user->unix_inflight" variable is protected by the garbage
 * collection lock, and we just read it locklessly here. If you go
 * over the limit, there might be a tiny race in actually noticing
 * it across threads. Tough.
 */
static inline bool too_many_unix_fds(struct task_struct *p)
{
	struct user_struct *user = current_user();

	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
	return false;
}

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	if (too_many_unix_fds(current))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_inflight(scm->fp->user, scm->fp->fp[i]);
	return 0;
}
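/* Userspace side of fd passing (illustrative sketch, not part of this
 * file; sock_fd and fd_to_pass are made up): an SCM_RIGHTS control
 * message carries the descriptors that unix_attach_fds() accounts as
 * "in flight" above.
 *
 *	char dummy = 'x';
 *	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *	union { struct cmsghdr align; char buf[CMSG_SPACE(sizeof(int))]; } u;
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
 *	};
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &msg, 0);
 */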
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}
static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}
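/* Receiving side of the credentials logic above (illustrative userspace
 * sketch, not part of this file): once SO_PASSCRED is set, recvmsg()
 * carries an SCM_CREDENTIALS control message with the sender's
 * pid/uid/gid.
 *
 *	int one = 1;
 *	struct cmsghdr *cmsg;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS) {
 *			struct ucred *uc = (struct ucred *)CMSG_DATA(cmsg);
 *			// uc->pid, uc->uid, uc->gid describe the sender
 *		}
 */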
/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))

static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);

	if (addr) {
		msg->msg_namelen = addr->len;
		memcpy(msg->msg_name, addr->name, addr->len);
	}
}
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int peeked, skip;
	int err;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
					      &err, &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!
		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
/*
 *	Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
static int unix_stream_read_actor(struct sk_buff *skb,
				  int skip, int chunk,
				  struct unix_stream_read_state *state)
{
	int ret;

	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
				    state->msg, chunk);
	return ret ?: chunk;
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}

static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}

static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&state, false);
}
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		if (mode & RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode & SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
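/* Effect as seen from userspace (illustrative sketch, not part of this
 * file): after one end of a connected stream pair does
 * shutdown(fd, SHUT_WR), the peer's read() returns 0 (EOF) once the
 * queue is drained, and further writes on the shut-down end raise
 * EPIPE/SIGPIPE.
 *
 *	shutdown(sv[0], SHUT_WR);
 *	n = read(sv[1], buf, sizeof(buf));	// n == 0 => EOF
 */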
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);

long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
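/* Userspace use of the two queue-length ioctls handled above
 * (illustrative sketch, not part of this file):
 *
 *	int inq, outq;
 *
 *	ioctl(fd, SIOCINQ, &inq);	// unread bytes in receive queue
 *	ioctl(fd, SIOCOUTQ, &outq);	// bytes not yet consumed by peer
 */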
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}

static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}

static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}

static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}

static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}

static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
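/* A resulting /proc/net/unix line looks roughly like this (illustrative
 * values; columns are Num, RefCount, Protocol, Flags, Type, St, Inode,
 * Path, with abstract names rendered with a leading '@'):
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 */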
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
#endif

static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}

static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}

static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}

static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

/* Earlier than device_initcall() so that other drivers invoking
   request_module() don't end up in a loop when modprobe tries
   to use a UNIX socket. But later than subsys_initcall() because
   we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);