net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
  28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  29  *                                      by the above two patches.
  30  *           Andrea Arcangeli   :       If possible we block in connect(2)
  31  *                                      if the max backlog of the listen socket
  32  *                                      has been reached. This won't break
  33  *                                      old apps and it will avoid a huge amount
  34  *                                      of socks hashed (this is for unix_gc()
  35  *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116
 117 #include "scm.h"
 118
 119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 120 EXPORT_SYMBOL_GPL(unix_socket_table);
 121 DEFINE_SPINLOCK(unix_table_lock);
 122 EXPORT_SYMBOL_GPL(unix_table_lock);
 123 static atomic_long_t unix_nr_socks;
 124
 125
 126 static struct hlist_head *unix_sockets_unbound(void *addr)
 127 {
 128         unsigned long hash = (unsigned long)addr;
 129
 130         hash ^= hash >> 16;
 131         hash ^= hash >> 8;
 132         hash %= UNIX_HASH_SIZE;
 133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 134 }
 135
 136 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 137
 138 #ifdef CONFIG_SECURITY_NETWORK
 139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 140 {
 141         UNIXCB(skb).secid = scm->secid;
 142 }
 143
 144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 145 {
 146         scm->secid = UNIXCB(skb).secid;
 147 }
 148
 149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 150 {
 151         return (scm->secid == UNIXCB(skb).secid);
 152 }
 153 #else
 154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 155 { }
 156
 157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 161 {
 162         return true;
 163 }
 164 #endif /* CONFIG_SECURITY_NETWORK */
 165
 166 /*
 167  *  SMP locking strategy:
 168  *    hash table is protected with spinlock unix_table_lock
 169  *    each socket state is protected by separate spin lock.
 170  */
 171
 172 static inline unsigned int unix_hash_fold(__wsum n)
 173 {
 174         unsigned int hash = (__force unsigned int)csum_fold(n);
 175
 176         hash ^= hash>>8;
 177         return hash&(UNIX_HASH_SIZE-1);
 178 }
 179
 180 #define unix_peer(sk) (unix_sk(sk)->peer)
 181
 182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 183 {
 184         return unix_peer(osk) == sk;
 185 }
 186
 187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 188 {
 189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 190 }
 191
 192 static inline int unix_recvq_full(struct sock const *sk)
 193 {
 194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 195 }
 196
 197 struct sock *unix_peer_get(struct sock *s)
 198 {
 199         struct sock *peer;
 200
 201         unix_state_lock(s);
 202         peer = unix_peer(s);
 203         if (peer)
 204                 sock_hold(peer);
 205         unix_state_unlock(s);
 206         return peer;
 207 }
 208 EXPORT_SYMBOL_GPL(unix_peer_get);
 209
 210 static inline void unix_release_addr(struct unix_address *addr)
 211 {
 212         if (refcount_dec_and_test(&addr->refcnt))
 213                 kfree(addr);
 214 }
 215
 216 /*
 217  *      Check unix socket name:
 218  *              - should be not zero length.
 219  *              - if started by not zero, should be NULL terminated (FS object)
 220  *              - if started by zero, it is abstract name.
 221  */
 222
 223 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 224 {
 225         *hashp = 0;
 226
 227         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 228                 return -EINVAL;
 229         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 230                 return -EINVAL;
 231         if (sunaddr->sun_path[0]) {
 232                 /*
 233                  * This may look like an off by one error but it is a bit more
 234                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 235                  * sun_path[108] doesn't as such exist.  However in kernel space
 236                  * we are guaranteed that it is a valid memory location in our
 237                  * kernel address buffer.
 238                  */
 239                 ((char *)sunaddr)[len] = 0;
 240                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 241                 return len;
 242         }
 243
 244         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 245         return len;
 246 }
 247
 248 static void __unix_remove_socket(struct sock *sk)
 249 {
 250         sk_del_node_init(sk);
 251 }
 252
 253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 254 {
 255         WARN_ON(!sk_unhashed(sk));
 256         sk_add_node(sk, list);
 257 }
 258
 259 static inline void unix_remove_socket(struct sock *sk)
 260 {
 261         spin_lock(&unix_table_lock);
 262         __unix_remove_socket(sk);
 263         spin_unlock(&unix_table_lock);
 264 }
 265
 266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 267 {
 268         spin_lock(&unix_table_lock);
 269         __unix_insert_socket(list, sk);
 270         spin_unlock(&unix_table_lock);
 271 }
 272
 273 static struct sock *__unix_find_socket_byname(struct net *net,
 274                                               struct sockaddr_un *sunname,
 275                                               int len, int type, unsigned int hash)
 276 {
 277         struct sock *s;
 278
 279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 280                 struct unix_sock *u = unix_sk(s);
 281
 282                 if (!net_eq(sock_net(s), net))
 283                         continue;
 284
 285                 if (u->addr->len == len &&
 286                     !memcmp(u->addr->name, sunname, len))
 287                         return s;
 288         }
 289         return NULL;
 290 }
 291
 292 static inline struct sock *unix_find_socket_byname(struct net *net,
 293                                                    struct sockaddr_un *sunname,
 294                                                    int len, int type,
 295                                                    unsigned int hash)
 296 {
 297         struct sock *s;
 298
 299         spin_lock(&unix_table_lock);
 300         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 301         if (s)
 302                 sock_hold(s);
 303         spin_unlock(&unix_table_lock);
 304         return s;
 305 }
 306
 307 static struct sock *unix_find_socket_byinode(struct inode *i)
 308 {
 309         struct sock *s;
 310
 311         spin_lock(&unix_table_lock);
 312         sk_for_each(s,
 313                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 314                 struct dentry *dentry = unix_sk(s)->path.dentry;
 315
 316                 if (dentry && d_backing_inode(dentry) == i) {
 317                         sock_hold(s);
 318                         goto found;
 319                 }
 320         }
 321         s = NULL;
 322 found:
 323         spin_unlock(&unix_table_lock);
 324         return s;
 325 }
 326
 327 /* Support code for asymmetrically connected dgram sockets
 328  *
 329  * If a datagram socket is connected to a socket not itself connected
 330  * to the first socket (eg, /dev/log), clients may only enqueue more
 331  * messages if the present receive queue of the server socket is not
 332  * "too large". This means there's a second writeability condition
 333  * poll and sendmsg need to test. The dgram recv code will do a wake
 334  * up on the peer_wait wait queue of a socket upon reception of a
 335  * datagram which needs to be propagated to sleeping would-be writers
 336  * since these might not have sent anything so far. This can't be
 337  * accomplished via poll_wait because the lifetime of the server
 338  * socket might be less than that of its clients if these break their
 339  * association with it or if the server socket is closed while clients
 340  * are still connected to it and there's no way to inform "a polling
 341  * implementation" that it should let go of a certain wait queue
 342  *
 343  * In order to propagate a wake up, a wait_queue_entry_t of the client
 344  * socket is enqueued on the peer_wait queue of the server socket
 345  * whose wake function does a wake_up on the ordinary client socket
 346  * wait queue. This connection is established whenever a write (or
 347  * poll for write) hit the flow control condition and broken when the
 348  * association to the server socket is dissolved or after a wake up
 349  * was relayed.
 350  */
 351
 352 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 353                                       void *key)
 354 {
 355         struct unix_sock *u;
 356         wait_queue_head_t *u_sleep;
 357
 358         u = container_of(q, struct unix_sock, peer_wake);
 359
 360         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 361                             q);
 362         u->peer_wake.private = NULL;
 363
 364         /* relaying can only happen while the wq still exists */
 365         u_sleep = sk_sleep(&u->sk);
 366         if (u_sleep)
 367                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 368
 369         return 0;
 370 }
 371
 372 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 373 {
 374         struct unix_sock *u, *u_other;
 375         int rc;
 376
 377         u = unix_sk(sk);
 378         u_other = unix_sk(other);
 379         rc = 0;
 380         spin_lock(&u_other->peer_wait.lock);
 381
 382         if (!u->peer_wake.private) {
 383                 u->peer_wake.private = other;
 384                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 385
 386                 rc = 1;
 387         }
 388
 389         spin_unlock(&u_other->peer_wait.lock);
 390         return rc;
 391 }
 392
 393 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 394                                             struct sock *other)
 395 {
 396         struct unix_sock *u, *u_other;
 397
 398         u = unix_sk(sk);
 399         u_other = unix_sk(other);
 400         spin_lock(&u_other->peer_wait.lock);
 401
 402         if (u->peer_wake.private == other) {
 403                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 404                 u->peer_wake.private = NULL;
 405         }
 406
 407         spin_unlock(&u_other->peer_wait.lock);
 408 }
 409
 410 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 411                                                    struct sock *other)
 412 {
 413         unix_dgram_peer_wake_disconnect(sk, other);
 414         wake_up_interruptible_poll(sk_sleep(sk),
 415                                    EPOLLOUT |
 416                                    EPOLLWRNORM |
 417                                    EPOLLWRBAND);
 418 }
 419
 420 /* preconditions:
 421  *      - unix_peer(sk) == other
 422  *      - association is stable
 423  */
 424 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 425 {
 426         int connected;
 427
 428         connected = unix_dgram_peer_wake_connect(sk, other);
 429
 430         /* If other is SOCK_DEAD, we want to make sure we signal
 431          * POLLOUT, such that a subsequent write() can get a
 432          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 433          * to other and its full, we will hang waiting for POLLOUT.
 434          */
 435         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 436                 return 1;
 437
 438         if (connected)
 439                 unix_dgram_peer_wake_disconnect(sk, other);
 440
 441         return 0;
 442 }
 443
 444 static int unix_writable(const struct sock *sk)
 445 {
 446         return sk->sk_state != TCP_LISTEN &&
 447                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 448 }
 449
 450 static void unix_write_space(struct sock *sk)
 451 {
 452         struct socket_wq *wq;
 453
 454         rcu_read_lock();
 455         if (unix_writable(sk)) {
 456                 wq = rcu_dereference(sk->sk_wq);
 457                 if (skwq_has_sleeper(wq))
 458                         wake_up_interruptible_sync_poll(&wq->wait,
 459                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 460                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 461         }
 462         rcu_read_unlock();
 463 }
 464
 465 /* When dgram socket disconnects (or changes its peer), we clear its receive
 466  * queue of packets arrived from previous peer. First, it allows to do
 467  * flow control based only on wmem_alloc; second, sk connected to peer
 468  * may receive messages only from that peer. */
 469 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 470 {
 471         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 472                 skb_queue_purge(&sk->sk_receive_queue);
 473                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 474
 475                 /* If one link of bidirectional dgram pipe is disconnected,
 476                  * we signal error. Messages are lost. Do not make this,
 477                  * when peer was not connected to us.
 478                  */
 479                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 480                         other->sk_err = ECONNRESET;
 481                         other->sk_error_report(other);
 482                 }
 483         }
 484 }
 485
 486 static void unix_sock_destructor(struct sock *sk)
 487 {
 488         struct unix_sock *u = unix_sk(sk);
 489
 490         skb_queue_purge(&sk->sk_receive_queue);
 491
 492         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 493         WARN_ON(!sk_unhashed(sk));
 494         WARN_ON(sk->sk_socket);
 495         if (!sock_flag(sk, SOCK_DEAD)) {
 496                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 497                 return;
 498         }
 499
 500         if (u->addr)
 501                 unix_release_addr(u->addr);
 502
 503         atomic_long_dec(&unix_nr_socks);
 504         local_bh_disable();
 505         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 506         local_bh_enable();
 507 #ifdef UNIX_REFCNT_DEBUG
 508         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 509                 atomic_long_read(&unix_nr_socks));
 510 #endif
 511 }
 512
 513 static void unix_release_sock(struct sock *sk, int embrion)
 514 {
 515         struct unix_sock *u = unix_sk(sk);
 516         struct path path;
 517         struct sock *skpair;
 518         struct sk_buff *skb;
 519         int state;
 520
 521         unix_remove_socket(sk);
 522
 523         /* Clear state */
 524         unix_state_lock(sk);
 525         sock_orphan(sk);
 526         sk->sk_shutdown = SHUTDOWN_MASK;
 527         path         = u->path;
 528         u->path.dentry = NULL;
 529         u->path.mnt = NULL;
 530         state = sk->sk_state;
 531         sk->sk_state = TCP_CLOSE;
 532         unix_state_unlock(sk);
 533
 534         wake_up_interruptible_all(&u->peer_wait);
 535
 536         skpair = unix_peer(sk);
 537
 538         if (skpair != NULL) {
 539                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 540                         unix_state_lock(skpair);
 541                         /* No more writes */
 542                         skpair->sk_shutdown = SHUTDOWN_MASK;
 543                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 544                                 skpair->sk_err = ECONNRESET;
 545                         unix_state_unlock(skpair);
 546                         skpair->sk_state_change(skpair);
 547                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 548                 }
 549
 550                 unix_dgram_peer_wake_disconnect(sk, skpair);
 551                 sock_put(skpair); /* It may now die */
 552                 unix_peer(sk) = NULL;
 553         }
 554
 555         /* Try to flush out this socket. Throw out buffers at least */
 556
 557         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 558                 if (state == TCP_LISTEN)
 559                         unix_release_sock(skb->sk, 1);
 560                 /* passed fds are erased in the kfree_skb hook        */
 561                 UNIXCB(skb).consumed = skb->len;
 562                 kfree_skb(skb);
 563         }
 564
 565         if (path.dentry)
 566                 path_put(&path);
 567
 568         sock_put(sk);
 569
 570         /* ---- Socket is dead now and most probably destroyed ---- */
 571
 572         /*
 573          * Fixme: BSD difference: In BSD all sockets connected to us get
 574          *        ECONNRESET and we die on the spot. In Linux we behave
 575          *        like files and pipes do and wait for the last
 576          *        dereference.
 577          *
 578          * Can't we simply set sock->err?
 579          *
 580          *        What the above comment does talk about? --ANK(980817)
 581          */
 582
 583         if (unix_tot_inflight)
 584                 unix_gc();              /* Garbage collect fds */
 585 }
 586
 587 static void init_peercred(struct sock *sk)
 588 {
 589         put_pid(sk->sk_peer_pid);
 590         if (sk->sk_peer_cred)
 591                 put_cred(sk->sk_peer_cred);
 592         sk->sk_peer_pid  = get_pid(task_tgid(current));
 593         sk->sk_peer_cred = get_current_cred();
 594 }
 595
 596 static void copy_peercred(struct sock *sk, struct sock *peersk)
 597 {
 598         put_pid(sk->sk_peer_pid);
 599         if (sk->sk_peer_cred)
 600                 put_cred(sk->sk_peer_cred);
 601         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 602         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 603 }
 604
 605 static int unix_listen(struct socket *sock, int backlog)
 606 {
 607         int err;
 608         struct sock *sk = sock->sk;
 609         struct unix_sock *u = unix_sk(sk);
 610         struct pid *old_pid = NULL;
 611
 612         err = -EOPNOTSUPP;
 613         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 614                 goto out;       /* Only stream/seqpacket sockets accept */
 615         err = -EINVAL;
 616         if (!u->addr)
 617                 goto out;       /* No listens on an unbound socket */
 618         unix_state_lock(sk);
 619         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 620                 goto out_unlock;
 621         if (backlog > sk->sk_max_ack_backlog)
 622                 wake_up_interruptible_all(&u->peer_wait);
 623         sk->sk_max_ack_backlog  = backlog;
 624         sk->sk_state            = TCP_LISTEN;
 625         /* set credentials so connect can copy them */
 626         init_peercred(sk);
 627         err = 0;
 628
 629 out_unlock:
 630         unix_state_unlock(sk);
 631         put_pid(old_pid);
 632 out:
 633         return err;
 634 }
 635
 636 static int unix_release(struct socket *);
 637 static int unix_bind(struct socket *, struct sockaddr *, int);
 638 static int unix_stream_connect(struct socket *, struct sockaddr *,
 639                                int addr_len, int flags);
 640 static int unix_socketpair(struct socket *, struct socket *);
 641 static int unix_accept(struct socket *, struct socket *, int, bool);
 642 static int unix_getname(struct socket *, struct sockaddr *, int);
 643 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 644 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 645                                     poll_table *);
 646 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 647 #ifdef CONFIG_COMPAT
 648 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 649 #endif
 650 static int unix_shutdown(struct socket *, int);
 651 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 652 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 653 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 654                                     size_t size, int flags);
 655 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 656                                        struct pipe_inode_info *, size_t size,
 657                                        unsigned int flags);
 658 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 659 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 660 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 661                               int, int);
 662 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 663 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 664                                   int);
 665
 666 static int unix_set_peek_off(struct sock *sk, int val)
 667 {
 668         struct unix_sock *u = unix_sk(sk);
 669
 670         if (mutex_lock_interruptible(&u->iolock))
 671                 return -EINTR;
 672
 673         sk->sk_peek_off = val;
 674         mutex_unlock(&u->iolock);
 675
 676         return 0;
 677 }
 678
 679
 680 static const struct proto_ops unix_stream_ops = {
 681         .family =       PF_UNIX,
 682         .owner =        THIS_MODULE,
 683         .release =      unix_release,
 684         .bind =         unix_bind,
 685         .connect =      unix_stream_connect,
 686         .socketpair =   unix_socketpair,
 687         .accept =       unix_accept,
 688         .getname =      unix_getname,
 689         .poll =         unix_poll,
 690         .ioctl =        unix_ioctl,
 691 #ifdef CONFIG_COMPAT
 692         .compat_ioctl = unix_compat_ioctl,
 693 #endif
 694         .listen =       unix_listen,
 695         .shutdown =     unix_shutdown,
 696         .setsockopt =   sock_no_setsockopt,
 697         .getsockopt =   sock_no_getsockopt,
 698         .sendmsg =      unix_stream_sendmsg,
 699         .recvmsg =      unix_stream_recvmsg,
 700         .mmap =         sock_no_mmap,
 701         .sendpage =     unix_stream_sendpage,
 702         .splice_read =  unix_stream_splice_read,
 703         .set_peek_off = unix_set_peek_off,
 704 };
 705
 706 static const struct proto_ops unix_dgram_ops = {
 707         .family =       PF_UNIX,
 708         .owner =        THIS_MODULE,
 709         .release =      unix_release,
 710         .bind =         unix_bind,
 711         .connect =      unix_dgram_connect,
 712         .socketpair =   unix_socketpair,
 713         .accept =       sock_no_accept,
 714         .getname =      unix_getname,
 715         .poll =         unix_dgram_poll,
 716         .ioctl =        unix_ioctl,
 717 #ifdef CONFIG_COMPAT
 718         .compat_ioctl = unix_compat_ioctl,
 719 #endif
 720         .listen =       sock_no_listen,
 721         .shutdown =     unix_shutdown,
 722         .setsockopt =   sock_no_setsockopt,
 723         .getsockopt =   sock_no_getsockopt,
 724         .sendmsg =      unix_dgram_sendmsg,
 725         .recvmsg =      unix_dgram_recvmsg,
 726         .mmap =         sock_no_mmap,
 727         .sendpage =     sock_no_sendpage,
 728         .set_peek_off = unix_set_peek_off,
 729 };
 730
 731 static const struct proto_ops unix_seqpacket_ops = {
 732         .family =       PF_UNIX,
 733         .owner =        THIS_MODULE,
 734         .release =      unix_release,
 735         .bind =         unix_bind,
 736         .connect =      unix_stream_connect,
 737         .socketpair =   unix_socketpair,
 738         .accept =       unix_accept,
 739         .getname =      unix_getname,
 740         .poll =         unix_dgram_poll,
 741         .ioctl =        unix_ioctl,
 742 #ifdef CONFIG_COMPAT
 743         .compat_ioctl = unix_compat_ioctl,
 744 #endif
 745         .listen =       unix_listen,
 746         .shutdown =     unix_shutdown,
 747         .setsockopt =   sock_no_setsockopt,
 748         .getsockopt =   sock_no_getsockopt,
 749         .sendmsg =      unix_seqpacket_sendmsg,
 750         .recvmsg =      unix_seqpacket_recvmsg,
 751         .mmap =         sock_no_mmap,
 752         .sendpage =     sock_no_sendpage,
 753         .set_peek_off = unix_set_peek_off,
 754 };
 755
 756 static struct proto unix_proto = {
 757         .name                   = "UNIX",
 758         .owner                  = THIS_MODULE,
 759         .obj_size               = sizeof(struct unix_sock),
 760 };
 761
 /*
  * Allocate and initialise a new AF_UNIX sock in namespace @net.
  *
  * @sock is handed to sock_init_data(); unix_stream_connect() passes NULL.
  * @kern is passed through to sk_alloc().
  *
  * Returns the new sock, or NULL if the global socket count exceeds
  * 2 * get_max_files() or sk_alloc() fails.  The NULL result does not tell
  * the caller which of the two happened.
  */
 762 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 763 {
 764         struct sock *sk = NULL;
 765         struct unix_sock *u;
 766
             /* Count the new socket first; the out: path undoes this on failure. */
 767         atomic_long_inc(&unix_nr_socks);
 768         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 769                 goto out;
 770
 771         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 772         if (!sk)
 773                 goto out;
 774
 775         sock_init_data(sock, sk);
 776
             /* AF_UNIX specific overrides of the generic sock fields. */
 777         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 778         sk->sk_write_space      = unix_write_space;
 779         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 780         sk->sk_destruct         = unix_sock_destructor;
             /* Private state: no bound path, nothing in flight, not on any list. */
 781         u         = unix_sk(sk);
 782         u->path.dentry = NULL;
 783         u->path.mnt = NULL;
 784         spin_lock_init(&u->lock);
 785         atomic_long_set(&u->inflight, 0);
 786         INIT_LIST_HEAD(&u->link);
 787         mutex_init(&u->iolock); /* single task reading lock */
 788         mutex_init(&u->bindlock); /* single task binding lock */
 789         init_waitqueue_head(&u->peer_wait);
 790         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
             /* Hash the sock onto the chain returned by unix_sockets_unbound(). */
 791         unix_insert_socket(unix_sockets_unbound(sk), sk);
 792 out:
             /* Shared exit: reached both on success and on failure. */
 793         if (sk == NULL)
 794                 atomic_long_dec(&unix_nr_socks);
 795         else {
                     /* Per-protocol in-use accounting, done with BHs disabled. */
 796                 local_bh_disable();
 797                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 798                 local_bh_enable();
 799         }
 800         return sk;
 801 }
 802
 /*
  * Create hook for AF_UNIX sockets.
  *
  * @protocol must be 0 or PF_UNIX.  The ops table is chosen from sock->type;
  * SOCK_RAW is rewritten to SOCK_DGRAM.  The sock itself comes from
  * unix_create1().
  *
  * Returns 0 on success, -EPROTONOSUPPORT for any other protocol,
  * -ESOCKTNOSUPPORT for an unhandled socket type, and -ENOMEM whenever
  * unix_create1() returns NULL (which includes its socket-count limit).
  */
 803 static int unix_create(struct net *net, struct socket *sock, int protocol,
 804                        int kern)
 805 {
 806         if (protocol && protocol != PF_UNIX)
 807                 return -EPROTONOSUPPORT;
 808
 809         sock->state = SS_UNCONNECTED;
 810
 811         switch (sock->type) {
 812         case SOCK_STREAM:
 813                 sock->ops = &unix_stream_ops;
 814                 break;
 815                 /*
 816                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 817                  *      nothing uses it.
 818                  */
 819         case SOCK_RAW:
 820                 sock->type = SOCK_DGRAM;
 821                 /* fall through */
 822         case SOCK_DGRAM:
 823                 sock->ops = &unix_dgram_ops;
 824                 break;
 825         case SOCK_SEQPACKET:
 826                 sock->ops = &unix_seqpacket_ops;
 827                 break;
 828         default:
 829                 return -ESOCKTNOSUPPORT;
 830         }
 831
 832         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 833 }
 834
 /*
  * Release hook: hand the sock attached to @sock to unix_release_sock() and
  * clear sock->sk.  A socket with no sock attached is left alone.
  * Always returns 0.
  */
 835 static int unix_release(struct socket *sock)
 836 {
 837         struct sock *sk = sock->sk;
 838
 839         if (!sk)
 840                 return 0;
 841
 842         unix_release_sock(sk, 0);
 843         sock->sk = NULL;
 844
 845         return 0;
 846 }
 847
 /*
  * Bind @sock to an automatically chosen name.
  *
  * The generated name has sun_path[0] == 0 (the buffer comes from kzalloc())
  * followed by the file-wide counter "ordernum" printed as "%05x".  Names
  * already present in the hash are skipped by retrying with the next counter
  * value.
  *
  * Returns 0 on success or if the socket already has an address, -ENOMEM if
  * the address cannot be allocated, -ENOSPC after 0xFFFFF unsuccessful
  * retries, or the error from mutex_lock_interruptible().
  *
  * NOTE(review): ordernum is read by sprintf() before unix_table_lock is
  * taken but is only advanced under that lock - confirm this is acceptable.
  */
 848 static int unix_autobind(struct socket *sock)
 849 {
 850         struct sock *sk = sock->sk;
 851         struct net *net = sock_net(sk);
 852         struct unix_sock *u = unix_sk(sk);
 853         static u32 ordernum = 1;
 854         struct unix_address *addr;
 855         int err;
 856         unsigned int retries = 0;
 857
             /* bindlock is held for the whole operation, as in unix_bind(). */
 858         err = mutex_lock_interruptible(&u->bindlock);
 859         if (err)
 860                 return err;
 861
             /* Already bound: nothing to do, report success. */
 862         err = 0;
 863         if (u->addr)
 864                 goto out;
 865
             /* Header plus sun_family plus 16 bytes for the generated sun_path. */
 866         err = -ENOMEM;
 867         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 868         if (!addr)
 869                 goto out;
 870
 871         addr->name->sun_family = AF_UNIX;
 872         refcount_set(&addr->refcnt, 1);
 873
 874 retry:
             /* len = hex digits written + 1 for sun_path[0] + sun_family. */
 875         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 876         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 877
 878         spin_lock(&unix_table_lock);
             /* Advance the counter, wrapping at 20 bits (five hex digits). */
 879         ordernum = (ordernum+1)&0xFFFFF;
 880
 881         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 882                                       addr->hash)) {
 883                 spin_unlock(&unix_table_lock);
 884                 /*
 885                  * __unix_find_socket_byname() may take long time if many names
 886                  * are already in use.
 887                  */
 888                 cond_resched();
 889                 /* Give up if all names seems to be in use. */
 890                 if (retries++ == 0xFFFFF) {
 891                         err = -ENOSPC;
 892                         kfree(addr);
 893                         goto out;
 894                 }
 895                 goto retry;
 896         }
             /* Name is free: fold the socket type into the hash, as unix_bind() does. */
 897         addr->hash ^= sk->sk_type;
 898
             /*
              * Still under unix_table_lock: unhash the sock, publish the
              * address with release semantics, rehash under the new hash.
              */
 899         __unix_remove_socket(sk);
 900         smp_store_release(&u->addr, addr);
 901         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 902         spin_unlock(&unix_table_lock);
 903         err = 0;
 904
 905 out:    mutex_unlock(&u->bindlock);
 906         return err;
 907 }
 908
 /*
  * Find the socket that @sunname refers to.
  *
  * A name whose first sun_path byte is non-zero is resolved as a filesystem
  * path: the caller needs write permission on the inode, the inode must be a
  * socket, and unix_find_socket_byinode() must find a sock for it.  Any other
  * name is looked up with unix_find_socket_byname() using @len and @hash.
  *
  * Returns the sock on success; the callers in this file drop it with
  * sock_put() when they are done.  On failure returns NULL and stores a
  * negative errno in *@error: -ECONNREFUSED, -EPROTOTYPE (path lookup found a
  * socket of a different @type), or the error from kern_path() /
  * inode_permission().
  */
 909 static struct sock *unix_find_other(struct net *net,
 910                                     struct sockaddr_un *sunname, int len,
 911                                     int type, unsigned int hash, int *error)
 912 {
 913         struct sock *u;
 914         struct path path;
 915         int err = 0;
 916
 917         if (sunname->sun_path[0]) {
 918                 struct inode *inode;
 919                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 920                 if (err)
 921                         goto fail;
 922                 inode = d_backing_inode(path.dentry);
 923                 err = inode_permission(inode, MAY_WRITE);
 924                 if (err)
 925                         goto put_fail;
 926
 927                 err = -ECONNREFUSED;
 928                 if (!S_ISSOCK(inode->i_mode))
 929                         goto put_fail;
 930                 u = unix_find_socket_byinode(inode);
 931                 if (!u)
 932                         goto put_fail;
 933
                     /* atime is only updated when the type matches, i.e. on success. */
 934                 if (u->sk_type == type)
 935                         touch_atime(&path);
 936
                     /* The path reference is dropped here on both outcomes below. */
 937                 path_put(&path);
 938
 939                 err = -EPROTOTYPE;
 940                 if (u->sk_type != type) {
 941                         sock_put(u);
 942                         goto fail;
 943                 }
 944         } else {
 945                 err = -ECONNREFUSED;
 946                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 947                 if (u) {
                             /* Touch atime of the found sock's own path, if it has one. */
 948                         struct dentry *dentry;
 949                         dentry = unix_sk(u)->path.dentry;
 950                         if (dentry)
 951                                 touch_atime(&unix_sk(u)->path);
 952                 } else
 953                         goto fail;
 954         }
 955         return u;
 956
 957 put_fail:
 958         path_put(&path);
 959 fail:
 960         *error = err;
 961         return NULL;
 962 }
 963
 /*
  * Create a filesystem node for @sun_path with the given @mode.
  *
  * The path is resolved relative to AT_FDCWD with kern_path_create(); the
  * node is created with vfs_mknod() after security_path_mknod() agrees.
  * On success @res receives referenced copies (mntget()/dget()) of the mount
  * and the new dentry.  @res is not written on failure.
  *
  * Returns 0 on success or the negative errno from whichever step failed.
  */
 964 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 965 {
 966         struct dentry *dentry;
 967         struct path path;
 968         int err = 0;
 969         /*
 970          * Get the parent directory, calculate the hash for last
 971          * component.
 972          */
 973         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 974         err = PTR_ERR(dentry);
 975         if (IS_ERR(dentry))
 976                 return err;
 977
 978         /*
 979          * All right, let's create it.
 980          */
 981         err = security_path_mknod(&path, dentry, mode, 0);
 982         if (!err) {
 983                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 984                 if (!err) {
 985                         res->mnt = mntget(path.mnt);
 986                         res->dentry = dget(dentry);
 987                 }
 988         }
             /* Paired with kern_path_create(); runs on success and failure alike. */
 989         done_path_create(&path, dentry);
 990         return err;
 991 }
 992
 /*
  * bind(2) for AF_UNIX sockets.
  *
  * An address consisting only of sun_family (addr_len == sizeof(short)) is
  * handed to unix_autobind().  Otherwise unix_mkname() supplies the address
  * length to use (or a negative errno) and fills in the hash.  A name with a
  * non-zero sun_path[0] gets a socket node created via unix_mknod(); any
  * other name must not already be present in the name hash.
  *
  * Returns 0 on success or a negative errno: -EINVAL (address too short,
  * wrong family, or socket already bound), -EADDRINUSE, -ENOMEM, or an error
  * from unix_mkname(), unix_mknod() or mutex_lock_interruptible().
  *
  * NOTE(review): the filesystem node is created before bindlock is taken and
  * before the "already bound" check.  The later error paths only path_put()
  * the reference; nothing visible here removes the node again - confirm
  * whether that is intended.
  */
 993 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 994 {
 995         struct sock *sk = sock->sk;
 996         struct net *net = sock_net(sk);
 997         struct unix_sock *u = unix_sk(sk);
 998         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 999         char *sun_path = sunaddr->sun_path;
1000         int err;
1001         unsigned int hash;
1002         struct unix_address *addr;
1003         struct hlist_head *list;
1004         struct path path = { };
1005
1006         err = -EINVAL;
1007         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1008             sunaddr->sun_family != AF_UNIX)
1009                 goto out;
1010
             /* Family only, no name: pick one automatically. */
1011         if (addr_len == sizeof(short)) {
1012                 err = unix_autobind(sock);
1013                 goto out;
1014         }
1015
1016         err = unix_mkname(sunaddr, addr_len, &hash);
1017         if (err < 0)
1018                 goto out;
1019         addr_len = err;
1020
             /* Filesystem name: create the node; path holds references on success. */
1021         if (sun_path[0]) {
1022                 umode_t mode = S_IFSOCK |
1023                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1024                 err = unix_mknod(sun_path, mode, &path);
1025                 if (err) {
1026                         if (err == -EEXIST)
1027                                 err = -EADDRINUSE;
1028                         goto out;
1029                 }
1030         }
1031
1032         err = mutex_lock_interruptible(&u->bindlock);
1033         if (err)
1034                 goto out_put;
1035
             /* A socket can only be bound once. */
1036         err = -EINVAL;
1037         if (u->addr)
1038                 goto out_up;
1039
1040         err = -ENOMEM;
1041         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1042         if (!addr)
1043                 goto out_up;
1044
1045         memcpy(addr->name, sunaddr, addr_len);
1046         addr->len = addr_len;
1047         addr->hash = hash ^ sk->sk_type;
1048         refcount_set(&addr->refcnt, 1);
1049
1050         if (sun_path[0]) {
                     /*
                      * Filesystem sockets are chained by inode number, not by
                      * name hash; addr->hash is set to UNIX_HASH_SIZE instead.
                      */
1051                 addr->hash = UNIX_HASH_SIZE;
1052                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1053                 spin_lock(&unix_table_lock);
1054                 u->path = path;
1055                 list = &unix_socket_table[hash];
1056         } else {
                     /* Other names must be unique for this type within the hash. */
1057                 spin_lock(&unix_table_lock);
1058                 err = -EADDRINUSE;
1059                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1060                                               sk->sk_type, hash)) {
1061                         unix_release_addr(addr);
1062                         goto out_unlock;
1063                 }
1064
1065                 list = &unix_socket_table[addr->hash];
1066         }
1067
             /*
              * Under unix_table_lock: unhash the sock, publish the address
              * with release semantics, then rehash onto the chosen chain.
              */
1068         err = 0;
1069         __unix_remove_socket(sk);
1070         smp_store_release(&u->addr, addr);
1071         __unix_insert_socket(list, sk);
1072
1073 out_unlock:
1074         spin_unlock(&unix_table_lock);
1075 out_up:
1076         mutex_unlock(&u->bindlock);
1077 out_put:
             /* path is zero-initialised, so this is also reached with an empty path. */
1078         if (err)
1079                 path_put(&path);
1080 out:
1081         return err;
1082 }
1083
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * When @sk2 is NULL or is the same sock as @sk1, only @sk1 is locked.
 * Otherwise the sock with the lower address is locked first and the other
 * one with the _nested variant, so the acquisition order is the same
 * whichever way round the two arguments are passed.
 */
1084 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1085 {
1086         if (unlikely(sk1 == sk2) || !sk2) {
1087                 unix_state_lock(sk1);
1088                 return;
1089         }
1090         if (sk1 < sk2) {
1091                 unix_state_lock(sk1);
1092                 unix_state_lock_nested(sk2);
1093         } else {
1094                 unix_state_lock(sk2);
1095                 unix_state_lock_nested(sk1);
1096         }
1097 }
1098
/*
 * Counterpart of unix_state_double_lock(): release the state lock of @sk1
 * and, unless @sk2 is NULL or the same sock, that of @sk2 as well.
 */
1099 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1100 {
1101         if (unlikely(sk1 == sk2) || !sk2) {
1102                 unix_state_unlock(sk1);
1103                 return;
1104         }
1105         unix_state_unlock(sk1);
1106         unix_state_unlock(sk2);
1107 }
1108
/*
 * connect(2) for datagram sockets: set, replace or clear the default peer.
 *
 * With sa_family == AF_UNSPEC the current peer is dropped.  Otherwise the
 * address is resolved with unix_find_other() and becomes the new peer,
 * provided unix_may_send() and security_unix_may_send() allow it.  A socket
 * with SOCK_PASSCRED set and no address is autobound first.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a too short address,
 * -EPERM when unix_may_send() refuses, or the error from unix_mkname(),
 * unix_autobind(), unix_find_other() or security_unix_may_send().
 */
1109 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1110                               int alen, int flags)
1111 {
1112         struct sock *sk = sock->sk;
1113         struct net *net = sock_net(sk);
1114         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1115         struct sock *other;
1116         unsigned int hash;
1117         int err;
1118
1119         err = -EINVAL;
1120         if (alen < offsetofend(struct sockaddr, sa_family))
1121                 goto out;
1122
1123         if (addr->sa_family != AF_UNSPEC) {
1124                 err = unix_mkname(sunaddr, alen, &hash);
1125                 if (err < 0)
1126                         goto out;
1127                 alen = err;
1128
1129                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1130                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1131                         goto out;
1132
1133 restart:
1134                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1135                 if (!other)
1136                         goto out;
1137
1138                 unix_state_double_lock(sk, other);
1139
1140                 /* Apparently VFS overslept socket death. Retry. */
1141                 if (sock_flag(other, SOCK_DEAD)) {
1142                         unix_state_double_unlock(sk, other);
1143                         sock_put(other);
1144                         goto restart;
1145                 }
1146
1147                 err = -EPERM;
1148                 if (!unix_may_send(sk, other))
1149                         goto out_unlock;
1150
1151                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1152                 if (err)
1153                         goto out_unlock;
1154
1155         } else {
1156                 /*
1157                  *      1003.1g breaking connected state with AF_UNSPEC
1158                  */
1159                 other = NULL;
1160                 unix_state_double_lock(sk, other);
1161         }
1162
1163         /*
1164          * If it was connected, reconnect.
1165          */
1166         if (unix_peer(sk)) {
1167                 struct sock *old_peer = unix_peer(sk);
1168                 unix_peer(sk) = other;
1169                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1170
1171                 unix_state_double_unlock(sk, other);
1172
                     /* Done after the locks are dropped; skipped when reconnecting to the same peer. */
1173                 if (other != old_peer)
1174                         unix_dgram_disconnected(sk, old_peer);
                     /* Drop the reference that went with the old peer pointer. */
1175                 sock_put(old_peer);
1176         } else {
                     /* The reference from unix_find_other() now belongs to the peer pointer. */
1177                 unix_peer(sk) = other;
1178                 unix_state_double_unlock(sk, other);
1179         }
1180         return 0;
1181
1182 out_unlock:
1183         unix_state_double_unlock(sk, other);
1184         sock_put(other);
1185 out:
1186         return err;
1187 }
1188
/*
 * Wait on @other's peer_wait queue for up to @timeo, but only if @other is
 * not dead, not shut down for receive, and its receive queue is full.
 *
 * The state lock of @other is released in here without being taken, so the
 * caller is expected to hold it on entry; it is not re-acquired.
 *
 * Returns the value from schedule_timeout(), or @timeo unchanged when the
 * conditions for sleeping were not met.
 */
1189 static long unix_wait_for_peer(struct sock *other, long timeo)
1190 {
1191         struct unix_sock *u = unix_sk(other);
1192         int sched;
1193         DEFINE_WAIT(wait);
1194
             /* Queue ourselves before sampling the condition and dropping the lock. */
1195         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1196
1197         sched = !sock_flag(other, SOCK_DEAD) &&
1198                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1199                 unix_recvq_full(other);
1200
1201         unix_state_unlock(other);
1202
1203         if (sched)
1204                 timeo = schedule_timeout(timeo);
1205
1206         finish_wait(&u->peer_wait, &wait);
1207         return timeo;
1208 }
1209
/*
 * connect(2) for the connection-oriented socket types.
 *
 * A new sock (newsk) and a one-byte skb are allocated up front.  The
 * listener is then looked up and checked; if its receive queue is full the
 * caller waits for up to the send timeout, or fails with -EAGAIN when the
 * timeout is zero.  On success newsk is set up as the peer of @sock's sock,
 * inherits the listener's address and path, and the skb is queued on the
 * listener's receive queue.
 *
 * Returns 0 on success or a negative errno: -ENOMEM, -ECONNREFUSED (target
 * is not listening or is shut down for receive), -EAGAIN, -EISCONN, -EINVAL,
 * the sock_intr_errno() value when a signal interrupts the wait, or an error
 * from unix_mkname(), unix_autobind(), unix_find_other() or
 * security_unix_stream_connect().
 */
1210 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1211                                int addr_len, int flags)
1212 {
1213         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1214         struct sock *sk = sock->sk;
1215         struct net *net = sock_net(sk);
1216         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1217         struct sock *newsk = NULL;
1218         struct sock *other = NULL;
1219         struct sk_buff *skb = NULL;
1220         unsigned int hash;
1221         int st;
1222         int err;
1223         long timeo;
1224
1225         err = unix_mkname(sunaddr, addr_len, &hash);
1226         if (err < 0)
1227                 goto out;
1228         addr_len = err;
1229
1230         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1231             (err = unix_autobind(sock)) != 0)
1232                 goto out;
1233
1234         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1235
1236         /* First of all allocate resources.
1237            If we will make it after state is locked,
1238            we will have to recheck all again in any case.
1239          */
1240
1241         err = -ENOMEM;
1242
1243         /* create new sock for complete connection */
1244         newsk = unix_create1(sock_net(sk), NULL, 0);
1245         if (newsk == NULL)
1246                 goto out;
1247
1248         /* Allocate skb for sending to listening sock */
1249         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1250         if (skb == NULL)
1251                 goto out;
1252
1253 restart:
1254         /*  Find listening sock. */
1255         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1256         if (!other)
1257                 goto out;
1258
1259         /* Latch state of peer */
1260         unix_state_lock(other);
1261
1262         /* Apparently VFS overslept socket death. Retry. */
1263         if (sock_flag(other, SOCK_DEAD)) {
1264                 unix_state_unlock(other);
1265                 sock_put(other);
1266                 goto restart;
1267         }
1268
1269         err = -ECONNREFUSED;
1270         if (other->sk_state != TCP_LISTEN)
1271                 goto out_unlock;
1272         if (other->sk_shutdown & RCV_SHUTDOWN)
1273                 goto out_unlock;
1274
1275         if (unix_recvq_full(other)) {
1276                 err = -EAGAIN;
1277                 if (!timeo)
1278                         goto out_unlock;
1279
1280                 timeo = unix_wait_for_peer(other, timeo);
1281
                     /*
                      * unix_wait_for_peer() released other's state lock, so the
                      * signal case leaves via out, not out_unlock.
                      */
1282                 err = sock_intr_errno(timeo);
1283                 if (signal_pending(current))
1284                         goto out;
1285                 sock_put(other);
1286                 goto restart;
1287         }
1288
1289         /* Latch our state.
1290
1291            It is tricky place. We need to grab our state lock and cannot
1292            drop lock on peer. It is dangerous because deadlock is
1293            possible. Connect to self case and simultaneous
1294            attempt to connect are eliminated by checking socket
1295            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1296            check this before attempt to grab lock.
1297
1298            Well, and we have to recheck the state after socket locked.
1299          */
1300         st = sk->sk_state;
1301
1302         switch (st) {
1303         case TCP_CLOSE:
1304                 /* This is ok... continue with connect */
1305                 break;
1306         case TCP_ESTABLISHED:
1307                 /* Socket is already connected */
1308                 err = -EISCONN;
1309                 goto out_unlock;
1310         default:
1311                 err = -EINVAL;
1312                 goto out_unlock;
1313         }
1314
1315         unix_state_lock_nested(sk);
1316
             /* State changed between the unlocked read and taking the lock: start over. */
1317         if (sk->sk_state != st) {
1318                 unix_state_unlock(sk);
1319                 unix_state_unlock(other);
1320                 sock_put(other);
1321                 goto restart;
1322         }
1323
1324         err = security_unix_stream_connect(sk, other, newsk);
1325         if (err) {
1326                 unix_state_unlock(sk);
1327                 goto out_unlock;
1328         }
1329
1330         /* The way is open! Fastly set all the necessary fields... */
1331
             /* newsk's peer pointer holds a reference on sk. */
1332         sock_hold(sk);
1333         unix_peer(newsk)        = sk;
1334         newsk->sk_state         = TCP_ESTABLISHED;
1335         newsk->sk_type          = sk->sk_type;
1336         init_peercred(newsk);
1337         newu = unix_sk(newsk);
1338         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1339         otheru = unix_sk(other);
1340
1341         /* copy address information from listening to new sock
1342          *
1343          * The contents of *(otheru->addr) and otheru->path
1344          * are seen fully set up here, since we have found
1345          * otheru in hash under unix_table_lock.  Insertion
1346          * into the hash chain we'd found it in had been done
1347          * in an earlier critical area protected by unix_table_lock,
1348          * the same one where we'd set *(otheru->addr) contents,
1349          * as well as otheru->path and otheru->addr itself.
1350          *
1351          * Using smp_store_release() here to set newu->addr
1352          * is enough to make those stores, as well as stores
1353          * to newu->path visible to anyone who gets newu->addr
1354          * by smp_load_acquire().  IOW, the same warranties
1355          * as for unix_sock instances bound in unix_bind() or
1356          * in unix_autobind().
1357          */
1358         if (otheru->path.dentry) {
1359                 path_get(&otheru->path);
1360                 newu->path = otheru->path;
1361         }
1362         refcount_inc(&otheru->addr->refcnt);
1363         smp_store_release(&newu->addr, otheru->addr);
1364
1365         /* Set credentials */
1366         copy_peercred(sk, other);
1367
1368         sock->state     = SS_CONNECTED;
1369         sk->sk_state    = TCP_ESTABLISHED;
             /* sk's peer pointer, set just below, holds a reference on newsk. */
1370         sock_hold(newsk);
1371
1372         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1373         unix_peer(sk)   = newsk;
1374
1375         unix_state_unlock(sk);
1376
1377         /* take ten and send info to listening sock */
             /*
              * unix_accept() later takes the new sock from skb->sk; presumably
              * sock_wmalloc() above tied the skb to newsk - confirm.
              */
1378         spin_lock(&other->sk_receive_queue.lock);
1379         __skb_queue_tail(&other->sk_receive_queue, skb);
1380         spin_unlock(&other->sk_receive_queue.lock);
1381         unix_state_unlock(other);
1382         other->sk_data_ready(other);
1383         sock_put(other);
1384         return 0;
1385
1386 out_unlock:
1387         if (other)
1388                 unix_state_unlock(other);
1389
1390 out:
             /* skb, newsk and other may each still be NULL here; newsk and other are tested. */
1391         kfree_skb(skb);
1392         if (newsk)
1393                 unix_release_sock(newsk, 0);
1394         if (other)
1395                 sock_put(other);
1396         return err;
1397 }
1398
/*
 * socketpair(2): make the socks of @socka and @sockb each other's peer.
 *
 * Each sock gets a reference for the peer pointer that points at it, and
 * both get their peer credentials initialised.  For every type other than
 * SOCK_DGRAM both ends are also marked established/connected.
 * Always returns 0.
 */
1399 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1400 {
             /* NB: "skb" here is sockb's struct sock, not a socket buffer. */
1401         struct sock *ska = socka->sk, *skb = sockb->sk;
1402
1403         /* Join our sockets back to back */
1404         sock_hold(ska);
1405         sock_hold(skb);
1406         unix_peer(ska) = skb;
1407         unix_peer(skb) = ska;
1408         init_peercred(ska);
1409         init_peercred(skb);
1410
1411         if (ska->sk_type != SOCK_DGRAM) {
1412                 ska->sk_state = TCP_ESTABLISHED;
1413                 skb->sk_state = TCP_ESTABLISHED;
1414                 socka->state  = SS_CONNECTED;
1415                 sockb->state  = SS_CONNECTED;
1416         }
1417         return 0;
1418 }
1419
/*
 * Copy the SOCK_PASSCRED and SOCK_PASSSEC flag bits from @old to @new.
 * Bits are only ever set on @new, never cleared.  Used by unix_accept() so
 * the accepted socket inherits them from the listening socket.
 */
1420 static void unix_sock_inherit_flags(const struct socket *old,
1421                                     struct socket *new)
1422 {
1423         if (test_bit(SOCK_PASSCRED, &old->flags))
1424                 set_bit(SOCK_PASSCRED, &new->flags);
1425         if (test_bit(SOCK_PASSSEC, &old->flags))
1426                 set_bit(SOCK_PASSSEC, &new->flags);
1427 }
1428
/*
 * accept(2): take one pending connection off the listening socket @sock and
 * attach its sock to @newsock.
 *
 * Pending connections are the skbs that unix_stream_connect() queues on the
 * listener's receive queue; the sock to accept is taken from skb->sk.
 *
 * Returns 0 on success, -EOPNOTSUPP for socket types other than SOCK_STREAM
 * and SOCK_SEQPACKET, -EINVAL if the socket is not listening or
 * skb_recv_datagram() returns nothing without an error, or the error
 * reported by skb_recv_datagram().
 */
1429 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1430                        bool kern)
1431 {
1432         struct sock *sk = sock->sk;
1433         struct sock *tsk;
1434         struct sk_buff *skb;
1435         int err;
1436
1437         err = -EOPNOTSUPP;
1438         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1439                 goto out;
1440
1441         err = -EINVAL;
1442         if (sk->sk_state != TCP_LISTEN)
1443                 goto out;
1444
1445         /* If socket state is TCP_LISTEN it cannot change (for now...),
1446          * so that no locks are necessary.
1447          */
1448
1449         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1450         if (!skb) {
1451                 /* This means receive shutdown. */
1452                 if (err == 0)
1453                         err = -EINVAL;
1454                 goto out;
1455         }
1456
             /* Remember the sock before the skb is freed. */
1457         tsk = skb->sk;
1458         skb_free_datagram(sk, skb);
             /* A queue slot was freed: wake tasks sleeping in unix_wait_for_peer(). */
1459         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1460
1461         /* attach accepted sock to socket */
1462         unix_state_lock(tsk);
1463         newsock->state = SS_CONNECTED;
1464         unix_sock_inherit_flags(sock, newsock);
1465         sock_graft(tsk, newsock);
1466         unix_state_unlock(tsk);
1467         return 0;
1468
1469 out:
1470         return err;
1471 }
1472
1473
/*
 * getsockname(2)/getpeername(2): copy an address into @uaddr.
 *
 * With @peer set the address of the connected peer is reported, otherwise
 * the socket's own.  A sock without an address yields just sun_family with
 * sun_path[0] cleared.
 *
 * Returns the number of address bytes written (sizeof(short) for the
 * address-less case), or -ENOTCONN when @peer is set and there is no peer.
 */
1474 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1475 {
1476         struct sock *sk = sock->sk;
1477         struct unix_address *addr;
1478         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1479         int err = 0;
1480
             /* Either branch leaves a reference on sk that is dropped before out:. */
1481         if (peer) {
1482                 sk = unix_peer_get(sk);
1483
1484                 err = -ENOTCONN;
1485                 if (!sk)
1486                         goto out;
1487                 err = 0;
1488         } else {
1489                 sock_hold(sk);
1490         }
1491
             /* Pairs with the smp_store_release() that publishes u->addr. */
1492         addr = smp_load_acquire(&unix_sk(sk)->addr);
1493         if (!addr) {
1494                 sunaddr->sun_family = AF_UNIX;
1495                 sunaddr->sun_path[0] = 0;
1496                 err = sizeof(short);
1497         } else {
1498                 err = addr->len;
1499                 memcpy(sunaddr, addr->name, addr->len);
1500         }
1501         sock_put(sk);
1502 out:
1503         return err;
1504 }
1505
/*
 * Copy the control data in @scm into the control block of @skb.
 *
 * pid (with a new reference from get_pid()), uid and gid are always copied
 * and the security data is attached via unix_get_secdata().  Passed file
 * descriptors are attached only when @send_fds is true and @scm carries
 * some.  The skb destructor is set to unix_destruct_scm in every case,
 * including when attaching the descriptors failed.
 *
 * Returns 0, or the result of unix_attach_fds().
 */
1506 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1507 {
1508         int err = 0;
1509
1510         UNIXCB(skb).pid  = get_pid(scm->pid);
1511         UNIXCB(skb).uid = scm->creds.uid;
1512         UNIXCB(skb).gid = scm->creds.gid;
1513         UNIXCB(skb).fp = NULL;
1514         unix_get_secdata(scm, skb);
1515         if (scm->fp && send_fds)
1516                 err = unix_attach_fds(scm, skb);
1517
1518         skb->destructor = unix_destruct_scm;
1519         return err;
1520 }
1521
/*
 * Decide whether credentials should accompany data sent from @sock to
 * @other: true if @sock has SOCK_PASSCRED set, if @other has no struct
 * socket attached, or if @other's socket has SOCK_PASSCRED set.
 */
1522 static bool unix_passcred_enabled(const struct socket *sock,
1523                                   const struct sock *other)
1524 {
1525         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1526                !other->sk_socket ||
1527                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1528 }
1529
1530 /*
1531  * Some apps rely on write() giving SCM_CREDENTIALS
1532  * We include credentials if source or destination socket
1533  * asserted SOCK_PASSCRED.
1534  */
1535 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1536                             const struct sock *other)
1537 {
             /* A pid is already recorded in the skb: leave its credentials alone. */
1538         if (UNIXCB(skb).pid)
1539                 return;
1540         if (unix_passcred_enabled(sock, other)) {
                     /* Fill in the current task's tgid (with a reference) and uid/gid. */
1541                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1542                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1543         }
1544 }
1545
/*
 * Initialise @scm for a send that carries no control message.
 *
 * scm_send() is run on an empty msghdr (msg_controllen == 0); if credential
 * passing is enabled for the @socket/@other pair, @scm is then filled with
 * the current task's tgid (a reference is taken) and uid/gid.
 *
 * Returns 0 on success or the error from scm_send().
 */
1546 static int maybe_init_creds(struct scm_cookie *scm,
1547                             struct socket *socket,
1548                             const struct sock *other)
1549 {
1550         int err;
1551         struct msghdr msg = { .msg_controllen = 0 };
1552
1553         err = scm_send(socket, &msg, scm, false);
1554         if (err)
1555                 return err;
1556
1557         if (unix_passcred_enabled(socket, other)) {
1558                 scm->pid = get_pid(task_tgid(current));
1559                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1560         }
1561         return err;
1562 }
1563
/*
 * Report whether the credentials stored in @skb's control block match those
 * in @scm: same pid pointer, equal uid and gid, and unix_secdata_eq()
 * agreeing on the security data.
 */
1564 static bool unix_skb_scm_eq(struct sk_buff *skb,
1565                             struct scm_cookie *scm)
1566 {
1567         const struct unix_skb_parms *u = &UNIXCB(skb);
1568
1569         return u->pid == scm->pid &&
1570                uid_eq(u->uid, scm->creds.uid) &&
1571                gid_eq(u->gid, scm->creds.gid) &&
1572                unix_secdata_eq(scm, skb);
1573 }
1574
1575 /*
1576  *      Send AF_UNIX data.
1577  */
1578
1579 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1580                               size_t len)
1581 {
1582         struct sock *sk = sock->sk;
1583         struct net *net = sock_net(sk);
1584         struct unix_sock *u = unix_sk(sk);
1585         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1586         struct sock *other = NULL;
1587         int namelen = 0; /* fake GCC */
1588         int err;
1589         unsigned int hash;
1590         struct sk_buff *skb;
1591         long timeo;
1592         struct scm_cookie scm;
1593         int data_len = 0;
1594         int sk_locked;
1595
1596         wait_for_unix_gc();
1597         err = scm_send(sock, msg, &scm, false);
1598         if (err < 0)
1599                 return err;
1600
1601         err = -EOPNOTSUPP;
1602         if (msg->msg_flags&MSG_OOB)
1603                 goto out;
1604
1605         if (msg->msg_namelen) {
1606                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1607                 if (err < 0)
1608                         goto out;
1609                 namelen = err;
1610         } else {
1611                 sunaddr = NULL;
1612                 err = -ENOTCONN;
1613                 other = unix_peer_get(sk);
1614                 if (!other)
1615                         goto out;
1616         }
1617
1618         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1619             && (err = unix_autobind(sock)) != 0)
1620                 goto out;
1621
1622         err = -EMSGSIZE;
1623         if (len > sk->sk_sndbuf - 32)
1624                 goto out;
1625
1626         if (len > SKB_MAX_ALLOC) {
1627                 data_len = min_t(size_t,
1628                                  len - SKB_MAX_ALLOC,
1629                                  MAX_SKB_FRAGS * PAGE_SIZE);
1630                 data_len = PAGE_ALIGN(data_len);
1631
1632                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1633         }
1634
1635         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1636                                    msg->msg_flags & MSG_DONTWAIT, &err,
1637                                    PAGE_ALLOC_COSTLY_ORDER);
1638         if (skb == NULL)
1639                 goto out;
1640
1641         err = unix_scm_to_skb(&scm, skb, true);
1642         if (err < 0)
1643                 goto out_free;
1644
1645         skb_put(skb, len - data_len);
1646         skb->data_len = data_len;
1647         skb->len = len;
1648         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1649         if (err)
1650                 goto out_free;
1651
1652         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1653
1654 restart:
1655         if (!other) {
1656                 err = -ECONNRESET;
1657                 if (sunaddr == NULL)
1658                         goto out_free;
1659
1660                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1661                                         hash, &err);
1662                 if (other == NULL)
1663                         goto out_free;
1664         }
1665
1666         if (sk_filter(other, skb) < 0) {
1667                 /* Toss the packet but do not return any error to the sender */
1668                 err = len;
1669                 goto out_free;
1670         }
1671
1672         sk_locked = 0;
1673         unix_state_lock(other);
1674 restart_locked:
1675         err = -EPERM;
1676         if (!unix_may_send(sk, other))
1677                 goto out_unlock;
1678
1679         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1680                 /*
1681                  *      Check with 1003.1g - what should
1682                  *      datagram error
1683                  */
1684                 unix_state_unlock(other);
1685                 sock_put(other);
1686
1687                 if (!sk_locked)
1688                         unix_state_lock(sk);
1689
1690                 err = 0;
1691                 if (unix_peer(sk) == other) {
1692                         unix_peer(sk) = NULL;
1693                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1694
1695                         unix_state_unlock(sk);
1696
1697                         unix_dgram_disconnected(sk, other);
1698                         sock_put(other);
1699                         err = -ECONNREFUSED;
1700                 } else {
1701                         unix_state_unlock(sk);
1702                 }
1703
1704                 other = NULL;
1705                 if (err)
1706                         goto out_free;
1707                 goto restart;
1708         }
1709
1710         err = -EPIPE;
1711         if (other->sk_shutdown & RCV_SHUTDOWN)
1712                 goto out_unlock;
1713
1714         if (sk->sk_type != SOCK_SEQPACKET) {
1715                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1716                 if (err)
1717                         goto out_unlock;
1718         }
1719
1720         /* other == sk && unix_peer(other) != sk if
1721          * - unix_peer(sk) == NULL, destination address bound to sk
1722          * - unix_peer(sk) == sk by time of get but disconnected before lock
1723          */
1724         if (other != sk &&
1725             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1726                 if (timeo) {
1727                         timeo = unix_wait_for_peer(other, timeo);
1728
1729                         err = sock_intr_errno(timeo);
1730                         if (signal_pending(current))
1731                                 goto out_free;
1732
1733                         goto restart;
1734                 }
1735
1736                 if (!sk_locked) {
1737                         unix_state_unlock(other);
1738                         unix_state_double_lock(sk, other);
1739                 }
1740
1741                 if (unix_peer(sk) != other ||
1742                     unix_dgram_peer_wake_me(sk, other)) {
1743                         err = -EAGAIN;
1744                         sk_locked = 1;
1745                         goto out_unlock;
1746                 }
1747
1748                 if (!sk_locked) {
1749                         sk_locked = 1;
1750                         goto restart_locked;
1751                 }
1752         }
1753
1754         if (unlikely(sk_locked))
1755                 unix_state_unlock(sk);
1756
1757         if (sock_flag(other, SOCK_RCVTSTAMP))
1758                 __net_timestamp(skb);
1759         maybe_add_creds(skb, sock, other);
1760         skb_queue_tail(&other->sk_receive_queue, skb);
1761         unix_state_unlock(other);
1762         other->sk_data_ready(other);
1763         sock_put(other);
1764         scm_destroy(&scm);
1765         return len;
1766
1767 out_unlock:
1768         if (sk_locked)
1769                 unix_state_unlock(sk);
1770         unix_state_unlock(other);
1771 out_free:
1772         kfree_skb(skb);
1773 out:
1774         if (other)
1775                 sock_put(other);
1776         scm_destroy(&scm);
1777         return err;
1778 }
1779
1780 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1781  * bytes, and a minimum of a full page.
1782  */
1783 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1784
1785 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1786                                size_t len)
1787 {
1788         struct sock *sk = sock->sk;
1789         struct sock *other = NULL;
1790         int err, size;
1791         struct sk_buff *skb;
1792         int sent = 0;
1793         struct scm_cookie scm;
1794         bool fds_sent = false;
1795         int data_len;
1796
1797         wait_for_unix_gc();
1798         err = scm_send(sock, msg, &scm, false);
1799         if (err < 0)
1800                 return err;
1801
1802         err = -EOPNOTSUPP;
1803         if (msg->msg_flags&MSG_OOB)
1804                 goto out_err;
1805
1806         if (msg->msg_namelen) {
1807                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1808                 goto out_err;
1809         } else {
1810                 err = -ENOTCONN;
1811                 other = unix_peer(sk);
1812                 if (!other)
1813                         goto out_err;
1814         }
1815
1816         if (sk->sk_shutdown & SEND_SHUTDOWN)
1817                 goto pipe_err;
1818
1819         while (sent < len) {
1820                 size = len - sent;
1821
1822                 /* Keep two messages in the pipe so it schedules better */
1823                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1824
1825                 /* allow fallback to order-0 allocations */
1826                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1827
1828                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1829
1830                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1831
1832                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1833                                            msg->msg_flags & MSG_DONTWAIT, &err,
1834                                            get_order(UNIX_SKB_FRAGS_SZ));
1835                 if (!skb)
1836                         goto out_err;
1837
1838                 /* Only send the fds in the first buffer */
1839                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1840                 if (err < 0) {
1841                         kfree_skb(skb);
1842                         goto out_err;
1843                 }
1844                 fds_sent = true;
1845
1846                 skb_put(skb, size - data_len);
1847                 skb->data_len = data_len;
1848                 skb->len = size;
1849                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1850                 if (err) {
1851                         kfree_skb(skb);
1852                         goto out_err;
1853                 }
1854
1855                 unix_state_lock(other);
1856
1857                 if (sock_flag(other, SOCK_DEAD) ||
1858                     (other->sk_shutdown & RCV_SHUTDOWN))
1859                         goto pipe_err_free;
1860
1861                 maybe_add_creds(skb, sock, other);
1862                 skb_queue_tail(&other->sk_receive_queue, skb);
1863                 unix_state_unlock(other);
1864                 other->sk_data_ready(other);
1865                 sent += size;
1866         }
1867
1868         scm_destroy(&scm);
1869
1870         return sent;
1871
1872 pipe_err_free:
1873         unix_state_unlock(other);
1874         kfree_skb(skb);
1875 pipe_err:
1876         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1877                 send_sig(SIGPIPE, current, 0);
1878         err = -EPIPE;
1879 out_err:
1880         scm_destroy(&scm);
1881         return sent ? : err;
1882 }
1883
1884 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1885                                     int offset, size_t size, int flags)
1886 {
1887         int err;
1888         bool send_sigpipe = false;
1889         bool init_scm = true;
1890         struct scm_cookie scm;
1891         struct sock *other, *sk = socket->sk;
1892         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1893
1894         if (flags & MSG_OOB)
1895                 return -EOPNOTSUPP;
1896
1897         other = unix_peer(sk);
1898         if (!other || sk->sk_state != TCP_ESTABLISHED)
1899                 return -ENOTCONN;
1900
1901         if (false) {
1902 alloc_skb:
1903                 unix_state_unlock(other);
1904                 mutex_unlock(&unix_sk(other)->iolock);
1905                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1906                                               &err, 0);
1907                 if (!newskb)
1908                         goto err;
1909         }
1910
1911         /* we must acquire iolock as we modify already present
1912          * skbs in the sk_receive_queue and mess with skb->len
1913          */
1914         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1915         if (err) {
1916                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1917                 goto err;
1918         }
1919
1920         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1921                 err = -EPIPE;
1922                 send_sigpipe = true;
1923                 goto err_unlock;
1924         }
1925
1926         unix_state_lock(other);
1927
1928         if (sock_flag(other, SOCK_DEAD) ||
1929             other->sk_shutdown & RCV_SHUTDOWN) {
1930                 err = -EPIPE;
1931                 send_sigpipe = true;
1932                 goto err_state_unlock;
1933         }
1934
1935         if (init_scm) {
1936                 err = maybe_init_creds(&scm, socket, other);
1937                 if (err)
1938                         goto err_state_unlock;
1939                 init_scm = false;
1940         }
1941
1942         skb = skb_peek_tail(&other->sk_receive_queue);
1943         if (tail && tail == skb) {
1944                 skb = newskb;
1945         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1946                 if (newskb) {
1947                         skb = newskb;
1948                 } else {
1949                         tail = skb;
1950                         goto alloc_skb;
1951                 }
1952         } else if (newskb) {
1953                 /* this is fast path, we don't necessarily need to
1954                  * call to kfree_skb even though with newskb == NULL
1955                  * this - does no harm
1956                  */
1957                 consume_skb(newskb);
1958                 newskb = NULL;
1959         }
1960
1961         if (skb_append_pagefrags(skb, page, offset, size)) {
1962                 tail = skb;
1963                 goto alloc_skb;
1964         }
1965
1966         skb->len += size;
1967         skb->data_len += size;
1968         skb->truesize += size;
1969         refcount_add(size, &sk->sk_wmem_alloc);
1970
1971         if (newskb) {
1972                 err = unix_scm_to_skb(&scm, skb, false);
1973                 if (err)
1974                         goto err_state_unlock;
1975                 spin_lock(&other->sk_receive_queue.lock);
1976                 __skb_queue_tail(&other->sk_receive_queue, newskb);
1977                 spin_unlock(&other->sk_receive_queue.lock);
1978         }
1979
1980         unix_state_unlock(other);
1981         mutex_unlock(&unix_sk(other)->iolock);
1982
1983         other->sk_data_ready(other);
1984         scm_destroy(&scm);
1985         return size;
1986
1987 err_state_unlock:
1988         unix_state_unlock(other);
1989 err_unlock:
1990         mutex_unlock(&unix_sk(other)->iolock);
1991 err:
1992         kfree_skb(newskb);
1993         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1994                 send_sig(SIGPIPE, current, 0);
1995         if (!init_scm)
1996                 scm_destroy(&scm);
1997         return err;
1998 }
1999
2000 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2001                                   size_t len)
2002 {
2003         int err;
2004         struct sock *sk = sock->sk;
2005
2006         err = sock_error(sk);
2007         if (err)
2008                 return err;
2009
2010         if (sk->sk_state != TCP_ESTABLISHED)
2011                 return -ENOTCONN;
2012
2013         if (msg->msg_namelen)
2014                 msg->msg_namelen = 0;
2015
2016         return unix_dgram_sendmsg(sock, msg, len);
2017 }
2018
2019 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2020                                   size_t size, int flags)
2021 {
2022         struct sock *sk = sock->sk;
2023
2024         if (sk->sk_state != TCP_ESTABLISHED)
2025                 return -ENOTCONN;
2026
2027         return unix_dgram_recvmsg(sock, msg, size, flags);
2028 }
2029
2030 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2031 {
2032         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2033
2034         if (addr) {
2035                 msg->msg_namelen = addr->len;
2036                 memcpy(msg->msg_name, addr->name, addr->len);
2037         }
2038 }
2039
2040 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2041                               size_t size, int flags)
2042 {
2043         struct scm_cookie scm;
2044         struct sock *sk = sock->sk;
2045         struct unix_sock *u = unix_sk(sk);
2046         struct sk_buff *skb, *last;
2047         long timeo;
2048         int skip;
2049         int err;
2050
2051         err = -EOPNOTSUPP;
2052         if (flags&MSG_OOB)
2053                 goto out;
2054
2055         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2056
2057         do {
2058                 mutex_lock(&u->iolock);
2059
2060                 skip = sk_peek_offset(sk, flags);
2061                 skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
2062                                               &last);
2063                 if (skb)
2064                         break;
2065
2066                 mutex_unlock(&u->iolock);
2067
2068                 if (err != -EAGAIN)
2069                         break;
2070         } while (timeo &&
2071                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2072
2073         if (!skb) { /* implies iolock unlocked */
2074                 unix_state_lock(sk);
2075                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2076                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2077                     (sk->sk_shutdown & RCV_SHUTDOWN))
2078                         err = 0;
2079                 unix_state_unlock(sk);
2080                 goto out;
2081         }
2082
2083         if (wq_has_sleeper(&u->peer_wait))
2084                 wake_up_interruptible_sync_poll(&u->peer_wait,
2085                                                 EPOLLOUT | EPOLLWRNORM |
2086                                                 EPOLLWRBAND);
2087
2088         if (msg->msg_name)
2089                 unix_copy_addr(msg, skb->sk);
2090
2091         if (size > skb->len - skip)
2092                 size = skb->len - skip;
2093         else if (size < skb->len - skip)
2094                 msg->msg_flags |= MSG_TRUNC;
2095
2096         err = skb_copy_datagram_msg(skb, skip, msg, size);
2097         if (err)
2098                 goto out_free;
2099
2100         if (sock_flag(sk, SOCK_RCVTSTAMP))
2101                 __sock_recv_timestamp(msg, sk, skb);
2102
2103         memset(&scm, 0, sizeof(scm));
2104
2105         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2106         unix_set_secdata(&scm, skb);
2107
2108         if (!(flags & MSG_PEEK)) {
2109                 if (UNIXCB(skb).fp)
2110                         unix_detach_fds(&scm, skb);
2111
2112                 sk_peek_offset_bwd(sk, skb->len);
2113         } else {
2114                 /* It is questionable: on PEEK we could:
2115                    - do not return fds - good, but too simple 8)
2116                    - return fds, and do not return them on read (old strategy,
2117                      apparently wrong)
2118                    - clone fds (I chose it for now, it is the most universal
2119                      solution)
2120
2121                    POSIX 1003.1g does not actually define this clearly
2122                    at all. POSIX 1003.1g doesn't define a lot of things
2123                    clearly however!
2124
2125                 */
2126
2127                 sk_peek_offset_fwd(sk, size);
2128
2129                 if (UNIXCB(skb).fp)
2130                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2131         }
2132         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2133
2134         scm_recv(sock, msg, &scm, flags);
2135
2136 out_free:
2137         skb_free_datagram(sk, skb);
2138         mutex_unlock(&u->iolock);
2139 out:
2140         return err;
2141 }
2142
2143 /*
2144  *      Sleep until more data has arrived. But check for races..
2145  */
/*
 * unix_stream_data_wait - block until the receive queue of @sk changes.
 * @sk:        socket whose receive queue is watched
 * @timeo:     remaining timeout, in the unit taken by schedule_timeout()
 * @last:      tail skb the caller saw (may be NULL for an empty queue)
 * @last_len:  length of @last when the caller saw it
 * @freezable: use freezable_schedule_timeout() instead of schedule_timeout()
 *
 * Returns once the queue tail is no longer @last, or its length is no
 * longer @last_len, or sk_err is set, RCV_SHUTDOWN is set, a signal is
 * pending, the timeout is used up, or the socket is found SOCK_DEAD after
 * a sleep.  The return value is the remaining timeout.
 */
2146 static long unix_stream_data_wait(struct sock *sk, long timeo,
2147                                   struct sk_buff *last, unsigned int last_len,
2148                                   bool freezable)
2149 {
2150         struct sk_buff *tail;
2151         DEFINE_WAIT(wait);
2152
2153         unix_state_lock(sk);
2154
2155         for (;;) {
                     /* queue ourselves before testing the wake-up conditions */
2156                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2157
2158                 tail = skb_peek_tail(&sk->sk_receive_queue);
                     /* a new tail, or a tail that changed length, counts as new data */
2159                 if (tail != last ||
2160                     (tail && tail->len != last_len) ||
2161                     sk->sk_err ||
2162                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2163                     signal_pending(current) ||
2164                     !timeo)
2165                         break;
2166
2167                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                     /* the state lock is not held across the sleep */
2168                 unix_state_unlock(sk);
2169                 if (freezable)
2170                         timeo = freezable_schedule_timeout(timeo);
2171                 else
2172                         timeo = schedule_timeout(timeo);
2173                 unix_state_lock(sk);
2174
                     /* note: SOCKWQ_ASYNC_WAITDATA is left set on this exit */
2175                 if (sock_flag(sk, SOCK_DEAD))
2176                         break;
2177
2178                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2179         }
2180
2181         finish_wait(sk_sleep(sk), &wait);
2182         unix_state_unlock(sk);
2183         return timeo;
2184 }
2185
2186 static unsigned int unix_skb_len(const struct sk_buff *skb)
2187 {
2188         return skb->len - UNIXCB(skb).consumed;
2189 }
2190
/*
 * Parameters of one stream read, shared by the recvmsg and splice paths
 * and passed to unix_stream_read_generic().
 */
2191 struct unix_stream_read_state {
             /*
              * Moves data out of an skb; called as
              * recv_actor(skb, skip, chunk, state).  The result is added
              * to the byte count by the caller, a negative result aborts
              * the read.
              */
2192         int (*recv_actor)(struct sk_buff *, int, int,
2193                           struct unix_stream_read_state *);
             /* socket being read */
2194         struct socket *socket;
             /* destination message; left NULL by the splice path */
2195         struct msghdr *msg;
             /* destination pipe; only set by the splice path */
2196         struct pipe_inode_info *pipe;
             /* maximum number of bytes to read */
2197         size_t size;
             /* MSG_* receive flags */
2198         int flags;
             /* flags handed to skb_splice_bits() by the splice actor */
2199         unsigned int splice_flags;
2200 };
2201
/*
 * unix_stream_read_generic - common receive loop for stream sockets.
 * @state:     description of the read; state->recv_actor moves the data
 * @freezable: passed through to unix_stream_data_wait()
 *
 * Walks the socket's receive queue with u->iolock held, skips the current
 * peek offset and hands each skb to state->recv_actor until state->size
 * bytes were handled, the queue is empty with the low-water target met,
 * or an error, shutdown or signal ends the read.  Credentials and passed
 * file descriptors are collected in a scm cookie and delivered through
 * scm_recv() when state->msg is set.
 *
 * Returns the byte count when it is non-zero (it is set to -EFAULT when
 * the actor fails before anything was copied), otherwise err, which is 0
 * after RCV_SHUTDOWN with no pending socket error.
 */
2202 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2203                                     bool freezable)
2204 {
2205         struct scm_cookie scm;
2206         struct socket *sock = state->socket;
2207         struct sock *sk = sock->sk;
2208         struct unix_sock *u = unix_sk(sk);
2209         int copied = 0;
2210         int flags = state->flags;
2211         int noblock = flags & MSG_DONTWAIT;
2212         bool check_creds = false;
2213         int target;
2214         int err = 0;
2215         long timeo;
2216         int skip;
2217         size_t size = state->size;
2218         unsigned int last_len;
2219
2220         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2221                 err = -EINVAL;
2222                 goto out;
2223         }
2224
2225         if (unlikely(flags & MSG_OOB)) {
2226                 err = -EOPNOTSUPP;
2227                 goto out;
2228         }
2229
             /* target: bytes needed before an empty queue ends the read */
2230         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2231         timeo = sock_rcvtimeo(sk, noblock);
2232
2233         memset(&scm, 0, sizeof(scm));
2234
2235         /* Lock the socket to prevent queue disordering
2236          * while sleeps in memcpy_tomsg
2237          */
2238         mutex_lock(&u->iolock);
2239
             /* a negative peek offset is treated as 0 */
2240         skip = max(sk_peek_offset(sk, flags), 0);
2241
2242         do {
2243                 int chunk;
2244                 bool drop_skb;
2245                 struct sk_buff *skb, *last;
2246
2247 redo:
2248                 unix_state_lock(sk);
2249                 if (sock_flag(sk, SOCK_DEAD)) {
2250                         err = -ECONNRESET;
2251                         goto unlock;
2252                 }
                     /* last/last_len are what unix_stream_data_wait() compares against */
2253                 last = skb = skb_peek(&sk->sk_receive_queue);
2254                 last_len = last ? last->len : 0;
2255 again:
                     /* entered with the state lock held */
2256                 if (skb == NULL) {
2257                         if (copied >= target)
2258                                 goto unlock;
2259
2260                         /*
2261                          *      POSIX 1003.1g mandates this order.
2262                          */
2263
2264                         err = sock_error(sk);
2265                         if (err)
2266                                 goto unlock;
2267                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2268                                 goto unlock;
2269
2270                         unix_state_unlock(sk);
2271                         if (!timeo) {
2272                                 err = -EAGAIN;
2273                                 break;
2274                         }
2275
                             /* iolock is released for the duration of the wait */
2276                         mutex_unlock(&u->iolock);
2277
2278                         timeo = unix_stream_data_wait(sk, timeo, last,
2279                                                       last_len, freezable);
2280
                             /*
                              * Leaves without retaking iolock and without
                              * scm_recv(), hence the explicit scm_destroy().
                              */
2281                         if (signal_pending(current)) {
2282                                 err = sock_intr_errno(timeo);
2283                                 scm_destroy(&scm);
2284                                 goto out;
2285                         }
2286
2287                         mutex_lock(&u->iolock);
2288                         goto redo;
                             /*
                              * Label inside this block: only reached by goto,
                              * with the state lock held.
                              */
2289 unlock:
2290                         unix_state_unlock(sk);
2291                         break;
2292                 }
2293
                     /* step over skbs that lie entirely before the peek offset */
2294                 while (skip >= unix_skb_len(skb)) {
2295                         skip -= unix_skb_len(skb);
2296                         last = skb;
2297                         last_len = skb->len;
2298                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2299                         if (!skb)
2300                                 goto again;
2301                 }
2302
2303                 unix_state_unlock(sk);
2304
2305                 if (check_creds) {
2306                         /* Never glue messages from different writers */
2307                         if (!unix_skb_scm_eq(skb, &scm))
2308                                 break;
2309                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2310                         /* Copy credentials */
2311                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2312                         unix_set_secdata(&scm, skb);
2313                         check_creds = true;
2314                 }
2315
2316                 /* Copy address just once */
                     /*
                      * NOTE(review): sunaddr is declared inside this block, so
                      * setting it to NULL has no effect on later passes; as
                      * written the address is copied for every skb processed.
                      */
2317                 if (state->msg && state->msg->msg_name) {
2318                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2319                                          state->msg->msg_name);
2320                         unix_copy_addr(state->msg, skb->sk);
2321                         sunaddr = NULL;
2322                 }
2323
2324                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                     /* extra reference taken for the duration of the actor call */
2325                 skb_get(skb);
2326                 chunk = state->recv_actor(skb, skip, chunk, state);
2327                 drop_skb = !unix_skb_len(skb);
2328                 /* skb is only safe to use if !drop_skb */
2329                 consume_skb(skb);
2330                 if (chunk < 0) {
2331                         if (copied == 0)
2332                                 copied = -EFAULT;
2333                         break;
2334                 }
2335                 copied += chunk;
2336                 size -= chunk;
2337
2338                 if (drop_skb) {
2339                         /* the skb was touched by a concurrent reader;
2340                          * we should not expect anything from this skb
2341                          * anymore and assume it invalid - we can be
2342                          * sure it was dropped from the socket queue
2343                          *
2344                          * let's report a short read
2345                          */
2346                         err = 0;
2347                         break;
2348                 }
2349
2350                 /* Mark read part of skb as used */
2351                 if (!(flags & MSG_PEEK)) {
2352                         UNIXCB(skb).consumed += chunk;
2353
2354                         sk_peek_offset_bwd(sk, chunk);
2355
2356                         if (UNIXCB(skb).fp)
2357                                 unix_detach_fds(&scm, skb);
2358
                             /* skb not fully read yet: stop here */
2359                         if (unix_skb_len(skb))
2360                                 break;
2361
2362                         skb_unlink(skb, &sk->sk_receive_queue);
2363                         consume_skb(skb);
2364
                             /* stop once file descriptors were picked up */
2365                         if (scm.fp)
2366                                 break;
2367                 } else {
2368                         /* It is questionable, see note in unix_dgram_recvmsg.
2369                          */
2370                         if (UNIXCB(skb).fp)
2371                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2372
2373                         sk_peek_offset_fwd(sk, chunk);
2374
2375                         if (UNIXCB(skb).fp)
2376                                 break;
2377
                             /* peek on into the next skb, if there is one */
2378                         skip = 0;
2379                         last = skb;
2380                         last_len = skb->len;
2381                         unix_state_lock(sk);
2382                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2383                         if (skb)
2384                                 goto again;
2385                         unix_state_unlock(sk);
2386                         break;
2387                 }
2388         } while (size);
2389
2390         mutex_unlock(&u->iolock);
             /* without a msghdr there is nowhere to deliver the scm data */
2391         if (state->msg)
2392                 scm_recv(sock, state->msg, &scm, flags);
2393         else
2394                 scm_destroy(&scm);
2395 out:
2396         return copied ? : err;
2397 }
2398
2399 static int unix_stream_read_actor(struct sk_buff *skb,
2400                                   int skip, int chunk,
2401                                   struct unix_stream_read_state *state)
2402 {
2403         int ret;
2404
2405         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2406                                     state->msg, chunk);
2407         return ret ?: chunk;
2408 }
2409
2410 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2411                                size_t size, int flags)
2412 {
2413         struct unix_stream_read_state state = {
2414                 .recv_actor = unix_stream_read_actor,
2415                 .socket = sock,
2416                 .msg = msg,
2417                 .size = size,
2418                 .flags = flags
2419         };
2420
2421         return unix_stream_read_generic(&state, true);
2422 }
2423
2424 static int unix_stream_splice_actor(struct sk_buff *skb,
2425                                     int skip, int chunk,
2426                                     struct unix_stream_read_state *state)
2427 {
2428         return skb_splice_bits(skb, state->socket->sk,
2429                                UNIXCB(skb).consumed + skip,
2430                                state->pipe, chunk, state->splice_flags);
2431 }
2432
2433 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2434                                        struct pipe_inode_info *pipe,
2435                                        size_t size, unsigned int flags)
2436 {
2437         struct unix_stream_read_state state = {
2438                 .recv_actor = unix_stream_splice_actor,
2439                 .socket = sock,
2440                 .pipe = pipe,
2441                 .size = size,
2442                 .splice_flags = flags,
2443         };
2444
2445         if (unlikely(*ppos))
2446                 return -ESPIPE;
2447
2448         if (sock->file->f_flags & O_NONBLOCK ||
2449             flags & SPLICE_F_NONBLOCK)
2450                 state.flags = MSG_DONTWAIT;
2451
2452         return unix_stream_read_generic(&state, false);
2453 }
2454
2455 static int unix_shutdown(struct socket *sock, int mode)
2456 {
2457         struct sock *sk = sock->sk;
2458         struct sock *other;
2459
2460         if (mode < SHUT_RD || mode > SHUT_RDWR)
2461                 return -EINVAL;
2462         /* This maps:
2463          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2464          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2465          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2466          */
2467         ++mode;
2468
2469         unix_state_lock(sk);
2470         sk->sk_shutdown |= mode;
2471         other = unix_peer(sk);
2472         if (other)
2473                 sock_hold(other);
2474         unix_state_unlock(sk);
2475         sk->sk_state_change(sk);
2476
2477         if (other &&
2478                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2479
2480                 int peer_mode = 0;
2481
2482                 if (mode&RCV_SHUTDOWN)
2483                         peer_mode |= SEND_SHUTDOWN;
2484                 if (mode&SEND_SHUTDOWN)
2485                         peer_mode |= RCV_SHUTDOWN;
2486                 unix_state_lock(other);
2487                 other->sk_shutdown |= peer_mode;
2488                 unix_state_unlock(other);
2489                 other->sk_state_change(other);
2490                 if (peer_mode == SHUTDOWN_MASK)
2491                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2492                 else if (peer_mode & RCV_SHUTDOWN)
2493                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2494         }
2495         if (other)
2496                 sock_put(other);
2497
2498         return 0;
2499 }
2500
2501 long unix_inq_len(struct sock *sk)
2502 {
2503         struct sk_buff *skb;
2504         long amount = 0;
2505
2506         if (sk->sk_state == TCP_LISTEN)
2507                 return -EINVAL;
2508
2509         spin_lock(&sk->sk_receive_queue.lock);
2510         if (sk->sk_type == SOCK_STREAM ||
2511             sk->sk_type == SOCK_SEQPACKET) {
2512                 skb_queue_walk(&sk->sk_receive_queue, skb)
2513                         amount += unix_skb_len(skb);
2514         } else {
2515                 skb = skb_peek(&sk->sk_receive_queue);
2516                 if (skb)
2517                         amount = skb->len;
2518         }
2519         spin_unlock(&sk->sk_receive_queue.lock);
2520
2521         return amount;
2522 }
2523 EXPORT_SYMBOL_GPL(unix_inq_len);
2524
/* Amount of send-buffer memory currently charged to @sk. */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
2529 EXPORT_SYMBOL_GPL(unix_outq_len);
2530
2531 static int unix_open_file(struct sock *sk)
2532 {
2533         struct path path;
2534         struct file *f;
2535         int fd;
2536
2537         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2538                 return -EPERM;
2539
2540         if (!smp_load_acquire(&unix_sk(sk)->addr))
2541                 return -ENOENT;
2542
2543         path = unix_sk(sk)->path;
2544         if (!path.dentry)
2545                 return -ENOENT;
2546
2547         path_get(&path);
2548
2549         fd = get_unused_fd_flags(O_CLOEXEC);
2550         if (fd < 0)
2551                 goto out;
2552
2553         f = dentry_open(&path, O_PATH, current_cred());
2554         if (IS_ERR(f)) {
2555                 put_unused_fd(fd);
2556                 fd = PTR_ERR(f);
2557                 goto out;
2558         }
2559
2560         fd_install(fd, f);
2561 out:
2562         path_put(&path);
2563
2564         return fd;
2565 }
2566
2567 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2568 {
2569         struct sock *sk = sock->sk;
2570         long amount = 0;
2571         int err;
2572
2573         switch (cmd) {
2574         case SIOCOUTQ:
2575                 amount = unix_outq_len(sk);
2576                 err = put_user(amount, (int __user *)arg);
2577                 break;
2578         case SIOCINQ:
2579                 amount = unix_inq_len(sk);
2580                 if (amount < 0)
2581                         err = amount;
2582                 else
2583                         err = put_user(amount, (int __user *)arg);
2584                 break;
2585         case SIOCUNIXFILE:
2586                 err = unix_open_file(sk);
2587                 break;
2588         default:
2589                 err = -ENOIOCTLCMD;
2590                 break;
2591         }
2592         return err;
2593 }
2594
2595 #ifdef CONFIG_COMPAT
2596 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2597 {
2598         return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2599 }
2600 #endif
2601
2602 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2603 {
2604         struct sock *sk = sock->sk;
2605         __poll_t mask;
2606
2607         sock_poll_wait(file, sock, wait);
2608         mask = 0;
2609
2610         /* exceptional events? */
2611         if (sk->sk_err)
2612                 mask |= EPOLLERR;
2613         if (sk->sk_shutdown == SHUTDOWN_MASK)
2614                 mask |= EPOLLHUP;
2615         if (sk->sk_shutdown & RCV_SHUTDOWN)
2616                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2617
2618         /* readable? */
2619         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2620                 mask |= EPOLLIN | EPOLLRDNORM;
2621
2622         /* Connection-based need to check for termination and startup */
2623         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2624             sk->sk_state == TCP_CLOSE)
2625                 mask |= EPOLLHUP;
2626
2627         /*
2628          * we set writable also when the other side has shut down the
2629          * connection. This prevents stuck sockets.
2630          */
2631         if (unix_writable(sk))
2632                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2633
2634         return mask;
2635 }
2636
2637 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2638                                     poll_table *wait)
2639 {
2640         struct sock *sk = sock->sk, *other;
2641         unsigned int writable;
2642         __poll_t mask;
2643
2644         sock_poll_wait(file, sock, wait);
2645         mask = 0;
2646
2647         /* exceptional events? */
2648         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2649                 mask |= EPOLLERR |
2650                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2651
2652         if (sk->sk_shutdown & RCV_SHUTDOWN)
2653                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2654         if (sk->sk_shutdown == SHUTDOWN_MASK)
2655                 mask |= EPOLLHUP;
2656
2657         /* readable? */
2658         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2659                 mask |= EPOLLIN | EPOLLRDNORM;
2660
2661         /* Connection-based need to check for termination and startup */
2662         if (sk->sk_type == SOCK_SEQPACKET) {
2663                 if (sk->sk_state == TCP_CLOSE)
2664                         mask |= EPOLLHUP;
2665                 /* connection hasn't started yet? */
2666                 if (sk->sk_state == TCP_SYN_SENT)
2667                         return mask;
2668         }
2669
2670         /* No write status requested, avoid expensive OUT tests. */
2671         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2672                 return mask;
2673
2674         writable = unix_writable(sk);
2675         if (writable) {
2676                 unix_state_lock(sk);
2677
2678                 other = unix_peer(sk);
2679                 if (other && unix_peer(other) != sk &&
2680                     unix_recvq_full(other) &&
2681                     unix_dgram_peer_wake_me(sk, other))
2682                         writable = 0;
2683
2684                 unix_state_unlock(sk);
2685         }
2686
2687         if (writable)
2688                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2689         else
2690                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2691
2692         return mask;
2693 }
2694
2695 #ifdef CONFIG_PROC_FS
2696
/*
 * A seq_file position packs a hash bucket index and an offset inside that
 * bucket into one value: the low BUCKET_SPACE bits hold the offset, the bits
 * above them hold the bucket index.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the offset within the bucket from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket index @b and offset @o. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2702
2703 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2704 {
2705         unsigned long offset = get_offset(*pos);
2706         unsigned long bucket = get_bucket(*pos);
2707         struct sock *sk;
2708         unsigned long count = 0;
2709
2710         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2711                 if (sock_net(sk) != seq_file_net(seq))
2712                         continue;
2713                 if (++count == offset)
2714                         break;
2715         }
2716
2717         return sk;
2718 }
2719
2720 static struct sock *unix_next_socket(struct seq_file *seq,
2721                                      struct sock *sk,
2722                                      loff_t *pos)
2723 {
2724         unsigned long bucket;
2725
2726         while (sk > (struct sock *)SEQ_START_TOKEN) {
2727                 sk = sk_next(sk);
2728                 if (!sk)
2729                         goto next_bucket;
2730                 if (sock_net(sk) == seq_file_net(seq))
2731                         return sk;
2732         }
2733
2734         do {
2735                 sk = unix_from_bucket(seq, pos);
2736                 if (sk)
2737                         return sk;
2738
2739 next_bucket:
2740                 bucket = get_bucket(*pos) + 1;
2741                 *pos = set_bucket_offset(bucket, 1);
2742         } while (bucket < ARRAY_SIZE(unix_socket_table));
2743
2744         return NULL;
2745 }
2746
2747 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2748         __acquires(unix_table_lock)
2749 {
2750         spin_lock(&unix_table_lock);
2751
2752         if (!*pos)
2753                 return SEQ_START_TOKEN;
2754
2755         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2756                 return NULL;
2757
2758         return unix_next_socket(seq, NULL, pos);
2759 }
2760
2761 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2762 {
2763         ++*pos;
2764         return unix_next_socket(seq, v, pos);
2765 }
2766
2767 static void unix_seq_stop(struct seq_file *seq, void *v)
2768         __releases(unix_table_lock)
2769 {
2770         spin_unlock(&unix_table_lock);
2771 }
2772
2773 static int unix_seq_show(struct seq_file *seq, void *v)
2774 {
2775
2776         if (v == SEQ_START_TOKEN)
2777                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2778                          "Inode Path\n");
2779         else {
2780                 struct sock *s = v;
2781                 struct unix_sock *u = unix_sk(s);
2782                 unix_state_lock(s);
2783
2784                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2785                         s,
2786                         refcount_read(&s->sk_refcnt),
2787                         0,
2788                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2789                         s->sk_type,
2790                         s->sk_socket ?
2791                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2792                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2793                         sock_i_ino(s));
2794
2795                 if (u->addr) {  // under unix_table_lock here
2796                         int i, len;
2797                         seq_putc(seq, ' ');
2798
2799                         i = 0;
2800                         len = u->addr->len - sizeof(short);
2801                         if (!UNIX_ABSTRACT(s))
2802                                 len--;
2803                         else {
2804                                 seq_putc(seq, '@');
2805                                 i++;
2806                         }
2807                         for ( ; i < len; i++)
2808                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2809                                          '@');
2810                 }
2811                 unix_state_unlock(s);
2812                 seq_putc(seq, '\n');
2813         }
2814
2815         return 0;
2816 }
2817
2818 static const struct seq_operations unix_seq_ops = {
2819         .start  = unix_seq_start,
2820         .next   = unix_seq_next,
2821         .stop   = unix_seq_stop,
2822         .show   = unix_seq_show,
2823 };
2824 #endif
2825
2826 static const struct net_proto_family unix_family_ops = {
2827         .family = PF_UNIX,
2828         .create = unix_create,
2829         .owner  = THIS_MODULE,
2830 };
2831
2832
2833 static int __net_init unix_net_init(struct net *net)
2834 {
2835         int error = -ENOMEM;
2836
2837         net->unx.sysctl_max_dgram_qlen = 10;
2838         if (unix_sysctl_register(net))
2839                 goto out;
2840
2841 #ifdef CONFIG_PROC_FS
2842         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2843                         sizeof(struct seq_net_private))) {
2844                 unix_sysctl_unregister(net);
2845                 goto out;
2846         }
2847 #endif
2848         error = 0;
2849 out:
2850         return error;
2851 }
2852
2853 static void __net_exit unix_net_exit(struct net *net)
2854 {
2855         unix_sysctl_unregister(net);
2856         remove_proc_entry("unix", net->proc_net);
2857 }
2858
2859 static struct pernet_operations unix_net_ops = {
2860         .init = unix_net_init,
2861         .exit = unix_net_exit,
2862 };
2863
2864 static int __init af_unix_init(void)
2865 {
2866         int rc = -1;
2867
2868         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2869
2870         rc = proto_register(&unix_proto, 1);
2871         if (rc != 0) {
2872                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2873                 goto out;
2874         }
2875
2876         sock_register(&unix_family_ops);
2877         register_pernet_subsys(&unix_net_ops);
2878 out:
2879         return rc;
2880 }
2881
2882 static void __exit af_unix_exit(void)
2883 {
2884         sock_unregister(PF_UNIX);
2885         proto_unregister(&unix_proto);
2886         unregister_pernet_subsys(&unix_net_ops);
2887 }
2888
2889 /* Earlier than device_initcall() so that other drivers invoking
2890    request_module() don't end up in a loop when modprobe tries
2891    to use a UNIX socket. But later than subsys_initcall() because
2892    we depend on stuff initialised there */
2893 fs_initcall(af_unix_init);
2894 module_exit(af_unix_exit);
2895
2896 MODULE_LICENSE("GPL");
2897 MODULE_ALIAS_NETPROTO(PF_UNIX);