net/ipv6/ip6_output.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *      IPv6 output functions
   4  *      Linux INET6 implementation
   5  *
   6  *      Authors:
   7  *      Pedro Roque             <roque@di.fc.ul.pt>
   8  *
   9  *      Based on linux/net/ipv4/ip_output.c
  10  *
  11  *      Changes:
   12  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
  13  *                              extension headers are implemented.
  14  *                              route changes now work.
  15  *                              ip6_forward does not confuse sniffers.
  16  *                              etc.
  17  *
  18  *      H. von Brand    :       Added missing #include <linux/string.h>
  19  *      Imran Patel     :       frag id should be in NBO
  20  *      Kazunori MIYAZAWA @USAGI
  21  *                      :       add ip6_append_data and related functions
  22  *                              for datagram xmit
  23  */
  24
  25 #include <linux/errno.h>
  26 #include <linux/kernel.h>
  27 #include <linux/string.h>
  28 #include <linux/socket.h>
  29 #include <linux/net.h>
  30 #include <linux/netdevice.h>
  31 #include <linux/if_arp.h>
  32 #include <linux/in6.h>
  33 #include <linux/tcp.h>
  34 #include <linux/route.h>
  35 #include <linux/module.h>
  36 #include <linux/slab.h>
  37
  38 #include <linux/bpf-cgroup.h>
  39 #include <linux/netfilter.h>
  40 #include <linux/netfilter_ipv6.h>
  41
  42 #include <net/sock.h>
  43 #include <net/snmp.h>
  44
  45 #include <net/ipv6.h>
  46 #include <net/ndisc.h>
  47 #include <net/protocol.h>
  48 #include <net/ip6_route.h>
  49 #include <net/addrconf.h>
  50 #include <net/rawv6.h>
  51 #include <net/icmp.h>
  52 #include <net/xfrm.h>
  53 #include <net/checksum.h>
  54 #include <linux/mroute6.h>
  55 #include <net/l3mdev.h>
  56 #include <net/lwtunnel.h>
  57
  58 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  59 {
  60         struct dst_entry *dst = skb_dst(skb);
  61         struct net_device *dev = dst->dev;
  62         const struct in6_addr *nexthop;
  63         struct neighbour *neigh;
  64         int ret;
  65
  66         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  67                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  68
  69                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  70                     ((mroute6_is_socket(net, skb) &&
  71                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  72                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  73                                          &ipv6_hdr(skb)->saddr))) {
  74                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  75
  76                         /* Do not check for IFF_ALLMULTI; multicast routing
  77                            is not supported in any case.
  78                          */
  79                         if (newskb)
  80                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  81                                         net, sk, newskb, NULL, newskb->dev,
  82                                         dev_loopback_xmit);
  83
  84                         if (ipv6_hdr(skb)->hop_limit == 0) {
  85                                 IP6_INC_STATS(net, idev,
  86                                               IPSTATS_MIB_OUTDISCARDS);
  87                                 kfree_skb(skb);
  88                                 return 0;
  89                         }
  90                 }
  91
  92                 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  93
  94                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
  95                     IPV6_ADDR_SCOPE_NODELOCAL &&
  96                     !(dev->flags & IFF_LOOPBACK)) {
  97                         kfree_skb(skb);
  98                         return 0;
  99                 }
 100         }
 101
 102         if (lwtunnel_xmit_redirect(dst->lwtstate)) {
 103                 int res = lwtunnel_xmit(skb);
 104
 105                 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 106                         return res;
 107         }
 108
 109         rcu_read_lock_bh();
 110         nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 111         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 112         if (unlikely(!neigh))
 113                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 114         if (!IS_ERR(neigh)) {
 115                 sock_confirm_neigh(skb, neigh);
 116                 ret = neigh_output(neigh, skb, false);
 117                 rcu_read_unlock_bh();
 118                 return ret;
 119         }
 120         rcu_read_unlock_bh();
 121
 122         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 123         kfree_skb(skb);
 124         return -EINVAL;
 125 }
 126
 127 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 128 {
 129 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 130         /* Policy lookup after SNAT yielded a new policy */
 131         if (skb_dst(skb)->xfrm) {
 132                 IPCB(skb)->flags |= IPSKB_REROUTED;
 133                 return dst_output(net, sk, skb);
 134         }
 135 #endif
 136
 137         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 138             dst_allfrag(skb_dst(skb)) ||
 139             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 140                 return ip6_fragment(net, sk, skb, ip6_finish_output2);
 141         else
 142                 return ip6_finish_output2(net, sk, skb);
 143 }
 144
 145 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 146 {
 147         int ret;
 148
 149         ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 150         switch (ret) {
 151         case NET_XMIT_SUCCESS:
 152                 return __ip6_finish_output(net, sk, skb);
 153         case NET_XMIT_CN:
 154                 return __ip6_finish_output(net, sk, skb) ? : ret;
 155         default:
 156                 kfree_skb(skb);
 157                 return ret;
 158         }
 159 }
 160
 161 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 162 {
 163         struct net_device *dev = skb_dst(skb)->dev;
 164         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 165
 166         skb->protocol = htons(ETH_P_IPV6);
 167         skb->dev = dev;
 168
 169         if (unlikely(idev->cnf.disable_ipv6)) {
 170                 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 171                 kfree_skb(skb);
 172                 return 0;
 173         }
 174
 175         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 176                             net, sk, skb, NULL, dev,
 177                             ip6_finish_output,
 178                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 179 }
 180
 181 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 182 {
 183         if (!np->autoflowlabel_set)
 184                 return ip6_default_np_autolabel(net);
 185         else
 186                 return np->autoflowlabel;
 187 }
 188
 189 /*
 190  * xmit an sk_buff (used by TCP, SCTP and DCCP)
 191  * Note : socket lock is not held for SYNACK packets, but might be modified
 192  * by calls to skb_set_owner_w() and ipv6_local_error(),
 193  * which are using proper atomic operations or spinlocks.
 194  */
 195 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 196              __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
 197 {
 198         struct net *net = sock_net(sk);
 199         const struct ipv6_pinfo *np = inet6_sk(sk);
 200         struct in6_addr *first_hop = &fl6->daddr;
 201         struct dst_entry *dst = skb_dst(skb);
 202         unsigned int head_room;
 203         struct ipv6hdr *hdr;
 204         u8  proto = fl6->flowi6_proto;
 205         int seg_len = skb->len;
 206         int hlimit = -1;
 207         u32 mtu;
 208
 209         head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 210         if (opt)
 211                 head_room += opt->opt_nflen + opt->opt_flen;
 212
 213         if (unlikely(skb_headroom(skb) < head_room)) {
 214                 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 215                 if (!skb2) {
 216                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 217                                       IPSTATS_MIB_OUTDISCARDS);
 218                         kfree_skb(skb);
 219                         return -ENOBUFS;
 220                 }
 221                 if (skb->sk)
 222                         skb_set_owner_w(skb2, skb->sk);
 223                 consume_skb(skb);
 224                 skb = skb2;
 225         }
 226
 227         if (opt) {
 228                 seg_len += opt->opt_nflen + opt->opt_flen;
 229
 230                 if (opt->opt_flen)
 231                         ipv6_push_frag_opts(skb, opt, &proto);
 232
 233                 if (opt->opt_nflen)
 234                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 235                                              &fl6->saddr);
 236         }
 237
 238         skb_push(skb, sizeof(struct ipv6hdr));
 239         skb_reset_network_header(skb);
 240         hdr = ipv6_hdr(skb);
 241
 242         /*
 243          *      Fill in the IPv6 header
 244          */
 245         if (np)
 246                 hlimit = np->hop_limit;
 247         if (hlimit < 0)
 248                 hlimit = ip6_dst_hoplimit(dst);
 249
 250         ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 251                                 ip6_autoflowlabel(net, np), fl6));
 252
 253         hdr->payload_len = htons(seg_len);
 254         hdr->nexthdr = proto;
 255         hdr->hop_limit = hlimit;
 256
 257         hdr->saddr = fl6->saddr;
 258         hdr->daddr = *first_hop;
 259
 260         skb->protocol = htons(ETH_P_IPV6);
 261         skb->priority = priority;
 262         skb->mark = mark;
 263
 264         mtu = dst_mtu(dst);
 265         if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 266                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 267                               IPSTATS_MIB_OUT, skb->len);
 268
 269                 /* if egress device is enslaved to an L3 master device pass the
 270                  * skb to its handler for processing
 271                  */
 272                 skb = l3mdev_ip6_out((struct sock *)sk, skb);
 273                 if (unlikely(!skb))
 274                         return 0;
 275
 276                 /* hooks should never assume socket lock is held.
 277                  * we promote our socket to non const
 278                  */
 279                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 280                                net, (struct sock *)sk, skb, NULL, dst->dev,
 281                                dst_output);
 282         }
 283
 284         skb->dev = dst->dev;
 285         /* ipv6_local_error() does not require socket lock,
 286          * we promote our socket to non const
 287          */
 288         ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 289
 290         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 291         kfree_skb(skb);
 292         return -EMSGSIZE;
 293 }
 294 EXPORT_SYMBOL(ip6_xmit);
 295
 296 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 297 {
 298         struct ip6_ra_chain *ra;
 299         struct sock *last = NULL;
 300
 301         read_lock(&ip6_ra_lock);
 302         for (ra = ip6_ra_chain; ra; ra = ra->next) {
 303                 struct sock *sk = ra->sk;
 304                 if (sk && ra->sel == sel &&
 305                     (!sk->sk_bound_dev_if ||
 306                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
 307                         struct ipv6_pinfo *np = inet6_sk(sk);
 308
 309                         if (np && np->rtalert_isolate &&
 310                             !net_eq(sock_net(sk), dev_net(skb->dev))) {
 311                                 continue;
 312                         }
 313                         if (last) {
 314                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 315                                 if (skb2)
 316                                         rawv6_rcv(last, skb2);
 317                         }
 318                         last = sk;
 319                 }
 320         }
 321
 322         if (last) {
 323                 rawv6_rcv(last, skb);
 324                 read_unlock(&ip6_ra_lock);
 325                 return 1;
 326         }
 327         read_unlock(&ip6_ra_lock);
 328         return 0;
 329 }
 330
 331 static int ip6_forward_proxy_check(struct sk_buff *skb)
 332 {
 333         struct ipv6hdr *hdr = ipv6_hdr(skb);
 334         u8 nexthdr = hdr->nexthdr;
 335         __be16 frag_off;
 336         int offset;
 337
 338         if (ipv6_ext_hdr(nexthdr)) {
 339                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 340                 if (offset < 0)
 341                         return 0;
 342         } else
 343                 offset = sizeof(struct ipv6hdr);
 344
 345         if (nexthdr == IPPROTO_ICMPV6) {
 346                 struct icmp6hdr *icmp6;
 347
 348                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
 349                                          offset + 1 - skb->data)))
 350                         return 0;
 351
 352                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 353
 354                 switch (icmp6->icmp6_type) {
 355                 case NDISC_ROUTER_SOLICITATION:
 356                 case NDISC_ROUTER_ADVERTISEMENT:
 357                 case NDISC_NEIGHBOUR_SOLICITATION:
 358                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
 359                 case NDISC_REDIRECT:
 360                         /* For reaction involving unicast neighbor discovery
 361                          * message destined to the proxied address, pass it to
 362                          * input function.
 363                          */
 364                         return 1;
 365                 default:
 366                         break;
 367                 }
 368         }
 369
 370         /*
 371          * The proxying router can't forward traffic sent to a link-local
 372          * address, so signal the sender and discard the packet. This
 373          * behavior is clarified by the MIPv6 specification.
 374          */
 375         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 376                 dst_link_failure(skb);
 377                 return -1;
 378         }
 379
 380         return 0;
 381 }
 382
 383 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 384                                      struct sk_buff *skb)
 385 {
 386         struct dst_entry *dst = skb_dst(skb);
 387
 388         __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 389         __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 390
 391 #ifdef CONFIG_NET_SWITCHDEV
 392         if (skb->offload_l3_fwd_mark) {
 393                 consume_skb(skb);
 394                 return 0;
 395         }
 396 #endif
 397
 398         skb->tstamp = 0;
 399         return dst_output(net, sk, skb);
 400 }
 401
 402 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 403 {
 404         if (skb->len <= mtu)
 405                 return false;
 406
 407         /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 408         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 409                 return true;
 410
 411         if (skb->ignore_df)
 412                 return false;
 413
 414         if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 415                 return false;
 416
 417         return true;
 418 }
 419
 420 int ip6_forward(struct sk_buff *skb)
 421 {
 422         struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
 423         struct dst_entry *dst = skb_dst(skb);
 424         struct ipv6hdr *hdr = ipv6_hdr(skb);
 425         struct inet6_skb_parm *opt = IP6CB(skb);
 426         struct net *net = dev_net(dst->dev);
 427         u32 mtu;
 428
 429         if (net->ipv6.devconf_all->forwarding == 0)
 430                 goto error;
 431
 432         if (skb->pkt_type != PACKET_HOST)
 433                 goto drop;
 434
 435         if (unlikely(skb->sk))
 436                 goto drop;
 437
 438         if (skb_warn_if_lro(skb))
 439                 goto drop;
 440
 441         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 442                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 443                 goto drop;
 444         }
 445
 446         skb_forward_csum(skb);
 447
 448         /*
 449          *      We DO NOT make any processing on
 450          *      RA packets, pushing them to user level AS IS
 451          *      without ane WARRANTY that application will be able
 452          *      to interpret them. The reason is that we
 453          *      cannot make anything clever here.
 454          *
 455          *      We are not end-node, so that if packet contains
 456          *      AH/ESP, we cannot make anything.
 457          *      Defragmentation also would be mistake, RA packets
 458          *      cannot be fragmented, because there is no warranty
 459          *      that different fragments will go along one path. --ANK
 460          */
 461         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 462                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 463                         return 0;
 464         }
 465
 466         /*
 467          *      check and decrement ttl
 468          */
 469         if (hdr->hop_limit <= 1) {
 470                 /* Force OUTPUT device used as source address */
 471                 skb->dev = dst->dev;
 472                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 473                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 474
 475                 kfree_skb(skb);
 476                 return -ETIMEDOUT;
 477         }
 478
 479         /* XXX: idev->cnf.proxy_ndp? */
 480         if (net->ipv6.devconf_all->proxy_ndp &&
 481             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 482                 int proxied = ip6_forward_proxy_check(skb);
 483                 if (proxied > 0)
 484                         return ip6_input(skb);
 485                 else if (proxied < 0) {
 486                         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 487                         goto drop;
 488                 }
 489         }
 490
 491         if (!xfrm6_route_forward(skb)) {
 492                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 493                 goto drop;
 494         }
 495         dst = skb_dst(skb);
 496
 497         /* IPv6 specs say nothing about it, but it is clear that we cannot
 498            send redirects to source routed frames.
 499            We don't send redirects to frames decapsulated from IPsec.
 500          */
 501         if (IP6CB(skb)->iif == dst->dev->ifindex &&
 502             opt->srcrt == 0 && !skb_sec_path(skb)) {
 503                 struct in6_addr *target = NULL;
 504                 struct inet_peer *peer;
 505                 struct rt6_info *rt;
 506
 507                 /*
 508                  *      incoming and outgoing devices are the same
 509                  *      send a redirect.
 510                  */
 511
 512                 rt = (struct rt6_info *) dst;
 513                 if (rt->rt6i_flags & RTF_GATEWAY)
 514                         target = &rt->rt6i_gateway;
 515                 else
 516                         target = &hdr->daddr;
 517
 518                 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 519
 520                 /* Limit redirects both by destination (here)
 521                    and by source (inside ndisc_send_redirect)
 522                  */
 523                 if (inet_peer_xrlim_allow(peer, 1*HZ))
 524                         ndisc_send_redirect(skb, target);
 525                 if (peer)
 526                         inet_putpeer(peer);
 527         } else {
 528                 int addrtype = ipv6_addr_type(&hdr->saddr);
 529
 530                 /* This check is security critical. */
 531                 if (addrtype == IPV6_ADDR_ANY ||
 532                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 533                         goto error;
 534                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
 535                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 536                                     ICMPV6_NOT_NEIGHBOUR, 0);
 537                         goto error;
 538                 }
 539         }
 540
 541         mtu = ip6_dst_mtu_forward(dst);
 542         if (mtu < IPV6_MIN_MTU)
 543                 mtu = IPV6_MIN_MTU;
 544
 545         if (ip6_pkt_too_big(skb, mtu)) {
 546                 /* Again, force OUTPUT device used as source address */
 547                 skb->dev = dst->dev;
 548                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 549                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
 550                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
 551                                 IPSTATS_MIB_FRAGFAILS);
 552                 kfree_skb(skb);
 553                 return -EMSGSIZE;
 554         }
 555
 556         if (skb_cow(skb, dst->dev->hard_header_len)) {
 557                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
 558                                 IPSTATS_MIB_OUTDISCARDS);
 559                 goto drop;
 560         }
 561
 562         hdr = ipv6_hdr(skb);
 563
 564         /* Mangling hops number delayed to point after skb COW */
 565
 566         hdr->hop_limit--;
 567
 568         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 569                        net, NULL, skb, skb->dev, dst->dev,
 570                        ip6_forward_finish);
 571
 572 error:
 573         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 574 drop:
 575         kfree_skb(skb);
 576         return -EINVAL;
 577 }
 578
 579 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 580 {
 581         to->pkt_type = from->pkt_type;
 582         to->priority = from->priority;
 583         to->protocol = from->protocol;
 584         skb_dst_drop(to);
 585         skb_dst_set(to, dst_clone(skb_dst(from)));
 586         to->dev = from->dev;
 587         to->mark = from->mark;
 588
 589         skb_copy_hash(to, from);
 590
 591 #ifdef CONFIG_NET_SCHED
 592         to->tc_index = from->tc_index;
 593 #endif
 594         nf_copy(to, from);
 595         skb_ext_copy(to, from);
 596         skb_copy_secmark(to, from);
 597 }
 598
 599 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
 600                       u8 nexthdr, __be32 frag_id,
 601                       struct ip6_fraglist_iter *iter)
 602 {
 603         unsigned int first_len;
 604         struct frag_hdr *fh;
 605
 606         /* BUILD HEADER */
 607         *prevhdr = NEXTHDR_FRAGMENT;
 608         iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 609         if (!iter->tmp_hdr)
 610                 return -ENOMEM;
 611
 612         iter->frag = skb_shinfo(skb)->frag_list;
 613         skb_frag_list_init(skb);
 614
 615         iter->offset = 0;
 616         iter->hlen = hlen;
 617         iter->frag_id = frag_id;
 618         iter->nexthdr = nexthdr;
 619
 620         __skb_pull(skb, hlen);
 621         fh = __skb_push(skb, sizeof(struct frag_hdr));
 622         __skb_push(skb, hlen);
 623         skb_reset_network_header(skb);
 624         memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
 625
 626         fh->nexthdr = nexthdr;
 627         fh->reserved = 0;
 628         fh->frag_off = htons(IP6_MF);
 629         fh->identification = frag_id;
 630
 631         first_len = skb_pagelen(skb);
 632         skb->data_len = first_len - skb_headlen(skb);
 633         skb->len = first_len;
 634         ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
 635
 636         return 0;
 637 }
 638 EXPORT_SYMBOL(ip6_fraglist_init);
 639
 640 void ip6_fraglist_prepare(struct sk_buff *skb,
 641                           struct ip6_fraglist_iter *iter)
 642 {
 643         struct sk_buff *frag = iter->frag;
 644         unsigned int hlen = iter->hlen;
 645         struct frag_hdr *fh;
 646
 647         frag->ip_summed = CHECKSUM_NONE;
 648         skb_reset_transport_header(frag);
 649         fh = __skb_push(frag, sizeof(struct frag_hdr));
 650         __skb_push(frag, hlen);
 651         skb_reset_network_header(frag);
 652         memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
 653         iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
 654         fh->nexthdr = iter->nexthdr;
 655         fh->reserved = 0;
 656         fh->frag_off = htons(iter->offset);
 657         if (frag->next)
 658                 fh->frag_off |= htons(IP6_MF);
 659         fh->identification = iter->frag_id;
 660         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 661         ip6_copy_metadata(frag, skb);
 662 }
 663 EXPORT_SYMBOL(ip6_fraglist_prepare);
 664
 665 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
 666                    unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
 667                    u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
 668 {
 669         state->prevhdr = prevhdr;
 670         state->nexthdr = nexthdr;
 671         state->frag_id = frag_id;
 672
 673         state->hlen = hlen;
 674         state->mtu = mtu;
 675
 676         state->left = skb->len - hlen;  /* Space per frame */
 677         state->ptr = hlen;              /* Where to start from */
 678
 679         state->hroom = hdr_room;
 680         state->troom = needed_tailroom;
 681
 682         state->offset = 0;
 683 }
 684 EXPORT_SYMBOL(ip6_frag_init);
 685
 686 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
 687 {
 688         u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
 689         struct sk_buff *frag;
 690         struct frag_hdr *fh;
 691         unsigned int len;
 692
 693         len = state->left;
 694         /* IF: it doesn't fit, use 'mtu' - the data space left */
 695         if (len > state->mtu)
 696                 len = state->mtu;
 697         /* IF: we are not sending up to and including the packet end
 698            then align the next start on an eight byte boundary */
 699         if (len < state->left)
 700                 len &= ~7;
 701
 702         /* Allocate buffer */
 703         frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
 704                          state->hroom + state->troom, GFP_ATOMIC);
 705         if (!frag)
 706                 return ERR_PTR(-ENOMEM);
 707
 708         /*
 709          *      Set up data on packet
 710          */
 711
 712         ip6_copy_metadata(frag, skb);
 713         skb_reserve(frag, state->hroom);
 714         skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
 715         skb_reset_network_header(frag);
 716         fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
 717         frag->transport_header = (frag->network_header + state->hlen +
 718                                   sizeof(struct frag_hdr));
 719
 720         /*
 721          *      Charge the memory for the fragment to any owner
 722          *      it might possess
 723          */
 724         if (skb->sk)
 725                 skb_set_owner_w(frag, skb->sk);
 726
 727         /*
 728          *      Copy the packet header into the new buffer.
 729          */
 730         skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
 731
 732         fragnexthdr_offset = skb_network_header(frag);
 733         fragnexthdr_offset += prevhdr - skb_network_header(skb);
 734         *fragnexthdr_offset = NEXTHDR_FRAGMENT;
 735
 736         /*
 737          *      Build fragment header.
 738          */
 739         fh->nexthdr = state->nexthdr;
 740         fh->reserved = 0;
 741         fh->identification = state->frag_id;
 742
 743         /*
 744          *      Copy a block of the IP datagram.
 745          */
 746         BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
 747                              len));
 748         state->left -= len;
 749
 750         fh->frag_off = htons(state->offset);
 751         if (state->left > 0)
 752                 fh->frag_off |= htons(IP6_MF);
 753         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 754
 755         state->ptr += len;
 756         state->offset += len;
 757
 758         return frag;
 759 }
 760 EXPORT_SYMBOL(ip6_frag_next);
 761
 762 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 763                  int (*output)(struct net *, struct sock *, struct sk_buff *))
 764 {
 765         struct sk_buff *frag;
 766         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 767         struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 768                                 inet6_sk(skb->sk) : NULL;
 769         struct ip6_frag_state state;
 770         unsigned int mtu, hlen, nexthdr_offset;
 771         int hroom, err = 0;
 772         __be32 frag_id;
 773         u8 *prevhdr, nexthdr = 0;
 774
 775         err = ip6_find_1stfragopt(skb, &prevhdr);
 776         if (err < 0)
 777                 goto fail;
 778         hlen = err;
 779         nexthdr = *prevhdr;
 780         nexthdr_offset = prevhdr - skb_network_header(skb);
 781
 782         mtu = ip6_skb_dst_mtu(skb);
 783
 784         /* We must not fragment if the socket is set to force MTU discovery
 785          * or if the skb it not generated by a local socket.
 786          */
 787         if (unlikely(!skb->ignore_df && skb->len > mtu))
 788                 goto fail_toobig;
 789
 790         if (IP6CB(skb)->frag_max_size) {
 791                 if (IP6CB(skb)->frag_max_size > mtu)
 792                         goto fail_toobig;
 793
 794                 /* don't send fragments larger than what we received */
 795                 mtu = IP6CB(skb)->frag_max_size;
 796                 if (mtu < IPV6_MIN_MTU)
 797                         mtu = IPV6_MIN_MTU;
 798         }
 799
 800         if (np && np->frag_size < mtu) {
 801                 if (np->frag_size)
 802                         mtu = np->frag_size;
 803         }
 804         if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 805                 goto fail_toobig;
 806         mtu -= hlen + sizeof(struct frag_hdr);
 807
 808         frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 809                                     &ipv6_hdr(skb)->saddr);
 810
 811         if (skb->ip_summed == CHECKSUM_PARTIAL &&
 812             (err = skb_checksum_help(skb)))
 813                 goto fail;
 814
 815         prevhdr = skb_network_header(skb) + nexthdr_offset;
 816         hroom = LL_RESERVED_SPACE(rt->dst.dev);
 817         if (skb_has_frag_list(skb)) {
 818                 unsigned int first_len = skb_pagelen(skb);
 819                 struct ip6_fraglist_iter iter;
 820                 struct sk_buff *frag2;
 821
 822                 if (first_len - hlen > mtu ||
 823                     ((first_len - hlen) & 7) ||
 824                     skb_cloned(skb) ||
 825                     skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 826                         goto slow_path;
 827
 828                 skb_walk_frags(skb, frag) {
 829                         /* Correct geometry. */
 830                         if (frag->len > mtu ||
 831                             ((frag->len & 7) && frag->next) ||
 832                             skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 833                                 goto slow_path_clean;
 834
 835                         /* Partially cloned skb? */
 836                         if (skb_shared(frag))
 837                                 goto slow_path_clean;
 838
 839                         BUG_ON(frag->sk);
 840                         if (skb->sk) {
 841                                 frag->sk = skb->sk;
 842                                 frag->destructor = sock_wfree;
 843                         }
 844                         skb->truesize -= frag->truesize;
 845                 }
 846
 847                 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
 848                                         &iter);
 849                 if (err < 0)
 850                         goto fail;
 851
 852                 for (;;) {
 853                         /* Prepare header of the next frame,
 854                          * before previous one went down. */
 855                         if (iter.frag)
 856                                 ip6_fraglist_prepare(skb, &iter);
 857
 858                         err = output(net, sk, skb);
 859                         if (!err)
 860                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 861                                               IPSTATS_MIB_FRAGCREATES);
 862
 863                         if (err || !iter.frag)
 864                                 break;
 865
 866                         skb = ip6_fraglist_next(&iter);
 867                 }
 868
 869                 kfree(iter.tmp_hdr);
 870
 871                 if (err == 0) {
 872                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 873                                       IPSTATS_MIB_FRAGOKS);
 874                         return 0;
 875                 }
 876
 877                 kfree_skb_list(iter.frag);
 878
 879                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 880                               IPSTATS_MIB_FRAGFAILS);
 881                 return err;
 882
 883 slow_path_clean:
 884                 skb_walk_frags(skb, frag2) {
 885                         if (frag2 == frag)
 886                                 break;
 887                         frag2->sk = NULL;
 888                         frag2->destructor = NULL;
 889                         skb->truesize += frag2->truesize;
 890                 }
 891         }
 892
 893 slow_path:
 894         /*
 895          *      Fragment the datagram.
 896          */
 897
 898         ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
 899                       LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
 900                       &state);
 901
 902         /*
 903          *      Keep copying data until we run out.
 904          */
 905
 906         while (state.left > 0) {
 907                 frag = ip6_frag_next(skb, &state);
 908                 if (IS_ERR(frag)) {
 909                         err = PTR_ERR(frag);
 910                         goto fail;
 911                 }
 912
 913                 /*
 914                  *      Put this fragment into the sending queue.
 915                  */
 916                 err = output(net, sk, frag);
 917                 if (err)
 918                         goto fail;
 919
 920                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 921                               IPSTATS_MIB_FRAGCREATES);
 922         }
 923         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 924                       IPSTATS_MIB_FRAGOKS);
 925         consume_skb(skb);
 926         return err;
 927
 928 fail_toobig:
 929         if (skb->sk && dst_allfrag(skb_dst(skb)))
 930                 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 931
 932         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 933         err = -EMSGSIZE;
 934
 935 fail:
 936         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 937                       IPSTATS_MIB_FRAGFAILS);
 938         kfree_skb(skb);
 939         return err;
 940 }
 941
 942 static inline int ip6_rt_check(const struct rt6key *rt_key,
 943                                const struct in6_addr *fl_addr,
 944                                const struct in6_addr *addr_cache)
 945 {
 946         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 947                 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 948 }
 949
 950 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 951                                           struct dst_entry *dst,
 952                                           const struct flowi6 *fl6)
 953 {
 954         struct ipv6_pinfo *np = inet6_sk(sk);
 955         struct rt6_info *rt;
 956
 957         if (!dst)
 958                 goto out;
 959
 960         if (dst->ops->family != AF_INET6) {
 961                 dst_release(dst);
 962                 return NULL;
 963         }
 964
 965         rt = (struct rt6_info *)dst;
 966         /* Yes, checking route validity in not connected
 967          * case is not very simple. Take into account,
 968          * that we do not support routing by source, TOS,
 969          * and MSG_DONTROUTE            --ANK (980726)
 970          *
 971          * 1. ip6_rt_check(): If route was host route,
 972          *    check that cached destination is current.
 973          *    If it is network route, we still may
 974          *    check its validity using saved pointer
 975          *    to the last used address: daddr_cache.
 976          *    We do not want to save whole address now,
 977          *    (because main consumer of this service
 978          *    is tcp, which has not this problem),
 979          *    so that the last trick works only on connected
 980          *    sockets.
 981          * 2. oif also should be the same.
 982          */
 983         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 984 #ifdef CONFIG_IPV6_SUBTREES
 985             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 986 #endif
 987            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 988               (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 989                 dst_release(dst);
 990                 dst = NULL;
 991         }
 992
 993 out:
 994         return dst;
 995 }
 996
 997 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 998                                struct dst_entry **dst, struct flowi6 *fl6)
 999 {
1000 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1001         struct neighbour *n;
1002         struct rt6_info *rt;
1003 #endif
1004         int err;
1005         int flags = 0;
1006
1007         /* The correct way to handle this would be to do
1008          * ip6_route_get_saddr, and then ip6_route_output; however,
1009          * the route-specific preferred source forces the
1010          * ip6_route_output call _before_ ip6_route_get_saddr.
1011          *
1012          * In source specific routing (no src=any default route),
1013          * ip6_route_output will fail given src=any saddr, though, so
1014          * that's why we try it again later.
1015          */
1016         if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1017                 struct fib6_info *from;
1018                 struct rt6_info *rt;
1019                 bool had_dst = *dst != NULL;
1020
1021                 if (!had_dst)
1022                         *dst = ip6_route_output(net, sk, fl6);
1023                 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1024
1025                 rcu_read_lock();
1026                 from = rt ? rcu_dereference(rt->from) : NULL;
1027                 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1028                                           sk ? inet6_sk(sk)->srcprefs : 0,
1029                                           &fl6->saddr);
1030                 rcu_read_unlock();
1031
1032                 if (err)
1033                         goto out_err_release;
1034
1035                 /* If we had an erroneous initial result, pretend it
1036                  * never existed and let the SA-enabled version take
1037                  * over.
1038                  */
1039                 if (!had_dst && (*dst)->error) {
1040                         dst_release(*dst);
1041                         *dst = NULL;
1042                 }
1043
1044                 if (fl6->flowi6_oif)
1045                         flags |= RT6_LOOKUP_F_IFACE;
1046         }
1047
1048         if (!*dst)
1049                 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1050
1051         err = (*dst)->error;
1052         if (err)
1053                 goto out_err_release;
1054
1055 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1056         /*
1057          * Here if the dst entry we've looked up
1058          * has a neighbour entry that is in the INCOMPLETE
1059          * state and the src address from the flow is
1060          * marked as OPTIMISTIC, we release the found
1061          * dst entry and replace it instead with the
1062          * dst entry of the nexthop router
1063          */
1064         rt = (struct rt6_info *) *dst;
1065         rcu_read_lock_bh();
1066         n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1067                                       rt6_nexthop(rt, &fl6->daddr));
1068         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1069         rcu_read_unlock_bh();
1070
1071         if (err) {
1072                 struct inet6_ifaddr *ifp;
1073                 struct flowi6 fl_gw6;
1074                 int redirect;
1075
1076                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1077                                       (*dst)->dev, 1);
1078
1079                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1080                 if (ifp)
1081                         in6_ifa_put(ifp);
1082
1083                 if (redirect) {
1084                         /*
1085                          * We need to get the dst entry for the
1086                          * default router instead
1087                          */
1088                         dst_release(*dst);
1089                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1090                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1091                         *dst = ip6_route_output(net, sk, &fl_gw6);
1092                         err = (*dst)->error;
1093                         if (err)
1094                                 goto out_err_release;
1095                 }
1096         }
1097 #endif
1098         if (ipv6_addr_v4mapped(&fl6->saddr) &&
1099             !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1100                 err = -EAFNOSUPPORT;
1101                 goto out_err_release;
1102         }
1103
1104         return 0;
1105
1106 out_err_release:
1107         dst_release(*dst);
1108         *dst = NULL;
1109
1110         if (err == -ENETUNREACH)
1111                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1112         return err;
1113 }
1114
1115 /**
1116  *      ip6_dst_lookup - perform route lookup on flow
1117  *      @sk: socket which provides route info
1118  *      @dst: pointer to dst_entry * for result
1119  *      @fl6: flow to lookup
1120  *
1121  *      This function performs a route lookup on the given flow.
1122  *
1123  *      It returns zero on success, or a standard errno code on error.
1124  */
1125 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1126                    struct flowi6 *fl6)
1127 {
1128         *dst = NULL;
1129         return ip6_dst_lookup_tail(net, sk, dst, fl6);
1130 }
1131 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1132
1133 /**
1134  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1135  *      @sk: socket which provides route info
1136  *      @fl6: flow to lookup
1137  *      @final_dst: final destination address for ipsec lookup
1138  *
1139  *      This function performs a route lookup on the given flow.
1140  *
1141  *      It returns a valid dst pointer on success, or a pointer encoded
1142  *      error code.
1143  */
1144 struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
1145                                       const struct in6_addr *final_dst)
1146 {
1147         struct dst_entry *dst = NULL;
1148         int err;
1149
1150         err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
1151         if (err)
1152                 return ERR_PTR(err);
1153         if (final_dst)
1154                 fl6->daddr = *final_dst;
1155
1156         return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1157 }
1158 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1159
1160 /**
1161  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1162  *      @sk: socket which provides the dst cache and route info
1163  *      @fl6: flow to lookup
1164  *      @final_dst: final destination address for ipsec lookup
1165  *      @connected: whether @sk is connected or not
1166  *
1167  *      This function performs a route lookup on the given flow with the
1168  *      possibility of using the cached route in the socket if it is valid.
1169  *      It will take the socket dst lock when operating on the dst cache.
1170  *      As a result, this function can only be used in process context.
1171  *
1172  *      In addition, for a connected socket, cache the dst in the socket
1173  *      if the current cache is not valid.
1174  *
1175  *      It returns a valid dst pointer on success, or a pointer encoded
1176  *      error code.
1177  */
1178 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1179                                          const struct in6_addr *final_dst,
1180                                          bool connected)
1181 {
1182         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1183
1184         dst = ip6_sk_dst_check(sk, dst, fl6);
1185         if (dst)
1186                 return dst;
1187
1188         dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
1189         if (connected && !IS_ERR(dst))
1190                 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1191
1192         return dst;
1193 }
1194 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1195
1196 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1197                                                gfp_t gfp)
1198 {
1199         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1200 }
1201
1202 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1203                                                 gfp_t gfp)
1204 {
1205         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1206 }
1207
1208 static void ip6_append_data_mtu(unsigned int *mtu,
1209                                 int *maxfraglen,
1210                                 unsigned int fragheaderlen,
1211                                 struct sk_buff *skb,
1212                                 struct rt6_info *rt,
1213                                 unsigned int orig_mtu)
1214 {
1215         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1216                 if (!skb) {
1217                         /* first fragment, reserve header_len */
1218                         *mtu = orig_mtu - rt->dst.header_len;
1219
1220                 } else {
1221                         /*
1222                          * this fragment is not first, the headers
1223                          * space is regarded as data space.
1224                          */
1225                         *mtu = orig_mtu;
1226                 }
1227                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1228                               + fragheaderlen - sizeof(struct frag_hdr);
1229         }
1230 }
1231
1232 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1233                           struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1234                           struct rt6_info *rt, struct flowi6 *fl6)
1235 {
1236         struct ipv6_pinfo *np = inet6_sk(sk);
1237         unsigned int mtu;
1238         struct ipv6_txoptions *opt = ipc6->opt;
1239
1240         /*
1241          * setup for corking
1242          */
1243         if (opt) {
1244                 if (WARN_ON(v6_cork->opt))
1245                         return -EINVAL;
1246
1247                 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1248                 if (unlikely(!v6_cork->opt))
1249                         return -ENOBUFS;
1250
1251                 v6_cork->opt->tot_len = sizeof(*opt);
1252                 v6_cork->opt->opt_flen = opt->opt_flen;
1253                 v6_cork->opt->opt_nflen = opt->opt_nflen;
1254
1255                 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1256                                                     sk->sk_allocation);
1257                 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1258                         return -ENOBUFS;
1259
1260                 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1261                                                     sk->sk_allocation);
1262                 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1263                         return -ENOBUFS;
1264
1265                 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1266                                                    sk->sk_allocation);
1267                 if (opt->hopopt && !v6_cork->opt->hopopt)
1268                         return -ENOBUFS;
1269
1270                 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1271                                                     sk->sk_allocation);
1272                 if (opt->srcrt && !v6_cork->opt->srcrt)
1273                         return -ENOBUFS;
1274
1275                 /* need source address above miyazawa*/
1276         }
1277         dst_hold(&rt->dst);
1278         cork->base.dst = &rt->dst;
1279         cork->fl.u.ip6 = *fl6;
1280         v6_cork->hop_limit = ipc6->hlimit;
1281         v6_cork->tclass = ipc6->tclass;
1282         if (rt->dst.flags & DST_XFRM_TUNNEL)
1283                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1284                       READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1285         else
1286                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1287                         READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1288         if (np->frag_size < mtu) {
1289                 if (np->frag_size)
1290                         mtu = np->frag_size;
1291         }
1292         if (mtu < IPV6_MIN_MTU)
1293                 return -EINVAL;
1294         cork->base.fragsize = mtu;
1295         cork->base.gso_size = ipc6->gso_size;
1296         cork->base.tx_flags = 0;
1297         cork->base.mark = ipc6->sockc.mark;
1298         sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1299
1300         if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1301                 cork->base.flags |= IPCORK_ALLFRAG;
1302         cork->base.length = 0;
1303
1304         cork->base.transmit_time = ipc6->sockc.transmit_time;
1305
1306         return 0;
1307 }
1308
1309 static int __ip6_append_data(struct sock *sk,
1310                              struct flowi6 *fl6,
1311                              struct sk_buff_head *queue,
1312                              struct inet_cork *cork,
1313                              struct inet6_cork *v6_cork,
1314                              struct page_frag *pfrag,
1315                              int getfrag(void *from, char *to, int offset,
1316                                          int len, int odd, struct sk_buff *skb),
1317                              void *from, int length, int transhdrlen,
1318                              unsigned int flags, struct ipcm6_cookie *ipc6)
1319 {
1320         struct sk_buff *skb, *skb_prev = NULL;
1321         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1322         struct ubuf_info *uarg = NULL;
1323         int exthdrlen = 0;
1324         int dst_exthdrlen = 0;
1325         int hh_len;
1326         int copy;
1327         int err;
1328         int offset = 0;
1329         u32 tskey = 0;
1330         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1331         struct ipv6_txoptions *opt = v6_cork->opt;
1332         int csummode = CHECKSUM_NONE;
1333         unsigned int maxnonfragsize, headersize;
1334         unsigned int wmem_alloc_delta = 0;
1335         bool paged, extra_uref = false;
1336
1337         skb = skb_peek_tail(queue);
1338         if (!skb) {
1339                 exthdrlen = opt ? opt->opt_flen : 0;
1340                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1341         }
1342
1343         paged = !!cork->gso_size;
1344         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1345         orig_mtu = mtu;
1346
1347         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1348             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1349                 tskey = sk->sk_tskey++;
1350
1351         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1352
1353         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1354                         (opt ? opt->opt_nflen : 0);
1355         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1356                      sizeof(struct frag_hdr);
1357
1358         headersize = sizeof(struct ipv6hdr) +
1359                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1360                      (dst_allfrag(&rt->dst) ?
1361                       sizeof(struct frag_hdr) : 0) +
1362                      rt->rt6i_nfheader_len;
1363
1364         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1365          * the first fragment
1366          */
1367         if (headersize + transhdrlen > mtu)
1368                 goto emsgsize;
1369
1370         if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1371             (sk->sk_protocol == IPPROTO_UDP ||
1372              sk->sk_protocol == IPPROTO_RAW)) {
1373                 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1374                                 sizeof(struct ipv6hdr));
1375                 goto emsgsize;
1376         }
1377
1378         if (ip6_sk_ignore_df(sk))
1379                 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1380         else
1381                 maxnonfragsize = mtu;
1382
1383         if (cork->length + length > maxnonfragsize - headersize) {
1384 emsgsize:
1385                 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1386                 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1387                 return -EMSGSIZE;
1388         }
1389
1390         /* CHECKSUM_PARTIAL only with no extension headers and when
1391          * we are not going to fragment
1392          */
1393         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1394             headersize == sizeof(struct ipv6hdr) &&
1395             length <= mtu - headersize &&
1396             (!(flags & MSG_MORE) || cork->gso_size) &&
1397             rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1398                 csummode = CHECKSUM_PARTIAL;
1399
1400         if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1401                 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1402                 if (!uarg)
1403                         return -ENOBUFS;
1404                 extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1405                 if (rt->dst.dev->features & NETIF_F_SG &&
1406                     csummode == CHECKSUM_PARTIAL) {
1407                         paged = true;
1408                 } else {
1409                         uarg->zerocopy = 0;
1410                         skb_zcopy_set(skb, uarg, &extra_uref);
1411                 }
1412         }
1413
1414         /*
1415          * Let's try using as much space as possible.
1416          * Use MTU if total length of the message fits into the MTU.
1417          * Otherwise, we need to reserve fragment header and
1418          * fragment alignment (= 8-15 octects, in total).
1419          *
1420          * Note that we may need to "move" the data from the tail of
1421          * of the buffer to the new fragment when we split
1422          * the message.
1423          *
1424          * FIXME: It may be fragmented into multiple chunks
1425          *        at once if non-fragmentable extension headers
1426          *        are too large.
1427          * --yoshfuji
1428          */
1429
1430         cork->length += length;
1431         if (!skb)
1432                 goto alloc_new_skb;
1433
1434         while (length > 0) {
1435                 /* Check if the remaining data fits into current packet. */
1436                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1437                 if (copy < length)
1438                         copy = maxfraglen - skb->len;
1439
1440                 if (copy <= 0) {
1441                         char *data;
1442                         unsigned int datalen;
1443                         unsigned int fraglen;
1444                         unsigned int fraggap;
1445                         unsigned int alloclen;
1446                         unsigned int pagedlen;
1447 alloc_new_skb:
1448                         /* There's no room in the current skb */
1449                         if (skb)
1450                                 fraggap = skb->len - maxfraglen;
1451                         else
1452                                 fraggap = 0;
1453                         /* update mtu and maxfraglen if necessary */
1454                         if (!skb || !skb_prev)
1455                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1456                                                     fragheaderlen, skb, rt,
1457                                                     orig_mtu);
1458
1459                         skb_prev = skb;
1460
1461                         /*
1462                          * If remaining data exceeds the mtu,
1463                          * we know we need more fragment(s).
1464                          */
1465                         datalen = length + fraggap;
1466
1467                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1468                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1469                         fraglen = datalen + fragheaderlen;
1470                         pagedlen = 0;
1471
1472                         if ((flags & MSG_MORE) &&
1473                             !(rt->dst.dev->features&NETIF_F_SG))
1474                                 alloclen = mtu;
1475                         else if (!paged)
1476                                 alloclen = fraglen;
1477                         else {
1478                                 alloclen = min_t(int, fraglen, MAX_HEADER);
1479                                 pagedlen = fraglen - alloclen;
1480                         }
1481
1482                         alloclen += dst_exthdrlen;
1483
1484                         if (datalen != length + fraggap) {
1485                                 /*
1486                                  * this is not the last fragment, the trailer
1487                                  * space is regarded as data space.
1488                                  */
1489                                 datalen += rt->dst.trailer_len;
1490                         }
1491
1492                         alloclen += rt->dst.trailer_len;
1493                         fraglen = datalen + fragheaderlen;
1494
1495                         /*
1496                          * We just reserve space for fragment header.
1497                          * Note: this may be overallocation if the message
1498                          * (without MSG_MORE) fits into the MTU.
1499                          */
1500                         alloclen += sizeof(struct frag_hdr);
1501
1502                         copy = datalen - transhdrlen - fraggap - pagedlen;
1503                         if (copy < 0) {
1504                                 err = -EINVAL;
1505                                 goto error;
1506                         }
1507                         if (transhdrlen) {
1508                                 skb = sock_alloc_send_skb(sk,
1509                                                 alloclen + hh_len,
1510                                                 (flags & MSG_DONTWAIT), &err);
1511                         } else {
1512                                 skb = NULL;
1513                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1514                                     2 * sk->sk_sndbuf)
1515                                         skb = alloc_skb(alloclen + hh_len,
1516                                                         sk->sk_allocation);
1517                                 if (unlikely(!skb))
1518                                         err = -ENOBUFS;
1519                         }
1520                         if (!skb)
1521                                 goto error;
1522                         /*
1523                          *      Fill in the control structures
1524                          */
1525                         skb->protocol = htons(ETH_P_IPV6);
1526                         skb->ip_summed = csummode;
1527                         skb->csum = 0;
1528                         /* reserve for fragmentation and ipsec header */
1529                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1530                                     dst_exthdrlen);
1531
1532                         /*
1533                          *      Find where to start putting bytes
1534                          */
1535                         data = skb_put(skb, fraglen - pagedlen);
1536                         skb_set_network_header(skb, exthdrlen);
1537                         data += fragheaderlen;
1538                         skb->transport_header = (skb->network_header +
1539                                                  fragheaderlen);
1540                         if (fraggap) {
1541                                 skb->csum = skb_copy_and_csum_bits(
1542                                         skb_prev, maxfraglen,
1543                                         data + transhdrlen, fraggap, 0);
1544                                 skb_prev->csum = csum_sub(skb_prev->csum,
1545                                                           skb->csum);
1546                                 data += fraggap;
1547                                 pskb_trim_unique(skb_prev, maxfraglen);
1548                         }
1549                         if (copy > 0 &&
1550                             getfrag(from, data + transhdrlen, offset,
1551                                     copy, fraggap, skb) < 0) {
1552                                 err = -EFAULT;
1553                                 kfree_skb(skb);
1554                                 goto error;
1555                         }
1556
1557                         offset += copy;
1558                         length -= copy + transhdrlen;
1559                         transhdrlen = 0;
1560                         exthdrlen = 0;
1561                         dst_exthdrlen = 0;
1562
1563                         /* Only the initial fragment is time stamped */
1564                         skb_shinfo(skb)->tx_flags = cork->tx_flags;
1565                         cork->tx_flags = 0;
1566                         skb_shinfo(skb)->tskey = tskey;
1567                         tskey = 0;
1568                         skb_zcopy_set(skb, uarg, &extra_uref);
1569
1570                         if ((flags & MSG_CONFIRM) && !skb_prev)
1571                                 skb_set_dst_pending_confirm(skb, 1);
1572
1573                         /*
1574                          * Put the packet on the pending queue
1575                          */
1576                         if (!skb->destructor) {
1577                                 skb->destructor = sock_wfree;
1578                                 skb->sk = sk;
1579                                 wmem_alloc_delta += skb->truesize;
1580                         }
1581                         __skb_queue_tail(queue, skb);
1582                         continue;
1583                 }
1584
1585                 if (copy > length)
1586                         copy = length;
1587
1588                 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1589                     skb_tailroom(skb) >= copy) {
1590                         unsigned int off;
1591
1592                         off = skb->len;
1593                         if (getfrag(from, skb_put(skb, copy),
1594                                                 offset, copy, off, skb) < 0) {
1595                                 __skb_trim(skb, off);
1596                                 err = -EFAULT;
1597                                 goto error;
1598                         }
1599                 } else if (!uarg || !uarg->zerocopy) {
1600                         int i = skb_shinfo(skb)->nr_frags;
1601
1602                         err = -ENOMEM;
1603                         if (!sk_page_frag_refill(sk, pfrag))
1604                                 goto error;
1605
1606                         if (!skb_can_coalesce(skb, i, pfrag->page,
1607                                               pfrag->offset)) {
1608                                 err = -EMSGSIZE;
1609                                 if (i == MAX_SKB_FRAGS)
1610                                         goto error;
1611
1612                                 __skb_fill_page_desc(skb, i, pfrag->page,
1613                                                      pfrag->offset, 0);
1614                                 skb_shinfo(skb)->nr_frags = ++i;
1615                                 get_page(pfrag->page);
1616                         }
1617                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1618                         if (getfrag(from,
1619                                     page_address(pfrag->page) + pfrag->offset,
1620                                     offset, copy, skb->len, skb) < 0)
1621                                 goto error_efault;
1622
1623                         pfrag->offset += copy;
1624                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1625                         skb->len += copy;
1626                         skb->data_len += copy;
1627                         skb->truesize += copy;
1628                         wmem_alloc_delta += copy;
1629                 } else {
1630                         err = skb_zerocopy_iter_dgram(skb, from, copy);
1631                         if (err < 0)
1632                                 goto error;
1633                 }
1634                 offset += copy;
1635                 length -= copy;
1636         }
1637
1638         if (wmem_alloc_delta)
1639                 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1640         return 0;
1641
1642 error_efault:
1643         err = -EFAULT;
1644 error:
1645         if (uarg)
1646                 sock_zerocopy_put_abort(uarg, extra_uref);
1647         cork->length -= length;
1648         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1649         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1650         return err;
1651 }
1652
1653 int ip6_append_data(struct sock *sk,
1654                     int getfrag(void *from, char *to, int offset, int len,
1655                                 int odd, struct sk_buff *skb),
1656                     void *from, int length, int transhdrlen,
1657                     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1658                     struct rt6_info *rt, unsigned int flags)
1659 {
1660         struct inet_sock *inet = inet_sk(sk);
1661         struct ipv6_pinfo *np = inet6_sk(sk);
1662         int exthdrlen;
1663         int err;
1664
1665         if (flags&MSG_PROBE)
1666                 return 0;
1667         if (skb_queue_empty(&sk->sk_write_queue)) {
1668                 /*
1669                  * setup for corking
1670                  */
1671                 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1672                                      ipc6, rt, fl6);
1673                 if (err)
1674                         return err;
1675
1676                 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1677                 length += exthdrlen;
1678                 transhdrlen += exthdrlen;
1679         } else {
1680                 fl6 = &inet->cork.fl.u.ip6;
1681                 transhdrlen = 0;
1682         }
1683
1684         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1685                                  &np->cork, sk_page_frag(sk), getfrag,
1686                                  from, length, transhdrlen, flags, ipc6);
1687 }
1688 EXPORT_SYMBOL_GPL(ip6_append_data);
1689
1690 static void ip6_cork_release(struct inet_cork_full *cork,
1691                              struct inet6_cork *v6_cork)
1692 {
1693         if (v6_cork->opt) {
1694                 kfree(v6_cork->opt->dst0opt);
1695                 kfree(v6_cork->opt->dst1opt);
1696                 kfree(v6_cork->opt->hopopt);
1697                 kfree(v6_cork->opt->srcrt);
1698                 kfree(v6_cork->opt);
1699                 v6_cork->opt = NULL;
1700         }
1701
1702         if (cork->base.dst) {
1703                 dst_release(cork->base.dst);
1704                 cork->base.dst = NULL;
1705                 cork->base.flags &= ~IPCORK_ALLFRAG;
1706         }
1707         memset(&cork->fl, 0, sizeof(cork->fl));
1708 }
1709
1710 struct sk_buff *__ip6_make_skb(struct sock *sk,
1711                                struct sk_buff_head *queue,
1712                                struct inet_cork_full *cork,
1713                                struct inet6_cork *v6_cork)
1714 {
1715         struct sk_buff *skb, *tmp_skb;
1716         struct sk_buff **tail_skb;
1717         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1718         struct ipv6_pinfo *np = inet6_sk(sk);
1719         struct net *net = sock_net(sk);
1720         struct ipv6hdr *hdr;
1721         struct ipv6_txoptions *opt = v6_cork->opt;
1722         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1723         struct flowi6 *fl6 = &cork->fl.u.ip6;
1724         unsigned char proto = fl6->flowi6_proto;
1725
1726         skb = __skb_dequeue(queue);
1727         if (!skb)
1728                 goto out;
1729         tail_skb = &(skb_shinfo(skb)->frag_list);
1730
1731         /* move skb->data to ip header from ext header */
1732         if (skb->data < skb_network_header(skb))
1733                 __skb_pull(skb, skb_network_offset(skb));
1734         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1735                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1736                 *tail_skb = tmp_skb;
1737                 tail_skb = &(tmp_skb->next);
1738                 skb->len += tmp_skb->len;
1739                 skb->data_len += tmp_skb->len;
1740                 skb->truesize += tmp_skb->truesize;
1741                 tmp_skb->destructor = NULL;
1742                 tmp_skb->sk = NULL;
1743         }
1744
1745         /* Allow local fragmentation. */
1746         skb->ignore_df = ip6_sk_ignore_df(sk);
1747
1748         *final_dst = fl6->daddr;
1749         __skb_pull(skb, skb_network_header_len(skb));
1750         if (opt && opt->opt_flen)
1751                 ipv6_push_frag_opts(skb, opt, &proto);
1752         if (opt && opt->opt_nflen)
1753                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1754
1755         skb_push(skb, sizeof(struct ipv6hdr));
1756         skb_reset_network_header(skb);
1757         hdr = ipv6_hdr(skb);
1758
1759         ip6_flow_hdr(hdr, v6_cork->tclass,
1760                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1761                                         ip6_autoflowlabel(net, np), fl6));
1762         hdr->hop_limit = v6_cork->hop_limit;
1763         hdr->nexthdr = proto;
1764         hdr->saddr = fl6->saddr;
1765         hdr->daddr = *final_dst;
1766
1767         skb->priority = sk->sk_priority;
1768         skb->mark = cork->base.mark;
1769
1770         skb->tstamp = cork->base.transmit_time;
1771
1772         skb_dst_set(skb, dst_clone(&rt->dst));
1773         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1774         if (proto == IPPROTO_ICMPV6) {
1775                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1776
1777                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1778                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1779         }
1780
1781         ip6_cork_release(cork, v6_cork);
1782 out:
1783         return skb;
1784 }
1785
1786 int ip6_send_skb(struct sk_buff *skb)
1787 {
1788         struct net *net = sock_net(skb->sk);
1789         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1790         int err;
1791
1792         err = ip6_local_out(net, skb->sk, skb);
1793         if (err) {
1794                 if (err > 0)
1795                         err = net_xmit_errno(err);
1796                 if (err)
1797                         IP6_INC_STATS(net, rt->rt6i_idev,
1798                                       IPSTATS_MIB_OUTDISCARDS);
1799         }
1800
1801         return err;
1802 }
1803
/*
 * ip6_push_pending_frames - build and transmit whatever is pending on @sk.
 *
 * Asks ip6_finish_skb() for the assembled packet and, if there is one,
 * passes it to ip6_send_skb().
 *
 * Returns 0 when ip6_finish_skb() yields no skb, otherwise the result of
 * ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb = ip6_finish_skb(sk);

        return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1815
1816 static void __ip6_flush_pending_frames(struct sock *sk,
1817                                        struct sk_buff_head *queue,
1818                                        struct inet_cork_full *cork,
1819                                        struct inet6_cork *v6_cork)
1820 {
1821         struct sk_buff *skb;
1822
1823         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1824                 if (skb_dst(skb))
1825                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1826                                       IPSTATS_MIB_OUTDISCARDS);
1827                 kfree_skb(skb);
1828         }
1829
1830         ip6_cork_release(cork, v6_cork);
1831 }
1832
1833 void ip6_flush_pending_frames(struct sock *sk)
1834 {
1835         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1836                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1837 }
1838 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1839
1840 struct sk_buff *ip6_make_skb(struct sock *sk,
1841                              int getfrag(void *from, char *to, int offset,
1842                                          int len, int odd, struct sk_buff *skb),
1843                              void *from, int length, int transhdrlen,
1844                              struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1845                              struct rt6_info *rt, unsigned int flags,
1846                              struct inet_cork_full *cork)
1847 {
1848         struct inet6_cork v6_cork;
1849         struct sk_buff_head queue;
1850         int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1851         int err;
1852
1853         if (flags & MSG_PROBE)
1854                 return NULL;
1855
1856         __skb_queue_head_init(&queue);
1857
1858         cork->base.flags = 0;
1859         cork->base.addr = 0;
1860         cork->base.opt = NULL;
1861         cork->base.dst = NULL;
1862         v6_cork.opt = NULL;
1863         err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1864         if (err) {
1865                 ip6_cork_release(cork, &v6_cork);
1866                 return ERR_PTR(err);
1867         }
1868         if (ipc6->dontfrag < 0)
1869                 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1870
1871         err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1872                                 &current->task_frag, getfrag, from,
1873                                 length + exthdrlen, transhdrlen + exthdrlen,
1874                                 flags, ipc6);
1875         if (err) {
1876                 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1877                 return ERR_PTR(err);
1878         }
1879
1880         return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1881 }