net/ipv6/ip6_output.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  *      IPv6 output functions
   4  *      Linux INET6 implementation
   5  *
   6  *      Authors:
   7  *      Pedro Roque             <roque@di.fc.ul.pt>
   8  *
   9  *      Based on linux/net/ipv4/ip_output.c
  10  *
  11  *      Changes:
  12  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
  13  *                              extension headers are implemented.
  14  *                              route changes now work.
  15  *                              ip6_forward does not confuse sniffers.
  16  *                              etc.
  17  *
  18  *      H. von Brand    :       Added missing #include <linux/string.h>
  19  *      Imran Patel     :       frag id should be in NBO
  20  *      Kazunori MIYAZAWA @USAGI
  21  *                      :       add ip6_append_data and related functions
  22  *                              for datagram xmit
  23  */
  24
  25 #include <linux/errno.h>
  26 #include <linux/kernel.h>
  27 #include <linux/string.h>
  28 #include <linux/socket.h>
  29 #include <linux/net.h>
  30 #include <linux/netdevice.h>
  31 #include <linux/if_arp.h>
  32 #include <linux/in6.h>
  33 #include <linux/tcp.h>
  34 #include <linux/route.h>
  35 #include <linux/module.h>
  36 #include <linux/slab.h>
  37
  38 #include <linux/bpf-cgroup.h>
  39 #include <linux/netfilter.h>
  40 #include <linux/netfilter_ipv6.h>
  41
  42 #include <net/sock.h>
  43 #include <net/snmp.h>
  44
  45 #include <net/ipv6.h>
  46 #include <net/ndisc.h>
  47 #include <net/protocol.h>
  48 #include <net/ip6_route.h>
  49 #include <net/addrconf.h>
  50 #include <net/rawv6.h>
  51 #include <net/icmp.h>
  52 #include <net/xfrm.h>
  53 #include <net/checksum.h>
  54 #include <linux/mroute6.h>
  55 #include <net/l3mdev.h>
  56 #include <net/lwtunnel.h>
  57 #include <net/ip_tunnels.h>
  58
  /*
   * ip6_finish_output2 - hand a ready IPv6 packet to the neighbour layer.
   *
   * For multicast destinations a clone may first be looped back through
   * dev_loopback_xmit(), the multicast output statistics are updated and
   * packets that must not be transmitted are freed.  After that a
   * lightweight-tunnel xmit redirect (if the dst requests one) gets a chance
   * to take the packet, and finally the next hop's neighbour entry is looked
   * up (or created) and the skb is passed to neigh_output().
   *
   * Returns 0 when the skb was freed by one of the multicast checks, the
   * lwtunnel_xmit() result when that is negative or LWTUNNEL_XMIT_DONE, the
   * neigh_output() result, or -EINVAL when no neighbour entry could be
   * obtained (the skb is freed in that case).
   */
  59 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
  60 {
  61         struct dst_entry *dst = skb_dst(skb);
  62         struct net_device *dev = dst->dev;
  63         const struct in6_addr *nexthop;
  64         struct neighbour *neigh;
  65         int ret;
  66
  67         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
  68                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
  69
                     /* Loop a copy back when the device is not a loopback
                      * device, sk_mc_loop(sk) allows it, and either
                      * mroute6_is_socket() matches a packet that is not
                      * marked IP6SKB_FORWARDED or ipv6_chk_mcast_addr()
                      * matches the destination/source pair on this device.
                      */
  70                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
  71                     ((mroute6_is_socket(net, skb) &&
  72                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
  73                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
  74                                          &ipv6_hdr(skb)->saddr))) {
  75                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
  76
  77                         /* Do not check for IFF_ALLMULTI; multicast routing
  78                            is not supported in any case.
  79                          */
                             /* A failed clone is not an error: the loopback
                              * copy is simply skipped.
                              */
  80                         if (newskb)
  81                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
  82                                         net, sk, newskb, NULL, newskb->dev,
  83                                         dev_loopback_xmit);
  84
                             /* Hop limit 0: the original is discarded here,
                              * after the clone (if any) was handed to the
                              * loopback path.  Note that this check is only
                              * reached inside the loopback condition above.
                              */
  85                         if (ipv6_hdr(skb)->hop_limit == 0) {
  86                                 IP6_INC_STATS(net, idev,
  87                                               IPSTATS_MIB_OUTDISCARDS);
  88                                 kfree_skb(skb);
  89                                 return 0;
  90                         }
  91                 }
  92
  93                 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
  94
                     /* Multicast scope at or below node-local on a device that
                      * is not a loopback device: free the packet and report
                      * success.
                      */
  95                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
  96                     IPV6_ADDR_SCOPE_NODELOCAL &&
  97                     !(dev->flags & IFF_LOOPBACK)) {
  98                         kfree_skb(skb);
  99                         return 0;
 100                 }
 101         }
 102
             /* Give a lightweight tunnel the chance to transmit the packet.
              * Any other lwtunnel_xmit() result falls through to the normal
              * neighbour output below.
              */
 103         if (lwtunnel_xmit_redirect(dst->lwtstate)) {
 104                 int res = lwtunnel_xmit(skb);
 105
 106                 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 107                         return res;
 108         }
 109
             /* The neighbour comes from the _noref lookup variant and is only
              * used inside this rcu_read_lock_bh() section.
              */
 110         rcu_read_lock_bh();
 111         nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 112         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
 113         if (unlikely(!neigh))
 114                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
 115         if (!IS_ERR(neigh)) {
 116                 sock_confirm_neigh(skb, neigh);
 117                 ret = neigh_output(neigh, skb, false);
 118                 rcu_read_unlock_bh();
 119                 return ret;
 120         }
 121         rcu_read_unlock_bh();
 122
             /* __neigh_create() returned an error pointer: account the packet
              * as "no route" and drop it.
              */
 123         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 124         kfree_skb(skb);
 125         return -EINVAL;
 126 }
 127
 /*
  * __ip6_finish_output - reinject, fragment or transmit an outgoing packet.
  *
  * With CONFIG_NETFILTER and CONFIG_XFRM, a packet whose dst carries an xfrm
  * state is marked as rerouted and sent through dst_output() again.
  * Otherwise the packet goes to ip6_fragment() when any of the following
  * holds, and straight to ip6_finish_output2() when none does:
  *   - it is longer than ip6_skb_dst_mtu() and is not a GSO packet,
  *   - dst_allfrag() is set on its dst,
  *   - IP6CB(skb)->frag_max_size is set and smaller than the packet length.
  *
  * Returns the result of whichever of those functions was called.
  */
 128 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 129 {
 130 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 131         /* Policy lookup after SNAT yielded a new policy */
 132         if (skb_dst(skb)->xfrm) {
                     /* This is an IPv6 skb: the mark must go into the IPv6
                      * control block with the IPv6 flag, because ip6_output()
                      * tests IP6CB(skb)->flags & IP6SKB_REROUTED to decide
                      * whether the POST_ROUTING hook is run again.
                      */
 133                 IP6CB(skb)->flags |= IP6SKB_REROUTED;
 134                 return dst_output(net, sk, skb);
 135         }
 136 #endif
 137
 138         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 139             dst_allfrag(skb_dst(skb)) ||
 140             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
 141                 return ip6_fragment(net, sk, skb, ip6_finish_output2);
 142         else
 143                 return ip6_finish_output2(net, sk, skb);
 144 }
 145
 /*
  * ip6_finish_output - run the cgroup BPF egress program, then finish output.
  *
  * The verdict of BPF_CGROUP_RUN_PROG_INET_EGRESS() selects the action:
  *   NET_XMIT_SUCCESS - continue with __ip6_finish_output() and return its
  *                      result.
  *   NET_XMIT_CN      - continue as well; a non-zero result of
  *                      __ip6_finish_output() is returned, otherwise the
  *                      NET_XMIT_CN verdict itself is returned.
  *   anything else    - the skb is freed and the verdict is returned.
  */
 146 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 147 {
 148         int ret;
 149
 150         ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
 151         switch (ret) {
 152         case NET_XMIT_SUCCESS:
 153                 return __ip6_finish_output(net, sk, skb);
 154         case NET_XMIT_CN:
                     /* "a ? : b" (GNU extension) yields a when it is non-zero,
                      * else b.
                      */
 155                 return __ip6_finish_output(net, sk, skb) ? : ret;
 156         default:
 157                 kfree_skb(skb);
 158                 return ret;
 159         }
 160 }
 161
 /*
  * ip6_output - output entry point for an IPv6 packet with a dst attached.
  *
  * Sets skb->protocol to ETH_P_IPV6 and skb->dev to the dst's device.  If
  * IPv6 is disabled on the egress inet6_dev the packet is counted in
  * IPSTATS_MIB_OUTDISCARDS, freed, and 0 is returned.  Otherwise the packet
  * is passed to ip6_finish_output() via the NF_INET_POST_ROUTING hook.
  *
  * Returns 0 for the disabled-IPv6 drop, else the NF_HOOK_COND() result.
  */
 162 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 163 {
             /* indev keeps the device the skb carried on entry; it is passed
              * to the hook as the input device after skb->dev is overwritten.
              */
 164         struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
 165         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 166
 167         skb->protocol = htons(ETH_P_IPV6);
 168         skb->dev = dev;
 169
 170         if (unlikely(idev->cnf.disable_ipv6)) {
 171                 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 172                 kfree_skb(skb);
 173                 return 0;
 174         }
 175
             /* The last argument is the hook condition: it is false for
              * packets already marked IP6SKB_REROUTED.
              * NOTE(review): presumably NF_HOOK_COND() then skips the hook
              * and calls ip6_finish_output() directly - confirm against its
              * definition.
              */
 176         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
 177                             net, sk, skb, indev, dev,
 178                             ip6_finish_output,
 179                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 180 }
 181
 /*
  * ip6_autoflowlabel - decide whether automatic flow labels are wanted.
  *
  * Returns np->autoflowlabel when the socket has np->autoflowlabel_set,
  * otherwise the per-netns default from ip6_default_np_autolabel().
  *
  * NOTE(review): np is dereferenced unconditionally here, while ip6_xmit()
  * in this file guards its own use of np against NULL before passing the
  * same pointer to this function - confirm callers never pass NULL.
  */
 182 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 183 {
 184         if (!np->autoflowlabel_set)
 185                 return ip6_default_np_autolabel(net);
 186         else
 187                 return np->autoflowlabel;
 188 }
 189
 190 /*
 191  * xmit an sk_buff (used by TCP, SCTP and DCCP)
 192  * Note : socket lock is not held for SYNACK packets, but might be modified
 193  * by calls to skb_set_owner_w() and ipv6_local_error(),
 194  * which are using proper atomic operations or spinlocks.
      *
      * @sk:       sending socket (its netns and IPv6 per-socket info are used)
      * @skb:      packet without IPv6 header; skb_dst(skb) must be set
      * @fl6:      flow providing protocol, addresses and flow label
      * @mark:     value stored in skb->mark
      * @opt:      optional extension headers to push, may be NULL
      * @tclass:   traffic class for the IPv6 header
      * @priority: value stored in skb->priority
      *
      * Pushes the extension headers from @opt and the IPv6 header in front of
      * the payload and sends the packet through the NF_INET_LOCAL_OUT hook to
      * dst_output().
      *
      * Returns the hook result on the send path, 0 when l3mdev_ip6_out()
      * returned no skb, -ENOBUFS when headroom could not be reallocated, or
      * -EMSGSIZE when the packet exceeds the dst MTU and may not be sent
      * oversized.  The skb is freed on both error returns.
 195  */
 196 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 197              __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
 198 {
 199         struct net *net = sock_net(sk);
 200         const struct ipv6_pinfo *np = inet6_sk(sk);
 201         struct in6_addr *first_hop = &fl6->daddr;
 202         struct dst_entry *dst = skb_dst(skb);
 203         unsigned int head_room;
 204         struct ipv6hdr *hdr;
 205         u8  proto = fl6->flowi6_proto;
             /* Payload length for the header: taken before any header is
              * pushed, option lengths are added below.
              */
 206         int seg_len = skb->len;
             /* -1 means "not chosen yet"; see the hop limit selection below. */
 207         int hlimit = -1;
 208         u32 mtu;
 209
             /* Room needed in front of the data: IPv6 header, link-layer
              * reserve of the output device, plus all extension headers.
              */
 210         head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 211         if (opt)
 212                 head_room += opt->opt_nflen + opt->opt_flen;
 213
 214         if (unlikely(skb_headroom(skb) < head_room)) {
 215                 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 216                 if (!skb2) {
 217                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 218                                       IPSTATS_MIB_OUTDISCARDS);
 219                         kfree_skb(skb);
 220                         return -ENOBUFS;
 221                 }
                     /* Keep the new buffer charged to the same socket before
                      * the old one is released.
                      */
 222                 if (skb->sk)
 223                         skb_set_owner_w(skb2, skb->sk);
 224                 consume_skb(skb);
 225                 skb = skb2;
 226         }
 227
 228         if (opt) {
 229                 seg_len += opt->opt_nflen + opt->opt_flen;
 230
                     /* Both helpers receive &proto; the non-fragmentable push
                      * additionally receives &first_hop and may change it.
                      */
 231                 if (opt->opt_flen)
 232                         ipv6_push_frag_opts(skb, opt, &proto);
 233
 234                 if (opt->opt_nflen)
 235                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 236                                              &fl6->saddr);
 237         }
 238
 239         skb_push(skb, sizeof(struct ipv6hdr));
 240         skb_reset_network_header(skb);
 241         hdr = ipv6_hdr(skb);
 242
 243         /*
 244          *      Fill in the IPv6 header
 245          */
             /* Hop limit: the socket's value when there is per-socket IPv6
              * info and the value is not negative, else the dst's default.
              */
 246         if (np)
 247                 hlimit = np->hop_limit;
 248         if (hlimit < 0)
 249                 hlimit = ip6_dst_hoplimit(dst);
 250
 251         ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 252                                 ip6_autoflowlabel(net, np), fl6));
 253
 254         hdr->payload_len = htons(seg_len);
 255         hdr->nexthdr = proto;
 256         hdr->hop_limit = hlimit;
 257
 258         hdr->saddr = fl6->saddr;
 259         hdr->daddr = *first_hop;
 260
 261         skb->protocol = htons(ETH_P_IPV6);
 262         skb->priority = priority;
 263         skb->mark = mark;
 264
             /* Send when the packet fits the dst MTU, when the skb is marked
              * ignore_df, or when it is a GSO packet.
              */
 265         mtu = dst_mtu(dst);
 266         if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 267                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 268                               IPSTATS_MIB_OUT, skb->len);
 269
 270                 /* if egress device is enslaved to an L3 master device pass the
 271                  * skb to its handler for processing
 272                  */
 273                 skb = l3mdev_ip6_out((struct sock *)sk, skb);
 274                 if (unlikely(!skb))
 275                         return 0;
 276
 277                 /* hooks should never assume socket lock is held.
 278                  * we promote our socket to non const
 279                  */
 280                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 281                                net, (struct sock *)sk, skb, NULL, dst->dev,
 282                                dst_output);
 283         }
 284
             /* Too big and not allowed to exceed the MTU: report EMSGSIZE
              * with the MTU to the socket, count the failure and drop.
              */
 285         skb->dev = dst->dev;
 286         /* ipv6_local_error() does not require socket lock,
 287          * we promote our socket to non const
 288          */
 289         ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 290
 291         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
 292         kfree_skb(skb);
 293         return -EMSGSIZE;
 294 }
 295 EXPORT_SYMBOL(ip6_xmit);
 296
 /*
  * ip6_call_ra_chain - deliver a Router Alert packet to registered sockets.
  *
  * Walks ip6_ra_chain under ip6_ra_lock (read side).  An entry matches when
  * it has a socket, its selector equals @sel, and the socket is either not
  * bound to a device or bound to the device the skb arrived on.  A matching
  * socket with rtalert_isolate set is skipped when its netns differs from
  * the netns of skb->dev.
  *
  * Every matching socket except the last receives a clone through
  * rawv6_rcv(); the last one receives the original skb.  A socket whose
  * clone allocation fails is silently skipped.
  *
  * Returns 1 when the skb was handed to a socket (the caller must not touch
  * it any more), 0 when nothing matched and the skb is left untouched.
  */
 297 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 298 {
 299         struct ip6_ra_chain *ra;
             /* Most recent match; delivery to it is deferred by one step so
              * that the final match can take the original skb.
              */
 300         struct sock *last = NULL;
 301
 302         read_lock(&ip6_ra_lock);
 303         for (ra = ip6_ra_chain; ra; ra = ra->next) {
 304                 struct sock *sk = ra->sk;
 305                 if (sk && ra->sel == sel &&
 306                     (!sk->sk_bound_dev_if ||
 307                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
 308                         struct ipv6_pinfo *np = inet6_sk(sk);
 309
 310                         if (np && np->rtalert_isolate &&
 311                             !net_eq(sock_net(sk), dev_net(skb->dev))) {
 312                                 continue;
 313                         }
 314                         if (last) {
 315                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 316                                 if (skb2)
 317                                         rawv6_rcv(last, skb2);
 318                         }
 319                         last = sk;
 320                 }
 321         }
 322
 323         if (last) {
 324                 rawv6_rcv(last, skb);
 325                 read_unlock(&ip6_ra_lock);
 326                 return 1;
 327         }
 328         read_unlock(&ip6_ra_lock);
 329         return 0;
 330 }
 331
 /*
  * ip6_forward_proxy_check - classify a packet addressed to a proxied address.
  *
  * Returns
  *    1  when the packet is an ICMPv6 neighbour discovery message (router
  *       solicitation/advertisement, neighbour solicitation/advertisement or
  *       redirect); the caller passes such packets to the input path,
  *   -1  when the destination is link-local; dst_link_failure() has been
  *       called and the caller is expected to drop the packet,
  *    0  in every other case, including when the extension header chain
  *       cannot be skipped or the ICMPv6 type byte cannot be pulled.
  */
 332 static int ip6_forward_proxy_check(struct sk_buff *skb)
 333 {
 334         struct ipv6hdr *hdr = ipv6_hdr(skb);
 335         u8 nexthdr = hdr->nexthdr;
 336         __be16 frag_off;
 337         int offset;
 338
             /* Find the offset of the upper-layer header from the start of
              * the network header.
              */
 339         if (ipv6_ext_hdr(nexthdr)) {
 340                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
 341                 if (offset < 0)
 342                         return 0;
 343         } else
 344                 offset = sizeof(struct ipv6hdr);
 345
 346         if (nexthdr == IPPROTO_ICMPV6) {
 347                 struct icmp6hdr *icmp6;
 348
                     /* Only the first byte of the ICMPv6 header (the type) is
                      * needed, hence "offset + 1".
                      */
 349                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
 350                                          offset + 1 - skb->data)))
 351                         return 0;
 352
 353                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 354
 355                 switch (icmp6->icmp6_type) {
 356                 case NDISC_ROUTER_SOLICITATION:
 357                 case NDISC_ROUTER_ADVERTISEMENT:
 358                 case NDISC_NEIGHBOUR_SOLICITATION:
 359                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
 360                 case NDISC_REDIRECT:
 361                         /* For reaction involving unicast neighbor discovery
 362                          * message destined to the proxied address, pass it to
 363                          * input function.
 364                          */
 365                         return 1;
 366                 default:
 367                         break;
 368                 }
 369         }
 370
 371         /*
 372          * The proxying router can't forward traffic sent to a link-local
 373          * address, so signal the sender and discard the packet. This
 374          * behavior is clarified by the MIPv6 specification.
 375          */
             /* Re-read the header instead of using the "hdr" pointer cached at
              * function entry: pskb_may_pull() above may have reallocated the
              * skb head, which would leave that pointer dangling.
              */
 376         if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_LINKLOCAL) {
 377                 dst_link_failure(skb);
 378                 return -1;
 379         }
 380
 381         return 0;
 382 }
 383
 /*
  * ip6_forward_finish - final step of forwarding, run after the FORWARD hook.
  *
  * Accounts the packet in the outgoing device's OUTFORWDATAGRAMS and
  * OUTOCTETS counters, clears skb->tstamp and passes the packet to
  * dst_output().  With CONFIG_NET_SWITCHDEV, a packet that has
  * offload_l3_fwd_mark set is consumed instead and 0 is returned.
  *
  * Returns 0 for the switchdev case, else the dst_output() result.
  */
 384 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 385                                      struct sk_buff *skb)
 386 {
 387         struct dst_entry *dst = skb_dst(skb);
 388
 389         __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 390         __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 391
 392 #ifdef CONFIG_NET_SWITCHDEV
             /* NOTE(review): presumably the mark means the hardware already
              * forwarded this packet - confirm against the switchdev code.
              */
 393         if (skb->offload_l3_fwd_mark) {
 394                 consume_skb(skb);
 395                 return 0;
 396         }
 397 #endif
 398
 399         skb->tstamp = 0;
 400         return dst_output(net, sk, skb);
 401 }
 402
 /*
  * ip6_pkt_too_big - decide whether a forwarded packet exceeds @mtu.
  *
  * The checks are applied in this order:
  *   - packet length within @mtu                          -> false
  *   - frag_max_size recorded and larger than @mtu        -> true
  *   - skb->ignore_df set                                 -> false
  *   - GSO packet that skb_gso_validate_network_len()
  *     accepts for @mtu                                   -> false
  *   - anything else                                      -> true
  */
 403 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 404 {
 405         if (skb->len <= mtu)
 406                 return false;
 407
 408         /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
             /* This must come before the ignore_df test below, otherwise a
              * recorded fragment size above the MTU would never be reported.
              */
 409         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 410                 return true;
 411
 412         if (skb->ignore_df)
 413                 return false;
 414
 415         if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
 416                 return false;
 417
 418         return true;
 419 }
 420
 /*
  * ip6_forward - forward a received IPv6 packet towards its dst.
  *
  * The packet is validated (forwarding enabled, addressed to this host at
  * link level, not owned by a socket, not LRO-merged, xfrm forward policy),
  * Router Alert packets are offered to the RA socket chain, the hop limit
  * is checked, proxied neighbour discovery traffic is diverted to
  * ip6_input(), and a redirect may be sent when the packet leaves through
  * the interface it came in on.  After the MTU check the header is made
  * writable, the hop limit is decremented and the packet is passed through
  * the NF_INET_FORWARD hook to ip6_forward_finish().
  *
  * Returns
  *    0           when the Router Alert chain took the packet,
  *   -ETIMEDOUT   when the hop limit was exhausted,
  *   -EMSGSIZE    when the packet is too big for the forwarding MTU,
  *   -EINVAL      for every other drop,
  *   otherwise the result of ip6_input() (proxied ND) or of the FORWARD
  *   hook.  The skb is consumed on every path.
  */
 421 int ip6_forward(struct sk_buff *skb)
 422 {
 423         struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
 424         struct dst_entry *dst = skb_dst(skb);
 425         struct ipv6hdr *hdr = ipv6_hdr(skb);
 426         struct inet6_skb_parm *opt = IP6CB(skb);
 427         struct net *net = dev_net(dst->dev);
 428         u32 mtu;
 429
             /* "error" counts IPSTATS_MIB_INADDRERRORS before dropping,
              * "drop" only frees the skb; both return -EINVAL.
              */
 430         if (net->ipv6.devconf_all->forwarding == 0)
 431                 goto error;
 432
 433         if (skb->pkt_type != PACKET_HOST)
 434                 goto drop;
 435
 436         if (unlikely(skb->sk))
 437                 goto drop;
 438
 439         if (skb_warn_if_lro(skb))
 440                 goto drop;
 441
 442         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 443                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 444                 goto drop;
 445         }
 446
 447         skb_forward_csum(skb);
 448
 449         /*
 450          *      We DO NOT make any processing on
 451          *      RA packets, pushing them to user level AS IS
 452          *      without any WARRANTY that application will be able
 453          *      to interpret them. The reason is that we
 454          *      cannot make anything clever here.
 455          *
 456          *      We are not end-node, so that if packet contains
 457          *      AH/ESP, we cannot make anything.
 458          *      Defragmentation also would be mistake, RA packets
 459          *      cannot be fragmented, because there is no warranty
 460          *      that different fragments will go along one path. --ANK
 461          */
             /* A non-zero return means the skb was handed to a socket; when
              * no socket matched, forwarding continues normally.
              */
 462         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
 463                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
 464                         return 0;
 465         }
 466
 467         /*
 468          *      check and decrement ttl
 469          */
             /* Only the check happens here; the decrement is done after
              * skb_cow() further down.
              */
 470         if (hdr->hop_limit <= 1) {
 471                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
 472                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 473
 474                 kfree_skb(skb);
 475                 return -ETIMEDOUT;
 476         }
 477
 478         /* XXX: idev->cnf.proxy_ndp? */
             /* Destination is a proxied neighbour: ND messages go to the
              * input path, link-local destinations are dropped, everything
              * else is forwarded as usual.
              * NOTE(review): ip6_forward_proxy_check() may call
              * pskb_may_pull(); if that can move the packet header, "hdr"
              * cached at function entry would need to be reloaded before the
              * uses below - confirm.
              */
 479         if (net->ipv6.devconf_all->proxy_ndp &&
 480             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
 481                 int proxied = ip6_forward_proxy_check(skb);
 482                 if (proxied > 0)
 483                         return ip6_input(skb);
 484                 else if (proxied < 0) {
 485                         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 486                         goto drop;
 487                 }
 488         }
 489
 490         if (!xfrm6_route_forward(skb)) {
 491                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
 492                 goto drop;
 493         }
             /* Reload the dst from the skb after the xfrm route step. */
 494         dst = skb_dst(skb);
 495
 496         /* IPv6 specs say nothing about it, but it is clear that we cannot
 497            send redirects to source routed frames.
 498            We don't send redirects to frames decapsulated from IPsec.
 499          */
 500         if (IP6CB(skb)->iif == dst->dev->ifindex &&
 501             opt->srcrt == 0 && !skb_sec_path(skb)) {
 502                 struct in6_addr *target = NULL;
 503                 struct inet_peer *peer;
 504                 struct rt6_info *rt;
 505
 506                 /*
 507                  *      incoming and outgoing devices are the same
 508                  *      send a redirect.
 509                  */
 510
                     /* Redirect target: the route's gateway for gateway
                      * routes, otherwise the destination itself.
                      */
 511                 rt = (struct rt6_info *) dst;
 512                 if (rt->rt6i_flags & RTF_GATEWAY)
 513                         target = &rt->rt6i_gateway;
 514                 else
 515                         target = &hdr->daddr;
 516
 517                 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
 518
 519                 /* Limit redirects both by destination (here)
 520                    and by source (inside ndisc_send_redirect)
 521                  */
 522                 if (inet_peer_xrlim_allow(peer, 1*HZ))
 523                         ndisc_send_redirect(skb, target);
 524                 if (peer)
 525                         inet_putpeer(peer);
 526         } else {
 527                 int addrtype = ipv6_addr_type(&hdr->saddr);
 528
 529                 /* This check is security critical. */
                     /* Unspecified, multicast and loopback source addresses
                      * are never forwarded; link-local sources additionally
                      * trigger an ICMPv6 "not a neighbour" error.
                      */
 530                 if (addrtype == IPV6_ADDR_ANY ||
 531                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
 532                         goto error;
 533                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
 534                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 535                                     ICMPV6_NOT_NEIGHBOUR, 0);
 536                         goto error;
 537                 }
 538         }
 539
             /* Never use a forwarding MTU below the IPv6 minimum. */
 540         mtu = ip6_dst_mtu_forward(dst);
 541         if (mtu < IPV6_MIN_MTU)
 542                 mtu = IPV6_MIN_MTU;
 543
 544         if (ip6_pkt_too_big(skb, mtu)) {
 545                 /* Again, force OUTPUT device used as source address */
 546                 skb->dev = dst->dev;
 547                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 548                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
 549                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
 550                                 IPSTATS_MIB_FRAGFAILS);
 551                 kfree_skb(skb);
 552                 return -EMSGSIZE;
 553         }
 554
             /* Get a private, writable header with room for the output
              * device's link-layer header before modifying the packet.
              */
 555         if (skb_cow(skb, dst->dev->hard_header_len)) {
 556                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
 557                                 IPSTATS_MIB_OUTDISCARDS);
 558                 goto drop;
 559         }
 560
             /* The header pointer is reloaded after skb_cow(). */
 561         hdr = ipv6_hdr(skb);
 562
 563         /* Mangling hops number delayed to point after skb COW */
 564
 565         hdr->hop_limit--;
 566
 567         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 568                        net, NULL, skb, skb->dev, dst->dev,
 569                        ip6_forward_finish);
 570
 571 error:
 572         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
 573 drop:
 574         kfree_skb(skb);
 575         return -EINVAL;
 576 }
 577
 /*
  * ip6_copy_metadata - make fragment @to carry the same metadata as @from.
  *
  * Copies packet type, priority, protocol, device, mark, hash, netfilter
  * state, skb extensions and security mark, plus the traffic-control index
  * when CONFIG_NET_SCHED is enabled.  Any dst already attached to @to is
  * dropped and replaced by a new reference to @from's dst.
  */
 578 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 579 {
 580         to->pkt_type = from->pkt_type;
 581         to->priority = from->priority;
 582         to->protocol = from->protocol;
             /* Release whatever dst @to had before taking a reference on
              * @from's dst.
              */
 583         skb_dst_drop(to);
 584         skb_dst_set(to, dst_clone(skb_dst(from)));
 585         to->dev = from->dev;
 586         to->mark = from->mark;
 587
 588         skb_copy_hash(to, from);
 589
 590 #ifdef CONFIG_NET_SCHED
 591         to->tc_index = from->tc_index;
 592 #endif
 593         nf_copy(to, from);
 594         skb_ext_copy(to, from);
 595         skb_copy_secmark(to, from);
 596 }
 597
 /*
  * ip6_fraglist_init - turn the head skb of a frag_list into the first
  * fragment and set up @iter for the remaining ones.
  *
  * @skb:     head skb whose frag_list holds the future fragments
  * @hlen:    length of the header part that is replicated in each fragment
  * @prevhdr: location of the "next header" byte that must announce the
  *           fragment header
  * @nexthdr: protocol value to store in the fragment header
  * @frag_id: fragment identification, stored as passed in
  * @iter:    iterator state, initialised here
  *
  * Returns 0 on success or -ENOMEM when the header copy cannot be
  * allocated.  On success iter->tmp_hdr holds a kmemdup()'d copy of the
  * header that the caller has to kfree() (ip6_fragment() does so).  Note
  * that *prevhdr has already been overwritten when -ENOMEM is returned.
  */
 598 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
 599                       u8 nexthdr, __be32 frag_id,
 600                       struct ip6_fraglist_iter *iter)
 601 {
 602         unsigned int first_len;
 603         struct frag_hdr *fh;
 604
 605         /* BUILD HEADER */
             /* The header template is saved after *prevhdr was patched.
              * NOTE(review): presumably prevhdr points into the first hlen
              * bytes of the network header, so the template already carries
              * NEXTHDR_FRAGMENT - confirm against the caller.
              */
 606         *prevhdr = NEXTHDR_FRAGMENT;
 607         iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 608         if (!iter->tmp_hdr)
 609                 return -ENOMEM;
 610
             /* Detach the fragment list from the head skb; the iterator now
              * holds the pointer to the first pending fragment.
              */
 611         iter->frag = skb_shinfo(skb)->frag_list;
 612         skb_frag_list_init(skb);
 613
 614         iter->offset = 0;
 615         iter->hlen = hlen;
 616         iter->frag_id = frag_id;
 617         iter->nexthdr = nexthdr;
 618
             /* Open a gap for the fragment header between the replicated
              * header and the payload: strip the header, push the fragment
              * header, push the header space again and restore its content
              * from the saved template.
              */
 619         __skb_pull(skb, hlen);
 620         fh = __skb_push(skb, sizeof(struct frag_hdr));
 621         __skb_push(skb, hlen);
 622         skb_reset_network_header(skb);
 623         memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
 624
             /* First fragment: no offset bits are set, only "more
              * fragments".
              */
 625         fh->nexthdr = nexthdr;
 626         fh->reserved = 0;
 627         fh->frag_off = htons(IP6_MF);
 628         fh->identification = frag_id;
 629
             /* Shrink the head skb's accounting to skb_pagelen() and set the
              * IPv6 payload length accordingly.
              * NOTE(review): presumably skb_pagelen() excludes the detached
              * frag_list data - confirm.
              */
 630         first_len = skb_pagelen(skb);
 631         skb->data_len = first_len - skb_headlen(skb);
 632         skb->len = first_len;
 633         ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
 634
 635         return 0;
 636 }
 637 EXPORT_SYMBOL(ip6_fraglist_init);
 638
 /*
  * ip6_fraglist_prepare - build the headers of the next frag_list fragment.
  *
  * @skb:  the fragment that is about to be sent; its payload size advances
  *        the running offset and its metadata is copied
  * @iter: iterator state; iter->frag is the fragment being prepared
  *
  * Pushes the saved header template and a fragment header in front of
  * iter->frag, fills in offset, "more fragments" bit, identification and
  * payload length, and copies the metadata of @skb to it.
  */
 639 void ip6_fraglist_prepare(struct sk_buff *skb,
 640                           struct ip6_fraglist_iter *iter)
 641 {
 642         struct sk_buff *frag = iter->frag;
 643         unsigned int hlen = iter->hlen;
 644         struct frag_hdr *fh;
 645
 646         frag->ip_summed = CHECKSUM_NONE;
             /* The fragment's current data start becomes its transport
              * header; the headers are pushed in front of it.
              */
 647         skb_reset_transport_header(frag);
 648         fh = __skb_push(frag, sizeof(struct frag_hdr));
 649         __skb_push(frag, hlen);
 650         skb_reset_network_header(frag);
 651         memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
             /* Advance the offset by the payload carried by @skb, i.e. its
              * length without the replicated header and fragment header.
              */
 652         iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
 653         fh->nexthdr = iter->nexthdr;
 654         fh->reserved = 0;
 655         fh->frag_off = htons(iter->offset);
             /* Every fragment but the last one in the list gets IP6_MF. */
 656         if (frag->next)
 657                 fh->frag_off |= htons(IP6_MF);
 658         fh->identification = iter->frag_id;
 659         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 660         ip6_copy_metadata(frag, skb);
 661 }
 662 EXPORT_SYMBOL(ip6_fraglist_prepare);
 663
 /*
  * ip6_frag_init - initialise the state used by ip6_frag_next().
  *
  * @skb:             packet to be fragmented
  * @hlen:            length of the header part replicated in each fragment
  * @mtu:             maximum payload bytes per fragment as used by
  *                   ip6_frag_next()
  * @needed_tailroom: extra tail room to allocate for each fragment
  * @hdr_room:        head room to reserve in each fragment
  * @prevhdr:         location of the "next header" byte in @skb that has to
  *                   announce the fragment header
  * @nexthdr:         protocol value for the fragment header
  * @frag_id:         fragment identification
  * @state:           state to initialise
  */
 664 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
 665                    unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
 666                    u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
 667 {
 668         state->prevhdr = prevhdr;
 669         state->nexthdr = nexthdr;
 670         state->frag_id = frag_id;
 671
 672         state->hlen = hlen;
 673         state->mtu = mtu;
 674
 675         state->left = skb->len - hlen;  /* Payload bytes still to be sent */
 676         state->ptr = hlen;              /* Where to start from */
 677
 678         state->hroom = hdr_room;
 679         state->troom = needed_tailroom;
 680
 681         state->offset = 0;
 682 }
 683 EXPORT_SYMBOL(ip6_frag_init);
 684
 /*
  * ip6_frag_next - allocate and fill the next fragment of @skb.
  *
  * @skb:   the original, unfragmented packet (only read from)
  * @state: progress state set up by ip6_frag_init(); advanced on success
  *
  * Each call copies the replicated header and up to state->mtu payload
  * bytes into a newly allocated skb, inserts a fragment header and updates
  * state->left, state->ptr and state->offset.
  *
  * Returns the new fragment, or ERR_PTR(-ENOMEM) when allocation fails (the
  * state is left unchanged in that case).
  */
 685 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
 686 {
 687         u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
 688         struct sk_buff *frag;
 689         struct frag_hdr *fh;
 690         unsigned int len;
 691
 692         len = state->left;
 693         /* IF: it doesn't fit, use 'mtu' - the data space left */
 694         if (len > state->mtu)
 695                 len = state->mtu;
 696         /* IF: we are not sending up to and including the packet end
 697            then align the next start on an eight byte boundary */
             /* i.e. round len down to a multiple of 8 for every fragment but
              * the last one.
              */
 698         if (len < state->left)
 699                 len &= ~7;
 700
 701         /* Allocate buffer */
 702         frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
 703                          state->hroom + state->troom, GFP_ATOMIC);
 704         if (!frag)
 705                 return ERR_PTR(-ENOMEM);
 706
 707         /*
 708          *      Set up data on packet
 709          */
 710
             /* Layout of the new fragment: replicated header, fragment
              * header, then the payload starting at the transport header.
              */
 711         ip6_copy_metadata(frag, skb);
 712         skb_reserve(frag, state->hroom);
 713         skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
 714         skb_reset_network_header(frag);
 715         fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
 716         frag->transport_header = (frag->network_header + state->hlen +
 717                                   sizeof(struct frag_hdr));
 718
 719         /*
 720          *      Charge the memory for the fragment to any owner
 721          *      it might possess
 722          */
 723         if (skb->sk)
 724                 skb_set_owner_w(frag, skb->sk);
 725
 726         /*
 727          *      Copy the packet header into the new buffer.
 728          */
 729         skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
 730
             /* Patch the "next header" byte in the copy, at the same offset
              * from the network header that prevhdr has in the original, so
              * that it announces the fragment header.
              */
 731         fragnexthdr_offset = skb_network_header(frag);
 732         fragnexthdr_offset += prevhdr - skb_network_header(skb);
 733         *fragnexthdr_offset = NEXTHDR_FRAGMENT;
 734
 735         /*
 736          *      Build fragment header.
 737          */
 738         fh->nexthdr = state->nexthdr;
 739         fh->reserved = 0;
 740         fh->identification = state->frag_id;
 741
 742         /*
 743          *      Copy a block of the IP datagram.
 744          */
             /* A copy failure here is treated as a fatal bug (BUG_ON). */
 745         BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
 746                              len));
 747         state->left -= len;
 748
             /* "More fragments" is set while payload remains after this
              * fragment.
              */
 749         fh->frag_off = htons(state->offset);
 750         if (state->left > 0)
 751                 fh->frag_off |= htons(IP6_MF);
 752         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
 753
 754         state->ptr += len;
 755         state->offset += len;
 756
 757         return frag;
 758 }
 759 EXPORT_SYMBOL(ip6_frag_next);
 760
 761 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 762                  int (*output)(struct net *, struct sock *, struct sk_buff *))
 763 {
 764         struct sk_buff *frag;
 765         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 766         struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
 767                                 inet6_sk(skb->sk) : NULL;
 768         struct ip6_frag_state state;
 769         unsigned int mtu, hlen, nexthdr_offset;
 770         ktime_t tstamp = skb->tstamp;
 771         int hroom, err = 0;
 772         __be32 frag_id;
 773         u8 *prevhdr, nexthdr = 0;
 774
 775         err = ip6_find_1stfragopt(skb, &prevhdr);
 776         if (err < 0)
 777                 goto fail;
 778         hlen = err;
 779         nexthdr = *prevhdr;
 780         nexthdr_offset = prevhdr - skb_network_header(skb);
 781
 782         mtu = ip6_skb_dst_mtu(skb);
 783
 784         /* We must not fragment if the socket is set to force MTU discovery
 785          * or if the skb it not generated by a local socket.
 786          */
 787         if (unlikely(!skb->ignore_df && skb->len > mtu))
 788                 goto fail_toobig;
 789
 790         if (IP6CB(skb)->frag_max_size) {
 791                 if (IP6CB(skb)->frag_max_size > mtu)
 792                         goto fail_toobig;
 793
 794                 /* don't send fragments larger than what we received */
 795                 mtu = IP6CB(skb)->frag_max_size;
 796                 if (mtu < IPV6_MIN_MTU)
 797                         mtu = IPV6_MIN_MTU;
 798         }
 799
 800         if (np && np->frag_size < mtu) {
 801                 if (np->frag_size)
 802                         mtu = np->frag_size;
 803         }
 804         if (mtu < hlen + sizeof(struct frag_hdr) + 8)
 805                 goto fail_toobig;
 806         mtu -= hlen + sizeof(struct frag_hdr);
 807
 808         frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
 809                                     &ipv6_hdr(skb)->saddr);
 810
 811         if (skb->ip_summed == CHECKSUM_PARTIAL &&
 812             (err = skb_checksum_help(skb)))
 813                 goto fail;
 814
 815         prevhdr = skb_network_header(skb) + nexthdr_offset;
 816         hroom = LL_RESERVED_SPACE(rt->dst.dev);
 817         if (skb_has_frag_list(skb)) {
 818                 unsigned int first_len = skb_pagelen(skb);
 819                 struct ip6_fraglist_iter iter;
 820                 struct sk_buff *frag2;
 821
 822                 if (first_len - hlen > mtu ||
 823                     ((first_len - hlen) & 7) ||
 824                     skb_cloned(skb) ||
 825                     skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
 826                         goto slow_path;
 827
 828                 skb_walk_frags(skb, frag) {
 829                         /* Correct geometry. */
 830                         if (frag->len > mtu ||
 831                             ((frag->len & 7) && frag->next) ||
 832                             skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
 833                                 goto slow_path_clean;
 834
 835                         /* Partially cloned skb? */
 836                         if (skb_shared(frag))
 837                                 goto slow_path_clean;
 838
 839                         BUG_ON(frag->sk);
 840                         if (skb->sk) {
 841                                 frag->sk = skb->sk;
 842                                 frag->destructor = sock_wfree;
 843                         }
 844                         skb->truesize -= frag->truesize;
 845                 }
 846
 847                 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
 848                                         &iter);
 849                 if (err < 0)
 850                         goto fail;
 851
 852                 for (;;) {
 853                         /* Prepare header of the next frame,
 854                          * before previous one went down. */
 855                         if (iter.frag)
 856                                 ip6_fraglist_prepare(skb, &iter);
 857
 858                         skb->tstamp = tstamp;
 859                         err = output(net, sk, skb);
 860                         if (!err)
 861                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 862                                               IPSTATS_MIB_FRAGCREATES);
 863
 864                         if (err || !iter.frag)
 865                                 break;
 866
 867                         skb = ip6_fraglist_next(&iter);
 868                 }
 869
 870                 kfree(iter.tmp_hdr);
 871
 872                 if (err == 0) {
 873                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 874                                       IPSTATS_MIB_FRAGOKS);
 875                         return 0;
 876                 }
 877
 878                 kfree_skb_list(iter.frag);
 879
 880                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 881                               IPSTATS_MIB_FRAGFAILS);
 882                 return err;
 883
 884 slow_path_clean:
 885                 skb_walk_frags(skb, frag2) {
 886                         if (frag2 == frag)
 887                                 break;
 888                         frag2->sk = NULL;
 889                         frag2->destructor = NULL;
 890                         skb->truesize += frag2->truesize;
 891                 }
 892         }
 893
 894 slow_path:
 895         /*
 896          *      Fragment the datagram.
 897          */
 898
 899         ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
 900                       LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
 901                       &state);
 902
 903         /*
 904          *      Keep copying data until we run out.
 905          */
 906
 907         while (state.left > 0) {
 908                 frag = ip6_frag_next(skb, &state);
 909                 if (IS_ERR(frag)) {
 910                         err = PTR_ERR(frag);
 911                         goto fail;
 912                 }
 913
 914                 /*
 915                  *      Put this fragment into the sending queue.
 916                  */
 917                 frag->tstamp = tstamp;
 918                 err = output(net, sk, frag);
 919                 if (err)
 920                         goto fail;
 921
 922                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 923                               IPSTATS_MIB_FRAGCREATES);
 924         }
 925         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 926                       IPSTATS_MIB_FRAGOKS);
 927         consume_skb(skb);
 928         return err;
 929
 930 fail_toobig:
 931         if (skb->sk && dst_allfrag(skb_dst(skb)))
 932                 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 933
 934         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 935         err = -EMSGSIZE;
 936
 937 fail:
 938         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 939                       IPSTATS_MIB_FRAGFAILS);
 940         kfree_skb(skb);
 941         return err;
 942 }
 943
 944 static inline int ip6_rt_check(const struct rt6key *rt_key,
 945                                const struct in6_addr *fl_addr,
 946                                const struct in6_addr *addr_cache)
 947 {
 948         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 949                 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
 950 }
 951
 952 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 953                                           struct dst_entry *dst,
 954                                           const struct flowi6 *fl6)
 955 {
 956         struct ipv6_pinfo *np = inet6_sk(sk);
 957         struct rt6_info *rt;
 958
 959         if (!dst)
 960                 goto out;
 961
 962         if (dst->ops->family != AF_INET6) {
 963                 dst_release(dst);
 964                 return NULL;
 965         }
 966
 967         rt = (struct rt6_info *)dst;
 968         /* Yes, checking route validity in not connected
 969          * case is not very simple. Take into account,
 970          * that we do not support routing by source, TOS,
 971          * and MSG_DONTROUTE            --ANK (980726)
 972          *
 973          * 1. ip6_rt_check(): If route was host route,
 974          *    check that cached destination is current.
 975          *    If it is network route, we still may
 976          *    check its validity using saved pointer
 977          *    to the last used address: daddr_cache.
 978          *    We do not want to save whole address now,
 979          *    (because main consumer of this service
 980          *    is tcp, which has not this problem),
 981          *    so that the last trick works only on connected
 982          *    sockets.
 983          * 2. oif also should be the same.
 984          */
 985         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
 986 #ifdef CONFIG_IPV6_SUBTREES
 987             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
 988 #endif
 989            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
 990               (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
 991                 dst_release(dst);
 992                 dst = NULL;
 993         }
 994
 995 out:
 996         return dst;
 997 }
 998
 999 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1000                                struct dst_entry **dst, struct flowi6 *fl6)
1001 {
1002 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1003         struct neighbour *n;
1004         struct rt6_info *rt;
1005 #endif
1006         int err;
1007         int flags = 0;
1008
1009         /* The correct way to handle this would be to do
1010          * ip6_route_get_saddr, and then ip6_route_output; however,
1011          * the route-specific preferred source forces the
1012          * ip6_route_output call _before_ ip6_route_get_saddr.
1013          *
1014          * In source specific routing (no src=any default route),
1015          * ip6_route_output will fail given src=any saddr, though, so
1016          * that's why we try it again later.
1017          */
1018         if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1019                 struct fib6_info *from;
1020                 struct rt6_info *rt;
1021                 bool had_dst = *dst != NULL;
1022
1023                 if (!had_dst)
1024                         *dst = ip6_route_output(net, sk, fl6);
1025                 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1026
1027                 rcu_read_lock();
1028                 from = rt ? rcu_dereference(rt->from) : NULL;
1029                 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1030                                           sk ? inet6_sk(sk)->srcprefs : 0,
1031                                           &fl6->saddr);
1032                 rcu_read_unlock();
1033
1034                 if (err)
1035                         goto out_err_release;
1036
1037                 /* If we had an erroneous initial result, pretend it
1038                  * never existed and let the SA-enabled version take
1039                  * over.
1040                  */
1041                 if (!had_dst && (*dst)->error) {
1042                         dst_release(*dst);
1043                         *dst = NULL;
1044                 }
1045
1046                 if (fl6->flowi6_oif)
1047                         flags |= RT6_LOOKUP_F_IFACE;
1048         }
1049
1050         if (!*dst)
1051                 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1052
1053         err = (*dst)->error;
1054         if (err)
1055                 goto out_err_release;
1056
1057 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1058         /*
1059          * Here if the dst entry we've looked up
1060          * has a neighbour entry that is in the INCOMPLETE
1061          * state and the src address from the flow is
1062          * marked as OPTIMISTIC, we release the found
1063          * dst entry and replace it instead with the
1064          * dst entry of the nexthop router
1065          */
1066         rt = (struct rt6_info *) *dst;
1067         rcu_read_lock_bh();
1068         n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1069                                       rt6_nexthop(rt, &fl6->daddr));
1070         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1071         rcu_read_unlock_bh();
1072
1073         if (err) {
1074                 struct inet6_ifaddr *ifp;
1075                 struct flowi6 fl_gw6;
1076                 int redirect;
1077
1078                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1079                                       (*dst)->dev, 1);
1080
1081                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1082                 if (ifp)
1083                         in6_ifa_put(ifp);
1084
1085                 if (redirect) {
1086                         /*
1087                          * We need to get the dst entry for the
1088                          * default router instead
1089                          */
1090                         dst_release(*dst);
1091                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1092                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1093                         *dst = ip6_route_output(net, sk, &fl_gw6);
1094                         err = (*dst)->error;
1095                         if (err)
1096                                 goto out_err_release;
1097                 }
1098         }
1099 #endif
1100         if (ipv6_addr_v4mapped(&fl6->saddr) &&
1101             !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1102                 err = -EAFNOSUPPORT;
1103                 goto out_err_release;
1104         }
1105
1106         return 0;
1107
1108 out_err_release:
1109         dst_release(*dst);
1110         *dst = NULL;
1111
1112         if (err == -ENETUNREACH)
1113                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1114         return err;
1115 }
1116
1117 /**
1118  *      ip6_dst_lookup - perform route lookup on flow
1119  *      @net: Network namespace to perform lookup in
1120  *      @sk: socket which provides route info
1121  *      @dst: pointer to dst_entry * for result
1122  *      @fl6: flow to lookup
1123  *
1124  *      This function performs a route lookup on the given flow.
1125  *
1126  *      It returns zero on success, or a standard errno code on error.
1127  */
1128 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1129                    struct flowi6 *fl6)
1130 {
1131         *dst = NULL;
1132         return ip6_dst_lookup_tail(net, sk, dst, fl6);
1133 }
1134 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1135
1136 /**
1137  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1138  *      @net: Network namespace to perform lookup in
1139  *      @sk: socket which provides route info
1140  *      @fl6: flow to lookup
1141  *      @final_dst: final destination address for ipsec lookup
1142  *
1143  *      This function performs a route lookup on the given flow.
1144  *
1145  *      It returns a valid dst pointer on success, or a pointer encoded
1146  *      error code.
1147  */
1148 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1149                                       const struct in6_addr *final_dst)
1150 {
1151         struct dst_entry *dst = NULL;
1152         int err;
1153
1154         err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1155         if (err)
1156                 return ERR_PTR(err);
1157         if (final_dst)
1158                 fl6->daddr = *final_dst;
1159
1160         return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1161 }
1162 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1163
1164 /**
1165  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1166  *      @sk: socket which provides the dst cache and route info
1167  *      @fl6: flow to lookup
1168  *      @final_dst: final destination address for ipsec lookup
1169  *      @connected: whether @sk is connected or not
1170  *
1171  *      This function performs a route lookup on the given flow with the
1172  *      possibility of using the cached route in the socket if it is valid.
1173  *      It will take the socket dst lock when operating on the dst cache.
1174  *      As a result, this function can only be used in process context.
1175  *
1176  *      In addition, for a connected socket, cache the dst in the socket
1177  *      if the current cache is not valid.
1178  *
1179  *      It returns a valid dst pointer on success, or a pointer encoded
1180  *      error code.
1181  */
1182 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1183                                          const struct in6_addr *final_dst,
1184                                          bool connected)
1185 {
1186         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1187
1188         dst = ip6_sk_dst_check(sk, dst, fl6);
1189         if (dst)
1190                 return dst;
1191
1192         dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1193         if (connected && !IS_ERR(dst))
1194                 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1195
1196         return dst;
1197 }
1198 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1199
1200 /**
1201  *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1202  *      @skb: Packet for which lookup is done
1203  *      @dev: Tunnel device
1204  *      @net: Network namespace of tunnel device
1205  *      @sock: Socket which provides route info
1206  *      @saddr: Memory to store the src ip address
1207  *      @info: Tunnel information
1208  *      @protocol: IP protocol
1209  *      @use_cache: Flag to enable cache usage
1210  *      This function performs a route lookup on a tunnel
1211  *
1212  *      It returns a valid dst pointer and stores src address to be used in
1213  *      tunnel in param saddr on success, else a pointer encoded error code.
1214  */
1215
1216 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1217                                         struct net_device *dev,
1218                                         struct net *net,
1219                                         struct socket *sock,
1220                                         struct in6_addr *saddr,
1221                                         const struct ip_tunnel_info *info,
1222                                         u8 protocol,
1223                                         bool use_cache)
1224 {
1225         struct dst_entry *dst = NULL;
1226 #ifdef CONFIG_DST_CACHE
1227         struct dst_cache *dst_cache;
1228 #endif
1229         struct flowi6 fl6;
1230         __u8 prio;
1231
1232 #ifdef CONFIG_DST_CACHE
1233         dst_cache = (struct dst_cache *)&info->dst_cache;
1234         if (use_cache) {
1235                 dst = dst_cache_get_ip6(dst_cache, saddr);
1236                 if (dst)
1237                         return dst;
1238         }
1239 #endif
1240         memset(&fl6, 0, sizeof(fl6));
1241         fl6.flowi6_mark = skb->mark;
1242         fl6.flowi6_proto = protocol;
1243         fl6.daddr = info->key.u.ipv6.dst;
1244         fl6.saddr = info->key.u.ipv6.src;
1245         prio = info->key.tos;
1246         fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
1247                                           info->key.label);
1248
1249         dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1250                                               NULL);
1251         if (IS_ERR(dst)) {
1252                 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1253                 return ERR_PTR(-ENETUNREACH);
1254         }
1255         if (dst->dev == dev) { /* is this necessary? */
1256                 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1257                 dst_release(dst);
1258                 return ERR_PTR(-ELOOP);
1259         }
1260 #ifdef CONFIG_DST_CACHE
1261         if (use_cache)
1262                 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1263 #endif
1264         *saddr = fl6.saddr;
1265         return dst;
1266 }
1267 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1268
1269 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1270                                                gfp_t gfp)
1271 {
1272         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1273 }
1274
1275 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1276                                                 gfp_t gfp)
1277 {
1278         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1279 }
1280
1281 static void ip6_append_data_mtu(unsigned int *mtu,
1282                                 int *maxfraglen,
1283                                 unsigned int fragheaderlen,
1284                                 struct sk_buff *skb,
1285                                 struct rt6_info *rt,
1286                                 unsigned int orig_mtu)
1287 {
1288         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1289                 if (!skb) {
1290                         /* first fragment, reserve header_len */
1291                         *mtu = orig_mtu - rt->dst.header_len;
1292
1293                 } else {
1294                         /*
1295                          * this fragment is not first, the headers
1296                          * space is regarded as data space.
1297                          */
1298                         *mtu = orig_mtu;
1299                 }
1300                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1301                               + fragheaderlen - sizeof(struct frag_hdr);
1302         }
1303 }
1304
1305 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1306                           struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1307                           struct rt6_info *rt, struct flowi6 *fl6)
1308 {
1309         struct ipv6_pinfo *np = inet6_sk(sk);
1310         unsigned int mtu;
1311         struct ipv6_txoptions *opt = ipc6->opt;
1312
1313         /*
1314          * setup for corking
1315          */
1316         if (opt) {
1317                 if (WARN_ON(v6_cork->opt))
1318                         return -EINVAL;
1319
1320                 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1321                 if (unlikely(!v6_cork->opt))
1322                         return -ENOBUFS;
1323
1324                 v6_cork->opt->tot_len = sizeof(*opt);
1325                 v6_cork->opt->opt_flen = opt->opt_flen;
1326                 v6_cork->opt->opt_nflen = opt->opt_nflen;
1327
1328                 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1329                                                     sk->sk_allocation);
1330                 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1331                         return -ENOBUFS;
1332
1333                 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1334                                                     sk->sk_allocation);
1335                 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1336                         return -ENOBUFS;
1337
1338                 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1339                                                    sk->sk_allocation);
1340                 if (opt->hopopt && !v6_cork->opt->hopopt)
1341                         return -ENOBUFS;
1342
1343                 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1344                                                     sk->sk_allocation);
1345                 if (opt->srcrt && !v6_cork->opt->srcrt)
1346                         return -ENOBUFS;
1347
1348                 /* need source address above miyazawa*/
1349         }
1350         dst_hold(&rt->dst);
1351         cork->base.dst = &rt->dst;
1352         cork->fl.u.ip6 = *fl6;
1353         v6_cork->hop_limit = ipc6->hlimit;
1354         v6_cork->tclass = ipc6->tclass;
1355         if (rt->dst.flags & DST_XFRM_TUNNEL)
1356                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1357                       READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1358         else
1359                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1360                         READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1361         if (np->frag_size < mtu) {
1362                 if (np->frag_size)
1363                         mtu = np->frag_size;
1364         }
1365         if (mtu < IPV6_MIN_MTU)
1366                 return -EINVAL;
1367         cork->base.fragsize = mtu;
1368         cork->base.gso_size = ipc6->gso_size;
1369         cork->base.tx_flags = 0;
1370         cork->base.mark = ipc6->sockc.mark;
1371         sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1372
1373         if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1374                 cork->base.flags |= IPCORK_ALLFRAG;
1375         cork->base.length = 0;
1376
1377         cork->base.transmit_time = ipc6->sockc.transmit_time;
1378
1379         return 0;
1380 }
1381
1382 static int __ip6_append_data(struct sock *sk,
1383                              struct flowi6 *fl6,
1384                              struct sk_buff_head *queue,
1385                              struct inet_cork *cork,
1386                              struct inet6_cork *v6_cork,
1387                              struct page_frag *pfrag,
1388                              int getfrag(void *from, char *to, int offset,
1389                                          int len, int odd, struct sk_buff *skb),
1390                              void *from, int length, int transhdrlen,
1391                              unsigned int flags, struct ipcm6_cookie *ipc6)
1392 {
1393         struct sk_buff *skb, *skb_prev = NULL;
1394         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1395         struct ubuf_info *uarg = NULL;
1396         int exthdrlen = 0;
1397         int dst_exthdrlen = 0;
1398         int hh_len;
1399         int copy;
1400         int err;
1401         int offset = 0;
1402         u32 tskey = 0;
1403         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1404         struct ipv6_txoptions *opt = v6_cork->opt;
1405         int csummode = CHECKSUM_NONE;
1406         unsigned int maxnonfragsize, headersize;
1407         unsigned int wmem_alloc_delta = 0;
1408         bool paged, extra_uref = false;
1409
1410         skb = skb_peek_tail(queue);
1411         if (!skb) {
1412                 exthdrlen = opt ? opt->opt_flen : 0;
1413                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1414         }
1415
1416         paged = !!cork->gso_size;
1417         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1418         orig_mtu = mtu;
1419
1420         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1421             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1422                 tskey = sk->sk_tskey++;
1423
1424         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1425
1426         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1427                         (opt ? opt->opt_nflen : 0);
1428         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1429                      sizeof(struct frag_hdr);
1430
1431         headersize = sizeof(struct ipv6hdr) +
1432                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1433                      (dst_allfrag(&rt->dst) ?
1434                       sizeof(struct frag_hdr) : 0) +
1435                      rt->rt6i_nfheader_len;
1436
1437         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1438          * the first fragment
1439          */
1440         if (headersize + transhdrlen > mtu)
1441                 goto emsgsize;
1442
1443         if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1444             (sk->sk_protocol == IPPROTO_UDP ||
1445              sk->sk_protocol == IPPROTO_RAW)) {
1446                 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1447                                 sizeof(struct ipv6hdr));
1448                 goto emsgsize;
1449         }
1450
1451         if (ip6_sk_ignore_df(sk))
1452                 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1453         else
1454                 maxnonfragsize = mtu;
1455
1456         if (cork->length + length > maxnonfragsize - headersize) {
1457 emsgsize:
1458                 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1459                 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1460                 return -EMSGSIZE;
1461         }
1462
1463         /* CHECKSUM_PARTIAL only with no extension headers and when
1464          * we are not going to fragment
1465          */
1466         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1467             headersize == sizeof(struct ipv6hdr) &&
1468             length <= mtu - headersize &&
1469             (!(flags & MSG_MORE) || cork->gso_size) &&
1470             rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1471                 csummode = CHECKSUM_PARTIAL;
1472
1473         if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1474                 uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
1475                 if (!uarg)
1476                         return -ENOBUFS;
1477                 extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1478                 if (rt->dst.dev->features & NETIF_F_SG &&
1479                     csummode == CHECKSUM_PARTIAL) {
1480                         paged = true;
1481                 } else {
1482                         uarg->zerocopy = 0;
1483                         skb_zcopy_set(skb, uarg, &extra_uref);
1484                 }
1485         }
1486
1487         /*
1488          * Let's try using as much space as possible.
1489          * Use MTU if total length of the message fits into the MTU.
1490          * Otherwise, we need to reserve fragment header and
1491          * fragment alignment (= 8-15 octects, in total).
1492          *
1493          * Note that we may need to "move" the data from the tail
1494          * of the buffer to the new fragment when we split
1495          * the message.
1496          *
1497          * FIXME: It may be fragmented into multiple chunks
1498          *        at once if non-fragmentable extension headers
1499          *        are too large.
1500          * --yoshfuji
1501          */
1502
1503         cork->length += length;
1504         if (!skb)
1505                 goto alloc_new_skb;
1506
1507         while (length > 0) {
1508                 /* Check if the remaining data fits into current packet. */
1509                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1510                 if (copy < length)
1511                         copy = maxfraglen - skb->len;
1512
1513                 if (copy <= 0) {
1514                         char *data;
1515                         unsigned int datalen;
1516                         unsigned int fraglen;
1517                         unsigned int fraggap;
1518                         unsigned int alloclen;
1519                         unsigned int pagedlen;
1520 alloc_new_skb:
1521                         /* There's no room in the current skb */
1522                         if (skb)
1523                                 fraggap = skb->len - maxfraglen;
1524                         else
1525                                 fraggap = 0;
1526                         /* update mtu and maxfraglen if necessary */
1527                         if (!skb || !skb_prev)
1528                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1529                                                     fragheaderlen, skb, rt,
1530                                                     orig_mtu);
1531
1532                         skb_prev = skb;
1533
1534                         /*
1535                          * If remaining data exceeds the mtu,
1536                          * we know we need more fragment(s).
1537                          */
1538                         datalen = length + fraggap;
1539
1540                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1541                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1542                         fraglen = datalen + fragheaderlen;
1543                         pagedlen = 0;
1544
1545                         if ((flags & MSG_MORE) &&
1546                             !(rt->dst.dev->features&NETIF_F_SG))
1547                                 alloclen = mtu;
1548                         else if (!paged)
1549                                 alloclen = fraglen;
1550                         else {
1551                                 alloclen = min_t(int, fraglen, MAX_HEADER);
1552                                 pagedlen = fraglen - alloclen;
1553                         }
1554
1555                         alloclen += dst_exthdrlen;
1556
1557                         if (datalen != length + fraggap) {
1558                                 /*
1559                                  * this is not the last fragment, the trailer
1560                                  * space is regarded as data space.
1561                                  */
1562                                 datalen += rt->dst.trailer_len;
1563                         }
1564
1565                         alloclen += rt->dst.trailer_len;
1566                         fraglen = datalen + fragheaderlen;
1567
1568                         /*
1569                          * We just reserve space for fragment header.
1570                          * Note: this may be overallocation if the message
1571                          * (without MSG_MORE) fits into the MTU.
1572                          */
1573                         alloclen += sizeof(struct frag_hdr);
1574
1575                         copy = datalen - transhdrlen - fraggap - pagedlen;
1576                         if (copy < 0) {
1577                                 err = -EINVAL;
1578                                 goto error;
1579                         }
1580                         if (transhdrlen) {
1581                                 skb = sock_alloc_send_skb(sk,
1582                                                 alloclen + hh_len,
1583                                                 (flags & MSG_DONTWAIT), &err);
1584                         } else {
1585                                 skb = NULL;
1586                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1587                                     2 * sk->sk_sndbuf)
1588                                         skb = alloc_skb(alloclen + hh_len,
1589                                                         sk->sk_allocation);
1590                                 if (unlikely(!skb))
1591                                         err = -ENOBUFS;
1592                         }
1593                         if (!skb)
1594                                 goto error;
1595                         /*
1596                          *      Fill in the control structures
1597                          */
1598                         skb->protocol = htons(ETH_P_IPV6);
1599                         skb->ip_summed = csummode;
1600                         skb->csum = 0;
1601                         /* reserve for fragmentation and ipsec header */
1602                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1603                                     dst_exthdrlen);
1604
1605                         /*
1606                          *      Find where to start putting bytes
1607                          */
1608                         data = skb_put(skb, fraglen - pagedlen);
1609                         skb_set_network_header(skb, exthdrlen);
1610                         data += fragheaderlen;
1611                         skb->transport_header = (skb->network_header +
1612                                                  fragheaderlen);
1613                         if (fraggap) {
1614                                 skb->csum = skb_copy_and_csum_bits(
1615                                         skb_prev, maxfraglen,
1616                                         data + transhdrlen, fraggap);
1617                                 skb_prev->csum = csum_sub(skb_prev->csum,
1618                                                           skb->csum);
1619                                 data += fraggap;
1620                                 pskb_trim_unique(skb_prev, maxfraglen);
1621                         }
1622                         if (copy > 0 &&
1623                             getfrag(from, data + transhdrlen, offset,
1624                                     copy, fraggap, skb) < 0) {
1625                                 err = -EFAULT;
1626                                 kfree_skb(skb);
1627                                 goto error;
1628                         }
1629
1630                         offset += copy;
1631                         length -= copy + transhdrlen;
1632                         transhdrlen = 0;
1633                         exthdrlen = 0;
1634                         dst_exthdrlen = 0;
1635
1636                         /* Only the initial fragment is time stamped */
1637                         skb_shinfo(skb)->tx_flags = cork->tx_flags;
1638                         cork->tx_flags = 0;
1639                         skb_shinfo(skb)->tskey = tskey;
1640                         tskey = 0;
1641                         skb_zcopy_set(skb, uarg, &extra_uref);
1642
1643                         if ((flags & MSG_CONFIRM) && !skb_prev)
1644                                 skb_set_dst_pending_confirm(skb, 1);
1645
1646                         /*
1647                          * Put the packet on the pending queue
1648                          */
1649                         if (!skb->destructor) {
1650                                 skb->destructor = sock_wfree;
1651                                 skb->sk = sk;
1652                                 wmem_alloc_delta += skb->truesize;
1653                         }
1654                         __skb_queue_tail(queue, skb);
1655                         continue;
1656                 }
1657
1658                 if (copy > length)
1659                         copy = length;
1660
1661                 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1662                     skb_tailroom(skb) >= copy) {
1663                         unsigned int off;
1664
1665                         off = skb->len;
1666                         if (getfrag(from, skb_put(skb, copy),
1667                                                 offset, copy, off, skb) < 0) {
1668                                 __skb_trim(skb, off);
1669                                 err = -EFAULT;
1670                                 goto error;
1671                         }
1672                 } else if (!uarg || !uarg->zerocopy) {
1673                         int i = skb_shinfo(skb)->nr_frags;
1674
1675                         err = -ENOMEM;
1676                         if (!sk_page_frag_refill(sk, pfrag))
1677                                 goto error;
1678
1679                         if (!skb_can_coalesce(skb, i, pfrag->page,
1680                                               pfrag->offset)) {
1681                                 err = -EMSGSIZE;
1682                                 if (i == MAX_SKB_FRAGS)
1683                                         goto error;
1684
1685                                 __skb_fill_page_desc(skb, i, pfrag->page,
1686                                                      pfrag->offset, 0);
1687                                 skb_shinfo(skb)->nr_frags = ++i;
1688                                 get_page(pfrag->page);
1689                         }
1690                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1691                         if (getfrag(from,
1692                                     page_address(pfrag->page) + pfrag->offset,
1693                                     offset, copy, skb->len, skb) < 0)
1694                                 goto error_efault;
1695
1696                         pfrag->offset += copy;
1697                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1698                         skb->len += copy;
1699                         skb->data_len += copy;
1700                         skb->truesize += copy;
1701                         wmem_alloc_delta += copy;
1702                 } else {
1703                         err = skb_zerocopy_iter_dgram(skb, from, copy);
1704                         if (err < 0)
1705                                 goto error;
1706                 }
1707                 offset += copy;
1708                 length -= copy;
1709         }
1710
1711         if (wmem_alloc_delta)
1712                 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1713         return 0;
1714
1715 error_efault:
1716         err = -EFAULT;
1717 error:
1718         if (uarg)
1719                 sock_zerocopy_put_abort(uarg, extra_uref);
1720         cork->length -= length;
1721         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1722         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1723         return err;
1724 }
1725
1726 int ip6_append_data(struct sock *sk,
1727                     int getfrag(void *from, char *to, int offset, int len,
1728                                 int odd, struct sk_buff *skb),
1729                     void *from, int length, int transhdrlen,
1730                     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1731                     struct rt6_info *rt, unsigned int flags)
1732 {
1733         struct inet_sock *inet = inet_sk(sk);
1734         struct ipv6_pinfo *np = inet6_sk(sk);
1735         int exthdrlen;
1736         int err;
1737
1738         if (flags&MSG_PROBE)
1739                 return 0;
1740         if (skb_queue_empty(&sk->sk_write_queue)) {
1741                 /*
1742                  * setup for corking
1743                  */
1744                 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1745                                      ipc6, rt, fl6);
1746                 if (err)
1747                         return err;
1748
1749                 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1750                 length += exthdrlen;
1751                 transhdrlen += exthdrlen;
1752         } else {
1753                 fl6 = &inet->cork.fl.u.ip6;
1754                 transhdrlen = 0;
1755         }
1756
1757         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1758                                  &np->cork, sk_page_frag(sk), getfrag,
1759                                  from, length, transhdrlen, flags, ipc6);
1760 }
1761 EXPORT_SYMBOL_GPL(ip6_append_data);
1762
1763 static void ip6_cork_release(struct inet_cork_full *cork,
1764                              struct inet6_cork *v6_cork)
1765 {
1766         if (v6_cork->opt) {
1767                 kfree(v6_cork->opt->dst0opt);
1768                 kfree(v6_cork->opt->dst1opt);
1769                 kfree(v6_cork->opt->hopopt);
1770                 kfree(v6_cork->opt->srcrt);
1771                 kfree(v6_cork->opt);
1772                 v6_cork->opt = NULL;
1773         }
1774
1775         if (cork->base.dst) {
1776                 dst_release(cork->base.dst);
1777                 cork->base.dst = NULL;
1778                 cork->base.flags &= ~IPCORK_ALLFRAG;
1779         }
1780         memset(&cork->fl, 0, sizeof(cork->fl));
1781 }
1782
1783 struct sk_buff *__ip6_make_skb(struct sock *sk,
1784                                struct sk_buff_head *queue,
1785                                struct inet_cork_full *cork,
1786                                struct inet6_cork *v6_cork)
1787 {
1788         struct sk_buff *skb, *tmp_skb;
1789         struct sk_buff **tail_skb;
1790         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1791         struct ipv6_pinfo *np = inet6_sk(sk);
1792         struct net *net = sock_net(sk);
1793         struct ipv6hdr *hdr;
1794         struct ipv6_txoptions *opt = v6_cork->opt;
1795         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1796         struct flowi6 *fl6 = &cork->fl.u.ip6;
1797         unsigned char proto = fl6->flowi6_proto;
1798
1799         skb = __skb_dequeue(queue);
1800         if (!skb)
1801                 goto out;
1802         tail_skb = &(skb_shinfo(skb)->frag_list);
1803
1804         /* move skb->data to ip header from ext header */
1805         if (skb->data < skb_network_header(skb))
1806                 __skb_pull(skb, skb_network_offset(skb));
1807         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1808                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1809                 *tail_skb = tmp_skb;
1810                 tail_skb = &(tmp_skb->next);
1811                 skb->len += tmp_skb->len;
1812                 skb->data_len += tmp_skb->len;
1813                 skb->truesize += tmp_skb->truesize;
1814                 tmp_skb->destructor = NULL;
1815                 tmp_skb->sk = NULL;
1816         }
1817
1818         /* Allow local fragmentation. */
1819         skb->ignore_df = ip6_sk_ignore_df(sk);
1820
1821         *final_dst = fl6->daddr;
1822         __skb_pull(skb, skb_network_header_len(skb));
1823         if (opt && opt->opt_flen)
1824                 ipv6_push_frag_opts(skb, opt, &proto);
1825         if (opt && opt->opt_nflen)
1826                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1827
1828         skb_push(skb, sizeof(struct ipv6hdr));
1829         skb_reset_network_header(skb);
1830         hdr = ipv6_hdr(skb);
1831
1832         ip6_flow_hdr(hdr, v6_cork->tclass,
1833                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1834                                         ip6_autoflowlabel(net, np), fl6));
1835         hdr->hop_limit = v6_cork->hop_limit;
1836         hdr->nexthdr = proto;
1837         hdr->saddr = fl6->saddr;
1838         hdr->daddr = *final_dst;
1839
1840         skb->priority = sk->sk_priority;
1841         skb->mark = cork->base.mark;
1842
1843         skb->tstamp = cork->base.transmit_time;
1844
1845         skb_dst_set(skb, dst_clone(&rt->dst));
1846         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1847         if (proto == IPPROTO_ICMPV6) {
1848                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1849
1850                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1851                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1852         }
1853
1854         ip6_cork_release(cork, v6_cork);
1855 out:
1856         return skb;
1857 }
1858
1859 int ip6_send_skb(struct sk_buff *skb)
1860 {
1861         struct net *net = sock_net(skb->sk);
1862         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1863         int err;
1864
1865         err = ip6_local_out(net, skb->sk, skb);
1866         if (err) {
1867                 if (err > 0)
1868                         err = net_xmit_errno(err);
1869                 if (err)
1870                         IP6_INC_STATS(net, rt->rt6i_idev,
1871                                       IPSTATS_MIB_OUTDISCARDS);
1872         }
1873
1874         return err;
1875 }
1876
/*
 * ip6_push_pending_frames - build the pending packet for @sk and send it.
 *
 * Returns 0 when ip6_finish_skb() yields no packet, otherwise the result
 * of ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *pending = ip6_finish_skb(sk);

	return pending ? ip6_send_skb(pending) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1888
1889 static void __ip6_flush_pending_frames(struct sock *sk,
1890                                        struct sk_buff_head *queue,
1891                                        struct inet_cork_full *cork,
1892                                        struct inet6_cork *v6_cork)
1893 {
1894         struct sk_buff *skb;
1895
1896         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1897                 if (skb_dst(skb))
1898                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1899                                       IPSTATS_MIB_OUTDISCARDS);
1900                 kfree_skb(skb);
1901         }
1902
1903         ip6_cork_release(cork, v6_cork);
1904 }
1905
1906 void ip6_flush_pending_frames(struct sock *sk)
1907 {
1908         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1909                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1910 }
1911 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1912
1913 struct sk_buff *ip6_make_skb(struct sock *sk,
1914                              int getfrag(void *from, char *to, int offset,
1915                                          int len, int odd, struct sk_buff *skb),
1916                              void *from, int length, int transhdrlen,
1917                              struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1918                              struct rt6_info *rt, unsigned int flags,
1919                              struct inet_cork_full *cork)
1920 {
1921         struct inet6_cork v6_cork;
1922         struct sk_buff_head queue;
1923         int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1924         int err;
1925
1926         if (flags & MSG_PROBE)
1927                 return NULL;
1928
1929         __skb_queue_head_init(&queue);
1930
1931         cork->base.flags = 0;
1932         cork->base.addr = 0;
1933         cork->base.opt = NULL;
1934         cork->base.dst = NULL;
1935         v6_cork.opt = NULL;
1936         err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1937         if (err) {
1938                 ip6_cork_release(cork, &v6_cork);
1939                 return ERR_PTR(err);
1940         }
1941         if (ipc6->dontfrag < 0)
1942                 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1943
1944         err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1945                                 &current->task_frag, getfrag, from,
1946                                 length + exthdrlen, transhdrlen + exthdrlen,
1947                                 flags, ipc6);
1948         if (err) {
1949                 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1950                 return ERR_PTR(err);
1951         }
1952
1953         return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1954 }