net/ipv6/icmp.c

   1 /*
   2  *      Internet Control Message Protocol (ICMPv6)
   3  *      Linux INET6 implementation
   4  *
   5  *      Authors:
   6  *      Pedro Roque             <roque@di.fc.ul.pt>
   7  *
   8  *      Based on net/ipv4/icmp.c
   9  *
  10  *      RFC 1885
  11  *
  12  *      This program is free software; you can redistribute it and/or
  13  *      modify it under the terms of the GNU General Public License
  14  *      as published by the Free Software Foundation; either version
  15  *      2 of the License, or (at your option) any later version.
  16  */
  17
  18 /*
  19  *      Changes:
  20  *
  21  *      Andi Kleen              :       exception handling
  22  *      Andi Kleen                      add rate limits. never reply to a icmp.
  23  *                                      add more length checks and other fixes.
  24  *      yoshfuji                :       ensure to sent parameter problem for
  25  *                                      fragments.
  26  *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
  27  *      Randy Dunlap and
  28  *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
  29  *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  30  */
  31
  32 #define pr_fmt(fmt) "IPv6: " fmt
  33
  34 #include <linux/module.h>
  35 #include <linux/errno.h>
  36 #include <linux/types.h>
  37 #include <linux/socket.h>
  38 #include <linux/in.h>
  39 #include <linux/kernel.h>
  40 #include <linux/sockios.h>
  41 #include <linux/net.h>
  42 #include <linux/skbuff.h>
  43 #include <linux/init.h>
  44 #include <linux/netfilter.h>
  45 #include <linux/slab.h>
  46
  47 #ifdef CONFIG_SYSCTL
  48 #include <linux/sysctl.h>
  49 #endif
  50
  51 #include <linux/inet.h>
  52 #include <linux/netdevice.h>
  53 #include <linux/icmpv6.h>
  54
  55 #include <net/ip.h>
  56 #include <net/sock.h>
  57
  58 #include <net/ipv6.h>
  59 #include <net/ip6_checksum.h>
  60 #include <net/ping.h>
  61 #include <net/protocol.h>
  62 #include <net/raw.h>
  63 #include <net/rawv6.h>
  64 #include <net/transp_v6.h>
  65 #include <net/ip6_route.h>
  66 #include <net/addrconf.h>
  67 #include <net/icmp.h>
  68 #include <net/xfrm.h>
  69 #include <net/inet_common.h>
  70 #include <net/dsfield.h>
  71 #include <net/l3mdev.h>
  72
  73 #include <linux/uaccess.h>
  74
  75 /*
  76  *      The ICMP socket(s). This is the most convenient way to flow control
  77  *      our ICMP output as well as maintain a clean interface throughout
  78  *      all layers. All Socketless IP sends will soon be gone.
  79  *
  80  *      On SMP we have one ICMP socket per-cpu.
  81  */
  82 static inline struct sock *icmpv6_sk(struct net *net)
  83 {
  84         return *this_cpu_ptr(net->ipv6.icmp_sk);
  85 }
  86
  87 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
  88                        u8 type, u8 code, int offset, __be32 info)
  89 {
  90         /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
  91         struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
  92         struct net *net = dev_net(skb->dev);
  93
  94         if (type == ICMPV6_PKT_TOOBIG)
  95                 ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
  96         else if (type == NDISC_REDIRECT)
  97                 ip6_redirect(skb, net, skb->dev->ifindex, 0,
  98                              sock_net_uid(net, NULL));
  99
 100         if (!(type & ICMPV6_INFOMSG_MASK))
 101                 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
 102                         ping_err(skb, offset, ntohl(info));
 103
 104         return 0;
 105 }
 106
 107 static int icmpv6_rcv(struct sk_buff *skb);
 108
 109 static const struct inet6_protocol icmpv6_protocol = {
 110         .handler        =       icmpv6_rcv,
 111         .err_handler    =       icmpv6_err,
 112         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 113 };
 114
 115 /* Called with BH disabled */
 116 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 117 {
 118         struct sock *sk;
 119
 120         sk = icmpv6_sk(net);
 121         if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 122                 /* This can happen if the output path (f.e. SIT or
 123                  * ip6ip6 tunnel) signals dst_link_failure() for an
 124                  * outgoing ICMP6 packet.
 125                  */
 126                 return NULL;
 127         }
 128         return sk;
 129 }
 130
 131 static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 132 {
 133         spin_unlock(&sk->sk_lock.slock);
 134 }
 135
 136 /*
 137  * Figure out, may we reply to this packet with icmp error.
 138  *
 139  * We do not reply, if:
 140  *      - it was icmp error message.
 141  *      - it is truncated, so that it is known, that protocol is ICMPV6
 142  *        (i.e. in the middle of some exthdr)
 143  *
 144  *      --ANK (980726)
 145  */
 146
 147 static bool is_ineligible(const struct sk_buff *skb)
 148 {
 149         int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 150         int len = skb->len - ptr;
 151         __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 152         __be16 frag_off;
 153
 154         if (len < 0)
 155                 return true;
 156
 157         ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 158         if (ptr < 0)
 159                 return false;
 160         if (nexthdr == IPPROTO_ICMPV6) {
 161                 u8 _type, *tp;
 162                 tp = skb_header_pointer(skb,
 163                         ptr+offsetof(struct icmp6hdr, icmp6_type),
 164                         sizeof(_type), &_type);
 165                 if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
 166                         return true;
 167         }
 168         return false;
 169 }
 170
 171 static bool icmpv6_mask_allow(int type)
 172 {
 173         /* Informational messages are not limited. */
 174         if (type & ICMPV6_INFOMSG_MASK)
 175                 return true;
 176
 177         /* Do not limit pmtu discovery, it would break it. */
 178         if (type == ICMPV6_PKT_TOOBIG)
 179                 return true;
 180
 181         return false;
 182 }
 183
 184 static bool icmpv6_global_allow(int type)
 185 {
 186         if (icmpv6_mask_allow(type))
 187                 return true;
 188
 189         if (icmp_global_allow())
 190                 return true;
 191
 192         return false;
 193 }
 194
 195 /*
 196  * Check the ICMP output rate limit
 197  */
 198 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 199                                struct flowi6 *fl6)
 200 {
 201         struct net *net = sock_net(sk);
 202         struct dst_entry *dst;
 203         bool res = false;
 204
 205         if (icmpv6_mask_allow(type))
 206                 return true;
 207
 208         /*
 209          * Look up the output route.
 210          * XXX: perhaps the expire for routing entries cloned by
 211          * this lookup should be more aggressive (not longer than timeout).
 212          */
 213         dst = ip6_route_output(net, sk, fl6);
 214         if (dst->error) {
 215                 IP6_INC_STATS(net, ip6_dst_idev(dst),
 216                               IPSTATS_MIB_OUTNOROUTES);
 217         } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 218                 res = true;
 219         } else {
 220                 struct rt6_info *rt = (struct rt6_info *)dst;
 221                 int tmo = net->ipv6.sysctl.icmpv6_time;
 222                 struct inet_peer *peer;
 223
 224                 /* Give more bandwidth to wider prefixes. */
 225                 if (rt->rt6i_dst.plen < 128)
 226                         tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 227
 228                 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
 229                 res = inet_peer_xrlim_allow(peer, tmo);
 230                 if (peer)
 231                         inet_putpeer(peer);
 232         }
 233         dst_release(dst);
 234         return res;
 235 }
 236
 237 /*
 238  *      an inline helper for the "simple" if statement below
 239  *      checks if parameter problem report is caused by an
 240  *      unrecognized IPv6 option that has the Option Type
 241  *      highest-order two bits set to 10
 242  */
 243
 244 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 245 {
 246         u8 _optval, *op;
 247
 248         offset += skb_network_offset(skb);
 249         op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 250         if (!op)
 251                 return true;
 252         return (*op & 0xC0) == 0x80;
 253 }
 254
 255 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 256                                 struct icmp6hdr *thdr, int len)
 257 {
 258         struct sk_buff *skb;
 259         struct icmp6hdr *icmp6h;
 260
 261         skb = skb_peek(&sk->sk_write_queue);
 262         if (!skb)
 263                 return;
 264
 265         icmp6h = icmp6_hdr(skb);
 266         memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 267         icmp6h->icmp6_cksum = 0;
 268
 269         if (skb_queue_len(&sk->sk_write_queue) == 1) {
 270                 skb->csum = csum_partial(icmp6h,
 271                                         sizeof(struct icmp6hdr), skb->csum);
 272                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 273                                                       &fl6->daddr,
 274                                                       len, fl6->flowi6_proto,
 275                                                       skb->csum);
 276         } else {
 277                 __wsum tmp_csum = 0;
 278
 279                 skb_queue_walk(&sk->sk_write_queue, skb) {
 280                         tmp_csum = csum_add(tmp_csum, skb->csum);
 281                 }
 282
 283                 tmp_csum = csum_partial(icmp6h,
 284                                         sizeof(struct icmp6hdr), tmp_csum);
 285                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 286                                                       &fl6->daddr,
 287                                                       len, fl6->flowi6_proto,
 288                                                       tmp_csum);
 289         }
 290         ip6_push_pending_frames(sk);
 291 }
 292
 293 struct icmpv6_msg {
 294         struct sk_buff  *skb;
 295         int             offset;
 296         uint8_t         type;
 297 };
 298
 299 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 300 {
 301         struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 302         struct sk_buff *org_skb = msg->skb;
 303         __wsum csum = 0;
 304
 305         csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 306                                       to, len, csum);
 307         skb->csum = csum_block_add(skb->csum, csum, odd);
 308         if (!(msg->type & ICMPV6_INFOMSG_MASK))
 309                 nf_ct_attach(skb, org_skb);
 310         return 0;
 311 }
 312
 313 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 314 static void mip6_addr_swap(struct sk_buff *skb)
 315 {
 316         struct ipv6hdr *iph = ipv6_hdr(skb);
 317         struct inet6_skb_parm *opt = IP6CB(skb);
 318         struct ipv6_destopt_hao *hao;
 319         struct in6_addr tmp;
 320         int off;
 321
 322         if (opt->dsthao) {
 323                 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 324                 if (likely(off >= 0)) {
 325                         hao = (struct ipv6_destopt_hao *)
 326                                         (skb_network_header(skb) + off);
 327                         tmp = iph->saddr;
 328                         iph->saddr = hao->addr;
 329                         hao->addr = tmp;
 330                 }
 331         }
 332 }
 333 #else
 334 static inline void mip6_addr_swap(struct sk_buff *skb) {}
 335 #endif
 336
 337 static struct dst_entry *icmpv6_route_lookup(struct net *net,
 338                                              struct sk_buff *skb,
 339                                              struct sock *sk,
 340                                              struct flowi6 *fl6)
 341 {
 342         struct dst_entry *dst, *dst2;
 343         struct flowi6 fl2;
 344         int err;
 345
 346         err = ip6_dst_lookup(net, sk, &dst, fl6);
 347         if (err)
 348                 return ERR_PTR(err);
 349
 350         /*
 351          * We won't send icmp if the destination is known
 352          * anycast.
 353          */
 354         if (ipv6_anycast_destination(dst, &fl6->daddr)) {
 355                 net_dbg_ratelimited("icmp6_send: acast source\n");
 356                 dst_release(dst);
 357                 return ERR_PTR(-EINVAL);
 358         }
 359
 360         /* No need to clone since we're just using its address. */
 361         dst2 = dst;
 362
 363         dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
 364         if (!IS_ERR(dst)) {
 365                 if (dst != dst2)
 366                         return dst;
 367         } else {
 368                 if (PTR_ERR(dst) == -EPERM)
 369                         dst = NULL;
 370                 else
 371                         return dst;
 372         }
 373
 374         err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
 375         if (err)
 376                 goto relookup_failed;
 377
 378         err = ip6_dst_lookup(net, sk, &dst2, &fl2);
 379         if (err)
 380                 goto relookup_failed;
 381
 382         dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
 383         if (!IS_ERR(dst2)) {
 384                 dst_release(dst);
 385                 dst = dst2;
 386         } else {
 387                 err = PTR_ERR(dst2);
 388                 if (err == -EPERM) {
 389                         dst_release(dst);
 390                         return dst2;
 391                 } else
 392                         goto relookup_failed;
 393         }
 394
 395 relookup_failed:
 396         if (dst)
 397                 return dst;
 398         return ERR_PTR(err);
 399 }
 400
 401 static int icmp6_iif(const struct sk_buff *skb)
 402 {
 403         int iif = skb->dev->ifindex;
 404
 405         /* for local traffic to local address, skb dev is the loopback
 406          * device. Check if there is a dst attached to the skb and if so
 407          * get the real device index. Same is needed for replies to a link
 408          * local address on a device enslaved to an L3 master device
 409          */
 410         if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 411                 const struct rt6_info *rt6 = skb_rt6_info(skb);
 412
 413                 if (rt6)
 414                         iif = rt6->rt6i_idev->dev->ifindex;
 415         }
 416
 417         return iif;
 418 }
 419
 420 /*
 421  *      Send an ICMP message in response to a packet in error
 422  */
 423 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 424                        const struct in6_addr *force_saddr)
 425 {
 426         struct inet6_dev *idev = NULL;
 427         struct ipv6hdr *hdr = ipv6_hdr(skb);
 428         struct sock *sk;
 429         struct net *net;
 430         struct ipv6_pinfo *np;
 431         const struct in6_addr *saddr = NULL;
 432         struct dst_entry *dst;
 433         struct icmp6hdr tmp_hdr;
 434         struct flowi6 fl6;
 435         struct icmpv6_msg msg;
 436         struct ipcm6_cookie ipc6;
 437         int iif = 0;
 438         int addr_type = 0;
 439         int len;
 440         u32 mark;
 441
 442         if ((u8 *)hdr < skb->head ||
 443             (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 444                 return;
 445
 446         if (!skb->dev)
 447                 return;
 448         net = dev_net(skb->dev);
 449         mark = IP6_REPLY_MARK(net, skb->mark);
 450         /*
 451          *      Make sure we respect the rules
 452          *      i.e. RFC 1885 2.4(e)
 453          *      Rule (e.1) is enforced by not using icmp6_send
 454          *      in any code that processes icmp errors.
 455          */
 456         addr_type = ipv6_addr_type(&hdr->daddr);
 457
 458         if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
 459             ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 460                 saddr = &hdr->daddr;
 461
 462         /*
 463          *      Dest addr check
 464          */
 465
 466         if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
 467                 if (type != ICMPV6_PKT_TOOBIG &&
 468                     !(type == ICMPV6_PARAMPROB &&
 469                       code == ICMPV6_UNK_OPTION &&
 470                       (opt_unrec(skb, info))))
 471                         return;
 472
 473                 saddr = NULL;
 474         }
 475
 476         addr_type = ipv6_addr_type(&hdr->saddr);
 477
 478         /*
 479          *      Source addr check
 480          */
 481
 482         if (__ipv6_addr_needs_scope_id(addr_type)) {
 483                 iif = icmp6_iif(skb);
 484         } else {
 485                 dst = skb_dst(skb);
 486                 iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
 487         }
 488
 489         /*
 490          *      Must not send error if the source does not uniquely
 491          *      identify a single node (RFC2463 Section 2.4).
 492          *      We check unspecified / multicast addresses here,
 493          *      and anycast addresses will be checked later.
 494          */
 495         if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 496                 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
 497                                     &hdr->saddr, &hdr->daddr);
 498                 return;
 499         }
 500
 501         /*
 502          *      Never answer to a ICMP packet.
 503          */
 504         if (is_ineligible(skb)) {
 505                 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
 506                                     &hdr->saddr, &hdr->daddr);
 507                 return;
 508         }
 509
 510         /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
 511         local_bh_disable();
 512
 513         /* Check global sysctl_icmp_msgs_per_sec ratelimit */
 514         if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
 515                 goto out_bh_enable;
 516
 517         mip6_addr_swap(skb);
 518
 519         memset(&fl6, 0, sizeof(fl6));
 520         fl6.flowi6_proto = IPPROTO_ICMPV6;
 521         fl6.daddr = hdr->saddr;
 522         if (force_saddr)
 523                 saddr = force_saddr;
 524         if (saddr)
 525                 fl6.saddr = *saddr;
 526         fl6.flowi6_mark = mark;
 527         fl6.flowi6_oif = iif;
 528         fl6.fl6_icmp_type = type;
 529         fl6.fl6_icmp_code = code;
 530         fl6.flowi6_uid = sock_net_uid(net, NULL);
 531         fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 532         security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 533
 534         sk = icmpv6_xmit_lock(net);
 535         if (!sk)
 536                 goto out_bh_enable;
 537
 538         sk->sk_mark = mark;
 539         np = inet6_sk(sk);
 540
 541         if (!icmpv6_xrlim_allow(sk, type, &fl6))
 542                 goto out;
 543
 544         tmp_hdr.icmp6_type = type;
 545         tmp_hdr.icmp6_code = code;
 546         tmp_hdr.icmp6_cksum = 0;
 547         tmp_hdr.icmp6_pointer = htonl(info);
 548
 549         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 550                 fl6.flowi6_oif = np->mcast_oif;
 551         else if (!fl6.flowi6_oif)
 552                 fl6.flowi6_oif = np->ucast_oif;
 553
 554         ipcm6_init_sk(&ipc6, np);
 555         fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 556
 557         dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 558         if (IS_ERR(dst))
 559                 goto out;
 560
 561         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 562
 563         msg.skb = skb;
 564         msg.offset = skb_network_offset(skb);
 565         msg.type = type;
 566
 567         len = skb->len - msg.offset;
 568         len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 569         if (len < 0) {
 570                 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
 571                                     &hdr->saddr, &hdr->daddr);
 572                 goto out_dst_release;
 573         }
 574
 575         rcu_read_lock();
 576         idev = __in6_dev_get(skb->dev);
 577
 578         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 579                             len + sizeof(struct icmp6hdr),
 580                             sizeof(struct icmp6hdr),
 581                             &ipc6, &fl6, (struct rt6_info *)dst,
 582                             MSG_DONTWAIT)) {
 583                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 584                 ip6_flush_pending_frames(sk);
 585         } else {
 586                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 587                                            len + sizeof(struct icmp6hdr));
 588         }
 589         rcu_read_unlock();
 590 out_dst_release:
 591         dst_release(dst);
 592 out:
 593         icmpv6_xmit_unlock(sk);
 594 out_bh_enable:
 595         local_bh_enable();
 596 }
 597
 598 /* Slightly more convenient version of icmp6_send.
 599  */
 600 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 601 {
 602         icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
 603         kfree_skb(skb);
 604 }
 605
 606 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 607  * if sufficient data bytes are available
 608  * @nhs is the size of the tunnel header(s) :
 609  *  Either an IPv4 header for SIT encap
 610  *         an IPv4 header + GRE header for GRE encap
 611  */
 612 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
 613                                unsigned int data_len)
 614 {
 615         struct in6_addr temp_saddr;
 616         struct rt6_info *rt;
 617         struct sk_buff *skb2;
 618         u32 info = 0;
 619
 620         if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
 621                 return 1;
 622
 623         /* RFC 4884 (partial) support for ICMP extensions */
 624         if (data_len < 128 || (data_len & 7) || skb->len < data_len)
 625                 data_len = 0;
 626
 627         skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
 628
 629         if (!skb2)
 630                 return 1;
 631
 632         skb_dst_drop(skb2);
 633         skb_pull(skb2, nhs);
 634         skb_reset_network_header(skb2);
 635
 636         rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
 637                         skb, 0);
 638
 639         if (rt && rt->dst.dev)
 640                 skb2->dev = rt->dst.dev;
 641
 642         ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
 643
 644         if (data_len) {
 645                 /* RFC 4884 (partial) support :
 646                  * insert 0 padding at the end, before the extensions
 647                  */
 648                 __skb_push(skb2, nhs);
 649                 skb_reset_network_header(skb2);
 650                 memmove(skb2->data, skb2->data + nhs, data_len - nhs);
 651                 memset(skb2->data + data_len - nhs, 0, nhs);
 652                 /* RFC 4884 4.5 : Length is measured in 64-bit words,
 653                  * and stored in reserved[0]
 654                  */
 655                 info = (data_len/8) << 24;
 656         }
 657         if (type == ICMP_TIME_EXCEEDED)
 658                 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 659                            info, &temp_saddr);
 660         else
 661                 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
 662                            info, &temp_saddr);
 663         if (rt)
 664                 ip6_rt_put(rt);
 665
 666         kfree_skb(skb2);
 667
 668         return 0;
 669 }
 670 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
 671
 672 static void icmpv6_echo_reply(struct sk_buff *skb)
 673 {
 674         struct net *net = dev_net(skb->dev);
 675         struct sock *sk;
 676         struct inet6_dev *idev;
 677         struct ipv6_pinfo *np;
 678         const struct in6_addr *saddr = NULL;
 679         struct icmp6hdr *icmph = icmp6_hdr(skb);
 680         struct icmp6hdr tmp_hdr;
 681         struct flowi6 fl6;
 682         struct icmpv6_msg msg;
 683         struct dst_entry *dst;
 684         struct ipcm6_cookie ipc6;
 685         u32 mark = IP6_REPLY_MARK(net, skb->mark);
 686
 687         saddr = &ipv6_hdr(skb)->daddr;
 688
 689         if (!ipv6_unicast_destination(skb) &&
 690             !(net->ipv6.sysctl.anycast_src_echo_reply &&
 691               ipv6_anycast_destination(skb_dst(skb), saddr)))
 692                 saddr = NULL;
 693
 694         memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
 695         tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
 696
 697         memset(&fl6, 0, sizeof(fl6));
 698         fl6.flowi6_proto = IPPROTO_ICMPV6;
 699         fl6.daddr = ipv6_hdr(skb)->saddr;
 700         if (saddr)
 701                 fl6.saddr = *saddr;
 702         fl6.flowi6_oif = icmp6_iif(skb);
 703         fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 704         fl6.flowi6_mark = mark;
 705         fl6.flowi6_uid = sock_net_uid(net, NULL);
 706         security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 707
 708         local_bh_disable();
 709         sk = icmpv6_xmit_lock(net);
 710         if (!sk)
 711                 goto out_bh_enable;
 712         sk->sk_mark = mark;
 713         np = inet6_sk(sk);
 714
 715         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 716                 fl6.flowi6_oif = np->mcast_oif;
 717         else if (!fl6.flowi6_oif)
 718                 fl6.flowi6_oif = np->ucast_oif;
 719
 720         if (ip6_dst_lookup(net, sk, &dst, &fl6))
 721                 goto out;
 722         dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
 723         if (IS_ERR(dst))
 724                 goto out;
 725
 726         idev = __in6_dev_get(skb->dev);
 727
 728         msg.skb = skb;
 729         msg.offset = 0;
 730         msg.type = ICMPV6_ECHO_REPLY;
 731
 732         ipcm6_init_sk(&ipc6, np);
 733         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 734         ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
 735
 736         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 737                             skb->len + sizeof(struct icmp6hdr),
 738                             sizeof(struct icmp6hdr), &ipc6, &fl6,
 739                             (struct rt6_info *)dst, MSG_DONTWAIT)) {
 740                 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 741                 ip6_flush_pending_frames(sk);
 742         } else {
 743                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 744                                            skb->len + sizeof(struct icmp6hdr));
 745         }
 746         dst_release(dst);
 747 out:
 748         icmpv6_xmit_unlock(sk);
 749 out_bh_enable:
 750         local_bh_enable();
 751 }
 752
 753 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 754 {
 755         const struct inet6_protocol *ipprot;
 756         int inner_offset;
 757         __be16 frag_off;
 758         u8 nexthdr;
 759         struct net *net = dev_net(skb->dev);
 760
 761         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 762                 goto out;
 763
 764         nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 765         if (ipv6_ext_hdr(nexthdr)) {
 766                 /* now skip over extension headers */
 767                 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 768                                                 &nexthdr, &frag_off);
 769                 if (inner_offset < 0)
 770                         goto out;
 771         } else {
 772                 inner_offset = sizeof(struct ipv6hdr);
 773         }
 774
 775         /* Checkin header including 8 bytes of inner protocol header. */
 776         if (!pskb_may_pull(skb, inner_offset+8))
 777                 goto out;
 778
 779         /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 780            Without this we will not able f.e. to make source routed
 781            pmtu discovery.
 782            Corresponding argument (opt) to notifiers is already added.
 783            --ANK (980726)
 784          */
 785
 786         ipprot = rcu_dereference(inet6_protos[nexthdr]);
 787         if (ipprot && ipprot->err_handler)
 788                 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 789
 790         raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 791         return;
 792
 793 out:
 794         __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 795 }
 796
 797 /*
 798  *      Handle icmp messages
 799  */
 800
 801 static int icmpv6_rcv(struct sk_buff *skb)
 802 {
 803         struct net *net = dev_net(skb->dev);
 804         struct net_device *dev = skb->dev;
 805         struct inet6_dev *idev = __in6_dev_get(dev);
 806         const struct in6_addr *saddr, *daddr;
 807         struct icmp6hdr *hdr;
 808         u8 type;
 809         bool success = false;
 810
 811         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 812                 struct sec_path *sp = skb_sec_path(skb);
 813                 int nh;
 814
 815                 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 816                                  XFRM_STATE_ICMP))
 817                         goto drop_no_count;
 818
 819                 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 820                         goto drop_no_count;
 821
 822                 nh = skb_network_offset(skb);
 823                 skb_set_network_header(skb, sizeof(*hdr));
 824
 825                 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
 826                         goto drop_no_count;
 827
 828                 skb_set_network_header(skb, nh);
 829         }
 830
 831         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 832
 833         saddr = &ipv6_hdr(skb)->saddr;
 834         daddr = &ipv6_hdr(skb)->daddr;
 835
 836         if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 837                 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
 838                                     saddr, daddr);
 839                 goto csum_error;
 840         }
 841
 842         if (!pskb_pull(skb, sizeof(*hdr)))
 843                 goto discard_it;
 844
 845         hdr = icmp6_hdr(skb);
 846
 847         type = hdr->icmp6_type;
 848
 849         ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 850
 851         switch (type) {
 852         case ICMPV6_ECHO_REQUEST:
 853                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
 854                         icmpv6_echo_reply(skb);
 855                 break;
 856
 857         case ICMPV6_ECHO_REPLY:
 858                 success = ping_rcv(skb);
 859                 break;
 860
 861         case ICMPV6_PKT_TOOBIG:
 862                 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
 863                    standard destination cache. Seems, only "advanced"
 864                    destination cache will allow to solve this problem
 865                    --ANK (980726)
 866                  */
 867                 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 868                         goto discard_it;
 869                 hdr = icmp6_hdr(skb);
 870
 871                 /* to notify */
 872                 /* fall through */
 873         case ICMPV6_DEST_UNREACH:
 874         case ICMPV6_TIME_EXCEED:
 875         case ICMPV6_PARAMPROB:
 876                 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 877                 break;
 878
 879         case NDISC_ROUTER_SOLICITATION:
 880         case NDISC_ROUTER_ADVERTISEMENT:
 881         case NDISC_NEIGHBOUR_SOLICITATION:
 882         case NDISC_NEIGHBOUR_ADVERTISEMENT:
 883         case NDISC_REDIRECT:
 884                 ndisc_rcv(skb);
 885                 break;
 886
 887         case ICMPV6_MGM_QUERY:
 888                 igmp6_event_query(skb);
 889                 break;
 890
 891         case ICMPV6_MGM_REPORT:
 892                 igmp6_event_report(skb);
 893                 break;
 894
 895         case ICMPV6_MGM_REDUCTION:
 896         case ICMPV6_NI_QUERY:
 897         case ICMPV6_NI_REPLY:
 898         case ICMPV6_MLD2_REPORT:
 899         case ICMPV6_DHAAD_REQUEST:
 900         case ICMPV6_DHAAD_REPLY:
 901         case ICMPV6_MOBILE_PREFIX_SOL:
 902         case ICMPV6_MOBILE_PREFIX_ADV:
 903                 break;
 904
 905         default:
 906                 /* informational */
 907                 if (type & ICMPV6_INFOMSG_MASK)
 908                         break;
 909
 910                 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
 911                                     saddr, daddr);
 912
 913                 /*
 914                  * error of unknown type.
 915                  * must pass to upper level
 916                  */
 917
 918                 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 919         }
 920
 921         /* until the v6 path can be better sorted assume failure and
 922          * preserve the status quo behaviour for the rest of the paths to here
 923          */
 924         if (success)
 925                 consume_skb(skb);
 926         else
 927                 kfree_skb(skb);
 928
 929         return 0;
 930
 931 csum_error:
 932         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 933 discard_it:
 934         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 935 drop_no_count:
 936         kfree_skb(skb);
 937         return 0;
 938 }
 939
 940 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 941                       u8 type,
 942                       const struct in6_addr *saddr,
 943                       const struct in6_addr *daddr,
 944                       int oif)
 945 {
 946         memset(fl6, 0, sizeof(*fl6));
 947         fl6->saddr = *saddr;
 948         fl6->daddr = *daddr;
 949         fl6->flowi6_proto       = IPPROTO_ICMPV6;
 950         fl6->fl6_icmp_type      = type;
 951         fl6->fl6_icmp_code      = 0;
 952         fl6->flowi6_oif         = oif;
 953         security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 954 }
 955
 956 static void __net_exit icmpv6_sk_exit(struct net *net)
 957 {
 958         int i;
 959
 960         for_each_possible_cpu(i)
 961                 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
 962         free_percpu(net->ipv6.icmp_sk);
 963 }
 964
 965 static int __net_init icmpv6_sk_init(struct net *net)
 966 {
 967         struct sock *sk;
 968         int err, i;
 969
 970         net->ipv6.icmp_sk = alloc_percpu(struct sock *);
 971         if (!net->ipv6.icmp_sk)
 972                 return -ENOMEM;
 973
 974         for_each_possible_cpu(i) {
 975                 err = inet_ctl_sock_create(&sk, PF_INET6,
 976                                            SOCK_RAW, IPPROTO_ICMPV6, net);
 977                 if (err < 0) {
 978                         pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 979                                err);
 980                         goto fail;
 981                 }
 982
 983                 *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
 984
 985                 /* Enough space for 2 64K ICMP packets, including
 986                  * sk_buff struct overhead.
 987                  */
 988                 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 989         }
 990         return 0;
 991
 992  fail:
 993         icmpv6_sk_exit(net);
 994         return err;
 995 }
 996
 997 static struct pernet_operations icmpv6_sk_ops = {
 998         .init = icmpv6_sk_init,
 999         .exit = icmpv6_sk_exit,
1000 };
1001
1002 int __init icmpv6_init(void)
1003 {
1004         int err;
1005
1006         err = register_pernet_subsys(&icmpv6_sk_ops);
1007         if (err < 0)
1008                 return err;
1009
1010         err = -EAGAIN;
1011         if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1012                 goto fail;
1013
1014         err = inet6_register_icmp_sender(icmp6_send);
1015         if (err)
1016                 goto sender_reg_err;
1017         return 0;
1018
1019 sender_reg_err:
1020         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1021 fail:
1022         pr_err("Failed to register ICMP6 protocol\n");
1023         unregister_pernet_subsys(&icmpv6_sk_ops);
1024         return err;
1025 }
1026
1027 void icmpv6_cleanup(void)
1028 {
1029         inet6_unregister_icmp_sender(icmp6_send);
1030         unregister_pernet_subsys(&icmpv6_sk_ops);
1031         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1032 }
1033
1034
/*
 * Translation table for ICMPv6 Destination Unreachable codes.  The array
 * is indexed by the ICMPv6 code; each entry gives the errno to report and
 * whether the error is treated as fatal.
 */
static const struct icmp6_err {
	int err;	/* errno value (positive) */
	int fatal;	/* non-zero if the error is fatal */
} tab_unreach[] = {
	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,	.fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,	.fatal = 1 },	/* REJECT_ROUTE */
};
1068
1069 int icmpv6_err_convert(u8 type, u8 code, int *err)
1070 {
1071         int fatal = 0;
1072
1073         *err = EPROTO;
1074
1075         switch (type) {
1076         case ICMPV6_DEST_UNREACH:
1077                 fatal = 1;
1078                 if (code < ARRAY_SIZE(tab_unreach)) {
1079                         *err  = tab_unreach[code].err;
1080                         fatal = tab_unreach[code].fatal;
1081                 }
1082                 break;
1083
1084         case ICMPV6_PKT_TOOBIG:
1085                 *err = EMSGSIZE;
1086                 break;
1087
1088         case ICMPV6_PARAMPROB:
1089                 *err = EPROTO;
1090                 fatal = 1;
1091                 break;
1092
1093         case ICMPV6_TIME_EXCEED:
1094                 *err = EHOSTUNREACH;
1095                 break;
1096         }
1097
1098         return fatal;
1099 }
1100 EXPORT_SYMBOL(icmpv6_err_convert);
1101
1102 #ifdef CONFIG_SYSCTL
1103 static struct ctl_table ipv6_icmp_table_template[] = {
1104         {
1105                 .procname       = "ratelimit",
1106                 .data           = &init_net.ipv6.sysctl.icmpv6_time,
1107                 .maxlen         = sizeof(int),
1108                 .mode           = 0644,
1109                 .proc_handler   = proc_dointvec_ms_jiffies,
1110         },
1111         {
1112                 .procname       = "echo_ignore_all",
1113                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1114                 .maxlen         = sizeof(int),
1115                 .mode           = 0644,
1116                 .proc_handler = proc_dointvec,
1117         },
1118         { },
1119 };
1120
1121 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1122 {
1123         struct ctl_table *table;
1124
1125         table = kmemdup(ipv6_icmp_table_template,
1126                         sizeof(ipv6_icmp_table_template),
1127                         GFP_KERNEL);
1128
1129         if (table) {
1130                 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1131                 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1132         }
1133         return table;
1134 }
1135 #endif