net/ipv6/icmp.c

   1 /*
   2  *      Internet Control Message Protocol (ICMPv6)
   3  *      Linux INET6 implementation
   4  *
   5  *      Authors:
   6  *      Pedro Roque             <roque@di.fc.ul.pt>
   7  *
   8  *      Based on net/ipv4/icmp.c
   9  *
  10  *      RFC 1885
  11  *
  12  *      This program is free software; you can redistribute it and/or
  13  *      modify it under the terms of the GNU General Public License
  14  *      as published by the Free Software Foundation; either version
  15  *      2 of the License, or (at your option) any later version.
  16  */
  17
  18 /*
  19  *      Changes:
  20  *
  21  *      Andi Kleen              :       exception handling
 *      Andi Kleen                      add rate limits. never reply to an icmp.
 *                                      add more length checks and other fixes.
 *      yoshfuji                :       ensure to send parameter problem for
 *                                      fragments.
  26  *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
  27  *      Randy Dunlap and
  28  *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
  29  *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
  30  */
  31
  32 #define pr_fmt(fmt) "IPv6: " fmt
  33
  34 #include <linux/module.h>
  35 #include <linux/errno.h>
  36 #include <linux/types.h>
  37 #include <linux/socket.h>
  38 #include <linux/in.h>
  39 #include <linux/kernel.h>
  40 #include <linux/sockios.h>
  41 #include <linux/net.h>
  42 #include <linux/skbuff.h>
  43 #include <linux/init.h>
  44 #include <linux/netfilter.h>
  45 #include <linux/slab.h>
  46
  47 #ifdef CONFIG_SYSCTL
  48 #include <linux/sysctl.h>
  49 #endif
  50
  51 #include <linux/inet.h>
  52 #include <linux/netdevice.h>
  53 #include <linux/icmpv6.h>
  54
  55 #include <net/ip.h>
  56 #include <net/sock.h>
  57
  58 #include <net/ipv6.h>
  59 #include <net/ip6_checksum.h>
  60 #include <net/ping.h>
  61 #include <net/protocol.h>
  62 #include <net/raw.h>
  63 #include <net/rawv6.h>
  64 #include <net/transp_v6.h>
  65 #include <net/ip6_route.h>
  66 #include <net/addrconf.h>
  67 #include <net/icmp.h>
  68 #include <net/xfrm.h>
  69 #include <net/inet_common.h>
  70 #include <net/dsfield.h>
  71 #include <net/l3mdev.h>
  72
  73 #include <linux/uaccess.h>
  74
  75 /*
  76  *      The ICMP socket(s). This is the most convenient way to flow control
  77  *      our ICMP output as well as maintain a clean interface throughout
  78  *      all layers. All Socketless IP sends will soon be gone.
  79  *
  80  *      On SMP we have one ICMP socket per-cpu.
  81  */
  82 static inline struct sock *icmpv6_sk(struct net *net)
  83 {
  84         return net->ipv6.icmp_sk[smp_processor_id()];
  85 }
  86
  87 static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
  88                        u8 type, u8 code, int offset, __be32 info)
  89 {
  90         /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
  91         struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
  92         struct net *net = dev_net(skb->dev);
  93
  94         if (type == ICMPV6_PKT_TOOBIG)
  95                 ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
  96         else if (type == NDISC_REDIRECT)
  97                 ip6_redirect(skb, net, skb->dev->ifindex, 0,
  98                              sock_net_uid(net, NULL));
  99
 100         if (!(type & ICMPV6_INFOMSG_MASK))
 101                 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
 102                         ping_err(skb, offset, ntohl(info));
 103 }
 104
/* Forward declaration: the receive handler is defined further down. */
static int icmpv6_rcv(struct sk_buff *skb);

/*
 * Protocol descriptor for ICMPv6: icmpv6_rcv() handles received messages
 * and icmpv6_err() handles errors reported for ICMPv6 packets.
 * NOTE(review): where this descriptor gets registered is not visible in
 * this part of the file -- confirm against the init code.
 */
static const struct inet6_protocol icmpv6_protocol = {
        .handler        =       icmpv6_rcv,
        .err_handler    =       icmpv6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
 112
 113 /* Called with BH disabled */
 114 static __inline__ struct sock *icmpv6_xmit_lock(struct net *net)
 115 {
 116         struct sock *sk;
 117
 118         sk = icmpv6_sk(net);
 119         if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 120                 /* This can happen if the output path (f.e. SIT or
 121                  * ip6ip6 tunnel) signals dst_link_failure() for an
 122                  * outgoing ICMP6 packet.
 123                  */
 124                 return NULL;
 125         }
 126         return sk;
 127 }
 128
/* Release the socket spinlock that icmpv6_xmit_lock() acquired on @sk. */
static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
{
        spin_unlock(&sk->sk_lock.slock);
}
 133
 134 /*
 135  * Figure out, may we reply to this packet with icmp error.
 136  *
 137  * We do not reply, if:
 138  *      - it was icmp error message.
 139  *      - it is truncated, so that it is known, that protocol is ICMPV6
 140  *        (i.e. in the middle of some exthdr)
 141  *
 142  *      --ANK (980726)
 143  */
 144
 145 static bool is_ineligible(const struct sk_buff *skb)
 146 {
 147         int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 148         int len = skb->len - ptr;
 149         __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 150         __be16 frag_off;
 151
 152         if (len < 0)
 153                 return true;
 154
 155         ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
 156         if (ptr < 0)
 157                 return false;
 158         if (nexthdr == IPPROTO_ICMPV6) {
 159                 u8 _type, *tp;
 160                 tp = skb_header_pointer(skb,
 161                         ptr+offsetof(struct icmp6hdr, icmp6_type),
 162                         sizeof(_type), &_type);
 163                 if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
 164                         return true;
 165         }
 166         return false;
 167 }
 168
 169 static bool icmpv6_mask_allow(int type)
 170 {
 171         /* Informational messages are not limited. */
 172         if (type & ICMPV6_INFOMSG_MASK)
 173                 return true;
 174
 175         /* Do not limit pmtu discovery, it would break it. */
 176         if (type == ICMPV6_PKT_TOOBIG)
 177                 return true;
 178
 179         return false;
 180 }
 181
 182 static bool icmpv6_global_allow(int type)
 183 {
 184         if (icmpv6_mask_allow(type))
 185                 return true;
 186
 187         if (icmp_global_allow())
 188                 return true;
 189
 190         return false;
 191 }
 192
 193 /*
 194  * Check the ICMP output rate limit
 195  */
 196 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
 197                                struct flowi6 *fl6)
 198 {
 199         struct net *net = sock_net(sk);
 200         struct dst_entry *dst;
 201         bool res = false;
 202
 203         if (icmpv6_mask_allow(type))
 204                 return true;
 205
 206         /*
 207          * Look up the output route.
 208          * XXX: perhaps the expire for routing entries cloned by
 209          * this lookup should be more aggressive (not longer than timeout).
 210          */
 211         dst = ip6_route_output(net, sk, fl6);
 212         if (dst->error) {
 213                 IP6_INC_STATS(net, ip6_dst_idev(dst),
 214                               IPSTATS_MIB_OUTNOROUTES);
 215         } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
 216                 res = true;
 217         } else {
 218                 struct rt6_info *rt = (struct rt6_info *)dst;
 219                 int tmo = net->ipv6.sysctl.icmpv6_time;
 220                 struct inet_peer *peer;
 221
 222                 /* Give more bandwidth to wider prefixes. */
 223                 if (rt->rt6i_dst.plen < 128)
 224                         tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 225
 226                 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
 227                 res = inet_peer_xrlim_allow(peer, tmo);
 228                 if (peer)
 229                         inet_putpeer(peer);
 230         }
 231         dst_release(dst);
 232         return res;
 233 }
 234
 235 /*
 236  *      an inline helper for the "simple" if statement below
 237  *      checks if parameter problem report is caused by an
 238  *      unrecognized IPv6 option that has the Option Type
 239  *      highest-order two bits set to 10
 240  */
 241
 242 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
 243 {
 244         u8 _optval, *op;
 245
 246         offset += skb_network_offset(skb);
 247         op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 248         if (!op)
 249                 return true;
 250         return (*op & 0xC0) == 0x80;
 251 }
 252
 253 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
 254                                 struct icmp6hdr *thdr, int len)
 255 {
 256         struct sk_buff *skb;
 257         struct icmp6hdr *icmp6h;
 258
 259         skb = skb_peek(&sk->sk_write_queue);
 260         if (!skb)
 261                 return;
 262
 263         icmp6h = icmp6_hdr(skb);
 264         memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 265         icmp6h->icmp6_cksum = 0;
 266
 267         if (skb_queue_len(&sk->sk_write_queue) == 1) {
 268                 skb->csum = csum_partial(icmp6h,
 269                                         sizeof(struct icmp6hdr), skb->csum);
 270                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 271                                                       &fl6->daddr,
 272                                                       len, fl6->flowi6_proto,
 273                                                       skb->csum);
 274         } else {
 275                 __wsum tmp_csum = 0;
 276
 277                 skb_queue_walk(&sk->sk_write_queue, skb) {
 278                         tmp_csum = csum_add(tmp_csum, skb->csum);
 279                 }
 280
 281                 tmp_csum = csum_partial(icmp6h,
 282                                         sizeof(struct icmp6hdr), tmp_csum);
 283                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
 284                                                       &fl6->daddr,
 285                                                       len, fl6->flowi6_proto,
 286                                                       tmp_csum);
 287         }
 288         ip6_push_pending_frames(sk);
 289 }
 290
/* Context handed to icmpv6_getfrag() through ip6_append_data(). */
struct icmpv6_msg {
        struct sk_buff  *skb;           /* packet the payload is copied from */
        int             offset;         /* start of the payload within skb */
        uint8_t         type;           /* ICMPv6 type of the outgoing message */
};
 296
 297 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 298 {
 299         struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
 300         struct sk_buff *org_skb = msg->skb;
 301         __wsum csum = 0;
 302
 303         csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
 304                                       to, len, csum);
 305         skb->csum = csum_block_add(skb->csum, csum, odd);
 306         if (!(msg->type & ICMPV6_INFOMSG_MASK))
 307                 nf_ct_attach(skb, org_skb);
 308         return 0;
 309 }
 310
 311 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 312 static void mip6_addr_swap(struct sk_buff *skb)
 313 {
 314         struct ipv6hdr *iph = ipv6_hdr(skb);
 315         struct inet6_skb_parm *opt = IP6CB(skb);
 316         struct ipv6_destopt_hao *hao;
 317         struct in6_addr tmp;
 318         int off;
 319
 320         if (opt->dsthao) {
 321                 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 322                 if (likely(off >= 0)) {
 323                         hao = (struct ipv6_destopt_hao *)
 324                                         (skb_network_header(skb) + off);
 325                         tmp = iph->saddr;
 326                         iph->saddr = hao->addr;
 327                         hao->addr = tmp;
 328                 }
 329         }
 330 }
 331 #else
 332 static inline void mip6_addr_swap(struct sk_buff *skb) {}
 333 #endif
 334
/*
 * Find the dst entry on which the ICMPv6 error described by @fl6 is sent,
 * running the result through the xfrm lookups.
 *
 * @net: network namespace for the lookups
 * @skb: offending packet; used only to decode the reverse flow (fl2)
 * @sk:  socket handed to ip6_dst_lookup() and xfrm_lookup()
 * @fl6: flow of the outgoing ICMPv6 message
 *
 * Returns a dst entry or an ERR_PTR() value. icmp6_send() releases the
 * returned dst with dst_release() once it is done with it.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
                                             struct sk_buff *skb,
                                             struct sock *sk,
                                             struct flowi6 *fl6)
{
        struct dst_entry *dst, *dst2;
        struct flowi6 fl2;
        int err;

        err = ip6_dst_lookup(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);

        /*
         * We won't send icmp if the destination is known
         * anycast.
         */
        if (ipv6_anycast_destination(dst, &fl6->daddr)) {
                net_dbg_ratelimited("icmp6_send: acast source\n");
                dst_release(dst);
                return ERR_PTR(-EINVAL);
        }

        /* No need to clone since we're just using its address. */
        dst2 = dst;

        /*
         * Forward lookup. A result that differs from the plain route is
         * returned right away. If the same dst comes back, or the lookup
         * failed with -EPERM, fall through to the reverse-flow lookup
         * below; any other error is returned to the caller.
         */
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
        if (!IS_ERR(dst)) {
                if (dst != dst2)
                        return dst;
        } else {
                if (PTR_ERR(dst) == -EPERM)
                        dst = NULL;
                else
                        return dst;
        }

        /* Build fl2 from the offending packet, with the direction reversed. */
        err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
        if (err)
                goto relookup_failed;

        err = ip6_dst_lookup(net, sk, &dst2, &fl2);
        if (err)
                goto relookup_failed;

        /*
         * Second lookup with XFRM_LOOKUP_ICMP. On success its result
         * replaces whatever the forward lookup left in dst. On -EPERM the
         * error pointer is returned, dropping dst. Any other error falls
         * back to the forward result, if there is one.
         */
        dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
        if (!IS_ERR(dst2)) {
                dst_release(dst);
                dst = dst2;
        } else {
                err = PTR_ERR(dst2);
                if (err == -EPERM) {
                        dst_release(dst);
                        return dst2;
                } else
                        goto relookup_failed;
        }

        /* Reached on success as well: dst is non-NULL then and is returned. */
relookup_failed:
        if (dst)
                return dst;
        return ERR_PTR(err);
}
 398
 399 static int icmp6_iif(const struct sk_buff *skb)
 400 {
 401         int iif = skb->dev->ifindex;
 402
 403         /* for local traffic to local address, skb dev is the loopback
 404          * device. Check if there is a dst attached to the skb and if so
 405          * get the real device index. Same is needed for replies to a link
 406          * local address on a device enslaved to an L3 master device
 407          */
 408         if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 409                 const struct rt6_info *rt6 = skb_rt6_info(skb);
 410
 411                 if (rt6)
 412                         iif = rt6->rt6i_idev->dev->ifindex;
 413         }
 414
 415         return iif;
 416 }
 417
 418 /*
 419  *      Send an ICMP message in response to a packet in error
 420  */
 421 static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 422                        const struct in6_addr *force_saddr)
 423 {
 424         struct net *net = dev_net(skb->dev);
 425         struct inet6_dev *idev = NULL;
 426         struct ipv6hdr *hdr = ipv6_hdr(skb);
 427         struct sock *sk;
 428         struct ipv6_pinfo *np;
 429         const struct in6_addr *saddr = NULL;
 430         struct dst_entry *dst;
 431         struct icmp6hdr tmp_hdr;
 432         struct flowi6 fl6;
 433         struct icmpv6_msg msg;
 434         struct ipcm6_cookie ipc6;
 435         int iif = 0;
 436         int addr_type = 0;
 437         int len;
 438         u32 mark = IP6_REPLY_MARK(net, skb->mark);
 439
 440         if ((u8 *)hdr < skb->head ||
 441             (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
 442                 return;
 443
 444         /*
 445          *      Make sure we respect the rules
 446          *      i.e. RFC 1885 2.4(e)
 447          *      Rule (e.1) is enforced by not using icmp6_send
 448          *      in any code that processes icmp errors.
 449          */
 450         addr_type = ipv6_addr_type(&hdr->daddr);
 451
 452         if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
 453             ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
 454                 saddr = &hdr->daddr;
 455
 456         /*
 457          *      Dest addr check
 458          */
 459
 460         if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
 461                 if (type != ICMPV6_PKT_TOOBIG &&
 462                     !(type == ICMPV6_PARAMPROB &&
 463                       code == ICMPV6_UNK_OPTION &&
 464                       (opt_unrec(skb, info))))
 465                         return;
 466
 467                 saddr = NULL;
 468         }
 469
 470         addr_type = ipv6_addr_type(&hdr->saddr);
 471
 472         /*
 473          *      Source addr check
 474          */
 475
 476         if (__ipv6_addr_needs_scope_id(addr_type)) {
 477                 iif = icmp6_iif(skb);
 478         } else {
 479                 dst = skb_dst(skb);
 480                 iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
 481         }
 482
 483         /*
 484          *      Must not send error if the source does not uniquely
 485          *      identify a single node (RFC2463 Section 2.4).
 486          *      We check unspecified / multicast addresses here,
 487          *      and anycast addresses will be checked later.
 488          */
 489         if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
 490                 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
 491                                     &hdr->saddr, &hdr->daddr);
 492                 return;
 493         }
 494
 495         /*
 496          *      Never answer to a ICMP packet.
 497          */
 498         if (is_ineligible(skb)) {
 499                 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
 500                                     &hdr->saddr, &hdr->daddr);
 501                 return;
 502         }
 503
 504         /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
 505         local_bh_disable();
 506
 507         /* Check global sysctl_icmp_msgs_per_sec ratelimit */
 508         if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
 509                 goto out_bh_enable;
 510
 511         mip6_addr_swap(skb);
 512
 513         memset(&fl6, 0, sizeof(fl6));
 514         fl6.flowi6_proto = IPPROTO_ICMPV6;
 515         fl6.daddr = hdr->saddr;
 516         if (force_saddr)
 517                 saddr = force_saddr;
 518         if (saddr)
 519                 fl6.saddr = *saddr;
 520         fl6.flowi6_mark = mark;
 521         fl6.flowi6_oif = iif;
 522         fl6.fl6_icmp_type = type;
 523         fl6.fl6_icmp_code = code;
 524         fl6.flowi6_uid = sock_net_uid(net, NULL);
 525         fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
 526         security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 527
 528         sk = icmpv6_xmit_lock(net);
 529         if (!sk)
 530                 goto out_bh_enable;
 531
 532         sk->sk_mark = mark;
 533         np = inet6_sk(sk);
 534
 535         if (!icmpv6_xrlim_allow(sk, type, &fl6))
 536                 goto out;
 537
 538         tmp_hdr.icmp6_type = type;
 539         tmp_hdr.icmp6_code = code;
 540         tmp_hdr.icmp6_cksum = 0;
 541         tmp_hdr.icmp6_pointer = htonl(info);
 542
 543         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 544                 fl6.flowi6_oif = np->mcast_oif;
 545         else if (!fl6.flowi6_oif)
 546                 fl6.flowi6_oif = np->ucast_oif;
 547
 548         ipcm6_init_sk(&ipc6, np);
 549         fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 550
 551         dst = icmpv6_route_lookup(net, skb, sk, &fl6);
 552         if (IS_ERR(dst))
 553                 goto out;
 554
 555         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 556
 557         msg.skb = skb;
 558         msg.offset = skb_network_offset(skb);
 559         msg.type = type;
 560
 561         len = skb->len - msg.offset;
 562         len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
 563         if (len < 0) {
 564                 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
 565                                     &hdr->saddr, &hdr->daddr);
 566                 goto out_dst_release;
 567         }
 568
 569         rcu_read_lock();
 570         idev = __in6_dev_get(skb->dev);
 571
 572         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 573                             len + sizeof(struct icmp6hdr),
 574                             sizeof(struct icmp6hdr),
 575                             &ipc6, &fl6, (struct rt6_info *)dst,
 576                             MSG_DONTWAIT)) {
 577                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 578                 ip6_flush_pending_frames(sk);
 579         } else {
 580                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
 581                                            len + sizeof(struct icmp6hdr));
 582         }
 583         rcu_read_unlock();
 584 out_dst_release:
 585         dst_release(dst);
 586 out:
 587         icmpv6_xmit_unlock(sk);
 588 out_bh_enable:
 589         local_bh_enable();
 590 }
 591
/* Slightly more convenient version of icmp6_send for parameter problems.
 *
 * Sends an ICMPV6_PARAMPROB message with the given @code in response to
 * @skb, passing @pos as the info value, and then frees @skb with
 * kfree_skb(). The skb is freed whether or not a message was sent, so the
 * caller must not touch it afterwards.
 */
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
        icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
        kfree_skb(skb);
}
 599
/* Generate an ICMPv6 error for an IPv6 packet carried behind tunnel
 * header(s), if sufficient data bytes are available.
 *
 * @skb      : packet whose data starts with the tunnel header(s); the
 *             IPv6 packet follows after @nhs bytes. The IPv4 source
 *             address read through ip_hdr(skb) becomes, in v4-mapped
 *             form, the source address of the generated message.
 * @nhs      : size of the tunnel header(s) :
 *              Either an IPv4 header for SIT encap
 *                     an IPv4 header + GRE header for GRE encap
 * @type     : ICMP_TIME_EXCEEDED yields
 *             ICMPV6_TIME_EXCEED/ICMPV6_EXC_HOPLIMIT, anything else
 *             yields ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * @data_len : RFC 4884 length; ignored (treated as 0) unless it is at
 *             least 128, a multiple of 8 and no larger than skb->len
 *
 * Returns 1 if the packet is too short or no copy could be allocated,
 * 0 otherwise. @skb itself is not freed here.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
                               unsigned int data_len)
{
        struct in6_addr temp_saddr;
        struct rt6_info *rt;
        struct sk_buff *skb2;
        u32 info = 0;

        /* Need the tunnel header(s), an IPv6 header and 8 more bytes. */
        if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
                return 1;

        /* RFC 4884 (partial) support for ICMP extensions */
        if (data_len < 128 || (data_len & 7) || skb->len < data_len)
                data_len = 0;

        /* The data is rewritten below when data_len is set, so take a
         * full copy in that case; a clone is enough otherwise.
         */
        skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

        if (!skb2)
                return 1;

        /* Make the inner IPv6 header the network header of skb2. */
        skb_dst_drop(skb2);
        skb_pull(skb2, nhs);
        skb_reset_network_header(skb2);

        /* Look up a route for the inner source address and, if one with
         * a device is found, use that device for skb2.
         */
        rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
                        skb, 0);

        if (rt && rt->dst.dev)
                skb2->dev = rt->dst.dev;

        ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

        if (data_len) {
                /* RFC 4884 (partial) support :
                 * insert 0 padding at the end, before the extensions
                 */
                __skb_push(skb2, nhs);
                skb_reset_network_header(skb2);
                memmove(skb2->data, skb2->data + nhs, data_len - nhs);
                memset(skb2->data + data_len - nhs, 0, nhs);
                /* RFC 4884 4.5 : Length is measured in 64-bit words,
                 * and stored in reserved[0]
                 */
                info = (data_len/8) << 24;
        }
        if (type == ICMP_TIME_EXCEEDED)
                icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
                           info, &temp_saddr);
        else
                icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
                           info, &temp_saddr);
        if (rt)
                ip6_rt_put(rt);

        kfree_skb(skb2);

        return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
 665
/*
 * Answer the ICMPv6 echo request in @skb with an echo reply.
 *
 * The reply reuses the request's ICMPv6 header with only the type
 * changed, is addressed to the request's source, and quotes skb->len
 * bytes starting at skb->data. Nothing is sent when the per-cpu ICMPv6
 * socket is busy or no route is found.
 */
static void icmpv6_echo_reply(struct sk_buff *skb)
{
        struct net *net = dev_net(skb->dev);
        struct sock *sk;
        struct inet6_dev *idev;
        struct ipv6_pinfo *np;
        const struct in6_addr *saddr = NULL;
        struct icmp6hdr *icmph = icmp6_hdr(skb);
        struct icmp6hdr tmp_hdr;
        struct flowi6 fl6;
        struct icmpv6_msg msg;
        struct dst_entry *dst;
        struct ipcm6_cookie ipc6;
        u32 mark = IP6_REPLY_MARK(net, skb->mark);

        /* Reply from the address the request was sent to ... */
        saddr = &ipv6_hdr(skb)->daddr;

        /* ... unless that address is not unicast. An anycast destination
         * is still used when the anycast_src_echo_reply sysctl is set.
         */
        if (!ipv6_unicast_destination(skb) &&
            !(net->ipv6.sysctl.anycast_src_echo_reply &&
              ipv6_anycast_destination(skb_dst(skb), saddr)))
                saddr = NULL;

        /* Same header as the request; only the type differs. */
        memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
        tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;

        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_ICMPV6;
        fl6.daddr = ipv6_hdr(skb)->saddr;
        if (saddr)
                fl6.saddr = *saddr;
        fl6.flowi6_oif = icmp6_iif(skb);
        fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
        fl6.flowi6_mark = mark;
        fl6.flowi6_uid = sock_net_uid(net, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

        /* icmpv6_xmit_lock() must be called with BH disabled. */
        local_bh_disable();
        sk = icmpv6_xmit_lock(net);
        if (!sk)
                goto out_bh_enable;
        sk->sk_mark = mark;
        np = inet6_sk(sk);

        if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
                fl6.flowi6_oif = np->mcast_oif;
        else if (!fl6.flowi6_oif)
                fl6.flowi6_oif = np->ucast_oif;

        if (ip6_dst_lookup(net, sk, &dst, &fl6))
                goto out;
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
        if (IS_ERR(dst))
                goto out;

        idev = __in6_dev_get(skb->dev);

        /* Payload is copied from the start of skb->data.
         * NOTE(review): icmpv6_rcv() pulls the ICMPv6 header before
         * dispatching, so skb->data presumably points at the echo data
         * here -- confirm.
         */
        msg.skb = skb;
        msg.offset = 0;
        msg.type = ICMPV6_ECHO_REPLY;

        /* The reply carries the request's DS field as its traffic class. */
        ipcm6_init_sk(&ipc6, np);
        ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
        ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));

        if (ip6_append_data(sk, icmpv6_getfrag, &msg,
                            skb->len + sizeof(struct icmp6hdr),
                            sizeof(struct icmp6hdr), &ipc6, &fl6,
                            (struct rt6_info *)dst, MSG_DONTWAIT)) {
                __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
                ip6_flush_pending_frames(sk);
        } else {
                icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
                                           skb->len + sizeof(struct icmp6hdr));
        }
        dst_release(dst);
out:
        icmpv6_xmit_unlock(sk);
out_bh_enable:
        local_bh_enable();
}
 746
 747 void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
 748 {
 749         const struct inet6_protocol *ipprot;
 750         int inner_offset;
 751         __be16 frag_off;
 752         u8 nexthdr;
 753         struct net *net = dev_net(skb->dev);
 754
 755         if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 756                 goto out;
 757
 758         nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
 759         if (ipv6_ext_hdr(nexthdr)) {
 760                 /* now skip over extension headers */
 761                 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
 762                                                 &nexthdr, &frag_off);
 763                 if (inner_offset < 0)
 764                         goto out;
 765         } else {
 766                 inner_offset = sizeof(struct ipv6hdr);
 767         }
 768
 769         /* Checkin header including 8 bytes of inner protocol header. */
 770         if (!pskb_may_pull(skb, inner_offset+8))
 771                 goto out;
 772
 773         /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 774            Without this we will not able f.e. to make source routed
 775            pmtu discovery.
 776            Corresponding argument (opt) to notifiers is already added.
 777            --ANK (980726)
 778          */
 779
 780         ipprot = rcu_dereference(inet6_protos[nexthdr]);
 781         if (ipprot && ipprot->err_handler)
 782                 ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
 783
 784         raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
 785         return;
 786
 787 out:
 788         __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
 789 }
 790
 791 /*
 792  *      Handle icmp messages
 793  */
 794
 795 static int icmpv6_rcv(struct sk_buff *skb)
 796 {
 797         struct net *net = dev_net(skb->dev);
 798         struct net_device *dev = skb->dev;
 799         struct inet6_dev *idev = __in6_dev_get(dev);
 800         const struct in6_addr *saddr, *daddr;
 801         struct icmp6hdr *hdr;
 802         u8 type;
 803         bool success = false;
 804
 805         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 806                 struct sec_path *sp = skb_sec_path(skb);
 807                 int nh;
 808
 809                 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
 810                                  XFRM_STATE_ICMP))
 811                         goto drop_no_count;
 812
 813                 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
 814                         goto drop_no_count;
 815
 816                 nh = skb_network_offset(skb);
 817                 skb_set_network_header(skb, sizeof(*hdr));
 818
 819                 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
 820                         goto drop_no_count;
 821
 822                 skb_set_network_header(skb, nh);
 823         }
 824
 825         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
 826
 827         saddr = &ipv6_hdr(skb)->saddr;
 828         daddr = &ipv6_hdr(skb)->daddr;
 829
 830         if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
 831                 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
 832                                     saddr, daddr);
 833                 goto csum_error;
 834         }
 835
 836         if (!pskb_pull(skb, sizeof(*hdr)))
 837                 goto discard_it;
 838
 839         hdr = icmp6_hdr(skb);
 840
 841         type = hdr->icmp6_type;
 842
 843         ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
 844
 845         switch (type) {
 846         case ICMPV6_ECHO_REQUEST:
 847                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
 848                         icmpv6_echo_reply(skb);
 849                 break;
 850
 851         case ICMPV6_ECHO_REPLY:
 852                 success = ping_rcv(skb);
 853                 break;
 854
 855         case ICMPV6_PKT_TOOBIG:
 856                 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
 857                    standard destination cache. Seems, only "advanced"
 858                    destination cache will allow to solve this problem
 859                    --ANK (980726)
 860                  */
 861                 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 862                         goto discard_it;
 863                 hdr = icmp6_hdr(skb);
 864
 865                 /* to notify */
 866                 /* fall through */
 867         case ICMPV6_DEST_UNREACH:
 868         case ICMPV6_TIME_EXCEED:
 869         case ICMPV6_PARAMPROB:
 870                 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 871                 break;
 872
 873         case NDISC_ROUTER_SOLICITATION:
 874         case NDISC_ROUTER_ADVERTISEMENT:
 875         case NDISC_NEIGHBOUR_SOLICITATION:
 876         case NDISC_NEIGHBOUR_ADVERTISEMENT:
 877         case NDISC_REDIRECT:
 878                 ndisc_rcv(skb);
 879                 break;
 880
 881         case ICMPV6_MGM_QUERY:
 882                 igmp6_event_query(skb);
 883                 break;
 884
 885         case ICMPV6_MGM_REPORT:
 886                 igmp6_event_report(skb);
 887                 break;
 888
 889         case ICMPV6_MGM_REDUCTION:
 890         case ICMPV6_NI_QUERY:
 891         case ICMPV6_NI_REPLY:
 892         case ICMPV6_MLD2_REPORT:
 893         case ICMPV6_DHAAD_REQUEST:
 894         case ICMPV6_DHAAD_REPLY:
 895         case ICMPV6_MOBILE_PREFIX_SOL:
 896         case ICMPV6_MOBILE_PREFIX_ADV:
 897                 break;
 898
 899         default:
 900                 /* informational */
 901                 if (type & ICMPV6_INFOMSG_MASK)
 902                         break;
 903
 904                 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
 905                                     saddr, daddr);
 906
 907                 /*
 908                  * error of unknown type.
 909                  * must pass to upper level
 910                  */
 911
 912                 icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
 913         }
 914
 915         /* until the v6 path can be better sorted assume failure and
 916          * preserve the status quo behaviour for the rest of the paths to here
 917          */
 918         if (success)
 919                 consume_skb(skb);
 920         else
 921                 kfree_skb(skb);
 922
 923         return 0;
 924
 925 csum_error:
 926         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 927 discard_it:
 928         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 929 drop_no_count:
 930         kfree_skb(skb);
 931         return 0;
 932 }
 933
 934 void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 935                       u8 type,
 936                       const struct in6_addr *saddr,
 937                       const struct in6_addr *daddr,
 938                       int oif)
 939 {
 940         memset(fl6, 0, sizeof(*fl6));
 941         fl6->saddr = *saddr;
 942         fl6->daddr = *daddr;
 943         fl6->flowi6_proto       = IPPROTO_ICMPV6;
 944         fl6->fl6_icmp_type      = type;
 945         fl6->fl6_icmp_code      = 0;
 946         fl6->flowi6_oif         = oif;
 947         security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 948 }
 949
 950 static int __net_init icmpv6_sk_init(struct net *net)
 951 {
 952         struct sock *sk;
 953         int err, i, j;
 954
 955         net->ipv6.icmp_sk =
 956                 kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
 957         if (!net->ipv6.icmp_sk)
 958                 return -ENOMEM;
 959
 960         for_each_possible_cpu(i) {
 961                 err = inet_ctl_sock_create(&sk, PF_INET6,
 962                                            SOCK_RAW, IPPROTO_ICMPV6, net);
 963                 if (err < 0) {
 964                         pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
 965                                err);
 966                         goto fail;
 967                 }
 968
 969                 net->ipv6.icmp_sk[i] = sk;
 970
 971                 /* Enough space for 2 64K ICMP packets, including
 972                  * sk_buff struct overhead.
 973                  */
 974                 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 975         }
 976         return 0;
 977
 978  fail:
 979         for (j = 0; j < i; j++)
 980                 inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
 981         kfree(net->ipv6.icmp_sk);
 982         return err;
 983 }
 984
 985 static void __net_exit icmpv6_sk_exit(struct net *net)
 986 {
 987         int i;
 988
 989         for_each_possible_cpu(i) {
 990                 inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
 991         }
 992         kfree(net->ipv6.icmp_sk);
 993 }
 994
 995 static struct pernet_operations icmpv6_sk_ops = {
 996         .init = icmpv6_sk_init,
 997         .exit = icmpv6_sk_exit,
 998 };
 999
1000 int __init icmpv6_init(void)
1001 {
1002         int err;
1003
1004         err = register_pernet_subsys(&icmpv6_sk_ops);
1005         if (err < 0)
1006                 return err;
1007
1008         err = -EAGAIN;
1009         if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1010                 goto fail;
1011
1012         err = inet6_register_icmp_sender(icmp6_send);
1013         if (err)
1014                 goto sender_reg_err;
1015         return 0;
1016
1017 sender_reg_err:
1018         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1019 fail:
1020         pr_err("Failed to register ICMP6 protocol\n");
1021         unregister_pernet_subsys(&icmpv6_sk_ops);
1022         return err;
1023 }
1024
1025 void icmpv6_cleanup(void)
1026 {
1027         inet6_unregister_icmp_sender(icmp6_send);
1028         unregister_pernet_subsys(&icmpv6_sk_ops);
1029         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1030 }
1031
1032
/*
 * Lookup table indexed by the code of an ICMPv6 Destination Unreachable
 * message.  For each code it holds the errno value (@err) and the flag
 * (@fatal) that icmpv6_err_convert() hands back to its caller.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{ .err = ENETUNREACH,  .fatal = 0 },	/* NOROUTE */
	{ .err = EACCES,       .fatal = 1 },	/* ADM_PROHIBITED */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
	{ .err = EHOSTUNREACH, .fatal = 0 },	/* ADDR_UNREACH */
	{ .err = ECONNREFUSED, .fatal = 1 },	/* PORT_UNREACH */
	{ .err = EACCES,       .fatal = 1 },	/* POLICY_FAIL */
	{ .err = EACCES,       .fatal = 1 },	/* REJECT_ROUTE */
};
1066
1067 int icmpv6_err_convert(u8 type, u8 code, int *err)
1068 {
1069         int fatal = 0;
1070
1071         *err = EPROTO;
1072
1073         switch (type) {
1074         case ICMPV6_DEST_UNREACH:
1075                 fatal = 1;
1076                 if (code < ARRAY_SIZE(tab_unreach)) {
1077                         *err  = tab_unreach[code].err;
1078                         fatal = tab_unreach[code].fatal;
1079                 }
1080                 break;
1081
1082         case ICMPV6_PKT_TOOBIG:
1083                 *err = EMSGSIZE;
1084                 break;
1085
1086         case ICMPV6_PARAMPROB:
1087                 *err = EPROTO;
1088                 fatal = 1;
1089                 break;
1090
1091         case ICMPV6_TIME_EXCEED:
1092                 *err = EHOSTUNREACH;
1093                 break;
1094         }
1095
1096         return fatal;
1097 }
1098 EXPORT_SYMBOL(icmpv6_err_convert);
1099
1100 #ifdef CONFIG_SYSCTL
1101 static struct ctl_table ipv6_icmp_table_template[] = {
1102         {
1103                 .procname       = "ratelimit",
1104                 .data           = &init_net.ipv6.sysctl.icmpv6_time,
1105                 .maxlen         = sizeof(int),
1106                 .mode           = 0644,
1107                 .proc_handler   = proc_dointvec_ms_jiffies,
1108         },
1109         {
1110                 .procname       = "echo_ignore_all",
1111                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1112                 .maxlen         = sizeof(int),
1113                 .mode           = 0644,
1114                 .proc_handler = proc_dointvec,
1115         },
1116         { },
1117 };
1118
1119 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1120 {
1121         struct ctl_table *table;
1122
1123         table = kmemdup(ipv6_icmp_table_template,
1124                         sizeof(ipv6_icmp_table_template),
1125                         GFP_KERNEL);
1126
1127         if (table) {
1128                 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1129                 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1130         }
1131         return table;
1132 }
1133 #endif