/* net/ipv6/icmp.c */
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Internet Control Message Protocol (ICMPv6)
4 * Linux INET6 implementation
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
9 * Based on net/ipv4/icmp.c
11 * RFC 1885
15 * Changes:
17 * Andi Kleen : exception handling
18 * Andi Kleen add rate limits. never reply to a icmp.
19 * add more length checks and other fixes.
20 * yoshfuji : ensure to sent parameter problem for
21 * fragments.
22 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
23 * Randy Dunlap and
24 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
25 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
28 #define pr_fmt(fmt) "IPv6: " fmt
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
51 #include <net/ip.h>
52 #include <net/sock.h>
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
70 #include <linux/uaccess.h>
/* Per-CPU raw socket used to transmit locally generated ICMPv6 messages. */
static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75 u8 type, u8 code, int offset, __be32 info)
77 /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78 struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79 struct net *net = dev_net(skb->dev);
81 if (type == ICMPV6_PKT_TOOBIG)
82 ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83 else if (type == NDISC_REDIRECT)
84 ip6_redirect(skb, net, skb->dev->ifindex, 0,
85 sock_net_uid(net, NULL));
87 if (!(type & ICMPV6_INFOMSG_MASK))
88 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89 ping_err(skb, offset, ntohl(info));
91 return 0;
static int icmpv6_rcv(struct sk_buff *skb);

/* Registration record for IPPROTO_ICMPV6 in the inet6 protocol table. */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
105 struct sock *sk;
107 sk = this_cpu_read(ipv6_icmp_sk);
108 if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109 /* This can happen if the output path (f.e. SIT or
110 * ip6ip6 tunnel) signals dst_link_failure() for an
111 * outgoing ICMP6 packet.
113 return NULL;
115 sock_net_set(sk, net);
116 return sk;
119 static void icmpv6_xmit_unlock(struct sock *sk)
121 sock_net_set(sk, &init_net);
122 spin_unlock(&sk->sk_lock.slock);
126 * Figure out, may we reply to this packet with icmp error.
128 * We do not reply, if:
129 * - it was icmp error message.
130 * - it is truncated, so that it is known, that protocol is ICMPV6
131 * (i.e. in the middle of some exthdr)
133 * --ANK (980726)
136 static bool is_ineligible(const struct sk_buff *skb)
138 int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139 int len = skb->len - ptr;
140 __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141 __be16 frag_off;
143 if (len < 0)
144 return true;
146 ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147 if (ptr < 0)
148 return false;
149 if (nexthdr == IPPROTO_ICMPV6) {
150 u8 _type, *tp;
151 tp = skb_header_pointer(skb,
152 ptr+offsetof(struct icmp6hdr, icmp6_type),
153 sizeof(_type), &_type);
155 /* Based on RFC 8200, Section 4.5 Fragment Header, return
156 * false if this is a fragment packet with no icmp header info.
158 if (!tp && frag_off != 0)
159 return false;
160 else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161 return true;
163 return false;
166 static bool icmpv6_mask_allow(struct net *net, int type)
168 if (type > ICMPV6_MSG_MAX)
169 return true;
171 /* Limit if icmp type is set in ratemask. */
172 if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 return true;
175 return false;
178 static bool icmpv6_global_allow(struct net *net, int type,
179 bool *apply_ratelimit)
181 if (icmpv6_mask_allow(net, type))
182 return true;
184 if (icmp_global_allow(net)) {
185 *apply_ratelimit = true;
186 return true;
188 __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
189 return false;
/*
 * Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	/* Caller already decided no per-peer limiting is needed. */
	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
		res = inet_peer_xrlim_allow(peer, tmo);
		if (peer)
			inet_putpeer(peer);
	}
	if (!res)
		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
				  ICMP6_MIB_RATELIMITHOST);
	else
		/* Consume a token from the global bucket only on success. */
		icmp_global_consume(net);
	dst_release(dst);
	return res;
}
239 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
240 struct flowi6 *fl6)
242 struct net *net = sock_net(sk);
243 struct dst_entry *dst;
244 bool res = false;
246 dst = ip6_route_output(net, sk, fl6);
247 if (!dst->error) {
248 struct rt6_info *rt = dst_rt6_info(dst);
249 struct in6_addr prefsrc;
251 rt6_get_prefsrc(rt, &prefsrc);
252 res = !ipv6_addr_any(&prefsrc);
254 dst_release(dst);
255 return res;
259 * an inline helper for the "simple" if statement below
260 * checks if parameter problem report is caused by an
261 * unrecognized IPv6 option that has the Option Type
262 * highest-order two bits set to 10
265 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
267 u8 _optval, *op;
269 offset += skb_network_offset(skb);
270 op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
271 if (!op)
272 return true;
273 return (*op & 0xC0) == 0x80;
/* Fill in the ICMPv6 header @thdr and its checksum for the fragments
 * queued on @sk's write queue, then transmit them.  @len is the total
 * ICMPv6 length used for the pseudo-header checksum.
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(&sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Single fragment: fold the header into its checksum. */
		skb->csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: sum the per-skb checksums first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(tmp_csum, skb->csum);
		}

		tmp_csum = csum_partial(icmp6h,
					sizeof(struct icmp6hdr), tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
						      &fl6->daddr,
						      len, fl6->flowi6_proto,
						      tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
/* Context passed to icmpv6_getfrag() while building an outgoing message. */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* original packet being quoted */
	int		offset;	/* payload offset within @skb */
	uint8_t		type;	/* outgoing icmp6_type */
};
320 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
322 struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
323 struct sk_buff *org_skb = msg->skb;
324 __wsum csum;
326 csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
327 to, len);
328 skb->csum = csum_block_add(skb->csum, csum, odd);
329 if (!(msg->type & ICMPV6_INFOMSG_MASK))
330 nf_ct_attach(skb, org_skb);
331 return 0;
334 #if IS_ENABLED(CONFIG_IPV6_MIP6)
335 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
337 struct ipv6hdr *iph = ipv6_hdr(skb);
338 struct ipv6_destopt_hao *hao;
339 int off;
341 if (opt->dsthao) {
342 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
343 if (likely(off >= 0)) {
344 hao = (struct ipv6_destopt_hao *)
345 (skb_network_header(skb) + off);
346 swap(iph->saddr, hao->addr);
350 #else
351 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
352 #endif
/* Resolve the route for an outgoing ICMPv6 error, taking XFRM policy
 * into account.  On -EPERM from the forward transform lookup a reverse
 * session decode is attempted so errors about tunneled traffic can
 * still be routed.  Returns a held dst or an ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast unless we need to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(-EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
	if (!IS_ERR(dst)) {
		if (dst != dst2)
			return dst;
	} else {
		if (PTR_ERR(dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	/* -EPERM path: retry with the reverse-decoded flow (fl2). */
	err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, &dst2, &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
	if (!IS_ERR(dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* dst may still hold the original (NULL'ed only on -EPERM above). */
	if (dst)
		return dst;
	return ERR_PTR(err);
}
419 static struct net_device *icmp6_dev(const struct sk_buff *skb)
421 struct net_device *dev = skb->dev;
423 /* for local traffic to local address, skb dev is the loopback
424 * device. Check if there is a dst attached to the skb and if so
425 * get the real device index. Same is needed for replies to a link
426 * local address on a device enslaved to an L3 master device
428 if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
429 const struct rt6_info *rt6 = skb_rt6_info(skb);
431 /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
432 * and ip6_null_entry could be set to skb if no route is found.
434 if (rt6 && rt6->rt6i_idev)
435 dev = rt6->rt6i_idev->dev;
438 return dev;
441 static int icmp6_iif(const struct sk_buff *skb)
443 return icmp6_dev(skb)->ifindex;
/*
 * Send an ICMP message in response to a packet in error
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity-check that the quoted IPv6 header lies within the skb. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;
	net = dev_net(skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(&hdr->daddr);

	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, info))))
			return;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(&hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		return;
	}

	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
					   inet6_sk(sk)->srcprefs,
					   &fl6.saddr);
			dev_put(in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	np = inet6_sk(sk);

	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
		goto out;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	ipcm6_init_sk(&ipc6, sk);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
	if (IS_ERR(dst))
		goto out;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote no more of the offender than fits in the minimum MTU. */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	rcu_read_lock();
	idev = __in6_dev_get(skb->dev);

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr),
			    &ipc6, &fl6, dst_rt6_info(dst),
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   len + sizeof(struct icmp6hdr));
	}
	rcu_read_unlock();
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
}
EXPORT_SYMBOL(icmp6_send);
/* Slightly more convenient version of icmp6_send with drop reasons.
 * Sends a Parameter Problem for @skb, then frees it with @reason.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* Need a writable copy when extensions force payload edits. */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb2);
	skb_pull(skb2, nhs);
	skb_reset_network_header(skb2);

	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
			skb, 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Report the outer IPv4 source as a v4-mapped IPv6 address. */
	ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb2, nhs);
		skb_reset_network_header(skb2);
		memmove(skb2->data, skb2->data + nhs, data_len - nhs);
		memset(skb2->data + data_len - nhs, 0, nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
/* Build and send an Echo Reply (or Extended Echo Reply, RFC 8335) for
 * the Echo Request in @skb.  Returns SKB_CONSUMED on success, else the
 * drop reason for the caller to free @skb with.
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(skb_dst(skb), saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	/* Non-unicast destinations must not be echoed back as source. */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(&fl6, 0, sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, &dst, &fl6))
		goto out;
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
	if (IS_ERR(dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(&ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
			    skb->len + sizeof(struct icmp6hdr),
			    sizeof(struct icmp6hdr), &ipc6, &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
					   skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}
/* Dispatch a received ICMPv6 error to the protocol whose packet is
 * quoted inside it (via its err_handler) and to matching raw sockets.
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
						&nexthdr, &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}
/*
 * Handle icmp messages
 */
static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net(skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header past the ICMPv6
		 * header so the reverse policy check sees the inner packet.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, nh);
	}

	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_EXT_ECHO_REPLY:
		reason = ping_rcv(skb);
		break;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, hdr->icmp6_code,
				       hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}
1045 void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
1046 const struct in6_addr *saddr,
1047 const struct in6_addr *daddr, int oif)
1049 memset(fl6, 0, sizeof(*fl6));
1050 fl6->saddr = *saddr;
1051 fl6->daddr = *daddr;
1052 fl6->flowi6_proto = IPPROTO_ICMPV6;
1053 fl6->fl6_icmp_type = type;
1054 fl6->fl6_icmp_code = 0;
1055 fl6->flowi6_oif = oif;
1056 security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
/* Boot-time setup: create one control socket per possible CPU, register
 * the ICMPv6 protocol handler and the icmp sender hook.
 */
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, PF_INET6,
					   SOCK_RAW, IPPROTO_ICMPV6, &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}
1097 void icmpv6_cleanup(void)
1099 inet6_unregister_icmp_sender(icmp6_send);
1100 inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
/* Map ICMPV6_DEST_UNREACH codes (used as the index) to an errno value
 * and whether the error is fatal for the connection.
 */
static const struct icmp6_err {
	int err;
	int fatal;
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1138 int icmpv6_err_convert(u8 type, u8 code, int *err)
1140 int fatal = 0;
1142 *err = EPROTO;
1144 switch (type) {
1145 case ICMPV6_DEST_UNREACH:
1146 fatal = 1;
1147 if (code < ARRAY_SIZE(tab_unreach)) {
1148 *err = tab_unreach[code].err;
1149 fatal = tab_unreach[code].fatal;
1151 break;
1153 case ICMPV6_PKT_TOOBIG:
1154 *err = EMSGSIZE;
1155 break;
1157 case ICMPV6_PARAMPROB:
1158 *err = EPROTO;
1159 fatal = 1;
1160 break;
1162 case ICMPV6_TIME_EXCEED:
1163 *err = EHOSTUNREACH;
1164 break;
1167 return fatal;
1169 EXPORT_SYMBOL(icmpv6_err_convert);
1171 #ifdef CONFIG_SYSCTL
/* Template for the per-namespace net.ipv6.icmp sysctl table; the .data
 * pointers are rewritten per-netns in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};
1219 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1221 struct ctl_table *table;
1223 table = kmemdup(ipv6_icmp_table_template,
1224 sizeof(ipv6_icmp_table_template),
1225 GFP_KERNEL);
1227 if (table) {
1228 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1229 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1230 table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1231 table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1232 table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1233 table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
1235 return table;
/* Number of entries in the template, for sysctl registration. */
size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
1242 #endif