2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to a icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to sent parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sched.h>
41 #include <linux/sockios.h>
42 #include <linux/net.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
70 DEFINE_SNMP_STAT(struct icmpv6_mib
, icmpv6_statistics
);
73 * The ICMP socket(s). This is the most convenient way to flow control
74 * our ICMP output as well as maintain a clean interface throughout
75 * all layers. All Socketless IP sends will soon be gone.
77 * On SMP we have one ICMP socket per-cpu.
79 static DEFINE_PER_CPU(struct socket
*, __icmpv6_socket
) = NULL
;
80 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
82 static int icmpv6_rcv(struct sk_buff
**pskb
, unsigned int *nhoffp
);
84 static struct inet6_protocol icmpv6_protocol
= {
85 .handler
= icmpv6_rcv
,
86 .flags
= INET6_PROTO_FINAL
,
89 static __inline__
int icmpv6_xmit_lock(void)
93 if (unlikely(!spin_trylock(&icmpv6_socket
->sk
->sk_lock
.slock
))) {
94 /* This can happen if the output path (f.e. SIT or
95 * ip6ip6 tunnel) signals dst_link_failure() for an
96 * outgoing ICMP6 packet.
104 static __inline__
void icmpv6_xmit_unlock(void)
106 spin_unlock_bh(&icmpv6_socket
->sk
->sk_lock
.slock
);
110 * Slightly more convenient version of icmpv6_send.
112 void icmpv6_param_prob(struct sk_buff
*skb
, int code
, int pos
)
114 icmpv6_send(skb
, ICMPV6_PARAMPROB
, code
, pos
, skb
->dev
);
119 * Figure out, may we reply to this packet with icmp error.
121 * We do not reply, if:
122 * - it was icmp error message.
123 * - it is truncated, so that it is known, that protocol is ICMPV6
124 * (i.e. in the middle of some exthdr)
129 static int is_ineligible(struct sk_buff
*skb
)
131 int ptr
= (u8
*)(skb
->nh
.ipv6h
+1) - skb
->data
;
132 int len
= skb
->len
- ptr
;
133 __u8 nexthdr
= skb
->nh
.ipv6h
->nexthdr
;
138 ptr
= ipv6_skip_exthdr(skb
, ptr
, &nexthdr
, len
);
141 if (nexthdr
== IPPROTO_ICMPV6
) {
143 tp
= skb_header_pointer(skb
,
144 ptr
+offsetof(struct icmp6hdr
, icmp6_type
),
145 sizeof(_type
), &_type
);
147 !(*tp
& ICMPV6_INFOMSG_MASK
))
153 static int sysctl_icmpv6_time
= 1*HZ
;
156 * Check the ICMP output rate limit
158 static inline int icmpv6_xrlim_allow(struct sock
*sk
, int type
,
161 struct dst_entry
*dst
;
164 /* Informational messages are not limited. */
165 if (type
& ICMPV6_INFOMSG_MASK
)
168 /* Do not limit pmtu discovery, it would break it. */
169 if (type
== ICMPV6_PKT_TOOBIG
)
173 * Look up the output route.
174 * XXX: perhaps the expire for routing entries cloned by
175 * this lookup should be more aggressive (not longer than timeout).
177 dst
= ip6_route_output(sk
, fl
);
179 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES
);
180 } else if (dst
->dev
&& (dst
->dev
->flags
&IFF_LOOPBACK
)) {
183 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
184 int tmo
= sysctl_icmpv6_time
;
186 /* Give more bandwidth to wider prefixes. */
187 if (rt
->rt6i_dst
.plen
< 128)
188 tmo
>>= ((128 - rt
->rt6i_dst
.plen
)>>5);
190 res
= xrlim_allow(dst
, tmo
);
197 * an inline helper for the "simple" if statement below
198 * checks if parameter problem report is caused by an
199 * unrecognized IPv6 option that has the Option Type
200 * highest-order two bits set to 10
203 static __inline__
int opt_unrec(struct sk_buff
*skb
, __u32 offset
)
207 offset
+= skb
->nh
.raw
- skb
->data
;
208 op
= skb_header_pointer(skb
, offset
, sizeof(_optval
), &_optval
);
211 return (*op
& 0xC0) == 0x80;
214 static int icmpv6_push_pending_frames(struct sock
*sk
, struct flowi
*fl
, struct icmp6hdr
*thdr
, int len
)
217 struct icmp6hdr
*icmp6h
;
220 if ((skb
= skb_peek(&sk
->sk_write_queue
)) == NULL
)
223 icmp6h
= (struct icmp6hdr
*) skb
->h
.raw
;
224 memcpy(icmp6h
, thdr
, sizeof(struct icmp6hdr
));
225 icmp6h
->icmp6_cksum
= 0;
227 if (skb_queue_len(&sk
->sk_write_queue
) == 1) {
228 skb
->csum
= csum_partial((char *)icmp6h
,
229 sizeof(struct icmp6hdr
), skb
->csum
);
230 icmp6h
->icmp6_cksum
= csum_ipv6_magic(&fl
->fl6_src
,
237 skb_queue_walk(&sk
->sk_write_queue
, skb
) {
238 tmp_csum
= csum_add(tmp_csum
, skb
->csum
);
241 tmp_csum
= csum_partial((char *)icmp6h
,
242 sizeof(struct icmp6hdr
), tmp_csum
);
243 tmp_csum
= csum_ipv6_magic(&fl
->fl6_src
,
245 len
, fl
->proto
, tmp_csum
);
246 icmp6h
->icmp6_cksum
= tmp_csum
;
248 if (icmp6h
->icmp6_cksum
== 0)
249 icmp6h
->icmp6_cksum
= -1;
250 ip6_push_pending_frames(sk
);
260 static int icmpv6_getfrag(void *from
, char *to
, int offset
, int len
, int odd
, struct sk_buff
*skb
)
262 struct icmpv6_msg
*msg
= (struct icmpv6_msg
*) from
;
263 struct sk_buff
*org_skb
= msg
->skb
;
266 csum
= skb_copy_and_csum_bits(org_skb
, msg
->offset
+ offset
,
268 skb
->csum
= csum_block_add(skb
->csum
, csum
, odd
);
273 * Send an ICMP message in response to a packet in error
275 void icmpv6_send(struct sk_buff
*skb
, int type
, int code
, __u32 info
,
276 struct net_device
*dev
)
278 struct inet6_dev
*idev
= NULL
;
279 struct ipv6hdr
*hdr
= skb
->nh
.ipv6h
;
280 struct sock
*sk
= icmpv6_socket
->sk
;
281 struct ipv6_pinfo
*np
= inet6_sk(sk
);
282 struct in6_addr
*saddr
= NULL
;
283 struct dst_entry
*dst
;
284 struct icmp6hdr tmp_hdr
;
286 struct icmpv6_msg msg
;
293 if ((u8
*)hdr
< skb
->head
|| (u8
*)(hdr
+1) > skb
->tail
)
297 * Make sure we respect the rules
298 * i.e. RFC 1885 2.4(e)
299 * Rule (e.1) is enforced by not using icmpv6_send
300 * in any code that processes icmp errors.
302 addr_type
= ipv6_addr_type(&hdr
->daddr
);
304 if (ipv6_chk_addr(&hdr
->daddr
, skb
->dev
, 0))
311 if ((addr_type
& IPV6_ADDR_MULTICAST
|| skb
->pkt_type
!= PACKET_HOST
)) {
312 if (type
!= ICMPV6_PKT_TOOBIG
&&
313 !(type
== ICMPV6_PARAMPROB
&&
314 code
== ICMPV6_UNK_OPTION
&&
315 (opt_unrec(skb
, info
))))
321 addr_type
= ipv6_addr_type(&hdr
->saddr
);
327 if (addr_type
& IPV6_ADDR_LINKLOCAL
)
328 iif
= skb
->dev
->ifindex
;
331 * Must not send if we know that source is Anycast also.
332 * for now we don't know that.
334 if ((addr_type
== IPV6_ADDR_ANY
) || (addr_type
& IPV6_ADDR_MULTICAST
)) {
336 printk(KERN_DEBUG
"icmpv6_send: addr_any/mcast source\n"));
341 * Never answer to a ICMP packet.
343 if (is_ineligible(skb
)) {
345 printk(KERN_DEBUG
"icmpv6_send: no reply to icmp error\n"));
349 memset(&fl
, 0, sizeof(fl
));
350 fl
.proto
= IPPROTO_ICMPV6
;
351 ipv6_addr_copy(&fl
.fl6_dst
, &hdr
->saddr
);
353 ipv6_addr_copy(&fl
.fl6_src
, saddr
);
355 fl
.fl_icmp_type
= type
;
356 fl
.fl_icmp_code
= code
;
358 if (icmpv6_xmit_lock())
361 if (!icmpv6_xrlim_allow(sk
, type
, &fl
))
364 tmp_hdr
.icmp6_type
= type
;
365 tmp_hdr
.icmp6_code
= code
;
366 tmp_hdr
.icmp6_cksum
= 0;
367 tmp_hdr
.icmp6_pointer
= htonl(info
);
369 if (!fl
.oif
&& ipv6_addr_is_multicast(&fl
.fl6_dst
))
370 fl
.oif
= np
->mcast_oif
;
372 err
= ip6_dst_lookup(sk
, &dst
, &fl
);
375 if ((err
= xfrm_lookup(&dst
, &fl
, sk
, 0)) < 0)
376 goto out_dst_release
;
378 if (ipv6_addr_is_multicast(&fl
.fl6_dst
))
379 hlimit
= np
->mcast_hops
;
381 hlimit
= np
->hop_limit
;
383 hlimit
= dst_metric(dst
, RTAX_HOPLIMIT
);
385 hlimit
= ipv6_get_hoplimit(dst
->dev
);
388 msg
.offset
= skb
->nh
.raw
- skb
->data
;
390 len
= skb
->len
- msg
.offset
;
391 len
= min_t(unsigned int, len
, IPV6_MIN_MTU
- sizeof(struct ipv6hdr
) -sizeof(struct icmp6hdr
));
394 printk(KERN_DEBUG
"icmp: len problem\n"));
395 goto out_dst_release
;
398 idev
= in6_dev_get(skb
->dev
);
400 err
= ip6_append_data(sk
, icmpv6_getfrag
, &msg
,
401 len
+ sizeof(struct icmp6hdr
),
402 sizeof(struct icmp6hdr
),
403 hlimit
, NULL
, &fl
, (struct rt6_info
*)dst
,
406 ip6_flush_pending_frames(sk
);
409 err
= icmpv6_push_pending_frames(sk
, &fl
, &tmp_hdr
, len
+ sizeof(struct icmp6hdr
));
411 if (type
>= ICMPV6_DEST_UNREACH
&& type
<= ICMPV6_PARAMPROB
)
412 ICMP6_INC_STATS_OFFSET_BH(idev
, ICMP6_MIB_OUTDESTUNREACHS
, type
- ICMPV6_DEST_UNREACH
);
413 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_OUTMSGS
);
416 if (likely(idev
!= NULL
))
421 icmpv6_xmit_unlock();
424 static void icmpv6_echo_reply(struct sk_buff
*skb
)
426 struct sock
*sk
= icmpv6_socket
->sk
;
427 struct inet6_dev
*idev
;
428 struct ipv6_pinfo
*np
= inet6_sk(sk
);
429 struct in6_addr
*saddr
= NULL
;
430 struct icmp6hdr
*icmph
= (struct icmp6hdr
*) skb
->h
.raw
;
431 struct icmp6hdr tmp_hdr
;
433 struct icmpv6_msg msg
;
434 struct dst_entry
*dst
;
438 saddr
= &skb
->nh
.ipv6h
->daddr
;
440 if (!ipv6_unicast_destination(skb
))
443 memcpy(&tmp_hdr
, icmph
, sizeof(tmp_hdr
));
444 tmp_hdr
.icmp6_type
= ICMPV6_ECHO_REPLY
;
446 memset(&fl
, 0, sizeof(fl
));
447 fl
.proto
= IPPROTO_ICMPV6
;
448 ipv6_addr_copy(&fl
.fl6_dst
, &skb
->nh
.ipv6h
->saddr
);
450 ipv6_addr_copy(&fl
.fl6_src
, saddr
);
451 fl
.oif
= skb
->dev
->ifindex
;
452 fl
.fl_icmp_type
= ICMPV6_ECHO_REPLY
;
454 if (icmpv6_xmit_lock())
457 if (!fl
.oif
&& ipv6_addr_is_multicast(&fl
.fl6_dst
))
458 fl
.oif
= np
->mcast_oif
;
460 err
= ip6_dst_lookup(sk
, &dst
, &fl
);
463 if ((err
= xfrm_lookup(&dst
, &fl
, sk
, 0)) < 0)
464 goto out_dst_release
;
466 if (ipv6_addr_is_multicast(&fl
.fl6_dst
))
467 hlimit
= np
->mcast_hops
;
469 hlimit
= np
->hop_limit
;
471 hlimit
= dst_metric(dst
, RTAX_HOPLIMIT
);
473 hlimit
= ipv6_get_hoplimit(dst
->dev
);
475 idev
= in6_dev_get(skb
->dev
);
480 err
= ip6_append_data(sk
, icmpv6_getfrag
, &msg
, skb
->len
+ sizeof(struct icmp6hdr
),
481 sizeof(struct icmp6hdr
), hlimit
, NULL
, &fl
,
482 (struct rt6_info
*)dst
, MSG_DONTWAIT
);
485 ip6_flush_pending_frames(sk
);
488 err
= icmpv6_push_pending_frames(sk
, &fl
, &tmp_hdr
, skb
->len
+ sizeof(struct icmp6hdr
));
490 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_OUTECHOREPLIES
);
491 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_OUTMSGS
);
494 if (likely(idev
!= NULL
))
499 icmpv6_xmit_unlock();
502 static void icmpv6_notify(struct sk_buff
*skb
, int type
, int code
, u32 info
)
504 struct in6_addr
*saddr
, *daddr
;
505 struct inet6_protocol
*ipprot
;
511 if (!pskb_may_pull(skb
, sizeof(struct ipv6hdr
)))
514 nexthdr
= ((struct ipv6hdr
*)skb
->data
)->nexthdr
;
515 if (ipv6_ext_hdr(nexthdr
)) {
516 /* now skip over extension headers */
517 inner_offset
= ipv6_skip_exthdr(skb
, sizeof(struct ipv6hdr
), &nexthdr
, skb
->len
- sizeof(struct ipv6hdr
));
521 inner_offset
= sizeof(struct ipv6hdr
);
524 /* Checkin header including 8 bytes of inner protocol header. */
525 if (!pskb_may_pull(skb
, inner_offset
+8))
528 saddr
= &skb
->nh
.ipv6h
->saddr
;
529 daddr
= &skb
->nh
.ipv6h
->daddr
;
531 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
532 Without this we will not able f.e. to make source routed
534 Corresponding argument (opt) to notifiers is already added.
538 hash
= nexthdr
& (MAX_INET_PROTOS
- 1);
541 ipprot
= rcu_dereference(inet6_protos
[hash
]);
542 if (ipprot
&& ipprot
->err_handler
)
543 ipprot
->err_handler(skb
, NULL
, type
, code
, inner_offset
, info
);
546 read_lock(&raw_v6_lock
);
547 if ((sk
= sk_head(&raw_v6_htable
[hash
])) != NULL
) {
548 while((sk
= __raw_v6_lookup(sk
, nexthdr
, daddr
, saddr
))) {
549 rawv6_err(sk
, skb
, NULL
, type
, code
, inner_offset
, info
);
553 read_unlock(&raw_v6_lock
);
557 * Handle icmp messages
560 static int icmpv6_rcv(struct sk_buff
**pskb
, unsigned int *nhoffp
)
562 struct sk_buff
*skb
= *pskb
;
563 struct net_device
*dev
= skb
->dev
;
564 struct inet6_dev
*idev
= __in6_dev_get(dev
);
565 struct in6_addr
*saddr
, *daddr
;
566 struct ipv6hdr
*orig_hdr
;
567 struct icmp6hdr
*hdr
;
570 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_INMSGS
);
572 saddr
= &skb
->nh
.ipv6h
->saddr
;
573 daddr
= &skb
->nh
.ipv6h
->daddr
;
575 /* Perform checksum. */
576 if (skb
->ip_summed
== CHECKSUM_HW
) {
577 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
578 if (csum_ipv6_magic(saddr
, daddr
, skb
->len
, IPPROTO_ICMPV6
,
581 printk(KERN_DEBUG
"ICMPv6 hw checksum failed\n"));
582 skb
->ip_summed
= CHECKSUM_NONE
;
585 if (skb
->ip_summed
== CHECKSUM_NONE
) {
586 if (csum_ipv6_magic(saddr
, daddr
, skb
->len
, IPPROTO_ICMPV6
,
587 skb_checksum(skb
, 0, skb
->len
, 0))) {
589 printk(KERN_DEBUG
"ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
590 NIP6(*saddr
), NIP6(*daddr
)));
595 if (!pskb_pull(skb
, sizeof(struct icmp6hdr
)))
598 hdr
= (struct icmp6hdr
*) skb
->h
.raw
;
600 type
= hdr
->icmp6_type
;
602 if (type
>= ICMPV6_DEST_UNREACH
&& type
<= ICMPV6_PARAMPROB
)
603 ICMP6_INC_STATS_OFFSET_BH(idev
, ICMP6_MIB_INDESTUNREACHS
, type
- ICMPV6_DEST_UNREACH
);
604 else if (type
>= ICMPV6_ECHO_REQUEST
&& type
<= NDISC_REDIRECT
)
605 ICMP6_INC_STATS_OFFSET_BH(idev
, ICMP6_MIB_INECHOS
, type
- ICMPV6_ECHO_REQUEST
);
608 case ICMPV6_ECHO_REQUEST
:
609 icmpv6_echo_reply(skb
);
612 case ICMPV6_ECHO_REPLY
:
613 /* we couldn't care less */
616 case ICMPV6_PKT_TOOBIG
:
617 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
618 standard destination cache. Seems, only "advanced"
619 destination cache will allow to solve this problem
622 if (!pskb_may_pull(skb
, sizeof(struct ipv6hdr
)))
624 hdr
= (struct icmp6hdr
*) skb
->h
.raw
;
625 orig_hdr
= (struct ipv6hdr
*) (hdr
+ 1);
626 rt6_pmtu_discovery(&orig_hdr
->daddr
, &orig_hdr
->saddr
, dev
,
627 ntohl(hdr
->icmp6_mtu
));
630 * Drop through to notify
633 case ICMPV6_DEST_UNREACH
:
634 case ICMPV6_TIME_EXCEED
:
635 case ICMPV6_PARAMPROB
:
636 icmpv6_notify(skb
, type
, hdr
->icmp6_code
, hdr
->icmp6_mtu
);
639 case NDISC_ROUTER_SOLICITATION
:
640 case NDISC_ROUTER_ADVERTISEMENT
:
641 case NDISC_NEIGHBOUR_SOLICITATION
:
642 case NDISC_NEIGHBOUR_ADVERTISEMENT
:
647 case ICMPV6_MGM_QUERY
:
648 igmp6_event_query(skb
);
651 case ICMPV6_MGM_REPORT
:
652 igmp6_event_report(skb
);
655 case ICMPV6_MGM_REDUCTION
:
656 case ICMPV6_NI_QUERY
:
657 case ICMPV6_NI_REPLY
:
658 case ICMPV6_MLD2_REPORT
:
659 case ICMPV6_DHAAD_REQUEST
:
660 case ICMPV6_DHAAD_REPLY
:
661 case ICMPV6_MOBILE_PREFIX_SOL
:
662 case ICMPV6_MOBILE_PREFIX_ADV
:
667 printk(KERN_DEBUG
"icmpv6: msg of unknown type\n"));
670 if (type
& ICMPV6_INFOMSG_MASK
)
674 * error of unknown type.
675 * must pass to upper level
678 icmpv6_notify(skb
, type
, hdr
->icmp6_code
, hdr
->icmp6_mtu
);
684 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_INERRORS
);
689 int __init
icmpv6_init(struct net_proto_family
*ops
)
694 for (i
= 0; i
< NR_CPUS
; i
++) {
695 if (!cpu_possible(i
))
698 err
= sock_create_kern(PF_INET6
, SOCK_RAW
, IPPROTO_ICMPV6
,
699 &per_cpu(__icmpv6_socket
, i
));
702 "Failed to initialize the ICMP6 control socket "
708 sk
= per_cpu(__icmpv6_socket
, i
)->sk
;
709 sk
->sk_allocation
= GFP_ATOMIC
;
711 /* Enough space for 2 64K ICMP packets, including
712 * sk_buff struct overhead.
715 (2 * ((64 * 1024) + sizeof(struct sk_buff
)));
717 sk
->sk_prot
->unhash(sk
);
721 if (inet6_add_protocol(&icmpv6_protocol
, IPPROTO_ICMPV6
) < 0) {
722 printk(KERN_ERR
"Failed to register ICMP6 protocol\n");
730 for (j
= 0; j
< i
; j
++) {
731 if (!cpu_possible(j
))
733 sock_release(per_cpu(__icmpv6_socket
, j
));
739 void icmpv6_cleanup(void)
743 for (i
= 0; i
< NR_CPUS
; i
++) {
744 if (!cpu_possible(i
))
746 sock_release(per_cpu(__icmpv6_socket
, i
));
748 inet6_del_protocol(&icmpv6_protocol
, IPPROTO_ICMPV6
);
751 static struct icmp6_err
{
759 { /* ADM_PROHIBITED */
763 { /* Was NOT_NEIGHBOUR, now reserved */
777 int icmpv6_err_convert(int type
, int code
, int *err
)
784 case ICMPV6_DEST_UNREACH
:
786 if (code
<= ICMPV6_PORT_UNREACH
) {
787 *err
= tab_unreach
[code
].err
;
788 fatal
= tab_unreach
[code
].fatal
;
792 case ICMPV6_PKT_TOOBIG
:
796 case ICMPV6_PARAMPROB
:
801 case ICMPV6_TIME_EXCEED
:
810 ctl_table ipv6_icmp_table
[] = {
812 .ctl_name
= NET_IPV6_ICMP_RATELIMIT
,
813 .procname
= "ratelimit",
814 .data
= &sysctl_icmpv6_time
,
815 .maxlen
= sizeof(int),
817 .proc_handler
= &proc_dointvec