1 /* $NetBSD: ip6_forward.c,v 1.66 2009/03/18 16:00:23 cegger Exp $ */
2 /* $KAME: ip6_forward.c,v 1.109 2002/09/11 08:10:17 sakane Exp $ */
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: ip6_forward.c,v 1.66 2009/03/18 16:00:23 cegger Exp $");
36 #include "opt_ipsec.h"
37 #include "opt_pfil_hooks.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/malloc.h>
43 #include <sys/domain.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/errno.h>
48 #include <sys/kernel.h>
49 #include <sys/syslog.h>
52 #include <net/route.h>
54 #include <netinet/in.h>
55 #include <netinet/in_var.h>
56 #include <netinet/ip_var.h>
57 #include <netinet/ip6.h>
58 #include <netinet6/ip6_var.h>
59 #include <netinet6/ip6_private.h>
60 #include <netinet6/scope6_var.h>
61 #include <netinet/icmp6.h>
62 #include <netinet6/nd6.h>
65 #include <netinet6/ipsec.h>
66 #include <netinet6/ipsec_private.h>
67 #include <netkey/key.h>
71 #include <netipsec/ipsec.h>
72 #include <netipsec/ipsec6.h>
73 #include <netipsec/key.h>
74 #include <netipsec/xform.h>
75 #endif /* FAST_IPSEC */
81 #include <net/net_osdep.h>
/*
 * Route cache used by ip6_forward(): it is handed to rtcache_lookup(),
 * rtcache_validate(), rtcache_update() and rtcache_getdst() when picking
 * the outgoing route, and to ip6flow_create() after a successful forward.
 */
83 struct route ip6_forward_rt
;
/*
 * IPv6 packet-filter hook head, declared extern here; ip6_forward() passes
 * it to pfil_run_hooks() before handing the packet to nd6_output().
 */
86 extern struct pfil_head inet6_pfil_hook
; /* XXX */
90 * Forward a packet. If some error occurs return the sender
91 * an icmp packet. Note we can't always generate a meaningful
92 * icmp message because icmp doesn't have a large enough repertoire
95 * If not forwarding, just drop the packet. This could be confusing
96 * if ipforwarding was zero but some routing protocol was advertising
97 * us as a gateway to somewhere. However, we must let the routing
98 * protocol deal with that.
103 ip6_forward(struct mbuf
*m
, int srcrt
)
105 struct ip6_hdr
*ip6
= mtod(m
, struct ip6_hdr
*);
106 const struct sockaddr_in6
*dst
;
108 int error
= 0, type
= 0, code
= 0;
109 struct mbuf
*mcopy
= NULL
;
110 struct ifnet
*origifp
; /* maybe unnecessary */
111 u_int32_t inzone
, outzone
;
112 struct in6_addr src_in6
, dst_in6
;
114 struct secpolicy
*sp
= NULL
;
118 struct secpolicy
*sp
= NULL
;
124 * Clear any in-bound checksum flags for this packet.
126 m
->m_pkthdr
.csum_flags
= 0;
130 * Check AH/ESP integrity.
133 * Don't increment ip6s_cantforward because this is the check
134 * before forwarding packet actually.
136 if (ipsec6_in_reject(m
, NULL
)) {
137 IPSEC6_STATINC(IPSEC_STAT_IN_POLVIO
);
144 * Do not forward packets to multicast destination (should be handled
146 * Do not forward packets with unspecified source. It was discussed
147 * in July 2000, on ipngwg mailing list.
149 if ((m
->m_flags
& (M_BCAST
|M_MCAST
)) != 0 ||
150 IN6_IS_ADDR_MULTICAST(&ip6
->ip6_dst
) ||
151 IN6_IS_ADDR_UNSPECIFIED(&ip6
->ip6_src
)) {
152 IP6_STATINC(IP6_STAT_CANTFORWARD
);
153 /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
154 if (ip6_log_time
+ ip6_log_interval
< time_second
) {
155 ip6_log_time
= time_second
;
158 "from %s to %s nxt %d received on %s\n",
159 ip6_sprintf(&ip6
->ip6_src
),
160 ip6_sprintf(&ip6
->ip6_dst
),
162 if_name(m
->m_pkthdr
.rcvif
));
168 if (ip6
->ip6_hlim
<= IPV6_HLIMDEC
) {
169 /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */
170 icmp6_error(m
, ICMP6_TIME_EXCEEDED
,
171 ICMP6_TIME_EXCEED_TRANSIT
, 0);
174 ip6
->ip6_hlim
-= IPV6_HLIMDEC
;
177 * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU -
178 * size of IPv6 + ICMPv6 headers) bytes of the packet in case
179 * we need to generate an ICMP6 message to the src.
180 * Thanks to M_EXT, in most cases copy will not occur.
182 * It is important to save it before IPsec processing as IPsec
183 * processing may modify the mbuf.
185 mcopy
= m_copy(m
, 0, imin(m
->m_pkthdr
.len
, ICMPV6_PLD_MAXLEN
));
188 /* get a security policy for this packet */
189 sp
= ipsec6_getpolicybyaddr(m
, IPSEC_DIR_OUTBOUND
,
190 IP_FORWARDING
, &error
);
192 IPSEC6_STATINC(IPSEC_STAT_OUT_INVAL
);
193 IP6_STATINC(IP6_STAT_CANTFORWARD
);
196 /* XXX: what icmp ? */
208 switch (sp
->policy
) {
209 case IPSEC_POLICY_DISCARD
:
211 * This packet is just discarded.
213 IPSEC6_STATINC(IPSEC_STAT_OUT_POLVIO
);
214 IP6_STATINC(IP6_STAT_CANTFORWARD
);
218 /* XXX: what icmp ? */
226 case IPSEC_POLICY_BYPASS
:
227 case IPSEC_POLICY_NONE
:
228 /* no need to do IPsec. */
232 case IPSEC_POLICY_IPSEC
:
233 if (sp
->req
== NULL
) {
234 /* XXX should be panic ? */
235 printf("ip6_forward: No IPsec request specified.\n");
236 IP6_STATINC(IP6_STAT_CANTFORWARD
);
240 /* XXX: what icmp ? */
251 case IPSEC_POLICY_ENTRUST
:
253 /* should be panic ?? */
254 printf("ip6_forward: Invalid policy found. %d\n", sp
->policy
);
260 struct ipsecrequest
*isr
= NULL
;
261 struct ipsec_output_state state
;
264 * when the kernel forwards a packet, it is not proper to apply
265 * IPsec transport mode to the packet. this check
267 * at present, if there is even a transport mode SA request in the
268 * security policy, the kernel does not apply IPsec to the packet.
269 * this check is not enough because the following case is valid.
270 * ipsec esp/tunnel/xxx-xxx/require esp/transport//require;
272 for (isr
= sp
->req
; isr
; isr
= isr
->next
) {
273 if (isr
->saidx
.mode
== IPSEC_MODE_ANY
)
275 if (isr
->saidx
.mode
== IPSEC_MODE_TUNNEL
)
280 * if there's no need for tunnel mode IPsec, skip.
287 * All the extension headers will become inaccessible
288 * (since they can be encrypted).
289 * Don't panic, we need no more updates to extension headers
290 * on inner IPv6 packet (since they are now encapsulated).
292 * IPv6 [ESP|AH] IPv6 [extension headers] payload
294 memset(&state
, 0, sizeof(state
));
296 state
.ro
= NULL
; /* update at ipsec6_output_tunnel() */
297 state
.dst
= NULL
; /* update at ipsec6_output_tunnel() */
299 error
= ipsec6_output_tunnel(&state
, sp
, 0);
305 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
314 printf("ip6_forward (ipsec): error code %d\n", error
);
317 /* don't show these error codes to the user */
320 IP6_STATINC(IP6_STAT_CANTFORWARD
);
323 /* XXX: what icmp ? */
332 if (ip6
!= mtod(m
, struct ip6_hdr
*)) {
334 * now tunnel mode headers are added. we are originating
335 * packet instead of forwarding the packet.
337 ip6_output(m
, NULL
, NULL
, IPV6_FORWARDING
/*XXX*/, NULL
, NULL
,
343 rt
= state
.ro
? rtcache_validate(state
.ro
) : NULL
;
344 dst
= (const struct sockaddr_in6
*)state
.dst
;
345 if (dst
!= NULL
&& rt
!= NULL
) {
353 /* Check the security policy (SP) for the packet */
355 sp
= ipsec6_check_policy(m
,NULL
,0,&needipsec
,&error
);
358 * Hack: -EINVAL is used to signal that a packet
359 * should be silently discarded. This is typically
360 * because we asked key management for an SA and
361 * it was delayed (e.g. kicked up to IKE).
363 if (error
== -EINVAL
)
367 #endif /* FAST_IPSEC */
372 struct sockaddr_in6 dst6
;
375 sockaddr_in6_init(&u
.dst6
, &ip6
->ip6_dst
, 0, 0, 0);
376 if ((rt
= rtcache_lookup(&ip6_forward_rt
, &u
.dst
)) == NULL
) {
377 IP6_STATINC(IP6_STAT_NOROUTE
);
378 /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_noroute) */
380 icmp6_error(mcopy
, ICMP6_DST_UNREACH
,
381 ICMP6_DST_UNREACH_NOROUTE
, 0);
386 } else if ((rt
= rtcache_validate(&ip6_forward_rt
)) == NULL
&&
387 (rt
= rtcache_update(&ip6_forward_rt
, 1)) == NULL
) {
389 * rtcache_getdst(ip6_forward_rt)->sin6_addr was equal to
392 IP6_STATINC(IP6_STAT_NOROUTE
);
393 /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_noroute) */
395 icmp6_error(mcopy
, ICMP6_DST_UNREACH
,
396 ICMP6_DST_UNREACH_NOROUTE
, 0);
401 dst
= satocsin6(rtcache_getdst(&ip6_forward_rt
));
407 * Source scope check: if a packet can't be delivered to its
408 * destination for the reason that the destination is beyond the scope
409 * of the source address, discard the packet and return an icmp6
410 * destination unreachable error with Code 2 (beyond scope of source
411 * address). We use a local copy of ip6_src, since in6_setscope()
412 * will possibly modify its first argument.
413 * [draft-ietf-ipngwg-icmp-v3-07, Section 3.1]
415 src_in6
= ip6
->ip6_src
;
416 if (in6_setscope(&src_in6
, rt
->rt_ifp
, &outzone
)) {
417 /* XXX: this should not happen */
418 uint64_t *ip6s
= IP6_STAT_GETREF();
419 ip6s
[IP6_STAT_CANTFORWARD
]++;
420 ip6s
[IP6_STAT_BADSCOPE
]++;
425 if (in6_setscope(&src_in6
, m
->m_pkthdr
.rcvif
, &inzone
)) {
426 uint64_t *ip6s
= IP6_STAT_GETREF();
427 ip6s
[IP6_STAT_CANTFORWARD
]++;
428 ip6s
[IP6_STAT_BADSCOPE
]++;
433 if (inzone
!= outzone
438 uint64_t *ip6s
= IP6_STAT_GETREF();
439 ip6s
[IP6_STAT_CANTFORWARD
]++;
440 ip6s
[IP6_STAT_BADSCOPE
]++;
442 in6_ifstat_inc(rt
->rt_ifp
, ifs6_in_discard
);
444 if (ip6_log_time
+ ip6_log_interval
< time_second
) {
445 ip6_log_time
= time_second
;
448 "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
449 ip6_sprintf(&ip6
->ip6_src
),
450 ip6_sprintf(&ip6
->ip6_dst
),
452 if_name(m
->m_pkthdr
.rcvif
), if_name(rt
->rt_ifp
));
455 icmp6_error(mcopy
, ICMP6_DST_UNREACH
,
456 ICMP6_DST_UNREACH_BEYONDSCOPE
, 0);
462 * If we need to encapsulate the packet, do it here
463 * ipsec6_process_packet will send the packet using ip6_output
467 error
= ipsec6_process_packet(m
,sp
->req
);
477 * Destination scope check: if a packet is going to break the scope
478 * zone of packet's destination address, discard it. This case should
479 * usually be prevented by appropriately-configured routing table, but
480 * we need an explicit check because we may mistakenly forward the
481 * packet to a different zone by (e.g.) a default route.
483 dst_in6
= ip6
->ip6_dst
;
484 if (in6_setscope(&dst_in6
, m
->m_pkthdr
.rcvif
, &inzone
) != 0 ||
485 in6_setscope(&dst_in6
, rt
->rt_ifp
, &outzone
) != 0 ||
487 uint64_t *ip6s
= IP6_STAT_GETREF();
488 ip6s
[IP6_STAT_CANTFORWARD
]++;
489 ip6s
[IP6_STAT_BADSCOPE
]++;
495 if (m
->m_pkthdr
.len
> IN6_LINKMTU(rt
->rt_ifp
)) {
496 in6_ifstat_inc(rt
->rt_ifp
, ifs6_in_toobig
);
500 struct secpolicy
*xsp
;
505 mtu
= IN6_LINKMTU(rt
->rt_ifp
);
508 * When we do IPsec tunnel ingress, we need to play
509 * with the link value (decrement IPsec header size
510 * from mtu value). The code is much simpler than v4
511 * case, as we have the outgoing interface for
512 * encapsulated packet as "rt->rt_ifp".
514 xsp
= ipsec6_getpolicybyaddr(mcopy
, IPSEC_DIR_OUTBOUND
,
515 IP_FORWARDING
, &ipsecerror
);
517 ipsechdrsiz
= ipsec6_hdrsiz(mcopy
,
518 IPSEC_DIR_OUTBOUND
, NULL
);
519 if (ipsechdrsiz
< mtu
)
524 * if mtu becomes less than minimum MTU,
525 * tell minimum MTU (and I'll need to fragment it).
530 icmp6_error(mcopy
, ICMP6_PACKET_TOO_BIG
, 0, mtu
);
536 if (rt
->rt_flags
& RTF_GATEWAY
)
537 dst
= (struct sockaddr_in6
*)rt
->rt_gateway
;
540 * If we are to forward the packet using the same interface
541 * as one we got the packet from, perhaps we should send a redirect
542 * to sender to shortcut a hop.
543 * Only send redirect if source is sending directly to us,
544 * and if packet was not source routed (or has any options).
545 * Also, don't send redirect if forwarding using a route
546 * modified by a redirect.
548 if (rt
->rt_ifp
== m
->m_pkthdr
.rcvif
&& !srcrt
&& ip6_sendredirects
&&
552 (rt
->rt_flags
& (RTF_DYNAMIC
|RTF_MODIFIED
)) == 0) {
553 if ((rt
->rt_ifp
->if_flags
& IFF_POINTOPOINT
) &&
554 nd6_is_addr_neighbor(
555 satocsin6(rtcache_getdst(&ip6_forward_rt
)),
558 * If the incoming interface is equal to the outgoing
559 * one, the link attached to the interface is
560 * point-to-point, and the IPv6 destination is
561 * regarded as on-link on the link, then it will be
562 * highly probable that the destination address does
563 * not exist on the link and that the packet is going
564 * to loop. Thus, we immediately drop the packet and
565 * send an ICMPv6 error message.
566 * For other routing loops, we dare to let the packet
567 * go to the loop, so that a remote diagnosing host
568 * can detect the loop by traceroute.
569 * type/code is based on suggestion by Rich Draves.
570 * not sure if it is the best pick.
572 icmp6_error(mcopy
, ICMP6_DST_UNREACH
,
573 ICMP6_DST_UNREACH_ADDR
, 0);
581 * Fake scoped addresses. Note that even link-local source or
582 * destination can appear, if the originating node just sends the
583 * packet to us (without address resolution for the destination).
584 * Since both icmp6_error and icmp6_redirect_output fill the embedded
585 * link identifiers, we can do this stuff after making a copy for
586 * returning an error.
588 if ((rt
->rt_ifp
->if_flags
& IFF_LOOPBACK
) != 0) {
590 * See corresponding comments in ip6_output.
591 * XXX: but is it possible that ip6_forward() sends a packet
592 * to a loopback interface? I don't think so, and thus
593 * I bark here. (jinmei@kame.net)
594 * XXX: it is common to route invalid packets to loopback.
595 * also, the codepath will be visited on use of ::1 in
601 if ((rt
->rt_flags
& (RTF_BLACKHOLE
|RTF_REJECT
)) == 0)
604 printf("ip6_forward: outgoing interface is loopback. "
605 "src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
606 ip6_sprintf(&ip6
->ip6_src
),
607 ip6_sprintf(&ip6
->ip6_dst
),
608 ip6
->ip6_nxt
, if_name(m
->m_pkthdr
.rcvif
),
609 if_name(rt
->rt_ifp
));
612 /* we can just use rcvif in forwarding. */
613 origifp
= m
->m_pkthdr
.rcvif
;
616 origifp
= rt
->rt_ifp
;
618 * clear embedded scope identifiers if necessary.
619 * in6_clearscope will touch the addresses only when necessary.
621 in6_clearscope(&ip6
->ip6_src
);
622 in6_clearscope(&ip6
->ip6_dst
);
626 * Run through list of hooks for output packets.
628 if ((error
= pfil_run_hooks(&inet6_pfil_hook
, &m
, rt
->rt_ifp
,
633 ip6
= mtod(m
, struct ip6_hdr
*);
634 #endif /* PFIL_HOOKS */
636 error
= nd6_output(rt
->rt_ifp
, origifp
, m
, dst
, rt
);
638 in6_ifstat_inc(rt
->rt_ifp
, ifs6_out_discard
);
639 IP6_STATINC(IP6_STAT_CANTFORWARD
);
641 IP6_STATINC(IP6_STAT_FORWARD
);
642 in6_ifstat_inc(rt
->rt_ifp
, ifs6_out_forward
);
644 IP6_STATINC(IP6_STAT_REDIRECTSENT
);
647 if (m
->m_flags
& M_CANFASTFWD
)
648 ip6flow_create(&ip6_forward_rt
, m
);
662 if (type
== ND_REDIRECT
) {
663 icmp6_redirect_output(mcopy
, rt
);
669 /* xxx MTU is constant in PPP? */
673 /* Tell source to slow down like source quench in IP? */
676 case ENETUNREACH
: /* shouldn't happen, checked above */
681 type
= ICMP6_DST_UNREACH
;
682 code
= ICMP6_DST_UNREACH_ADDR
;
685 icmp6_error(mcopy
, type
, code
, 0);