2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1980, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 /************************************************************************
32 * Note: In this file a 'fib' is a "forwarding information base" *
33 * Which is the new name for an in kernel routing (next hop) table. *
34 ***********************************************************************/
37 #include "opt_inet6.h"
38 #include "opt_mrouting.h"
39 #include "opt_route.h"
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
45 #include <sys/socket.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/sysproto.h>
50 #include <sys/devctl.h>
51 #include <sys/domain.h>
52 #include <sys/eventhandler.h>
53 #include <sys/kernel.h>
55 #include <sys/rmlock.h>
58 #include <net/if_var.h>
59 #include <net/if_private.h>
60 #include <net/if_dl.h>
61 #include <net/route.h>
62 #include <net/route/route_ctl.h>
63 #include <net/route/route_var.h>
64 #include <net/route/nhop.h>
67 #include <netinet/in.h>
68 #include <netinet/ip_mroute.h>
69 #include <netinet6/in6_var.h>
/*
 * Routing statistics counters (struct rtstat), declared through the
 * VNET_PCPUSTAT_* macros together with their SYSINIT/SYSUNINIT hooks.
 */
71 VNET_PCPUSTAT_DEFINE(struct rtstat
, rtstat
);
73 VNET_PCPUSTAT_SYSINIT(rtstat
);
75 VNET_PCPUSTAT_SYSUNINIT(rtstat
);
/* Export the counters as the "stats" node under the _net_route sysctl tree. */
78 SYSCTL_DECL(_net_route
);
79 SYSCTL_VNET_PCPUSTAT(_net_route
, OID_AUTO
, stats
, struct rtstat
,
80 rtstat
, "route statistics");
/* Handler list that rt_addrmsg() invokes for interface address events. */
82 EVENTHANDLER_LIST_DEFINE(rt_addrmsg
);
84 static int rt_ifdelroute(const struct rtentry
*rt
, const struct nhop_object
*,
88 * route initialization must occur before ip6_init2(), which happens at
/*
 * Register route_init() to run at SI_SUB_PROTO_DOMAIN, order
 * SI_ORDER_THIRD, with a NULL argument.
 */
97 SYSINIT(route_init
, SI_SUB_PROTO_DOMAIN
, SI_ORDER_THIRD
, route_init
, NULL
);
/*
 * Build a routing table head (struct rib_head) for one address family in
 * one fib.
 *
 * offset is handed to rn_inithead_internal() for the main radix head; the
 * mask tree is always initialised with offset 0.  family and fibnum are
 * recorded in the new head.
 *
 * NOTE(review): the return type and the return statement are not visible
 * in this excerpt; presumably the initialised head is returned -- confirm.
 */
100 rt_table_init(int offset
, int family
, u_int fibnum
)
/*
 * Zero-filled allocation.  M_WAITOK is documented in malloc(9) as never
 * returning NULL, which is why no NULL check follows.
 */
104 rh
= malloc(sizeof(struct rib_head
), M_RTABLE
, M_WAITOK
| M_ZERO
);
106 /* TODO: These details should be hidden inside radix.c */
107 /* Init masks tree */
108 rn_inithead_internal(&rh
->head
, rh
->rnh_nodes
, offset
);
109 rn_inithead_internal(&rh
->rmhead
.head
, rh
->rmhead
.mask_nodes
, 0);
/* Hook the mask tree into the main radix head. */
110 rh
->head
.rnh_masks
= &rh
->rmhead
;
112 /* Save metadata associated with this routing table. */
113 rh
->rib_family
= family
;
114 rh
->rib_fibnum
= fibnum
;
/* Record the VNET that is current while the table is being created. */
116 rh
->rib_vnet
= curvnet
;
126 /* Init subscription system */
127 rib_init_subscriptions(rh
);
129 /* Finally, set base callbacks */
130 rh
->rnh_addaddr
= rn_addroute
;
131 rh
->rnh_deladdr
= rn_delete
;
132 rh
->rnh_matchaddr
= rn_match
;
133 rh
->rnh_lookup
= rn_lookup
;
134 rh
->rnh_walktree
= rn_walktree
;
135 rh
->rnh_walktree_from
= rn_walktree_from
;
/*
 * Tree-walk callback that rt_table_destroy() passes to rn_walktree() for
 * the mask tree.  arg is the radix head being emptied; the visited node is
 * removed from it with rn_delete().
 *
 * NOTE(review): what happens to the node returned by rn_delete() (kept in
 * x) and the value handed back to the walker are not visible in this
 * excerpt.
 */
141 rt_freeentry(struct radix_node
*rn
, void *arg
)
143 struct radix_head
* const rnh
= arg
;
144 struct radix_node
*x
;
/*
 * NOTE(review): rn + 2 is used as the key argument; presumably the key is
 * stored right behind the two radix_node entries -- confirm in radix.c.
 */
146 x
= (struct radix_node
*)rn_delete(rn
+ 2, NULL
, rnh
);
/*
 * Tear down a routing table head.
 *
 * Visible order of operations: flag the rib as dying, call
 * tmproutes_destroy(), delete every node of the mask tree through
 * rt_freeentry(), call nhops_destroy_rib() and
 * rib_destroy_subscriptions(), and finally destroy the rib lock.
 *
 * NOTE(review): the lines between these steps, and whatever releases rh
 * itself, are not visible in this excerpt.
 */
153 rt_table_destroy(struct rib_head
*rh
)
/* Mark the table as going away before any of its parts are destroyed. */
157 rh
->rib_dying
= true;
164 tmproutes_destroy(rh
);
/* The mask tree head is both the tree walked and the callback argument. */
166 rn_walktree(&rh
->rmhead
.head
, rt_freeentry
, &rh
->rmhead
.head
);
168 nhops_destroy_rib(rh
);
170 rib_destroy_subscriptions(rh
);
172 /* Assume table is already empty */
173 RIB_LOCK_DESTROY(rh
);
178 * Adds a temporary redirect entry to the routing table.
179 * @fibnum: fib number
180 * @dst: destination to install redirect to
181 * @gateway: gateway to go via
182 * @author: sockaddr of originating router, can be NULL
183 * @ifp: interface to use for the redirected route
184 * @flags: set of flags to add. Allowed: RTF_GATEWAY
185 * @lifetime_sec: time in seconds to expire this redirect.
187 * Returns 0 on success, errno otherwise.
190 rib_add_redirect(u_int fibnum
, struct sockaddr
*dst
, struct sockaddr
*gateway
,
191 struct sockaddr
*author
, struct ifnet
*ifp
, int flags
, int lifetime_sec
)
/* The redirect nexthop is installed with the default route weight. */
193 struct route_nhop_data rnd
= { .rnd_weight
= RT_DEFAULT_WEIGHT
};
194 struct rib_cmd_info rc
;
/* Refuse address families that have no routing table in this fib. */
200 if (rt_tables_get_rnh(fibnum
, dst
->sa_family
) == NULL
)
201 return (EAFNOSUPPORT
);
203 /* Verify the allowed flag mask. */
204 KASSERT(((flags
& ~(RTF_GATEWAY
)) == 0),
205 ("invalid redirect flags: %x", flags
));
/* Redirects are always installed as dynamic host routes. */
206 flags
|= RTF_HOST
| RTF_DYNAMIC
;
208 /* Get the best ifa for the given interface and gateway. */
209 if ((ifa
= ifaof_ifpforaddr(gateway
, ifp
)) == NULL
)
210 return (ENETUNREACH
);
/*
 * Build the nexthop that describes the redirect target.
 * NOTE(review): the check of nhop_alloc()'s result is not visible here.
 */
212 struct nhop_object
*nh
= nhop_alloc(fibnum
, dst
->sa_family
);
216 nhop_set_gw(nh
, gateway
, flags
& RTF_GATEWAY
);
217 nhop_set_transmit_ifp(nh
, ifp
);
218 nhop_set_src(nh
, ifa
);
219 nhop_set_pxtype_flag(nh
, NHF_HOST
);
/* The expiry is absolute: lifetime_sec added to the current time_uptime. */
220 nhop_set_expire(nh
, lifetime_sec
+ time_uptime
);
221 nhop_set_redirect(nh
, true);
222 nhop_set_origin(nh
, NH_ORIGIN_REDIRECT
);
/*
 * nhop_get_nhop() reports failure through error.
 * NOTE(review): the test of error is not visible in this excerpt.
 */
223 rnd
.rnd_nhop
= nhop_get_nhop(nh
, &error
);
/*
 * NOTE(review): -1 is passed as the prefix length; presumably this selects
 * a host route -- confirm against rib_add_route_px().
 */
225 error
= rib_add_route_px(fibnum
, dst
, -1,
226 &rnd
, RTM_F_CREATE
, &rc
);
230 /* TODO: add per-fib redirect stats. */
234 RTSTAT_INC(rts_dynamic
);
236 /* Send notification of a route addition to userland. */
237 struct rt_addrinfo info
= {
238 .rti_info
[RTAX_DST
] = dst
,
239 .rti_info
[RTAX_GATEWAY
] = gateway
,
240 .rti_info
[RTAX_AUTHOR
] = author
,
/* The RTM_REDIRECT message carries flags | RTF_UP and the value of error. */
242 rt_missmsg_fib(RTM_REDIRECT
, &info
, flags
| RTF_UP
, error
, fibnum
);
248 * Routing table ioctl interface.
/*
 * req, data and fibnum are passed through unchanged to the mrt_ioctl hook.
 */
251 rtioctl_fib(u_long req
, caddr_t data
, u_int fibnum
)
255 * If more ioctl commands are added here, make sure the proper
256 * super-user checks are being performed because it is possible for
257 * prison-root to make it this far if raw sockets have been enabled
261 /* Multicast goop, grrr... */
/*
 * Delegate to mrt_ioctl when the hook is set; otherwise the request is
 * reported as unsupported (EOPNOTSUPP).
 */
262 return mrt_ioctl
? mrt_ioctl(req
, data
, fibnum
) : EOPNOTSUPP
;
/*
 * Pick the interface address (ifa) to use for a route to dst via gateway
 * in fib fibnum; flags are the route flags (RTF_GATEWAY, RTF_HOST).
 *
 * NOTE(review): several control-flow lines (else branches, NULL checks and
 * the return) are missing from this excerpt; the comments below describe
 * only the statements that are visible.
 */
269 ifa_ifwithroute(int flags
, const struct sockaddr
*dst
,
270 const struct sockaddr
*gateway
, u_int fibnum
)
/* Interface (non-gateway) route. */
275 if ((flags
& RTF_GATEWAY
) == 0) {
277 * If we are adding a route to an interface,
278 * and the interface is a pt to pt link
279 * we should search for the destination
280 * as our clue to the interface. Otherwise
281 * we can use the local address.
/* Host route: search by destination address of a point-to-point link. */
284 if (flags
& RTF_HOST
)
285 ifa
= ifa_ifwithdstaddr(dst
, fibnum
);
/* NOTE(review): presumably a fallback -- match gateway as a local address. */
287 ifa
= ifa_ifwithaddr(gateway
);
290 * If we are adding a route to a remote net
291 * or host, the gateway may still be on the
292 * other end of a pt to pt link.
/* Gateway route: first try the gateway as a point-to-point peer. */
294 ifa
= ifa_ifwithdstaddr(gateway
, fibnum
);
/* NOTE(review): presumably tried next -- an interface on the gateway's net. */
297 ifa
= ifa_ifwithnet(gateway
, 0, fibnum
);
299 struct nhop_object
*nh
;
/* Consult the routing table of this fib for the gateway. */
301 nh
= rib_lookup(fibnum
, gateway
, NHR_NONE
, 0);
304 * dismiss a gateway that is reachable only
305 * through the default router
307 if ((nh
== NULL
) || (nh
->nh_flags
& NHF_DEFAULT
))
/*
 * The chosen ifa belongs to a different address family than dst: look for
 * an address matching dst on the same interface instead.
 */
311 if (ifa
->ifa_addr
->sa_family
!= dst
->sa_family
) {
/* oifa keeps the original choice; its later use is not visible here. */
312 struct ifaddr
*oifa
= ifa
;
313 ifa
= ifaof_ifpforaddr(dst
, ifa
->ifa_ifp
);
322 * Delete Routes for a Network Interface
324 * Called for each routing entry via the rnh->rnh_walktree() call above
325 * to delete all route entries referencing a detaching network interface.
328 * rt pointer to rtentry
330 * arg argument passed to rnh->rnh_walktree() - detaching interface
334 * errno failed - reason indicated
/*
 * Callback that rt_flushifroutes() hands to rib_foreach_table_walk_del().
 * arg is the interface being flushed; it is compared with the interface of
 * the route's nexthop (nh->nh_ifp).
 *
 * NOTE(review): the value returned on each path is not visible in this
 * excerpt.
 */
337 rt_ifdelroute(const struct rtentry
*rt
, const struct nhop_object
*nh
, void *arg
)
339 struct ifnet
*ifp
= arg
;
/* Routes whose nexthop uses a different interface are handled separately. */
341 if (nh
->nh_ifp
!= ifp
)
345 * Protect (sorta) against walktree recursion problems
/* Routes that are not marked RTF_UP take a separate path as well. */
348 if ((rt
->rte_flags
& RTF_UP
) == 0)
/*
 * Walk the routing tables with rt_ifdelroute() as the per-route callback,
 * passing ifp as its argument.
 *
 * NOTE(review): AF_UNSPEC is given as the family; presumably that selects
 * every address family -- confirm against rib_foreach_table_walk_del().
 */
355 rt_flushifroutes(struct ifnet
*ifp
)
358 rib_foreach_table_walk_del(AF_UNSPEC
, rt_ifdelroute
, ifp
);
362 * Tries to extract interface from RTAX_IFP passed in rt_addrinfo.
363 * Interface can be specified either as interface index (sdl_index) or
364 * the interface name (sdl_data).
366 * Returns found ifp or NULL
368 static struct ifnet
*
369 info_get_ifp(struct rt_addrinfo
*info
)
371 const struct sockaddr_dl
*sdl
;
/*
 * RTAX_IFP is read as a link-level sockaddr.  The visible caller,
 * rt_getifa_fib(), only calls this when RTAX_IFP is non-NULL.
 */
373 sdl
= (const struct sockaddr_dl
*)info
->rti_info
[RTAX_IFP
];
/* Only AF_LINK addresses are handled (the outcome line is not visible). */
374 if (sdl
->sdl_family
!= AF_LINK
)
/* A non-zero interface index takes precedence over the name. */
377 if (sdl
->sdl_index
!= 0)
378 return (ifnet_byindex(sdl
->sdl_index
));
379 if (sdl
->sdl_nlen
> 0) {
380 char if_name
[IF_NAMESIZE
];
/* The name must lie entirely inside the sockaddr (sdl_len bytes) ... */
381 if (sdl
->sdl_nlen
+ offsetof(struct sockaddr_dl
, sdl_data
) > sdl
->sdl_len
)
/* ... and must leave room for a terminating NUL in if_name. */
383 if (sdl
->sdl_nlen
>= IF_NAMESIZE
)
/* Zero first so that the copied name ends up NUL-terminated. */
385 bzero(if_name
, sizeof(if_name
));
386 memcpy(if_name
, sdl
->sdl_data
, sdl
->sdl_nlen
);
387 return (ifunit(if_name
));
394 * Calculates proper ifa/ifp for the cases when gateway AF is different
397 * Returns 0 on success.
399 __noinline
static int
400 rt_getifa_family(struct rt_addrinfo
*info
, uint32_t fibnum
)
/* No transmit interface was supplied by the caller. */
402 if (info
->rti_ifp
== NULL
) {
403 struct ifaddr
*ifa
= NULL
;
405 * No transmit interface specified. Guess it by checking gw sa.
407 const struct sockaddr
*gw
= info
->rti_info
[RTAX_GATEWAY
];
/* The gateway serves as both destination and gateway of the lookup. */
408 ifa
= ifa_ifwithroute(RTF_GATEWAY
, gw
, gw
, fibnum
);
/*
 * NOTE(review): the condition guarding this return is not visible;
 * presumably it is taken when no ifa was found.
 */
410 return (ENETUNREACH
);
411 info
->rti_ifp
= ifa
->ifa_ifp
;
414 /* Prefer address from outgoing interface */
415 info
->rti_ifa
= ifaof_ifpforaddr(info
->rti_info
[RTAX_DST
], info
->rti_ifp
);
417 if (info
->rti_ifa
== NULL
) {
418 /* Use first found IPv4 address */
/* loopback_ok is set only when the outgoing interface has IFF_LOOPBACK. */
419 bool loopback_ok
= info
->rti_ifp
->if_flags
& IFF_LOOPBACK
;
420 info
->rti_ifa
= (struct ifaddr
*)in_findlocal(fibnum
, loopback_ok
);
/* Without any usable source address the route cannot be resolved. */
423 if (info
->rti_ifa
== NULL
)
424 return (ENETUNREACH
);
429 * Fills in rti_ifp and rti_ifa for the provided fib.
431 * Assume basic consistency checks are executed by callers:
432 * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
435 rt_getifa_fib(struct rt_addrinfo
*info
, u_int fibnum
)
437 const struct sockaddr
*dst
, *gateway
, *ifaaddr
;
/* Cache the sockaddrs and flags supplied by the caller. */
440 dst
= info
->rti_info
[RTAX_DST
];
441 gateway
= info
->rti_info
[RTAX_GATEWAY
];
442 ifaaddr
= info
->rti_info
[RTAX_IFA
];
443 flags
= info
->rti_flags
;
446 * ifp may be specified by sockaddr_dl
447 * when protocol address is ambiguous.
451 /* If we have interface specified by RTAX_IFP address, try to use it */
452 if ((info
->rti_ifp
== NULL
) && (info
->rti_info
[RTAX_IFP
] != NULL
))
453 info
->rti_ifp
= info_get_ifp(info
);
455 * If we have source address specified, try to find it
456 * TODO: avoid enumerating all ifas on all interfaces.
458 if (info
->rti_ifa
== NULL
&& ifaaddr
!= NULL
)
459 info
->rti_ifa
= ifa_ifwithaddr(ifaaddr
);
/*
 * Gateway route whose gateway is in a different address family than the
 * destination: resolution is delegated to rt_getifa_family().
 */
460 if ((info
->rti_ifa
== NULL
) && ((info
->rti_flags
& RTF_GATEWAY
) != 0) &&
461 (gateway
->sa_family
!= dst
->sa_family
))
462 return (rt_getifa_family(info
, fibnum
));
463 if (info
->rti_ifa
== NULL
) {
464 const struct sockaddr
*sa
;
467 * Most common use case for the userland-supplied routes.
469 * Choose sockaddr to select ifa.
470 * -- if ifp is set --
471 * Order of preference:
474 * Note: for interface routes link-level gateway address
475 * is specified to indicate the interface index without
476 * specifying RTF_GATEWAY. In this case, ignore gateway
477 * Note: gateway AF may be different from dst AF. In this case,
479 * 3) final destination.
480 * 4) if all of these fails, try to get at least link-level ifa.
482 * try to lookup gateway or dst in the routing table to get ifa
/*
 * NOTE(review): the assignments to sa for the gateway and fallback cases
 * are not visible in this excerpt.
 */
484 if (info
->rti_info
[RTAX_IFA
] != NULL
)
485 sa
= info
->rti_info
[RTAX_IFA
];
486 else if ((info
->rti_flags
& RTF_GATEWAY
) != 0 &&
487 gateway
->sa_family
== dst
->sa_family
)
/* With a known interface, look for a matching address on it ... */
491 if (info
->rti_ifp
!= NULL
) {
492 info
->rti_ifa
= ifaof_ifpforaddr(sa
, info
->rti_ifp
);
/* ... and fall back to the gateway address when that finds nothing. */
494 if (info
->rti_ifa
== NULL
&& gateway
!= NULL
)
495 info
->rti_ifa
= ifaof_ifpforaddr(gateway
, info
->rti_ifp
);
/* Without an interface, derive the ifa through ifa_ifwithroute(). */
496 } else if (dst
!= NULL
&& gateway
!= NULL
)
497 info
->rti_ifa
= ifa_ifwithroute(flags
, dst
, gateway
,
500 info
->rti_ifa
= ifa_ifwithroute(flags
, sa
, sa
,
/* Take the interface from the chosen address if none was set so far. */
503 if (info
->rti_ifa
!= NULL
) {
504 if (info
->rti_ifp
== NULL
)
505 info
->rti_ifp
= info
->rti_ifa
->ifa_ifp
;
/*
 * Propagate an MTU change of ifp to the nexthops of every routing table:
 * for each address family 1..AF_MAX the per-family MTU is fetched with
 * if_getmtu_family() and applied to every fib through nhops_update_ifmtu().
 *
 * NOTE(review): the line(s) between the table lookup and the update call
 * are missing from this excerpt; presumably a missing table is skipped.
 */
512 rt_updatemtu(struct ifnet
*ifp
)
514 struct rib_head
*rnh
;
519 * Try to update rt_mtu for all routes using this interface
520 * Unfortunately the only way to do this is to traverse all
521 * routing tables in all fibs/domains.
/* i iterates over address families, j over fib numbers. */
523 for (i
= 1; i
<= AF_MAX
; i
++) {
524 mtu
= if_getmtu_family(ifp
, i
);
525 for (j
= 0; j
< rt_numfibs
; j
++) {
526 rnh
= rt_tables_get_rnh(j
, i
);
529 nhops_update_ifmtu(rnh
, ifp
, mtu
);
/* Prototypes for the sockaddr/rtentry formatting helpers that follow. */
535 int p_sockaddr(char *buf
, int buflen
, struct sockaddr
*s
);
536 int rt_print(char *buf
, int buflen
, struct rtentry
*rt
);
/*
 * Format the address held in s into buf (buflen bytes available) with
 * inet_ntop() and return the length of the resulting string.
 *
 * NOTE(review): the case labels of the switch and the value returned when
 * inet_ntop() fails are not visible in this excerpt.
 */
539 p_sockaddr(char *buf
, int buflen
, struct sockaddr
*s
)
/* Select the raw address inside the sockaddr according to its family. */
543 switch (s
->sa_family
) {
545 paddr
= &((struct sockaddr_in
*)s
)->sin_addr
;
548 paddr
= &((struct sockaddr_in6
*)s
)->sin6_addr
;
555 if (inet_ntop(s
->sa_family
, paddr
, buf
, buflen
) == NULL
)
558 return (strlen(buf
));
/*
 * Write a textual form of rt into buf by chaining p_sockaddr() calls: the
 * address first, then the mask unless RTF_HOST is set, then the gateway
 * (rt->rt_nhop->gw_sa) when RTF_GATEWAY is set.  i accumulates the lengths
 * returned by p_sockaddr() and is used as the write offset.
 *
 * NOTE(review): the assignments of addr and mask, any separators written
 * between the parts, and the return statement are not visible here.
 */
562 rt_print(char *buf
, int buflen
, struct rtentry
*rt
)
564 struct sockaddr
*addr
, *mask
;
570 i
= p_sockaddr(buf
, buflen
, addr
);
/* Non-host routes also print their mask. */
571 if (!(rt
->rt_flags
& RTF_HOST
)) {
573 i
+= p_sockaddr(buf
+ i
, buflen
- i
, mask
);
/* Gateway routes also print the gateway stored in the nexthop. */
576 if (rt
->rt_flags
& RTF_GATEWAY
) {
578 i
+= p_sockaddr(buf
+ i
, buflen
- i
, &rt
->rt_nhop
->gw_sa
);
/*
 * Copy src into dst with netmask applied byte by byte.
 *
 * All three sockaddrs are walked as raw bytes.  The first byte of each is
 * its length, so cplim is dst plus the netmask length and cplim2 is dst
 * plus the source length.
 *
 * NOTE(review): the loop header, the advance of cp3 past its first two
 * bytes and the clamping of cplim are not visible in this excerpt.
 */
586 rt_maskedcopy(const struct sockaddr
*src
, struct sockaddr
*dst
,
587 const struct sockaddr
*netmask
)
589 const u_char
*cp1
= (const u_char
*)src
;
590 u_char
*cp2
= (u_char
*)dst
;
591 const u_char
*cp3
= (const u_char
*)netmask
;
592 u_char
*cplim
= cp2
+ *cp3
;
593 u_char
*cplim2
= cp2
+ *cp1
;
595 *cp2
++ = *cp1
++; *cp2
++ = *cp1
++; /* copies sa_len & sa_family */
/* Each remaining source byte is ANDed with the matching mask byte. */
600 *cp2
++ = *cp1
++ & *cp3
++;
/* Zero whatever is left of dst up to the source length. */
602 bzero((caddr_t
)cp2
, (unsigned)(cplim2
- cp2
));
606 * Announce interface address arrival/withdrawal
607 * Returns 0 on success.
610 rt_addrmsg(int cmd
, struct ifaddr
*ifa
, int fibnum
)
/* sa and ifp are only needed for the devctl notifications below. */
612 #if defined(INET) || defined(INET6)
613 struct sockaddr
*sa
= ifa
->ifa_addr
;
614 struct ifnet
*ifp
= ifa
->ifa_ifp
;
/* Only address add/delete announcements are valid here. */
617 KASSERT(cmd
== RTM_ADD
|| cmd
== RTM_DELETE
,
618 ("unexpected cmd %d", cmd
));
619 KASSERT((fibnum
>= 0 && fibnum
< rt_numfibs
),
620 ("%s: fib out of range 0 <=%d<%d", __func__
, fibnum
, rt_numfibs
));
/* Notify the handlers registered on the rt_addrmsg event list. */
622 EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg
, ifa
, cmd
);
/* IPv4: publish "address=<addr>" to devctl as IFNET ADDR_ADD/ADDR_DEL. */
625 if (sa
->sa_family
== AF_INET
) {
626 char addrstr
[INET_ADDRSTRLEN
];
/* The extra 12 bytes cover the 8-character "address=" prefix. */
627 char strbuf
[INET_ADDRSTRLEN
+ 12];
629 inet_ntoa_r(((struct sockaddr_in
*)sa
)->sin_addr
, addrstr
);
630 snprintf(strbuf
, sizeof(strbuf
), "address=%s", addrstr
);
631 devctl_notify("IFNET", ifp
->if_xname
,
632 (cmd
== RTM_ADD
) ? "ADDR_ADD" : "ADDR_DEL", strbuf
);
/* IPv6: same notification, with ip6_sprintf() producing the text form. */
636 if (sa
->sa_family
== AF_INET6
) {
637 char addrstr
[INET6_ADDRSTRLEN
];
638 char strbuf
[INET6_ADDRSTRLEN
+ 12];
640 ip6_sprintf(addrstr
, IFA_IN6(ifa
));
641 snprintf(strbuf
, sizeof(strbuf
), "address=%s", addrstr
);
642 devctl_notify("IFNET", ifp
->if_xname
,
643 (cmd
== RTM_ADD
) ? "ADDR_ADD" : "ADDR_DEL", strbuf
);
/* With V_rt_add_addr_allfibs set, the message is addressed to all fibs. */
647 if (V_rt_add_addr_allfibs
)
648 fibnum
= RT_ALL_FIBS
;
649 return (rtsock_addrmsg(cmd
, ifa
, fibnum
));
653 * Announce kernel-originated route addition/removal to rtsock based on @rt data.
656 * @nh: nhop object to announce
657 * @fibnum: fib id or RT_ALL_FIBS
659 * Returns 0 on success.
/*
 * NOTE(review): the end of the parameter list (the fibnum parameter used
 * below) is not visible in this excerpt.
 */
662 rt_routemsg(int cmd
, struct rtentry
*rt
, struct nhop_object
*nh
,
/* Sanity checks: command, fib range, and presence of the route key. */
666 KASSERT(cmd
== RTM_ADD
|| cmd
== RTM_DELETE
|| cmd
== RTM_CHANGE
,
667 ("unexpected cmd %d", cmd
));
669 KASSERT(fibnum
== RT_ALL_FIBS
|| (fibnum
>= 0 && fibnum
< rt_numfibs
),
670 ("%s: fib out of range 0 <=%d<%d", __func__
, fibnum
, rt_numfibs
));
672 KASSERT(rt_key(rt
) != NULL
, (":%s: rt_key must be supplied", __func__
));
/* The announcement itself is produced by rtsock_routemsg(). */
674 return (rtsock_routemsg(cmd
, rt
, nh
, fibnum
));
678 * Announce kernel-originated route addition/removal to rtsock based on @info data.
680 * @info: addrinfo structure with valid data.
681 * @fibnum: fib id or RT_ALL_FIBS
683 * Returns 0 on success.
686 rt_routemsg_info(int cmd
, struct rt_addrinfo
*info
, int fibnum
)
/* Sanity checks: command, fib range, and presence of the destination. */
689 KASSERT(cmd
== RTM_ADD
|| cmd
== RTM_DELETE
|| cmd
== RTM_CHANGE
,
690 ("unexpected cmd %d", cmd
));
692 KASSERT(fibnum
== RT_ALL_FIBS
|| (fibnum
>= 0 && fibnum
< rt_numfibs
),
693 ("%s: fib out of range 0 <=%d<%d", __func__
, fibnum
, rt_numfibs
));
695 KASSERT(info
->rti_info
[RTAX_DST
] != NULL
, (":%s: RTAX_DST must be supplied", __func__
));
/* The announcement itself is produced by rtsock_routemsg_info(). */
697 return (rtsock_routemsg_info(cmd
, info
, fibnum
));
/*
 * Report an interface change by calling the ifmsg_f hook of both the
 * rtsock and the netlink callback tables with the same arguments.
 */
701 rt_ifmsg(struct ifnet
*ifp
, int if_flags_mask
)
703 rtsock_callback_p
->ifmsg_f(ifp
, if_flags_mask
);
704 netlink_callback_p
->ifmsg_f(ifp
, if_flags_mask
);