1 /* $NetBSD: rtsock.c,v 1.126 2009/09/12 18:09:25 tsutsui Exp $ */
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * Copyright (c) 1988, 1991, 1993
34 * The Regents of the University of California. All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: rtsock.c,v 1.126 2009/09/12 18:09:25 tsutsui Exp $");
68 #include "opt_compat_netbsd.h"
71 #include <sys/param.h>
72 #include <sys/systm.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/domain.h>
78 #include <sys/protosw.h>
79 #include <sys/sysctl.h>
80 #include <sys/kauth.h>
83 #include <netinet/in.h>
84 #endif /* RTSOCK_DEBUG */
87 #include <net/route.h>
88 #include <net/raw_cb.h>
90 #if defined(COMPAT_14) || defined(COMPAT_50)
91 #include <compat/net/if.h>
94 #include <machine/stdarg.h>
96 DOMAIN_DEFINE(routedomain
); /* forward declare and add to link set */
98 struct sockaddr route_dst
= { .sa_len
= 2, .sa_family
= PF_ROUTE
, };
99 struct sockaddr route_src
= { .sa_len
= 2, .sa_family
= PF_ROUTE
, };
101 int route_maxqlen
= IFQ_MAXLEN
;
102 static struct ifqueue route_intrq
;
103 static void *route_sih
;
105 static int rt_msg2(int, struct rt_addrinfo
*, void *, struct rt_walkarg
*, int *);
106 static int rt_xaddrs(u_char
, const char *, const char *, struct rt_addrinfo
*);
107 static struct mbuf
*rt_makeifannouncemsg(struct ifnet
*, int, int,
108 struct rt_addrinfo
*);
109 static void sysctl_net_route_setup(struct sysctllog
**);
110 static int sysctl_dumpentry(struct rtentry
*, void *);
111 static int sysctl_iflist(int, struct rt_walkarg
*, int);
112 static int sysctl_rtable(SYSCTLFN_PROTO
);
113 static void rt_adjustcount(int, int);
/*
 * rt_adjustcount: add 'cnt' to the listener counters kept in route_cb.
 *
 *	af	protocol/address family of the routing socket
 *	cnt	signed delta; the callers in this file pass +1 and -1
 *
 * any_count is adjusted, and so are the per-protocol counters
 * ip_count, ip6_count, ipx_count, ns_count and iso_count.
 *
 * NOTE(review): the lines that choose which per-protocol counter is
 * touched for a given 'af' are not visible in this extract (presumably
 * a switch on af) -- confirm against the complete source.
 */
116 rt_adjustcount(int af
, int cnt
)
118 route_cb
.any_count
+= cnt
;
121 route_cb
.ip_count
+= cnt
;
125 route_cb
.ip6_count
+= cnt
;
129 route_cb
.ipx_count
+= cnt
;
132 route_cb
.ns_count
+= cnt
;
135 route_cb
.iso_count
+= cnt
;
/*
 * cvtmetrics: convert a struct nrt_metrics into a struct rt_metrics.
 *
 *	ortm	destination, the layout placed in struct rt_msghdr (rtm_rmx)
 *	rtm	source, the layout stored in a route entry (rt_rmx)
 *
 * The two structures are distinct types, so the conversion is a
 * field-by-field assignment of rmx_locks, rmx_mtu, rmx_hopcount,
 * rmx_expire, rmx_recvpipe, rmx_sendpipe, rmx_ssthresh, rmx_rtt,
 * rmx_rttvar and rmx_pksent.
 */
141 cvtmetrics(struct rt_metrics
*ortm
, const struct nrt_metrics
*rtm
)
143 ortm
->rmx_locks
= rtm
->rmx_locks
;
144 ortm
->rmx_mtu
= rtm
->rmx_mtu
;
145 ortm
->rmx_hopcount
= rtm
->rmx_hopcount
;
146 ortm
->rmx_expire
= rtm
->rmx_expire
;
147 ortm
->rmx_recvpipe
= rtm
->rmx_recvpipe
;
148 ortm
->rmx_sendpipe
= rtm
->rmx_sendpipe
;
149 ortm
->rmx_ssthresh
= rtm
->rmx_ssthresh
;
150 ortm
->rmx_rtt
= rtm
->rmx_rtt
;
151 ortm
->rmx_rttvar
= rtm
->rmx_rttvar
;
152 ortm
->rmx_pksent
= rtm
->rmx_pksent
;
157 route_usrreq(struct socket
*so
, int req
, struct mbuf
*m
, struct mbuf
*nam
,
158 struct mbuf
*control
, struct lwp
*l
)
161 struct rawcb
*rp
= sotorawcb(so
);
164 if (req
== PRU_ATTACH
) {
166 rp
= malloc(sizeof(*rp
), M_PCB
, M_WAITOK
|M_ZERO
);
169 if (req
== PRU_DETACH
&& rp
)
170 rt_adjustcount(rp
->rcb_proto
.sp_protocol
, -1);
174 * Don't call raw_usrreq() in the attach case, because
175 * we want to allow non-privileged processes to listen on
176 * and send "safe" commands to the routing socket.
178 if (req
== PRU_ATTACH
) {
182 error
= raw_attach(so
, (int)(long)nam
);
184 error
= raw_usrreq(so
, req
, m
, nam
, control
, l
);
187 if (req
== PRU_ATTACH
&& rp
) {
193 rt_adjustcount(rp
->rcb_proto
.sp_protocol
, 1);
194 rp
->rcb_laddr
= &route_src
;
195 rp
->rcb_faddr
= &route_dst
;
197 so
->so_options
|= SO_USELOOPBACK
;
203 static const struct sockaddr
*
204 intern_netmask(const struct sockaddr
*mask
)
206 struct radix_node
*rn
;
207 extern struct radix_node_head
*mask_rnhead
;
210 (rn
= rn_search(mask
, mask_rnhead
->rnh_treetop
)))
211 mask
= (const struct sockaddr
*)rn
->rn_key
;
218 route_output(struct mbuf
*m
, ...)
220 struct sockproto proto
= { .sp_family
= PF_ROUTE
, };
221 struct rt_msghdr
*rtm
= NULL
;
222 struct rt_msghdr
*old_rtm
= NULL
;
223 struct rtentry
*rt
= NULL
;
224 struct rtentry
*saved_nrt
= NULL
;
225 struct rt_addrinfo info
;
227 struct ifnet
*ifp
= NULL
;
228 struct ifaddr
*ifa
= NULL
;
234 so
= va_arg(ap
, struct socket
*);
237 #define senderr(e) do { error = e; goto flush;} while (/*CONSTCOND*/ 0)
238 if (m
== NULL
|| ((m
->m_len
< sizeof(int32_t)) &&
239 (m
= m_pullup(m
, sizeof(int32_t))) == NULL
))
241 if ((m
->m_flags
& M_PKTHDR
) == 0)
242 panic("route_output");
243 len
= m
->m_pkthdr
.len
;
244 if (len
< sizeof(*rtm
) ||
245 len
!= mtod(m
, struct rt_msghdr
*)->rtm_msglen
) {
246 info
.rti_info
[RTAX_DST
] = NULL
;
249 R_Malloc(rtm
, struct rt_msghdr
*, len
);
251 info
.rti_info
[RTAX_DST
] = NULL
;
254 m_copydata(m
, 0, len
, rtm
);
255 if (rtm
->rtm_version
!= RTM_VERSION
) {
256 info
.rti_info
[RTAX_DST
] = NULL
;
257 senderr(EPROTONOSUPPORT
);
259 rtm
->rtm_pid
= curproc
->p_pid
;
260 memset(&info
, 0, sizeof(info
));
261 info
.rti_addrs
= rtm
->rtm_addrs
;
262 if (rt_xaddrs(rtm
->rtm_type
, (const char *)(rtm
+ 1), len
+ (char *)rtm
,
265 info
.rti_flags
= rtm
->rtm_flags
;
267 if (info
.rti_info
[RTAX_DST
]->sa_family
== AF_INET
) {
268 printf("%s: extracted info.rti_info[RTAX_DST] %s\n", __func__
,
269 inet_ntoa(((const struct sockaddr_in
*)
270 info
.rti_info
[RTAX_DST
])->sin_addr
));
272 #endif /* RTSOCK_DEBUG */
273 if (info
.rti_info
[RTAX_DST
] == NULL
||
274 (info
.rti_info
[RTAX_DST
]->sa_family
>= AF_MAX
))
276 if (info
.rti_info
[RTAX_GATEWAY
] != NULL
&&
277 (info
.rti_info
[RTAX_GATEWAY
]->sa_family
>= AF_MAX
))
281 * Verify that the caller has the appropriate privilege; RTM_GET
282 * is the only operation the non-superuser is allowed.
284 if (kauth_authorize_network(curlwp
->l_cred
, KAUTH_NETWORK_ROUTE
,
285 0, rtm
, NULL
, NULL
) != 0)
288 switch (rtm
->rtm_type
) {
291 if (info
.rti_info
[RTAX_GATEWAY
] == NULL
)
293 error
= rtrequest1(rtm
->rtm_type
, &info
, &saved_nrt
);
294 if (error
== 0 && saved_nrt
) {
295 rt_setmetrics(rtm
->rtm_inits
,
296 &rtm
->rtm_rmx
, &saved_nrt
->rt_rmx
);
297 saved_nrt
->rt_refcnt
--;
302 error
= rtrequest1(rtm
->rtm_type
, &info
, &saved_nrt
);
304 (rt
= saved_nrt
)->rt_refcnt
++;
312 /* XXX This will mask info.rti_info[RTAX_DST] with
313 * info.rti_info[RTAX_NETMASK] before
314 * searching. It did not used to do that. --dyoung
316 error
= rtrequest1(RTM_GET
, &info
, &rt
);
319 if (rtm
->rtm_type
!= RTM_GET
) {/* XXX: too grotty */
320 struct radix_node
*rn
;
322 if (memcmp(info
.rti_info
[RTAX_DST
], rt_getkey(rt
),
323 info
.rti_info
[RTAX_DST
]->sa_len
) != 0)
325 info
.rti_info
[RTAX_NETMASK
] = intern_netmask(
326 info
.rti_info
[RTAX_NETMASK
]);
327 for (rn
= rt
->rt_nodes
; rn
; rn
= rn
->rn_dupedkey
)
328 if (info
.rti_info
[RTAX_NETMASK
] ==
329 (const struct sockaddr
*)rn
->rn_mask
)
332 senderr(ETOOMANYREFS
);
333 rt
= (struct rtentry
*)rn
;
336 switch (rtm
->rtm_type
) {
339 info
.rti_info
[RTAX_DST
] = rt_getkey(rt
);
340 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
341 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
342 if ((rtm
->rtm_addrs
& (RTA_IFP
| RTA_IFA
)) == 0)
344 else if ((ifp
= rt
->rt_ifp
) != NULL
) {
345 const struct ifaddr
*rtifa
;
346 info
.rti_info
[RTAX_IFP
] = ifp
->if_dl
->ifa_addr
;
347 /* rtifa used to be simply rt->rt_ifa.
348 * If rt->rt_ifa != NULL, then
349 * rt_get_ifa() != NULL. So this
350 * ought to still be safe. --dyoung
352 rtifa
= rt_get_ifa(rt
);
353 info
.rti_info
[RTAX_IFA
] = rtifa
->ifa_addr
;
355 if (info
.rti_info
[RTAX_IFA
]->sa_family
==
357 printf("%s: copying out RTAX_IFA %s ",
359 ((const struct sockaddr_in
*)
360 info
.rti_info
[RTAX_IFA
])->sin_addr
)
362 printf("for info.rti_info[RTAX_DST] %s "
363 "ifa_getifa %p ifa_seqno %p\n",
365 ((const struct sockaddr_in
*)
366 info
.rti_info
[RTAX_DST
])->sin_addr
),
367 (void *)rtifa
->ifa_getifa
,
370 #endif /* RTSOCK_DEBUG */
371 if (ifp
->if_flags
& IFF_POINTOPOINT
) {
372 info
.rti_info
[RTAX_BRD
] =
375 info
.rti_info
[RTAX_BRD
] = NULL
;
376 rtm
->rtm_index
= ifp
->if_index
;
378 info
.rti_info
[RTAX_IFP
] = NULL
;
379 info
.rti_info
[RTAX_IFA
] = NULL
;
381 (void)rt_msg2(rtm
->rtm_type
, &info
, NULL
, NULL
, &len
);
382 if (len
> rtm
->rtm_msglen
) {
384 R_Malloc(rtm
, struct rt_msghdr
*, len
);
387 (void)memcpy(rtm
, old_rtm
, old_rtm
->rtm_msglen
);
389 (void)rt_msg2(rtm
->rtm_type
, &info
, rtm
, NULL
, 0);
390 rtm
->rtm_flags
= rt
->rt_flags
;
391 cvtmetrics(&rtm
->rtm_rmx
, &rt
->rt_rmx
);
392 rtm
->rtm_addrs
= info
.rti_addrs
;
397 * new gateway could require new ifaddr, ifp;
398 * flags may also be different; ifp may be specified
399 * by ll sockaddr when protocol address is ambiguous
401 if ((error
= rt_getifa(&info
)) != 0)
403 if (info
.rti_info
[RTAX_GATEWAY
] &&
404 rt_setgate(rt
, info
.rti_info
[RTAX_GATEWAY
]))
406 /* new gateway could require new ifaddr, ifp;
407 flags may also be different; ifp may be specified
408 by ll sockaddr when protocol address is ambiguous */
409 if (info
.rti_info
[RTAX_IFP
] &&
410 (ifa
= ifa_ifwithnet(info
.rti_info
[RTAX_IFP
])) &&
411 (ifp
= ifa
->ifa_ifp
) && (info
.rti_info
[RTAX_IFA
] ||
412 info
.rti_info
[RTAX_GATEWAY
])) {
413 ifa
= ifaof_ifpforaddr(info
.rti_info
[RTAX_IFA
] ?
414 info
.rti_info
[RTAX_IFA
] :
415 info
.rti_info
[RTAX_GATEWAY
], ifp
);
416 } else if ((info
.rti_info
[RTAX_IFA
] &&
417 (ifa
= ifa_ifwithaddr(info
.rti_info
[RTAX_IFA
]))) ||
418 (info
.rti_info
[RTAX_GATEWAY
] &&
419 (ifa
= ifa_ifwithroute(rt
->rt_flags
,
420 rt_getkey(rt
), info
.rti_info
[RTAX_GATEWAY
])))) {
424 struct ifaddr
*oifa
= rt
->rt_ifa
;
426 if (oifa
&& oifa
->ifa_rtrequest
) {
427 oifa
->ifa_rtrequest(RTM_DELETE
,
430 rt_replace_ifa(rt
, ifa
);
434 rt_setmetrics(rtm
->rtm_inits
, &rtm
->rtm_rmx
,
436 if (rt
->rt_ifa
&& rt
->rt_ifa
->ifa_rtrequest
)
437 rt
->rt_ifa
->ifa_rtrequest(RTM_ADD
, rt
, &info
);
440 rt
->rt_rmx
.rmx_locks
&= ~(rtm
->rtm_inits
);
441 rt
->rt_rmx
.rmx_locks
|=
442 (rtm
->rtm_inits
& rtm
->rtm_rmx
.rmx_locks
);
454 rtm
->rtm_errno
= error
;
456 rtm
->rtm_flags
|= RTF_DONE
;
458 family
= info
.rti_info
[RTAX_DST
] ? info
.rti_info
[RTAX_DST
]->sa_family
:
460 /* We cannot free old_rtm until we have stopped using the
461 * pointers in info, some of which may point to sockaddrs
469 struct rawcb
*rp
= NULL
;
471 * Check to see if we don't want our own messages.
473 if ((so
->so_options
& SO_USELOOPBACK
) == 0) {
474 if (route_cb
.any_count
<= 1) {
480 /* There is another listener, so construct message */
484 m_copyback(m
, 0, rtm
->rtm_msglen
, rtm
);
485 if (m
->m_pkthdr
.len
< rtm
->rtm_msglen
) {
488 } else if (m
->m_pkthdr
.len
> rtm
->rtm_msglen
)
489 m_adj(m
, rtm
->rtm_msglen
- m
->m_pkthdr
.len
);
493 rp
->rcb_proto
.sp_family
= 0; /* Avoid us */
495 proto
.sp_protocol
= family
;
497 raw_input(m
, &proto
, &route_src
, &route_dst
);
499 rp
->rcb_proto
.sp_family
= PF_ROUTE
;
505 rt_setmetrics(u_long which
, const struct rt_metrics
*in
, struct nrt_metrics
*out
)
507 #define metric(f, e) if (which & (f)) out->e = in->e;
508 metric(RTV_RPIPE
, rmx_recvpipe
);
509 metric(RTV_SPIPE
, rmx_sendpipe
);
510 metric(RTV_SSTHRESH
, rmx_ssthresh
);
511 metric(RTV_RTT
, rmx_rtt
);
512 metric(RTV_RTTVAR
, rmx_rttvar
);
513 metric(RTV_HOPCOUNT
, rmx_hopcount
);
514 metric(RTV_MTU
, rmx_mtu
);
515 /* XXX time_t: Will not work after February 2145 (u_long time) */
516 metric(RTV_EXPIRE
, rmx_expire
);
521 rt_xaddrs(u_char rtmtype
, const char *cp
, const char *cplim
,
522 struct rt_addrinfo
*rtinfo
)
524 const struct sockaddr
*sa
= NULL
; /* Quell compiler warning */
527 for (i
= 0; i
< RTAX_MAX
&& cp
< cplim
; i
++) {
528 if ((rtinfo
->rti_addrs
& (1 << i
)) == 0)
530 rtinfo
->rti_info
[i
] = sa
= (const struct sockaddr
*)cp
;
535 * Check for extra addresses specified, except RTM_GET asking
536 * for interface info.
538 if (rtmtype
== RTM_GET
) {
539 if (((rtinfo
->rti_addrs
&
540 (~((1 << RTAX_IFP
) | (1 << RTAX_IFA
)))) & (~0 << i
)) != 0)
542 } else if ((rtinfo
->rti_addrs
& (~0 << i
)) != 0)
544 /* Check for bad data length. */
546 if (i
== RTAX_NETMASK
+ 1 && sa
!= NULL
&&
547 cp
- RT_ROUNDUP(sa
->sa_len
) + sa
->sa_len
== cplim
)
549 * The last sockaddr was info.rti_info[RTAX_NETMASK].
550 * We accept this for now for the sake of old
551 * binaries or third-party software.
561 rt_msg1(int type
, struct rt_addrinfo
*rtinfo
, void *data
, int datalen
)
563 struct rt_msghdr
*rtm
;
566 const struct sockaddr
*sa
;
569 m
= m_gethdr(M_DONTWAIT
, MT_DATA
);
572 MCLAIM(m
, &routedomain
.dom_mowner
);
577 len
= sizeof(struct ifa_msghdr
);
582 len
= sizeof(struct if_msghdr14
);
587 len
= sizeof(struct if_msghdr50
);
592 len
= sizeof(struct if_msghdr
);
597 len
= sizeof(struct if_announcemsghdr
);
601 len
= sizeof(struct rt_msghdr
);
603 if (len
> MHLEN
+ MLEN
)
604 panic("rt_msg1: message too long");
605 else if (len
> MHLEN
) {
606 m
->m_next
= m_get(M_DONTWAIT
, MT_DATA
);
607 if (m
->m_next
== NULL
) {
611 MCLAIM(m
->m_next
, m
->m_owner
);
612 m
->m_pkthdr
.len
= len
;
614 m
->m_next
->m_len
= len
- MHLEN
;
616 m
->m_pkthdr
.len
= m
->m_len
= len
;
618 m
->m_pkthdr
.rcvif
= NULL
;
619 m_copyback(m
, 0, datalen
, data
);
621 (void)memset(mtod(m
, char *) + datalen
, 0, len
- datalen
);
622 rtm
= mtod(m
, struct rt_msghdr
*);
623 for (i
= 0; i
< RTAX_MAX
; i
++) {
624 if ((sa
= rtinfo
->rti_info
[i
]) == NULL
)
626 rtinfo
->rti_addrs
|= (1 << i
);
627 dlen
= RT_ROUNDUP(sa
->sa_len
);
628 m_copyback(m
, len
, dlen
, sa
);
631 if (m
->m_pkthdr
.len
!= len
) {
635 rtm
->rtm_msglen
= len
;
636 rtm
->rtm_version
= RTM_VERSION
;
637 rtm
->rtm_type
= type
;
644 * fills 'cp' or 'w'.w_tmem with the routing socket message and
645 * returns the length of the message in 'lenp'.
647 * if walkarg is 0, cp is expected to be 0 or a buffer large enough to hold
649 * otherwise walkarg's w_needed is updated and if the user buffer is
650 * specified and w_needed indicates space exists the information is copied
651 * into the temp space (w_tmem). w_tmem is [re]allocated if necessary,
652 * if the allocation fails ENOBUFS is returned.
655 rt_msg2(int type
, struct rt_addrinfo
*rtinfo
, void *cpv
, struct rt_walkarg
*w
,
659 int len
, dlen
, second_time
= 0;
660 char *cp0
, *cp
= cpv
;
662 rtinfo
->rti_addrs
= 0;
668 len
= sizeof(struct ifa_msghdr
);
672 len
= sizeof(struct if_msghdr14
);
677 len
= sizeof(struct if_msghdr50
);
682 len
= sizeof(struct if_msghdr
);
686 len
= sizeof(struct rt_msghdr
);
688 if ((cp0
= cp
) != NULL
)
690 for (i
= 0; i
< RTAX_MAX
; i
++) {
691 const struct sockaddr
*sa
;
693 if ((sa
= rtinfo
->rti_info
[i
]) == NULL
)
695 rtinfo
->rti_addrs
|= (1 << i
);
696 dlen
= RT_ROUNDUP(sa
->sa_len
);
698 (void)memcpy(cp
, sa
, (size_t)dlen
);
703 if (cp
== NULL
&& w
!= NULL
&& !second_time
) {
704 struct rt_walkarg
*rw
= w
;
707 if (rw
->w_needed
<= 0 && rw
->w_where
) {
708 if (rw
->w_tmemsize
< len
) {
710 free(rw
->w_tmem
, M_RTABLE
);
711 rw
->w_tmem
= malloc(len
, M_RTABLE
, M_NOWAIT
);
713 rw
->w_tmemsize
= len
;
722 rw
->w_tmemneeded
= len
;
728 struct rt_msghdr
*rtm
= (struct rt_msghdr
*)cp0
;
730 rtm
->rtm_version
= RTM_VERSION
;
731 rtm
->rtm_type
= type
;
732 rtm
->rtm_msglen
= len
;
740 * This routine is called to generate a message from the routing
741 * socket indicating that a redirect has occurred, a routing lookup
742 * has failed, or that a protocol has detected timeouts to a particular
746 rt_missmsg(int type
, struct rt_addrinfo
*rtinfo
, int flags
, int error
)
748 struct rt_msghdr rtm
;
750 const struct sockaddr
*sa
= rtinfo
->rti_info
[RTAX_DST
];
752 if (route_cb
.any_count
== 0)
754 memset(&rtm
, 0, sizeof(rtm
));
755 rtm
.rtm_flags
= RTF_DONE
| flags
;
756 rtm
.rtm_errno
= error
;
757 m
= rt_msg1(type
, rtinfo
, &rtm
, sizeof(rtm
));
760 mtod(m
, struct rt_msghdr
*)->rtm_addrs
= rtinfo
->rti_addrs
;
761 route_enqueue(m
, sa
? sa
->sa_family
: 0);
765 * This routine is called to generate a message from the routing
766 * socket indicating that the status of a network interface has changed.
769 rt_ifmsg(struct ifnet
*ifp
)
771 struct if_msghdr ifm
;
773 struct rt_addrinfo info
;
775 if (route_cb
.any_count
== 0)
777 (void)memset(&info
, 0, sizeof(info
));
778 (void)memset(&ifm
, 0, sizeof(ifm
));
779 ifm
.ifm_index
= ifp
->if_index
;
780 ifm
.ifm_flags
= ifp
->if_flags
;
781 ifm
.ifm_data
= ifp
->if_data
;
783 m
= rt_msg1(RTM_IFINFO
, &info
, &ifm
, sizeof(ifm
));
788 compat_14_rt_ifmsg(ifp
, &ifm
);
791 compat_50_rt_ifmsg(ifp
, &ifm
);
797 * This is called to generate messages from the routing socket
798 * indicating a network interface has had addresses associated with it.
799 * if we ever reverse the logic and replace messages TO the routing
800 * socket indicate a request to configure interfaces, then it will
801 * be unnecessary as the routing socket will automatically generate
805 rt_newaddrmsg(int cmd
, struct ifaddr
*ifa
, int error
, struct rtentry
*rt
)
807 #define cmdpass(__cmd, __pass) (((__cmd) << 2) | (__pass))
808 struct rt_addrinfo info
;
809 const struct sockaddr
*sa
;
812 struct ifnet
*ifp
= ifa
->ifa_ifp
;
813 struct rt_msghdr rtm
;
814 struct ifa_msghdr ifam
;
817 if (route_cb
.any_count
== 0)
819 for (pass
= 1; pass
< 3; pass
++) {
820 memset(&info
, 0, sizeof(info
));
821 switch (cmdpass(cmd
, pass
)) {
822 case cmdpass(RTM_ADD
, 1):
823 case cmdpass(RTM_CHANGE
, 1):
824 case cmdpass(RTM_DELETE
, 2):
830 info
.rti_info
[RTAX_IFA
] = sa
= ifa
->ifa_addr
;
831 info
.rti_info
[RTAX_IFP
] = ifp
->if_dl
->ifa_addr
;
832 info
.rti_info
[RTAX_NETMASK
] = ifa
->ifa_netmask
;
833 info
.rti_info
[RTAX_BRD
] = ifa
->ifa_dstaddr
;
834 memset(&ifam
, 0, sizeof(ifam
));
835 ifam
.ifam_index
= ifp
->if_index
;
836 ifam
.ifam_metric
= ifa
->ifa_metric
;
837 ifam
.ifam_flags
= ifa
->ifa_flags
;
838 m
= rt_msg1(ncmd
, &info
, &ifam
, sizeof(ifam
));
841 mtod(m
, struct ifa_msghdr
*)->ifam_addrs
=
844 case cmdpass(RTM_ADD
, 2):
845 case cmdpass(RTM_CHANGE
, 2):
846 case cmdpass(RTM_DELETE
, 1):
849 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
850 info
.rti_info
[RTAX_DST
] = sa
= rt_getkey(rt
);
851 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
852 memset(&rtm
, 0, sizeof(rtm
));
853 rtm
.rtm_index
= ifp
->if_index
;
854 rtm
.rtm_flags
|= rt
->rt_flags
;
855 rtm
.rtm_errno
= error
;
856 m
= rt_msg1(cmd
, &info
, &rtm
, sizeof(rtm
));
859 mtod(m
, struct rt_msghdr
*)->rtm_addrs
= info
.rti_addrs
;
866 panic("%s: called with wrong command", __func__
);
868 route_enqueue(m
, sa
? sa
->sa_family
: 0);
874 rt_makeifannouncemsg(struct ifnet
*ifp
, int type
, int what
,
875 struct rt_addrinfo
*info
)
877 struct if_announcemsghdr ifan
;
879 memset(info
, 0, sizeof(*info
));
880 memset(&ifan
, 0, sizeof(ifan
));
881 ifan
.ifan_index
= ifp
->if_index
;
882 strlcpy(ifan
.ifan_name
, ifp
->if_xname
, sizeof(ifan
.ifan_name
));
883 ifan
.ifan_what
= what
;
884 return rt_msg1(type
, info
, &ifan
, sizeof(ifan
));
888 * This is called to generate routing socket messages indicating
889 * network interface arrival and departure.
892 rt_ifannouncemsg(struct ifnet
*ifp
, int what
)
895 struct rt_addrinfo info
;
897 if (route_cb
.any_count
== 0)
899 m
= rt_makeifannouncemsg(ifp
, RTM_IFANNOUNCE
, what
, &info
);
906 * This is called to generate routing socket messages indicating
907 * IEEE80211 wireless events.
908 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
911 rt_ieee80211msg(struct ifnet
*ifp
, int what
, void *data
, size_t data_len
)
914 struct rt_addrinfo info
;
916 if (route_cb
.any_count
== 0)
918 m
= rt_makeifannouncemsg(ifp
, RTM_IEEE80211
, what
, &info
);
922 * Append the ieee80211 data. Try to stick it in the
923 * mbuf containing the ifannounce msg; otherwise allocate
924 * a new mbuf and append.
926 * NB: we assume m is a single mbuf.
928 if (data_len
> M_TRAILINGSPACE(m
)) {
929 struct mbuf
*n
= m_get(M_NOWAIT
, MT_DATA
);
934 (void)memcpy(mtod(n
, void *), data
, data_len
);
937 } else if (data_len
> 0) {
938 (void)memcpy(mtod(m
, uint8_t *) + m
->m_len
, data
, data_len
);
939 m
->m_len
+= data_len
;
941 if (m
->m_flags
& M_PKTHDR
)
942 m
->m_pkthdr
.len
+= data_len
;
943 mtod(m
, struct if_announcemsghdr
*)->ifan_msglen
+= data_len
;
948 * This is used in dumping the kernel table via sysctl().
951 sysctl_dumpentry(struct rtentry
*rt
, void *v
)
953 struct rt_walkarg
*w
= v
;
955 struct rt_addrinfo info
;
957 if (w
->w_op
== NET_RT_FLAGS
&& !(rt
->rt_flags
& w
->w_arg
))
959 memset(&info
, 0, sizeof(info
));
960 info
.rti_info
[RTAX_DST
] = rt_getkey(rt
);
961 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
962 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
964 const struct ifaddr
*rtifa
;
965 info
.rti_info
[RTAX_IFP
] = rt
->rt_ifp
->if_dl
->ifa_addr
;
966 /* rtifa used to be simply rt->rt_ifa. If rt->rt_ifa != NULL,
967 * then rt_get_ifa() != NULL. So this ought to still be safe.
970 rtifa
= rt_get_ifa(rt
);
971 info
.rti_info
[RTAX_IFA
] = rtifa
->ifa_addr
;
972 if (rt
->rt_ifp
->if_flags
& IFF_POINTOPOINT
)
973 info
.rti_info
[RTAX_BRD
] = rtifa
->ifa_dstaddr
;
975 if ((error
= rt_msg2(RTM_GET
, &info
, 0, w
, &size
)))
977 if (w
->w_where
&& w
->w_tmem
&& w
->w_needed
<= 0) {
978 struct rt_msghdr
*rtm
= (struct rt_msghdr
*)w
->w_tmem
;
980 rtm
->rtm_flags
= rt
->rt_flags
;
981 rtm
->rtm_use
= rt
->rt_use
;
982 cvtmetrics(&rtm
->rtm_rmx
, &rt
->rt_rmx
);
983 KASSERT(rt
->rt_ifp
!= NULL
);
984 rtm
->rtm_index
= rt
->rt_ifp
->if_index
;
985 rtm
->rtm_errno
= rtm
->rtm_pid
= rtm
->rtm_seq
= 0;
986 rtm
->rtm_addrs
= info
.rti_addrs
;
987 if ((error
= copyout(rtm
, w
->w_where
, size
)) != 0)
990 w
->w_where
= (char *)w
->w_where
+ size
;
996 sysctl_iflist(int af
, struct rt_walkarg
*w
, int type
)
1000 struct rt_addrinfo info
;
1003 memset(&info
, 0, sizeof(info
));
1004 IFNET_FOREACH(ifp
) {
1005 if (w
->w_arg
&& w
->w_arg
!= ifp
->if_index
)
1007 if (IFADDR_EMPTY(ifp
))
1009 info
.rti_info
[RTAX_IFP
] = ifp
->if_dl
->ifa_addr
;
1012 error
= rt_msg2(RTM_IFINFO
, &info
, NULL
, w
, &len
);
1015 case NET_RT_OOIFLIST
:
1016 error
= rt_msg2(RTM_OOIFINFO
, &info
, NULL
, w
, &len
);
1020 case NET_RT_OIFLIST
:
1021 error
= rt_msg2(RTM_OIFINFO
, &info
, NULL
, w
, &len
);
1025 panic("sysctl_iflist(1)");
1029 info
.rti_info
[RTAX_IFP
] = NULL
;
1030 if (w
->w_where
&& w
->w_tmem
&& w
->w_needed
<= 0) {
1032 case NET_RT_IFLIST
: {
1033 struct if_msghdr
*ifm
;
1035 ifm
= (struct if_msghdr
*)w
->w_tmem
;
1036 ifm
->ifm_index
= ifp
->if_index
;
1037 ifm
->ifm_flags
= ifp
->if_flags
;
1038 ifm
->ifm_data
= ifp
->if_data
;
1039 ifm
->ifm_addrs
= info
.rti_addrs
;
1040 error
= copyout(ifm
, w
->w_where
, len
);
1043 w
->w_where
= (char *)w
->w_where
+ len
;
1048 case NET_RT_OOIFLIST
:
1049 error
= compat_14_iflist(ifp
, w
, &info
, len
);
1055 case NET_RT_OIFLIST
:
1056 error
= compat_50_iflist(ifp
, w
, &info
, len
);
1062 panic("sysctl_iflist(2)");
1065 IFADDR_FOREACH(ifa
, ifp
) {
1066 if (af
&& af
!= ifa
->ifa_addr
->sa_family
)
1068 info
.rti_info
[RTAX_IFA
] = ifa
->ifa_addr
;
1069 info
.rti_info
[RTAX_NETMASK
] = ifa
->ifa_netmask
;
1070 info
.rti_info
[RTAX_BRD
] = ifa
->ifa_dstaddr
;
1071 if ((error
= rt_msg2(RTM_NEWADDR
, &info
, 0, w
, &len
)))
1073 if (w
->w_where
&& w
->w_tmem
&& w
->w_needed
<= 0) {
1074 struct ifa_msghdr
*ifam
;
1076 ifam
= (struct ifa_msghdr
*)w
->w_tmem
;
1077 ifam
->ifam_index
= ifa
->ifa_ifp
->if_index
;
1078 ifam
->ifam_flags
= ifa
->ifa_flags
;
1079 ifam
->ifam_metric
= ifa
->ifa_metric
;
1080 ifam
->ifam_addrs
= info
.rti_addrs
;
1081 error
= copyout(w
->w_tmem
, w
->w_where
, len
);
1084 w
->w_where
= (char *)w
->w_where
+ len
;
1087 info
.rti_info
[RTAX_IFA
] = info
.rti_info
[RTAX_NETMASK
] =
1088 info
.rti_info
[RTAX_BRD
] = NULL
;
1094 sysctl_rtable(SYSCTLFN_ARGS
)
1097 size_t *given
= oldlenp
;
1098 const void *new = newp
;
1099 int i
, s
, error
= EINVAL
;
1101 struct rt_walkarg w
;
1103 if (namelen
== 1 && name
[0] == CTL_QUERY
)
1104 return sysctl_query(SYSCTLFN_CALL(rnode
));
1115 /* we may return here if a later [re]alloc of the t_mem buffer fails */
1116 if (w
.w_tmemneeded
) {
1117 w
.w_tmem
= malloc(w
.w_tmemneeded
, M_RTABLE
, M_WAITOK
);
1118 w
.w_tmemsize
= w
.w_tmemneeded
;
1124 w
.w_needed
= 0 - w
.w_given
;
1132 for (i
= 1; i
<= AF_MAX
; i
++)
1133 if ((af
== 0 || af
== i
) &&
1134 (error
= rt_walktree(i
, sysctl_dumpentry
, &w
)))
1139 case NET_RT_OOIFLIST
:
1140 error
= sysctl_iflist(af
, &w
, w
.w_op
);
1144 case NET_RT_OIFLIST
:
1145 error
= sysctl_iflist(af
, &w
, w
.w_op
);
1150 error
= sysctl_iflist(af
, &w
, w
.w_op
);
1154 /* check to see if we couldn't allocate memory with NOWAIT */
1155 if (error
== ENOBUFS
&& w
.w_tmem
== 0 && w
.w_tmemneeded
)
1159 free(w
.w_tmem
, M_RTABLE
);
1160 w
.w_needed
+= w
.w_given
;
1162 *given
= (char *)w
.w_where
- (char *)where
;
1163 if (*given
< w
.w_needed
)
1166 *given
= (11 * w
.w_needed
) / 10;
1172 * Routing message software interrupt routine
1175 route_intr(void *cookie
)
1177 struct sockproto proto
= { .sp_family
= PF_ROUTE
, };
1181 mutex_enter(softnet_lock
);
1182 KERNEL_LOCK(1, NULL
);
1183 while (!IF_IS_EMPTY(&route_intrq
)) {
1185 IF_DEQUEUE(&route_intrq
, m
);
1189 proto
.sp_protocol
= M_GETCTX(m
, uintptr_t);
1190 raw_input(m
, &proto
, &route_src
, &route_dst
);
1192 KERNEL_UNLOCK_ONE(NULL
);
1193 mutex_exit(softnet_lock
);
1197 * Enqueue a message to the software interrupt routine.
1200 route_enqueue(struct mbuf
*m
, int family
)
1205 if (IF_QFULL(&route_intrq
)) {
1206 IF_DROP(&route_intrq
);
1209 wasempty
= IF_IS_EMPTY(&route_intrq
);
1210 M_SETCTX(m
, (uintptr_t)family
);
1211 IF_ENQUEUE(&route_intrq
, m
);
1213 softint_schedule(route_sih
);
1222 sysctl_net_route_setup(NULL
);
1223 route_intrq
.ifq_maxlen
= route_maxqlen
;
1224 route_sih
= softint_establish(SOFTINT_NET
| SOFTINT_MPSAFE
,
1229 * Definitions of protocols supported in the ROUTE domain.
1231 PR_WRAP_USRREQ(route_usrreq
)
1232 #define route_usrreq route_usrreq_wrapper
1234 const struct protosw routesw
[] = {
1236 .pr_type
= SOCK_RAW
,
1237 .pr_domain
= &routedomain
,
1238 .pr_flags
= PR_ATOMIC
|PR_ADDR
,
1239 .pr_input
= raw_input
,
1240 .pr_output
= route_output
,
1241 .pr_ctlinput
= raw_ctlinput
,
1242 .pr_usrreq
= route_usrreq
,
1243 .pr_init
= raw_init
,
1247 struct domain routedomain
= {
1248 .dom_family
= PF_ROUTE
,
1249 .dom_name
= "route",
1250 .dom_init
= route_init
,
1251 .dom_protosw
= routesw
,
1252 .dom_protoswNPROTOSW
= &routesw
[__arraycount(routesw
)],
1256 sysctl_net_route_setup(struct sysctllog
**clog
)
1258 const struct sysctlnode
*rnode
= NULL
;
1260 sysctl_createv(clog
, 0, NULL
, NULL
,
1262 CTLTYPE_NODE
, "net", NULL
,
1266 sysctl_createv(clog
, 0, NULL
, &rnode
,
1268 CTLTYPE_NODE
, "route",
1269 SYSCTL_DESCR("PF_ROUTE information"),
1271 CTL_NET
, PF_ROUTE
, CTL_EOL
);
1272 sysctl_createv(clog
, 0, NULL
, NULL
,
1274 CTLTYPE_NODE
, "rtable",
1275 SYSCTL_DESCR("Routing table information"),
1276 sysctl_rtable
, 0, NULL
, 0,
1277 CTL_NET
, PF_ROUTE
, 0 /* any protocol */, CTL_EOL
);
1278 sysctl_createv(clog
, 0, &rnode
, NULL
,
1280 CTLTYPE_STRUCT
, "stats",
1281 SYSCTL_DESCR("Routing statistics"),
1282 NULL
, 0, &rtstat
, sizeof(rtstat
),
1283 CTL_CREATE
, CTL_EOL
);