1 /* $NetBSD: in_selsrc.c,v 1.7 2009/08/30 02:03:58 dyoung Exp $ */
4 * Copyright (c) 2005 David Young. All rights reserved.
6 * This code was written by David Young.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.7 2009/08/30 02:03:58 dyoung Exp $");
35 #include "opt_inet_conf.h"
37 #include <lib/libkern/libkern.h>
39 #include <sys/param.h>
40 #include <sys/ioctl.h>
41 #include <sys/errno.h>
42 #include <sys/malloc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
48 #include <sys/syslog.h>
52 #include <net/if_ether.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/in_ifattach.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/if_inarp.h>
62 #include <netinet/ip_mroute.h>
63 #include <netinet/igmp_var.h>
64 #include <netinet/in_selsrc.h>
67 struct score_src_name
{
69 const in_score_src_t sn_score_src
;
72 static const struct sysctlnode
*in_domifattach_sysctl(struct in_ifsysctl
*);
73 static int in_preference(const struct in_addr
*, int, int,
74 const struct in_addr
*);
75 static int in_index(const struct in_addr
*, int, int, const struct in_addr
*);
76 static int in_matchlen(const struct in_addr
*, int, int,
77 const struct in_addr
*);
78 static int in_match_category(const struct in_addr
*, int, int,
79 const struct in_addr
*);
80 static size_t in_get_selectsrc(const struct in_ifselsrc
*, char *,
82 static int in_set_selectsrc(struct in_ifselsrc
*, char *buf
);
83 static int in_sysctl_selectsrc(SYSCTLFN_PROTO
);
84 static in_score_src_t
name_to_score_src(const char *);
85 static const char *score_src_to_name(const in_score_src_t
);
86 static void in_score(const in_score_src_t
*, int *, int *,
87 const struct in_addr
*, int, int, const struct in_addr
*);
89 static const struct score_src_name score_src_names
[] = {
90 {"same-category", in_match_category
}
91 , {"common-prefix-len", in_matchlen
}
93 , {"preference", in_preference
}
97 static const struct in_ifselsrc initial_iss
= { 0, {NULL
} };
99 static struct in_ifselsrc default_iss
= { 0, {in_index
} };
102 int in_selsrc_debug
= 0;
103 #endif /* GETIFA_DEBUG */
105 SYSCTL_SETUP(sysctl_selectsrc_setup
, "sysctl selectsrc subtree setup")
108 const struct sysctlnode
*rnode
, *cnode
;
110 if ((rc
= sysctl_createv(clog
, 0, NULL
, &rnode
,
111 CTLFLAG_PERMANENT
, CTLTYPE_NODE
, "net",
112 NULL
, NULL
, 0, NULL
, 0, CTL_NET
, CTL_EOL
)) != 0) {
113 printf("%s: could not create net, rc = %d\n", __func__
, rc
);
116 if ((rc
= sysctl_createv(clog
, 0, NULL
, &rnode
,
117 CTLFLAG_PERMANENT
, CTLTYPE_NODE
, "inet",
118 NULL
, NULL
, 0, NULL
, 0, CTL_NET
, PF_INET
, CTL_EOL
)) != 0) {
119 printf("%s: could not create net.inet, rc = %d\n", __func__
,
123 if ((rc
= sysctl_createv(clog
, 0, NULL
, &rnode
,
124 CTLFLAG_PERMANENT
, CTLTYPE_NODE
, "ip",
125 NULL
, NULL
, 0, NULL
, 0,
126 CTL_NET
, PF_INET
, IPPROTO_IP
, CTL_EOL
)) != 0) {
127 printf("%s: could not create net.inet.ip, rc = %d\n", __func__
,
131 if ((rc
= sysctl_createv(clog
, 0, NULL
, &rnode
,
132 CTLFLAG_PERMANENT
, CTLTYPE_NODE
, "selectsrc",
133 NULL
, NULL
, 0, NULL
, 0,
134 CTL_NET
, PF_INET
, IPPROTO_IP
, CTL_CREATE
, CTL_EOL
)) != 0) {
135 printf("%s: could not create net.inet.ip.selectsrc, "
136 "rc = %d\n", __func__
, rc
);
140 if ((rc
= sysctl_createv(clog
, 0, &rnode
, &cnode
,
141 CTLFLAG_PERMANENT
|CTLFLAG_READWRITE
, CTLTYPE_INT
, "debug",
142 SYSCTL_DESCR("enable source-selection debug messages"),
143 NULL
, 0, &in_selsrc_debug
, 0, CTL_CREATE
, CTL_EOL
)) != 0) {
144 printf("%s: could not create net.inet.ip.selectsrc.debug, "
145 "rc = %d\n", __func__
, rc
);
148 #endif /* GETIFA_DEBUG */
149 if ((rc
= sysctl_createv(clog
, 0, &rnode
, &cnode
,
150 CTLFLAG_READWRITE
, CTLTYPE_STRING
, "default",
151 SYSCTL_DESCR("default source selection policy"),
152 in_sysctl_selectsrc
, 0, &default_iss
, IN_SELECTSRC_LEN
,
153 CTL_CREATE
, CTL_EOL
)) != 0) {
155 "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
/*
 * Score by address preference: prefer addresses with higher preference
 * number.  Preference numbers are assigned with ioctl SIOCSIFADDRPREF.
 *
 * src, idx and dst are not consulted; they are accepted only so that
 * the function has the same signature as the other scorers.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	return preference;
}
/*
 * Score by address "index": prefer addresses nearer the head of
 * the interface's address list.
 *
 * A larger score wins, so the index is negated: position 0 scores
 * highest.  src, preference and dst are not consulted.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	return -idx;
}
/*
 * Length, in bits (0..32), of the longest common prefix of src and dst.
 *
 * (Derived from in6_matchlen.)
 *
 * preference and idx are not consulted; they are accepted only so that
 * the function has the same signature as the other scorers.
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	int match = 0;
	const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst;
	const uint8_t *lim = s + 4;
	unsigned int r = 0;

	/* Every leading octet that agrees contributes eight bits. */
	while (s < lim && (r = (*d++ ^ *s++)) == 0)
		match += 8;

	/*
	 * r == 0 means all four octets agreed.  Test r rather than the
	 * pointer position: after a mismatch in the final octet s has
	 * also reached lim, yet the leading bits of that octet must
	 * still be counted below.
	 */
	if (r == 0)
		return match;

	/* Count the agreeing high-order bits of the first differing octet. */
	while ((r & 0x80) == 0) {
		match++;
		r <<= 1;
	}
	return match;
}
210 static enum in_category
211 in_categorize(const struct in_addr
*s
)
213 if (IN_ANY_LOCAL(s
->s_addr
))
214 return IN_CATEGORY_LINKLOCAL
;
215 else if (IN_PRIVATE(s
->s_addr
))
216 return IN_CATEGORY_PRIVATE
;
218 return IN_CATEGORY_OTHER
;
222 in_match_category(const struct in_addr
*src
, int preference
,
223 int idx
, const struct in_addr
*dst
)
225 enum in_category dst_c
= in_categorize(dst
),
226 src_c
= in_categorize(src
);
228 if (in_selsrc_debug
) {
229 printf("%s: dst %#08" PRIx32
" categ %d, src %#08" PRIx32
230 " categ %d\n", __func__
, ntohl(dst
->s_addr
), dst_c
,
231 ntohl(src
->s_addr
), src_c
);
233 #endif /* GETIFA_DEBUG */
237 else if (dst_c
== IN_CATEGORY_LINKLOCAL
&& src_c
== IN_CATEGORY_PRIVATE
)
239 else if (dst_c
== IN_CATEGORY_PRIVATE
&& src_c
== IN_CATEGORY_LINKLOCAL
)
241 else if (dst_c
== IN_CATEGORY_OTHER
&& src_c
== IN_CATEGORY_PRIVATE
)
248 in_score(const in_score_src_t
*score_src
, int *score
, int *scorelenp
,
249 const struct in_addr
*src
, int preference
, int idx
,
250 const struct in_addr
*dst
)
254 for (i
= 0; i
< IN_SCORE_SRC_MAX
&& score_src
[i
] != NULL
; i
++)
255 score
[i
] = (*score_src
[i
])(src
, preference
, idx
, dst
);
256 if (scorelenp
!= NULL
)
/*
 * Compare two score vectors of scorelen elements lexicographically.
 *
 * Returns a positive value if score1 ranks above score2, a negative
 * value if it ranks below, and 0 if all scorelen elements are equal
 * (including scorelen == 0).  Only the sign is meaningful.
 *
 * The elements are compared rather than subtracted: scores are
 * arbitrary ints, and the difference of two ints can overflow.
 */
static int
in_score_cmp(int *score1, int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] == score2[i])
			continue;
		return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
275 in_score_println(int *score
, int scorelen
)
278 const char *delim
= "[";
280 for (i
= 0; i
< scorelen
; i
++) {
281 printf("%s%d", delim
, score
[i
]);
286 #endif /* GETIFA_DEBUG */
/* Scan the interface addresses on the interface ifa->ifa_ifp for
 * the source address that best matches the destination, dst0,
 * according to the source address-selection policy for this
 * interface.  If there is no better match than `ifa', return `ifa'.
 * Otherwise, return the best address.
 *
 * Note that in_getifa is called after the kernel has decided which
 * output interface to use (ifa->ifa_ifp), and in_getifa will not
 * scan an address belonging to any other interface.
 */
299 in_getifa(struct ifaddr
*ifa
, const struct sockaddr
*dst0
)
301 const in_score_src_t
*score_src
;
303 const struct sockaddr_in
*dst
, *src
;
304 struct ifaddr
*alt_ifa
, *best_ifa
;
306 struct in_ifsysctl
*isc
;
307 struct in_ifselsrc
*iss
;
308 int best_score
[IN_SCORE_SRC_MAX
], score
[IN_SCORE_SRC_MAX
];
310 if (ifa
->ifa_addr
->sa_family
!= AF_INET
||
311 dst0
== NULL
|| dst0
->sa_family
!= AF_INET
) { /* Possible. */
312 ifa
->ifa_seqno
= NULL
;
317 isc
= (struct in_ifsysctl
*)ifp
->if_afdata
[AF_INET
];
318 if (isc
!= NULL
&& isc
->isc_selsrc
!= NULL
&&
319 isc
->isc_selsrc
->iss_score_src
[0] != NULL
)
320 iss
= isc
->isc_selsrc
;
323 score_src
= &iss
->iss_score_src
[0];
325 dst
= (const struct sockaddr_in
*)dst0
;
329 /* Find out the index of this ifaddr. */
331 IFADDR_FOREACH(alt_ifa
, ifa
->ifa_ifp
) {
332 if (alt_ifa
== best_ifa
)
336 in_score(score_src
, best_score
, &scorelen
, &IA_SIN(best_ifa
)->sin_addr
,
337 best_ifa
->ifa_preference
, idx
, &dst
->sin_addr
);
340 if (in_selsrc_debug
) {
341 printf("%s: enter dst %#" PRIx32
" src %#" PRIx32
" score ",
342 __func__
, ntohl(dst
->sin_addr
.s_addr
),
343 ntohl(satosin(best_ifa
->ifa_addr
)->sin_addr
.s_addr
));
344 in_score_println(best_score
, scorelen
);
346 #endif /* GETIFA_DEBUG */
349 IFADDR_FOREACH(alt_ifa
, ifa
->ifa_ifp
) {
351 src
= IA_SIN(alt_ifa
);
353 if (alt_ifa
== ifa
|| src
->sin_family
!= AF_INET
)
356 in_score(score_src
, score
, NULL
, &src
->sin_addr
,
357 alt_ifa
->ifa_preference
, idx
, &dst
->sin_addr
);
360 if (in_selsrc_debug
) {
361 printf("%s: src %#" PRIx32
" score ", __func__
,
362 ntohl(src
->sin_addr
.s_addr
));
363 in_score_println(score
, scorelen
);
365 #endif /* GETIFA_DEBUG */
367 if (in_score_cmp(score
, best_score
, scorelen
) > 0) {
368 (void)memcpy(best_score
, score
, sizeof(best_score
));
373 if (in_selsrc_debug
) {
374 printf("%s: choose src %#" PRIx32
" score ", __func__
,
375 ntohl(IA_SIN(best_ifa
)->sin_addr
.s_addr
));
376 in_score_println(best_score
, scorelen
);
378 #endif /* GETIFA_DEBUG */
380 best_ifa
->ifa_seqno
= &iss
->iss_seqno
;
384 static in_score_src_t
385 name_to_score_src(const char *name
)
389 for (i
= 0; score_src_names
[i
].sn_name
!= NULL
; i
++) {
390 if (strcmp(score_src_names
[i
].sn_name
, name
) == 0)
391 return score_src_names
[i
].sn_score_src
;
397 score_src_to_name(const in_score_src_t score_src
)
400 for (i
= 0; score_src_names
[i
].sn_name
!= NULL
; i
++) {
401 if (score_src
== score_src_names
[i
].sn_score_src
)
402 return score_src_names
[i
].sn_name
;
408 in_get_selectsrc(const struct in_ifselsrc
*iss
, char *buf0
,
409 const size_t buflen0
)
414 size_t buflen
= buflen0
;
416 KASSERT(buflen
>= 1);
418 for (delim
= "", i
= 0;
419 i
< IN_SCORE_SRC_MAX
&& iss
->iss_score_src
[i
] != NULL
;
421 rc
= snprintf(buf
, buflen
, "%s%s",
422 delim
, score_src_to_name(iss
->iss_score_src
[i
]));
424 return buflen0
- buflen
;
426 return buflen0
+ rc
- buflen
;
436 in_set_selectsrc(struct in_ifselsrc
*iss
, char *buf
)
441 in_score_src_t score_src
;
442 in_score_src_t scorers
[IN_SCORE_SRC_MAX
];
444 memset(&scorers
, 0, sizeof(scorers
));
446 (name
= strsep(&next
, ",")) != NULL
&& i
< IN_SCORE_SRC_MAX
;
448 if (strcmp(name
, "") == 0)
450 if ((score_src
= name_to_score_src(name
)) == NULL
)
452 scorers
[i
] = score_src
;
454 if (i
== IN_SCORE_SRC_MAX
&& name
!= NULL
)
457 (void)memcpy(iss
->iss_score_src
, scorers
, sizeof(iss
->iss_score_src
));
458 /* If iss affects a specific interface that used to use
459 * the default policy, increase the sequence number on the
460 * default policy, forcing routes that cache a source
461 * (rt_ifa) found by the default policy to refresh their
464 if (iss
!= &default_iss
&& iss
->iss_score_src
[0] == NULL
&&
466 default_iss
.iss_seqno
++;
/*
 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc
 * (also attached to net.inet.ip.selectsrc.default).
 * Pulls the old value out as a human-readable string, interprets
 * and records the new value.
 */
478 in_sysctl_selectsrc(SYSCTLFN_ARGS
)
480 char policy
[IN_SELECTSRC_LEN
];
482 struct sysctlnode node
;
483 struct in_ifselsrc
*iss
;
486 iss
= (struct in_ifselsrc
*)node
.sysctl_data
;
488 (error
= in_get_selectsrc(iss
, policy
, sizeof(policy
))) >= sizeof(policy
))
490 node
.sysctl_data
= &policy
[0];
491 error
= sysctl_lookup(SYSCTLFN_CALL(&node
));
492 if (error
|| newp
== NULL
)
495 return in_set_selectsrc(iss
, policy
);
498 static const struct sysctlnode
*
499 in_domifattach_sysctl(struct in_ifsysctl
*isc
)
502 const struct sysctlnode
*rnode
;
504 if ((rc
= sysctl_createv(&isc
->isc_log
, 0, NULL
, &rnode
,
505 CTLFLAG_READONLY
, CTLTYPE_NODE
,
508 CTL_NET
, PF_INET
, IPPROTO_IP
, CTL_CREATE
,
510 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
514 if ((rc
= sysctl_createv(&isc
->isc_log
, 0, &rnode
, &rnode
,
515 CTLFLAG_READONLY
, CTLTYPE_NODE
,
516 isc
->isc_ifp
->if_xname
,
517 SYSCTL_DESCR("interface ip options"),
518 NULL
, 0, NULL
, 0, CTL_CREATE
, CTL_EOL
)) != 0) {
519 printf("%s: could not create net.inet.ip.interfaces.%s, "
520 "rc = %d\n", __func__
, isc
->isc_ifp
->if_xname
, rc
);
523 if ((rc
= sysctl_createv(&isc
->isc_log
, 0, &rnode
, &rnode
,
524 CTLFLAG_READWRITE
, CTLTYPE_STRING
,
526 SYSCTL_DESCR("source selection policy"),
527 in_sysctl_selectsrc
, 0,
528 isc
->isc_selsrc
, IN_SELECTSRC_LEN
,
529 CTL_CREATE
, CTL_EOL
)) != 0) {
531 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
532 __func__
, isc
->isc_ifp
->if_xname
, rc
);
537 sysctl_teardown(&isc
->isc_log
);
542 in_domifattach(struct ifnet
*ifp
)
544 struct in_ifsysctl
*isc
;
545 struct in_ifselsrc
*iss
;
547 isc
= (struct in_ifsysctl
*)malloc(sizeof(*isc
), M_IFADDR
,
550 iss
= (struct in_ifselsrc
*)malloc(sizeof(*iss
), M_IFADDR
,
553 memcpy(&iss
->iss_score_src
[0], &initial_iss
.iss_score_src
[0],
554 MIN(sizeof(iss
->iss_score_src
), sizeof(initial_iss
.iss_score_src
)));
557 isc
->isc_selsrc
= iss
;
559 if (in_domifattach_sysctl(isc
) == NULL
)
570 in_domifdetach(struct ifnet
*ifp
, void *aux
)
572 struct in_ifsysctl
*isc
;
573 struct in_ifselsrc
*iss
;
577 isc
= (struct in_ifsysctl
*)aux
;
578 iss
= isc
->isc_selsrc
;
579 sysctl_teardown(&isc
->isc_log
);