Sync usage with man page.
[netbsd-mini2440.git] / sys / netinet / in_selsrc.c
blob6c68dc7293edc151d012913aca012914c06aa172
1 /* $NetBSD: in_selsrc.c,v 1.7 2009/08/30 02:03:58 dyoung Exp $ */
3 /*-
4 * Copyright (c) 2005 David Young. All rights reserved.
6 * This code was written by David Young.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.7 2009/08/30 02:03:58 dyoung Exp $");
34 #include "opt_inet.h"
35 #include "opt_inet_conf.h"
37 #include <lib/libkern/libkern.h>
39 #include <sys/param.h>
40 #include <sys/ioctl.h>
41 #include <sys/errno.h>
42 #include <sys/malloc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/syslog.h>
50 #include <net/if.h>
52 #include <net/if_ether.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/in_ifattach.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/if_inarp.h>
62 #include <netinet/ip_mroute.h>
63 #include <netinet/igmp_var.h>
64 #include <netinet/in_selsrc.h>
66 #ifdef INET
67 struct score_src_name {
68 const char *sn_name;
69 const in_score_src_t sn_score_src;
72 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
73 static int in_preference(const struct in_addr *, int, int,
74 const struct in_addr *);
75 static int in_index(const struct in_addr *, int, int, const struct in_addr *);
76 static int in_matchlen(const struct in_addr *, int, int,
77 const struct in_addr *);
78 static int in_match_category(const struct in_addr *, int, int,
79 const struct in_addr *);
80 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
81 const size_t);
82 static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
83 static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
84 static in_score_src_t name_to_score_src(const char *);
85 static const char *score_src_to_name(const in_score_src_t);
86 static void in_score(const in_score_src_t *, int *, int *,
87 const struct in_addr *, int, int, const struct in_addr *);
89 static const struct score_src_name score_src_names[] = {
90 {"same-category", in_match_category}
91 , {"common-prefix-len", in_matchlen}
92 , {"index", in_index}
93 , {"preference", in_preference}
94 , {NULL, NULL}
97 static const struct in_ifselsrc initial_iss = { 0, {NULL} };
99 static struct in_ifselsrc default_iss = { 0, {in_index} };
101 #ifdef GETIFA_DEBUG
102 int in_selsrc_debug = 0;
103 #endif /* GETIFA_DEBUG */
105 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
107 int rc;
108 const struct sysctlnode *rnode, *cnode;
110 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
111 CTLFLAG_PERMANENT, CTLTYPE_NODE, "net",
112 NULL, NULL, 0, NULL, 0, CTL_NET, CTL_EOL)) != 0) {
113 printf("%s: could not create net, rc = %d\n", __func__, rc);
114 return;
116 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
117 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
118 NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
119 printf("%s: could not create net.inet, rc = %d\n", __func__,
120 rc);
121 return;
123 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
124 CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
125 NULL, NULL, 0, NULL, 0,
126 CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
127 printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
128 rc);
129 return;
131 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
132 CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
133 NULL, NULL, 0, NULL, 0,
134 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
135 printf("%s: could not create net.inet.ip.selectsrc, "
136 "rc = %d\n", __func__, rc);
137 return;
139 #ifdef GETIFA_DEBUG
140 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
141 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
142 SYSCTL_DESCR("enable source-selection debug messages"),
143 NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
144 printf("%s: could not create net.inet.ip.selectsrc.debug, "
145 "rc = %d\n", __func__, rc);
146 return;
148 #endif /* GETIFA_DEBUG */
149 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
150 CTLFLAG_READWRITE, CTLTYPE_STRING, "default",
151 SYSCTL_DESCR("default source selection policy"),
152 in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
153 CTL_CREATE, CTL_EOL)) != 0) {
154 printf(
155 "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
156 __func__, rc);
157 return;
/*
 * Scorer: rank a candidate source by its address preference number;
 * a higher number wins.  Preference numbers are assigned with ioctl
 * SIOCSIFADDRPREF.
 *
 * Only `preference' is consulted; the remaining arguments exist to
 * satisfy the common scorer signature.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	(void)src;
	(void)idx;
	(void)dst;

	return preference;
}
/*
 * Scorer: rank a candidate source by its "index", i.e. its position
 * in the ifaddr list.  The index is negated so that addresses nearer
 * the head of the list receive the higher score.
 *
 * Only `idx' is consulted; the remaining arguments exist to satisfy
 * the common scorer signature.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	(void)src;
	(void)preference;
	(void)dst;

	return -idx;
}
/*
 * Scorer: length, in bits (0..32), of the longest common prefix of
 * src and dst.  The addresses are compared octet by octet in memory
 * order, then bit by bit (most significant first) inside the first
 * octet that differs.
 *
 * (Derived from in6_matchlen.)
 *
 * `preference' and `idx' are unused; they exist to satisfy the common
 * scorer signature.
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const uint8_t *s = (const uint8_t *)src;
	const uint8_t *d = (const uint8_t *)dst;
	const uint8_t *lim = s + 4;	/* an IPv4 address is four octets */
	uint_fast8_t r = 0;
	int match = 0;

	(void)preference;
	(void)idx;

	/* Whole octets in common; r keeps the XOR of the first mismatch. */
	while (s < lim && (r = (*d++ ^ *s++)) == 0)
		match += 8;

	/*
	 * Test r, not the cursor: s has already been advanced past the
	 * octet that was just compared, so `s == lim' is also true when
	 * the mismatch sits in the last octet, and the partial-octet
	 * count below would wrongly be skipped.
	 */
	if (r == 0)
		return match;

	/* Count the leading bits the differing octets still share. */
	while ((r & 0x80) == 0) {
		match++;
		r <<= 1;
	}
	return match;
}
210 static enum in_category
211 in_categorize(const struct in_addr *s)
213 if (IN_ANY_LOCAL(s->s_addr))
214 return IN_CATEGORY_LINKLOCAL;
215 else if (IN_PRIVATE(s->s_addr))
216 return IN_CATEGORY_PRIVATE;
217 else
218 return IN_CATEGORY_OTHER;
221 static int
222 in_match_category(const struct in_addr *src, int preference,
223 int idx, const struct in_addr *dst)
225 enum in_category dst_c = in_categorize(dst),
226 src_c = in_categorize(src);
227 #ifdef GETIFA_DEBUG
228 if (in_selsrc_debug) {
229 printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
230 " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
231 ntohl(src->s_addr), src_c);
233 #endif /* GETIFA_DEBUG */
235 if (dst_c == src_c)
236 return 2;
237 else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
238 return 1;
239 else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
240 return 1;
241 else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
242 return 1;
243 else
244 return 0;
247 static void
248 in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
249 const struct in_addr *src, int preference, int idx,
250 const struct in_addr *dst)
252 int i;
254 for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
255 score[i] = (*score_src[i])(src, preference, idx, dst);
256 if (scorelenp != NULL)
257 *scorelenp = i;
/*
 * Compare two score vectors lexicographically.
 *
 * score1, score2	vectors holding at least `scorelen' entries each
 * scorelen		number of leading entries to compare
 *
 * Returns a positive value if score1 ranks above score2, a negative
 * value if it ranks below, and 0 if the first `scorelen' entries are
 * equal.  The first differing entry decides the result.
 *
 * The entries are compared rather than subtracted: a subtraction of
 * two ints of opposite sign and large magnitude overflows, which is
 * undefined behaviour and can yield a result with the wrong sign.
 */
static int
in_score_cmp(int *score1, int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] != score2[i])
			return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
#ifdef GETIFA_DEBUG
/*
 * Debug aid: print the first `scorelen' entries of a score vector as
 * "[a, b, c]" followed by a newline.  With no entries only "]" and
 * the newline are printed.
 */
static void
in_score_println(int *score, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++)
		printf("%s%d", (i == 0) ? "[" : ", ", score[i]);
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
288 /* Scan the interface addresses on the interface ifa->ifa_ifp for
289 * the source address that best matches the destination, dst0,
290 * according to the source address-selection policy for this
291 * interface. If there is no better match than `ifa', return `ifa'.
292 * Otherwise, return the best address.
294 * Note that in_getifa is called after the kernel has decided which
295 * output interface to use (ifa->ifa_ifp), and in_getifa will not
296 * scan an address belonging to any other interface.
298 struct ifaddr *
299 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
301 const in_score_src_t *score_src;
302 int idx, scorelen;
303 const struct sockaddr_in *dst, *src;
304 struct ifaddr *alt_ifa, *best_ifa;
305 struct ifnet *ifp;
306 struct in_ifsysctl *isc;
307 struct in_ifselsrc *iss;
308 int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
310 if (ifa->ifa_addr->sa_family != AF_INET ||
311 dst0 == NULL || dst0->sa_family != AF_INET) { /* Possible. */
312 ifa->ifa_seqno = NULL;
313 return ifa;
316 ifp = ifa->ifa_ifp;
317 isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET];
318 if (isc != NULL && isc->isc_selsrc != NULL &&
319 isc->isc_selsrc->iss_score_src[0] != NULL)
320 iss = isc->isc_selsrc;
321 else
322 iss = &default_iss;
323 score_src = &iss->iss_score_src[0];
325 dst = (const struct sockaddr_in *)dst0;
327 best_ifa = ifa;
329 /* Find out the index of this ifaddr. */
330 idx = 0;
331 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
332 if (alt_ifa == best_ifa)
333 break;
334 idx++;
336 in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
337 best_ifa->ifa_preference, idx, &dst->sin_addr);
339 #ifdef GETIFA_DEBUG
340 if (in_selsrc_debug) {
341 printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
342 __func__, ntohl(dst->sin_addr.s_addr),
343 ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
344 in_score_println(best_score, scorelen);
346 #endif /* GETIFA_DEBUG */
348 idx = -1;
349 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
350 ++idx;
351 src = IA_SIN(alt_ifa);
353 if (alt_ifa == ifa || src->sin_family != AF_INET)
354 continue;
356 in_score(score_src, score, NULL, &src->sin_addr,
357 alt_ifa->ifa_preference, idx, &dst->sin_addr);
359 #ifdef GETIFA_DEBUG
360 if (in_selsrc_debug) {
361 printf("%s: src %#" PRIx32 " score ", __func__,
362 ntohl(src->sin_addr.s_addr));
363 in_score_println(score, scorelen);
365 #endif /* GETIFA_DEBUG */
367 if (in_score_cmp(score, best_score, scorelen) > 0) {
368 (void)memcpy(best_score, score, sizeof(best_score));
369 best_ifa = alt_ifa;
372 #ifdef GETIFA_DEBUG
373 if (in_selsrc_debug) {
374 printf("%s: choose src %#" PRIx32 " score ", __func__,
375 ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
376 in_score_println(best_score, scorelen);
378 #endif /* GETIFA_DEBUG */
380 best_ifa->ifa_seqno = &iss->iss_seqno;
381 return best_ifa;
384 static in_score_src_t
385 name_to_score_src(const char *name)
387 int i;
389 for (i = 0; score_src_names[i].sn_name != NULL; i++) {
390 if (strcmp(score_src_names[i].sn_name, name) == 0)
391 return score_src_names[i].sn_score_src;
393 return NULL;
396 static const char *
397 score_src_to_name(const in_score_src_t score_src)
399 int i;
400 for (i = 0; score_src_names[i].sn_name != NULL; i++) {
401 if (score_src == score_src_names[i].sn_score_src)
402 return score_src_names[i].sn_name;
404 return "<unknown>";
407 static size_t
408 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
409 const size_t buflen0)
411 int i, rc;
412 char *buf = buf0;
413 const char *delim;
414 size_t buflen = buflen0;
416 KASSERT(buflen >= 1);
418 for (delim = "", i = 0;
419 i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
420 delim = ",", i++) {
421 rc = snprintf(buf, buflen, "%s%s",
422 delim, score_src_to_name(iss->iss_score_src[i]));
423 if (rc == -1)
424 return buflen0 - buflen;
425 if (rc >= buflen)
426 return buflen0 + rc - buflen;
427 buf += rc;
428 buflen -= rc;
430 if (buf == buf0)
431 *buf++ = '\0';
432 return buf - buf0;
435 static int
436 in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
438 int i, s;
439 char *next = buf;
440 const char *name;
441 in_score_src_t score_src;
442 in_score_src_t scorers[IN_SCORE_SRC_MAX];
444 memset(&scorers, 0, sizeof(scorers));
445 for (i = 0;
446 (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
447 i++) {
448 if (strcmp(name, "") == 0)
449 break;
450 if ((score_src = name_to_score_src(name)) == NULL)
451 return EINVAL;
452 scorers[i] = score_src;
454 if (i == IN_SCORE_SRC_MAX && name != NULL)
455 return EFBIG;
456 s = splnet();
457 (void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
458 /* If iss affects a specific interface that used to use
459 * the default policy, increase the sequence number on the
460 * default policy, forcing routes that cache a source
461 * (rt_ifa) found by the default policy to refresh their
462 * cache.
464 if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
465 scorers[0] != NULL)
466 default_iss.iss_seqno++;
467 iss->iss_seqno++;
468 splx(s);
469 return 0;
473 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
474 * Pulls the old value out as a human-readable string, interprets
475 * and records the new value.
477 static int
478 in_sysctl_selectsrc(SYSCTLFN_ARGS)
480 char policy[IN_SELECTSRC_LEN];
481 int error;
482 struct sysctlnode node;
483 struct in_ifselsrc *iss;
485 node = *rnode;
486 iss = (struct in_ifselsrc *)node.sysctl_data;
487 if (oldp != NULL &&
488 (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
489 return error;
490 node.sysctl_data = &policy[0];
491 error = sysctl_lookup(SYSCTLFN_CALL(&node));
492 if (error || newp == NULL)
493 return (error);
495 return in_set_selectsrc(iss, policy);
498 static const struct sysctlnode *
499 in_domifattach_sysctl(struct in_ifsysctl *isc)
501 int rc;
502 const struct sysctlnode *rnode;
504 if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
505 CTLFLAG_READONLY, CTLTYPE_NODE,
506 "interfaces", NULL,
507 NULL, 0, NULL, 0,
508 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
509 CTL_EOL)) != 0) {
510 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
511 __func__, rc);
512 return NULL;
514 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
515 CTLFLAG_READONLY, CTLTYPE_NODE,
516 isc->isc_ifp->if_xname,
517 SYSCTL_DESCR("interface ip options"),
518 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
519 printf("%s: could not create net.inet.ip.interfaces.%s, "
520 "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
521 goto err;
523 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
524 CTLFLAG_READWRITE, CTLTYPE_STRING,
525 "selectsrc",
526 SYSCTL_DESCR("source selection policy"),
527 in_sysctl_selectsrc, 0,
528 isc->isc_selsrc, IN_SELECTSRC_LEN,
529 CTL_CREATE, CTL_EOL)) != 0) {
530 printf(
531 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
532 __func__, isc->isc_ifp->if_xname, rc);
533 goto err;
535 return rnode;
536 err:
537 sysctl_teardown(&isc->isc_log);
538 return NULL;
541 void *
542 in_domifattach(struct ifnet *ifp)
544 struct in_ifsysctl *isc;
545 struct in_ifselsrc *iss;
547 isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
548 M_WAITOK | M_ZERO);
550 iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
551 M_WAITOK | M_ZERO);
553 memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
554 MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
556 isc->isc_ifp = ifp;
557 isc->isc_selsrc = iss;
559 if (in_domifattach_sysctl(isc) == NULL)
560 goto err;
562 return isc;
563 err:
564 free(iss, M_IFADDR);
565 free(isc, M_IFADDR);
566 return NULL;
569 void
570 in_domifdetach(struct ifnet *ifp, void *aux)
572 struct in_ifsysctl *isc;
573 struct in_ifselsrc *iss;
575 if (aux == NULL)
576 return;
577 isc = (struct in_ifsysctl *)aux;
578 iss = isc->isc_selsrc;
579 sysctl_teardown(&isc->isc_log);
580 free(isc, M_IFADDR);
581 free(iss, M_IFADDR);
583 #endif /* INET */