/* [cor_2_6_31.git] net/ipv4/fib_frontend.c */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/list.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES

static int __net_init fib4_rules_init(struct net *net)
{
	struct fib_table *local_table, *main_table;

	local_table = fib_hash_table(RT_TABLE_LOCAL);
	if (local_table == NULL)
		return -ENOMEM;

	main_table = fib_hash_table(RT_TABLE_MAIN);
	if (main_table == NULL)
		goto fail;

	hlist_add_head_rcu(&local_table->tb_hlist,
				&net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
	hlist_add_head_rcu(&main_table->tb_hlist,
				&net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
	return 0;

fail:
	kfree(local_table);
	return -ENOMEM;
}
#else

struct fib_table *fib_new_table(struct net *net, u32 id)
{
	struct fib_table *tb;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	tb = fib_get_table(net, id);
	if (tb)
		return tb;

	tb = fib_hash_table(id);
	if (!tb)
		return NULL;
	h = id & (FIB_TABLE_HASHSZ - 1);
	hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
	return tb;
}
struct fib_table *fib_get_table(struct net *net, u32 id)
{
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	unsigned int h;

	if (id == 0)
		id = RT_TABLE_MAIN;
	h = id & (FIB_TABLE_HASHSZ - 1);

	rcu_read_lock();
	head = &net->ipv4.fib_table_hash[h];
	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
		if (tb->tb_id == id) {
			rcu_read_unlock();
			return tb;
		}
	}
	rcu_read_unlock();
	return NULL;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
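/*
 * fib_select_default() is typically reached from the output routing path
 * once a lookup has matched a default route: if the result uses an on-link
 * gateway (nexthop scope RT_SCOPE_LINK), the table's tb_select_default()
 * is asked to choose among the candidate default routes.  With
 * CONFIG_IP_MULTIPLE_TABLES this only applies when the matching rule
 * points to a table (FR_ACT_TO_TBL).
 */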
void fib_select_default(struct net *net,
			const struct flowi *flp, struct fib_result *res)
{
	struct fib_table *tb;
	int table = RT_TABLE_MAIN;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
		return;
	table = res->r->table;
#endif
	tb = fib_get_table(net, table);
	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
		tb->tb_select_default(tb, flp, res);
}
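/*
 * fib_flush() walks every table in this namespace and asks it (via
 * tb_flush()) to drop entries that have become stale; the IPv4 routing
 * cache is flushed only if something was actually removed.
 */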
static void fib_flush(struct net *net)
{
	int flushed = 0;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	unsigned int h;

	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist)
			flushed += tb->tb_flush(tb);
	}

	if (flushed)
		rt_cache_flush(net, -1);
}
/*
 *	Find the first device with a given source address.
 */

struct net_device *ip_dev_find(struct net *net, __be32 addr)
{
	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
	struct fib_result res;
	struct net_device *dev = NULL;
	struct fib_table *local_table;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	local_table = fib_get_table(net, RT_TABLE_LOCAL);
	if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
		return NULL;
	if (res.type != RTN_LOCAL)
		goto out;
	dev = FIB_RES_DEV(res);

	if (dev)
		dev_hold(dev);
out:
	fib_res_put(&res);
	return dev;
}
/*
 * Find address type as if only "dev" was present in the system. If
 * on_dev is NULL then all interfaces are taken into consideration.
 */
static inline unsigned __inet_dev_addr_type(struct net *net,
					    const struct net_device *dev,
					    __be32 addr)
{
	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
	struct fib_result res;
	unsigned ret = RTN_BROADCAST;
	struct fib_table *local_table;

	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
		return RTN_BROADCAST;
	if (ipv4_is_multicast(addr))
		return RTN_MULTICAST;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	local_table = fib_get_table(net, RT_TABLE_LOCAL);
	if (local_table) {
		ret = RTN_UNICAST;
		if (!local_table->tb_lookup(local_table, &fl, &res)) {
			if (!dev || dev == res.fi->fib_dev)
				ret = res.type;
			fib_res_put(&res);
		}
	}
	return ret;
}
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
	return __inet_dev_addr_type(net, NULL, addr);
}

unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
				__be32 addr)
{
	return __inet_dev_addr_type(net, dev, addr);
}
/* Given (packet source, input interface) and optional (dst, oif, tos):
   - (main) check that the source is valid, i.e. not a broadcast or one of
     our local addresses.
   - figure out what "logical" interface this packet arrived on
     and calculate the "specific destination" address.
   - check that the packet arrived from the expected physical interface.
 */

int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
			struct net_device *dev, __be32 *spec_dst, u32 *itag)
{
	struct in_device *in_dev;
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = src,
					.saddr = dst,
					.tos = tos } },
			    .iif = oif };
	struct fib_result res;
	int no_addr, rpf;
	int ret;
	struct net *net;

	no_addr = rpf = 0;
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dev);
	if (in_dev) {
		no_addr = in_dev->ifa_list == NULL;
		rpf = IN_DEV_RPFILTER(in_dev);
	}
	rcu_read_unlock();

	if (in_dev == NULL)
		goto e_inval;

	net = dev_net(dev);
	if (fib_lookup(net, &fl, &res))
		goto last_resort;
	if (res.type != RTN_UNICAST)
		goto e_inval_res;
	*spec_dst = FIB_RES_PREFSRC(res);
	fib_combine_itag(itag, &res);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
	if (FIB_RES_DEV(res) == dev)
#endif
	{
		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		fib_res_put(&res);
		return ret;
	}
	fib_res_put(&res);
	if (no_addr)
		goto last_resort;
	if (rpf == 1)
		goto e_inval;
	fl.oif = dev->ifindex;

	ret = 0;
	if (fib_lookup(net, &fl, &res) == 0) {
		if (res.type == RTN_UNICAST) {
			*spec_dst = FIB_RES_PREFSRC(res);
			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
		}
		fib_res_put(&res);
	}
	return ret;

last_resort:
	if (rpf)
		goto e_inval;
	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	*itag = 0;
	return 0;

e_inval_res:
	fib_res_put(&res);
e_inval:
	return -EINVAL;
}
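/*
 * The helpers below serve the legacy SIOCADDRT/SIOCDELRT ioctl interface
 * (as used by the classic net-tools "route" command):
 * rtentry_to_fib_config() translates a struct rtentry into the
 * struct fib_config that the FIB tables understand, and put_rtax()
 * appends one metric attribute (struct nlattr) to the buffer that will
 * become cfg->fc_mx.
 */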
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
}
static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
{
	struct nlattr *nla;

	nla = (struct nlattr *) ((char *) mx + len);
	nla->nla_type = type;
	nla->nla_len = nla_attr_size(4);
	*(u32 *) nla_data(nla) = value;

	return len + nla_total_size(4);
}
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
				 struct fib_config *cfg)
{
	__be32 addr;
	int plen;

	memset(cfg, 0, sizeof(*cfg));
	cfg->fc_nlinfo.nl_net = net;

	if (rt->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Check mask for validity:
	 * a) it must be contiguous.
	 * b) destination must have all host bits clear.
	 * c) if application forgot to set correct family (AF_INET),
	 *    reject request unless it is absolutely clear i.e.
	 *    both family and mask are zero.
	 */
	plen = 32;
	addr = sk_extract_addr(&rt->rt_dst);
	if (!(rt->rt_flags & RTF_HOST)) {
		__be32 mask = sk_extract_addr(&rt->rt_genmask);

		if (rt->rt_genmask.sa_family != AF_INET) {
			if (mask || rt->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}

		if (bad_mask(mask, addr))
			return -EINVAL;

		plen = inet_mask_len(mask);
	}

	cfg->fc_dst_len = plen;
	cfg->fc_dst = addr;

	if (cmd != SIOCDELRT) {
		cfg->fc_nlflags = NLM_F_CREATE;
		cfg->fc_protocol = RTPROT_BOOT;
	}

	if (rt->rt_metric)
		cfg->fc_priority = rt->rt_metric - 1;

	if (rt->rt_flags & RTF_REJECT) {
		cfg->fc_scope = RT_SCOPE_HOST;
		cfg->fc_type = RTN_UNREACHABLE;
		return 0;
	}

	cfg->fc_scope = RT_SCOPE_NOWHERE;
	cfg->fc_type = RTN_UNICAST;

	if (rt->rt_dev) {
		char *colon;
		struct net_device *dev;
		char devname[IFNAMSIZ];

		if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
			return -EFAULT;

		devname[IFNAMSIZ-1] = 0;
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(net, devname);
		if (!dev)
			return -ENODEV;
		cfg->fc_oif = dev->ifindex;
		if (colon) {
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get_rtnl(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			cfg->fc_prefsrc = ifa->ifa_local;
		}
	}

	addr = sk_extract_addr(&rt->rt_gateway);
	if (rt->rt_gateway.sa_family == AF_INET && addr) {
		cfg->fc_gw = addr;
		if (rt->rt_flags & RTF_GATEWAY &&
		    inet_addr_type(net, addr) == RTN_UNICAST)
			cfg->fc_scope = RT_SCOPE_UNIVERSE;
	}

	if (cmd == SIOCDELRT)
		return 0;

	if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
		return -EINVAL;

	if (cfg->fc_scope == RT_SCOPE_NOWHERE)
		cfg->fc_scope = RT_SCOPE_LINK;

	if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
		struct nlattr *mx;
		int len = 0;

		mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;

		if (rt->rt_flags & RTF_MTU)
			len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

		if (rt->rt_flags & RTF_WINDOW)
			len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

		if (rt->rt_flags & RTF_IRTT)
			len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

		cfg->fc_mx = mx;
		cfg->fc_mx_len = len;
	}

	return 0;
}
/*
 *	Handle IP routing ioctl calls. These are used to manipulate the routing tables.
 */

int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib_config cfg;
	struct rtentry rt;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;

		if (copy_from_user(&rt, arg, sizeof(rt)))
			return -EFAULT;

		rtnl_lock();
		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
		if (err == 0) {
			struct fib_table *tb;

			if (cmd == SIOCDELRT) {
				tb = fib_get_table(net, cfg.fc_table);
				if (tb)
					err = tb->tb_delete(tb, &cfg);
				else
					err = -ESRCH;
			} else {
				tb = fib_new_table(net, cfg.fc_table);
				if (tb)
					err = tb->tb_insert(tb, &cfg);
				else
					err = -ENOBUFS;
			}

			/* allocated by rtentry_to_fib_config() */
			kfree(cfg.fc_mx);
		}
		rtnl_unlock();
		return err;
	}
	return -EINVAL;
}
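/*
 * rtnetlink side of the frontend: rtm_ipv4_policy describes the attributes
 * accepted in RTM_NEWROUTE/RTM_DELROUTE/RTM_GETROUTE messages, and
 * rtm_to_fib_config() copies the validated rtmsg header and attributes
 * into a struct fib_config.
 */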
const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_SRC]		= { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_GATEWAY]		= { .type = NLA_U32 },
	[RTA_PRIORITY]		= { .type = NLA_U32 },
	[RTA_PREFSRC]		= { .type = NLA_U32 },
	[RTA_METRICS]		= { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_FLOW]		= { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
			     struct nlmsghdr *nlh, struct fib_config *cfg)
{
	struct nlattr *attr;
	int err, remaining;
	struct rtmsg *rtm;

	err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	memset(cfg, 0, sizeof(*cfg));

	rtm = nlmsg_data(nlh);
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_tos = rtm->rtm_tos;
	cfg->fc_table = rtm->rtm_table;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_scope = rtm->rtm_scope;
	cfg->fc_type = rtm->rtm_type;
	cfg->fc_flags = rtm->rtm_flags;
	cfg->fc_nlflags = nlh->nlmsg_flags;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = net;

	if (cfg->fc_type > RTN_MAX) {
		err = -EINVAL;
		goto errout;
	}

	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
		switch (nla_type(attr)) {
		case RTA_DST:
			cfg->fc_dst = nla_get_be32(attr);
			break;
		case RTA_OIF:
			cfg->fc_oif = nla_get_u32(attr);
			break;
		case RTA_GATEWAY:
			cfg->fc_gw = nla_get_be32(attr);
			break;
		case RTA_PRIORITY:
			cfg->fc_priority = nla_get_u32(attr);
			break;
		case RTA_PREFSRC:
			cfg->fc_prefsrc = nla_get_be32(attr);
			break;
		case RTA_METRICS:
			cfg->fc_mx = nla_data(attr);
			cfg->fc_mx_len = nla_len(attr);
			break;
		case RTA_MULTIPATH:
			cfg->fc_mp = nla_data(attr);
			cfg->fc_mp_len = nla_len(attr);
			break;
		case RTA_FLOW:
			cfg->fc_flow = nla_get_u32(attr);
			break;
		case RTA_TABLE:
			cfg->fc_table = nla_get_u32(attr);
			break;
		}
	}

	return 0;
errout:
	return err;
}
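/*
 * Netlink message handlers registered in ip_fib_init(): RTM_DELROUTE only
 * deletes from a table that already exists (-ESRCH otherwise), while
 * RTM_NEWROUTE creates the target table on demand via fib_new_table().
 */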
static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(net, skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_get_table(net, cfg.fc_table);
	if (tb == NULL) {
		err = -ESRCH;
		goto errout;
	}

	err = tb->tb_delete(tb, &cfg);
errout:
	return err;
}
static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_config cfg;
	struct fib_table *tb;
	int err;

	err = rtm_to_fib_config(net, skb, nlh, &cfg);
	if (err < 0)
		goto errout;

	tb = fib_new_table(net, cfg.fc_table);
	if (tb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	err = tb->tb_insert(tb, &cfg);
errout:
	return err;
}
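/*
 * Dump all FIB tables to userspace.  cb->args[0] and cb->args[1] record the
 * hash bucket and the table position within it, so an interrupted dump can
 * be resumed; requests with RTM_F_CLONED set are handed to ip_rt_dump()
 * (the routing cache) instead.
 */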
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	unsigned int h, s_h;
	unsigned int e = 0, s_e;
	struct fib_table *tb;
	struct hlist_node *node;
	struct hlist_head *head;
	int dumped = 0;

	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
	    ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
		return ip_rt_dump(skb, cb);

	s_h = cb->args[0];
	s_e = cb->args[1];

	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
		e = 0;
		head = &net->ipv4.fib_table_hash[h];
		hlist_for_each_entry(tb, node, head, tb_hlist) {
			if (e < s_e)
				goto next;
			if (dumped)
				memset(&cb->args[2], 0, sizeof(cb->args) -
						 2 * sizeof(cb->args[0]));
			if (tb->tb_dump(tb, skb, cb) < 0)
				goto out;
			dumped = 1;
next:
			e++;
		}
	}
out:
	cb->args[1] = e;
	cb->args[0] = h;

	return skb->len;
}
/* Prepare and feed an intra-kernel routing request.
   Really, it should be a netlink message, but netlink may not be
   configured, so we feed the request directly to the fib engine.
   This is legal because all such events occur only while netlink
   is already locked.
 */

static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
	struct net *net = dev_net(ifa->ifa_dev->dev);
	struct fib_table *tb;
	struct fib_config cfg = {
		.fc_protocol = RTPROT_KERNEL,
		.fc_type = type,
		.fc_dst = dst,
		.fc_dst_len = dst_len,
		.fc_prefsrc = ifa->ifa_local,
		.fc_oif = ifa->ifa_dev->dev->ifindex,
		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
		.fc_nlinfo = {
			.nl_net = net,
		},
	};

	if (type == RTN_UNICAST)
		tb = fib_new_table(net, RT_TABLE_MAIN);
	else
		tb = fib_new_table(net, RT_TABLE_LOCAL);

	if (tb == NULL)
		return;

	cfg.fc_table = tb->tb_id;

	if (type != RTN_LOCAL)
		cfg.fc_scope = RT_SCOPE_LINK;
	else
		cfg.fc_scope = RT_SCOPE_HOST;

	if (cmd == RTM_NEWROUTE)
		tb->tb_insert(tb, &cfg);
	else
		tb->tb_delete(tb, &cfg);
}
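/*
 * fib_add_ifaddr()/fib_del_ifaddr() keep the local and main tables in sync
 * with interface addresses: a local /32 route is always installed for the
 * address itself and, while the interface is up, the matching broadcast
 * and (for primary addresses) network routes are added as well.  They are
 * driven by the inetaddr and netdev notifiers registered further below.
 */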
void fib_add_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *prim = ifa;
	__be32 mask = ifa->ifa_mask;
	__be32 addr = ifa->ifa_local;
	__be32 prefix = ifa->ifa_address&mask;

	if (ifa->ifa_flags&IFA_F_SECONDARY) {
		prim = inet_ifa_byprefix(in_dev, prefix, mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);

	if (!(dev->flags&IFF_UP))
		return;

	/* Add broadcast address, if it is explicitly assigned. */
	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);

	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
		fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);

		/* Add network-specific broadcast routes where they make sense. */
		if (ifa->ifa_prefixlen < 31) {
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
		}
	}
}
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct net_device *dev = in_dev->dev;
	struct in_ifaddr *ifa1;
	struct in_ifaddr *prim = ifa;
	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
	__be32 any = ifa->ifa_address&ifa->ifa_mask;
#define LOCAL_OK	1
#define BRD_OK		2
#define BRD0_OK		4
#define BRD1_OK		8
	unsigned ok = 0;

	if (!(ifa->ifa_flags&IFA_F_SECONDARY))
		fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
			  RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
	else {
		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
		if (prim == NULL) {
			printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
			return;
		}
	}

	/* Deletion is more complicated than an add.
	   We must take care not to delete too much :-)

	   Scan the address list to be sure the addresses are really gone.
	 */

	for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
		if (ifa->ifa_local == ifa1->ifa_local)
			ok |= LOCAL_OK;
		if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
			ok |= BRD_OK;
		if (brd == ifa1->ifa_broadcast)
			ok |= BRD1_OK;
		if (any == ifa1->ifa_broadcast)
			ok |= BRD0_OK;
	}

	if (!(ok&BRD_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
	if (!(ok&BRD1_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
	if (!(ok&BRD0_OK))
		fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
	if (!(ok&LOCAL_OK)) {
		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

		/* Check that this local address finally disappeared. */
		if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
			/* And last, but not least, we must flush stray FIB
			   entries.

			   First we scan the fib_info list for stray nexthop
			   entries, then ignite fib_flush.
			 */
			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
				fib_flush(dev_net(dev));
		}
	}
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
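/*
 * NETLINK_FIB_LOOKUP support: userspace sends a struct fib_result_nl
 * (fl_addr, fl_tos, fl_scope, fl_mark plus the table id in tb_id_in) on a
 * NETLINK_FIB_LOOKUP socket; nl_fib_input() clones the skb, performs the
 * lookup via nl_fib_lookup() and unicasts the filled-in result back to
 * the sender.
 */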
static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
{
	struct fib_result res;
	struct flowi fl = { .mark = frn->fl_mark,
			    .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
						 .tos = frn->fl_tos,
						 .scope = frn->fl_scope } } };

#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	frn->err = -ENOENT;
	if (tb) {
		local_bh_disable();

		frn->tb_id = tb->tb_id;
		frn->err = tb->tb_lookup(tb, &fl, &res);

		if (!frn->err) {
			frn->prefixlen = res.prefixlen;
			frn->nh_sel = res.nh_sel;
			frn->type = res.type;
			frn->scope = res.scope;
			fib_res_put(&res);
		}
		local_bh_enable();
	}
}
static void nl_fib_input(struct sk_buff *skb)
{
	struct net *net;
	struct fib_result_nl *frn;
	struct nlmsghdr *nlh;
	struct fib_table *tb;
	u32 pid;

	net = sock_net(skb->sk);
	nlh = nlmsg_hdr(skb);
	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
		return;

	skb = skb_clone(skb, GFP_KERNEL);
	if (skb == NULL)
		return;
	nlh = nlmsg_hdr(skb);

	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
	tb = fib_get_table(net, frn->tb_id_in);

	nl_fib_lookup(frn, tb);

	pid = NETLINK_CB(skb).pid;	/* pid of sending process */
	NETLINK_CB(skb).pid = 0;	/* from kernel */
	NETLINK_CB(skb).dst_group = 0;	/* unicast */
	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
}
static int nl_fib_lookup_init(struct net *net)
{
	struct sock *sk;
	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
				   nl_fib_input, NULL, THIS_MODULE);
	if (sk == NULL)
		return -EAFNOSUPPORT;
	net->ipv4.fibnl = sk;
	return 0;
}

static void nl_fib_lookup_exit(struct net *net)
{
	netlink_kernel_release(net->ipv4.fibnl);
	net->ipv4.fibnl = NULL;
}
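/*
 * fib_disable_ip() purges routes that went through a device, then flushes
 * the routing and ARP caches.  As used below: force is 0 on NETDEV_DOWN,
 * 1 when the last address is removed from an interface, and 2 on
 * NETDEV_UNREGISTER; the value is passed straight to fib_sync_down_dev().
 */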
static void fib_disable_ip(struct net_device *dev, int force)
{
	if (fib_sync_down_dev(dev, force))
		fib_flush(dev_net(dev));
	rt_cache_flush(dev_net(dev), 0);
	arp_ifdown(dev);
}
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net_device *dev = ifa->ifa_dev->dev;

	switch (event) {
	case NETDEV_UP:
		fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev);
#endif
		rt_cache_flush(dev_net(dev), -1);
		break;
	case NETDEV_DOWN:
		fib_del_ifaddr(ifa);
		if (ifa->ifa_dev->ifa_list == NULL) {
			/* Last address was deleted from this interface.
			   Disable IP.
			 */
			fib_disable_ip(dev, 1);
		} else {
			rt_cache_flush(dev_net(dev), -1);
		}
		break;
	}
	return NOTIFY_DONE;
}
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (event == NETDEV_UNREGISTER) {
		fib_disable_ip(dev, 2);
		return NOTIFY_DONE;
	}

	if (!in_dev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		for_ifa(in_dev) {
			fib_add_ifaddr(ifa);
		} endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		fib_sync_up(dev);
#endif
		rt_cache_flush(dev_net(dev), -1);
		break;
	case NETDEV_DOWN:
		fib_disable_ip(dev, 0);
		break;
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGE:
		rt_cache_flush(dev_net(dev), 0);
		break;
	}
	return NOTIFY_DONE;
}
static struct notifier_block fib_inetaddr_notifier = {
	.notifier_call = fib_inetaddr_event,
};

static struct notifier_block fib_netdev_notifier = {
	.notifier_call = fib_netdev_event,
};
static int __net_init ip_fib_net_init(struct net *net)
{
	int err;
	unsigned int i;

	net->ipv4.fib_table_hash = kzalloc(
			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
	if (net->ipv4.fib_table_hash == NULL)
		return -ENOMEM;

	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);

	err = fib4_rules_init(net);
	if (err < 0)
		goto fail;
	return 0;

fail:
	kfree(net->ipv4.fib_table_hash);
	return err;
}
static void __net_exit ip_fib_net_exit(struct net *net)
{
	unsigned int i;

#ifdef CONFIG_IP_MULTIPLE_TABLES
	fib4_rules_exit(net);
#endif

	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
		struct fib_table *tb;
		struct hlist_head *head;
		struct hlist_node *node, *tmp;

		head = &net->ipv4.fib_table_hash[i];
		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
			hlist_del(node);
			tb->tb_flush(tb);
			kfree(tb);
		}
	}
	kfree(net->ipv4.fib_table_hash);
}
static int __net_init fib_net_init(struct net *net)
{
	int error;

	error = ip_fib_net_init(net);
	if (error < 0)
		goto out;
	error = nl_fib_lookup_init(net);
	if (error < 0)
		goto out_nlfl;
	error = fib_proc_init(net);
	if (error < 0)
		goto out_proc;
out:
	return error;

out_proc:
	nl_fib_lookup_exit(net);
out_nlfl:
	ip_fib_net_exit(net);
	goto out;
}

static void __net_exit fib_net_exit(struct net *net)
{
	fib_proc_exit(net);
	nl_fib_lookup_exit(net);
	ip_fib_net_exit(net);
}

static struct pernet_operations fib_net_ops = {
	.init = fib_net_init,
	.exit = fib_net_exit,
};
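/*
 * Boot-time entry point, called once from the IPv4 init path: register the
 * rtnetlink route handlers, the per-namespace init/exit operations and the
 * device and address notifiers, then initialise the FIB backend
 * (fib_hash_init()).
 */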
void __init ip_fib_init(void)
{
	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

	register_pernet_subsys(&fib_net_ops);
	register_netdevice_notifier(&fib_netdev_notifier);
	register_inetaddr_notifier(&fib_inetaddr_notifier);

	fib_hash_init();
}

EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);