Merge tag 'locks-v3.16-2' of git://git.samba.org/jlayton/linux
[linux/fpc-iii.git] / net / ipv4 / devinet.c
blobe9449376b58e4293b7735362912e1ea1191a8815
1 /*
2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
70 static struct ipv4_devconf ipv4_devconf = {
71 .data = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 .data = {
83 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 [IFA_LOCAL] = { .type = NLA_U32 },
98 [IFA_ADDRESS] = { .type = NLA_U32 },
99 [IFA_BROADCAST] = { .type = NLA_U32 },
100 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
102 [IFA_FLAGS] = { .type = NLA_U32 },
105 #define IN4_ADDR_HSIZE_SHIFT 8
106 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 u32 val = (__force u32) addr ^ net_hash_mix(net);
114 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 u32 hash = inet_addr_hash(net, ifa->ifa_local);
121 ASSERT_RTNL();
122 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
125 static void inet_hash_remove(struct in_ifaddr *ifa)
127 ASSERT_RTNL();
128 hlist_del_init_rcu(&ifa->hash);
132 * __ip_dev_find - find the first device with a given source address.
133 * @net: the net namespace
134 * @addr: the source address
135 * @devref: if true, take a reference on the found device
137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
141 u32 hash = inet_addr_hash(net, addr);
142 struct net_device *result = NULL;
143 struct in_ifaddr *ifa;
145 rcu_read_lock();
146 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 if (ifa->ifa_local == addr) {
148 struct net_device *dev = ifa->ifa_dev->dev;
150 if (!net_eq(dev_net(dev), net))
151 continue;
152 result = dev;
153 break;
156 if (!result) {
157 struct flowi4 fl4 = { .daddr = addr };
158 struct fib_result res = { 0 };
159 struct fib_table *local;
161 /* Fallback to FIB local table so that communication
162 * over loopback subnets work.
164 local = fib_get_table(net, RT_TABLE_LOCAL);
165 if (local &&
166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 res.type == RTN_LOCAL)
168 result = FIB_RES_DEV(res);
170 if (result && devref)
171 dev_hold(result);
172 rcu_read_unlock();
173 return result;
175 EXPORT_SYMBOL(__ip_dev_find);
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 int destroy);
182 #ifdef CONFIG_SYSCTL
183 static void devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static void devinet_sysctl_register(struct in_device *idev)
189 static void devinet_sysctl_unregister(struct in_device *idev)
192 #endif
194 /* Locks all the inet devices. */
196 static struct in_ifaddr *inet_alloc_ifa(void)
198 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201 static void inet_rcu_free_ifa(struct rcu_head *head)
203 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 if (ifa->ifa_dev)
205 in_dev_put(ifa->ifa_dev);
206 kfree(ifa);
209 static void inet_free_ifa(struct in_ifaddr *ifa)
211 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
214 void in_dev_finish_destroy(struct in_device *idev)
216 struct net_device *dev = idev->dev;
218 WARN_ON(idev->ifa_list);
219 WARN_ON(idev->mc_list);
220 kfree(rcu_dereference_protected(idev->mc_hash, 1));
221 #ifdef NET_REFCNT_DEBUG
222 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 #endif
224 dev_put(dev);
225 if (!idev->dead)
226 pr_err("Freeing alive in_device %p\n", idev);
227 else
228 kfree(idev);
230 EXPORT_SYMBOL(in_dev_finish_destroy);
232 static struct in_device *inetdev_init(struct net_device *dev)
234 struct in_device *in_dev;
236 ASSERT_RTNL();
238 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239 if (!in_dev)
240 goto out;
241 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242 sizeof(in_dev->cnf));
243 in_dev->cnf.sysctl = NULL;
244 in_dev->dev = dev;
245 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246 if (!in_dev->arp_parms)
247 goto out_kfree;
248 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249 dev_disable_lro(dev);
250 /* Reference in_dev->dev */
251 dev_hold(dev);
252 /* Account for reference dev->ip_ptr (below) */
253 in_dev_hold(in_dev);
255 devinet_sysctl_register(in_dev);
256 ip_mc_init_dev(in_dev);
257 if (dev->flags & IFF_UP)
258 ip_mc_up(in_dev);
260 /* we can receive as soon as ip_ptr is set -- do this last */
261 rcu_assign_pointer(dev->ip_ptr, in_dev);
262 out:
263 return in_dev;
264 out_kfree:
265 kfree(in_dev);
266 in_dev = NULL;
267 goto out;
270 static void in_dev_rcu_put(struct rcu_head *head)
272 struct in_device *idev = container_of(head, struct in_device, rcu_head);
273 in_dev_put(idev);
276 static void inetdev_destroy(struct in_device *in_dev)
278 struct in_ifaddr *ifa;
279 struct net_device *dev;
281 ASSERT_RTNL();
283 dev = in_dev->dev;
285 in_dev->dead = 1;
287 ip_mc_destroy_dev(in_dev);
289 while ((ifa = in_dev->ifa_list) != NULL) {
290 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291 inet_free_ifa(ifa);
294 RCU_INIT_POINTER(dev->ip_ptr, NULL);
296 devinet_sysctl_unregister(in_dev);
297 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298 arp_ifdown(dev);
300 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
303 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
305 rcu_read_lock();
306 for_primary_ifa(in_dev) {
307 if (inet_ifa_match(a, ifa)) {
308 if (!b || inet_ifa_match(b, ifa)) {
309 rcu_read_unlock();
310 return 1;
313 } endfor_ifa(in_dev);
314 rcu_read_unlock();
315 return 0;
318 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
319 int destroy, struct nlmsghdr *nlh, u32 portid)
321 struct in_ifaddr *promote = NULL;
322 struct in_ifaddr *ifa, *ifa1 = *ifap;
323 struct in_ifaddr *last_prim = in_dev->ifa_list;
324 struct in_ifaddr *prev_prom = NULL;
325 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
327 ASSERT_RTNL();
329 /* 1. Deleting primary ifaddr forces deletion all secondaries
330 * unless alias promotion is set
333 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
334 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
336 while ((ifa = *ifap1) != NULL) {
337 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
338 ifa1->ifa_scope <= ifa->ifa_scope)
339 last_prim = ifa;
341 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
342 ifa1->ifa_mask != ifa->ifa_mask ||
343 !inet_ifa_match(ifa1->ifa_address, ifa)) {
344 ifap1 = &ifa->ifa_next;
345 prev_prom = ifa;
346 continue;
349 if (!do_promote) {
350 inet_hash_remove(ifa);
351 *ifap1 = ifa->ifa_next;
353 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
354 blocking_notifier_call_chain(&inetaddr_chain,
355 NETDEV_DOWN, ifa);
356 inet_free_ifa(ifa);
357 } else {
358 promote = ifa;
359 break;
364 /* On promotion all secondaries from subnet are changing
365 * the primary IP, we must remove all their routes silently
366 * and later to add them back with new prefsrc. Do this
367 * while all addresses are on the device list.
369 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
370 if (ifa1->ifa_mask == ifa->ifa_mask &&
371 inet_ifa_match(ifa1->ifa_address, ifa))
372 fib_del_ifaddr(ifa, ifa1);
375 /* 2. Unlink it */
377 *ifap = ifa1->ifa_next;
378 inet_hash_remove(ifa1);
380 /* 3. Announce address deletion */
382 /* Send message first, then call notifier.
383 At first sight, FIB update triggered by notifier
384 will refer to already deleted ifaddr, that could confuse
385 netlink listeners. It is not true: look, gated sees
386 that route deleted and if it still thinks that ifaddr
387 is valid, it will try to restore deleted routes... Grr.
388 So that, this order is correct.
390 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
391 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
393 if (promote) {
394 struct in_ifaddr *next_sec = promote->ifa_next;
396 if (prev_prom) {
397 prev_prom->ifa_next = promote->ifa_next;
398 promote->ifa_next = last_prim->ifa_next;
399 last_prim->ifa_next = promote;
402 promote->ifa_flags &= ~IFA_F_SECONDARY;
403 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
404 blocking_notifier_call_chain(&inetaddr_chain,
405 NETDEV_UP, promote);
406 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
407 if (ifa1->ifa_mask != ifa->ifa_mask ||
408 !inet_ifa_match(ifa1->ifa_address, ifa))
409 continue;
410 fib_add_ifaddr(ifa);
414 if (destroy)
415 inet_free_ifa(ifa1);
418 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
419 int destroy)
421 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
424 static void check_lifetime(struct work_struct *work);
426 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
428 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
429 u32 portid)
431 struct in_device *in_dev = ifa->ifa_dev;
432 struct in_ifaddr *ifa1, **ifap, **last_primary;
434 ASSERT_RTNL();
436 if (!ifa->ifa_local) {
437 inet_free_ifa(ifa);
438 return 0;
441 ifa->ifa_flags &= ~IFA_F_SECONDARY;
442 last_primary = &in_dev->ifa_list;
444 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
445 ifap = &ifa1->ifa_next) {
446 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
447 ifa->ifa_scope <= ifa1->ifa_scope)
448 last_primary = &ifa1->ifa_next;
449 if (ifa1->ifa_mask == ifa->ifa_mask &&
450 inet_ifa_match(ifa1->ifa_address, ifa)) {
451 if (ifa1->ifa_local == ifa->ifa_local) {
452 inet_free_ifa(ifa);
453 return -EEXIST;
455 if (ifa1->ifa_scope != ifa->ifa_scope) {
456 inet_free_ifa(ifa);
457 return -EINVAL;
459 ifa->ifa_flags |= IFA_F_SECONDARY;
463 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
464 prandom_seed((__force u32) ifa->ifa_local);
465 ifap = last_primary;
468 ifa->ifa_next = *ifap;
469 *ifap = ifa;
471 inet_hash_insert(dev_net(in_dev->dev), ifa);
473 cancel_delayed_work(&check_lifetime_work);
474 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
476 /* Send message first, then call notifier.
477 Notifier will trigger FIB update, so that
478 listeners of netlink will know about new ifaddr */
479 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
480 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
482 return 0;
485 static int inet_insert_ifa(struct in_ifaddr *ifa)
487 return __inet_insert_ifa(ifa, NULL, 0);
490 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
492 struct in_device *in_dev = __in_dev_get_rtnl(dev);
494 ASSERT_RTNL();
496 if (!in_dev) {
497 inet_free_ifa(ifa);
498 return -ENOBUFS;
500 ipv4_devconf_setall(in_dev);
501 neigh_parms_data_state_setall(in_dev->arp_parms);
502 if (ifa->ifa_dev != in_dev) {
503 WARN_ON(ifa->ifa_dev);
504 in_dev_hold(in_dev);
505 ifa->ifa_dev = in_dev;
507 if (ipv4_is_loopback(ifa->ifa_local))
508 ifa->ifa_scope = RT_SCOPE_HOST;
509 return inet_insert_ifa(ifa);
512 /* Caller must hold RCU or RTNL :
513 * We dont take a reference on found in_device
515 struct in_device *inetdev_by_index(struct net *net, int ifindex)
517 struct net_device *dev;
518 struct in_device *in_dev = NULL;
520 rcu_read_lock();
521 dev = dev_get_by_index_rcu(net, ifindex);
522 if (dev)
523 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
524 rcu_read_unlock();
525 return in_dev;
527 EXPORT_SYMBOL(inetdev_by_index);
529 /* Called only from RTNL semaphored context. No locks. */
531 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
532 __be32 mask)
534 ASSERT_RTNL();
536 for_primary_ifa(in_dev) {
537 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
538 return ifa;
539 } endfor_ifa(in_dev);
540 return NULL;
543 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
545 struct net *net = sock_net(skb->sk);
546 struct nlattr *tb[IFA_MAX+1];
547 struct in_device *in_dev;
548 struct ifaddrmsg *ifm;
549 struct in_ifaddr *ifa, **ifap;
550 int err = -EINVAL;
552 ASSERT_RTNL();
554 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
555 if (err < 0)
556 goto errout;
558 ifm = nlmsg_data(nlh);
559 in_dev = inetdev_by_index(net, ifm->ifa_index);
560 if (in_dev == NULL) {
561 err = -ENODEV;
562 goto errout;
565 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
566 ifap = &ifa->ifa_next) {
567 if (tb[IFA_LOCAL] &&
568 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
569 continue;
571 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
572 continue;
574 if (tb[IFA_ADDRESS] &&
575 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
576 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
577 continue;
579 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
580 return 0;
583 err = -EADDRNOTAVAIL;
584 errout:
585 return err;
588 #define INFINITY_LIFE_TIME 0xFFFFFFFF
590 static void check_lifetime(struct work_struct *work)
592 unsigned long now, next, next_sec, next_sched;
593 struct in_ifaddr *ifa;
594 struct hlist_node *n;
595 int i;
597 now = jiffies;
598 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
600 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
601 bool change_needed = false;
603 rcu_read_lock();
604 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
605 unsigned long age;
607 if (ifa->ifa_flags & IFA_F_PERMANENT)
608 continue;
610 /* We try to batch several events at once. */
611 age = (now - ifa->ifa_tstamp +
612 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
614 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
615 age >= ifa->ifa_valid_lft) {
616 change_needed = true;
617 } else if (ifa->ifa_preferred_lft ==
618 INFINITY_LIFE_TIME) {
619 continue;
620 } else if (age >= ifa->ifa_preferred_lft) {
621 if (time_before(ifa->ifa_tstamp +
622 ifa->ifa_valid_lft * HZ, next))
623 next = ifa->ifa_tstamp +
624 ifa->ifa_valid_lft * HZ;
626 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
627 change_needed = true;
628 } else if (time_before(ifa->ifa_tstamp +
629 ifa->ifa_preferred_lft * HZ,
630 next)) {
631 next = ifa->ifa_tstamp +
632 ifa->ifa_preferred_lft * HZ;
635 rcu_read_unlock();
636 if (!change_needed)
637 continue;
638 rtnl_lock();
639 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
640 unsigned long age;
642 if (ifa->ifa_flags & IFA_F_PERMANENT)
643 continue;
645 /* We try to batch several events at once. */
646 age = (now - ifa->ifa_tstamp +
647 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
649 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
650 age >= ifa->ifa_valid_lft) {
651 struct in_ifaddr **ifap;
653 for (ifap = &ifa->ifa_dev->ifa_list;
654 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
655 if (*ifap == ifa) {
656 inet_del_ifa(ifa->ifa_dev,
657 ifap, 1);
658 break;
661 } else if (ifa->ifa_preferred_lft !=
662 INFINITY_LIFE_TIME &&
663 age >= ifa->ifa_preferred_lft &&
664 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
665 ifa->ifa_flags |= IFA_F_DEPRECATED;
666 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
669 rtnl_unlock();
672 next_sec = round_jiffies_up(next);
673 next_sched = next;
675 /* If rounded timeout is accurate enough, accept it. */
676 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
677 next_sched = next_sec;
679 now = jiffies;
680 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
681 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
682 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
684 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
685 next_sched - now);
688 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
689 __u32 prefered_lft)
691 unsigned long timeout;
693 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
695 timeout = addrconf_timeout_fixup(valid_lft, HZ);
696 if (addrconf_finite_timeout(timeout))
697 ifa->ifa_valid_lft = timeout;
698 else
699 ifa->ifa_flags |= IFA_F_PERMANENT;
701 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
702 if (addrconf_finite_timeout(timeout)) {
703 if (timeout == 0)
704 ifa->ifa_flags |= IFA_F_DEPRECATED;
705 ifa->ifa_preferred_lft = timeout;
707 ifa->ifa_tstamp = jiffies;
708 if (!ifa->ifa_cstamp)
709 ifa->ifa_cstamp = ifa->ifa_tstamp;
712 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
713 __u32 *pvalid_lft, __u32 *pprefered_lft)
715 struct nlattr *tb[IFA_MAX+1];
716 struct in_ifaddr *ifa;
717 struct ifaddrmsg *ifm;
718 struct net_device *dev;
719 struct in_device *in_dev;
720 int err;
722 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
723 if (err < 0)
724 goto errout;
726 ifm = nlmsg_data(nlh);
727 err = -EINVAL;
728 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
729 goto errout;
731 dev = __dev_get_by_index(net, ifm->ifa_index);
732 err = -ENODEV;
733 if (dev == NULL)
734 goto errout;
736 in_dev = __in_dev_get_rtnl(dev);
737 err = -ENOBUFS;
738 if (in_dev == NULL)
739 goto errout;
741 ifa = inet_alloc_ifa();
742 if (ifa == NULL)
744 * A potential indev allocation can be left alive, it stays
745 * assigned to its device and is destroy with it.
747 goto errout;
749 ipv4_devconf_setall(in_dev);
750 neigh_parms_data_state_setall(in_dev->arp_parms);
751 in_dev_hold(in_dev);
753 if (tb[IFA_ADDRESS] == NULL)
754 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
756 INIT_HLIST_NODE(&ifa->hash);
757 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
758 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
759 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
760 ifm->ifa_flags;
761 ifa->ifa_scope = ifm->ifa_scope;
762 ifa->ifa_dev = in_dev;
764 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
765 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
767 if (tb[IFA_BROADCAST])
768 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
770 if (tb[IFA_LABEL])
771 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
772 else
773 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
775 if (tb[IFA_CACHEINFO]) {
776 struct ifa_cacheinfo *ci;
778 ci = nla_data(tb[IFA_CACHEINFO]);
779 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
780 err = -EINVAL;
781 goto errout_free;
783 *pvalid_lft = ci->ifa_valid;
784 *pprefered_lft = ci->ifa_prefered;
787 return ifa;
789 errout_free:
790 inet_free_ifa(ifa);
791 errout:
792 return ERR_PTR(err);
795 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
797 struct in_device *in_dev = ifa->ifa_dev;
798 struct in_ifaddr *ifa1, **ifap;
800 if (!ifa->ifa_local)
801 return NULL;
803 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
804 ifap = &ifa1->ifa_next) {
805 if (ifa1->ifa_mask == ifa->ifa_mask &&
806 inet_ifa_match(ifa1->ifa_address, ifa) &&
807 ifa1->ifa_local == ifa->ifa_local)
808 return ifa1;
810 return NULL;
813 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
815 struct net *net = sock_net(skb->sk);
816 struct in_ifaddr *ifa;
817 struct in_ifaddr *ifa_existing;
818 __u32 valid_lft = INFINITY_LIFE_TIME;
819 __u32 prefered_lft = INFINITY_LIFE_TIME;
821 ASSERT_RTNL();
823 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
824 if (IS_ERR(ifa))
825 return PTR_ERR(ifa);
827 ifa_existing = find_matching_ifa(ifa);
828 if (!ifa_existing) {
829 /* It would be best to check for !NLM_F_CREATE here but
830 * userspace already relies on not having to provide this.
832 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
834 } else {
835 inet_free_ifa(ifa);
837 if (nlh->nlmsg_flags & NLM_F_EXCL ||
838 !(nlh->nlmsg_flags & NLM_F_REPLACE))
839 return -EEXIST;
840 ifa = ifa_existing;
841 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
842 cancel_delayed_work(&check_lifetime_work);
843 queue_delayed_work(system_power_efficient_wq,
844 &check_lifetime_work, 0);
845 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
846 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
848 return 0;
852 * Determine a default network mask, based on the IP address.
855 static int inet_abc_len(__be32 addr)
857 int rc = -1; /* Something else, probably a multicast. */
859 if (ipv4_is_zeronet(addr))
860 rc = 0;
861 else {
862 __u32 haddr = ntohl(addr);
864 if (IN_CLASSA(haddr))
865 rc = 8;
866 else if (IN_CLASSB(haddr))
867 rc = 16;
868 else if (IN_CLASSC(haddr))
869 rc = 24;
872 return rc;
876 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
878 struct ifreq ifr;
879 struct sockaddr_in sin_orig;
880 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
881 struct in_device *in_dev;
882 struct in_ifaddr **ifap = NULL;
883 struct in_ifaddr *ifa = NULL;
884 struct net_device *dev;
885 char *colon;
886 int ret = -EFAULT;
887 int tryaddrmatch = 0;
890 * Fetch the caller's info block into kernel space
893 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
894 goto out;
895 ifr.ifr_name[IFNAMSIZ - 1] = 0;
897 /* save original address for comparison */
898 memcpy(&sin_orig, sin, sizeof(*sin));
900 colon = strchr(ifr.ifr_name, ':');
901 if (colon)
902 *colon = 0;
904 dev_load(net, ifr.ifr_name);
906 switch (cmd) {
907 case SIOCGIFADDR: /* Get interface address */
908 case SIOCGIFBRDADDR: /* Get the broadcast address */
909 case SIOCGIFDSTADDR: /* Get the destination address */
910 case SIOCGIFNETMASK: /* Get the netmask for the interface */
911 /* Note that these ioctls will not sleep,
912 so that we do not impose a lock.
913 One day we will be forced to put shlock here (I mean SMP)
915 tryaddrmatch = (sin_orig.sin_family == AF_INET);
916 memset(sin, 0, sizeof(*sin));
917 sin->sin_family = AF_INET;
918 break;
920 case SIOCSIFFLAGS:
921 ret = -EPERM;
922 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
923 goto out;
924 break;
925 case SIOCSIFADDR: /* Set interface address (and family) */
926 case SIOCSIFBRDADDR: /* Set the broadcast address */
927 case SIOCSIFDSTADDR: /* Set the destination address */
928 case SIOCSIFNETMASK: /* Set the netmask for the interface */
929 ret = -EPERM;
930 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
931 goto out;
932 ret = -EINVAL;
933 if (sin->sin_family != AF_INET)
934 goto out;
935 break;
936 default:
937 ret = -EINVAL;
938 goto out;
941 rtnl_lock();
943 ret = -ENODEV;
944 dev = __dev_get_by_name(net, ifr.ifr_name);
945 if (!dev)
946 goto done;
948 if (colon)
949 *colon = ':';
951 in_dev = __in_dev_get_rtnl(dev);
952 if (in_dev) {
953 if (tryaddrmatch) {
954 /* Matthias Andree */
955 /* compare label and address (4.4BSD style) */
956 /* note: we only do this for a limited set of ioctls
957 and only if the original address family was AF_INET.
958 This is checked above. */
959 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
960 ifap = &ifa->ifa_next) {
961 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
962 sin_orig.sin_addr.s_addr ==
963 ifa->ifa_local) {
964 break; /* found */
968 /* we didn't get a match, maybe the application is
969 4.3BSD-style and passed in junk so we fall back to
970 comparing just the label */
971 if (!ifa) {
972 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
973 ifap = &ifa->ifa_next)
974 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
975 break;
979 ret = -EADDRNOTAVAIL;
980 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
981 goto done;
983 switch (cmd) {
984 case SIOCGIFADDR: /* Get interface address */
985 sin->sin_addr.s_addr = ifa->ifa_local;
986 goto rarok;
988 case SIOCGIFBRDADDR: /* Get the broadcast address */
989 sin->sin_addr.s_addr = ifa->ifa_broadcast;
990 goto rarok;
992 case SIOCGIFDSTADDR: /* Get the destination address */
993 sin->sin_addr.s_addr = ifa->ifa_address;
994 goto rarok;
996 case SIOCGIFNETMASK: /* Get the netmask for the interface */
997 sin->sin_addr.s_addr = ifa->ifa_mask;
998 goto rarok;
1000 case SIOCSIFFLAGS:
1001 if (colon) {
1002 ret = -EADDRNOTAVAIL;
1003 if (!ifa)
1004 break;
1005 ret = 0;
1006 if (!(ifr.ifr_flags & IFF_UP))
1007 inet_del_ifa(in_dev, ifap, 1);
1008 break;
1010 ret = dev_change_flags(dev, ifr.ifr_flags);
1011 break;
1013 case SIOCSIFADDR: /* Set interface address (and family) */
1014 ret = -EINVAL;
1015 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1016 break;
1018 if (!ifa) {
1019 ret = -ENOBUFS;
1020 ifa = inet_alloc_ifa();
1021 if (!ifa)
1022 break;
1023 INIT_HLIST_NODE(&ifa->hash);
1024 if (colon)
1025 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1026 else
1027 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1028 } else {
1029 ret = 0;
1030 if (ifa->ifa_local == sin->sin_addr.s_addr)
1031 break;
1032 inet_del_ifa(in_dev, ifap, 0);
1033 ifa->ifa_broadcast = 0;
1034 ifa->ifa_scope = 0;
1037 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1039 if (!(dev->flags & IFF_POINTOPOINT)) {
1040 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1041 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1042 if ((dev->flags & IFF_BROADCAST) &&
1043 ifa->ifa_prefixlen < 31)
1044 ifa->ifa_broadcast = ifa->ifa_address |
1045 ~ifa->ifa_mask;
1046 } else {
1047 ifa->ifa_prefixlen = 32;
1048 ifa->ifa_mask = inet_make_mask(32);
1050 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1051 ret = inet_set_ifa(dev, ifa);
1052 break;
1054 case SIOCSIFBRDADDR: /* Set the broadcast address */
1055 ret = 0;
1056 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1057 inet_del_ifa(in_dev, ifap, 0);
1058 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1059 inet_insert_ifa(ifa);
1061 break;
1063 case SIOCSIFDSTADDR: /* Set the destination address */
1064 ret = 0;
1065 if (ifa->ifa_address == sin->sin_addr.s_addr)
1066 break;
1067 ret = -EINVAL;
1068 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1069 break;
1070 ret = 0;
1071 inet_del_ifa(in_dev, ifap, 0);
1072 ifa->ifa_address = sin->sin_addr.s_addr;
1073 inet_insert_ifa(ifa);
1074 break;
1076 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1079 * The mask we set must be legal.
1081 ret = -EINVAL;
1082 if (bad_mask(sin->sin_addr.s_addr, 0))
1083 break;
1084 ret = 0;
1085 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1086 __be32 old_mask = ifa->ifa_mask;
1087 inet_del_ifa(in_dev, ifap, 0);
1088 ifa->ifa_mask = sin->sin_addr.s_addr;
1089 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1091 /* See if current broadcast address matches
1092 * with current netmask, then recalculate
1093 * the broadcast address. Otherwise it's a
1094 * funny address, so don't touch it since
1095 * the user seems to know what (s)he's doing...
1097 if ((dev->flags & IFF_BROADCAST) &&
1098 (ifa->ifa_prefixlen < 31) &&
1099 (ifa->ifa_broadcast ==
1100 (ifa->ifa_local|~old_mask))) {
1101 ifa->ifa_broadcast = (ifa->ifa_local |
1102 ~sin->sin_addr.s_addr);
1104 inet_insert_ifa(ifa);
1106 break;
1108 done:
1109 rtnl_unlock();
1110 out:
1111 return ret;
1112 rarok:
1113 rtnl_unlock();
1114 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1115 goto out;
1118 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1120 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1121 struct in_ifaddr *ifa;
1122 struct ifreq ifr;
1123 int done = 0;
1125 if (!in_dev)
1126 goto out;
1128 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1129 if (!buf) {
1130 done += sizeof(ifr);
1131 continue;
1133 if (len < (int) sizeof(ifr))
1134 break;
1135 memset(&ifr, 0, sizeof(struct ifreq));
1136 strcpy(ifr.ifr_name, ifa->ifa_label);
1138 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1139 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1140 ifa->ifa_local;
1142 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1143 done = -EFAULT;
1144 break;
1146 buf += sizeof(struct ifreq);
1147 len -= sizeof(struct ifreq);
1148 done += sizeof(struct ifreq);
1150 out:
1151 return done;
1154 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1156 __be32 addr = 0;
1157 struct in_device *in_dev;
1158 struct net *net = dev_net(dev);
1160 rcu_read_lock();
1161 in_dev = __in_dev_get_rcu(dev);
1162 if (!in_dev)
1163 goto no_in_dev;
1165 for_primary_ifa(in_dev) {
1166 if (ifa->ifa_scope > scope)
1167 continue;
1168 if (!dst || inet_ifa_match(dst, ifa)) {
1169 addr = ifa->ifa_local;
1170 break;
1172 if (!addr)
1173 addr = ifa->ifa_local;
1174 } endfor_ifa(in_dev);
1176 if (addr)
1177 goto out_unlock;
1178 no_in_dev:
1180 /* Not loopback addresses on loopback should be preferred
1181 in this case. It is importnat that lo is the first interface
1182 in dev_base list.
1184 for_each_netdev_rcu(net, dev) {
1185 in_dev = __in_dev_get_rcu(dev);
1186 if (!in_dev)
1187 continue;
1189 for_primary_ifa(in_dev) {
1190 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1191 ifa->ifa_scope <= scope) {
1192 addr = ifa->ifa_local;
1193 goto out_unlock;
1195 } endfor_ifa(in_dev);
1197 out_unlock:
1198 rcu_read_unlock();
1199 return addr;
1201 EXPORT_SYMBOL(inet_select_addr);
1203 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1204 __be32 local, int scope)
1206 int same = 0;
1207 __be32 addr = 0;
1209 for_ifa(in_dev) {
1210 if (!addr &&
1211 (local == ifa->ifa_local || !local) &&
1212 ifa->ifa_scope <= scope) {
1213 addr = ifa->ifa_local;
1214 if (same)
1215 break;
1217 if (!same) {
1218 same = (!local || inet_ifa_match(local, ifa)) &&
1219 (!dst || inet_ifa_match(dst, ifa));
1220 if (same && addr) {
1221 if (local || !dst)
1222 break;
1223 /* Is the selected addr into dst subnet? */
1224 if (inet_ifa_match(addr, ifa))
1225 break;
1226 /* No, then can we use new local src? */
1227 if (ifa->ifa_scope <= scope) {
1228 addr = ifa->ifa_local;
1229 break;
1231 /* search for large dst subnet for addr */
1232 same = 0;
1235 } endfor_ifa(in_dev);
1237 return same ? addr : 0;
1241 * Confirm that local IP address exists using wildcards:
1242 * - net: netns to check, cannot be NULL
1243 * - in_dev: only on this interface, NULL=any interface
1244 * - dst: only in the same subnet as dst, 0=any dst
1245 * - local: address, 0=autoselect the local address
1246 * - scope: maximum allowed scope value for the local address
1248 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1249 __be32 dst, __be32 local, int scope)
1251 __be32 addr = 0;
1252 struct net_device *dev;
1254 if (in_dev != NULL)
1255 return confirm_addr_indev(in_dev, dst, local, scope);
1257 rcu_read_lock();
1258 for_each_netdev_rcu(net, dev) {
1259 in_dev = __in_dev_get_rcu(dev);
1260 if (in_dev) {
1261 addr = confirm_addr_indev(in_dev, dst, local, scope);
1262 if (addr)
1263 break;
1266 rcu_read_unlock();
1268 return addr;
1270 EXPORT_SYMBOL(inet_confirm_addr);
1273 * Device notifier
1276 int register_inetaddr_notifier(struct notifier_block *nb)
1278 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1280 EXPORT_SYMBOL(register_inetaddr_notifier);
1282 int unregister_inetaddr_notifier(struct notifier_block *nb)
1284 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1286 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1288 /* Rename ifa_labels for a device name change. Make some effort to preserve
1289 * existing alias numbering and to create unique labels if possible.
1291 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1293 struct in_ifaddr *ifa;
1294 int named = 0;
1296 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1297 char old[IFNAMSIZ], *dot;
1299 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1300 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1301 if (named++ == 0)
1302 goto skip;
1303 dot = strchr(old, ':');
1304 if (dot == NULL) {
1305 sprintf(old, ":%d", named);
1306 dot = old;
1308 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1309 strcat(ifa->ifa_label, dot);
1310 else
1311 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1312 skip:
1313 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/*
 * An MTU is acceptable for IPv4 when it is at least 68 bytes, the
 * smallest datagram size every IPv4 module must handle (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1322 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1323 struct in_device *in_dev)
1326 struct in_ifaddr *ifa;
1328 for (ifa = in_dev->ifa_list; ifa;
1329 ifa = ifa->ifa_next) {
1330 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1331 ifa->ifa_local, dev,
1332 ifa->ifa_local, NULL,
1333 dev->dev_addr, NULL);
1337 /* Called only under RTNL semaphore */
1339 static int inetdev_event(struct notifier_block *this, unsigned long event,
1340 void *ptr)
1342 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1343 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345 ASSERT_RTNL();
1347 if (!in_dev) {
1348 if (event == NETDEV_REGISTER) {
1349 in_dev = inetdev_init(dev);
1350 if (!in_dev)
1351 return notifier_from_errno(-ENOMEM);
1352 if (dev->flags & IFF_LOOPBACK) {
1353 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1354 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1356 } else if (event == NETDEV_CHANGEMTU) {
1357 /* Re-enabling IP */
1358 if (inetdev_valid_mtu(dev->mtu))
1359 in_dev = inetdev_init(dev);
1361 goto out;
1364 switch (event) {
1365 case NETDEV_REGISTER:
1366 pr_debug("%s: bug\n", __func__);
1367 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1368 break;
1369 case NETDEV_UP:
1370 if (!inetdev_valid_mtu(dev->mtu))
1371 break;
1372 if (dev->flags & IFF_LOOPBACK) {
1373 struct in_ifaddr *ifa = inet_alloc_ifa();
1375 if (ifa) {
1376 INIT_HLIST_NODE(&ifa->hash);
1377 ifa->ifa_local =
1378 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1379 ifa->ifa_prefixlen = 8;
1380 ifa->ifa_mask = inet_make_mask(8);
1381 in_dev_hold(in_dev);
1382 ifa->ifa_dev = in_dev;
1383 ifa->ifa_scope = RT_SCOPE_HOST;
1384 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1385 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1386 INFINITY_LIFE_TIME);
1387 ipv4_devconf_setall(in_dev);
1388 neigh_parms_data_state_setall(in_dev->arp_parms);
1389 inet_insert_ifa(ifa);
1392 ip_mc_up(in_dev);
1393 /* fall through */
1394 case NETDEV_CHANGEADDR:
1395 if (!IN_DEV_ARP_NOTIFY(in_dev))
1396 break;
1397 /* fall through */
1398 case NETDEV_NOTIFY_PEERS:
1399 /* Send gratuitous ARP to notify of link change */
1400 inetdev_send_gratuitous_arp(dev, in_dev);
1401 break;
1402 case NETDEV_DOWN:
1403 ip_mc_down(in_dev);
1404 break;
1405 case NETDEV_PRE_TYPE_CHANGE:
1406 ip_mc_unmap(in_dev);
1407 break;
1408 case NETDEV_POST_TYPE_CHANGE:
1409 ip_mc_remap(in_dev);
1410 break;
1411 case NETDEV_CHANGEMTU:
1412 if (inetdev_valid_mtu(dev->mtu))
1413 break;
1414 /* disable IP when MTU is not enough */
1415 case NETDEV_UNREGISTER:
1416 inetdev_destroy(in_dev);
1417 break;
1418 case NETDEV_CHANGENAME:
1419 /* Do not notify about label change, this event is
1420 * not interesting to applications using netlink.
1422 inetdev_changename(dev, in_dev);
1424 devinet_sysctl_unregister(in_dev);
1425 devinet_sysctl_register(in_dev);
1426 break;
1428 out:
1429 return NOTIFY_DONE;
1432 static struct notifier_block ip_netdev_notifier = {
1433 .notifier_call = inetdev_event,
1436 static size_t inet_nlmsg_size(void)
1438 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1439 + nla_total_size(4) /* IFA_ADDRESS */
1440 + nla_total_size(4) /* IFA_LOCAL */
1441 + nla_total_size(4) /* IFA_BROADCAST */
1442 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1443 + nla_total_size(4) /* IFA_FLAGS */
1444 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1447 static inline u32 cstamp_delta(unsigned long cstamp)
1449 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1452 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1453 unsigned long tstamp, u32 preferred, u32 valid)
1455 struct ifa_cacheinfo ci;
1457 ci.cstamp = cstamp_delta(cstamp);
1458 ci.tstamp = cstamp_delta(tstamp);
1459 ci.ifa_prefered = preferred;
1460 ci.ifa_valid = valid;
1462 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1465 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1466 u32 portid, u32 seq, int event, unsigned int flags)
1468 struct ifaddrmsg *ifm;
1469 struct nlmsghdr *nlh;
1470 u32 preferred, valid;
1472 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1473 if (nlh == NULL)
1474 return -EMSGSIZE;
1476 ifm = nlmsg_data(nlh);
1477 ifm->ifa_family = AF_INET;
1478 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1479 ifm->ifa_flags = ifa->ifa_flags;
1480 ifm->ifa_scope = ifa->ifa_scope;
1481 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1483 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1484 preferred = ifa->ifa_preferred_lft;
1485 valid = ifa->ifa_valid_lft;
1486 if (preferred != INFINITY_LIFE_TIME) {
1487 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1489 if (preferred > tval)
1490 preferred -= tval;
1491 else
1492 preferred = 0;
1493 if (valid != INFINITY_LIFE_TIME) {
1494 if (valid > tval)
1495 valid -= tval;
1496 else
1497 valid = 0;
1500 } else {
1501 preferred = INFINITY_LIFE_TIME;
1502 valid = INFINITY_LIFE_TIME;
1504 if ((ifa->ifa_address &&
1505 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506 (ifa->ifa_local &&
1507 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1508 (ifa->ifa_broadcast &&
1509 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1510 (ifa->ifa_label[0] &&
1511 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1512 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1513 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1514 preferred, valid))
1515 goto nla_put_failure;
1517 return nlmsg_end(skb, nlh);
1519 nla_put_failure:
1520 nlmsg_cancel(skb, nlh);
1521 return -EMSGSIZE;
1524 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1526 struct net *net = sock_net(skb->sk);
1527 int h, s_h;
1528 int idx, s_idx;
1529 int ip_idx, s_ip_idx;
1530 struct net_device *dev;
1531 struct in_device *in_dev;
1532 struct in_ifaddr *ifa;
1533 struct hlist_head *head;
1535 s_h = cb->args[0];
1536 s_idx = idx = cb->args[1];
1537 s_ip_idx = ip_idx = cb->args[2];
1539 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1540 idx = 0;
1541 head = &net->dev_index_head[h];
1542 rcu_read_lock();
1543 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1544 net->dev_base_seq;
1545 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1546 if (idx < s_idx)
1547 goto cont;
1548 if (h > s_h || idx > s_idx)
1549 s_ip_idx = 0;
1550 in_dev = __in_dev_get_rcu(dev);
1551 if (!in_dev)
1552 goto cont;
1554 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1555 ifa = ifa->ifa_next, ip_idx++) {
1556 if (ip_idx < s_ip_idx)
1557 continue;
1558 if (inet_fill_ifaddr(skb, ifa,
1559 NETLINK_CB(cb->skb).portid,
1560 cb->nlh->nlmsg_seq,
1561 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1562 rcu_read_unlock();
1563 goto done;
1565 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1567 cont:
1568 idx++;
1570 rcu_read_unlock();
1573 done:
1574 cb->args[0] = h;
1575 cb->args[1] = idx;
1576 cb->args[2] = ip_idx;
1578 return skb->len;
1581 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1582 u32 portid)
1584 struct sk_buff *skb;
1585 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1586 int err = -ENOBUFS;
1587 struct net *net;
1589 net = dev_net(ifa->ifa_dev->dev);
1590 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1591 if (skb == NULL)
1592 goto errout;
1594 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1595 if (err < 0) {
1596 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1597 WARN_ON(err == -EMSGSIZE);
1598 kfree_skb(skb);
1599 goto errout;
1601 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1602 return;
1603 errout:
1604 if (err < 0)
1605 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1608 static size_t inet_get_link_af_size(const struct net_device *dev)
1610 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1612 if (!in_dev)
1613 return 0;
1615 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1618 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1620 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1621 struct nlattr *nla;
1622 int i;
1624 if (!in_dev)
1625 return -ENODATA;
1627 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1628 if (nla == NULL)
1629 return -EMSGSIZE;
1631 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1632 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1634 return 0;
1637 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1638 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1641 static int inet_validate_link_af(const struct net_device *dev,
1642 const struct nlattr *nla)
1644 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1645 int err, rem;
1647 if (dev && !__in_dev_get_rtnl(dev))
1648 return -EAFNOSUPPORT;
1650 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1651 if (err < 0)
1652 return err;
1654 if (tb[IFLA_INET_CONF]) {
1655 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1656 int cfgid = nla_type(a);
1658 if (nla_len(a) < 4)
1659 return -EINVAL;
1661 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1662 return -EINVAL;
1666 return 0;
1669 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1671 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1672 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1673 int rem;
1675 if (!in_dev)
1676 return -EAFNOSUPPORT;
1678 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1679 BUG();
1681 if (tb[IFLA_INET_CONF]) {
1682 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1683 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1686 return 0;
1689 static int inet_netconf_msgsize_devconf(int type)
1691 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1692 + nla_total_size(4); /* NETCONFA_IFINDEX */
1694 /* type -1 is used for ALL */
1695 if (type == -1 || type == NETCONFA_FORWARDING)
1696 size += nla_total_size(4);
1697 if (type == -1 || type == NETCONFA_RP_FILTER)
1698 size += nla_total_size(4);
1699 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1700 size += nla_total_size(4);
1701 if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1702 size += nla_total_size(4);
1704 return size;
1707 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1708 struct ipv4_devconf *devconf, u32 portid,
1709 u32 seq, int event, unsigned int flags,
1710 int type)
1712 struct nlmsghdr *nlh;
1713 struct netconfmsg *ncm;
1715 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1716 flags);
1717 if (nlh == NULL)
1718 return -EMSGSIZE;
1720 ncm = nlmsg_data(nlh);
1721 ncm->ncm_family = AF_INET;
1723 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1724 goto nla_put_failure;
1726 /* type -1 is used for ALL */
1727 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1728 nla_put_s32(skb, NETCONFA_FORWARDING,
1729 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1730 goto nla_put_failure;
1731 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1732 nla_put_s32(skb, NETCONFA_RP_FILTER,
1733 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1734 goto nla_put_failure;
1735 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1736 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1737 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1738 goto nla_put_failure;
1739 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1740 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1741 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1742 goto nla_put_failure;
1744 return nlmsg_end(skb, nlh);
1746 nla_put_failure:
1747 nlmsg_cancel(skb, nlh);
1748 return -EMSGSIZE;
1751 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1752 struct ipv4_devconf *devconf)
1754 struct sk_buff *skb;
1755 int err = -ENOBUFS;
1757 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1758 if (skb == NULL)
1759 goto errout;
1761 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1762 RTM_NEWNETCONF, 0, type);
1763 if (err < 0) {
1764 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1765 WARN_ON(err == -EMSGSIZE);
1766 kfree_skb(skb);
1767 goto errout;
1769 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1770 return;
1771 errout:
1772 if (err < 0)
1773 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1776 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1777 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1778 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1779 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1780 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
1783 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1784 struct nlmsghdr *nlh)
1786 struct net *net = sock_net(in_skb->sk);
1787 struct nlattr *tb[NETCONFA_MAX+1];
1788 struct netconfmsg *ncm;
1789 struct sk_buff *skb;
1790 struct ipv4_devconf *devconf;
1791 struct in_device *in_dev;
1792 struct net_device *dev;
1793 int ifindex;
1794 int err;
1796 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1797 devconf_ipv4_policy);
1798 if (err < 0)
1799 goto errout;
1801 err = EINVAL;
1802 if (!tb[NETCONFA_IFINDEX])
1803 goto errout;
1805 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1806 switch (ifindex) {
1807 case NETCONFA_IFINDEX_ALL:
1808 devconf = net->ipv4.devconf_all;
1809 break;
1810 case NETCONFA_IFINDEX_DEFAULT:
1811 devconf = net->ipv4.devconf_dflt;
1812 break;
1813 default:
1814 dev = __dev_get_by_index(net, ifindex);
1815 if (dev == NULL)
1816 goto errout;
1817 in_dev = __in_dev_get_rtnl(dev);
1818 if (in_dev == NULL)
1819 goto errout;
1820 devconf = &in_dev->cnf;
1821 break;
1824 err = -ENOBUFS;
1825 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1826 if (skb == NULL)
1827 goto errout;
1829 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1830 NETLINK_CB(in_skb).portid,
1831 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1832 -1);
1833 if (err < 0) {
1834 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1835 WARN_ON(err == -EMSGSIZE);
1836 kfree_skb(skb);
1837 goto errout;
1839 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1840 errout:
1841 return err;
1844 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1845 struct netlink_callback *cb)
1847 struct net *net = sock_net(skb->sk);
1848 int h, s_h;
1849 int idx, s_idx;
1850 struct net_device *dev;
1851 struct in_device *in_dev;
1852 struct hlist_head *head;
1854 s_h = cb->args[0];
1855 s_idx = idx = cb->args[1];
1857 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1858 idx = 0;
1859 head = &net->dev_index_head[h];
1860 rcu_read_lock();
1861 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1862 net->dev_base_seq;
1863 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1864 if (idx < s_idx)
1865 goto cont;
1866 in_dev = __in_dev_get_rcu(dev);
1867 if (!in_dev)
1868 goto cont;
1870 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1871 &in_dev->cnf,
1872 NETLINK_CB(cb->skb).portid,
1873 cb->nlh->nlmsg_seq,
1874 RTM_NEWNETCONF,
1875 NLM_F_MULTI,
1876 -1) <= 0) {
1877 rcu_read_unlock();
1878 goto done;
1880 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1881 cont:
1882 idx++;
1884 rcu_read_unlock();
1886 if (h == NETDEV_HASHENTRIES) {
1887 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1888 net->ipv4.devconf_all,
1889 NETLINK_CB(cb->skb).portid,
1890 cb->nlh->nlmsg_seq,
1891 RTM_NEWNETCONF, NLM_F_MULTI,
1892 -1) <= 0)
1893 goto done;
1894 else
1895 h++;
1897 if (h == NETDEV_HASHENTRIES + 1) {
1898 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1899 net->ipv4.devconf_dflt,
1900 NETLINK_CB(cb->skb).portid,
1901 cb->nlh->nlmsg_seq,
1902 RTM_NEWNETCONF, NLM_F_MULTI,
1903 -1) <= 0)
1904 goto done;
1905 else
1906 h++;
1908 done:
1909 cb->args[0] = h;
1910 cb->args[1] = idx;
1912 return skb->len;
1915 #ifdef CONFIG_SYSCTL
1917 static void devinet_copy_dflt_conf(struct net *net, int i)
1919 struct net_device *dev;
1921 rcu_read_lock();
1922 for_each_netdev_rcu(net, dev) {
1923 struct in_device *in_dev;
1925 in_dev = __in_dev_get_rcu(dev);
1926 if (in_dev && !test_bit(i, in_dev->cnf.state))
1927 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1929 rcu_read_unlock();
1932 /* called with RTNL locked */
1933 static void inet_forward_change(struct net *net)
1935 struct net_device *dev;
1936 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1938 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1939 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1940 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941 NETCONFA_IFINDEX_ALL,
1942 net->ipv4.devconf_all);
1943 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944 NETCONFA_IFINDEX_DEFAULT,
1945 net->ipv4.devconf_dflt);
1947 for_each_netdev(net, dev) {
1948 struct in_device *in_dev;
1949 if (on)
1950 dev_disable_lro(dev);
1951 rcu_read_lock();
1952 in_dev = __in_dev_get_rcu(dev);
1953 if (in_dev) {
1954 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1955 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1956 dev->ifindex, &in_dev->cnf);
1958 rcu_read_unlock();
1962 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1964 if (cnf == net->ipv4.devconf_dflt)
1965 return NETCONFA_IFINDEX_DEFAULT;
1966 else if (cnf == net->ipv4.devconf_all)
1967 return NETCONFA_IFINDEX_ALL;
1968 else {
1969 struct in_device *idev
1970 = container_of(cnf, struct in_device, cnf);
1971 return idev->dev->ifindex;
1975 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1976 void __user *buffer,
1977 size_t *lenp, loff_t *ppos)
1979 int old_value = *(int *)ctl->data;
1980 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1981 int new_value = *(int *)ctl->data;
1983 if (write) {
1984 struct ipv4_devconf *cnf = ctl->extra1;
1985 struct net *net = ctl->extra2;
1986 int i = (int *)ctl->data - cnf->data;
1987 int ifindex;
1989 set_bit(i, cnf->state);
1991 if (cnf == net->ipv4.devconf_dflt)
1992 devinet_copy_dflt_conf(net, i);
1993 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1994 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1995 if ((new_value == 0) && (old_value != 0))
1996 rt_cache_flush(net);
1998 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1999 new_value != old_value) {
2000 ifindex = devinet_conf_ifindex(net, cnf);
2001 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2002 ifindex, cnf);
2004 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2005 new_value != old_value) {
2006 ifindex = devinet_conf_ifindex(net, cnf);
2007 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2008 ifindex, cnf);
2012 return ret;
2015 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2016 void __user *buffer,
2017 size_t *lenp, loff_t *ppos)
2019 int *valp = ctl->data;
2020 int val = *valp;
2021 loff_t pos = *ppos;
2022 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2024 if (write && *valp != val) {
2025 struct net *net = ctl->extra2;
2027 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2028 if (!rtnl_trylock()) {
2029 /* Restore the original values before restarting */
2030 *valp = val;
2031 *ppos = pos;
2032 return restart_syscall();
2034 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2035 inet_forward_change(net);
2036 } else {
2037 struct ipv4_devconf *cnf = ctl->extra1;
2038 struct in_device *idev =
2039 container_of(cnf, struct in_device, cnf);
2040 if (*valp)
2041 dev_disable_lro(idev->dev);
2042 inet_netconf_notify_devconf(net,
2043 NETCONFA_FORWARDING,
2044 idev->dev->ifindex,
2045 cnf);
2047 rtnl_unlock();
2048 rt_cache_flush(net);
2049 } else
2050 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2051 NETCONFA_IFINDEX_DEFAULT,
2052 net->ipv4.devconf_dflt);
2055 return ret;
2058 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2059 void __user *buffer,
2060 size_t *lenp, loff_t *ppos)
2062 int *valp = ctl->data;
2063 int val = *valp;
2064 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2065 struct net *net = ctl->extra2;
2067 if (write && *valp != val)
2068 rt_cache_flush(net);
2070 return ret;
/*
 * Helpers to build the template entries of devinet_sysctl.  Every entry
 * points into the global ipv4_devconf at slot IPV4_DEVCONF_<attr> - 1.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2096 static struct devinet_sysctl_table {
2097 struct ctl_table_header *sysctl_header;
2098 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2099 } devinet_sysctl = {
2100 .devinet_vars = {
2101 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2102 devinet_sysctl_forward),
2103 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2105 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2106 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2107 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2108 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2109 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2110 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2111 "accept_source_route"),
2112 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2113 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2114 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2115 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2116 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2117 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2118 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2119 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2120 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2121 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2122 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2123 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2124 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2125 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2126 "force_igmp_version"),
2127 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2128 "igmpv2_unsolicited_report_interval"),
2129 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2130 "igmpv3_unsolicited_report_interval"),
2132 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2133 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2134 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2135 "promote_secondaries"),
2136 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2137 "route_localnet"),
2141 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2142 struct ipv4_devconf *p)
2144 int i;
2145 struct devinet_sysctl_table *t;
2146 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2148 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2149 if (!t)
2150 goto out;
2152 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2153 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2154 t->devinet_vars[i].extra1 = p;
2155 t->devinet_vars[i].extra2 = net;
2158 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2160 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2161 if (!t->sysctl_header)
2162 goto free;
2164 p->sysctl = t;
2165 return 0;
2167 free:
2168 kfree(t);
2169 out:
2170 return -ENOBUFS;
2173 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2175 struct devinet_sysctl_table *t = cnf->sysctl;
2177 if (t == NULL)
2178 return;
2180 cnf->sysctl = NULL;
2181 unregister_net_sysctl_table(t->sysctl_header);
2182 kfree(t);
2185 static void devinet_sysctl_register(struct in_device *idev)
2187 neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2188 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2189 &idev->cnf);
2192 static void devinet_sysctl_unregister(struct in_device *idev)
2194 __devinet_sysctl_unregister(&idev->cnf);
2195 neigh_sysctl_unregister(idev->arp_parms);
2198 static struct ctl_table ctl_forward_entry[] = {
2200 .procname = "ip_forward",
2201 .data = &ipv4_devconf.data[
2202 IPV4_DEVCONF_FORWARDING - 1],
2203 .maxlen = sizeof(int),
2204 .mode = 0644,
2205 .proc_handler = devinet_sysctl_forward,
2206 .extra1 = &ipv4_devconf,
2207 .extra2 = &init_net,
2209 { },
2211 #endif
2213 static __net_init int devinet_init_net(struct net *net)
2215 int err;
2216 struct ipv4_devconf *all, *dflt;
2217 #ifdef CONFIG_SYSCTL
2218 struct ctl_table *tbl = ctl_forward_entry;
2219 struct ctl_table_header *forw_hdr;
2220 #endif
2222 err = -ENOMEM;
2223 all = &ipv4_devconf;
2224 dflt = &ipv4_devconf_dflt;
2226 if (!net_eq(net, &init_net)) {
2227 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2228 if (all == NULL)
2229 goto err_alloc_all;
2231 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2232 if (dflt == NULL)
2233 goto err_alloc_dflt;
2235 #ifdef CONFIG_SYSCTL
2236 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2237 if (tbl == NULL)
2238 goto err_alloc_ctl;
2240 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2241 tbl[0].extra1 = all;
2242 tbl[0].extra2 = net;
2243 #endif
2246 #ifdef CONFIG_SYSCTL
2247 err = __devinet_sysctl_register(net, "all", all);
2248 if (err < 0)
2249 goto err_reg_all;
2251 err = __devinet_sysctl_register(net, "default", dflt);
2252 if (err < 0)
2253 goto err_reg_dflt;
2255 err = -ENOMEM;
2256 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2257 if (forw_hdr == NULL)
2258 goto err_reg_ctl;
2259 net->ipv4.forw_hdr = forw_hdr;
2260 #endif
2262 net->ipv4.devconf_all = all;
2263 net->ipv4.devconf_dflt = dflt;
2264 return 0;
2266 #ifdef CONFIG_SYSCTL
2267 err_reg_ctl:
2268 __devinet_sysctl_unregister(dflt);
2269 err_reg_dflt:
2270 __devinet_sysctl_unregister(all);
2271 err_reg_all:
2272 if (tbl != ctl_forward_entry)
2273 kfree(tbl);
2274 err_alloc_ctl:
2275 #endif
2276 if (dflt != &ipv4_devconf_dflt)
2277 kfree(dflt);
2278 err_alloc_dflt:
2279 if (all != &ipv4_devconf)
2280 kfree(all);
2281 err_alloc_all:
2282 return err;
2285 static __net_exit void devinet_exit_net(struct net *net)
2287 #ifdef CONFIG_SYSCTL
2288 struct ctl_table *tbl;
2290 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2291 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2292 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2293 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2294 kfree(tbl);
2295 #endif
2296 kfree(net->ipv4.devconf_dflt);
2297 kfree(net->ipv4.devconf_all);
2300 static __net_initdata struct pernet_operations devinet_ops = {
2301 .init = devinet_init_net,
2302 .exit = devinet_exit_net,
2305 static struct rtnl_af_ops inet_af_ops = {
2306 .family = AF_INET,
2307 .fill_link_af = inet_fill_link_af,
2308 .get_link_af_size = inet_get_link_af_size,
2309 .validate_link_af = inet_validate_link_af,
2310 .set_link_af = inet_set_link_af,
2313 void __init devinet_init(void)
2315 int i;
2317 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2318 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2320 register_pernet_subsys(&devinet_ops);
2322 register_gifconf(PF_INET, inet_gifconf);
2323 register_netdevice_notifier(&ip_netdev_notifier);
2325 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2327 rtnl_af_register(&inet_af_ops);
2329 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2330 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2331 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2332 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2333 inet_netconf_dump_devconf, NULL);