1 /*
2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
68 #include "fib_lookup.h"
70 static struct ipv4_devconf ipv4_devconf = {
71 .data = {
72 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82 .data = {
83 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
93 #define IPV4_DEVCONF_DFLT(net, attr) \
94 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97 [IFA_LOCAL] = { .type = NLA_U32 },
98 [IFA_ADDRESS] = { .type = NLA_U32 },
99 [IFA_BROADCAST] = { .type = NLA_U32 },
100 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
102 [IFA_FLAGS] = { .type = NLA_U32 },
105 #define IN4_ADDR_HSIZE_SHIFT 8
106 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
112 u32 val = (__force u32) addr ^ net_hash_mix(net);
114 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 u32 hash = inet_addr_hash(net, ifa->ifa_local);
121 ASSERT_RTNL();
122 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
125 static void inet_hash_remove(struct in_ifaddr *ifa)
127 ASSERT_RTNL();
128 hlist_del_init_rcu(&ifa->hash);
132 * __ip_dev_find - find the first device with a given source address.
133 * @net: the net namespace
134 * @addr: the source address
135 * @devref: if true, take a reference on the found device
137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
141 u32 hash = inet_addr_hash(net, addr);
142 struct net_device *result = NULL;
143 struct in_ifaddr *ifa;
145 rcu_read_lock();
146 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147 if (ifa->ifa_local == addr) {
148 struct net_device *dev = ifa->ifa_dev->dev;
150 if (!net_eq(dev_net(dev), net))
151 continue;
152 result = dev;
153 break;
156 if (!result) {
157 struct flowi4 fl4 = { .daddr = addr };
158 struct fib_result res = { 0 };
159 struct fib_table *local;
161 /* Fallback to FIB local table so that communication
162 * over loopback subnets works.
164 local = fib_get_table(net, RT_TABLE_LOCAL);
165 if (local &&
166 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167 res.type == RTN_LOCAL)
168 result = FIB_RES_DEV(res);
170 if (result && devref)
171 dev_hold(result);
172 rcu_read_unlock();
173 return result;
175 EXPORT_SYMBOL(__ip_dev_find);
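/*
 * Usage sketch (not part of this file; the address and device name are
 * assumptions): a caller that only needs the device briefly can pass
 * devref=false and stay inside an RCU read-side section, per the comment
 * above; passing devref=true returns a held device that must later be
 * released with dev_put().
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, htonl(0xC0000201), false);	// 192.0.2.1
 *	if (dev)
 *		pr_debug("source address owned by %s\n", dev->name);
 *	rcu_read_unlock();
 */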
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181 int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
188 return 0;
190 static void devinet_sysctl_unregister(struct in_device *idev)
193 #endif
195 /* Locks all the inet devices. */
197 static struct in_ifaddr *inet_alloc_ifa(void)
199 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202 static void inet_rcu_free_ifa(struct rcu_head *head)
204 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205 if (ifa->ifa_dev)
206 in_dev_put(ifa->ifa_dev);
207 kfree(ifa);
210 static void inet_free_ifa(struct in_ifaddr *ifa)
212 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
215 void in_dev_finish_destroy(struct in_device *idev)
217 struct net_device *dev = idev->dev;
219 WARN_ON(idev->ifa_list);
220 WARN_ON(idev->mc_list);
221 kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225 dev_put(dev);
226 if (!idev->dead)
227 pr_err("Freeing alive in_device %p\n", idev);
228 else
229 kfree(idev);
231 EXPORT_SYMBOL(in_dev_finish_destroy);
233 static struct in_device *inetdev_init(struct net_device *dev)
235 struct in_device *in_dev;
236 int err = -ENOMEM;
238 ASSERT_RTNL();
240 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241 if (!in_dev)
242 goto out;
243 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244 sizeof(in_dev->cnf));
245 in_dev->cnf.sysctl = NULL;
246 in_dev->dev = dev;
247 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248 if (!in_dev->arp_parms)
249 goto out_kfree;
250 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251 dev_disable_lro(dev);
252 /* Reference in_dev->dev */
253 dev_hold(dev);
254 /* Account for reference dev->ip_ptr (below) */
255 in_dev_hold(in_dev);
257 err = devinet_sysctl_register(in_dev);
258 if (err) {
259 in_dev->dead = 1;
260 in_dev_put(in_dev);
261 in_dev = NULL;
262 goto out;
264 ip_mc_init_dev(in_dev);
265 if (dev->flags & IFF_UP)
266 ip_mc_up(in_dev);
268 /* we can receive as soon as ip_ptr is set -- do this last */
269 rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271 return in_dev ?: ERR_PTR(err);
272 out_kfree:
273 kfree(in_dev);
274 in_dev = NULL;
275 goto out;
278 static void in_dev_rcu_put(struct rcu_head *head)
280 struct in_device *idev = container_of(head, struct in_device, rcu_head);
281 in_dev_put(idev);
284 static void inetdev_destroy(struct in_device *in_dev)
286 struct in_ifaddr *ifa;
287 struct net_device *dev;
289 ASSERT_RTNL();
291 dev = in_dev->dev;
293 in_dev->dead = 1;
295 ip_mc_destroy_dev(in_dev);
297 while ((ifa = in_dev->ifa_list) != NULL) {
298 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299 inet_free_ifa(ifa);
302 RCU_INIT_POINTER(dev->ip_ptr, NULL);
304 devinet_sysctl_unregister(in_dev);
305 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306 arp_ifdown(dev);
308 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
313 rcu_read_lock();
314 for_primary_ifa(in_dev) {
315 if (inet_ifa_match(a, ifa)) {
316 if (!b || inet_ifa_match(b, ifa)) {
317 rcu_read_unlock();
318 return 1;
321 } endfor_ifa(in_dev);
322 rcu_read_unlock();
323 return 0;
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327 int destroy, struct nlmsghdr *nlh, u32 portid)
329 struct in_ifaddr *promote = NULL;
330 struct in_ifaddr *ifa, *ifa1 = *ifap;
331 struct in_ifaddr *last_prim = in_dev->ifa_list;
332 struct in_ifaddr *prev_prom = NULL;
333 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
335 ASSERT_RTNL();
337 if (in_dev->dead)
338 goto no_promotions;
340 /* 1. Deleting the primary ifaddr forces deletion of all secondaries
341 * unless alias promotion is set
344 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
345 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
347 while ((ifa = *ifap1) != NULL) {
348 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
349 ifa1->ifa_scope <= ifa->ifa_scope)
350 last_prim = ifa;
352 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
353 ifa1->ifa_mask != ifa->ifa_mask ||
354 !inet_ifa_match(ifa1->ifa_address, ifa)) {
355 ifap1 = &ifa->ifa_next;
356 prev_prom = ifa;
357 continue;
360 if (!do_promote) {
361 inet_hash_remove(ifa);
362 *ifap1 = ifa->ifa_next;
364 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
365 blocking_notifier_call_chain(&inetaddr_chain,
366 NETDEV_DOWN, ifa);
367 inet_free_ifa(ifa);
368 } else {
369 promote = ifa;
370 break;
375 /* On promotion all secondaries from the subnet are changing
376 * the primary IP; we must remove all their routes silently
377 * and later add them back with the new prefsrc. Do this
378 * while all addresses are on the device list.
380 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
381 if (ifa1->ifa_mask == ifa->ifa_mask &&
382 inet_ifa_match(ifa1->ifa_address, ifa))
383 fib_del_ifaddr(ifa, ifa1);
386 no_promotions:
387 /* 2. Unlink it */
389 *ifap = ifa1->ifa_next;
390 inet_hash_remove(ifa1);
392 /* 3. Announce address deletion */
394 /* Send message first, then call notifier.
395 At first sight, the FIB update triggered by the notifier
396 will refer to an already deleted ifaddr, which could confuse
397 netlink listeners. It is not true: gated sees
398 that the route was deleted and, if it still thinks the ifaddr
399 is valid, it will try to restore the deleted routes... Grr.
400 So this order is correct.
402 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
403 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
405 if (promote) {
406 struct in_ifaddr *next_sec = promote->ifa_next;
408 if (prev_prom) {
409 prev_prom->ifa_next = promote->ifa_next;
410 promote->ifa_next = last_prim->ifa_next;
411 last_prim->ifa_next = promote;
414 promote->ifa_flags &= ~IFA_F_SECONDARY;
415 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
416 blocking_notifier_call_chain(&inetaddr_chain,
417 NETDEV_UP, promote);
418 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
419 if (ifa1->ifa_mask != ifa->ifa_mask ||
420 !inet_ifa_match(ifa1->ifa_address, ifa))
421 continue;
422 fib_add_ifaddr(ifa);
426 if (destroy)
427 inet_free_ifa(ifa1);
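/*
 * Illustration of the promotion path above (assumed userspace commands
 * and device name, not taken from this file): with
 * net.ipv4.conf.eth0.promote_secondaries=1, deleting the primary address
 * promotes a matching secondary instead of wiping the whole subnet.
 *
 *	# ip addr add 192.0.2.1/24 dev eth0      (primary)
 *	# ip addr add 192.0.2.2/24 dev eth0      (secondary, same subnet)
 *	# ip addr del 192.0.2.1/24 dev eth0      (192.0.2.2 becomes primary)
 *
 * With promote_secondaries=0 the secondary is deleted as well, which is
 * the !do_promote branch above.
 */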
430 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
431 int destroy)
433 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
436 static void check_lifetime(struct work_struct *work);
438 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
440 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
441 u32 portid)
443 struct in_device *in_dev = ifa->ifa_dev;
444 struct in_ifaddr *ifa1, **ifap, **last_primary;
446 ASSERT_RTNL();
448 if (!ifa->ifa_local) {
449 inet_free_ifa(ifa);
450 return 0;
453 ifa->ifa_flags &= ~IFA_F_SECONDARY;
454 last_primary = &in_dev->ifa_list;
456 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
457 ifap = &ifa1->ifa_next) {
458 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
459 ifa->ifa_scope <= ifa1->ifa_scope)
460 last_primary = &ifa1->ifa_next;
461 if (ifa1->ifa_mask == ifa->ifa_mask &&
462 inet_ifa_match(ifa1->ifa_address, ifa)) {
463 if (ifa1->ifa_local == ifa->ifa_local) {
464 inet_free_ifa(ifa);
465 return -EEXIST;
467 if (ifa1->ifa_scope != ifa->ifa_scope) {
468 inet_free_ifa(ifa);
469 return -EINVAL;
471 ifa->ifa_flags |= IFA_F_SECONDARY;
475 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
476 prandom_seed((__force u32) ifa->ifa_local);
477 ifap = last_primary;
480 ifa->ifa_next = *ifap;
481 *ifap = ifa;
483 inet_hash_insert(dev_net(in_dev->dev), ifa);
485 cancel_delayed_work(&check_lifetime_work);
486 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
488 /* Send message first, then call notifier.
489 The notifier will trigger a FIB update, so that
490 netlink listeners will know about the new ifaddr */
491 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
492 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
494 return 0;
497 static int inet_insert_ifa(struct in_ifaddr *ifa)
499 return __inet_insert_ifa(ifa, NULL, 0);
502 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
504 struct in_device *in_dev = __in_dev_get_rtnl(dev);
506 ASSERT_RTNL();
508 if (!in_dev) {
509 inet_free_ifa(ifa);
510 return -ENOBUFS;
512 ipv4_devconf_setall(in_dev);
513 neigh_parms_data_state_setall(in_dev->arp_parms);
514 if (ifa->ifa_dev != in_dev) {
515 WARN_ON(ifa->ifa_dev);
516 in_dev_hold(in_dev);
517 ifa->ifa_dev = in_dev;
519 if (ipv4_is_loopback(ifa->ifa_local))
520 ifa->ifa_scope = RT_SCOPE_HOST;
521 return inet_insert_ifa(ifa);
524 /* Caller must hold RCU or RTNL:
525 * We don't take a reference on the found in_device
527 struct in_device *inetdev_by_index(struct net *net, int ifindex)
529 struct net_device *dev;
530 struct in_device *in_dev = NULL;
532 rcu_read_lock();
533 dev = dev_get_by_index_rcu(net, ifindex);
534 if (dev)
535 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
536 rcu_read_unlock();
537 return in_dev;
539 EXPORT_SYMBOL(inetdev_by_index);
541 /* Called only from RTNL semaphored context. No locks. */
543 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
544 __be32 mask)
546 ASSERT_RTNL();
548 for_primary_ifa(in_dev) {
549 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
550 return ifa;
551 } endfor_ifa(in_dev);
552 return NULL;
555 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
557 struct ip_mreqn mreq = {
558 .imr_multiaddr.s_addr = ifa->ifa_address,
559 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
561 int ret;
563 ASSERT_RTNL();
565 lock_sock(sk);
566 if (join)
567 ret = ip_mc_join_group(sk, &mreq);
568 else
569 ret = ip_mc_leave_group(sk, &mreq);
570 release_sock(sk);
572 return ret;
575 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
577 struct net *net = sock_net(skb->sk);
578 struct nlattr *tb[IFA_MAX+1];
579 struct in_device *in_dev;
580 struct ifaddrmsg *ifm;
581 struct in_ifaddr *ifa, **ifap;
582 int err = -EINVAL;
584 ASSERT_RTNL();
586 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
587 if (err < 0)
588 goto errout;
590 ifm = nlmsg_data(nlh);
591 in_dev = inetdev_by_index(net, ifm->ifa_index);
592 if (!in_dev) {
593 err = -ENODEV;
594 goto errout;
597 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
598 ifap = &ifa->ifa_next) {
599 if (tb[IFA_LOCAL] &&
600 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
601 continue;
603 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
604 continue;
606 if (tb[IFA_ADDRESS] &&
607 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
608 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
609 continue;
611 if (ipv4_is_multicast(ifa->ifa_address))
612 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
613 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
614 return 0;
617 err = -EADDRNOTAVAIL;
618 errout:
619 return err;
622 #define INFINITY_LIFE_TIME 0xFFFFFFFF
624 static void check_lifetime(struct work_struct *work)
626 unsigned long now, next, next_sec, next_sched;
627 struct in_ifaddr *ifa;
628 struct hlist_node *n;
629 int i;
631 now = jiffies;
632 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
634 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
635 bool change_needed = false;
637 rcu_read_lock();
638 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
639 unsigned long age;
641 if (ifa->ifa_flags & IFA_F_PERMANENT)
642 continue;
644 /* We try to batch several events at once. */
645 age = (now - ifa->ifa_tstamp +
646 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
648 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
649 age >= ifa->ifa_valid_lft) {
650 change_needed = true;
651 } else if (ifa->ifa_preferred_lft ==
652 INFINITY_LIFE_TIME) {
653 continue;
654 } else if (age >= ifa->ifa_preferred_lft) {
655 if (time_before(ifa->ifa_tstamp +
656 ifa->ifa_valid_lft * HZ, next))
657 next = ifa->ifa_tstamp +
658 ifa->ifa_valid_lft * HZ;
660 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
661 change_needed = true;
662 } else if (time_before(ifa->ifa_tstamp +
663 ifa->ifa_preferred_lft * HZ,
664 next)) {
665 next = ifa->ifa_tstamp +
666 ifa->ifa_preferred_lft * HZ;
669 rcu_read_unlock();
670 if (!change_needed)
671 continue;
672 rtnl_lock();
673 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
674 unsigned long age;
676 if (ifa->ifa_flags & IFA_F_PERMANENT)
677 continue;
679 /* We try to batch several events at once. */
680 age = (now - ifa->ifa_tstamp +
681 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
683 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
684 age >= ifa->ifa_valid_lft) {
685 struct in_ifaddr **ifap;
687 for (ifap = &ifa->ifa_dev->ifa_list;
688 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
689 if (*ifap == ifa) {
690 inet_del_ifa(ifa->ifa_dev,
691 ifap, 1);
692 break;
695 } else if (ifa->ifa_preferred_lft !=
696 INFINITY_LIFE_TIME &&
697 age >= ifa->ifa_preferred_lft &&
698 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
699 ifa->ifa_flags |= IFA_F_DEPRECATED;
700 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
703 rtnl_unlock();
706 next_sec = round_jiffies_up(next);
707 next_sched = next;
709 /* If rounded timeout is accurate enough, accept it. */
710 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
711 next_sched = next_sec;
713 now = jiffies;
714 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
715 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
716 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
718 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
719 next_sched - now);
722 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
723 __u32 prefered_lft)
725 unsigned long timeout;
727 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
729 timeout = addrconf_timeout_fixup(valid_lft, HZ);
730 if (addrconf_finite_timeout(timeout))
731 ifa->ifa_valid_lft = timeout;
732 else
733 ifa->ifa_flags |= IFA_F_PERMANENT;
735 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
736 if (addrconf_finite_timeout(timeout)) {
737 if (timeout == 0)
738 ifa->ifa_flags |= IFA_F_DEPRECATED;
739 ifa->ifa_preferred_lft = timeout;
741 ifa->ifa_tstamp = jiffies;
742 if (!ifa->ifa_cstamp)
743 ifa->ifa_cstamp = ifa->ifa_tstamp;
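/*
 * Resulting behaviour, sketched with an assumed iproute2 invocation: an
 * address added with finite lifetimes is marked IFA_F_DEPRECATED by
 * check_lifetime() once the preferred lifetime expires and removed once
 * the valid lifetime expires; INFINITY_LIFE_TIME maps to IFA_F_PERMANENT.
 *
 *	# ip addr add 192.0.2.10/24 dev eth0 valid_lft 600 preferred_lft 300
 */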
746 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
747 __u32 *pvalid_lft, __u32 *pprefered_lft)
749 struct nlattr *tb[IFA_MAX+1];
750 struct in_ifaddr *ifa;
751 struct ifaddrmsg *ifm;
752 struct net_device *dev;
753 struct in_device *in_dev;
754 int err;
756 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
757 if (err < 0)
758 goto errout;
760 ifm = nlmsg_data(nlh);
761 err = -EINVAL;
762 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
763 goto errout;
765 dev = __dev_get_by_index(net, ifm->ifa_index);
766 err = -ENODEV;
767 if (!dev)
768 goto errout;
770 in_dev = __in_dev_get_rtnl(dev);
771 err = -ENOBUFS;
772 if (!in_dev)
773 goto errout;
775 ifa = inet_alloc_ifa();
776 if (!ifa)
778 * A potential indev allocation can be left alive; it stays
779 * assigned to its device and is destroyed with it.
781 goto errout;
783 ipv4_devconf_setall(in_dev);
784 neigh_parms_data_state_setall(in_dev->arp_parms);
785 in_dev_hold(in_dev);
787 if (!tb[IFA_ADDRESS])
788 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
790 INIT_HLIST_NODE(&ifa->hash);
791 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
792 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
793 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
794 ifm->ifa_flags;
795 ifa->ifa_scope = ifm->ifa_scope;
796 ifa->ifa_dev = in_dev;
798 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
799 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
801 if (tb[IFA_BROADCAST])
802 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
804 if (tb[IFA_LABEL])
805 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
806 else
807 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
809 if (tb[IFA_CACHEINFO]) {
810 struct ifa_cacheinfo *ci;
812 ci = nla_data(tb[IFA_CACHEINFO]);
813 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
814 err = -EINVAL;
815 goto errout_free;
817 *pvalid_lft = ci->ifa_valid;
818 *pprefered_lft = ci->ifa_prefered;
821 return ifa;
823 errout_free:
824 inet_free_ifa(ifa);
825 errout:
826 return ERR_PTR(err);
829 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
831 struct in_device *in_dev = ifa->ifa_dev;
832 struct in_ifaddr *ifa1, **ifap;
834 if (!ifa->ifa_local)
835 return NULL;
837 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
838 ifap = &ifa1->ifa_next) {
839 if (ifa1->ifa_mask == ifa->ifa_mask &&
840 inet_ifa_match(ifa1->ifa_address, ifa) &&
841 ifa1->ifa_local == ifa->ifa_local)
842 return ifa1;
844 return NULL;
847 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
849 struct net *net = sock_net(skb->sk);
850 struct in_ifaddr *ifa;
851 struct in_ifaddr *ifa_existing;
852 __u32 valid_lft = INFINITY_LIFE_TIME;
853 __u32 prefered_lft = INFINITY_LIFE_TIME;
855 ASSERT_RTNL();
857 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
858 if (IS_ERR(ifa))
859 return PTR_ERR(ifa);
861 ifa_existing = find_matching_ifa(ifa);
862 if (!ifa_existing) {
863 /* It would be best to check for !NLM_F_CREATE here but
864 * userspace already relies on not having to provide this.
866 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
867 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
868 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
869 true, ifa);
871 if (ret < 0) {
872 inet_free_ifa(ifa);
873 return ret;
876 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
877 } else {
878 inet_free_ifa(ifa);
880 if (nlh->nlmsg_flags & NLM_F_EXCL ||
881 !(nlh->nlmsg_flags & NLM_F_REPLACE))
882 return -EEXIST;
883 ifa = ifa_existing;
884 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
885 cancel_delayed_work(&check_lifetime_work);
886 queue_delayed_work(system_power_efficient_wq,
887 &check_lifetime_work, 0);
888 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
889 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
891 return 0;
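/*
 * Netlink semantics implemented above, illustrated with assumed iproute2
 * commands: adding an address that already exists fails with -EEXIST
 * unless NLM_F_REPLACE is set, in which case the lifetimes are refreshed
 * and the address is re-announced; NLM_F_EXCL always forces the -EEXIST
 * path.
 *
 *	# ip addr add 192.0.2.1/24 dev eth0      -> -EEXIST if already there
 *	# ip addr replace 192.0.2.1/24 dev eth0  -> lifetimes updated
 */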
895 * Determine a default network mask, based on the IP address.
898 static int inet_abc_len(__be32 addr)
900 int rc = -1; /* Something else, probably a multicast. */
902 if (ipv4_is_zeronet(addr))
903 rc = 0;
904 else {
905 __u32 haddr = ntohl(addr);
907 if (IN_CLASSA(haddr))
908 rc = 8;
909 else if (IN_CLASSB(haddr))
910 rc = 16;
911 else if (IN_CLASSC(haddr))
912 rc = 24;
915 return rc;
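/*
 * Worked examples of the classful fallback above (standard address
 * classes, not taken from this file):
 *
 *	inet_abc_len(10.0.0.1)     ->  8   (class A, first octet 0-127)
 *	inet_abc_len(172.16.0.1)   -> 16   (class B, first octet 128-191)
 *	inet_abc_len(192.168.0.1)  -> 24   (class C, first octet 192-223)
 *	inet_abc_len(224.0.0.1)    -> -1   (multicast, no default mask)
 *	inet_abc_len(0.0.0.0)      ->  0   (zeronet)
 */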
919 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
921 struct ifreq ifr;
922 struct sockaddr_in sin_orig;
923 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
924 struct in_device *in_dev;
925 struct in_ifaddr **ifap = NULL;
926 struct in_ifaddr *ifa = NULL;
927 struct net_device *dev;
928 char *colon;
929 int ret = -EFAULT;
930 int tryaddrmatch = 0;
933 * Fetch the caller's info block into kernel space
936 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
937 goto out;
938 ifr.ifr_name[IFNAMSIZ - 1] = 0;
940 /* save original address for comparison */
941 memcpy(&sin_orig, sin, sizeof(*sin));
943 colon = strchr(ifr.ifr_name, ':');
944 if (colon)
945 *colon = 0;
947 dev_load(net, ifr.ifr_name);
949 switch (cmd) {
950 case SIOCGIFADDR: /* Get interface address */
951 case SIOCGIFBRDADDR: /* Get the broadcast address */
952 case SIOCGIFDSTADDR: /* Get the destination address */
953 case SIOCGIFNETMASK: /* Get the netmask for the interface */
954 /* Note that these ioctls will not sleep,
955 so that we do not impose a lock.
956 One day we will be forced to put shlock here (I mean SMP)
958 tryaddrmatch = (sin_orig.sin_family == AF_INET);
959 memset(sin, 0, sizeof(*sin));
960 sin->sin_family = AF_INET;
961 break;
963 case SIOCSIFFLAGS:
964 ret = -EPERM;
965 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
966 goto out;
967 break;
968 case SIOCSIFADDR: /* Set interface address (and family) */
969 case SIOCSIFBRDADDR: /* Set the broadcast address */
970 case SIOCSIFDSTADDR: /* Set the destination address */
971 case SIOCSIFNETMASK: /* Set the netmask for the interface */
972 ret = -EPERM;
973 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
974 goto out;
975 ret = -EINVAL;
976 if (sin->sin_family != AF_INET)
977 goto out;
978 break;
979 default:
980 ret = -EINVAL;
981 goto out;
984 rtnl_lock();
986 ret = -ENODEV;
987 dev = __dev_get_by_name(net, ifr.ifr_name);
988 if (!dev)
989 goto done;
991 if (colon)
992 *colon = ':';
994 in_dev = __in_dev_get_rtnl(dev);
995 if (in_dev) {
996 if (tryaddrmatch) {
997 /* Matthias Andree */
998 /* compare label and address (4.4BSD style) */
999 /* note: we only do this for a limited set of ioctls
1000 and only if the original address family was AF_INET.
1001 This is checked above. */
1002 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1003 ifap = &ifa->ifa_next) {
1004 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1005 sin_orig.sin_addr.s_addr ==
1006 ifa->ifa_local) {
1007 break; /* found */
1011 /* we didn't get a match, maybe the application is
1012 4.3BSD-style and passed in junk so we fall back to
1013 comparing just the label */
1014 if (!ifa) {
1015 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1016 ifap = &ifa->ifa_next)
1017 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1018 break;
1022 ret = -EADDRNOTAVAIL;
1023 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1024 goto done;
1026 switch (cmd) {
1027 case SIOCGIFADDR: /* Get interface address */
1028 sin->sin_addr.s_addr = ifa->ifa_local;
1029 goto rarok;
1031 case SIOCGIFBRDADDR: /* Get the broadcast address */
1032 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1033 goto rarok;
1035 case SIOCGIFDSTADDR: /* Get the destination address */
1036 sin->sin_addr.s_addr = ifa->ifa_address;
1037 goto rarok;
1039 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1040 sin->sin_addr.s_addr = ifa->ifa_mask;
1041 goto rarok;
1043 case SIOCSIFFLAGS:
1044 if (colon) {
1045 ret = -EADDRNOTAVAIL;
1046 if (!ifa)
1047 break;
1048 ret = 0;
1049 if (!(ifr.ifr_flags & IFF_UP))
1050 inet_del_ifa(in_dev, ifap, 1);
1051 break;
1053 ret = dev_change_flags(dev, ifr.ifr_flags);
1054 break;
1056 case SIOCSIFADDR: /* Set interface address (and family) */
1057 ret = -EINVAL;
1058 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1059 break;
1061 if (!ifa) {
1062 ret = -ENOBUFS;
1063 ifa = inet_alloc_ifa();
1064 if (!ifa)
1065 break;
1066 INIT_HLIST_NODE(&ifa->hash);
1067 if (colon)
1068 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1069 else
1070 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1071 } else {
1072 ret = 0;
1073 if (ifa->ifa_local == sin->sin_addr.s_addr)
1074 break;
1075 inet_del_ifa(in_dev, ifap, 0);
1076 ifa->ifa_broadcast = 0;
1077 ifa->ifa_scope = 0;
1080 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1082 if (!(dev->flags & IFF_POINTOPOINT)) {
1083 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1084 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1085 if ((dev->flags & IFF_BROADCAST) &&
1086 ifa->ifa_prefixlen < 31)
1087 ifa->ifa_broadcast = ifa->ifa_address |
1088 ~ifa->ifa_mask;
1089 } else {
1090 ifa->ifa_prefixlen = 32;
1091 ifa->ifa_mask = inet_make_mask(32);
1093 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1094 ret = inet_set_ifa(dev, ifa);
1095 break;
1097 case SIOCSIFBRDADDR: /* Set the broadcast address */
1098 ret = 0;
1099 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1100 inet_del_ifa(in_dev, ifap, 0);
1101 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1102 inet_insert_ifa(ifa);
1104 break;
1106 case SIOCSIFDSTADDR: /* Set the destination address */
1107 ret = 0;
1108 if (ifa->ifa_address == sin->sin_addr.s_addr)
1109 break;
1110 ret = -EINVAL;
1111 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1112 break;
1113 ret = 0;
1114 inet_del_ifa(in_dev, ifap, 0);
1115 ifa->ifa_address = sin->sin_addr.s_addr;
1116 inet_insert_ifa(ifa);
1117 break;
1119 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1122 * The mask we set must be legal.
1124 ret = -EINVAL;
1125 if (bad_mask(sin->sin_addr.s_addr, 0))
1126 break;
1127 ret = 0;
1128 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1129 __be32 old_mask = ifa->ifa_mask;
1130 inet_del_ifa(in_dev, ifap, 0);
1131 ifa->ifa_mask = sin->sin_addr.s_addr;
1132 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1134 /* See if the current broadcast address matches
1135 * the current netmask; if so, recalculate
1136 * the broadcast address. Otherwise it's a
1137 * funny address, so don't touch it, since
1138 * the user seems to know what (s)he's doing...
1140 if ((dev->flags & IFF_BROADCAST) &&
1141 (ifa->ifa_prefixlen < 31) &&
1142 (ifa->ifa_broadcast ==
1143 (ifa->ifa_local|~old_mask))) {
1144 ifa->ifa_broadcast = (ifa->ifa_local |
1145 ~sin->sin_addr.s_addr);
1147 inet_insert_ifa(ifa);
1149 break;
1151 done:
1152 rtnl_unlock();
1153 out:
1154 return ret;
1155 rarok:
1156 rtnl_unlock();
1157 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1158 goto out;
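/*
 * Userspace view of the ioctls handled above (minimal sketch; the socket
 * type, interface name and missing error handling are assumptions, and
 * <sys/ioctl.h>, <net/if.h>, <arpa/inet.h> are assumed included):
 *
 *	struct ifreq ifr = { 0 };
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFADDR, &ifr) == 0)
 *		printf("%s\n", inet_ntoa(sin->sin_addr));
 *
 * A label of the form "eth0:1" selects a specific alias, matching the
 * 4.4BSD-style label/address comparison in the code above.
 */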
1161 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1163 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1164 struct in_ifaddr *ifa;
1165 struct ifreq ifr;
1166 int done = 0;
1168 if (!in_dev)
1169 goto out;
1171 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1172 if (!buf) {
1173 done += sizeof(ifr);
1174 continue;
1176 if (len < (int) sizeof(ifr))
1177 break;
1178 memset(&ifr, 0, sizeof(struct ifreq));
1179 strcpy(ifr.ifr_name, ifa->ifa_label);
1181 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1182 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1183 ifa->ifa_local;
1185 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1186 done = -EFAULT;
1187 break;
1189 buf += sizeof(struct ifreq);
1190 len -= sizeof(struct ifreq);
1191 done += sizeof(struct ifreq);
1193 out:
1194 return done;
1197 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1199 __be32 addr = 0;
1200 struct in_device *in_dev;
1201 struct net *net = dev_net(dev);
1203 rcu_read_lock();
1204 in_dev = __in_dev_get_rcu(dev);
1205 if (!in_dev)
1206 goto no_in_dev;
1208 for_primary_ifa(in_dev) {
1209 if (ifa->ifa_scope > scope)
1210 continue;
1211 if (!dst || inet_ifa_match(dst, ifa)) {
1212 addr = ifa->ifa_local;
1213 break;
1215 if (!addr)
1216 addr = ifa->ifa_local;
1217 } endfor_ifa(in_dev);
1219 if (addr)
1220 goto out_unlock;
1221 no_in_dev:
1223 /* Non-loopback addresses on the loopback device should be preferred
1224 in this case. It is important that lo is the first interface
1225 in the dev_base list.
1227 for_each_netdev_rcu(net, dev) {
1228 in_dev = __in_dev_get_rcu(dev);
1229 if (!in_dev)
1230 continue;
1232 for_primary_ifa(in_dev) {
1233 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1234 ifa->ifa_scope <= scope) {
1235 addr = ifa->ifa_local;
1236 goto out_unlock;
1238 } endfor_ifa(in_dev);
1240 out_unlock:
1241 rcu_read_unlock();
1242 return addr;
1244 EXPORT_SYMBOL(inet_select_addr);
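/*
 * Example of the selection policy above (hypothetical setup): if eth0
 * carries 192.0.2.1/24 with global scope and 169.254.0.1/16 with link
 * scope, then inet_select_addr(eth0, 192.0.2.99, RT_SCOPE_UNIVERSE)
 * returns 192.0.2.1 -- the first primary address whose scope does not
 * exceed the requested one and whose subnet matches the destination;
 * the link-scoped address is skipped by the scope check.
 */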
1246 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1247 __be32 local, int scope)
1249 int same = 0;
1250 __be32 addr = 0;
1252 for_ifa(in_dev) {
1253 if (!addr &&
1254 (local == ifa->ifa_local || !local) &&
1255 ifa->ifa_scope <= scope) {
1256 addr = ifa->ifa_local;
1257 if (same)
1258 break;
1260 if (!same) {
1261 same = (!local || inet_ifa_match(local, ifa)) &&
1262 (!dst || inet_ifa_match(dst, ifa));
1263 if (same && addr) {
1264 if (local || !dst)
1265 break;
1266 /* Is the selected addr in the dst subnet? */
1267 if (inet_ifa_match(addr, ifa))
1268 break;
1269 /* No, then can we use new local src? */
1270 if (ifa->ifa_scope <= scope) {
1271 addr = ifa->ifa_local;
1272 break;
1274 /* search for large dst subnet for addr */
1275 same = 0;
1278 } endfor_ifa(in_dev);
1280 return same ? addr : 0;
1284 * Confirm that local IP address exists using wildcards:
1285 * - net: netns to check, cannot be NULL
1286 * - in_dev: only on this interface, NULL=any interface
1287 * - dst: only in the same subnet as dst, 0=any dst
1288 * - local: address, 0=autoselect the local address
1289 * - scope: maximum allowed scope value for the local address
1291 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1292 __be32 dst, __be32 local, int scope)
1294 __be32 addr = 0;
1295 struct net_device *dev;
1297 if (in_dev)
1298 return confirm_addr_indev(in_dev, dst, local, scope);
1300 rcu_read_lock();
1301 for_each_netdev_rcu(net, dev) {
1302 in_dev = __in_dev_get_rcu(dev);
1303 if (in_dev) {
1304 addr = confirm_addr_indev(in_dev, dst, local, scope);
1305 if (addr)
1306 break;
1309 rcu_read_unlock();
1311 return addr;
1313 EXPORT_SYMBOL(inet_confirm_addr);
1316 * Device notifier
1319 int register_inetaddr_notifier(struct notifier_block *nb)
1321 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1323 EXPORT_SYMBOL(register_inetaddr_notifier);
1325 int unregister_inetaddr_notifier(struct notifier_block *nb)
1327 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1329 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1331 /* Rename ifa_labels for a device name change. Make some effort to preserve
1332 * existing alias numbering and to create unique labels if possible.
1334 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1336 struct in_ifaddr *ifa;
1337 int named = 0;
1339 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1340 char old[IFNAMSIZ], *dot;
1342 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1343 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1344 if (named++ == 0)
1345 goto skip;
1346 dot = strchr(old, ':');
1347 if (!dot) {
1348 sprintf(old, ":%d", named);
1349 dot = old;
1351 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1352 strcat(ifa->ifa_label, dot);
1353 else
1354 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1355 skip:
1356 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1360 static bool inetdev_valid_mtu(unsigned int mtu)
1362 return mtu >= 68;
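/*
 * 68 is the minimum MTU an IPv4 host must be able to handle (RFC 791:
 * a 60-byte maximum header plus an 8-byte minimum fragment), so IPv4 is
 * disabled on devices whose MTU drops below that.
 */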
1365 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1366 struct in_device *in_dev)
1369 struct in_ifaddr *ifa;
1371 for (ifa = in_dev->ifa_list; ifa;
1372 ifa = ifa->ifa_next) {
1373 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1374 ifa->ifa_local, dev,
1375 ifa->ifa_local, NULL,
1376 dev->dev_addr, NULL);
1380 /* Called only under RTNL semaphore */
1382 static int inetdev_event(struct notifier_block *this, unsigned long event,
1383 void *ptr)
1385 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1386 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1388 ASSERT_RTNL();
1390 if (!in_dev) {
1391 if (event == NETDEV_REGISTER) {
1392 in_dev = inetdev_init(dev);
1393 if (IS_ERR(in_dev))
1394 return notifier_from_errno(PTR_ERR(in_dev));
1395 if (dev->flags & IFF_LOOPBACK) {
1396 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1397 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1399 } else if (event == NETDEV_CHANGEMTU) {
1400 /* Re-enabling IP */
1401 if (inetdev_valid_mtu(dev->mtu))
1402 in_dev = inetdev_init(dev);
1404 goto out;
1407 switch (event) {
1408 case NETDEV_REGISTER:
1409 pr_debug("%s: bug\n", __func__);
1410 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1411 break;
1412 case NETDEV_UP:
1413 if (!inetdev_valid_mtu(dev->mtu))
1414 break;
1415 if (dev->flags & IFF_LOOPBACK) {
1416 struct in_ifaddr *ifa = inet_alloc_ifa();
1418 if (ifa) {
1419 INIT_HLIST_NODE(&ifa->hash);
1420 ifa->ifa_local =
1421 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1422 ifa->ifa_prefixlen = 8;
1423 ifa->ifa_mask = inet_make_mask(8);
1424 in_dev_hold(in_dev);
1425 ifa->ifa_dev = in_dev;
1426 ifa->ifa_scope = RT_SCOPE_HOST;
1427 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1428 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1429 INFINITY_LIFE_TIME);
1430 ipv4_devconf_setall(in_dev);
1431 neigh_parms_data_state_setall(in_dev->arp_parms);
1432 inet_insert_ifa(ifa);
1435 ip_mc_up(in_dev);
1436 /* fall through */
1437 case NETDEV_CHANGEADDR:
1438 if (!IN_DEV_ARP_NOTIFY(in_dev))
1439 break;
1440 /* fall through */
1441 case NETDEV_NOTIFY_PEERS:
1442 /* Send gratuitous ARP to notify of link change */
1443 inetdev_send_gratuitous_arp(dev, in_dev);
1444 break;
1445 case NETDEV_DOWN:
1446 ip_mc_down(in_dev);
1447 break;
1448 case NETDEV_PRE_TYPE_CHANGE:
1449 ip_mc_unmap(in_dev);
1450 break;
1451 case NETDEV_POST_TYPE_CHANGE:
1452 ip_mc_remap(in_dev);
1453 break;
1454 case NETDEV_CHANGEMTU:
1455 if (inetdev_valid_mtu(dev->mtu))
1456 break;
1457 /* disable IP when MTU is not enough */
1458 case NETDEV_UNREGISTER:
1459 inetdev_destroy(in_dev);
1460 break;
1461 case NETDEV_CHANGENAME:
1462 /* Do not notify about label change, this event is
1463 * not interesting to applications using netlink.
1465 inetdev_changename(dev, in_dev);
1467 devinet_sysctl_unregister(in_dev);
1468 devinet_sysctl_register(in_dev);
1469 break;
1471 out:
1472 return NOTIFY_DONE;
1475 static struct notifier_block ip_netdev_notifier = {
1476 .notifier_call = inetdev_event,
1479 static size_t inet_nlmsg_size(void)
1481 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1482 + nla_total_size(4) /* IFA_ADDRESS */
1483 + nla_total_size(4) /* IFA_LOCAL */
1484 + nla_total_size(4) /* IFA_BROADCAST */
1485 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1486 + nla_total_size(4) /* IFA_FLAGS */
1487 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1490 static inline u32 cstamp_delta(unsigned long cstamp)
1492 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1495 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1496 unsigned long tstamp, u32 preferred, u32 valid)
1498 struct ifa_cacheinfo ci;
1500 ci.cstamp = cstamp_delta(cstamp);
1501 ci.tstamp = cstamp_delta(tstamp);
1502 ci.ifa_prefered = preferred;
1503 ci.ifa_valid = valid;
1505 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1508 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1509 u32 portid, u32 seq, int event, unsigned int flags)
1511 struct ifaddrmsg *ifm;
1512 struct nlmsghdr *nlh;
1513 u32 preferred, valid;
1515 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1516 if (!nlh)
1517 return -EMSGSIZE;
1519 ifm = nlmsg_data(nlh);
1520 ifm->ifa_family = AF_INET;
1521 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1522 ifm->ifa_flags = ifa->ifa_flags;
1523 ifm->ifa_scope = ifa->ifa_scope;
1524 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1526 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1527 preferred = ifa->ifa_preferred_lft;
1528 valid = ifa->ifa_valid_lft;
1529 if (preferred != INFINITY_LIFE_TIME) {
1530 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1532 if (preferred > tval)
1533 preferred -= tval;
1534 else
1535 preferred = 0;
1536 if (valid != INFINITY_LIFE_TIME) {
1537 if (valid > tval)
1538 valid -= tval;
1539 else
1540 valid = 0;
1543 } else {
1544 preferred = INFINITY_LIFE_TIME;
1545 valid = INFINITY_LIFE_TIME;
1547 if ((ifa->ifa_address &&
1548 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1549 (ifa->ifa_local &&
1550 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1551 (ifa->ifa_broadcast &&
1552 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1553 (ifa->ifa_label[0] &&
1554 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1555 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1556 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1557 preferred, valid))
1558 goto nla_put_failure;
1560 nlmsg_end(skb, nlh);
1561 return 0;
1563 nla_put_failure:
1564 nlmsg_cancel(skb, nlh);
1565 return -EMSGSIZE;
1568 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1570 struct net *net = sock_net(skb->sk);
1571 int h, s_h;
1572 int idx, s_idx;
1573 int ip_idx, s_ip_idx;
1574 struct net_device *dev;
1575 struct in_device *in_dev;
1576 struct in_ifaddr *ifa;
1577 struct hlist_head *head;
1579 s_h = cb->args[0];
1580 s_idx = idx = cb->args[1];
1581 s_ip_idx = ip_idx = cb->args[2];
1583 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1584 idx = 0;
1585 head = &net->dev_index_head[h];
1586 rcu_read_lock();
1587 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1588 net->dev_base_seq;
1589 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1590 if (idx < s_idx)
1591 goto cont;
1592 if (h > s_h || idx > s_idx)
1593 s_ip_idx = 0;
1594 in_dev = __in_dev_get_rcu(dev);
1595 if (!in_dev)
1596 goto cont;
1598 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1599 ifa = ifa->ifa_next, ip_idx++) {
1600 if (ip_idx < s_ip_idx)
1601 continue;
1602 if (inet_fill_ifaddr(skb, ifa,
1603 NETLINK_CB(cb->skb).portid,
1604 cb->nlh->nlmsg_seq,
1605 RTM_NEWADDR, NLM_F_MULTI) < 0) {
1606 rcu_read_unlock();
1607 goto done;
1609 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1611 cont:
1612 idx++;
1614 rcu_read_unlock();
1617 done:
1618 cb->args[0] = h;
1619 cb->args[1] = idx;
1620 cb->args[2] = ip_idx;
1622 return skb->len;
1625 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1626 u32 portid)
1628 struct sk_buff *skb;
1629 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1630 int err = -ENOBUFS;
1631 struct net *net;
1633 net = dev_net(ifa->ifa_dev->dev);
1634 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1635 if (!skb)
1636 goto errout;
1638 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1639 if (err < 0) {
1640 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1641 WARN_ON(err == -EMSGSIZE);
1642 kfree_skb(skb);
1643 goto errout;
1645 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1646 return;
1647 errout:
1648 if (err < 0)
1649 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
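/*
 * Receiving these notifications from userspace (assumed minimal sketch,
 * no error handling; <sys/socket.h> and <linux/rtnetlink.h> assumed):
 * bind a NETLINK_ROUTE socket to the IPv4 address group and read the
 * RTM_NEWADDR / RTM_DELADDR messages sent by rtmsg_ifa() above.
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	struct sockaddr_nl snl = {
 *		.nl_family = AF_NETLINK,
 *		.nl_groups = RTMGRP_IPV4_IFADDR,
 *	};
 *	bind(fd, (struct sockaddr *)&snl, sizeof(snl));
 *
 * Each received message then carries an nlmsghdr, an ifaddrmsg and the
 * IFA_* attributes filled in by inet_fill_ifaddr().
 */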
1652 static size_t inet_get_link_af_size(const struct net_device *dev)
1654 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1656 if (!in_dev)
1657 return 0;
1659 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1662 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1664 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1665 struct nlattr *nla;
1666 int i;
1668 if (!in_dev)
1669 return -ENODATA;
1671 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1672 if (!nla)
1673 return -EMSGSIZE;
1675 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1676 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1678 return 0;
1681 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1682 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1685 static int inet_validate_link_af(const struct net_device *dev,
1686 const struct nlattr *nla)
1688 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1689 int err, rem;
1691 if (dev && !__in_dev_get_rtnl(dev))
1692 return -EAFNOSUPPORT;
1694 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1695 if (err < 0)
1696 return err;
1698 if (tb[IFLA_INET_CONF]) {
1699 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1700 int cfgid = nla_type(a);
1702 if (nla_len(a) < 4)
1703 return -EINVAL;
1705 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1706 return -EINVAL;
1710 return 0;
1713 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1715 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1716 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1717 int rem;
1719 if (!in_dev)
1720 return -EAFNOSUPPORT;
1722 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1723 BUG();
1725 if (tb[IFLA_INET_CONF]) {
1726 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1727 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1730 return 0;
1733 static int inet_netconf_msgsize_devconf(int type)
1735 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1736 + nla_total_size(4); /* NETCONFA_IFINDEX */
1738 /* type -1 is used for ALL */
1739 if (type == -1 || type == NETCONFA_FORWARDING)
1740 size += nla_total_size(4);
1741 if (type == -1 || type == NETCONFA_RP_FILTER)
1742 size += nla_total_size(4);
1743 if (type == -1 || type == NETCONFA_MC_FORWARDING)
1744 size += nla_total_size(4);
1745 if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1746 size += nla_total_size(4);
1748 return size;
1751 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1752 struct ipv4_devconf *devconf, u32 portid,
1753 u32 seq, int event, unsigned int flags,
1754 int type)
1756 struct nlmsghdr *nlh;
1757 struct netconfmsg *ncm;
1759 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1760 flags);
1761 if (!nlh)
1762 return -EMSGSIZE;
1764 ncm = nlmsg_data(nlh);
1765 ncm->ncm_family = AF_INET;
1767 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1768 goto nla_put_failure;
1770 /* type -1 is used for ALL */
1771 if ((type == -1 || type == NETCONFA_FORWARDING) &&
1772 nla_put_s32(skb, NETCONFA_FORWARDING,
1773 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1774 goto nla_put_failure;
1775 if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1776 nla_put_s32(skb, NETCONFA_RP_FILTER,
1777 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1778 goto nla_put_failure;
1779 if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1780 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1781 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1782 goto nla_put_failure;
1783 if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1784 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1785 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1786 goto nla_put_failure;
1788 nlmsg_end(skb, nlh);
1789 return 0;
1791 nla_put_failure:
1792 nlmsg_cancel(skb, nlh);
1793 return -EMSGSIZE;
1796 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1797 struct ipv4_devconf *devconf)
1799 struct sk_buff *skb;
1800 int err = -ENOBUFS;
1802 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1803 if (!skb)
1804 goto errout;
1806 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1807 RTM_NEWNETCONF, 0, type);
1808 if (err < 0) {
1809 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1810 WARN_ON(err == -EMSGSIZE);
1811 kfree_skb(skb);
1812 goto errout;
1814 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1815 return;
1816 errout:
1817 if (err < 0)
1818 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1821 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1822 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1823 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1824 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1825 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
1828 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1829 struct nlmsghdr *nlh)
1831 struct net *net = sock_net(in_skb->sk);
1832 struct nlattr *tb[NETCONFA_MAX+1];
1833 struct netconfmsg *ncm;
1834 struct sk_buff *skb;
1835 struct ipv4_devconf *devconf;
1836 struct in_device *in_dev;
1837 struct net_device *dev;
1838 int ifindex;
1839 int err;
1841 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1842 devconf_ipv4_policy);
1843 if (err < 0)
1844 goto errout;
1846 err = -EINVAL;
1847 if (!tb[NETCONFA_IFINDEX])
1848 goto errout;
1850 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1851 switch (ifindex) {
1852 case NETCONFA_IFINDEX_ALL:
1853 devconf = net->ipv4.devconf_all;
1854 break;
1855 case NETCONFA_IFINDEX_DEFAULT:
1856 devconf = net->ipv4.devconf_dflt;
1857 break;
1858 default:
1859 dev = __dev_get_by_index(net, ifindex);
1860 if (!dev)
1861 goto errout;
1862 in_dev = __in_dev_get_rtnl(dev);
1863 if (!in_dev)
1864 goto errout;
1865 devconf = &in_dev->cnf;
1866 break;
1869 err = -ENOBUFS;
1870 skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1871 if (!skb)
1872 goto errout;
1874 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1875 NETLINK_CB(in_skb).portid,
1876 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1877 -1);
1878 if (err < 0) {
1879 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1880 WARN_ON(err == -EMSGSIZE);
1881 kfree_skb(skb);
1882 goto errout;
1884 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1885 errout:
1886 return err;
1889 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1890 struct netlink_callback *cb)
1892 struct net *net = sock_net(skb->sk);
1893 int h, s_h;
1894 int idx, s_idx;
1895 struct net_device *dev;
1896 struct in_device *in_dev;
1897 struct hlist_head *head;
1899 s_h = cb->args[0];
1900 s_idx = idx = cb->args[1];
1902 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1903 idx = 0;
1904 head = &net->dev_index_head[h];
1905 rcu_read_lock();
1906 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1907 net->dev_base_seq;
1908 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1909 if (idx < s_idx)
1910 goto cont;
1911 in_dev = __in_dev_get_rcu(dev);
1912 if (!in_dev)
1913 goto cont;
1915 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1916 &in_dev->cnf,
1917 NETLINK_CB(cb->skb).portid,
1918 cb->nlh->nlmsg_seq,
1919 RTM_NEWNETCONF,
1920 NLM_F_MULTI,
1921 -1) < 0) {
1922 rcu_read_unlock();
1923 goto done;
1925 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1926 cont:
1927 idx++;
1929 rcu_read_unlock();
1931 if (h == NETDEV_HASHENTRIES) {
1932 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1933 net->ipv4.devconf_all,
1934 NETLINK_CB(cb->skb).portid,
1935 cb->nlh->nlmsg_seq,
1936 RTM_NEWNETCONF, NLM_F_MULTI,
1937 -1) < 0)
1938 goto done;
1939 else
1940 h++;
1942 if (h == NETDEV_HASHENTRIES + 1) {
1943 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1944 net->ipv4.devconf_dflt,
1945 NETLINK_CB(cb->skb).portid,
1946 cb->nlh->nlmsg_seq,
1947 RTM_NEWNETCONF, NLM_F_MULTI,
1948 -1) < 0)
1949 goto done;
1950 else
1951 h++;
1953 done:
1954 cb->args[0] = h;
1955 cb->args[1] = idx;
1957 return skb->len;
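/*
 * These RTM_NEWNETCONF messages back the "ip netconf" command in
 * iproute2 (assumed examples): "ip -4 netconf show dev eth0" goes
 * through inet_netconf_get_devconf(), while "ip -4 netconf show" walks
 * the dump path above, including the special ALL and DEFAULT
 * pseudo-interfaces appended at the end.
 */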
1960 #ifdef CONFIG_SYSCTL
1962 static void devinet_copy_dflt_conf(struct net *net, int i)
1964 struct net_device *dev;
1966 rcu_read_lock();
1967 for_each_netdev_rcu(net, dev) {
1968 struct in_device *in_dev;
1970 in_dev = __in_dev_get_rcu(dev);
1971 if (in_dev && !test_bit(i, in_dev->cnf.state))
1972 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1974 rcu_read_unlock();
1977 /* called with RTNL locked */
1978 static void inet_forward_change(struct net *net)
1980 struct net_device *dev;
1981 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1983 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1984 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1985 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1986 NETCONFA_IFINDEX_ALL,
1987 net->ipv4.devconf_all);
1988 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1989 NETCONFA_IFINDEX_DEFAULT,
1990 net->ipv4.devconf_dflt);
1992 for_each_netdev(net, dev) {
1993 struct in_device *in_dev;
1994 if (on)
1995 dev_disable_lro(dev);
1996 rcu_read_lock();
1997 in_dev = __in_dev_get_rcu(dev);
1998 if (in_dev) {
1999 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2000 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2001 dev->ifindex, &in_dev->cnf);
2003 rcu_read_unlock();
2007 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2009 if (cnf == net->ipv4.devconf_dflt)
2010 return NETCONFA_IFINDEX_DEFAULT;
2011 else if (cnf == net->ipv4.devconf_all)
2012 return NETCONFA_IFINDEX_ALL;
2013 else {
2014 struct in_device *idev
2015 = container_of(cnf, struct in_device, cnf);
2016 return idev->dev->ifindex;
2020 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2021 void __user *buffer,
2022 size_t *lenp, loff_t *ppos)
2024 int old_value = *(int *)ctl->data;
2025 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2026 int new_value = *(int *)ctl->data;
2028 if (write) {
2029 struct ipv4_devconf *cnf = ctl->extra1;
2030 struct net *net = ctl->extra2;
2031 int i = (int *)ctl->data - cnf->data;
2032 int ifindex;
2034 set_bit(i, cnf->state);
2036 if (cnf == net->ipv4.devconf_dflt)
2037 devinet_copy_dflt_conf(net, i);
2038 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2039 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2040 if ((new_value == 0) && (old_value != 0))
2041 rt_cache_flush(net);
2043 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2044 new_value != old_value) {
2045 ifindex = devinet_conf_ifindex(net, cnf);
2046 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2047 ifindex, cnf);
2049 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2050 new_value != old_value) {
2051 ifindex = devinet_conf_ifindex(net, cnf);
2052 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2053 ifindex, cnf);
2057 return ret;
2060 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2061 void __user *buffer,
2062 size_t *lenp, loff_t *ppos)
2064 int *valp = ctl->data;
2065 int val = *valp;
2066 loff_t pos = *ppos;
2067 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2069 if (write && *valp != val) {
2070 struct net *net = ctl->extra2;
2072 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2073 if (!rtnl_trylock()) {
2074 /* Restore the original values before restarting */
2075 *valp = val;
2076 *ppos = pos;
2077 return restart_syscall();
2079 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2080 inet_forward_change(net);
2081 } else {
2082 struct ipv4_devconf *cnf = ctl->extra1;
2083 struct in_device *idev =
2084 container_of(cnf, struct in_device, cnf);
2085 if (*valp)
2086 dev_disable_lro(idev->dev);
2087 inet_netconf_notify_devconf(net,
2088 NETCONFA_FORWARDING,
2089 idev->dev->ifindex,
2090 cnf);
2092 rtnl_unlock();
2093 rt_cache_flush(net);
2094 } else
2095 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2096 NETCONFA_IFINDEX_DEFAULT,
2097 net->ipv4.devconf_dflt);
2100 return ret;
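/*
 * This handler is what makes the classic toggles work (assumed shell
 * examples and device name): writing net.ipv4.ip_forward or
 * net.ipv4.conf.all.forwarding flips forwarding on every device, updates
 * the default value, emits RTM_NEWNETCONF notifications and flushes the
 * routing cache; a per-device write such as the second example only
 * touches that one interface.
 *
 *	# sysctl -w net.ipv4.ip_forward=1
 *	# sysctl -w net.ipv4.conf.eth0.forwarding=0
 */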
2103 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2104 void __user *buffer,
2105 size_t *lenp, loff_t *ppos)
2107 int *valp = ctl->data;
2108 int val = *valp;
2109 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2110 struct net *net = ctl->extra2;
2112 if (write && *valp != val)
2113 rt_cache_flush(net);
2115 return ret;
2118 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2120 .procname = name, \
2121 .data = ipv4_devconf.data + \
2122 IPV4_DEVCONF_ ## attr - 1, \
2123 .maxlen = sizeof(int), \
2124 .mode = mval, \
2125 .proc_handler = proc, \
2126 .extra1 = &ipv4_devconf, \
2129 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2130 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2132 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2133 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2135 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2136 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2138 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2139 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2141 static struct devinet_sysctl_table {
2142 struct ctl_table_header *sysctl_header;
2143 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2144 } devinet_sysctl = {
2145 .devinet_vars = {
2146 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2147 devinet_sysctl_forward),
2148 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2150 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2151 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2152 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2153 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2154 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2155 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2156 "accept_source_route"),
2157 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2158 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2159 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2160 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2161 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2162 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2163 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2164 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2165 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2166 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2167 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2168 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2169 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2170 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2171 "force_igmp_version"),
2172 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2173 "igmpv2_unsolicited_report_interval"),
2174 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2175 "igmpv3_unsolicited_report_interval"),
2177 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2178 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2179 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2180 "promote_secondaries"),
2181 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2182 "route_localnet"),
2186 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2187 struct ipv4_devconf *p)
2189 int i;
2190 struct devinet_sysctl_table *t;
2191 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2193 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2194 if (!t)
2195 goto out;
2197 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2198 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2199 t->devinet_vars[i].extra1 = p;
2200 t->devinet_vars[i].extra2 = net;
2203 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2205 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2206 if (!t->sysctl_header)
2207 goto free;
2209 p->sysctl = t;
2210 return 0;
2212 free:
2213 kfree(t);
2214 out:
2215 return -ENOBUFS;
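/*
 * The table registered here shows up under /proc/sys, e.g. (device name
 * assumed):
 *
 *	/proc/sys/net/ipv4/conf/all/rp_filter
 *	/proc/sys/net/ipv4/conf/default/send_redirects
 *	/proc/sys/net/ipv4/conf/eth0/proxy_arp
 *
 * The "all" and "default" trees are registered from devinet_init_net(),
 * the per-device trees from devinet_sysctl_register().
 */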
2218 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2220 struct devinet_sysctl_table *t = cnf->sysctl;
2222 if (!t)
2223 return;
2225 cnf->sysctl = NULL;
2226 unregister_net_sysctl_table(t->sysctl_header);
2227 kfree(t);
2230 static int devinet_sysctl_register(struct in_device *idev)
2232 int err;
2234 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2235 return -EINVAL;
2237 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2238 if (err)
2239 return err;
2240 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2241 &idev->cnf);
2242 if (err)
2243 neigh_sysctl_unregister(idev->arp_parms);
2244 return err;
2247 static void devinet_sysctl_unregister(struct in_device *idev)
2249 __devinet_sysctl_unregister(&idev->cnf);
2250 neigh_sysctl_unregister(idev->arp_parms);
2253 static struct ctl_table ctl_forward_entry[] = {
2255 .procname = "ip_forward",
2256 .data = &ipv4_devconf.data[
2257 IPV4_DEVCONF_FORWARDING - 1],
2258 .maxlen = sizeof(int),
2259 .mode = 0644,
2260 .proc_handler = devinet_sysctl_forward,
2261 .extra1 = &ipv4_devconf,
2262 .extra2 = &init_net,
2264 { },
2266 #endif
2268 static __net_init int devinet_init_net(struct net *net)
2270 int err;
2271 struct ipv4_devconf *all, *dflt;
2272 #ifdef CONFIG_SYSCTL
2273 struct ctl_table *tbl = ctl_forward_entry;
2274 struct ctl_table_header *forw_hdr;
2275 #endif
2277 err = -ENOMEM;
2278 all = &ipv4_devconf;
2279 dflt = &ipv4_devconf_dflt;
2281 if (!net_eq(net, &init_net)) {
2282 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2283 if (!all)
2284 goto err_alloc_all;
2286 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2287 if (!dflt)
2288 goto err_alloc_dflt;
2290 #ifdef CONFIG_SYSCTL
2291 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2292 if (!tbl)
2293 goto err_alloc_ctl;
2295 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2296 tbl[0].extra1 = all;
2297 tbl[0].extra2 = net;
2298 #endif
2301 #ifdef CONFIG_SYSCTL
2302 err = __devinet_sysctl_register(net, "all", all);
2303 if (err < 0)
2304 goto err_reg_all;
2306 err = __devinet_sysctl_register(net, "default", dflt);
2307 if (err < 0)
2308 goto err_reg_dflt;
2310 err = -ENOMEM;
2311 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2312 if (!forw_hdr)
2313 goto err_reg_ctl;
2314 net->ipv4.forw_hdr = forw_hdr;
2315 #endif
2317 net->ipv4.devconf_all = all;
2318 net->ipv4.devconf_dflt = dflt;
2319 return 0;
2321 #ifdef CONFIG_SYSCTL
2322 err_reg_ctl:
2323 __devinet_sysctl_unregister(dflt);
2324 err_reg_dflt:
2325 __devinet_sysctl_unregister(all);
2326 err_reg_all:
2327 if (tbl != ctl_forward_entry)
2328 kfree(tbl);
2329 err_alloc_ctl:
2330 #endif
2331 if (dflt != &ipv4_devconf_dflt)
2332 kfree(dflt);
2333 err_alloc_dflt:
2334 if (all != &ipv4_devconf)
2335 kfree(all);
2336 err_alloc_all:
2337 return err;
2340 static __net_exit void devinet_exit_net(struct net *net)
2342 #ifdef CONFIG_SYSCTL
2343 struct ctl_table *tbl;
2345 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2346 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2347 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2348 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2349 kfree(tbl);
2350 #endif
2351 kfree(net->ipv4.devconf_dflt);
2352 kfree(net->ipv4.devconf_all);
2355 static __net_initdata struct pernet_operations devinet_ops = {
2356 .init = devinet_init_net,
2357 .exit = devinet_exit_net,
2360 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2361 .family = AF_INET,
2362 .fill_link_af = inet_fill_link_af,
2363 .get_link_af_size = inet_get_link_af_size,
2364 .validate_link_af = inet_validate_link_af,
2365 .set_link_af = inet_set_link_af,
2368 void __init devinet_init(void)
2370 int i;
2372 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2373 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2375 register_pernet_subsys(&devinet_ops);
2377 register_gifconf(PF_INET, inet_gifconf);
2378 register_netdevice_notifier(&ip_netdev_notifier);
2380 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2382 rtnl_af_register(&inet_af_ops);
2384 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2385 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2386 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2387 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2388 inet_netconf_dump_devconf, NULL);