net/ipv4/devinet.c
1 /*
2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
69 static struct ipv4_devconf ipv4_devconf = {
70 .data = {
71 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81 .data = {
82 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
92 #define IPV4_DEVCONF_DFLT(net, attr) \
93 IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96 [IFA_LOCAL] = { .type = NLA_U32 },
97 [IFA_ADDRESS] = { .type = NLA_U32 },
98 [IFA_BROADCAST] = { .type = NLA_U32 },
99 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100 [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
101 [IFA_FLAGS] = { .type = NLA_U32 },
104 #define IN4_ADDR_HSIZE_SHIFT 8
105 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 u32 val = (__force u32) addr ^ net_hash_mix(net);
113 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 u32 hash = inet_addr_hash(net, ifa->ifa_local);
120 ASSERT_RTNL();
121 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124 static void inet_hash_remove(struct in_ifaddr *ifa)
126 ASSERT_RTNL();
127 hlist_del_init_rcu(&ifa->hash);
131 * __ip_dev_find - find the first device with a given source address.
132 * @net: the net namespace
133 * @addr: the source address
134 * @devref: if true, take a reference on the found device
136  * If a caller uses devref=false, it should be protected by RCU or RTNL.
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 u32 hash = inet_addr_hash(net, addr);
141 struct net_device *result = NULL;
142 struct in_ifaddr *ifa;
144 rcu_read_lock();
145 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146 if (ifa->ifa_local == addr) {
147 struct net_device *dev = ifa->ifa_dev->dev;
149 if (!net_eq(dev_net(dev), net))
150 continue;
151 result = dev;
152 break;
155 if (!result) {
156 struct flowi4 fl4 = { .daddr = addr };
157 struct fib_result res = { 0 };
158 struct fib_table *local;
160 /* Fall back to the FIB local table so that communication
161 * over loopback subnets works.
163 local = fib_get_table(net, RT_TABLE_LOCAL);
164 if (local &&
165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 res.type == RTN_LOCAL)
167 result = FIB_RES_DEV(res);
169 if (result && devref)
170 dev_hold(result);
171 rcu_read_unlock();
172 return result;
174 EXPORT_SYMBOL(__ip_dev_find);
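/*
 * Illustrative sketch (editorial addition, not part of devinet.c): how a
 * caller might use __ip_dev_find().  The helper name and the hard-coded
 * address are assumptions for the example; with devref=true the caller
 * owns a device reference and must drop it with dev_put().
 */
static int example_find_local_dev(struct net *net)
{
	__be32 addr = htonl(0x7f000001);	/* 127.0.0.1, example only */
	struct net_device *dev;

	dev = __ip_dev_find(net, addr, true);	/* takes a reference */
	if (!dev)
		return -ENODEV;

	pr_debug("%pI4 is configured on %s\n", &addr, dev->name);
	dev_put(dev);				/* release the reference */
	return 0;
}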
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180 int destroy);
181 #ifdef CONFIG_SYSCTL
182 static int devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static int devinet_sysctl_register(struct in_device *idev)
187 return 0;
189 static void devinet_sysctl_unregister(struct in_device *idev)
192 #endif
194 /* Locks all the inet devices. */
196 static struct in_ifaddr *inet_alloc_ifa(void)
198 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201 static void inet_rcu_free_ifa(struct rcu_head *head)
203 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204 if (ifa->ifa_dev)
205 in_dev_put(ifa->ifa_dev);
206 kfree(ifa);
209 static void inet_free_ifa(struct in_ifaddr *ifa)
211 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
214 void in_dev_finish_destroy(struct in_device *idev)
216 struct net_device *dev = idev->dev;
218 WARN_ON(idev->ifa_list);
219 WARN_ON(idev->mc_list);
220 kfree(rcu_dereference_protected(idev->mc_hash, 1));
221 #ifdef NET_REFCNT_DEBUG
222 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
223 #endif
224 dev_put(dev);
225 if (!idev->dead)
226 pr_err("Freeing alive in_device %p\n", idev);
227 else
228 kfree(idev);
230 EXPORT_SYMBOL(in_dev_finish_destroy);
232 static struct in_device *inetdev_init(struct net_device *dev)
234 struct in_device *in_dev;
235 int err = -ENOMEM;
237 ASSERT_RTNL();
239 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
240 if (!in_dev)
241 goto out;
242 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
243 sizeof(in_dev->cnf));
244 in_dev->cnf.sysctl = NULL;
245 in_dev->dev = dev;
246 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
247 if (!in_dev->arp_parms)
248 goto out_kfree;
249 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
250 dev_disable_lro(dev);
251 /* Reference in_dev->dev */
252 dev_hold(dev);
253 /* Account for reference dev->ip_ptr (below) */
254 in_dev_hold(in_dev);
256 err = devinet_sysctl_register(in_dev);
257 if (err) {
258 in_dev->dead = 1;
259 in_dev_put(in_dev);
260 in_dev = NULL;
261 goto out;
263 ip_mc_init_dev(in_dev);
264 if (dev->flags & IFF_UP)
265 ip_mc_up(in_dev);
267 /* we can receive as soon as ip_ptr is set -- do this last */
268 rcu_assign_pointer(dev->ip_ptr, in_dev);
269 out:
270 return in_dev ?: ERR_PTR(err);
271 out_kfree:
272 kfree(in_dev);
273 in_dev = NULL;
274 goto out;
277 static void in_dev_rcu_put(struct rcu_head *head)
279 struct in_device *idev = container_of(head, struct in_device, rcu_head);
280 in_dev_put(idev);
283 static void inetdev_destroy(struct in_device *in_dev)
285 struct in_ifaddr *ifa;
286 struct net_device *dev;
288 ASSERT_RTNL();
290 dev = in_dev->dev;
292 in_dev->dead = 1;
294 ip_mc_destroy_dev(in_dev);
296 while ((ifa = in_dev->ifa_list) != NULL) {
297 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
298 inet_free_ifa(ifa);
301 RCU_INIT_POINTER(dev->ip_ptr, NULL);
303 devinet_sysctl_unregister(in_dev);
304 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
305 arp_ifdown(dev);
307 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
310 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 rcu_read_lock();
313 for_primary_ifa(in_dev) {
314 if (inet_ifa_match(a, ifa)) {
315 if (!b || inet_ifa_match(b, ifa)) {
316 rcu_read_unlock();
317 return 1;
320 } endfor_ifa(in_dev);
321 rcu_read_unlock();
322 return 0;
325 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
326 int destroy, struct nlmsghdr *nlh, u32 portid)
328 struct in_ifaddr *promote = NULL;
329 struct in_ifaddr *ifa, *ifa1 = *ifap;
330 struct in_ifaddr *last_prim = in_dev->ifa_list;
331 struct in_ifaddr *prev_prom = NULL;
332 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334 ASSERT_RTNL();
336 if (in_dev->dead)
337 goto no_promotions;
339 /* 1. Deleting the primary ifaddr forces deletion of all secondaries
340 * unless alias promotion is set
343 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
344 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
346 while ((ifa = *ifap1) != NULL) {
347 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
348 ifa1->ifa_scope <= ifa->ifa_scope)
349 last_prim = ifa;
351 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
352 ifa1->ifa_mask != ifa->ifa_mask ||
353 !inet_ifa_match(ifa1->ifa_address, ifa)) {
354 ifap1 = &ifa->ifa_next;
355 prev_prom = ifa;
356 continue;
359 if (!do_promote) {
360 inet_hash_remove(ifa);
361 *ifap1 = ifa->ifa_next;
363 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
364 blocking_notifier_call_chain(&inetaddr_chain,
365 NETDEV_DOWN, ifa);
366 inet_free_ifa(ifa);
367 } else {
368 promote = ifa;
369 break;
374 /* On promotion, all secondaries from the subnet change their
375 * primary IP, so we must remove all their routes silently
376 * and later add them back with the new prefsrc. Do this
377 * while all addresses are still on the device list.
379 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
380 if (ifa1->ifa_mask == ifa->ifa_mask &&
381 inet_ifa_match(ifa1->ifa_address, ifa))
382 fib_del_ifaddr(ifa, ifa1);
385 no_promotions:
386 /* 2. Unlink it */
388 *ifap = ifa1->ifa_next;
389 inet_hash_remove(ifa1);
391 /* 3. Announce address deletion */
393 /* Send the message first, then call the notifier.
394 At first sight, the FIB update triggered by the notifier
395 will refer to an already deleted ifaddr, which could confuse
396 netlink listeners. In fact it cannot: gated sees that the
397 route is deleted, and if it still thinks the ifaddr is
398 valid it will try to restore the deleted routes... Grr.
399 So this order is correct.
401 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
402 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
404 if (promote) {
405 struct in_ifaddr *next_sec = promote->ifa_next;
407 if (prev_prom) {
408 prev_prom->ifa_next = promote->ifa_next;
409 promote->ifa_next = last_prim->ifa_next;
410 last_prim->ifa_next = promote;
413 promote->ifa_flags &= ~IFA_F_SECONDARY;
414 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
415 blocking_notifier_call_chain(&inetaddr_chain,
416 NETDEV_UP, promote);
417 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
418 if (ifa1->ifa_mask != ifa->ifa_mask ||
419 !inet_ifa_match(ifa1->ifa_address, ifa))
420 continue;
421 fib_add_ifaddr(ifa);
425 if (destroy)
426 inet_free_ifa(ifa1);
429 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
430 int destroy)
432 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
435 static void check_lifetime(struct work_struct *work);
437 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
439 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
440 u32 portid)
442 struct in_device *in_dev = ifa->ifa_dev;
443 struct in_ifaddr *ifa1, **ifap, **last_primary;
445 ASSERT_RTNL();
447 if (!ifa->ifa_local) {
448 inet_free_ifa(ifa);
449 return 0;
452 ifa->ifa_flags &= ~IFA_F_SECONDARY;
453 last_primary = &in_dev->ifa_list;
455 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
456 ifap = &ifa1->ifa_next) {
457 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
458 ifa->ifa_scope <= ifa1->ifa_scope)
459 last_primary = &ifa1->ifa_next;
460 if (ifa1->ifa_mask == ifa->ifa_mask &&
461 inet_ifa_match(ifa1->ifa_address, ifa)) {
462 if (ifa1->ifa_local == ifa->ifa_local) {
463 inet_free_ifa(ifa);
464 return -EEXIST;
466 if (ifa1->ifa_scope != ifa->ifa_scope) {
467 inet_free_ifa(ifa);
468 return -EINVAL;
470 ifa->ifa_flags |= IFA_F_SECONDARY;
474 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
475 prandom_seed((__force u32) ifa->ifa_local);
476 ifap = last_primary;
479 ifa->ifa_next = *ifap;
480 *ifap = ifa;
482 inet_hash_insert(dev_net(in_dev->dev), ifa);
484 cancel_delayed_work(&check_lifetime_work);
485 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
487 /* Send the message first, then call the notifier.
488 The notifier will trigger the FIB update, so that
489 netlink listeners will know about the new ifaddr */
490 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
491 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
493 return 0;
496 static int inet_insert_ifa(struct in_ifaddr *ifa)
498 return __inet_insert_ifa(ifa, NULL, 0);
501 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
503 struct in_device *in_dev = __in_dev_get_rtnl(dev);
505 ASSERT_RTNL();
507 if (!in_dev) {
508 inet_free_ifa(ifa);
509 return -ENOBUFS;
511 ipv4_devconf_setall(in_dev);
512 neigh_parms_data_state_setall(in_dev->arp_parms);
513 if (ifa->ifa_dev != in_dev) {
514 WARN_ON(ifa->ifa_dev);
515 in_dev_hold(in_dev);
516 ifa->ifa_dev = in_dev;
518 if (ipv4_is_loopback(ifa->ifa_local))
519 ifa->ifa_scope = RT_SCOPE_HOST;
520 return inet_insert_ifa(ifa);
523 /* Caller must hold RCU or RTNL:
524 * we don't take a reference on the found in_device
526 struct in_device *inetdev_by_index(struct net *net, int ifindex)
528 struct net_device *dev;
529 struct in_device *in_dev = NULL;
531 rcu_read_lock();
532 dev = dev_get_by_index_rcu(net, ifindex);
533 if (dev)
534 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
535 rcu_read_unlock();
536 return in_dev;
538 EXPORT_SYMBOL(inetdev_by_index);
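/*
 * Illustrative sketch (editorial addition, not part of devinet.c): because
 * inetdev_by_index() does not take a reference, its result is only valid
 * while holding RCU (or RTNL).  The helper name is an assumption for the
 * example.
 */
static bool example_ifindex_forwards(struct net *net, int ifindex)
{
	struct in_device *in_dev;
	bool fwd = false;

	rcu_read_lock();
	in_dev = inetdev_by_index(net, ifindex);
	if (in_dev)
		fwd = IN_DEV_FORWARD(in_dev);
	rcu_read_unlock();

	return fwd;
}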
540 /* Called only under the RTNL lock. No other locks are taken. */
542 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
543 __be32 mask)
545 ASSERT_RTNL();
547 for_primary_ifa(in_dev) {
548 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
549 return ifa;
550 } endfor_ifa(in_dev);
551 return NULL;
554 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
556 struct ip_mreqn mreq = {
557 .imr_multiaddr.s_addr = ifa->ifa_address,
558 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
560 int ret;
562 ASSERT_RTNL();
564 lock_sock(sk);
565 if (join)
566 ret = ip_mc_join_group(sk, &mreq);
567 else
568 ret = ip_mc_leave_group(sk, &mreq);
569 release_sock(sk);
571 return ret;
574 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
576 struct net *net = sock_net(skb->sk);
577 struct nlattr *tb[IFA_MAX+1];
578 struct in_device *in_dev;
579 struct ifaddrmsg *ifm;
580 struct in_ifaddr *ifa, **ifap;
581 int err = -EINVAL;
583 ASSERT_RTNL();
585 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
586 if (err < 0)
587 goto errout;
589 ifm = nlmsg_data(nlh);
590 in_dev = inetdev_by_index(net, ifm->ifa_index);
591 if (!in_dev) {
592 err = -ENODEV;
593 goto errout;
596 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
597 ifap = &ifa->ifa_next) {
598 if (tb[IFA_LOCAL] &&
599 ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
600 continue;
602 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
603 continue;
605 if (tb[IFA_ADDRESS] &&
606 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
607 !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
608 continue;
610 if (ipv4_is_multicast(ifa->ifa_address))
611 ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
612 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
613 return 0;
616 err = -EADDRNOTAVAIL;
617 errout:
618 return err;
621 #define INFINITY_LIFE_TIME 0xFFFFFFFF
623 static void check_lifetime(struct work_struct *work)
625 unsigned long now, next, next_sec, next_sched;
626 struct in_ifaddr *ifa;
627 struct hlist_node *n;
628 int i;
630 now = jiffies;
631 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
633 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
634 bool change_needed = false;
636 rcu_read_lock();
637 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
638 unsigned long age;
640 if (ifa->ifa_flags & IFA_F_PERMANENT)
641 continue;
643 /* We try to batch several events at once. */
644 age = (now - ifa->ifa_tstamp +
645 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
647 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
648 age >= ifa->ifa_valid_lft) {
649 change_needed = true;
650 } else if (ifa->ifa_preferred_lft ==
651 INFINITY_LIFE_TIME) {
652 continue;
653 } else if (age >= ifa->ifa_preferred_lft) {
654 if (time_before(ifa->ifa_tstamp +
655 ifa->ifa_valid_lft * HZ, next))
656 next = ifa->ifa_tstamp +
657 ifa->ifa_valid_lft * HZ;
659 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
660 change_needed = true;
661 } else if (time_before(ifa->ifa_tstamp +
662 ifa->ifa_preferred_lft * HZ,
663 next)) {
664 next = ifa->ifa_tstamp +
665 ifa->ifa_preferred_lft * HZ;
668 rcu_read_unlock();
669 if (!change_needed)
670 continue;
671 rtnl_lock();
672 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
673 unsigned long age;
675 if (ifa->ifa_flags & IFA_F_PERMANENT)
676 continue;
678 /* We try to batch several events at once. */
679 age = (now - ifa->ifa_tstamp +
680 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
682 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
683 age >= ifa->ifa_valid_lft) {
684 struct in_ifaddr **ifap;
686 for (ifap = &ifa->ifa_dev->ifa_list;
687 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
688 if (*ifap == ifa) {
689 inet_del_ifa(ifa->ifa_dev,
690 ifap, 1);
691 break;
694 } else if (ifa->ifa_preferred_lft !=
695 INFINITY_LIFE_TIME &&
696 age >= ifa->ifa_preferred_lft &&
697 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
698 ifa->ifa_flags |= IFA_F_DEPRECATED;
699 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
702 rtnl_unlock();
705 next_sec = round_jiffies_up(next);
706 next_sched = next;
708 /* If rounded timeout is accurate enough, accept it. */
709 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
710 next_sched = next_sec;
712 now = jiffies;
713 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
714 if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
715 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
717 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
718 next_sched - now);
721 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
722 __u32 prefered_lft)
724 unsigned long timeout;
726 ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
728 timeout = addrconf_timeout_fixup(valid_lft, HZ);
729 if (addrconf_finite_timeout(timeout))
730 ifa->ifa_valid_lft = timeout;
731 else
732 ifa->ifa_flags |= IFA_F_PERMANENT;
734 timeout = addrconf_timeout_fixup(prefered_lft, HZ);
735 if (addrconf_finite_timeout(timeout)) {
736 if (timeout == 0)
737 ifa->ifa_flags |= IFA_F_DEPRECATED;
738 ifa->ifa_preferred_lft = timeout;
740 ifa->ifa_tstamp = jiffies;
741 if (!ifa->ifa_cstamp)
742 ifa->ifa_cstamp = ifa->ifa_tstamp;
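/* Worked example (editorial note): calling set_ifa_lifetime(ifa,
 * INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) marks the address
 * IFA_F_PERMANENT, while a prefered_lft of 0 marks it IFA_F_DEPRECATED
 * immediately.
 */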
745 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
746 __u32 *pvalid_lft, __u32 *pprefered_lft)
748 struct nlattr *tb[IFA_MAX+1];
749 struct in_ifaddr *ifa;
750 struct ifaddrmsg *ifm;
751 struct net_device *dev;
752 struct in_device *in_dev;
753 int err;
755 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
756 if (err < 0)
757 goto errout;
759 ifm = nlmsg_data(nlh);
760 err = -EINVAL;
761 if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
762 goto errout;
764 dev = __dev_get_by_index(net, ifm->ifa_index);
765 err = -ENODEV;
766 if (!dev)
767 goto errout;
769 in_dev = __in_dev_get_rtnl(dev);
770 err = -ENOBUFS;
771 if (!in_dev)
772 goto errout;
774 ifa = inet_alloc_ifa();
775 if (!ifa)
777 * A potential indev allocation can be left alive; it stays
778 * assigned to its device and is destroyed with it.
780 goto errout;
782 ipv4_devconf_setall(in_dev);
783 neigh_parms_data_state_setall(in_dev->arp_parms);
784 in_dev_hold(in_dev);
786 if (!tb[IFA_ADDRESS])
787 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
789 INIT_HLIST_NODE(&ifa->hash);
790 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
791 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
792 ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
793 ifm->ifa_flags;
794 ifa->ifa_scope = ifm->ifa_scope;
795 ifa->ifa_dev = in_dev;
797 ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
798 ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
800 if (tb[IFA_BROADCAST])
801 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
803 if (tb[IFA_LABEL])
804 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
805 else
806 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
808 if (tb[IFA_CACHEINFO]) {
809 struct ifa_cacheinfo *ci;
811 ci = nla_data(tb[IFA_CACHEINFO]);
812 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
813 err = -EINVAL;
814 goto errout_free;
816 *pvalid_lft = ci->ifa_valid;
817 *pprefered_lft = ci->ifa_prefered;
820 return ifa;
822 errout_free:
823 inet_free_ifa(ifa);
824 errout:
825 return ERR_PTR(err);
828 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
830 struct in_device *in_dev = ifa->ifa_dev;
831 struct in_ifaddr *ifa1, **ifap;
833 if (!ifa->ifa_local)
834 return NULL;
836 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
837 ifap = &ifa1->ifa_next) {
838 if (ifa1->ifa_mask == ifa->ifa_mask &&
839 inet_ifa_match(ifa1->ifa_address, ifa) &&
840 ifa1->ifa_local == ifa->ifa_local)
841 return ifa1;
843 return NULL;
846 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
848 struct net *net = sock_net(skb->sk);
849 struct in_ifaddr *ifa;
850 struct in_ifaddr *ifa_existing;
851 __u32 valid_lft = INFINITY_LIFE_TIME;
852 __u32 prefered_lft = INFINITY_LIFE_TIME;
854 ASSERT_RTNL();
856 ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
857 if (IS_ERR(ifa))
858 return PTR_ERR(ifa);
860 ifa_existing = find_matching_ifa(ifa);
861 if (!ifa_existing) {
862 /* It would be best to check for !NLM_F_CREATE here but
863 * userspace already relies on not having to provide this.
865 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
866 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
867 int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
868 true, ifa);
870 if (ret < 0) {
871 inet_free_ifa(ifa);
872 return ret;
875 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
876 } else {
877 inet_free_ifa(ifa);
879 if (nlh->nlmsg_flags & NLM_F_EXCL ||
880 !(nlh->nlmsg_flags & NLM_F_REPLACE))
881 return -EEXIST;
882 ifa = ifa_existing;
883 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
884 cancel_delayed_work(&check_lifetime_work);
885 queue_delayed_work(system_power_efficient_wq,
886 &check_lifetime_work, 0);
887 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
889 return 0;
893 * Determine a default network mask, based on the IP address.
896 static int inet_abc_len(__be32 addr)
898 int rc = -1; /* Something else, probably a multicast. */
900 if (ipv4_is_zeronet(addr))
901 rc = 0;
902 else {
903 __u32 haddr = ntohl(addr);
905 if (IN_CLASSA(haddr))
906 rc = 8;
907 else if (IN_CLASSB(haddr))
908 rc = 16;
909 else if (IN_CLASSC(haddr))
910 rc = 24;
913 return rc;
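/*
 * Worked example (editorial note, not in the original file): for a
 * non-point-to-point SIOCSIFADDR with no explicit netmask, the classful
 * default computed above is e.g.
 *	10.1.2.3    -> IN_CLASSA -> prefix  8 (255.0.0.0)
 *	172.16.0.1  -> IN_CLASSB -> prefix 16 (255.255.0.0)
 *	192.168.1.1 -> IN_CLASSC -> prefix 24 (255.255.255.0)
 *	224.0.0.1   -> multicast -> -1, so the ioctl fails with -EINVAL
 */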
917 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
919 struct ifreq ifr;
920 struct sockaddr_in sin_orig;
921 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
922 struct in_device *in_dev;
923 struct in_ifaddr **ifap = NULL;
924 struct in_ifaddr *ifa = NULL;
925 struct net_device *dev;
926 char *colon;
927 int ret = -EFAULT;
928 int tryaddrmatch = 0;
931 * Fetch the caller's info block into kernel space
934 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
935 goto out;
936 ifr.ifr_name[IFNAMSIZ - 1] = 0;
938 /* save original address for comparison */
939 memcpy(&sin_orig, sin, sizeof(*sin));
941 colon = strchr(ifr.ifr_name, ':');
942 if (colon)
943 *colon = 0;
945 dev_load(net, ifr.ifr_name);
947 switch (cmd) {
948 case SIOCGIFADDR: /* Get interface address */
949 case SIOCGIFBRDADDR: /* Get the broadcast address */
950 case SIOCGIFDSTADDR: /* Get the destination address */
951 case SIOCGIFNETMASK: /* Get the netmask for the interface */
952 /* Note that these ioctls will not sleep,
953 so we do not impose a lock.
954 One day we will be forced to put an shlock here (I mean SMP)
956 tryaddrmatch = (sin_orig.sin_family == AF_INET);
957 memset(sin, 0, sizeof(*sin));
958 sin->sin_family = AF_INET;
959 break;
961 case SIOCSIFFLAGS:
962 ret = -EPERM;
963 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
964 goto out;
965 break;
966 case SIOCSIFADDR: /* Set interface address (and family) */
967 case SIOCSIFBRDADDR: /* Set the broadcast address */
968 case SIOCSIFDSTADDR: /* Set the destination address */
969 case SIOCSIFNETMASK: /* Set the netmask for the interface */
970 ret = -EPERM;
971 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
972 goto out;
973 ret = -EINVAL;
974 if (sin->sin_family != AF_INET)
975 goto out;
976 break;
977 default:
978 ret = -EINVAL;
979 goto out;
982 rtnl_lock();
984 ret = -ENODEV;
985 dev = __dev_get_by_name(net, ifr.ifr_name);
986 if (!dev)
987 goto done;
989 if (colon)
990 *colon = ':';
992 in_dev = __in_dev_get_rtnl(dev);
993 if (in_dev) {
994 if (tryaddrmatch) {
995 /* Matthias Andree */
996 /* compare label and address (4.4BSD style) */
997 /* note: we only do this for a limited set of ioctls
998 and only if the original address family was AF_INET.
999 This is checked above. */
1000 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1001 ifap = &ifa->ifa_next) {
1002 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1003 sin_orig.sin_addr.s_addr ==
1004 ifa->ifa_local) {
1005 break; /* found */
1009 /* we didn't get a match, maybe the application is
1010 4.3BSD-style and passed in junk so we fall back to
1011 comparing just the label */
1012 if (!ifa) {
1013 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1014 ifap = &ifa->ifa_next)
1015 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1016 break;
1020 ret = -EADDRNOTAVAIL;
1021 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1022 goto done;
1024 switch (cmd) {
1025 case SIOCGIFADDR: /* Get interface address */
1026 sin->sin_addr.s_addr = ifa->ifa_local;
1027 goto rarok;
1029 case SIOCGIFBRDADDR: /* Get the broadcast address */
1030 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1031 goto rarok;
1033 case SIOCGIFDSTADDR: /* Get the destination address */
1034 sin->sin_addr.s_addr = ifa->ifa_address;
1035 goto rarok;
1037 case SIOCGIFNETMASK: /* Get the netmask for the interface */
1038 sin->sin_addr.s_addr = ifa->ifa_mask;
1039 goto rarok;
1041 case SIOCSIFFLAGS:
1042 if (colon) {
1043 ret = -EADDRNOTAVAIL;
1044 if (!ifa)
1045 break;
1046 ret = 0;
1047 if (!(ifr.ifr_flags & IFF_UP))
1048 inet_del_ifa(in_dev, ifap, 1);
1049 break;
1051 ret = dev_change_flags(dev, ifr.ifr_flags);
1052 break;
1054 case SIOCSIFADDR: /* Set interface address (and family) */
1055 ret = -EINVAL;
1056 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1057 break;
1059 if (!ifa) {
1060 ret = -ENOBUFS;
1061 ifa = inet_alloc_ifa();
1062 if (!ifa)
1063 break;
1064 INIT_HLIST_NODE(&ifa->hash);
1065 if (colon)
1066 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1067 else
1068 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1069 } else {
1070 ret = 0;
1071 if (ifa->ifa_local == sin->sin_addr.s_addr)
1072 break;
1073 inet_del_ifa(in_dev, ifap, 0);
1074 ifa->ifa_broadcast = 0;
1075 ifa->ifa_scope = 0;
1078 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1080 if (!(dev->flags & IFF_POINTOPOINT)) {
1081 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1082 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1083 if ((dev->flags & IFF_BROADCAST) &&
1084 ifa->ifa_prefixlen < 31)
1085 ifa->ifa_broadcast = ifa->ifa_address |
1086 ~ifa->ifa_mask;
1087 } else {
1088 ifa->ifa_prefixlen = 32;
1089 ifa->ifa_mask = inet_make_mask(32);
1091 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1092 ret = inet_set_ifa(dev, ifa);
1093 break;
1095 case SIOCSIFBRDADDR: /* Set the broadcast address */
1096 ret = 0;
1097 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1098 inet_del_ifa(in_dev, ifap, 0);
1099 ifa->ifa_broadcast = sin->sin_addr.s_addr;
1100 inet_insert_ifa(ifa);
1102 break;
1104 case SIOCSIFDSTADDR: /* Set the destination address */
1105 ret = 0;
1106 if (ifa->ifa_address == sin->sin_addr.s_addr)
1107 break;
1108 ret = -EINVAL;
1109 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110 break;
1111 ret = 0;
1112 inet_del_ifa(in_dev, ifap, 0);
1113 ifa->ifa_address = sin->sin_addr.s_addr;
1114 inet_insert_ifa(ifa);
1115 break;
1117 case SIOCSIFNETMASK: /* Set the netmask for the interface */
1120 * The mask we set must be legal.
1122 ret = -EINVAL;
1123 if (bad_mask(sin->sin_addr.s_addr, 0))
1124 break;
1125 ret = 0;
1126 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1127 __be32 old_mask = ifa->ifa_mask;
1128 inet_del_ifa(in_dev, ifap, 0);
1129 ifa->ifa_mask = sin->sin_addr.s_addr;
1130 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1132 /* If the current broadcast address matches
1133 * the current netmask, recalculate
1134 * the broadcast address. Otherwise it's a
1135 * funny address, so don't touch it, since
1136 * the user seems to know what (s)he's doing...
1138 if ((dev->flags & IFF_BROADCAST) &&
1139 (ifa->ifa_prefixlen < 31) &&
1140 (ifa->ifa_broadcast ==
1141 (ifa->ifa_local|~old_mask))) {
1142 ifa->ifa_broadcast = (ifa->ifa_local |
1143 ~sin->sin_addr.s_addr);
1145 inet_insert_ifa(ifa);
1147 break;
1149 done:
1150 rtnl_unlock();
1151 out:
1152 return ret;
1153 rarok:
1154 rtnl_unlock();
1155 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1156 goto out;
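/*
 * Illustrative userspace sketch (editorial addition): the SIOCGIFADDR
 * branch above is what a plain ioctl() caller exercises.  The interface
 * name "eth0" is an assumption for the example.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <arpa/inet.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	int main(void)
 *	{
 *		struct ifreq ifr;
 *		struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		if (fd < 0)
 *			return 1;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFADDR, &ifr) == 0)
 *			printf("%s: %s\n", ifr.ifr_name, inet_ntoa(sin->sin_addr));
 *		close(fd);
 *		return 0;
 *	}
 */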
1159 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1161 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1162 struct in_ifaddr *ifa;
1163 struct ifreq ifr;
1164 int done = 0;
1166 if (!in_dev)
1167 goto out;
1169 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1170 if (!buf) {
1171 done += sizeof(ifr);
1172 continue;
1174 if (len < (int) sizeof(ifr))
1175 break;
1176 memset(&ifr, 0, sizeof(struct ifreq));
1177 strcpy(ifr.ifr_name, ifa->ifa_label);
1179 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1180 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1181 ifa->ifa_local;
1183 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1184 done = -EFAULT;
1185 break;
1187 buf += sizeof(struct ifreq);
1188 len -= sizeof(struct ifreq);
1189 done += sizeof(struct ifreq);
1191 out:
1192 return done;
1195 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1197 __be32 addr = 0;
1198 struct in_device *in_dev;
1199 struct net *net = dev_net(dev);
1200 int master_idx;
1202 rcu_read_lock();
1203 in_dev = __in_dev_get_rcu(dev);
1204 if (!in_dev)
1205 goto no_in_dev;
1207 for_primary_ifa(in_dev) {
1208 if (ifa->ifa_scope > scope)
1209 continue;
1210 if (!dst || inet_ifa_match(dst, ifa)) {
1211 addr = ifa->ifa_local;
1212 break;
1214 if (!addr)
1215 addr = ifa->ifa_local;
1216 } endfor_ifa(in_dev);
1218 if (addr)
1219 goto out_unlock;
1220 no_in_dev:
1221 master_idx = l3mdev_master_ifindex_rcu(dev);
1223 /* For VRFs, the VRF device takes the place of the loopback device,
1224 * with addresses on it being preferred. Note in such cases the
1225 * loopback device will be among the devices that fail the master_idx
1226 * equality check in the loop below.
1228 if (master_idx &&
1229 (dev = dev_get_by_index_rcu(net, master_idx)) &&
1230 (in_dev = __in_dev_get_rcu(dev))) {
1231 for_primary_ifa(in_dev) {
1232 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1233 ifa->ifa_scope <= scope) {
1234 addr = ifa->ifa_local;
1235 goto out_unlock;
1237 } endfor_ifa(in_dev);
1240 /* Non-loopback addresses on the loopback device should be preferred
1241 in this case. It is important that lo is the first interface
1242 in the dev_base list.
1244 for_each_netdev_rcu(net, dev) {
1245 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1246 continue;
1248 in_dev = __in_dev_get_rcu(dev);
1249 if (!in_dev)
1250 continue;
1252 for_primary_ifa(in_dev) {
1253 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1254 ifa->ifa_scope <= scope) {
1255 addr = ifa->ifa_local;
1256 goto out_unlock;
1258 } endfor_ifa(in_dev);
1260 out_unlock:
1261 rcu_read_unlock();
1262 return addr;
1264 EXPORT_SYMBOL(inet_select_addr);
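/*
 * Illustrative sketch (editorial addition, not part of devinet.c): picking
 * a source address for a reply that must stay within link scope, a pattern
 * also used by ARP when choosing a sender address.  The helper name is an
 * assumption for the example.
 */
static __be32 example_pick_src(const struct net_device *dev, __be32 dst)
{
	/* Returns 0 if no suitable primary address is configured. */
	return inet_select_addr(dev, dst, RT_SCOPE_LINK);
}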
1266 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1267 __be32 local, int scope)
1269 int same = 0;
1270 __be32 addr = 0;
1272 for_ifa(in_dev) {
1273 if (!addr &&
1274 (local == ifa->ifa_local || !local) &&
1275 ifa->ifa_scope <= scope) {
1276 addr = ifa->ifa_local;
1277 if (same)
1278 break;
1280 if (!same) {
1281 same = (!local || inet_ifa_match(local, ifa)) &&
1282 (!dst || inet_ifa_match(dst, ifa));
1283 if (same && addr) {
1284 if (local || !dst)
1285 break;
1286 /* Is the selected addr in the dst subnet? */
1287 if (inet_ifa_match(addr, ifa))
1288 break;
1289 /* No, then can we use new local src? */
1290 if (ifa->ifa_scope <= scope) {
1291 addr = ifa->ifa_local;
1292 break;
1294 /* search for large dst subnet for addr */
1295 same = 0;
1298 } endfor_ifa(in_dev);
1300 return same ? addr : 0;
1304 * Confirm that local IP address exists using wildcards:
1305 * - net: netns to check, cannot be NULL
1306 * - in_dev: only on this interface, NULL=any interface
1307 * - dst: only in the same subnet as dst, 0=any dst
1308 * - local: address, 0=autoselect the local address
1309 * - scope: maximum allowed scope value for the local address
1311 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1312 __be32 dst, __be32 local, int scope)
1314 __be32 addr = 0;
1315 struct net_device *dev;
1317 if (in_dev)
1318 return confirm_addr_indev(in_dev, dst, local, scope);
1320 rcu_read_lock();
1321 for_each_netdev_rcu(net, dev) {
1322 in_dev = __in_dev_get_rcu(dev);
1323 if (in_dev) {
1324 addr = confirm_addr_indev(in_dev, dst, local, scope);
1325 if (addr)
1326 break;
1329 rcu_read_unlock();
1331 return addr;
1333 EXPORT_SYMBOL(inet_confirm_addr);
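/*
 * Illustrative sketch (editorial addition, not part of devinet.c): checking
 * whether some local address at host scope matches the given wildcards.
 * The helper name is an assumption; a zero return from inet_confirm_addr()
 * means no local address matched.
 */
static bool example_have_local_addr(struct net *net, struct in_device *in_dev,
				    __be32 dst, __be32 local)
{
	return inet_confirm_addr(net, in_dev, dst, local, RT_SCOPE_HOST) != 0;
}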
1336 * Device notifier
1339 int register_inetaddr_notifier(struct notifier_block *nb)
1341 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1343 EXPORT_SYMBOL(register_inetaddr_notifier);
1345 int unregister_inetaddr_notifier(struct notifier_block *nb)
1347 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1349 EXPORT_SYMBOL(unregister_inetaddr_notifier);
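/*
 * Illustrative sketch (editorial addition, not part of devinet.c): a minimal
 * consumer of the inetaddr notifier chain.  The names are assumptions for
 * the example; the notifier data is the affected struct in_ifaddr, and the
 * chain is blocking, so the callback is allowed to sleep.
 */
static int example_inetaddr_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = ptr;

	if (event == NETDEV_UP)
		pr_debug("%pI4 added on %s\n", &ifa->ifa_local,
			 ifa->ifa_dev->dev->name);
	else if (event == NETDEV_DOWN)
		pr_debug("%pI4 removed from %s\n", &ifa->ifa_local,
			 ifa->ifa_dev->dev->name);

	return NOTIFY_DONE;
}

static struct notifier_block example_inetaddr_nb = {
	.notifier_call = example_inetaddr_event,
};

/* Typically paired as register_inetaddr_notifier(&example_inetaddr_nb) at
 * module init and unregister_inetaddr_notifier(&example_inetaddr_nb) at exit.
 */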
1351 /* Rename ifa_labels for a device name change. Make some effort to preserve
1352 * existing alias numbering and to create unique labels if possible.
1354 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1356 struct in_ifaddr *ifa;
1357 int named = 0;
1359 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1360 char old[IFNAMSIZ], *dot;
1362 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1363 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1364 if (named++ == 0)
1365 goto skip;
1366 dot = strchr(old, ':');
1367 if (!dot) {
1368 sprintf(old, ":%d", named);
1369 dot = old;
1371 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1372 strcat(ifa->ifa_label, dot);
1373 else
1374 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1375 skip:
1376 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1380 static bool inetdev_valid_mtu(unsigned int mtu)
1382 return mtu >= 68;
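/* The 68 above is the minimum IPv4 MTU required by RFC 791: a maximal
 * 60-byte header plus an 8-byte fragment must always fit unfragmented.
 */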
1385 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1386 struct in_device *in_dev)
1389 struct in_ifaddr *ifa;
1391 for (ifa = in_dev->ifa_list; ifa;
1392 ifa = ifa->ifa_next) {
1393 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1394 ifa->ifa_local, dev,
1395 ifa->ifa_local, NULL,
1396 dev->dev_addr, NULL);
1400 /* Called only under RTNL semaphore */
1402 static int inetdev_event(struct notifier_block *this, unsigned long event,
1403 void *ptr)
1405 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1406 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1408 ASSERT_RTNL();
1410 if (!in_dev) {
1411 if (event == NETDEV_REGISTER) {
1412 in_dev = inetdev_init(dev);
1413 if (IS_ERR(in_dev))
1414 return notifier_from_errno(PTR_ERR(in_dev));
1415 if (dev->flags & IFF_LOOPBACK) {
1416 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1417 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1419 } else if (event == NETDEV_CHANGEMTU) {
1420 /* Re-enabling IP */
1421 if (inetdev_valid_mtu(dev->mtu))
1422 in_dev = inetdev_init(dev);
1424 goto out;
1427 switch (event) {
1428 case NETDEV_REGISTER:
1429 pr_debug("%s: bug\n", __func__);
1430 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1431 break;
1432 case NETDEV_UP:
1433 if (!inetdev_valid_mtu(dev->mtu))
1434 break;
1435 if (dev->flags & IFF_LOOPBACK) {
1436 struct in_ifaddr *ifa = inet_alloc_ifa();
1438 if (ifa) {
1439 INIT_HLIST_NODE(&ifa->hash);
1440 ifa->ifa_local =
1441 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1442 ifa->ifa_prefixlen = 8;
1443 ifa->ifa_mask = inet_make_mask(8);
1444 in_dev_hold(in_dev);
1445 ifa->ifa_dev = in_dev;
1446 ifa->ifa_scope = RT_SCOPE_HOST;
1447 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1448 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1449 INFINITY_LIFE_TIME);
1450 ipv4_devconf_setall(in_dev);
1451 neigh_parms_data_state_setall(in_dev->arp_parms);
1452 inet_insert_ifa(ifa);
1455 ip_mc_up(in_dev);
1456 /* fall through */
1457 case NETDEV_CHANGEADDR:
1458 if (!IN_DEV_ARP_NOTIFY(in_dev))
1459 break;
1460 /* fall through */
1461 case NETDEV_NOTIFY_PEERS:
1462 /* Send gratuitous ARP to notify of link change */
1463 inetdev_send_gratuitous_arp(dev, in_dev);
1464 break;
1465 case NETDEV_DOWN:
1466 ip_mc_down(in_dev);
1467 break;
1468 case NETDEV_PRE_TYPE_CHANGE:
1469 ip_mc_unmap(in_dev);
1470 break;
1471 case NETDEV_POST_TYPE_CHANGE:
1472 ip_mc_remap(in_dev);
1473 break;
1474 case NETDEV_CHANGEMTU:
1475 if (inetdev_valid_mtu(dev->mtu))
1476 break;
1477 /* disable IP when MTU is not enough */
1478 case NETDEV_UNREGISTER:
1479 inetdev_destroy(in_dev);
1480 break;
1481 case NETDEV_CHANGENAME:
1482 /* Do not notify about label change; this event is
1483 * not interesting to applications using netlink.
1485 inetdev_changename(dev, in_dev);
1487 devinet_sysctl_unregister(in_dev);
1488 devinet_sysctl_register(in_dev);
1489 break;
1491 out:
1492 return NOTIFY_DONE;
1495 static struct notifier_block ip_netdev_notifier = {
1496 .notifier_call = inetdev_event,
1499 static size_t inet_nlmsg_size(void)
1501 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1502 + nla_total_size(4) /* IFA_ADDRESS */
1503 + nla_total_size(4) /* IFA_LOCAL */
1504 + nla_total_size(4) /* IFA_BROADCAST */
1505 + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1506 + nla_total_size(4) /* IFA_FLAGS */
1507 + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1510 static inline u32 cstamp_delta(unsigned long cstamp)
1512 return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
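/* cstamp_delta() converts a jiffies timestamp into hundredths of a second
 * relative to INITIAL_JIFFIES, the unit exported in struct ifa_cacheinfo.
 */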
1515 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1516 unsigned long tstamp, u32 preferred, u32 valid)
1518 struct ifa_cacheinfo ci;
1520 ci.cstamp = cstamp_delta(cstamp);
1521 ci.tstamp = cstamp_delta(tstamp);
1522 ci.ifa_prefered = preferred;
1523 ci.ifa_valid = valid;
1525 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1528 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1529 u32 portid, u32 seq, int event, unsigned int flags)
1531 struct ifaddrmsg *ifm;
1532 struct nlmsghdr *nlh;
1533 u32 preferred, valid;
1535 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1536 if (!nlh)
1537 return -EMSGSIZE;
1539 ifm = nlmsg_data(nlh);
1540 ifm->ifa_family = AF_INET;
1541 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1542 ifm->ifa_flags = ifa->ifa_flags;
1543 ifm->ifa_scope = ifa->ifa_scope;
1544 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1546 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1547 preferred = ifa->ifa_preferred_lft;
1548 valid = ifa->ifa_valid_lft;
1549 if (preferred != INFINITY_LIFE_TIME) {
1550 long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1552 if (preferred > tval)
1553 preferred -= tval;
1554 else
1555 preferred = 0;
1556 if (valid != INFINITY_LIFE_TIME) {
1557 if (valid > tval)
1558 valid -= tval;
1559 else
1560 valid = 0;
1563 } else {
1564 preferred = INFINITY_LIFE_TIME;
1565 valid = INFINITY_LIFE_TIME;
1567 if ((ifa->ifa_address &&
1568 nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1569 (ifa->ifa_local &&
1570 nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1571 (ifa->ifa_broadcast &&
1572 nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1573 (ifa->ifa_label[0] &&
1574 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1575 nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1576 put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1577 preferred, valid))
1578 goto nla_put_failure;
1580 nlmsg_end(skb, nlh);
1581 return 0;
1583 nla_put_failure:
1584 nlmsg_cancel(skb, nlh);
1585 return -EMSGSIZE;
1588 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1590 struct net *net = sock_net(skb->sk);
1591 int h, s_h;
1592 int idx, s_idx;
1593 int ip_idx, s_ip_idx;
1594 struct net_device *dev;
1595 struct in_device *in_dev;
1596 struct in_ifaddr *ifa;
1597 struct hlist_head *head;
1599 s_h = cb->args[0];
1600 s_idx = idx = cb->args[1];
1601 s_ip_idx = ip_idx = cb->args[2];
1603 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1604 idx = 0;
1605 head = &net->dev_index_head[h];
1606 rcu_read_lock();
1607 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1608 net->dev_base_seq;
1609 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1610 if (idx < s_idx)
1611 goto cont;
1612 if (h > s_h || idx > s_idx)
1613 s_ip_idx = 0;
1614 in_dev = __in_dev_get_rcu(dev);
1615 if (!in_dev)
1616 goto cont;
1618 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1619 ifa = ifa->ifa_next, ip_idx++) {
1620 if (ip_idx < s_ip_idx)
1621 continue;
1622 if (inet_fill_ifaddr(skb, ifa,
1623 NETLINK_CB(cb->skb).portid,
1624 cb->nlh->nlmsg_seq,
1625 RTM_NEWADDR, NLM_F_MULTI) < 0) {
1626 rcu_read_unlock();
1627 goto done;
1629 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1631 cont:
1632 idx++;
1634 rcu_read_unlock();
1637 done:
1638 cb->args[0] = h;
1639 cb->args[1] = idx;
1640 cb->args[2] = ip_idx;
1642 return skb->len;
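/*
 * Illustrative userspace sketch (editorial addition): the dump above answers
 * an RTM_GETADDR request with NLM_F_DUMP, which is what "ip -4 addr show"
 * sends.  A minimal raw-netlink requester (error handling trimmed):
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/socket.h>
 *	#include <linux/netlink.h>
 *	#include <linux/rtnetlink.h>
 *	#include <linux/if_addr.h>
 *
 *	int request_addr_dump(void)
 *	{
 *		struct {
 *			struct nlmsghdr  nlh;
 *			struct ifaddrmsg ifm;
 *		} req;
 *		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&req, 0, sizeof(req));
 *		req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.ifm));
 *		req.nlh.nlmsg_type  = RTM_GETADDR;
 *		req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
 *		req.ifm.ifa_family  = AF_INET;
 *		if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return fd;	// caller recv()s NLM_F_MULTI replies until NLMSG_DONE
 *	}
 */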
1645 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1646 u32 portid)
1648 struct sk_buff *skb;
1649 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1650 int err = -ENOBUFS;
1651 struct net *net;
1653 net = dev_net(ifa->ifa_dev->dev);
1654 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1655 if (!skb)
1656 goto errout;
1658 err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1659 if (err < 0) {
1660 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1661 WARN_ON(err == -EMSGSIZE);
1662 kfree_skb(skb);
1663 goto errout;
1665 rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1666 return;
1667 errout:
1668 if (err < 0)
1669 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1672 static size_t inet_get_link_af_size(const struct net_device *dev,
1673 u32 ext_filter_mask)
1675 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1677 if (!in_dev)
1678 return 0;
1680 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1683 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1684 u32 ext_filter_mask)
1686 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1687 struct nlattr *nla;
1688 int i;
1690 if (!in_dev)
1691 return -ENODATA;
1693 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1694 if (!nla)
1695 return -EMSGSIZE;
1697 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1698 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1700 return 0;
1703 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1704 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1707 static int inet_validate_link_af(const struct net_device *dev,
1708 const struct nlattr *nla)
1710 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1711 int err, rem;
1713 if (dev && !__in_dev_get_rtnl(dev))
1714 return -EAFNOSUPPORT;
1716 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1717 if (err < 0)
1718 return err;
1720 if (tb[IFLA_INET_CONF]) {
1721 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1722 int cfgid = nla_type(a);
1724 if (nla_len(a) < 4)
1725 return -EINVAL;
1727 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1728 return -EINVAL;
1732 return 0;
1735 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1737 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1738 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1739 int rem;
1741 if (!in_dev)
1742 return -EAFNOSUPPORT;
1744 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1745 BUG();
1747 if (tb[IFLA_INET_CONF]) {
1748 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1749 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1752 return 0;
1755 static int inet_netconf_msgsize_devconf(int type)
1757 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1758 + nla_total_size(4); /* NETCONFA_IFINDEX */
1759 bool all = false;
1761 if (type == NETCONFA_ALL)
1762 all = true;
1764 if (all || type == NETCONFA_FORWARDING)
1765 size += nla_total_size(4);
1766 if (all || type == NETCONFA_RP_FILTER)
1767 size += nla_total_size(4);
1768 if (all || type == NETCONFA_MC_FORWARDING)
1769 size += nla_total_size(4);
1770 if (all || type == NETCONFA_PROXY_NEIGH)
1771 size += nla_total_size(4);
1772 if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1773 size += nla_total_size(4);
1775 return size;
1778 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1779 struct ipv4_devconf *devconf, u32 portid,
1780 u32 seq, int event, unsigned int flags,
1781 int type)
1783 struct nlmsghdr *nlh;
1784 struct netconfmsg *ncm;
1785 bool all = false;
1787 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1788 flags);
1789 if (!nlh)
1790 return -EMSGSIZE;
1792 if (type == NETCONFA_ALL)
1793 all = true;
1795 ncm = nlmsg_data(nlh);
1796 ncm->ncm_family = AF_INET;
1798 if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1799 goto nla_put_failure;
1801 if ((all || type == NETCONFA_FORWARDING) &&
1802 nla_put_s32(skb, NETCONFA_FORWARDING,
1803 IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1804 goto nla_put_failure;
1805 if ((all || type == NETCONFA_RP_FILTER) &&
1806 nla_put_s32(skb, NETCONFA_RP_FILTER,
1807 IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1808 goto nla_put_failure;
1809 if ((all || type == NETCONFA_MC_FORWARDING) &&
1810 nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1811 IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1812 goto nla_put_failure;
1813 if ((all || type == NETCONFA_PROXY_NEIGH) &&
1814 nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1815 IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1816 goto nla_put_failure;
1817 if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1818 nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1819 IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1820 goto nla_put_failure;
1822 nlmsg_end(skb, nlh);
1823 return 0;
1825 nla_put_failure:
1826 nlmsg_cancel(skb, nlh);
1827 return -EMSGSIZE;
1830 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1831 struct ipv4_devconf *devconf)
1833 struct sk_buff *skb;
1834 int err = -ENOBUFS;
1836 skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1837 if (!skb)
1838 goto errout;
1840 err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1841 RTM_NEWNETCONF, 0, type);
1842 if (err < 0) {
1843 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1844 WARN_ON(err == -EMSGSIZE);
1845 kfree_skb(skb);
1846 goto errout;
1848 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1849 return;
1850 errout:
1851 if (err < 0)
1852 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1855 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1856 [NETCONFA_IFINDEX] = { .len = sizeof(int) },
1857 [NETCONFA_FORWARDING] = { .len = sizeof(int) },
1858 [NETCONFA_RP_FILTER] = { .len = sizeof(int) },
1859 [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
1860 [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
1863 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1864 struct nlmsghdr *nlh)
1866 struct net *net = sock_net(in_skb->sk);
1867 struct nlattr *tb[NETCONFA_MAX+1];
1868 struct netconfmsg *ncm;
1869 struct sk_buff *skb;
1870 struct ipv4_devconf *devconf;
1871 struct in_device *in_dev;
1872 struct net_device *dev;
1873 int ifindex;
1874 int err;
1876 err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1877 devconf_ipv4_policy);
1878 if (err < 0)
1879 goto errout;
1881 err = -EINVAL;
1882 if (!tb[NETCONFA_IFINDEX])
1883 goto errout;
1885 ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1886 switch (ifindex) {
1887 case NETCONFA_IFINDEX_ALL:
1888 devconf = net->ipv4.devconf_all;
1889 break;
1890 case NETCONFA_IFINDEX_DEFAULT:
1891 devconf = net->ipv4.devconf_dflt;
1892 break;
1893 default:
1894 dev = __dev_get_by_index(net, ifindex);
1895 if (!dev)
1896 goto errout;
1897 in_dev = __in_dev_get_rtnl(dev);
1898 if (!in_dev)
1899 goto errout;
1900 devconf = &in_dev->cnf;
1901 break;
1904 err = -ENOBUFS;
1905 skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1906 if (!skb)
1907 goto errout;
1909 err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1910 NETLINK_CB(in_skb).portid,
1911 nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1912 NETCONFA_ALL);
1913 if (err < 0) {
1914 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1915 WARN_ON(err == -EMSGSIZE);
1916 kfree_skb(skb);
1917 goto errout;
1919 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1920 errout:
1921 return err;
1924 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1925 struct netlink_callback *cb)
1927 struct net *net = sock_net(skb->sk);
1928 int h, s_h;
1929 int idx, s_idx;
1930 struct net_device *dev;
1931 struct in_device *in_dev;
1932 struct hlist_head *head;
1934 s_h = cb->args[0];
1935 s_idx = idx = cb->args[1];
1937 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1938 idx = 0;
1939 head = &net->dev_index_head[h];
1940 rcu_read_lock();
1941 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1942 net->dev_base_seq;
1943 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1944 if (idx < s_idx)
1945 goto cont;
1946 in_dev = __in_dev_get_rcu(dev);
1947 if (!in_dev)
1948 goto cont;
1950 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1951 &in_dev->cnf,
1952 NETLINK_CB(cb->skb).portid,
1953 cb->nlh->nlmsg_seq,
1954 RTM_NEWNETCONF,
1955 NLM_F_MULTI,
1956 NETCONFA_ALL) < 0) {
1957 rcu_read_unlock();
1958 goto done;
1960 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1961 cont:
1962 idx++;
1964 rcu_read_unlock();
1966 if (h == NETDEV_HASHENTRIES) {
1967 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1968 net->ipv4.devconf_all,
1969 NETLINK_CB(cb->skb).portid,
1970 cb->nlh->nlmsg_seq,
1971 RTM_NEWNETCONF, NLM_F_MULTI,
1972 NETCONFA_ALL) < 0)
1973 goto done;
1974 else
1975 h++;
1977 if (h == NETDEV_HASHENTRIES + 1) {
1978 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1979 net->ipv4.devconf_dflt,
1980 NETLINK_CB(cb->skb).portid,
1981 cb->nlh->nlmsg_seq,
1982 RTM_NEWNETCONF, NLM_F_MULTI,
1983 NETCONFA_ALL) < 0)
1984 goto done;
1985 else
1986 h++;
1988 done:
1989 cb->args[0] = h;
1990 cb->args[1] = idx;
1992 return skb->len;
1995 #ifdef CONFIG_SYSCTL
1997 static void devinet_copy_dflt_conf(struct net *net, int i)
1999 struct net_device *dev;
2001 rcu_read_lock();
2002 for_each_netdev_rcu(net, dev) {
2003 struct in_device *in_dev;
2005 in_dev = __in_dev_get_rcu(dev);
2006 if (in_dev && !test_bit(i, in_dev->cnf.state))
2007 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2009 rcu_read_unlock();
2012 /* called with RTNL locked */
2013 static void inet_forward_change(struct net *net)
2015 struct net_device *dev;
2016 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2018 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2019 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2020 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2021 NETCONFA_IFINDEX_ALL,
2022 net->ipv4.devconf_all);
2023 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2024 NETCONFA_IFINDEX_DEFAULT,
2025 net->ipv4.devconf_dflt);
2027 for_each_netdev(net, dev) {
2028 struct in_device *in_dev;
2030 if (on)
2031 dev_disable_lro(dev);
2033 in_dev = __in_dev_get_rtnl(dev);
2034 if (in_dev) {
2035 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2036 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2037 dev->ifindex, &in_dev->cnf);
2042 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2044 if (cnf == net->ipv4.devconf_dflt)
2045 return NETCONFA_IFINDEX_DEFAULT;
2046 else if (cnf == net->ipv4.devconf_all)
2047 return NETCONFA_IFINDEX_ALL;
2048 else {
2049 struct in_device *idev
2050 = container_of(cnf, struct in_device, cnf);
2051 return idev->dev->ifindex;
2055 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2056 void __user *buffer,
2057 size_t *lenp, loff_t *ppos)
2059 int old_value = *(int *)ctl->data;
2060 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2061 int new_value = *(int *)ctl->data;
2063 if (write) {
2064 struct ipv4_devconf *cnf = ctl->extra1;
2065 struct net *net = ctl->extra2;
2066 int i = (int *)ctl->data - cnf->data;
2067 int ifindex;
2069 set_bit(i, cnf->state);
2071 if (cnf == net->ipv4.devconf_dflt)
2072 devinet_copy_dflt_conf(net, i);
2073 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2074 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2075 if ((new_value == 0) && (old_value != 0))
2076 rt_cache_flush(net);
2078 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2079 new_value != old_value) {
2080 ifindex = devinet_conf_ifindex(net, cnf);
2081 inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2082 ifindex, cnf);
2084 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2085 new_value != old_value) {
2086 ifindex = devinet_conf_ifindex(net, cnf);
2087 inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2088 ifindex, cnf);
2090 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2091 new_value != old_value) {
2092 ifindex = devinet_conf_ifindex(net, cnf);
2093 inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2094 ifindex, cnf);
2098 return ret;
2101 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2102 void __user *buffer,
2103 size_t *lenp, loff_t *ppos)
2105 int *valp = ctl->data;
2106 int val = *valp;
2107 loff_t pos = *ppos;
2108 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2110 if (write && *valp != val) {
2111 struct net *net = ctl->extra2;
2113 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2114 if (!rtnl_trylock()) {
2115 /* Restore the original values before restarting */
2116 *valp = val;
2117 *ppos = pos;
2118 return restart_syscall();
2120 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2121 inet_forward_change(net);
2122 } else {
2123 struct ipv4_devconf *cnf = ctl->extra1;
2124 struct in_device *idev =
2125 container_of(cnf, struct in_device, cnf);
2126 if (*valp)
2127 dev_disable_lro(idev->dev);
2128 inet_netconf_notify_devconf(net,
2129 NETCONFA_FORWARDING,
2130 idev->dev->ifindex,
2131 cnf);
2133 rtnl_unlock();
2134 rt_cache_flush(net);
2135 } else
2136 inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2137 NETCONFA_IFINDEX_DEFAULT,
2138 net->ipv4.devconf_dflt);
2141 return ret;
2144 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2145 void __user *buffer,
2146 size_t *lenp, loff_t *ppos)
2148 int *valp = ctl->data;
2149 int val = *valp;
2150 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2151 struct net *net = ctl->extra2;
2153 if (write && *valp != val)
2154 rt_cache_flush(net);
2156 return ret;
2159 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2161 .procname = name, \
2162 .data = ipv4_devconf.data + \
2163 IPV4_DEVCONF_ ## attr - 1, \
2164 .maxlen = sizeof(int), \
2165 .mode = mval, \
2166 .proc_handler = proc, \
2167 .extra1 = &ipv4_devconf, \
2170 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2171 DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2173 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2174 DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2176 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2177 DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2179 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2180 DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
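/*
 * Editorial example (not in the original file): with the macros above,
 * DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter") expands to roughly
 *
 *	{
 *		.procname	= "rp_filter",
 *		.data		= ipv4_devconf.data + IPV4_DEVCONF_RP_FILTER - 1,
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= devinet_conf_proc,
 *		.extra1		= &ipv4_devconf,
 *	}
 *
 * __devinet_sysctl_register() later rebases .data and .extra1 onto the
 * per-device or per-namespace ipv4_devconf instance.
 */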
2182 static struct devinet_sysctl_table {
2183 struct ctl_table_header *sysctl_header;
2184 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2185 } devinet_sysctl = {
2186 .devinet_vars = {
2187 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2188 devinet_sysctl_forward),
2189 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2191 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2192 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2193 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2194 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2195 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2196 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2197 "accept_source_route"),
2198 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2199 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2200 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2201 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2202 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2203 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2204 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2205 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2206 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2207 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2208 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2209 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2210 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2211 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2212 "force_igmp_version"),
2213 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2214 "igmpv2_unsolicited_report_interval"),
2215 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2216 "igmpv3_unsolicited_report_interval"),
2217 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2218 "ignore_routes_with_linkdown"),
2219 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2220 "drop_gratuitous_arp"),
2222 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2223 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2224 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2225 "promote_secondaries"),
2226 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2227 "route_localnet"),
2228 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2229 "drop_unicast_in_l2_multicast"),
2233 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2234 int ifindex, struct ipv4_devconf *p)
2236 int i;
2237 struct devinet_sysctl_table *t;
2238 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2240 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2241 if (!t)
2242 goto out;
2244 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2245 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2246 t->devinet_vars[i].extra1 = p;
2247 t->devinet_vars[i].extra2 = net;
2250 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2252 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2253 if (!t->sysctl_header)
2254 goto free;
2256 p->sysctl = t;
2258 inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
2259 return 0;
2261 free:
2262 kfree(t);
2263 out:
2264 return -ENOBUFS;
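/*
 * Note on __devinet_sysctl_register() above: the template table in
 * devinet_sysctl points every .data field into the static ipv4_devconf,
 * and the copy loop rebases it onto the devconf instance 'p' being
 * registered, roughly:
 *
 *	.data = &ipv4_devconf.data[IPV4_DEVCONF_<attr> - 1]
 *		+ ((char *)p - (char *)&ipv4_devconf)
 *	      == &p->data[IPV4_DEVCONF_<attr> - 1]
 *
 * The loop stops one entry short of ARRAY_SIZE() so the zeroed last slot
 * is left untouched as the ctl_table sentinel.  The NETCONFA_ALL
 * notification at the end announces the initial state of the new table
 * over netlink.
 */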
2267 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2269 struct devinet_sysctl_table *t = cnf->sysctl;
2271 if (!t)
2272 return;
2274 cnf->sysctl = NULL;
2275 unregister_net_sysctl_table(t->sysctl_header);
2276 kfree(t);
2279 static int devinet_sysctl_register(struct in_device *idev)
2281 int err;
2283 if (!sysctl_dev_name_is_allowed(idev->dev->name))
2284 return -EINVAL;
2286 err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2287 if (err)
2288 return err;
2289 err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2290 idev->dev->ifindex, &idev->cnf);
2291 if (err)
2292 neigh_sysctl_unregister(idev->arp_parms);
2293 return err;
2296 static void devinet_sysctl_unregister(struct in_device *idev)
2298 __devinet_sysctl_unregister(&idev->cnf);
2299 neigh_sysctl_unregister(idev->arp_parms);
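/*
 * devinet_sysctl_register()/devinet_sysctl_unregister() pair the
 * net/ipv4/conf/<dev> tree with the per-device ARP (neighbour) sysctls;
 * on failure the neigh registration is rolled back.
 * sysctl_dev_name_is_allowed() refuses device names such as "all" or
 * "default", which would collide with the static trees registered from
 * devinet_init_net() below.
 */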
2302 static struct ctl_table ctl_forward_entry[] = {
2304 .procname = "ip_forward",
2305 .data = &ipv4_devconf.data[
2306 IPV4_DEVCONF_FORWARDING - 1],
2307 .maxlen = sizeof(int),
2308 .mode = 0644,
2309 .proc_handler = devinet_sysctl_forward,
2310 .extra1 = &ipv4_devconf,
2311 .extra2 = &init_net,
2313 { },
2315 #endif
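/*
 * ctl_forward_entry is likewise a template: for namespaces other than
 * init_net, devinet_init_net() below duplicates it and repoints .data,
 * .extra1 and .extra2 at that namespace's devconf_all copy, so
 * net/ipv4/ip_forward always aliases conf/all/forwarding for the
 * namespace in which it is read.
 */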
2317 static __net_init int devinet_init_net(struct net *net)
2319 int err;
2320 struct ipv4_devconf *all, *dflt;
2321 #ifdef CONFIG_SYSCTL
2322 struct ctl_table *tbl = ctl_forward_entry;
2323 struct ctl_table_header *forw_hdr;
2324 #endif
2326 err = -ENOMEM;
2327 all = &ipv4_devconf;
2328 dflt = &ipv4_devconf_dflt;
2330 if (!net_eq(net, &init_net)) {
2331 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2332 if (!all)
2333 goto err_alloc_all;
2335 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2336 if (!dflt)
2337 goto err_alloc_dflt;
2339 #ifdef CONFIG_SYSCTL
2340 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2341 if (!tbl)
2342 goto err_alloc_ctl;
2344 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2345 tbl[0].extra1 = all;
2346 tbl[0].extra2 = net;
2347 #endif
2350 #ifdef CONFIG_SYSCTL
2351 err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2352 if (err < 0)
2353 goto err_reg_all;
2355 err = __devinet_sysctl_register(net, "default",
2356 NETCONFA_IFINDEX_DEFAULT, dflt);
2357 if (err < 0)
2358 goto err_reg_dflt;
2360 err = -ENOMEM;
2361 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2362 if (!forw_hdr)
2363 goto err_reg_ctl;
2364 net->ipv4.forw_hdr = forw_hdr;
2365 #endif
2367 net->ipv4.devconf_all = all;
2368 net->ipv4.devconf_dflt = dflt;
2369 return 0;
2371 #ifdef CONFIG_SYSCTL
2372 err_reg_ctl:
2373 __devinet_sysctl_unregister(dflt);
2374 err_reg_dflt:
2375 __devinet_sysctl_unregister(all);
2376 err_reg_all:
2377 if (tbl != ctl_forward_entry)
2378 kfree(tbl);
2379 err_alloc_ctl:
2380 #endif
2381 if (dflt != &ipv4_devconf_dflt)
2382 kfree(dflt);
2383 err_alloc_dflt:
2384 if (all != &ipv4_devconf)
2385 kfree(all);
2386 err_alloc_all:
2387 return err;
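/*
 * devinet_init_net() runs once per network namespace: init_net keeps the
 * static ipv4_devconf/ipv4_devconf_dflt and ctl_forward_entry, while
 * every other namespace works on kmemdup()ed copies, so sysctl changes
 * never leak between namespaces.  The error unwind frees only what was
 * duplicated.
 */
#if 0	/* Minimal userspace sketch, not part of this file: creating a new
	 * network namespace runs devinet_init_net() above, giving the
	 * child namespace its own "all"/"default" devconf copies
	 * (duplicated from the static templates).  Needs CAP_SYS_ADMIN.
	 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	if (unshare(CLONE_NEWNET)) {	/* triggers devinet_init_net() */
		perror("unshare");
		return 1;
	}
	/* Reads the namespace-local copy, not init_net's live value. */
	execlp("cat", "cat", "/proc/sys/net/ipv4/conf/all/forwarding",
	       (char *)NULL);
	perror("execlp");
	return 1;
}
#endif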
2390 static __net_exit void devinet_exit_net(struct net *net)
2392 #ifdef CONFIG_SYSCTL
2393 struct ctl_table *tbl;
2395 tbl = net->ipv4.forw_hdr->ctl_table_arg;
2396 unregister_net_sysctl_table(net->ipv4.forw_hdr);
2397 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2398 __devinet_sysctl_unregister(net->ipv4.devconf_all);
2399 kfree(tbl);
2400 #endif
2401 kfree(net->ipv4.devconf_dflt);
2402 kfree(net->ipv4.devconf_all);
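/*
 * Teardown mirrors devinet_init_net(): the ip_forward header is dropped
 * first, then the "default" and "all" sysctl trees, then the duplicated
 * devconf structures are freed.  Since init_net is not torn down this
 * way, the unconditional kfree() calls only ever see kmemdup()ed copies.
 */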
2405 static __net_initdata struct pernet_operations devinet_ops = {
2406 .init = devinet_init_net,
2407 .exit = devinet_exit_net,
2410 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2411 .family = AF_INET,
2412 .fill_link_af = inet_fill_link_af,
2413 .get_link_af_size = inet_get_link_af_size,
2414 .validate_link_af = inet_validate_link_af,
2415 .set_link_af = inet_set_link_af,
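/*
 * inet_af_ops hooks IPv4 into generic rtnetlink link handling: the
 * fill/get_size callbacks export the per-device devconf as an AF_INET
 * nest inside IFLA_AF_SPEC, and validate/set_link_af let a single
 * RTM_SETLINK request update those values.
 */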
2418 void __init devinet_init(void)
2420 int i;
2422 for (i = 0; i < IN4_ADDR_HSIZE; i++)
2423 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2425 register_pernet_subsys(&devinet_ops);
2427 register_gifconf(PF_INET, inet_gifconf);
2428 register_netdevice_notifier(&ip_netdev_notifier);
2430 queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2432 rtnl_af_register(&inet_af_ops);
2434 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2435 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2436 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2437 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2438 inet_netconf_dump_devconf, NULL);
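/*
 * The rtnl_register() calls above bind the IPv4 address and netconf
 * message types to this file: RTM_NEWADDR/RTM_DELADDR requests (e.g.
 * from "ip addr add/del") land in inet_rtm_newaddr()/inet_rtm_deladdr(),
 * RTM_GETADDR dumps go through inet_dump_ifaddr(), and RTM_GETNETCONF is
 * served by the netconf get/dump handlers.
 */
#if 0	/* Minimal userspace sketch, not part of this file: an RTM_GETADDR
	 * dump request answered by inet_dump_ifaddr() above.  Reply
	 * parsing is omitted; only the raw byte count is printed.
	 */
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr	nlh;
		struct ifaddrmsg ifa;
	} req;
	char buf[8192];
	ssize_t len;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
	req.nlh.nlmsg_type = RTM_GETADDR;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ifa.ifa_family = AF_INET;

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0) {
		perror("sendto");
		return 1;
	}
	len = recv(fd, buf, sizeof(buf), 0);	/* first chunk of the dump */
	printf("got %zd bytes of RTM_NEWADDR replies\n", len);
	close(fd);
	return 0;
}
#endif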