1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Linux network device link state notification
6 * Stefan Rompf <sux@loplof.de>
9 #include <linux/module.h>
10 #include <linux/netdevice.h>
13 #include <net/pkt_sched.h>
14 #include <linux/rtnetlink.h>
15 #include <linux/jiffies.h>
16 #include <linux/spinlock.h>
17 #include <linux/workqueue.h>
18 #include <linux/bitops.h>
19 #include <linux/types.h>
/*
 * File-scope linkwatch state shared by the functions in this file:
 *
 *   linkwatch_flags     - flag word; the LW_URGENT bit is tested, set and
 *                         cleared in it.
 *   linkwatch_nextevent - jiffies-based timestamp compared against jiffies
 *                         (and jiffies + HZ) when scheduling and running
 *                         the queue.
 *   linkwatch_work      - delayed work item whose handler is
 *                         linkwatch_event() (forward-declared here).
 *   lweventlist         - list of devices with a queued event, linked
 *                         through net_device::link_watch_list.
 *   lweventlist_lock    - spinlock held around accesses to lweventlist.
 */
27 static unsigned long linkwatch_flags
;
28 static unsigned long linkwatch_nextevent
;
30 static void linkwatch_event(struct work_struct
*dummy
);
31 static DECLARE_DELAYED_WORK(linkwatch_work
, linkwatch_event
);
33 static LIST_HEAD(lweventlist
);
34 static DEFINE_SPINLOCK(lweventlist_lock
);
/*
 * default_operstate - derive an IF_OPER_* value from the device's link flags.
 * @dev: device to inspect (not modified; taken as const)
 *
 * Visible logic, in order:
 *   - netif_testing(dev) is true      -> IF_OPER_TESTING
 *   - carrier is not ok               -> look up the device whose index is
 *     dev_get_iflink(dev) with __dev_get_by_index(); the result is
 *     IF_OPER_DOWN when that peer's carrier is ok, IF_OPER_LOWERLAYERDOWN
 *     otherwise
 *   - netif_dormant(dev) is true      -> IF_OPER_DORMANT
 *
 * NOTE(review): this listing has dropped lines (braces, the statement guarded
 * by the iflink == dev->ifindex test, any NULL check on peer, and the final
 * return). Confirm against the complete file before relying on this summary.
 */
36 static unsigned int default_operstate(const struct net_device
*dev
)
38 if (netif_testing(dev
))
39 return IF_OPER_TESTING
;
41 /* Some uppers (DSA) have additional sources for being down, so
42 * first check whether lower is indeed the source of its down state.
44 if (!netif_carrier_ok(dev
)) {
45 int iflink
= dev_get_iflink(dev
);
46 struct net_device
*peer
;
48 if (iflink
== dev
->ifindex
)
51 peer
= __dev_get_by_index(dev_net(dev
), iflink
);
55 return netif_carrier_ok(peer
) ? IF_OPER_DOWN
:
56 IF_OPER_LOWERLAYERDOWN
;
59 if (netif_dormant(dev
))
60 return IF_OPER_DORMANT
;
/*
 * rfc2863_policy - recompute and store dev->operstate.
 * @dev: device whose operstate is updated
 *
 * Starts from default_operstate(dev) and compares it with the current
 * dev->operstate (read with READ_ONCE). Depending on dev->link_mode, an
 * IF_OPER_UP result is replaced by IF_OPER_TESTING (IF_LINK_MODE_TESTING)
 * or IF_OPER_DORMANT (IF_LINK_MODE_DORMANT). The result is stored with
 * WRITE_ONCE.
 *
 * NOTE(review): the statement executed when the new and current states are
 * equal, and the statements separating the switch cases, are not visible in
 * this listing - presumably an early return and break statements; confirm.
 */
65 static void rfc2863_policy(struct net_device
*dev
)
67 unsigned int operstate
= default_operstate(dev
);
/* Compare the freshly derived state with the one currently stored. */
69 if (operstate
== READ_ONCE(dev
->operstate
))
72 switch(dev
->link_mode
) {
73 case IF_LINK_MODE_TESTING
:
74 if (operstate
== IF_OPER_UP
)
75 operstate
= IF_OPER_TESTING
;
78 case IF_LINK_MODE_DORMANT
:
79 if (operstate
== IF_OPER_UP
)
80 operstate
= IF_OPER_DORMANT
;
82 case IF_LINK_MODE_DEFAULT
:
/* Store the (possibly adjusted) state back into the device. */
87 WRITE_ONCE(dev
->operstate
, operstate
);
/*
 * linkwatch_init_dev - account for link state changes made before
 * registration.
 * @dev: device being initialised
 *
 * The visible part of the condition is true when the carrier is not ok or
 * the device is dormant.
 *
 * NOTE(review): the rest of the condition (it ends in a trailing "||") and
 * the statement it guards are cut off in this listing.
 */
91 void linkwatch_init_dev(struct net_device
*dev
)
93 /* Handle pre-registration link state changes */
94 if (!netif_carrier_ok(dev
) || netif_dormant(dev
) ||
/*
 * linkwatch_urgent_event - decide whether an event for @dev is urgent.
 * @dev: device the event refers to
 *
 * Visible checks, in order:
 *   - the device is not running (netif_running() false)
 *   - dev->ifindex differs from dev_get_iflink(dev)
 *   - the device is a LAG port or a LAG master
 * If none of those decides the outcome, the result is
 * netif_carrier_ok(dev) && qdisc_tx_changing(dev).
 *
 * NOTE(review): the values returned by the first three checks are not
 * visible in this listing; confirm against the complete file.
 */
100 static bool linkwatch_urgent_event(struct net_device
*dev
)
102 if (!netif_running(dev
))
105 if (dev
->ifindex
!= dev_get_iflink(dev
))
108 if (netif_is_lag_port(dev
) || netif_is_lag_master(dev
))
111 return netif_carrier_ok(dev
) && qdisc_tx_changing(dev
);
/*
 * linkwatch_add_event - queue @dev on lweventlist if it is not queued yet.
 * @dev: device to queue
 *
 * Runs under lweventlist_lock (irqsave variant). A device whose
 * link_watch_list node is empty is appended to the tail of lweventlist and
 * netdev_hold() is called on it with dev->linkwatch_dev_tracker.
 *
 * NOTE(review): the declaration of "flags" and the closing braces are not
 * visible in this listing.
 */
115 static void linkwatch_add_event(struct net_device
*dev
)
119 spin_lock_irqsave(&lweventlist_lock
, flags
);
/* An empty node means the device is not on any list yet. */
120 if (list_empty(&dev
->link_watch_list
)) {
121 list_add_tail(&dev
->link_watch_list
, &lweventlist
);
/* GFP_ATOMIC: this call is made with the spinlock held. */
122 netdev_hold(dev
, &dev
->linkwatch_dev_tracker
, GFP_ATOMIC
);
124 spin_unlock_irqrestore(&lweventlist_lock
, flags
);
/*
 * linkwatch_schedule_work - arrange for linkwatch_work to run.
 * @urgent: non-zero when the triggering event is urgent
 *
 * The delay starts as linkwatch_nextevent - jiffies. LW_URGENT in
 * linkwatch_flags is tested first, then test_and_set_bit() is applied to it.
 * At the end, when LW_URGENT is set, the work is (re)scheduled with a delay
 * of 0 through mod_delayed_work(); queue_delayed_work() is called with the
 * computed delay. Both use system_unbound_wq.
 *
 * NOTE(review): the statements guarded by the first two LW_URGENT tests, the
 * "urgent" check, the wrap-around clamp on delay and the else joining the
 * final two calls are not visible in this listing - confirm against the
 * complete file.
 */
128 static void linkwatch_schedule_work(int urgent
)
130 unsigned long delay
= linkwatch_nextevent
- jiffies
;
132 if (test_bit(LW_URGENT
, &linkwatch_flags
))
135 /* Minimise down-time: drop delay for up event. */
137 if (test_and_set_bit(LW_URGENT
, &linkwatch_flags
))
142 /* If we wrap around we'll delay it by at most HZ. */
147 * If urgent, schedule immediate execution; otherwise, don't
148 * override the existing timer.
150 if (test_bit(LW_URGENT
, &linkwatch_flags
))
151 mod_delayed_work(system_unbound_wq
, &linkwatch_work
, 0);
153 queue_delayed_work(system_unbound_wq
, &linkwatch_work
, delay
);
/*
 * linkwatch_do_dev - process the pending link event of one device.
 * @dev: device to process
 *
 * Visible steps: a memory barrier (smp_mb__before_atomic()), then
 * __LINK_STATE_LINKWATCH_PENDING is cleared in dev->state so new events can
 * be accepted. When IFF_UP is set in dev->flags, the carrier state is
 * checked and netdev_state_change(dev) is called.
 *
 * NOTE(review): the statements selected by the carrier check, and the
 * reference drop the trailing comment refers to (__dev_put()), are not
 * visible in this listing. Per that comment, callers are responsible for
 * netdev_tracker_free().
 */
157 static void linkwatch_do_dev(struct net_device
*dev
)
160 * Make sure the above read is complete since it can be
161 * rewritten as soon as we clear the bit below.
163 smp_mb__before_atomic();
165 /* We are about to handle this device,
166 * so new events can be accepted
168 clear_bit(__LINK_STATE_LINKWATCH_PENDING
, &dev
->state
);
171 if (dev
->flags
& IFF_UP
) {
172 if (netif_carrier_ok(dev
))
177 netdev_state_change(dev
);
179 /* Note: our callers are responsible for calling netdev_tracker_free().
180 * This is the reason we use __dev_put() instead of dev_put().
/*
 * __linkwatch_run_queue - process queued link events.
 * @urgent_only: non-zero to process only devices for which
 *               linkwatch_urgent_event() is true
 *
 * Visible behaviour:
 *   - The per-call budget do_dev starts at MAX_DO_DEV_PER_LOOP (100) and is
 *     raised by the same amount for the urgent case.
 *   - linkwatch_nextevent is set to jiffies + HZ, or reset to jiffies when
 *     it lies more than HZ in the future (wrap-around protection).
 *   - LW_URGENT is cleared in linkwatch_flags.
 *   - With lweventlist_lock held, lweventlist is spliced onto the local list
 *     "wrk"; entries are then taken from its head while it is non-empty and
 *     do_dev > 0.
 *   - A device that is not present, or is not urgent while @urgent_only is
 *     set, is appended back onto lweventlist.
 *   - Otherwise the netdev tracker is freed under the lock, the lock is
 *     dropped around linkwatch_do_dev(), and then retaken.
 *   - Whatever remains on "wrk" is spliced back, and
 *     linkwatch_schedule_work(0) is called if lweventlist is non-empty.
 *
 * NOTE(review): the declaration of "wrk", the conditions in front of the
 * budget and linkwatch_nextevent updates, the statement that skips to the
 * next entry after re-queueing, and the budget decrement are not visible in
 * this listing - confirm against the complete file.
 */
185 static void __linkwatch_run_queue(int urgent_only
)
187 #define MAX_DO_DEV_PER_LOOP 100
189 int do_dev
= MAX_DO_DEV_PER_LOOP
;
190 /* Use a local list here since we add non-urgent
191 * events back to the global one when called with
196 /* Give urgent case more budget */
198 do_dev
+= MAX_DO_DEV_PER_LOOP
;
201 * Limit the number of linkwatch events to one
202 * per second so that a runaway driver does not
203 * cause a storm of messages on the netlink
204 * socket. This limit does not apply to up events
205 * while the device qdisc is down.
208 linkwatch_nextevent
= jiffies
+ HZ
;
209 /* Limit wrap-around effect on delay. */
210 else if (time_after(linkwatch_nextevent
, jiffies
+ HZ
))
211 linkwatch_nextevent
= jiffies
;
213 clear_bit(LW_URGENT
, &linkwatch_flags
);
215 spin_lock_irq(&lweventlist_lock
);
216 list_splice_init(&lweventlist
, &wrk
);
218 while (!list_empty(&wrk
) && do_dev
> 0) {
219 struct net_device
*dev
;
221 dev
= list_first_entry(&wrk
, struct net_device
, link_watch_list
);
222 list_del_init(&dev
->link_watch_list
);
224 if (!netif_device_present(dev
) ||
225 (urgent_only
&& !linkwatch_urgent_event(dev
))) {
226 list_add_tail(&dev
->link_watch_list
, &lweventlist
);
229 /* We must free netdev tracker under
230 * the spinlock protection.
232 netdev_tracker_free(dev
, &dev
->linkwatch_dev_tracker
);
233 spin_unlock_irq(&lweventlist_lock
);
234 linkwatch_do_dev(dev
);
236 spin_lock_irq(&lweventlist_lock
);
239 /* Add the remaining work back to lweventlist */
240 list_splice_init(&wrk
, &lweventlist
);
242 if (!list_empty(&lweventlist
))
243 linkwatch_schedule_work(0);
244 spin_unlock_irq(&lweventlist_lock
);
/*
 * linkwatch_sync_dev - take @dev off the pending list and process it now.
 * @dev: device to synchronise
 *
 * Under lweventlist_lock (irqsave variant): if the device is queued, it is
 * unlinked with list_del_init() and its netdev tracker is freed while the
 * lock is still held. linkwatch_do_dev(dev) is called after the lock has
 * been released.
 *
 * NOTE(review): the local declarations, and whether linkwatch_do_dev() is
 * conditional on the device having been queued, are not visible in this
 * listing - confirm against the complete file.
 */
247 void linkwatch_sync_dev(struct net_device
*dev
)
252 spin_lock_irqsave(&lweventlist_lock
, flags
);
253 if (!list_empty(&dev
->link_watch_list
)) {
254 list_del_init(&dev
->link_watch_list
);
256 /* We must release netdev tracker under
257 * the spinlock protection.
259 netdev_tracker_free(dev
, &dev
->linkwatch_dev_tracker
);
261 spin_unlock_irqrestore(&lweventlist_lock
, flags
);
263 linkwatch_do_dev(dev
);
267 /*
 * linkwatch_run_queue - run the pending linkwatch queue for all events,
 * i.e. __linkwatch_run_queue() with urgent_only == 0.
 *
 * Must be called with the rtnl semaphore held.
 */
268 void linkwatch_run_queue(void)
270 __linkwatch_run_queue(0);
/*
 * linkwatch_event - work handler bound to linkwatch_work.
 * @dummy: work item pointer; not used by the visible code
 *
 * Runs the queue with urgent_only set to
 * time_after(linkwatch_nextevent, jiffies), i.e. only urgent events are
 * processed while the next permitted event time is still in the future.
 *
 * NOTE(review): the lines surrounding the call are not visible in this
 * listing (presumably rtnl locking, given that linkwatch_run_queue() requires
 * the rtnl semaphore) - confirm against the complete file.
 */
274 static void linkwatch_event(struct work_struct
*dummy
)
277 __linkwatch_run_queue(time_after(linkwatch_nextevent
, jiffies
));
/*
 * linkwatch_fire_event - record a link event for @dev and schedule handling.
 * @dev: device whose link state changed
 *
 * Urgency is evaluated first with linkwatch_urgent_event(). If
 * __LINK_STATE_LINKWATCH_PENDING was not already set in dev->state
 * (test_and_set_bit() returned 0), the device is queued through
 * linkwatch_add_event(). linkwatch_schedule_work() is then called with the
 * urgency. The symbol is exported with EXPORT_SYMBOL.
 *
 * NOTE(review): the lines between the queueing step and the scheduling call
 * are not visible in this listing, so the handling of an already-pending,
 * non-urgent event cannot be determined from here.
 */
282 void linkwatch_fire_event(struct net_device
*dev
)
284 bool urgent
= linkwatch_urgent_event(dev
);
/* The pending bit ensures a device is queued at most once. */
286 if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING
, &dev
->state
)) {
287 linkwatch_add_event(dev
);
291 linkwatch_schedule_work(urgent
);
293 EXPORT_SYMBOL(linkwatch_fire_event
);