// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net-sysfs.c - network device class and attributes
 *
 * Copyright (c) 2003 Stephen Hemminger <shemminger@osdl.org>
 */

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/sched/isolation.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/jiffies.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/of_net.h>
#include <linux/cpu.h>
#include <net/netdev_rx_queue.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"
#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";

/* Caller holds RTNL, netdev->lock or RCU */
static inline int dev_isalive(const struct net_device *dev)
{
	return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED;
}

/* use same locking rules as GIF* ioctl's */
static ssize_t netdev_show(const struct device *dev,
			   struct device_attribute *attr, char *buf,
			   ssize_t (*format)(const struct net_device *, char *))
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = (*format)(ndev, buf);
	rcu_read_unlock();

	return ret;
}
/* generate a show function for simple field */
#define NETDEVICE_SHOW(field, format_string)				\
static ssize_t format_##field(const struct net_device *dev, char *buf)	\
{									\
	return sysfs_emit(buf, format_string, READ_ONCE(dev->field));	\
}									\
static ssize_t field##_show(struct device *dev,				\
			    struct device_attribute *attr, char *buf)	\
{									\
	return netdev_show(dev, attr, buf, format_##field);		\
}									\

#define NETDEVICE_SHOW_RO(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RO(field)

#define NETDEVICE_SHOW_RW(field, format_string)				\
NETDEVICE_SHOW(field, format_string);					\
static DEVICE_ATTR_RW(field)
/* use same locking and permission rules as SIF* ioctl's */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t len,
			    int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		goto err;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		ret = (*set)(netdev, new);
		if (ret == 0)
			ret = len;
	}
	rtnl_unlock();
err:
	return ret;
}

/* Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() */
static ssize_t
netdev_lock_store(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t len,
		  int (*set)(struct net_device *, unsigned long))
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	unsigned long new;
	int ret;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	ret = kstrtoul(buf, 0, &new);
	if (ret)
		return ret;

	netdev_lock(netdev);

	if (dev_isalive(netdev)) {
		ret = (*set)(netdev, new);
		if (ret == 0)
			ret = len;
	}
	netdev_unlock(netdev);

	return ret;
}

NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);

static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct net_device *ndev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev));
}
static DEVICE_ATTR_RO(iflink);

static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
	return sysfs_emit(buf, fmt_dec, READ_ONCE(dev->name_assign_type));
}

static ssize_t name_assign_type_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	if (READ_ONCE(ndev->name_assign_type) != NET_NAME_UNKNOWN)
		ret = netdev_show(dev, attr, buf, format_name_assign_type);

	return ret;
}
static DEVICE_ATTR_RO(name_assign_type);

/* use same locking rules as GIFHWADDR ioctl's (dev_get_mac_address()) */
static ssize_t address_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	down_read(&dev_addr_sem);

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->dev_addr, ndev->addr_len);
	rcu_read_unlock();

	up_read(&dev_addr_sem);
	return ret;
}
static DEVICE_ATTR_RO(address);

static ssize_t broadcast_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	int ret = -EINVAL;

	rcu_read_lock();
	if (dev_isalive(ndev))
		ret = sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
	rcu_read_unlock();
	return ret;
}
static DEVICE_ATTR_RO(broadcast);
static int change_carrier(struct net_device *dev, unsigned long new_carrier)
{
	if (!netif_running(dev))
		return -EINVAL;
	return dev_change_carrier(dev, (bool)new_carrier);
}

static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);

	/* The check is also done in change_carrier; this helps returning early
	 * without hitting the trylock/restart in netdev_store.
	 */
	if (!netdev->netdev_ops->ndo_change_carrier)
		return -EOPNOTSUPP;

	return netdev_store(dev, attr, buf, len, change_carrier);
}

static ssize_t carrier_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	if (!rtnl_trylock())
		return restart_syscall();

	if (netif_running(netdev)) {
		/* Synchronize carrier state with link watch,
		 * see also rtnl_getlink().
		 */
		linkwatch_sync_dev(netdev);

		ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RW(carrier);
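
/*
 * Usage example (illustrative only; "eth0" is a hypothetical interface name):
 *
 *	$ cat /sys/class/net/eth0/carrier
 *	1
 *	# echo 0 > /sys/class/net/eth0/carrier
 *
 * The write path requires CAP_NET_ADMIN in the owning user namespace and a
 * driver that implements ndo_change_carrier (see carrier_store() above).
 */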
static ssize_t speed_show(struct device *dev,
			  struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this helps
	 * returning early without hitting the trylock/restart below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	if (!rtnl_trylock())
		return restart_syscall();

	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd))
			ret = sysfs_emit(buf, fmt_dec, cmd.base.speed);
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(speed);

static ssize_t duplex_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	int ret = -EINVAL;

	/* The check is also done in __ethtool_get_link_ksettings; this helps
	 * returning early without hitting the trylock/restart below.
	 */
	if (!netdev->ethtool_ops->get_link_ksettings)
		return ret;

	if (!rtnl_trylock())
		return restart_syscall();

	if (netif_running(netdev)) {
		struct ethtool_link_ksettings cmd;

		if (!__ethtool_get_link_ksettings(netdev, &cmd)) {
			const char *duplex;

			switch (cmd.base.duplex) {
			case DUPLEX_HALF:
				duplex = "half";
				break;
			case DUPLEX_FULL:
				duplex = "full";
				break;
			default:
				duplex = "unknown";
				break;
			}

			ret = sysfs_emit(buf, "%s\n", duplex);
		}
	}
	rtnl_unlock();
	return ret;
}
static DEVICE_ATTR_RO(duplex);

static ssize_t testing_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(testing);

static ssize_t dormant_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	if (netif_running(netdev))
		return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev));

	return -EINVAL;
}
static DEVICE_ATTR_RO(dormant);

static const char *const operstates[] = {
	"unknown",
	"notpresent", /* currently unused */
	"down",
	"lowerlayerdown",
	"testing",
	"dormant",
	"up"
};

static ssize_t operstate_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	unsigned char operstate;

	operstate = READ_ONCE(netdev->operstate);
	if (!netif_running(netdev))
		operstate = IF_OPER_DOWN;

	if (operstate >= ARRAY_SIZE(operstates))
		return -EINVAL; /* should not happen */

	return sysfs_emit(buf, "%s\n", operstates[operstate]);
}
static DEVICE_ATTR_RO(operstate);

static ssize_t carrier_changes_show(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec,
			  atomic_read(&netdev->carrier_up_count) +
			  atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);

static ssize_t carrier_up_count_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
}
static DEVICE_ATTR_RO(carrier_up_count);

static ssize_t carrier_down_count_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct net_device *netdev = to_net_dev(dev);

	return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_down_count);
/* read-write attributes */

static int change_mtu(struct net_device *dev, unsigned long new_mtu)
{
	return dev_set_mtu(dev, (int)new_mtu);
}

static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_mtu);
}
NETDEVICE_SHOW_RW(mtu, fmt_dec);

static int change_flags(struct net_device *dev, unsigned long new_flags)
{
	return dev_change_flags(dev, (unsigned int)new_flags, NULL);
}

static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_flags);
}
NETDEVICE_SHOW_RW(flags, fmt_hex);

static ssize_t tx_queue_len_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_store(dev, attr, buf, len, dev_change_tx_queue_len);
}
NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);

static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
	netdev_set_gro_flush_timeout(dev, val);
	return 0;
}

static ssize_t gro_flush_timeout_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout);
}
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);

static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
	if (val > S32_MAX)
		return -ERANGE;

	netdev_set_defer_hard_irqs(dev, (u32)val);
	return 0;
}

static ssize_t napi_defer_hard_irqs_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t len)
{
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	return netdev_lock_store(dev, attr, buf, len,
				 change_napi_defer_hard_irqs);
}
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);

static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t len)
{
	struct net_device *netdev = to_net_dev(dev);
	struct net *net = dev_net(netdev);
	size_t count = len;
	ssize_t ret = 0;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* ignore trailing newline */
	if (len > 0 && buf[len - 1] == '\n')
		--count;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		ret = dev_set_alias(netdev, buf, count);
		if (ret < 0)
			goto err;
		ret = len;
		netdev_state_change(netdev);
	}
err:
	rtnl_unlock();

	return ret;
}

static ssize_t ifalias_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	const struct net_device *netdev = to_net_dev(dev);
	char tmp[IFALIASZ];
	ssize_t ret = 0;

	ret = dev_get_alias(netdev, tmp, sizeof(tmp));
	if (ret > 0)
		ret = sysfs_emit(buf, "%s\n", tmp);
	return ret;
}
static DEVICE_ATTR_RW(ifalias);

static int change_group(struct net_device *dev, unsigned long new_group)
{
	dev_set_group(dev, (int)new_group);
	return 0;
}

static ssize_t group_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_group);
}
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, 0644, group_show, group_store);

static int change_proto_down(struct net_device *dev, unsigned long proto_down)
{
	return dev_change_proto_down(dev, (bool)proto_down);
}

static ssize_t proto_down_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t len)
{
	return netdev_store(dev, attr, buf, len, change_proto_down);
}
NETDEVICE_SHOW_RW(proto_down, fmt_dec);
static ssize_t phys_port_id_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* The check is also done in dev_get_phys_port_id; this helps returning
	 * early without hitting the trylock/restart below.
	 */
	if (!netdev->netdev_ops->ndo_get_phys_port_id)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		struct netdev_phys_item_id ppid;

		ret = dev_get_phys_port_id(netdev, &ppid);
		if (!ret)
			ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_id);

static ssize_t phys_port_name_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* The checks are also done in dev_get_phys_port_name; this helps
	 * returning early without hitting the trylock/restart below.
	 */
	if (!netdev->netdev_ops->ndo_get_phys_port_name &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		char name[IFNAMSIZ];

		ret = dev_get_phys_port_name(netdev, name, sizeof(name));
		if (!ret)
			ret = sysfs_emit(buf, "%s\n", name);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_port_name);

static ssize_t phys_switch_id_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	/* The checks are also done in dev_get_port_parent_id; this helps
	 * returning early without hitting the trylock/restart below. This works
	 * because recurse is false when calling dev_get_port_parent_id.
	 */
	if (!netdev->netdev_ops->ndo_get_port_parent_id &&
	    !netdev->devlink_port)
		return -EOPNOTSUPP;

	if (!rtnl_trylock())
		return restart_syscall();

	if (dev_isalive(netdev)) {
		struct netdev_phys_item_id ppid = { };

		ret = dev_get_port_parent_id(netdev, &ppid, false);
		if (!ret)
			ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id);
	}
	rtnl_unlock();

	return ret;
}
static DEVICE_ATTR_RO(phys_switch_id);
static ssize_t threaded_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct net_device *netdev = to_net_dev(dev);
	ssize_t ret = -EINVAL;

	rcu_read_lock();

	if (dev_isalive(netdev))
		ret = sysfs_emit(buf, fmt_dec, READ_ONCE(netdev->threaded));

	rcu_read_unlock();

	return ret;
}

static int modify_napi_threaded(struct net_device *dev, unsigned long val)
{
	int ret;

	if (list_empty(&dev->napi_list))
		return -EOPNOTSUPP;

	if (val != 0 && val != 1)
		return -EOPNOTSUPP;

	ret = dev_set_threaded(dev, val);

	return ret;
}

static ssize_t threaded_store(struct device *dev,
			      struct device_attribute *attr,
			      const char *buf, size_t len)
{
	return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded);
}
static DEVICE_ATTR_RW(threaded);

static struct attribute *net_class_attrs[] __ro_after_init = {
	&dev_attr_netdev_group.attr,
	&dev_attr_type.attr,
	&dev_attr_dev_id.attr,
	&dev_attr_dev_port.attr,
	&dev_attr_iflink.attr,
	&dev_attr_ifindex.attr,
	&dev_attr_name_assign_type.attr,
	&dev_attr_addr_assign_type.attr,
	&dev_attr_addr_len.attr,
	&dev_attr_link_mode.attr,
	&dev_attr_address.attr,
	&dev_attr_broadcast.attr,
	&dev_attr_speed.attr,
	&dev_attr_duplex.attr,
	&dev_attr_dormant.attr,
	&dev_attr_testing.attr,
	&dev_attr_operstate.attr,
	&dev_attr_carrier_changes.attr,
	&dev_attr_ifalias.attr,
	&dev_attr_carrier.attr,
	&dev_attr_mtu.attr,
	&dev_attr_flags.attr,
	&dev_attr_tx_queue_len.attr,
	&dev_attr_gro_flush_timeout.attr,
	&dev_attr_napi_defer_hard_irqs.attr,
	&dev_attr_phys_port_id.attr,
	&dev_attr_phys_port_name.attr,
	&dev_attr_phys_switch_id.attr,
	&dev_attr_proto_down.attr,
	&dev_attr_carrier_up_count.attr,
	&dev_attr_carrier_down_count.attr,
	&dev_attr_threaded.attr,
	NULL,
};
ATTRIBUTE_GROUPS(net_class);
/* Show a given attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
			    struct device_attribute *attr, char *buf,
			    unsigned long offset)
{
	struct net_device *dev = to_net_dev(d);
	ssize_t ret = -EINVAL;

	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
		offset % sizeof(u64) != 0);

	rcu_read_lock();
	if (dev_isalive(dev)) {
		struct rtnl_link_stats64 temp;
		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

		ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
	}
	rcu_read_unlock();
	return ret;
}

/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name)						\
static ssize_t name##_show(struct device *d,				\
			   struct device_attribute *attr, char *buf)	\
{									\
	return netstat_show(d, attr, buf,				\
			    offsetof(struct rtnl_link_stats64, name));	\
}									\
static DEVICE_ATTR_RO(name)
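
/*
 * Illustrative sketch (not part of the original file): NETSTAT_ENTRY(rx_packets)
 * below expands to roughly this show routine plus the DEVICE_ATTR_RO()
 * boilerplate.
 */
#if 0	/* example expansion only, never compiled */
static ssize_t rx_packets_show(struct device *d,
			       struct device_attribute *attr, char *buf)
{
	return netstat_show(d, attr, buf,
			    offsetof(struct rtnl_link_stats64, rx_packets));
}
#endif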
NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
NETSTAT_ENTRY(rx_bytes);
NETSTAT_ENTRY(tx_bytes);
NETSTAT_ENTRY(rx_errors);
NETSTAT_ENTRY(tx_errors);
NETSTAT_ENTRY(rx_dropped);
NETSTAT_ENTRY(tx_dropped);
NETSTAT_ENTRY(multicast);
NETSTAT_ENTRY(collisions);
NETSTAT_ENTRY(rx_length_errors);
NETSTAT_ENTRY(rx_over_errors);
NETSTAT_ENTRY(rx_crc_errors);
NETSTAT_ENTRY(rx_frame_errors);
NETSTAT_ENTRY(rx_fifo_errors);
NETSTAT_ENTRY(rx_missed_errors);
NETSTAT_ENTRY(tx_aborted_errors);
NETSTAT_ENTRY(tx_carrier_errors);
NETSTAT_ENTRY(tx_fifo_errors);
NETSTAT_ENTRY(tx_heartbeat_errors);
NETSTAT_ENTRY(tx_window_errors);
NETSTAT_ENTRY(rx_compressed);
NETSTAT_ENTRY(tx_compressed);
NETSTAT_ENTRY(rx_nohandler);

static struct attribute *netstat_attrs[] __ro_after_init = {
	&dev_attr_rx_packets.attr,
	&dev_attr_tx_packets.attr,
	&dev_attr_rx_bytes.attr,
	&dev_attr_tx_bytes.attr,
	&dev_attr_rx_errors.attr,
	&dev_attr_tx_errors.attr,
	&dev_attr_rx_dropped.attr,
	&dev_attr_tx_dropped.attr,
	&dev_attr_multicast.attr,
	&dev_attr_collisions.attr,
	&dev_attr_rx_length_errors.attr,
	&dev_attr_rx_over_errors.attr,
	&dev_attr_rx_crc_errors.attr,
	&dev_attr_rx_frame_errors.attr,
	&dev_attr_rx_fifo_errors.attr,
	&dev_attr_rx_missed_errors.attr,
	&dev_attr_tx_aborted_errors.attr,
	&dev_attr_tx_carrier_errors.attr,
	&dev_attr_tx_fifo_errors.attr,
	&dev_attr_tx_heartbeat_errors.attr,
	&dev_attr_tx_window_errors.attr,
	&dev_attr_rx_compressed.attr,
	&dev_attr_tx_compressed.attr,
	&dev_attr_rx_nohandler.attr,
	NULL
};

static const struct attribute_group netstat_group = {
	.name = "statistics",
	.attrs = netstat_attrs,
};

static struct attribute *wireless_attrs[] = {
	NULL
};

static const struct attribute_group wireless_group = {
	.name = "wireless",
	.attrs = wireless_attrs,
};

static bool wireless_group_needed(struct net_device *ndev)
{
#if IS_ENABLED(CONFIG_CFG80211)
	if (ndev->ieee80211_ptr)
		return true;
#endif
#if IS_ENABLED(CONFIG_WIRELESS_EXT)
	if (ndev->wireless_handlers)
		return true;
#endif
	return false;
}

#else /* CONFIG_SYSFS */
#define net_class_groups	NULL
#endif /* CONFIG_SYSFS */

#ifdef CONFIG_SYSFS
#define to_rx_queue_attr(_attr) \
	container_of(_attr, struct rx_queue_attribute, attr)

#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)

static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
				  char *buf)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(queue, buf);
}

static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
				   const char *buf, size_t count)
{
	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
	struct netdev_rx_queue *queue = to_rx_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(queue, buf, count);
}

static const struct sysfs_ops rx_queue_sysfs_ops = {
	.show = rx_queue_attr_show,
	.store = rx_queue_attr_store,
};

#ifdef CONFIG_RPS
static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
{
	struct rps_map *map;
	cpumask_var_t mask;
	int i, len;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	rcu_read_lock();
	map = rcu_dereference(queue->rps_map);
	if (map)
		for (i = 0; i < map->len; i++)
			cpumask_set_cpu(map->cpus[i], mask);

	len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask));
	rcu_read_unlock();
	free_cpumask_var(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}
static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
					cpumask_var_t mask)
{
	static DEFINE_MUTEX(rps_map_mutex);
	struct rps_map *old_map, *map;
	int cpu, i;

	map = kzalloc(max_t(unsigned int,
			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
		      GFP_KERNEL);
	if (!map)
		return -ENOMEM;

	i = 0;
	for_each_cpu_and(cpu, mask, cpu_online_mask)
		map->cpus[i++] = cpu;

	if (i) {
		map->len = i;
	} else {
		kfree(map);
		map = NULL;
	}

	mutex_lock(&rps_map_mutex);
	old_map = rcu_dereference_protected(queue->rps_map,
					    mutex_is_locked(&rps_map_mutex));
	rcu_assign_pointer(queue->rps_map, map);

	if (map)
		static_branch_inc(&rps_needed);
	if (old_map)
		static_branch_dec(&rps_needed);

	mutex_unlock(&rps_map_mutex);

	if (old_map)
		kfree_rcu(old_map, rcu);
	return 0;
}

int rps_cpumask_housekeeping(struct cpumask *mask)
{
	if (!cpumask_empty(mask)) {
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
		if (cpumask_empty(mask))
			return -EINVAL;
	}
	return 0;
}

static ssize_t store_rps_map(struct netdev_rx_queue *queue,
			     const char *buf, size_t len)
{
	cpumask_var_t mask;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err)
		goto out;

	err = rps_cpumask_housekeeping(mask);
	if (err)
		goto out;

	err = netdev_rx_queue_set_rps_mask(queue, mask);

out:
	free_cpumask_var(mask);
	return err ? : len;
}
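
/*
 * Usage example (illustrative only; "eth0" is a hypothetical interface name):
 * steer receive processing for queue 0 onto CPUs 0-3 by writing a hex CPU
 * bitmap to the rps_cpus attribute defined further below.
 *
 *	# echo f > /sys/class/net/eth0/queues/rx-0/rps_cpus
 *
 * CPUs excluded by the isolation/housekeeping masks are filtered out by
 * rps_cpumask_housekeeping() above; a mask with no housekeeping CPU left
 * fails with -EINVAL.
 */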
static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					   char *buf)
{
	struct rps_dev_flow_table *flow_table;
	unsigned long val = 0;

	rcu_read_lock();
	flow_table = rcu_dereference(queue->rps_flow_table);
	if (flow_table)
		val = (unsigned long)flow_table->mask + 1;
	rcu_read_unlock();

	return sysfs_emit(buf, "%lu\n", val);
}

static void rps_dev_flow_table_release(struct rcu_head *rcu)
{
	struct rps_dev_flow_table *table = container_of(rcu,
	    struct rps_dev_flow_table, rcu);

	vfree(table);
}

static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
					    const char *buf, size_t len)
{
	unsigned long mask, count;
	struct rps_dev_flow_table *table, *old_table;
	static DEFINE_SPINLOCK(rps_dev_flow_lock);
	int rc;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	rc = kstrtoul(buf, 0, &count);
	if (rc < 0)
		return rc;

	if (count) {
		mask = count - 1;
		/* mask = roundup_pow_of_two(count) - 1;
		 * without overflows...
		 */
		while ((mask | (mask >> 1)) != mask)
			mask |= (mask >> 1);
		/* On 64 bit arches, must check mask fits in table->mask (u32),
		 * and on 32bit arches, must check
		 * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow.
		 */
#if BITS_PER_LONG > 32
		if (mask > (unsigned long)(u32)mask)
			return -EINVAL;
#else
		if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
				/ sizeof(struct rps_dev_flow)) {
			/* Enforce a limit to prevent overflow */
			return -EINVAL;
		}
#endif
		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
		if (!table)
			return -ENOMEM;

		table->mask = mask;
		for (count = 0; count <= mask; count++)
			table->flows[count].cpu = RPS_NO_CPU;
	} else {
		table = NULL;
	}

	spin_lock(&rps_dev_flow_lock);
	old_table = rcu_dereference_protected(queue->rps_flow_table,
					      lockdep_is_held(&rps_dev_flow_lock));
	rcu_assign_pointer(queue->rps_flow_table, table);
	spin_unlock(&rps_dev_flow_lock);

	if (old_table)
		call_rcu(&old_table->rcu, rps_dev_flow_table_release);

	return len;
}

static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
	= __ATTR(rps_cpus, 0644, show_rps_map, store_rps_map);

static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
	= __ATTR(rps_flow_cnt, 0644,
		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
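
/*
 * Usage example (illustrative only; "eth0" is a hypothetical interface name):
 * size the per-queue flow table used by accelerated RFS. The count is rounded
 * up to a power of two by store_rps_dev_flow_table_cnt() above.
 *
 *	# echo 32768 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
 */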
#endif /* CONFIG_RPS */

static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#ifdef CONFIG_RPS
	&rps_cpus_attribute.attr,
	&rps_dev_flow_table_cnt_attribute.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(rx_queue_default);

static void rx_queue_release(struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
#ifdef CONFIG_RPS
	struct rps_map *map;
	struct rps_dev_flow_table *flow_table;

	map = rcu_dereference_protected(queue->rps_map, 1);
	if (map) {
		RCU_INIT_POINTER(queue->rps_map, NULL);
		kfree_rcu(map, rcu);
	}

	flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
	if (flow_table) {
		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
	}
#endif

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const void *rx_queue_namespace(const struct kobject *kobj)
{
	struct netdev_rx_queue *queue = to_rx_queue(kobj);
	struct device *dev = &queue->dev->dev;
	const void *ns = NULL;

	if (dev->class && dev->class->namespace)
		ns = dev->class->namespace(dev);

	return ns;
}

static void rx_queue_get_ownership(const struct kobject *kobj,
				   kuid_t *uid, kgid_t *gid)
{
	const struct net *net = rx_queue_namespace(kobj);

	net_ns_get_ownership(net, uid, gid);
}

static const struct kobj_type rx_queue_ktype = {
	.sysfs_ops = &rx_queue_sysfs_ops,
	.release = rx_queue_release,
	.default_groups = rx_queue_default_groups,
	.namespace = rx_queue_namespace,
	.get_ownership = rx_queue_get_ownership,
};

static int rx_queue_default_mask(struct net_device *dev,
				 struct netdev_rx_queue *queue)
{
#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
	struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);

	if (rps_default_mask && !cpumask_empty(rps_default_mask))
		return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
#endif
	return 0;
}

static int rx_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Kobject_put later will trigger rx_queue_release call which
	 * decreases dev refcount: Take that reference here
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
				     "rx-%u", index);
	if (error)
		goto err;

	if (dev->sysfs_rx_queue_group) {
		error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
		if (error)
			goto err;
	}

	error = rx_queue_default_mask(dev, queue);
	if (error)
		goto err;

	kobject_uevent(kobj, KOBJ_ADD);

	return error;

err:
	kobject_put(kobj);
	return error;
}

static int rx_queue_change_owner(struct net_device *dev, int index, kuid_t kuid,
				 kgid_t kgid)
{
	struct netdev_rx_queue *queue = dev->_rx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (dev->sysfs_rx_queue_group)
		error = sysfs_group_change_owner(
			kobj, dev->sysfs_rx_queue_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */
int
net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = old_num; i < new_num; i++) {
		error = rx_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct kobject *kobj = &dev->_rx[i].kobj;

		if (!refcount_read(&dev_net(dev)->ns.count))
			kobj->uevent_suppress = 1;
		if (dev->sysfs_rx_queue_group)
			sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
		kobject_put(kobj);
	}

	return error;
#else
	return 0;
#endif
}

static int net_rx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

#ifndef CONFIG_RPS
	if (!dev->sysfs_rx_queue_group)
		return 0;
#endif
	for (i = 0; i < num; i++) {
		error = rx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif
}

#ifdef CONFIG_SYSFS
/*
 * netdev_queue sysfs structures and functions.
 */
struct netdev_queue_attribute {
	struct attribute attr;
	ssize_t (*show)(struct netdev_queue *queue, char *buf);
	ssize_t (*store)(struct netdev_queue *queue,
			 const char *buf, size_t len);
};
#define to_netdev_queue_attr(_attr) \
	container_of(_attr, struct netdev_queue_attribute, attr)

#define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)

static ssize_t netdev_queue_attr_show(struct kobject *kobj,
				      struct attribute *attr, char *buf)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(queue, buf);
}

static ssize_t netdev_queue_attr_store(struct kobject *kobj,
				       struct attribute *attr,
				       const char *buf, size_t count)
{
	const struct netdev_queue_attribute *attribute
		= to_netdev_queue_attr(attr);
	struct netdev_queue *queue = to_netdev_queue(kobj);

	if (!attribute->store)
		return -EIO;

	return attribute->store(queue, buf, count);
}

static const struct sysfs_ops netdev_queue_sysfs_ops = {
	.show = netdev_queue_attr_show,
	.store = netdev_queue_attr_store,
};

static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
{
	unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout);

	return sysfs_emit(buf, fmt_ulong, trans_timeout);
}

static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
	struct net_device *dev = queue->dev;
	unsigned int i;

	i = queue - dev->_tx;
	BUG_ON(i >= dev->num_tx_queues);

	return i;
}

static ssize_t traffic_class_show(struct netdev_queue *queue,
				  char *buf)
{
	struct net_device *dev = queue->dev;
	int num_tc, tc;
	int index;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	if (!rtnl_trylock())
		return restart_syscall();

	index = get_netdev_queue_index(queue);

	/* If queue belongs to subordinate dev use its TC mapping */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	num_tc = dev->num_tc;
	tc = netdev_txq_to_tc(dev, index);

	rtnl_unlock();

	if (tc < 0)
		return -EINVAL;

	/* We can report the traffic class one of two ways:
	 * Subordinate device traffic classes are reported with the traffic
	 * class first, and then the subordinate class so for example TC0 on
	 * subordinate device 2 will be reported as "0-2". If the queue
	 * belongs to the root device it will be reported with just the
	 * traffic class, so just "0" for TC 0 for example.
	 */
	return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) :
			    sysfs_emit(buf, "%d\n", tc);
}
#ifdef CONFIG_XPS
static ssize_t tx_maxrate_show(struct netdev_queue *queue,
			       char *buf)
{
	return sysfs_emit(buf, "%lu\n", queue->tx_maxrate);
}

static ssize_t tx_maxrate_store(struct netdev_queue *queue,
				const char *buf, size_t len)
{
	struct net_device *dev = queue->dev;
	int err, index = get_netdev_queue_index(queue);
	u32 rate = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* The check is also done later; this helps returning early without
	 * hitting the trylock/restart below.
	 */
	if (!dev->netdev_ops->ndo_set_tx_maxrate)
		return -EOPNOTSUPP;

	err = kstrtou32(buf, 10, &rate);
	if (err < 0)
		return err;

	if (!rtnl_trylock())
		return restart_syscall();

	err = -EOPNOTSUPP;
	if (dev->netdev_ops->ndo_set_tx_maxrate)
		err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate);

	rtnl_unlock();
	if (!err) {
		queue->tx_maxrate = rate;
		return len;
	}
	return err;
}

static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
	= __ATTR_RW(tx_maxrate);
#endif

static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
	= __ATTR_RO(tx_timeout);

static struct netdev_queue_attribute queue_traffic_class __ro_after_init
	= __ATTR_RO(traffic_class);

#ifdef CONFIG_BQL
/*
 * Byte queue limits sysfs structures and functions.
 */
static ssize_t bql_show(char *buf, unsigned int value)
{
	return sysfs_emit(buf, "%u\n", value);
}

static ssize_t bql_set(const char *buf, const size_t count,
		       unsigned int *pvalue)
{
	unsigned int value;
	int err;

	if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
		value = DQL_MAX_LIMIT;
	} else {
		err = kstrtouint(buf, 10, &value);
		if (err < 0)
			return err;
		if (value > DQL_MAX_LIMIT)
			return -EINVAL;
	}

	*pvalue = value;

	return count;
}
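
/*
 * Usage note (illustrative only; "eth0" is a hypothetical interface name):
 * bql_set() accepts either a decimal byte count or the literal string "max",
 * which maps to DQL_MAX_LIMIT, e.g.
 *
 *	# echo max > /sys/class/net/eth0/queues/tx-0/byte_queue_limits/limit_max
 */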
static ssize_t bql_show_hold_time(struct netdev_queue *queue,
				  char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
}

static ssize_t bql_set_hold_time(struct netdev_queue *queue,
				 const char *buf, size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	dql->slack_hold_time = msecs_to_jiffies(value);

	return len;
}

static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
	= __ATTR(hold_time, 0644,
		 bql_show_hold_time, bql_set_hold_time);

static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs));
}

static ssize_t bql_set_stall_thrs(struct netdev_queue *queue,
				  const char *buf, size_t len)
{
	struct dql *dql = &queue->dql;
	unsigned int value;
	int err;

	err = kstrtouint(buf, 10, &value);
	if (err < 0)
		return err;

	value = msecs_to_jiffies(value);
	if (value && (value < 4 || value > 4 / 2 * BITS_PER_LONG))
		return -ERANGE;

	if (!dql->stall_thrs && value)
		dql->last_reap = jiffies;
	/* Force last_reap to be live */
	smp_wmb();
	dql->stall_thrs = value;

	return len;
}

static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init =
	__ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs);

static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf)
{
	return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max));
}

static ssize_t bql_set_stall_max(struct netdev_queue *queue,
				 const char *buf, size_t len)
{
	WRITE_ONCE(queue->dql.stall_max, 0);
	return len;
}

static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init =
	__ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max);

static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%lu\n", dql->stall_cnt);
}

static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init =
	__ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL);

static ssize_t bql_show_inflight(struct netdev_queue *queue,
				 char *buf)
{
	struct dql *dql = &queue->dql;

	return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed);
}

static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
	__ATTR(inflight, 0444, bql_show_inflight, NULL);

#define BQL_ATTR(NAME, FIELD)						\
static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
				 char *buf)				\
{									\
	return bql_show(buf, queue->dql.FIELD);				\
}									\
									\
static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
				const char *buf, size_t len)		\
{									\
	return bql_set(buf, len, &queue->dql.FIELD);			\
}									\
									\
static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
	= __ATTR(NAME, 0644,						\
		 bql_show_ ## NAME, bql_set_ ## NAME)

BQL_ATTR(limit, limit);
BQL_ATTR(limit_max, max_limit);
BQL_ATTR(limit_min, min_limit);

static struct attribute *dql_attrs[] __ro_after_init = {
	&bql_limit_attribute.attr,
	&bql_limit_max_attribute.attr,
	&bql_limit_min_attribute.attr,
	&bql_hold_time_attribute.attr,
	&bql_inflight_attribute.attr,
	&bql_stall_thrs_attribute.attr,
	&bql_stall_cnt_attribute.attr,
	&bql_stall_max_attribute.attr,
	NULL
};

static const struct attribute_group dql_group = {
	.name = "byte_queue_limits",
	.attrs = dql_attrs,
};
#else
/* Fake declaration, all the code using it should be dead */
static const struct attribute_group dql_group = {};
#endif /* CONFIG_BQL */
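
/*
 * The BQL attributes above appear under a per-TX-queue directory, e.g.
 * /sys/class/net/<iface>/queues/tx-<n>/byte_queue_limits/{limit,limit_max,
 * limit_min,hold_time,inflight,stall_thrs,stall_cnt,stall_max}
 * (path shown for illustration; <iface> and <n> are placeholders).
 */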
#ifdef CONFIG_XPS
static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
			      int tc, char *buf, enum xps_map_type type)
{
	struct xps_dev_maps *dev_maps;
	unsigned long *mask;
	unsigned int nr_ids;
	int j, len;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps[type]);

	/* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0
	 * when dev_maps hasn't been allocated yet, to be backward compatible.
	 */
	nr_ids = dev_maps ? dev_maps->nr_ids :
		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);

	mask = bitmap_zalloc(nr_ids, GFP_NOWAIT);
	if (!mask) {
		rcu_read_unlock();
		return -ENOMEM;
	}

	if (!dev_maps || tc >= dev_maps->num_tc)
		goto out_no_maps;

	for (j = 0; j < nr_ids; j++) {
		int i, tci = j * dev_maps->num_tc + tc;
		struct xps_map *map;

		map = rcu_dereference(dev_maps->attr_map[tci]);
		if (!map)
			continue;

		for (i = map->len; i--;) {
			if (map->queues[i] == index) {
				__set_bit(j, mask);
				break;
			}
		}
	}
out_no_maps:
	rcu_read_unlock();

	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
	bitmap_free(mask);

	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int len, tc;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	index = get_netdev_queue_index(queue);

	if (!rtnl_trylock())
		return restart_syscall();

	/* If queue belongs to subordinate dev use its map */
	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;

	tc = netdev_txq_to_tc(dev, index);
	if (tc < 0) {
		rtnl_unlock();
		return -EINVAL;
	}

	/* Make sure the subordinate device can't be freed */
	get_device(&dev->dev);
	rtnl_unlock();

	len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);

	put_device(&dev->dev);
	return len;
}

static ssize_t xps_cpus_store(struct netdev_queue *queue,
			      const char *buf, size_t len)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	cpumask_var_t mask;
	int err;

	if (!netif_is_multiqueue(dev))
		return -ENOENT;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
	if (err) {
		free_cpumask_var(mask);
		return err;
	}

	if (!rtnl_trylock()) {
		free_cpumask_var(mask);
		return restart_syscall();
	}

	err = netif_set_xps_queue(dev, mask, index);
	rtnl_unlock();

	free_cpumask_var(mask);

	return err ? : len;
}

static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
	= __ATTR_RW(xps_cpus);
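
/*
 * Usage example (illustrative only; "eth0" is a hypothetical interface name):
 * allow CPUs 0 and 1 to transmit on queue 0 by writing a hex CPU bitmap to
 * the xps_cpus attribute defined above.
 *
 *	# echo 3 > /sys/class/net/eth0/queues/tx-0/xps_cpus
 */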
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
{
	struct net_device *dev = queue->dev;
	unsigned int index;
	int tc;

	index = get_netdev_queue_index(queue);

	if (!rtnl_trylock())
		return restart_syscall();

	tc = netdev_txq_to_tc(dev, index);
	rtnl_unlock();
	if (tc < 0)
		return -EINVAL;

	return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
}

static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
			      size_t len)
{
	struct net_device *dev = queue->dev;
	struct net *net = dev_net(dev);
	unsigned long *mask;
	unsigned int index;
	int err;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
	if (!mask)
		return -ENOMEM;

	index = get_netdev_queue_index(queue);

	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
	if (err) {
		bitmap_free(mask);
		return err;
	}

	if (!rtnl_trylock()) {
		bitmap_free(mask);
		return restart_syscall();
	}

	cpus_read_lock();
	err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS);
	cpus_read_unlock();

	rtnl_unlock();

	bitmap_free(mask);
	return err ? : len;
}

static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
	= __ATTR_RW(xps_rxqs);
#endif /* CONFIG_XPS */

static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
	&queue_trans_timeout.attr,
	&queue_traffic_class.attr,
#ifdef CONFIG_XPS
	&xps_cpus_attribute.attr,
	&xps_rxqs_attribute.attr,
	&queue_tx_maxrate.attr,
#endif
	NULL
};
ATTRIBUTE_GROUPS(netdev_queue_default);

static void netdev_queue_release(struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);

	memset(kobj, 0, sizeof(*kobj));
	netdev_put(queue->dev, &queue->dev_tracker);
}

static const void *netdev_queue_namespace(const struct kobject *kobj)
{
	struct netdev_queue *queue = to_netdev_queue(kobj);
	struct device *dev = &queue->dev->dev;
	const void *ns = NULL;

	if (dev->class && dev->class->namespace)
		ns = dev->class->namespace(dev);

	return ns;
}

static void netdev_queue_get_ownership(const struct kobject *kobj,
				       kuid_t *uid, kgid_t *gid)
{
	const struct net *net = netdev_queue_namespace(kobj);

	net_ns_get_ownership(net, uid, gid);
}

static const struct kobj_type netdev_queue_ktype = {
	.sysfs_ops = &netdev_queue_sysfs_ops,
	.release = netdev_queue_release,
	.default_groups = netdev_queue_default_groups,
	.namespace = netdev_queue_namespace,
	.get_ownership = netdev_queue_get_ownership,
};

static bool netdev_uses_bql(const struct net_device *dev)
{
	if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE))
		return false;

	return IS_ENABLED(CONFIG_BQL);
}

static int netdev_queue_add_kobject(struct net_device *dev, int index)
{
	struct netdev_queue *queue = dev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error = 0;

	/* Kobject_put later will trigger netdev_queue_release call
	 * which decreases dev refcount: Take that reference here
	 */
	netdev_hold(queue->dev, &queue->dev_tracker, GFP_KERNEL);

	kobj->kset = dev->queues_kset;
	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
				     "tx-%u", index);
	if (error)
		goto err;

	if (netdev_uses_bql(dev)) {
		error = sysfs_create_group(kobj, &dql_group);
		if (error)
			goto err;
	}

	kobject_uevent(kobj, KOBJ_ADD);
	return 0;

err:
	kobject_put(kobj);
	return error;
}

static int tx_queue_change_owner(struct net_device *ndev, int index,
				 kuid_t kuid, kgid_t kgid)
{
	struct netdev_queue *queue = ndev->_tx + index;
	struct kobject *kobj = &queue->kobj;
	int error;

	error = sysfs_change_owner(kobj, kuid, kgid);
	if (error)
		return error;

	if (netdev_uses_bql(ndev))
		error = sysfs_group_change_owner(kobj, &dql_group, kuid, kgid);

	return error;
}
#endif /* CONFIG_SYSFS */
int
netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
{
#ifdef CONFIG_SYSFS
	int i;
	int error = 0;

	/* Tx queue kobjects are allowed to be updated when a device is being
	 * unregistered, but solely to remove queues from qdiscs. Any path
	 * adding queues should be fixed.
	 */
	WARN(dev->reg_state == NETREG_UNREGISTERING && new_num > old_num,
	     "New queues can't be registered after device unregistration.");

	for (i = old_num; i < new_num; i++) {
		error = netdev_queue_add_kobject(dev, i);
		if (error) {
			new_num = old_num;
			break;
		}
	}

	while (--i >= new_num) {
		struct netdev_queue *queue = dev->_tx + i;

		if (!refcount_read(&dev_net(dev)->ns.count))
			queue->kobj.uevent_suppress = 1;

		if (netdev_uses_bql(dev))
			sysfs_remove_group(&queue->kobj, &dql_group);

		kobject_put(&queue->kobj);
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int net_tx_queue_change_owner(struct net_device *dev, int num,
				     kuid_t kuid, kgid_t kgid)
{
#ifdef CONFIG_SYSFS
	int error = 0;
	int i;

	for (i = 0; i < num; i++) {
		error = tx_queue_change_owner(dev, i, kuid, kgid);
		if (error)
			break;
	}

	return error;
#else
	return 0;
#endif /* CONFIG_SYSFS */
}

static int register_queue_kobjects(struct net_device *dev)
{
	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	dev->queues_kset = kset_create_and_add("queues",
					       NULL, &dev->dev.kobj);
	if (!dev->queues_kset)
		return -ENOMEM;
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	error = net_rx_queue_update_kobjects(dev, 0, real_rx);
	if (error)
		goto error;
	rxq = real_rx;

	error = netdev_queue_update_kobjects(dev, 0, real_tx);
	if (error)
		goto error;
	txq = real_tx;

	return 0;

error:
	netdev_queue_update_kobjects(dev, txq, 0);
	net_rx_queue_update_kobjects(dev, rxq, 0);
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
	return error;
}

static int queue_change_owner(struct net_device *ndev, kuid_t kuid, kgid_t kgid)
{
	int error = 0, real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	if (ndev->queues_kset) {
		error = sysfs_change_owner(&ndev->queues_kset->kobj, kuid, kgid);
		if (error)
			return error;
	}
	real_rx = ndev->real_num_rx_queues;
#endif
	real_tx = ndev->real_num_tx_queues;

	error = net_rx_queue_change_owner(ndev, real_rx, kuid, kgid);
	if (error)
		return error;

	error = net_tx_queue_change_owner(ndev, real_tx, kuid, kgid);
	if (error)
		return error;

	return 0;
}

static void remove_queue_kobjects(struct net_device *dev)
{
	int real_rx = 0, real_tx = 0;

#ifdef CONFIG_SYSFS
	real_rx = dev->real_num_rx_queues;
#endif
	real_tx = dev->real_num_tx_queues;

	net_rx_queue_update_kobjects(dev, real_rx, 0);
	netdev_queue_update_kobjects(dev, real_tx, 0);

	dev->real_num_rx_queues = 0;
	dev->real_num_tx_queues = 0;
#ifdef CONFIG_SYSFS
	kset_unregister(dev->queues_kset);
#endif
}

static bool net_current_may_mount(void)
{
	struct net *net = current->nsproxy->net_ns;

	return ns_capable(net->user_ns, CAP_SYS_ADMIN);
}

static void *net_grab_current_ns(void)
{
	struct net *ns = current->nsproxy->net_ns;
#ifdef CONFIG_NET_NS
	if (ns)
		refcount_inc(&ns->passive);
#endif
	return ns;
}

static const void *net_initial_ns(void)
{
	return &init_net;
}

static const void *net_netlink_ns(struct sock *sk)
{
	return sock_net(sk);
}

const struct kobj_ns_type_operations net_ns_type_operations = {
	.type = KOBJ_NS_TYPE_NET,
	.current_may_mount = net_current_may_mount,
	.grab_current_ns = net_grab_current_ns,
	.netlink_ns = net_netlink_ns,
	.initial_ns = net_initial_ns,
	.drop_ns = net_drop_ns,
};
EXPORT_SYMBOL_GPL(net_ns_type_operations);

static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env)
{
	const struct net_device *dev = to_net_dev(d);
	int retval;

	/* pass interface to uevent. */
	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
	if (retval)
		goto exit;

	/* pass ifindex to uevent.
	 * ifindex is useful as it won't change (interface name may change)
	 * and is what RtNetlink uses natively.
	 */
	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);

exit:
	return retval;
}
/*
 * netdev_release -- destroy and free a dead device.
 * Called when last reference to device kobject is gone.
 */
static void netdev_release(struct device *d)
{
	struct net_device *dev = to_net_dev(d);

	BUG_ON(dev->reg_state != NETREG_RELEASED);

	/* no need to wait for rcu grace period:
	 * device is dead and about to be freed.
	 */
	kfree(rcu_access_pointer(dev->ifalias));
	kvfree(dev);
}

static const void *net_namespace(const struct device *d)
{
	const struct net_device *dev = to_net_dev(d);

	return dev_net(dev);
}

static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
{
	const struct net_device *dev = to_net_dev(d);
	const struct net *net = dev_net(dev);

	net_ns_get_ownership(net, uid, gid);
}

static const struct class net_class = {
	.name = "net",
	.dev_release = netdev_release,
	.dev_groups = net_class_groups,
	.dev_uevent = netdev_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
	.get_ownership = net_get_ownership,
};

#ifdef CONFIG_OF
static int of_dev_node_match(struct device *dev, const void *data)
{
	for (; dev; dev = dev->parent) {
		if (dev->of_node == data)
			return 1;
	}

	return 0;
}

/*
 * of_find_net_device_by_node - lookup the net device for the device node
 * @np: OF device node
 *
 * Looks up the net_device structure corresponding with the device node.
 * If successful, returns a pointer to the net_device with the embedded
 * struct device refcount incremented by one, or NULL on failure. The
 * refcount must be dropped when done with the net_device.
 */
struct net_device *of_find_net_device_by_node(struct device_node *np)
{
	struct device *dev;

	dev = class_find_device(&net_class, NULL, np, of_dev_node_match);
	if (!dev)
		return NULL;

	return to_net_dev(dev);
}
EXPORT_SYMBOL(of_find_net_device_by_node);
#endif

/* Delete sysfs entries but hold kobject reference until after all
 * netdev references are gone.
 */
void netdev_unregister_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;

	if (!refcount_read(&dev_net(ndev)->ns.count))
		dev_set_uevent_suppress(dev, 1);

	kobject_get(&dev->kobj);

	remove_queue_kobjects(ndev);

	pm_runtime_set_memalloc_noio(dev, false);

	device_del(dev);
}

/* Create sysfs entries for network device. */
int netdev_register_kobject(struct net_device *ndev)
{
	struct device *dev = &ndev->dev;
	const struct attribute_group **groups = ndev->sysfs_groups;
	int error = 0;

	device_initialize(dev);
	dev->class = &net_class;
	dev->platform_data = ndev;
	dev->groups = groups;

	dev_set_name(dev, "%s", ndev->name);

#ifdef CONFIG_SYSFS
	/* Allow for a device specific group */
	if (*groups)
		groups++;

	*groups++ = &netstat_group;

	if (wireless_group_needed(ndev))
		*groups++ = &wireless_group;
#endif /* CONFIG_SYSFS */

	error = device_add(dev);
	if (error)
		return error;

	error = register_queue_kobjects(ndev);
	if (error) {
		device_del(dev);
		return error;
	}

	pm_runtime_set_memalloc_noio(dev, true);

	return error;
}

/* Change owner for sysfs entries when moving network devices across network
 * namespaces owned by different user namespaces.
 */
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
			const struct net *net_new)
{
	kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
	kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
	struct device *dev = &ndev->dev;
	int error;

	net_ns_get_ownership(net_old, &old_uid, &old_gid);
	net_ns_get_ownership(net_new, &new_uid, &new_gid);

	/* The network namespace was changed but the owning user namespace is
	 * identical so there's no need to change the owner of sysfs entries.
	 */
	if (uid_eq(old_uid, new_uid) && gid_eq(old_gid, new_gid))
		return 0;

	error = device_change_owner(dev, new_uid, new_gid);
	if (error)
		return error;

	error = queue_change_owner(ndev, new_uid, new_gid);
	if (error)
		return error;

	return 0;
}

int netdev_class_create_file_ns(const struct class_attribute *class_attr,
				const void *ns)
{
	return class_create_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_create_file_ns);

void netdev_class_remove_file_ns(const struct class_attribute *class_attr,
				 const void *ns)
{
	class_remove_file_ns(&net_class, class_attr, ns);
}
EXPORT_SYMBOL(netdev_class_remove_file_ns);
int __init netdev_kobject_init(void)
{
	kobj_ns_type_register(&net_ns_type_operations);
	return class_register(&net_class);
}