2 * net/switchdev/switchdev.c - Switch device API
3 * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
4 * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
12 #include <linux/kernel.h>
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/mutex.h>
16 #include <linux/notifier.h>
17 #include <linux/netdevice.h>
18 #include <linux/etherdevice.h>
19 #include <linux/if_bridge.h>
20 #include <linux/list.h>
21 #include <linux/workqueue.h>
22 #include <linux/if_vlan.h>
23 #include <linux/rtnetlink.h>
24 #include <net/ip_fib.h>
25 #include <net/switchdev.h>
28 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue
31 * @data: pointer to data being queued
32 * @destructor: data destructor
33 * @tritem: transaction item being queued
35 * Enqeueue data item to transaction queue. tritem is typically placed in
36 * cointainter pointed at by data pointer. Destructor is called on
37 * transaction abort and after successful commit phase in case
38 * the caller did not dequeue the item before.
40 void switchdev_trans_item_enqueue(struct switchdev_trans
*trans
,
41 void *data
, void (*destructor
)(void const *),
42 struct switchdev_trans_item
*tritem
)
45 tritem
->destructor
= destructor
;
46 list_add_tail(&tritem
->list
, &trans
->item_list
);
48 EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue
);
50 static struct switchdev_trans_item
*
51 __switchdev_trans_item_dequeue(struct switchdev_trans
*trans
)
53 struct switchdev_trans_item
*tritem
;
55 if (list_empty(&trans
->item_list
))
57 tritem
= list_first_entry(&trans
->item_list
,
58 struct switchdev_trans_item
, list
);
59 list_del(&tritem
->list
);
64 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue
68 void *switchdev_trans_item_dequeue(struct switchdev_trans
*trans
)
70 struct switchdev_trans_item
*tritem
;
72 tritem
= __switchdev_trans_item_dequeue(trans
);
76 EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue
);
78 static void switchdev_trans_init(struct switchdev_trans
*trans
)
80 INIT_LIST_HEAD(&trans
->item_list
);
83 static void switchdev_trans_items_destroy(struct switchdev_trans
*trans
)
85 struct switchdev_trans_item
*tritem
;
87 while ((tritem
= __switchdev_trans_item_dequeue(trans
)))
88 tritem
->destructor(tritem
->data
);
91 static void switchdev_trans_items_warn_destroy(struct net_device
*dev
,
92 struct switchdev_trans
*trans
)
94 WARN(!list_empty(&trans
->item_list
), "%s: transaction item queue is not empty.\n",
96 switchdev_trans_items_destroy(trans
);
/* Queue of deferred operations and the lock protecting it. */
static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);
/* Handler run for a deferred item: receives the target device and the
 * payload that was copied at enqueue time.
 */
typedef void switchdev_deferred_func_t(struct net_device *dev,
				       const void *data);
105 struct switchdev_deferred_item
{
106 struct list_head list
;
107 struct net_device
*dev
;
108 switchdev_deferred_func_t
*func
;
109 unsigned long data
[0];
112 static struct switchdev_deferred_item
*switchdev_deferred_dequeue(void)
114 struct switchdev_deferred_item
*dfitem
;
116 spin_lock_bh(&deferred_lock
);
117 if (list_empty(&deferred
)) {
121 dfitem
= list_first_entry(&deferred
,
122 struct switchdev_deferred_item
, list
);
123 list_del(&dfitem
->list
);
125 spin_unlock_bh(&deferred_lock
);
130 * switchdev_deferred_process - Process ops in deferred queue
132 * Called to flush the ops currently queued in deferred ops queue.
133 * rtnl_lock must be held.
135 void switchdev_deferred_process(void)
137 struct switchdev_deferred_item
*dfitem
;
141 while ((dfitem
= switchdev_deferred_dequeue())) {
142 dfitem
->func(dfitem
->dev
, dfitem
->data
);
143 dev_put(dfitem
->dev
);
147 EXPORT_SYMBOL_GPL(switchdev_deferred_process
);
/* Work handler: flush the deferred queue. switchdev_deferred_process()
 * requires rtnl_lock, so take it around the call.
 */
static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}
/* Work item scheduled whenever a deferred operation is queued. */
static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
158 static int switchdev_deferred_enqueue(struct net_device
*dev
,
159 const void *data
, size_t data_len
,
160 switchdev_deferred_func_t
*func
)
162 struct switchdev_deferred_item
*dfitem
;
164 dfitem
= kmalloc(sizeof(*dfitem
) + data_len
, GFP_ATOMIC
);
169 memcpy(dfitem
->data
, data
, data_len
);
171 spin_lock_bh(&deferred_lock
);
172 list_add_tail(&dfitem
->list
, &deferred
);
173 spin_unlock_bh(&deferred_lock
);
174 schedule_work(&deferred_process_work
);
179 * switchdev_port_attr_get - Get port attribute
182 * @attr: attribute to get
184 int switchdev_port_attr_get(struct net_device
*dev
, struct switchdev_attr
*attr
)
186 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
187 struct net_device
*lower_dev
;
188 struct list_head
*iter
;
189 struct switchdev_attr first
= {
190 .id
= SWITCHDEV_ATTR_ID_UNDEFINED
192 int err
= -EOPNOTSUPP
;
194 if (ops
&& ops
->switchdev_port_attr_get
)
195 return ops
->switchdev_port_attr_get(dev
, attr
);
197 if (attr
->flags
& SWITCHDEV_F_NO_RECURSE
)
200 /* Switch device port(s) may be stacked under
201 * bond/team/vlan dev, so recurse down to get attr on
202 * each port. Return -ENODATA if attr values don't
203 * compare across ports.
206 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
207 err
= switchdev_port_attr_get(lower_dev
, attr
);
210 if (first
.id
== SWITCHDEV_ATTR_ID_UNDEFINED
)
212 else if (memcmp(&first
, attr
, sizeof(*attr
)))
218 EXPORT_SYMBOL_GPL(switchdev_port_attr_get
);
220 static int __switchdev_port_attr_set(struct net_device
*dev
,
221 const struct switchdev_attr
*attr
,
222 struct switchdev_trans
*trans
)
224 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
225 struct net_device
*lower_dev
;
226 struct list_head
*iter
;
227 int err
= -EOPNOTSUPP
;
229 if (ops
&& ops
->switchdev_port_attr_set
) {
230 err
= ops
->switchdev_port_attr_set(dev
, attr
, trans
);
234 if (attr
->flags
& SWITCHDEV_F_NO_RECURSE
)
237 /* Switch device port(s) may be stacked under
238 * bond/team/vlan dev, so recurse down to set attr on
242 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
243 err
= __switchdev_port_attr_set(lower_dev
, attr
, trans
);
249 if (err
== -EOPNOTSUPP
&& attr
->flags
& SWITCHDEV_F_SKIP_EOPNOTSUPP
)
255 static int switchdev_port_attr_set_now(struct net_device
*dev
,
256 const struct switchdev_attr
*attr
)
258 struct switchdev_trans trans
;
261 switchdev_trans_init(&trans
);
263 /* Phase I: prepare for attr set. Driver/device should fail
264 * here if there are going to be issues in the commit phase,
265 * such as lack of resources or support. The driver/device
266 * should reserve resources needed for the commit phase here,
267 * but should not commit the attr.
270 trans
.ph_prepare
= true;
271 err
= __switchdev_port_attr_set(dev
, attr
, &trans
);
273 /* Prepare phase failed: abort the transaction. Any
274 * resources reserved in the prepare phase are
278 if (err
!= -EOPNOTSUPP
)
279 switchdev_trans_items_destroy(&trans
);
284 /* Phase II: commit attr set. This cannot fail as a fault
285 * of driver/device. If it does, it's a bug in the driver/device
286 * because the driver said everythings was OK in phase I.
289 trans
.ph_prepare
= false;
290 err
= __switchdev_port_attr_set(dev
, attr
, &trans
);
291 WARN(err
, "%s: Commit of attribute (id=%d) failed.\n",
292 dev
->name
, attr
->id
);
293 switchdev_trans_items_warn_destroy(dev
, &trans
);
298 static void switchdev_port_attr_set_deferred(struct net_device
*dev
,
301 const struct switchdev_attr
*attr
= data
;
304 err
= switchdev_port_attr_set_now(dev
, attr
);
305 if (err
&& err
!= -EOPNOTSUPP
)
306 netdev_err(dev
, "failed (err=%d) to set attribute (id=%d)\n",
310 static int switchdev_port_attr_set_defer(struct net_device
*dev
,
311 const struct switchdev_attr
*attr
)
313 return switchdev_deferred_enqueue(dev
, attr
, sizeof(*attr
),
314 switchdev_port_attr_set_deferred
);
318 * switchdev_port_attr_set - Set port attribute
321 * @attr: attribute to set
323 * Use a 2-phase prepare-commit transaction model to ensure
324 * system is not left in a partially updated state due to
325 * failure from driver/device.
327 * rtnl_lock must be held and must not be in atomic section,
328 * in case SWITCHDEV_F_DEFER flag is not set.
330 int switchdev_port_attr_set(struct net_device
*dev
,
331 const struct switchdev_attr
*attr
)
333 if (attr
->flags
& SWITCHDEV_F_DEFER
)
334 return switchdev_port_attr_set_defer(dev
, attr
);
336 return switchdev_port_attr_set_now(dev
, attr
);
338 EXPORT_SYMBOL_GPL(switchdev_port_attr_set
);
340 static size_t switchdev_obj_size(const struct switchdev_obj
*obj
)
343 case SWITCHDEV_OBJ_ID_PORT_VLAN
:
344 return sizeof(struct switchdev_obj_port_vlan
);
345 case SWITCHDEV_OBJ_ID_IPV4_FIB
:
346 return sizeof(struct switchdev_obj_ipv4_fib
);
347 case SWITCHDEV_OBJ_ID_PORT_FDB
:
348 return sizeof(struct switchdev_obj_port_fdb
);
355 static int __switchdev_port_obj_add(struct net_device
*dev
,
356 const struct switchdev_obj
*obj
,
357 struct switchdev_trans
*trans
)
359 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
360 struct net_device
*lower_dev
;
361 struct list_head
*iter
;
362 int err
= -EOPNOTSUPP
;
364 if (ops
&& ops
->switchdev_port_obj_add
)
365 return ops
->switchdev_port_obj_add(dev
, obj
, trans
);
367 /* Switch device port(s) may be stacked under
368 * bond/team/vlan dev, so recurse down to add object on
372 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
373 err
= __switchdev_port_obj_add(lower_dev
, obj
, trans
);
381 static int switchdev_port_obj_add_now(struct net_device
*dev
,
382 const struct switchdev_obj
*obj
)
384 struct switchdev_trans trans
;
389 switchdev_trans_init(&trans
);
391 /* Phase I: prepare for obj add. Driver/device should fail
392 * here if there are going to be issues in the commit phase,
393 * such as lack of resources or support. The driver/device
394 * should reserve resources needed for the commit phase here,
395 * but should not commit the obj.
398 trans
.ph_prepare
= true;
399 err
= __switchdev_port_obj_add(dev
, obj
, &trans
);
401 /* Prepare phase failed: abort the transaction. Any
402 * resources reserved in the prepare phase are
406 if (err
!= -EOPNOTSUPP
)
407 switchdev_trans_items_destroy(&trans
);
412 /* Phase II: commit obj add. This cannot fail as a fault
413 * of driver/device. If it does, it's a bug in the driver/device
414 * because the driver said everythings was OK in phase I.
417 trans
.ph_prepare
= false;
418 err
= __switchdev_port_obj_add(dev
, obj
, &trans
);
419 WARN(err
, "%s: Commit of object (id=%d) failed.\n", dev
->name
, obj
->id
);
420 switchdev_trans_items_warn_destroy(dev
, &trans
);
425 static void switchdev_port_obj_add_deferred(struct net_device
*dev
,
428 const struct switchdev_obj
*obj
= data
;
431 err
= switchdev_port_obj_add_now(dev
, obj
);
432 if (err
&& err
!= -EOPNOTSUPP
)
433 netdev_err(dev
, "failed (err=%d) to add object (id=%d)\n",
437 static int switchdev_port_obj_add_defer(struct net_device
*dev
,
438 const struct switchdev_obj
*obj
)
440 return switchdev_deferred_enqueue(dev
, obj
, switchdev_obj_size(obj
),
441 switchdev_port_obj_add_deferred
);
445 * switchdev_port_obj_add - Add port object
449 * @obj: object to add
451 * Use a 2-phase prepare-commit transaction model to ensure
452 * system is not left in a partially updated state due to
453 * failure from driver/device.
455 * rtnl_lock must be held and must not be in atomic section,
456 * in case SWITCHDEV_F_DEFER flag is not set.
458 int switchdev_port_obj_add(struct net_device
*dev
,
459 const struct switchdev_obj
*obj
)
461 if (obj
->flags
& SWITCHDEV_F_DEFER
)
462 return switchdev_port_obj_add_defer(dev
, obj
);
464 return switchdev_port_obj_add_now(dev
, obj
);
466 EXPORT_SYMBOL_GPL(switchdev_port_obj_add
);
468 static int switchdev_port_obj_del_now(struct net_device
*dev
,
469 const struct switchdev_obj
*obj
)
471 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
472 struct net_device
*lower_dev
;
473 struct list_head
*iter
;
474 int err
= -EOPNOTSUPP
;
476 if (ops
&& ops
->switchdev_port_obj_del
)
477 return ops
->switchdev_port_obj_del(dev
, obj
);
479 /* Switch device port(s) may be stacked under
480 * bond/team/vlan dev, so recurse down to delete object on
484 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
485 err
= switchdev_port_obj_del_now(lower_dev
, obj
);
493 static void switchdev_port_obj_del_deferred(struct net_device
*dev
,
496 const struct switchdev_obj
*obj
= data
;
499 err
= switchdev_port_obj_del_now(dev
, obj
);
500 if (err
&& err
!= -EOPNOTSUPP
)
501 netdev_err(dev
, "failed (err=%d) to del object (id=%d)\n",
505 static int switchdev_port_obj_del_defer(struct net_device
*dev
,
506 const struct switchdev_obj
*obj
)
508 return switchdev_deferred_enqueue(dev
, obj
, switchdev_obj_size(obj
),
509 switchdev_port_obj_del_deferred
);
513 * switchdev_port_obj_del - Delete port object
517 * @obj: object to delete
519 * rtnl_lock must be held and must not be in atomic section,
520 * in case SWITCHDEV_F_DEFER flag is not set.
522 int switchdev_port_obj_del(struct net_device
*dev
,
523 const struct switchdev_obj
*obj
)
525 if (obj
->flags
& SWITCHDEV_F_DEFER
)
526 return switchdev_port_obj_del_defer(dev
, obj
);
528 return switchdev_port_obj_del_now(dev
, obj
);
530 EXPORT_SYMBOL_GPL(switchdev_port_obj_del
);
533 * switchdev_port_obj_dump - Dump port objects
537 * @obj: object to dump
538 * @cb: function to call with a filled object
540 * rtnl_lock must be held.
542 int switchdev_port_obj_dump(struct net_device
*dev
, struct switchdev_obj
*obj
,
543 switchdev_obj_dump_cb_t
*cb
)
545 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
546 struct net_device
*lower_dev
;
547 struct list_head
*iter
;
548 int err
= -EOPNOTSUPP
;
552 if (ops
&& ops
->switchdev_port_obj_dump
)
553 return ops
->switchdev_port_obj_dump(dev
, obj
, cb
);
555 /* Switch device port(s) may be stacked under
556 * bond/team/vlan dev, so recurse down to dump objects on
557 * first port at bottom of stack.
560 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
561 err
= switchdev_port_obj_dump(lower_dev
, obj
, cb
);
567 EXPORT_SYMBOL_GPL(switchdev_port_obj_dump
);
/* Notifier chain for switchdev events. */
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
572 * register_switchdev_notifier - Register notifier
573 * @nb: notifier_block
575 * Register switch device notifier. This should be used by code
576 * which needs to monitor events happening in particular device.
577 * Return values are same as for atomic_notifier_chain_register().
579 int register_switchdev_notifier(struct notifier_block
*nb
)
584 err
= raw_notifier_chain_register(&switchdev_notif_chain
, nb
);
588 EXPORT_SYMBOL_GPL(register_switchdev_notifier
);
591 * unregister_switchdev_notifier - Unregister notifier
592 * @nb: notifier_block
594 * Unregister switch device notifier.
595 * Return values are same as for atomic_notifier_chain_unregister().
597 int unregister_switchdev_notifier(struct notifier_block
*nb
)
602 err
= raw_notifier_chain_unregister(&switchdev_notif_chain
, nb
);
606 EXPORT_SYMBOL_GPL(unregister_switchdev_notifier
);
609 * call_switchdev_notifiers - Call notifiers
610 * @val: value passed unmodified to notifier function
612 * @info: notifier information data
614 * Call all network notifier blocks. This should be called by driver
615 * when it needs to propagate hardware event.
616 * Return values are same as for atomic_notifier_call_chain().
617 * rtnl_lock must be held.
619 int call_switchdev_notifiers(unsigned long val
, struct net_device
*dev
,
620 struct switchdev_notifier_info
*info
)
627 err
= raw_notifier_call_chain(&switchdev_notif_chain
, val
, info
);
630 EXPORT_SYMBOL_GPL(call_switchdev_notifiers
);
632 struct switchdev_vlan_dump
{
633 struct switchdev_obj_port_vlan vlan
;
641 static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump
*dump
)
643 struct bridge_vlan_info vinfo
;
645 vinfo
.flags
= dump
->flags
;
647 if (dump
->begin
== 0 && dump
->end
== 0) {
649 } else if (dump
->begin
== dump
->end
) {
650 vinfo
.vid
= dump
->begin
;
651 if (nla_put(dump
->skb
, IFLA_BRIDGE_VLAN_INFO
,
652 sizeof(vinfo
), &vinfo
))
655 vinfo
.vid
= dump
->begin
;
656 vinfo
.flags
|= BRIDGE_VLAN_INFO_RANGE_BEGIN
;
657 if (nla_put(dump
->skb
, IFLA_BRIDGE_VLAN_INFO
,
658 sizeof(vinfo
), &vinfo
))
660 vinfo
.vid
= dump
->end
;
661 vinfo
.flags
&= ~BRIDGE_VLAN_INFO_RANGE_BEGIN
;
662 vinfo
.flags
|= BRIDGE_VLAN_INFO_RANGE_END
;
663 if (nla_put(dump
->skb
, IFLA_BRIDGE_VLAN_INFO
,
664 sizeof(vinfo
), &vinfo
))
671 static int switchdev_port_vlan_dump_cb(struct switchdev_obj
*obj
)
673 struct switchdev_obj_port_vlan
*vlan
= SWITCHDEV_OBJ_PORT_VLAN(obj
);
674 struct switchdev_vlan_dump
*dump
=
675 container_of(vlan
, struct switchdev_vlan_dump
, vlan
);
678 if (vlan
->vid_begin
> vlan
->vid_end
)
681 if (dump
->filter_mask
& RTEXT_FILTER_BRVLAN
) {
682 dump
->flags
= vlan
->flags
;
683 for (dump
->begin
= dump
->end
= vlan
->vid_begin
;
684 dump
->begin
<= vlan
->vid_end
;
685 dump
->begin
++, dump
->end
++) {
686 err
= switchdev_port_vlan_dump_put(dump
);
690 } else if (dump
->filter_mask
& RTEXT_FILTER_BRVLAN_COMPRESSED
) {
691 if (dump
->begin
> vlan
->vid_begin
&&
692 dump
->begin
>= vlan
->vid_end
) {
693 if ((dump
->begin
- 1) == vlan
->vid_end
&&
694 dump
->flags
== vlan
->flags
) {
696 dump
->begin
= vlan
->vid_begin
;
698 err
= switchdev_port_vlan_dump_put(dump
);
699 dump
->flags
= vlan
->flags
;
700 dump
->begin
= vlan
->vid_begin
;
701 dump
->end
= vlan
->vid_end
;
703 } else if (dump
->end
<= vlan
->vid_begin
&&
704 dump
->end
< vlan
->vid_end
) {
705 if ((dump
->end
+ 1) == vlan
->vid_begin
&&
706 dump
->flags
== vlan
->flags
) {
708 dump
->end
= vlan
->vid_end
;
710 err
= switchdev_port_vlan_dump_put(dump
);
711 dump
->flags
= vlan
->flags
;
712 dump
->begin
= vlan
->vid_begin
;
713 dump
->end
= vlan
->vid_end
;
723 static int switchdev_port_vlan_fill(struct sk_buff
*skb
, struct net_device
*dev
,
726 struct switchdev_vlan_dump dump
= {
727 .vlan
.obj
.id
= SWITCHDEV_OBJ_ID_PORT_VLAN
,
729 .filter_mask
= filter_mask
,
733 if ((filter_mask
& RTEXT_FILTER_BRVLAN
) ||
734 (filter_mask
& RTEXT_FILTER_BRVLAN_COMPRESSED
)) {
735 err
= switchdev_port_obj_dump(dev
, &dump
.vlan
.obj
,
736 switchdev_port_vlan_dump_cb
);
739 if (filter_mask
& RTEXT_FILTER_BRVLAN_COMPRESSED
)
741 err
= switchdev_port_vlan_dump_put(&dump
);
745 return err
== -EOPNOTSUPP
? 0 : err
;
749 * switchdev_port_bridge_getlink - Get bridge port attributes
753 * Called for SELF on rtnl_bridge_getlink to get bridge port
756 int switchdev_port_bridge_getlink(struct sk_buff
*skb
, u32 pid
, u32 seq
,
757 struct net_device
*dev
, u32 filter_mask
,
760 struct switchdev_attr attr
= {
761 .id
= SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
,
763 u16 mode
= BRIDGE_MODE_UNDEF
;
764 u32 mask
= BR_LEARNING
| BR_LEARNING_SYNC
| BR_FLOOD
;
767 err
= switchdev_port_attr_get(dev
, &attr
);
768 if (err
&& err
!= -EOPNOTSUPP
)
771 return ndo_dflt_bridge_getlink(skb
, pid
, seq
, dev
, mode
,
772 attr
.u
.brport_flags
, mask
, nlflags
,
773 filter_mask
, switchdev_port_vlan_fill
);
775 EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink
);
777 static int switchdev_port_br_setflag(struct net_device
*dev
,
778 struct nlattr
*nlattr
,
779 unsigned long brport_flag
)
781 struct switchdev_attr attr
= {
782 .id
= SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
,
784 u8 flag
= nla_get_u8(nlattr
);
787 err
= switchdev_port_attr_get(dev
, &attr
);
792 attr
.u
.brport_flags
|= brport_flag
;
794 attr
.u
.brport_flags
&= ~brport_flag
;
796 return switchdev_port_attr_set(dev
, &attr
);
799 static const struct nla_policy
800 switchdev_port_bridge_policy
[IFLA_BRPORT_MAX
+ 1] = {
801 [IFLA_BRPORT_STATE
] = { .type
= NLA_U8
},
802 [IFLA_BRPORT_COST
] = { .type
= NLA_U32
},
803 [IFLA_BRPORT_PRIORITY
] = { .type
= NLA_U16
},
804 [IFLA_BRPORT_MODE
] = { .type
= NLA_U8
},
805 [IFLA_BRPORT_GUARD
] = { .type
= NLA_U8
},
806 [IFLA_BRPORT_PROTECT
] = { .type
= NLA_U8
},
807 [IFLA_BRPORT_FAST_LEAVE
] = { .type
= NLA_U8
},
808 [IFLA_BRPORT_LEARNING
] = { .type
= NLA_U8
},
809 [IFLA_BRPORT_LEARNING_SYNC
] = { .type
= NLA_U8
},
810 [IFLA_BRPORT_UNICAST_FLOOD
] = { .type
= NLA_U8
},
813 static int switchdev_port_br_setlink_protinfo(struct net_device
*dev
,
814 struct nlattr
*protinfo
)
820 err
= nla_validate_nested(protinfo
, IFLA_BRPORT_MAX
,
821 switchdev_port_bridge_policy
);
825 nla_for_each_nested(attr
, protinfo
, rem
) {
826 switch (nla_type(attr
)) {
827 case IFLA_BRPORT_LEARNING
:
828 err
= switchdev_port_br_setflag(dev
, attr
,
831 case IFLA_BRPORT_LEARNING_SYNC
:
832 err
= switchdev_port_br_setflag(dev
, attr
,
835 case IFLA_BRPORT_UNICAST_FLOOD
:
836 err
= switchdev_port_br_setflag(dev
, attr
, BR_FLOOD
);
849 static int switchdev_port_br_afspec(struct net_device
*dev
,
850 struct nlattr
*afspec
,
851 int (*f
)(struct net_device
*dev
,
852 const struct switchdev_obj
*obj
))
855 struct bridge_vlan_info
*vinfo
;
856 struct switchdev_obj_port_vlan vlan
= {
857 .obj
.id
= SWITCHDEV_OBJ_ID_PORT_VLAN
,
862 nla_for_each_nested(attr
, afspec
, rem
) {
863 if (nla_type(attr
) != IFLA_BRIDGE_VLAN_INFO
)
865 if (nla_len(attr
) != sizeof(struct bridge_vlan_info
))
867 vinfo
= nla_data(attr
);
868 if (!vinfo
->vid
|| vinfo
->vid
>= VLAN_VID_MASK
)
870 vlan
.flags
= vinfo
->flags
;
871 if (vinfo
->flags
& BRIDGE_VLAN_INFO_RANGE_BEGIN
) {
874 vlan
.vid_begin
= vinfo
->vid
;
875 /* don't allow range of pvids */
876 if (vlan
.flags
& BRIDGE_VLAN_INFO_PVID
)
878 } else if (vinfo
->flags
& BRIDGE_VLAN_INFO_RANGE_END
) {
881 vlan
.vid_end
= vinfo
->vid
;
882 if (vlan
.vid_end
<= vlan
.vid_begin
)
884 err
= f(dev
, &vlan
.obj
);
891 vlan
.vid_begin
= vinfo
->vid
;
892 vlan
.vid_end
= vinfo
->vid
;
893 err
= f(dev
, &vlan
.obj
);
904 * switchdev_port_bridge_setlink - Set bridge port attributes
907 * @nlh: netlink header
908 * @flags: netlink flags
910 * Called for SELF on rtnl_bridge_setlink to set bridge port
913 int switchdev_port_bridge_setlink(struct net_device
*dev
,
914 struct nlmsghdr
*nlh
, u16 flags
)
916 struct nlattr
*protinfo
;
917 struct nlattr
*afspec
;
920 protinfo
= nlmsg_find_attr(nlh
, sizeof(struct ifinfomsg
),
923 err
= switchdev_port_br_setlink_protinfo(dev
, protinfo
);
928 afspec
= nlmsg_find_attr(nlh
, sizeof(struct ifinfomsg
),
931 err
= switchdev_port_br_afspec(dev
, afspec
,
932 switchdev_port_obj_add
);
936 EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink
);
939 * switchdev_port_bridge_dellink - Set bridge port attributes
942 * @nlh: netlink header
943 * @flags: netlink flags
945 * Called for SELF on rtnl_bridge_dellink to set bridge port
948 int switchdev_port_bridge_dellink(struct net_device
*dev
,
949 struct nlmsghdr
*nlh
, u16 flags
)
951 struct nlattr
*afspec
;
953 afspec
= nlmsg_find_attr(nlh
, sizeof(struct ifinfomsg
),
956 return switchdev_port_br_afspec(dev
, afspec
,
957 switchdev_port_obj_del
);
961 EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink
);
964 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
966 * @ndmsg: netlink hdr
967 * @nlattr: netlink attributes
969 * @addr: MAC address to add
972 * Add FDB entry to switch device.
974 int switchdev_port_fdb_add(struct ndmsg
*ndm
, struct nlattr
*tb
[],
975 struct net_device
*dev
, const unsigned char *addr
,
976 u16 vid
, u16 nlm_flags
)
978 struct switchdev_obj_port_fdb fdb
= {
979 .obj
.id
= SWITCHDEV_OBJ_ID_PORT_FDB
,
983 ether_addr_copy(fdb
.addr
, addr
);
984 return switchdev_port_obj_add(dev
, &fdb
.obj
);
986 EXPORT_SYMBOL_GPL(switchdev_port_fdb_add
);
989 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
991 * @ndmsg: netlink hdr
992 * @nlattr: netlink attributes
994 * @addr: MAC address to delete
995 * @vid: VLAN to delete
997 * Delete FDB entry from switch device.
999 int switchdev_port_fdb_del(struct ndmsg
*ndm
, struct nlattr
*tb
[],
1000 struct net_device
*dev
, const unsigned char *addr
,
1003 struct switchdev_obj_port_fdb fdb
= {
1004 .obj
.id
= SWITCHDEV_OBJ_ID_PORT_FDB
,
1008 ether_addr_copy(fdb
.addr
, addr
);
1009 return switchdev_port_obj_del(dev
, &fdb
.obj
);
1011 EXPORT_SYMBOL_GPL(switchdev_port_fdb_del
);
1013 struct switchdev_fdb_dump
{
1014 struct switchdev_obj_port_fdb fdb
;
1015 struct net_device
*dev
;
1016 struct sk_buff
*skb
;
1017 struct netlink_callback
*cb
;
1021 static int switchdev_port_fdb_dump_cb(struct switchdev_obj
*obj
)
1023 struct switchdev_obj_port_fdb
*fdb
= SWITCHDEV_OBJ_PORT_FDB(obj
);
1024 struct switchdev_fdb_dump
*dump
=
1025 container_of(fdb
, struct switchdev_fdb_dump
, fdb
);
1026 u32 portid
= NETLINK_CB(dump
->cb
->skb
).portid
;
1027 u32 seq
= dump
->cb
->nlh
->nlmsg_seq
;
1028 struct nlmsghdr
*nlh
;
1031 if (dump
->idx
< dump
->cb
->args
[0])
1034 nlh
= nlmsg_put(dump
->skb
, portid
, seq
, RTM_NEWNEIGH
,
1035 sizeof(*ndm
), NLM_F_MULTI
);
1039 ndm
= nlmsg_data(nlh
);
1040 ndm
->ndm_family
= AF_BRIDGE
;
1043 ndm
->ndm_flags
= NTF_SELF
;
1045 ndm
->ndm_ifindex
= dump
->dev
->ifindex
;
1046 ndm
->ndm_state
= fdb
->ndm_state
;
1048 if (nla_put(dump
->skb
, NDA_LLADDR
, ETH_ALEN
, fdb
->addr
))
1049 goto nla_put_failure
;
1051 if (fdb
->vid
&& nla_put_u16(dump
->skb
, NDA_VLAN
, fdb
->vid
))
1052 goto nla_put_failure
;
1054 nlmsg_end(dump
->skb
, nlh
);
1061 nlmsg_cancel(dump
->skb
, nlh
);
1066 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
1069 * @cb: netlink callback
1071 * @filter_dev: filter device
1074 * Delete FDB entry from switch device.
1076 int switchdev_port_fdb_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
,
1077 struct net_device
*dev
,
1078 struct net_device
*filter_dev
, int idx
)
1080 struct switchdev_fdb_dump dump
= {
1081 .fdb
.obj
.id
= SWITCHDEV_OBJ_ID_PORT_FDB
,
1088 switchdev_port_obj_dump(dev
, &dump
.fdb
.obj
, switchdev_port_fdb_dump_cb
);
1091 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump
);
1093 static struct net_device
*switchdev_get_lowest_dev(struct net_device
*dev
)
1095 const struct switchdev_ops
*ops
= dev
->switchdev_ops
;
1096 struct net_device
*lower_dev
;
1097 struct net_device
*port_dev
;
1098 struct list_head
*iter
;
1100 /* Recusively search down until we find a sw port dev.
1101 * (A sw port dev supports switchdev_port_attr_get).
1104 if (ops
&& ops
->switchdev_port_attr_get
)
1107 netdev_for_each_lower_dev(dev
, lower_dev
, iter
) {
1108 port_dev
= switchdev_get_lowest_dev(lower_dev
);
1116 static struct net_device
*switchdev_get_dev_by_nhs(struct fib_info
*fi
)
1118 struct switchdev_attr attr
= {
1119 .id
= SWITCHDEV_ATTR_ID_PORT_PARENT_ID
,
1121 struct switchdev_attr prev_attr
;
1122 struct net_device
*dev
= NULL
;
1127 /* For this route, all nexthop devs must be on the same switch. */
1129 for (nhsel
= 0; nhsel
< fi
->fib_nhs
; nhsel
++) {
1130 const struct fib_nh
*nh
= &fi
->fib_nh
[nhsel
];
1135 dev
= switchdev_get_lowest_dev(nh
->nh_dev
);
1139 if (switchdev_port_attr_get(dev
, &attr
))
1143 !netdev_phys_item_id_same(&prev_attr
.u
.ppid
, &attr
.u
.ppid
))
1153 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
1155 * @dst: route's IPv4 destination address
1156 * @dst_len: destination address length (prefix length)
1157 * @fi: route FIB info structure
1160 * @nlflags: netlink flags passed in (NLM_F_*)
1161 * @tb_id: route table ID
1163 * Add/modify switch IPv4 route entry.
1165 int switchdev_fib_ipv4_add(u32 dst
, int dst_len
, struct fib_info
*fi
,
1166 u8 tos
, u8 type
, u32 nlflags
, u32 tb_id
)
1168 struct switchdev_obj_ipv4_fib ipv4_fib
= {
1169 .obj
.id
= SWITCHDEV_OBJ_ID_IPV4_FIB
,
1178 struct net_device
*dev
;
1181 /* Don't offload route if using custom ip rules or if
1182 * IPv4 FIB offloading has been disabled completely.
1185 #ifdef CONFIG_IP_MULTIPLE_TABLES
1186 if (fi
->fib_net
->ipv4
.fib_has_custom_rules
)
1190 if (fi
->fib_net
->ipv4
.fib_offload_disabled
)
1193 dev
= switchdev_get_dev_by_nhs(fi
);
1197 err
= switchdev_port_obj_add(dev
, &ipv4_fib
.obj
);
1199 fi
->fib_flags
|= RTNH_F_OFFLOAD
;
1201 return err
== -EOPNOTSUPP
? 0 : err
;
1203 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add
);
1206 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
1208 * @dst: route's IPv4 destination address
1209 * @dst_len: destination address length (prefix length)
1210 * @fi: route FIB info structure
1213 * @tb_id: route table ID
1215 * Delete IPv4 route entry from switch device.
1217 int switchdev_fib_ipv4_del(u32 dst
, int dst_len
, struct fib_info
*fi
,
1218 u8 tos
, u8 type
, u32 tb_id
)
1220 struct switchdev_obj_ipv4_fib ipv4_fib
= {
1221 .obj
.id
= SWITCHDEV_OBJ_ID_IPV4_FIB
,
1230 struct net_device
*dev
;
1233 if (!(fi
->fib_flags
& RTNH_F_OFFLOAD
))
1236 dev
= switchdev_get_dev_by_nhs(fi
);
1240 err
= switchdev_port_obj_del(dev
, &ipv4_fib
.obj
);
1242 fi
->fib_flags
&= ~RTNH_F_OFFLOAD
;
1244 return err
== -EOPNOTSUPP
? 0 : err
;
1246 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del
);
1249 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
1251 * @fi: route FIB info structure
1253 void switchdev_fib_ipv4_abort(struct fib_info
*fi
)
1255 /* There was a problem installing this route to the offload
1256 * device. For now, until we come up with more refined
1257 * policy handling, abruptly end IPv4 fib offloading for
1258 * for entire net by flushing offload device(s) of all
1259 * IPv4 routes, and mark IPv4 fib offloading broken from
1260 * this point forward.
1263 fib_flush_external(fi
->fib_net
);
1264 fi
->fib_net
->ipv4
.fib_offload_disabled
= true;
1266 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort
);
1268 static bool switchdev_port_same_parent_id(struct net_device
*a
,
1269 struct net_device
*b
)
1271 struct switchdev_attr a_attr
= {
1272 .id
= SWITCHDEV_ATTR_ID_PORT_PARENT_ID
,
1273 .flags
= SWITCHDEV_F_NO_RECURSE
,
1275 struct switchdev_attr b_attr
= {
1276 .id
= SWITCHDEV_ATTR_ID_PORT_PARENT_ID
,
1277 .flags
= SWITCHDEV_F_NO_RECURSE
,
1280 if (switchdev_port_attr_get(a
, &a_attr
) ||
1281 switchdev_port_attr_get(b
, &b_attr
))
1284 return netdev_phys_item_id_same(&a_attr
.u
.ppid
, &b_attr
.u
.ppid
);
1287 static u32
switchdev_port_fwd_mark_get(struct net_device
*dev
,
1288 struct net_device
*group_dev
)
1290 struct net_device
*lower_dev
;
1291 struct list_head
*iter
;
1293 netdev_for_each_lower_dev(group_dev
, lower_dev
, iter
) {
1294 if (lower_dev
== dev
)
1296 if (switchdev_port_same_parent_id(dev
, lower_dev
))
1297 return lower_dev
->offload_fwd_mark
;
1298 return switchdev_port_fwd_mark_get(dev
, lower_dev
);
1301 return dev
->ifindex
;
1304 static void switchdev_port_fwd_mark_reset(struct net_device
*group_dev
,
1305 u32 old_mark
, u32
*reset_mark
)
1307 struct net_device
*lower_dev
;
1308 struct list_head
*iter
;
1310 netdev_for_each_lower_dev(group_dev
, lower_dev
, iter
) {
1311 if (lower_dev
->offload_fwd_mark
== old_mark
) {
1313 *reset_mark
= lower_dev
->ifindex
;
1314 lower_dev
->offload_fwd_mark
= *reset_mark
;
1316 switchdev_port_fwd_mark_reset(lower_dev
, old_mark
, reset_mark
);
1321 * switchdev_port_fwd_mark_set - Set port offload forwarding mark
1324 * @group_dev: containing device
1325 * @joining: true if dev is joining group; false if leaving group
1327 * An ungrouped port's offload mark is just its ifindex. A grouped
1328 * port's (member of a bridge, for example) offload mark is the ifindex
1329 * of one of the ports in the group with the same parent (switch) ID.
1330 * Ports on the same device in the same group will have the same mark.
1335 * sw1p1 ifindex=2 mark=2
1336 * sw1p2 ifindex=3 mark=2
1337 * sw2p1 ifindex=4 mark=5
1338 * sw2p2 ifindex=5 mark=5
1340 * If sw2p2 leaves the bridge, we'll have:
1343 * sw1p1 ifindex=2 mark=2
1344 * sw1p2 ifindex=3 mark=2
1345 * sw2p1 ifindex=4 mark=4
1346 * sw2p2 ifindex=5 mark=5
1348 void switchdev_port_fwd_mark_set(struct net_device
*dev
,
1349 struct net_device
*group_dev
,
1352 u32 mark
= dev
->ifindex
;
1358 mark
= switchdev_port_fwd_mark_get(dev
, group_dev
);
1359 else if (dev
->offload_fwd_mark
== mark
)
1360 /* Ohoh, this port was the mark reference port,
1361 * but it's leaving the group, so reset the
1362 * mark for the remaining ports in the group.
1364 switchdev_port_fwd_mark_reset(group_dev
, mark
,
1368 dev
->offload_fwd_mark
= mark
;
1370 EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set
);