// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2018 Netronome Systems, Inc. */

#include "main.h"

/* LAG group config flags. */
#define NFP_FL_LAG_LAST			BIT(1)
#define NFP_FL_LAG_FIRST		BIT(2)
#define NFP_FL_LAG_DATA			BIT(3)
#define NFP_FL_LAG_XON			BIT(4)
#define NFP_FL_LAG_SYNC			BIT(5)
#define NFP_FL_LAG_SWITCH		BIT(6)
#define NFP_FL_LAG_RESET		BIT(7)
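
/* A rough summary of how these flags are used below: the host sets FIRST on
 * the first message of a config batch, LAST and SWITCH on the message that
 * closes a batch (sent with the reserved SYNC group ID), and RESET when the
 * entire group config is being cleared. DATA, XON and SYNC arrive on messages
 * handed back by firmware and are handled in nfp_flower_lag_unprocessed_msg().
 */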

/* LAG port state flags. */
#define NFP_PORT_LAG_LINK_UP		BIT(0)
#define NFP_PORT_LAG_TX_ENABLED		BIT(1)
#define NFP_PORT_LAG_CHANGED		BIT(2)

enum nfp_fl_lag_batch {
	NFP_FL_LAG_BATCH_FIRST,
	NFP_FL_LAG_BATCH_MEMBER,
	NFP_FL_LAG_BATCH_FINISHED
};
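
/* Batch progression, as driven by nfp_fl_lag_config_group(): callers start at
 * NFP_FL_LAG_BATCH_FIRST, the first config message moves the state to
 * NFP_FL_LAG_BATCH_MEMBER, and setting the state to NFP_FL_LAG_BATCH_FINISHED
 * (or sending a reset) produces a final zero-member message that ends the
 * batch.
 */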

/**
 * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
 * @ctrl_flags:	Configuration flags
 * @reserved:	Reserved for future use
 * @ttl:	Time to live of packet - host always sets to 0xff
 * @pkt_number:	Config message packet number - increment for each message
 * @batch_ver:	Batch version of messages - increment for each batch of messages
 * @group_id:	Group ID applicable
 * @group_inst:	Group instance number - increment when group is reused
 * @members:	Array of 32-bit words listing all active group members
 */
struct nfp_flower_cmsg_lag_config {
	u8 ctrl_flags;
	u8 reserved[2];
	u8 ttl;
	__be32 pkt_number;
	__be32 batch_ver;
	__be32 group_id;
	__be32 group_inst;
	__be32 members[];
};
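
/* The structure above is the payload of an NFP_FLOWER_CMSG_TYPE_LAG_CONFIG
 * control message: member_cnt 32-bit port IDs follow the fixed header, which
 * is why nfp_fl_lag_config_group() sizes the skb as
 * sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt.
 */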

/**
 * struct nfp_fl_lag_group - list entry for each LAG group
 * @group_id:		Assigned group ID for host/kernel sync
 * @group_inst:		Group instance in case of ID reuse
 * @list:		List entry
 * @master_ndev:	Group master Netdev
 * @dirty:		Marked if the group needs synced to HW
 * @offloaded:		Marked if the group is currently offloaded to NIC
 * @to_remove:		Marked if the group should be removed from NIC
 * @to_destroy:		Marked if the group should be removed from driver
 * @slave_cnt:		Number of slaves in group
 */
struct nfp_fl_lag_group {
	unsigned int group_id;
	u8 group_inst;
	struct list_head list;
	struct net_device *master_ndev;
	bool dirty;
	bool offloaded;
	bool to_remove;
	bool to_destroy;
	unsigned int slave_cnt;
};
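
/* Group lifecycle: groups are created from NETDEV_CHANGEUPPER notifications,
 * flagged dirty/to_remove/to_destroy by the notifier handlers, and only the
 * delayed work item (nfp_fl_lag_do_work) pushes configuration to firmware and
 * frees groups, always under the nfp_fl_lag lock.
 */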

#define NFP_FL_LAG_PKT_NUMBER_MASK	GENMASK(30, 0)
#define NFP_FL_LAG_VERSION_MASK		GENMASK(22, 0)
#define NFP_FL_LAG_HOST_TTL		0xff

/* Use this ID with zero members to ack a batch config */
#define NFP_FL_LAG_SYNC_ID		0
#define NFP_FL_LAG_GROUP_MIN		1 /* ID 0 reserved */
#define NFP_FL_LAG_GROUP_MAX		31 /* IDs 1 to 31 are valid */

/* wait for more config */
#define NFP_FL_LAG_DELAY		(msecs_to_jiffies(2))

#define NFP_FL_LAG_RETRANS_LIMIT	100 /* max retrans cmsgs to store */
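
/* Per the masks above, the config packet number is a 31-bit rolling counter
 * and the batch version is a 23-bit value; as noted in
 * nfp_fl_increment_version(), firmware ignores the version LSB and treats
 * zero as reserved.
 */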

static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
{
	lag->pkt_num++;
	lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK;

	return lag->pkt_num;
}

static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
{
	/* LSB is not considered by firmware so add 2 for each increment. */
	lag->batch_ver += 2;
	lag->batch_ver &= NFP_FL_LAG_VERSION_MASK;

	/* Zero is reserved by firmware. */
	if (!lag->batch_ver)
		lag->batch_ver += 2;
}

static struct nfp_fl_lag_group *
nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
{
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	int id;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	id = ida_alloc_range(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
			     NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
	if (id < 0) {
		nfp_flower_cmsg_warn(priv->app,
				     "No more bonding groups available\n");
		return ERR_PTR(id);
	}

	group = kmalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		ida_free(&lag->ida_handle, id);
		return ERR_PTR(-ENOMEM);
	}

	group->group_id = id;
	group->master_ndev = master;
	group->dirty = true;
	group->offloaded = false;
	group->to_remove = false;
	group->to_destroy = false;
	group->slave_cnt = 0;
	group->group_inst = ++lag->global_inst;
	list_add_tail(&group->list, &lag->group_list);

	return group;
}

static struct nfp_fl_lag_group *
nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
					  struct net_device *master)
{
	struct nfp_fl_lag_group *entry;

	if (!master)
		return NULL;

	list_for_each_entry(entry, &lag->group_list, list)
		if (entry->master_ndev == master)
			return entry;

	return NULL;
}

static int nfp_fl_lag_get_group_info(struct nfp_app *app,
				     struct net_device *netdev,
				     __be16 *group_id,
				     u8 *batch_ver,
				     u8 *group_inst)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	__be32 temp_vers;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  netdev);
	if (!group) {
		mutex_unlock(&priv->nfp_lag.lock);
		return -ENOENT;
	}

	if (group_id)
		*group_id = cpu_to_be16(group->group_id);

	if (batch_ver) {
		temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver <<
					NFP_FL_PRE_LAG_VER_OFF);
		memcpy(batch_ver, &temp_vers, 3);
	}

	if (group_inst)
		*group_inst = group->group_inst;

	mutex_unlock(&priv->nfp_lag.lock);

	return 0;
}
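
/* Note on the 3-byte copy above: the 23-bit batch version is shifted by
 * NFP_FL_PRE_LAG_VER_OFF and the leading three bytes of the resulting
 * big-endian word are copied out, which assumes the caller's version field
 * (e.g. the lag_version member of struct nfp_fl_pre_lag) is a 3-byte array.
 */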

int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
				       struct net_device *master,
				       struct nfp_fl_pre_lag *pre_act,
				       struct netlink_ext_ack *extack)
{
	if (nfp_fl_lag_get_group_info(app, master, &pre_act->group_id,
				      pre_act->lag_version,
				      &pre_act->instance)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid entry: group does not exist for LAG action");
		return -ENOENT;
	}

	return 0;
}

void nfp_flower_lag_get_info_from_netdev(struct nfp_app *app,
					 struct net_device *netdev,
					 struct nfp_tun_neigh_lag *lag)
{
	nfp_fl_lag_get_group_info(app, netdev, NULL,
				  lag->lag_version, &lag->lag_instance);
}

int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	int group_id = -ENOENT;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  master);
	if (group)
		group_id = group->group_id;
	mutex_unlock(&priv->nfp_lag.lock);

	return group_id;
}

static int
nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
			struct net_device **active_members,
			unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv;
	unsigned long int flags;
	unsigned int size, i;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
	skb = nfp_flower_cmsg_alloc(priv->app, size,
				    NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
				    GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	flags = 0;

	/* Increment batch version for each new batch of config messages. */
	if (*batch == NFP_FL_LAG_BATCH_FIRST) {
		flags |= NFP_FL_LAG_FIRST;
		nfp_fl_increment_version(lag);
		*batch = NFP_FL_LAG_BATCH_MEMBER;
	}

	/* If it is a reset msg then it is also the end of the batch. */
	if (lag->rst_cfg) {
		flags |= NFP_FL_LAG_RESET;
		*batch = NFP_FL_LAG_BATCH_FINISHED;
	}

	/* To signal the end of a batch, both the switch and last flags are set
	 * and the reserved SYNC group ID is used.
	 */
	if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
		flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
		lag->rst_cfg = false;
		cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
		cmsg_payload->group_inst = 0;
	} else {
		cmsg_payload->group_id = cpu_to_be32(group->group_id);
		cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
	}

	cmsg_payload->reserved[0] = 0;
	cmsg_payload->reserved[1] = 0;
	cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
	cmsg_payload->ctrl_flags = flags;
	cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
	cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));

	for (i = 0; i < member_cnt; i++)
		cmsg_payload->members[i] =
			cpu_to_be32(nfp_repr_get_port_id(active_members[i]));

	nfp_ctrl_tx(priv->app->ctrl, skb);

	return 0;
}
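
/* Usage sketch (illustrative only, variable names are hypothetical): a batch
 * of config messages built from the work handler looks roughly like:
 *
 *	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
 *
 *	nfp_fl_lag_config_group(lag, group_a, members_a, n_a, &batch);
 *	nfp_fl_lag_config_group(lag, group_b, members_b, n_b, &batch);
 *	batch = NFP_FL_LAG_BATCH_FINISHED;
 *	nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
 *
 * The last call carries the SYNC group ID with the SWITCH and LAST flags set,
 * telling firmware to apply the whole batch.
 */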

static void nfp_fl_lag_do_work(struct work_struct *work)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
	struct nfp_fl_lag_group *entry, *storage;
	struct delayed_work *delayed_work;
	struct nfp_flower_priv *priv;
	struct nfp_fl_lag *lag;
	int err;

	delayed_work = to_delayed_work(work);
	lag = container_of(delayed_work, struct nfp_fl_lag, work);
	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		struct net_device *iter_netdev, **acti_netdevs;
		struct nfp_flower_repr_priv *repr_priv;
		int active_count = 0, slaves = 0;
		struct nfp_repr *repr;
		unsigned long *flags;

		if (entry->to_remove) {
			/* Active count of 0 deletes group on hw. */
			err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
						      &batch);
			if (!err) {
				entry->to_remove = false;
				entry->offloaded = false;
			} else {
				nfp_flower_cmsg_warn(priv->app,
						     "group delete failed\n");
				schedule_delayed_work(&lag->work,
						      NFP_FL_LAG_DELAY);
				continue;
			}

			if (entry->to_destroy) {
				ida_free(&lag->ida_handle, entry->group_id);
				list_del(&entry->list);
				kfree(entry);
			}
			continue;
		}

		acti_netdevs = kmalloc_array(entry->slave_cnt,
					     sizeof(*acti_netdevs), GFP_KERNEL);
		if (!acti_netdevs) {
			schedule_delayed_work(&lag->work,
					      NFP_FL_LAG_DELAY);
			continue;
		}

		/* Include sanity check in the loop. It may be that a bond has
		 * changed between processing the last notification and the
		 * work queue triggering. If the number of slaves has changed
		 * or it now contains netdevs that cannot be offloaded, ignore
		 * the group until pending notifications are processed.
		 */
		for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
			if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
				slaves = 0;
				break;
			}

			repr = netdev_priv(iter_netdev);

			if (repr->app != priv->app) {
				slaves = 0;
				break;
			}

			slaves++;
			if (slaves > entry->slave_cnt)
				break;

			/* Check the ports for state changes. */
			repr_priv = repr->app_priv;
			flags = &repr_priv->lag_port_flags;

			if (*flags & NFP_PORT_LAG_CHANGED) {
				*flags &= ~NFP_PORT_LAG_CHANGED;
				entry->dirty = true;
			}

			if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
			    (*flags & NFP_PORT_LAG_LINK_UP))
				acti_netdevs[active_count++] = iter_netdev;
		}

		if (slaves != entry->slave_cnt || !entry->dirty) {
			kfree(acti_netdevs);
			continue;
		}

		err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
					      active_count, &batch);
		if (!err) {
			entry->offloaded = true;
			entry->dirty = false;
		} else {
			nfp_flower_cmsg_warn(priv->app,
					     "group offload failed\n");
			schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
		}

		kfree(acti_netdevs);
	}

	/* End the config batch if at least one packet has been batched. */
	if (batch == NFP_FL_LAG_BATCH_MEMBER) {
		batch = NFP_FL_LAG_BATCH_FINISHED;
		err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "group batch end cmsg failed\n");
	}

	mutex_unlock(&lag->lock);
}

static int
nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	if (be32_to_cpu(cmsg_payload->group_id) > NFP_FL_LAG_GROUP_MAX)
		return -EINVAL;

	/* Drop cmsg retrans if storage limit is exceeded to prevent
	 * overloading. If the fw notices that expected messages have not been
	 * received in a given time block, it will request a full resync.
	 */
	if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT)
		return -ENOSPC;

	__skb_queue_tail(&lag->retrans_skbs, skb);

	return 0;
}

static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag)
{
	struct nfp_flower_priv *priv;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	while ((skb = __skb_dequeue(&lag->retrans_skbs)))
		nfp_ctrl_tx(priv->app->ctrl, skb);
}

bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group_entry;
	unsigned long int flags;
	bool store_skb = false;
	int err;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	flags = cmsg_payload->ctrl_flags;

	/* Note the intentional fall through below. If DATA and XON are both
	 * set, the message will be stored and sent again with the rest of the
	 * unprocessed messages list.
	 */

	/* Store */
	if (flags & NFP_FL_LAG_DATA)
		if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb))
			store_skb = true;

	/* Send stored */
	if (flags & NFP_FL_LAG_XON)
		nfp_fl_send_unprocessed(&priv->nfp_lag);

	/* Resend all */
	if (flags & NFP_FL_LAG_SYNC) {
		/* To resend all config:
		 * 1) Clear all unprocessed messages
		 * 2) Mark all groups dirty
		 * 3) Reset NFP group config
		 * 4) Schedule a LAG config update
		 */
		__skb_queue_purge(&priv->nfp_lag.retrans_skbs);

		mutex_lock(&priv->nfp_lag.lock);
		list_for_each_entry(group_entry, &priv->nfp_lag.group_list,
				    list)
			group_entry->dirty = true;

		err = nfp_flower_lag_reset(&priv->nfp_lag);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "mem err in group reset msg\n");
		mutex_unlock(&priv->nfp_lag.lock);

		schedule_delayed_work(&priv->nfp_lag.work, 0);
	}

	return store_skb;
}

static void
nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
				 struct nfp_fl_lag_group *group)
{
	group->to_remove = true;

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}

static void
nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
				 struct net_device *master)
{
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	if (!netif_is_bond_master(master))
		return;

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
	if (!group) {
		mutex_unlock(&lag->lock);
		nfp_warn(priv->app->cpp, "untracked bond got unregistered %s\n",
			 netdev_name(master));
		return;
	}

	group->to_remove = true;
	group->to_destroy = true;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}

static int
nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
			     struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *iter_netdev;
	struct netdev_lag_upper_info *lag_upper_info;
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	unsigned int slave_count = 0;
	bool can_offload = true;
	struct nfp_repr *repr;

	if (!netif_is_lag_master(upper))
		return 0;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, iter_netdev) {
		if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
			can_offload = false;
			break;
		}
		repr = netdev_priv(iter_netdev);

		/* Ensure all ports are created by the same app/on same card. */
		if (repr->app != priv->app) {
			can_offload = false;
			break;
		}

		slave_count++;
	}
	rcu_read_unlock();

	lag_upper_info = info->upper_info;

	/* Firmware supports active/backup and L3/L4 hash bonds. */
	if (lag_upper_info &&
	    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
	     (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) {
		can_offload = false;
		nfp_flower_cmsg_warn(priv->app,
				     "Unable to offload tx_type %u hash %u\n",
				     lag_upper_info->tx_type,
				     lag_upper_info->hash_type);
	}

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);

	if (slave_count == 0 || !can_offload) {
		/* Cannot offload the group - remove if previously offloaded. */
		if (group && group->offloaded)
			nfp_fl_lag_schedule_group_remove(lag, group);

		mutex_unlock(&lag->lock);
		return 0;
	}

	if (!group) {
		group = nfp_fl_lag_group_create(lag, upper);
		if (IS_ERR(group)) {
			mutex_unlock(&lag->lock);
			return PTR_ERR(group);
		}
	}

	group->dirty = true;
	group->slave_cnt = slave_count;

	/* Group may have been on queue for removal but is now offloadable. */
	group->to_remove = false;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);

	return 0;
}

static void
nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
			  struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	struct nfp_flower_repr_priv *repr_priv;
	struct nfp_flower_priv *priv;
	struct nfp_repr *repr;
	unsigned long *flags;

	if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
		return;

	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	repr = netdev_priv(netdev);

	/* Verify that the repr is associated with this app. */
	if (repr->app != priv->app)
		return;

	repr_priv = repr->app_priv;
	flags = &repr_priv->lag_port_flags;

	mutex_lock(&lag->lock);
	if (lag_lower_info->link_up)
		*flags |= NFP_PORT_LAG_LINK_UP;
	else
		*flags &= ~NFP_PORT_LAG_LINK_UP;

	if (lag_lower_info->tx_enabled)
		*flags |= NFP_PORT_LAG_TX_ENABLED;
	else
		*flags &= ~NFP_PORT_LAG_TX_ENABLED;

	*flags |= NFP_PORT_LAG_CHANGED;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}

int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv,
				struct net_device *netdev,
				unsigned long event, void *ptr)
{
	struct nfp_fl_lag *lag = &priv->nfp_lag;
	int err;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		err = nfp_fl_lag_changeupper_event(lag, ptr);
		if (err)
			return NOTIFY_BAD;
		return NOTIFY_OK;
	case NETDEV_CHANGELOWERSTATE:
		nfp_fl_lag_changels_event(lag, netdev, ptr);
		return NOTIFY_OK;
	case NETDEV_UNREGISTER:
		nfp_fl_lag_schedule_group_delete(lag, netdev);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;

	lag->rst_cfg = true;
	return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
}

void nfp_flower_lag_init(struct nfp_fl_lag *lag)
{
	INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
	INIT_LIST_HEAD(&lag->group_list);
	mutex_init(&lag->lock);
	ida_init(&lag->ida_handle);

	__skb_queue_head_init(&lag->retrans_skbs);

	/* 0 is a reserved batch version so increment to first valid value. */
	nfp_fl_increment_version(lag);
}

void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
{
	struct nfp_fl_lag_group *entry, *storage;

	cancel_delayed_work_sync(&lag->work);

	__skb_queue_purge(&lag->retrans_skbs);

	/* Remove all groups. */
	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		list_del(&entry->list);
		kfree(entry);
	}
	mutex_unlock(&lag->lock);
	mutex_destroy(&lag->lock);
	ida_destroy(&lag->ida_handle);
}