// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2018 Netronome Systems, Inc. */

#include "main.h"

/* LAG group config flags. */
#define NFP_FL_LAG_LAST			BIT(1)
#define NFP_FL_LAG_FIRST		BIT(2)
#define NFP_FL_LAG_DATA			BIT(3)
#define NFP_FL_LAG_XON			BIT(4)
#define NFP_FL_LAG_SYNC			BIT(5)
#define NFP_FL_LAG_SWITCH		BIT(6)
#define NFP_FL_LAG_RESET		BIT(7)

/* LAG port state flags. */
#define NFP_PORT_LAG_LINK_UP		BIT(0)
#define NFP_PORT_LAG_TX_ENABLED		BIT(1)
#define NFP_PORT_LAG_CHANGED		BIT(2)

enum nfp_fl_lag_batch {
	NFP_FL_LAG_BATCH_FIRST,
	NFP_FL_LAG_BATCH_MEMBER,
	NFP_FL_LAG_BATCH_FINISHED
};

/**
 * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
 * @ctrl_flags:	Configuration flags
 * @reserved:	Reserved for future use
 * @ttl:	Time to live of packet - host always sets to 0xff
 * @pkt_number:	Config message packet number - increment for each message
 * @batch_ver:	Batch version of messages - increment for each batch of messages
 * @group_id:	Group ID applicable
 * @group_inst:	Group instance number - increment when group is reused
 * @members:	Array of 32-bit words listing all active group members
 */
struct nfp_flower_cmsg_lag_config {
	u8 ctrl_flags;
	u8 reserved[2];
	u8 ttl;
	__be32 pkt_number;
	__be32 batch_ver;
	__be32 group_id;
	__be32 group_inst;
	__be32 members[];
};

/**
 * struct nfp_fl_lag_group - list entry for each LAG group
 * @group_id:		Assigned group ID for host/kernel sync
 * @group_inst:		Group instance in case of ID reuse
 * @list:		List entry
 * @master_ndev:	Group master Netdev
 * @dirty:		Marked if the group needs to be synced to HW
 * @offloaded:		Marked if the group is currently offloaded to NIC
 * @to_remove:		Marked if the group should be removed from NIC
 * @to_destroy:		Marked if the group should be removed from driver
 * @slave_cnt:		Number of slaves in group
 */
struct nfp_fl_lag_group {
	unsigned int group_id;
	u8 group_inst;
	struct list_head list;
	struct net_device *master_ndev;
	bool dirty;
	bool offloaded;
	bool to_remove;
	bool to_destroy;
	unsigned int slave_cnt;
};

#define NFP_FL_LAG_PKT_NUMBER_MASK	GENMASK(30, 0)
#define NFP_FL_LAG_VERSION_MASK		GENMASK(22, 0)
#define NFP_FL_LAG_HOST_TTL		0xff

/* Use this ID with zero members to ack a batch config. */
#define NFP_FL_LAG_SYNC_ID		0
#define NFP_FL_LAG_GROUP_MIN		1 /* ID 0 reserved */
#define NFP_FL_LAG_GROUP_MAX		32 /* IDs 1 to 31 are valid */

/* Wait this long for more config before syncing to HW. */
#define NFP_FL_LAG_DELAY		(msecs_to_jiffies(2))

#define NFP_FL_LAG_RETRANS_LIMIT	100 /* max retrans cmsgs to store */
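
/* Return the next config message packet number, wrapping within the
 * 31-bit range allowed by NFP_FL_LAG_PKT_NUMBER_MASK.
 */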
static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
{
	lag->pkt_num++;
	lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK;

	return lag->pkt_num;
}
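
/* Advance the batch version. The firmware ignores the LSB and treats
 * zero as reserved, so valid versions step through 2, 4, 6, ... and
 * wrap back to 2.
 */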
static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
{
	/* LSB is not considered by firmware so add 2 for each increment. */
	lag->batch_ver += 2;
	lag->batch_ver &= NFP_FL_LAG_VERSION_MASK;

	/* Zero is reserved by firmware. */
	if (!lag->batch_ver)
		lag->batch_ver += 2;
}
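
/* Allocate a group ID and a list entry for a new LAG group. The group
 * starts dirty so it is synced to HW on the next config batch. Called
 * with lag->lock held.
 */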
static struct nfp_fl_lag_group *
nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
{
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	int id;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
			    NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
	if (id < 0) {
		nfp_flower_cmsg_warn(priv->app,
				     "No more bonding groups available\n");
		return ERR_PTR(id);
	}

	group = kmalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		ida_simple_remove(&lag->ida_handle, id);
		return ERR_PTR(-ENOMEM);
	}

	group->group_id = id;
	group->master_ndev = master;
	group->dirty = true;
	group->offloaded = false;
	group->to_remove = false;
	group->to_destroy = false;
	group->slave_cnt = 0;
	group->group_inst = ++lag->global_inst;
	list_add_tail(&group->list, &lag->group_list);

	return group;
}
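
/* Return the group whose master netdev matches, or NULL if the bond is
 * not tracked. Called with lag->lock held.
 */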
static struct nfp_fl_lag_group *
nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
					  struct net_device *master)
{
	struct nfp_fl_lag_group *entry;

	if (!master)
		return NULL;

	list_for_each_entry(entry, &lag->group_list, list)
		if (entry->master_ndev == master)
			return entry;

	return NULL;
}
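
/* Write the group ID, current batch version and group instance of the
 * bond into a pre-LAG action so the firmware can validate the action
 * against its current group config.
 */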
int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
				       struct net_device *master,
				       struct nfp_fl_pre_lag *pre_act,
				       struct netlink_ext_ack *extack)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	__be32 temp_vers;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  master);
	if (!group) {
		mutex_unlock(&priv->nfp_lag.lock);
		NL_SET_ERR_MSG_MOD(extack, "invalid entry: group does not exist for LAG action");
		return -ENOENT;
	}

	pre_act->group_id = cpu_to_be16(group->group_id);
	temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver <<
				NFP_FL_PRE_LAG_VER_OFF);
	memcpy(pre_act->lag_version, &temp_vers, 3);
	pre_act->instance = group->group_inst;
	mutex_unlock(&priv->nfp_lag.lock);

	return 0;
}
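
/* Look up the offload group ID for a bond master; returns -ENOENT if
 * the master is not currently tracked.
 */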
int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master)
{
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group = NULL;
	int group_id = -ENOENT;

	mutex_lock(&priv->nfp_lag.lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
							  master);
	if (group)
		group_id = group->group_id;
	mutex_unlock(&priv->nfp_lag.lock);

	return group_id;
}
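
/* Build and send one LAG config cmsg for a group and its active
 * members. The first message of a batch bumps the batch version, and a
 * NULL group with *batch set to FINISHED emits the batch-ending sync
 * message using the reserved SYNC group ID.
 */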
static int
nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
			struct net_device **active_members,
			unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv;
	unsigned long int flags;
	unsigned int size, i;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
	skb = nfp_flower_cmsg_alloc(priv->app, size,
				    NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
				    GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	flags = 0;

	/* Increment batch version for each new batch of config messages. */
	if (*batch == NFP_FL_LAG_BATCH_FIRST) {
		flags |= NFP_FL_LAG_FIRST;
		nfp_fl_increment_version(lag);
		*batch = NFP_FL_LAG_BATCH_MEMBER;
	}

	/* If it is a reset msg then it is also the end of the batch. */
	if (lag->rst_cfg) {
		flags |= NFP_FL_LAG_RESET;
		*batch = NFP_FL_LAG_BATCH_FINISHED;
	}

	/* To signal the end of a batch, both the switch and last flags are
	 * set and the reserved SYNC group ID is used.
	 */
	if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
		flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
		lag->rst_cfg = false;
		cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
		cmsg_payload->group_inst = 0;
	} else {
		cmsg_payload->group_id = cpu_to_be32(group->group_id);
		cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
	}

	cmsg_payload->reserved[0] = 0;
	cmsg_payload->reserved[1] = 0;
	cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
	cmsg_payload->ctrl_flags = flags;
	cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
	cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));

	for (i = 0; i < member_cnt; i++)
		cmsg_payload->members[i] =
			cpu_to_be32(nfp_repr_get_port_id(active_members[i]));

	nfp_ctrl_tx(priv->app->ctrl, skb);

	return 0;
}
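
/* Delayed work that syncs LAG state to HW: groups marked for removal
 * are deleted (and optionally freed), dirty groups are re-sent with
 * their currently active members, and the batch is closed with a sync
 * message.
 */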
static void nfp_fl_lag_do_work(struct work_struct *work)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
	struct nfp_fl_lag_group *entry, *storage;
	struct delayed_work *delayed_work;
	struct nfp_flower_priv *priv;
	struct nfp_fl_lag *lag;
	int err;

	delayed_work = to_delayed_work(work);
	lag = container_of(delayed_work, struct nfp_fl_lag, work);
	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		struct net_device *iter_netdev, **acti_netdevs;
		struct nfp_flower_repr_priv *repr_priv;
		int active_count = 0, slaves = 0;
		struct nfp_repr *repr;
		unsigned long *flags;

		if (entry->to_remove) {
			/* Active count of 0 deletes group on hw. */
			err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
						      &batch);
			if (!err) {
				entry->to_remove = false;
				entry->offloaded = false;
			} else {
				nfp_flower_cmsg_warn(priv->app,
						     "group delete failed\n");
				schedule_delayed_work(&lag->work,
						      NFP_FL_LAG_DELAY);
				continue;
			}

			if (entry->to_destroy) {
				ida_simple_remove(&lag->ida_handle,
						  entry->group_id);
				list_del(&entry->list);
				kfree(entry);
			}
			continue;
		}

		acti_netdevs = kmalloc_array(entry->slave_cnt,
					     sizeof(*acti_netdevs), GFP_KERNEL);
		if (!acti_netdevs) {
			schedule_delayed_work(&lag->work,
					      NFP_FL_LAG_DELAY);
			continue;
		}

		/* Include sanity check in the loop. It may be that a bond has
		 * changed between processing the last notification and the
		 * work queue triggering. If the number of slaves has changed
		 * or it now contains netdevs that cannot be offloaded, ignore
		 * the group until pending notifications are processed.
		 */
		rcu_read_lock();
		for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
			if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
				slaves = 0;
				break;
			}

			repr = netdev_priv(iter_netdev);

			if (repr->app != priv->app) {
				slaves = 0;
				break;
			}

			slaves++;
			if (slaves > entry->slave_cnt)
				break;

			/* Check the ports for state changes. */
			repr_priv = repr->app_priv;
			flags = &repr_priv->lag_port_flags;

			if (*flags & NFP_PORT_LAG_CHANGED) {
				*flags &= ~NFP_PORT_LAG_CHANGED;
				entry->dirty = true;
			}

			if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
			    (*flags & NFP_PORT_LAG_LINK_UP))
				acti_netdevs[active_count++] = iter_netdev;
		}
		rcu_read_unlock();

		if (slaves != entry->slave_cnt || !entry->dirty) {
			kfree(acti_netdevs);
			continue;
		}

		err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
					      active_count, &batch);
		if (!err) {
			entry->offloaded = true;
			entry->dirty = false;
		} else {
			nfp_flower_cmsg_warn(priv->app,
					     "group offload failed\n");
			schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
		}

		kfree(acti_netdevs);
	}

	/* End the config batch if at least one packet has been batched. */
	if (batch == NFP_FL_LAG_BATCH_MEMBER) {
		batch = NFP_FL_LAG_BATCH_FINISHED;
		err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "group batch end cmsg failed\n");
	}

	mutex_unlock(&lag->lock);
}
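
/* Store a data cmsg so it can be replayed to the firmware when an XON
 * message is received.
 */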
static int
nfp_fl_lag_put_unprocessed(struct nfp_fl_lag *lag, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	if (be32_to_cpu(cmsg_payload->group_id) >= NFP_FL_LAG_GROUP_MAX)
		return -EINVAL;

	/* Drop cmsg retrans if storage limit is exceeded to prevent
	 * overloading. If the fw notices that expected messages have not been
	 * received in a given time block, it will request a full resync.
	 */
	if (skb_queue_len(&lag->retrans_skbs) >= NFP_FL_LAG_RETRANS_LIMIT)
		return -ENOSPC;

	__skb_queue_tail(&lag->retrans_skbs, skb);

	return 0;
}
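
/* Replay all stored cmsgs to the firmware in arrival order. */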
static void nfp_fl_send_unprocessed(struct nfp_fl_lag *lag)
{
	struct nfp_flower_priv *priv;
	struct sk_buff *skb;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	while ((skb = __skb_dequeue(&lag->retrans_skbs)))
		nfp_ctrl_tx(priv->app->ctrl, skb);
}
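
/* Handle a LAG cmsg from the firmware. Returns true if the skb was
 * stored for later retransmission and so must not be freed by the
 * caller.
 */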
bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb)
{
	struct nfp_flower_cmsg_lag_config *cmsg_payload;
	struct nfp_flower_priv *priv = app->priv;
	struct nfp_fl_lag_group *group_entry;
	unsigned long int flags;
	bool store_skb = false;
	int err;

	cmsg_payload = nfp_flower_cmsg_get_data(skb);
	flags = cmsg_payload->ctrl_flags;

	/* Note the intentional fall through below. If DATA and XON are both
	 * set, the message will be stored and sent again with the rest of the
	 * unprocessed messages list.
	 */

	/* Store */
	if (flags & NFP_FL_LAG_DATA)
		if (!nfp_fl_lag_put_unprocessed(&priv->nfp_lag, skb))
			store_skb = true;

	/* Send stored */
	if (flags & NFP_FL_LAG_XON)
		nfp_fl_send_unprocessed(&priv->nfp_lag);

	/* Resend all */
	if (flags & NFP_FL_LAG_SYNC) {
		/* To resend all config:
		 * 1) Clear all unprocessed messages
		 * 2) Mark all groups dirty
		 * 3) Reset NFP group config
		 * 4) Schedule a LAG config update
		 */
		__skb_queue_purge(&priv->nfp_lag.retrans_skbs);

		mutex_lock(&priv->nfp_lag.lock);
		list_for_each_entry(group_entry, &priv->nfp_lag.group_list,
				    list)
			group_entry->dirty = true;

		err = nfp_flower_lag_reset(&priv->nfp_lag);
		if (err)
			nfp_flower_cmsg_warn(priv->app,
					     "mem err in group reset msg\n");
		mutex_unlock(&priv->nfp_lag.lock);

		schedule_delayed_work(&priv->nfp_lag.work, 0);
	}

	return store_skb;
}
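
/* Mark a group for removal from HW and kick the config work. Called
 * with lag->lock held.
 */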
static void
nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
				 struct nfp_fl_lag_group *group)
{
	group->to_remove = true;

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}
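
/* Schedule removal of a group from both HW and the driver when its
 * bond master is unregistered.
 */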
static void
nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
				 struct net_device *master)
{
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	if (!netif_is_bond_master(master))
		return;

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
	if (!group) {
		mutex_unlock(&lag->lock);
		nfp_warn(priv->app->cpp, "untracked bond got unregistered %s\n",
			 netdev_name(master));
		return;
	}

	group->to_remove = true;
	group->to_destroy = true;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}
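
/* Handle a CHANGEUPPER event on a bond: check that every slave is a
 * repr of this app and that the TX policy is supported by firmware,
 * then create or refresh the group, or schedule its removal if it is
 * no longer offloadable.
 */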
static int
nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
			     struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *iter_netdev;
	struct netdev_lag_upper_info *lag_upper_info;
	struct nfp_fl_lag_group *group;
	struct nfp_flower_priv *priv;
	unsigned int slave_count = 0;
	bool can_offload = true;
	struct nfp_repr *repr;

	if (!netif_is_lag_master(upper))
		return 0;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);

	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, iter_netdev) {
		if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
			can_offload = false;
			break;
		}
		repr = netdev_priv(iter_netdev);

		/* Ensure all ports are created by the same app/on same card. */
		if (repr->app != priv->app) {
			can_offload = false;
			break;
		}

		slave_count++;
	}
	rcu_read_unlock();

	lag_upper_info = info->upper_info;

	/* Firmware supports active/backup and L3/L4 hash bonds. */
	if (lag_upper_info &&
	    lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
	     (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_E34 &&
	      lag_upper_info->hash_type != NETDEV_LAG_HASH_UNKNOWN))) {
		can_offload = false;
		nfp_flower_cmsg_warn(priv->app,
				     "Unable to offload tx_type %u hash %u\n",
				     lag_upper_info->tx_type,
				     lag_upper_info->hash_type);
	}

	mutex_lock(&lag->lock);
	group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);

	if (slave_count == 0 || !can_offload) {
		/* Cannot offload the group - remove if previously offloaded. */
		if (group && group->offloaded)
			nfp_fl_lag_schedule_group_remove(lag, group);

		mutex_unlock(&lag->lock);
		return 0;
	}

	if (!group) {
		group = nfp_fl_lag_group_create(lag, upper);
		if (IS_ERR(group)) {
			mutex_unlock(&lag->lock);
			return PTR_ERR(group);
		}
	}

	group->dirty = true;
	group->slave_cnt = slave_count;

	/* Group may have been on queue for removal but is now offloadable. */
	group->to_remove = false;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);

	return 0;
}
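
/* Record link and TX state changes on a bonded repr port; the delayed
 * work picks up the CHANGED flag and re-syncs the group.
 */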
static void
nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
			  struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	struct nfp_flower_repr_priv *repr_priv;
	struct nfp_flower_priv *priv;
	struct nfp_repr *repr;
	unsigned long *flags;

	if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
		return;

	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return;

	priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
	repr = netdev_priv(netdev);

	/* Verify that the repr is associated with this app. */
	if (repr->app != priv->app)
		return;

	repr_priv = repr->app_priv;
	flags = &repr_priv->lag_port_flags;

	mutex_lock(&lag->lock);
	if (lag_lower_info->link_up)
		*flags |= NFP_PORT_LAG_LINK_UP;
	else
		*flags &= ~NFP_PORT_LAG_LINK_UP;

	if (lag_lower_info->tx_enabled)
		*flags |= NFP_PORT_LAG_TX_ENABLED;
	else
		*flags &= ~NFP_PORT_LAG_TX_ENABLED;

	*flags |= NFP_PORT_LAG_CHANGED;
	mutex_unlock(&lag->lock);

	schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
}
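
/* Netdev notifier entry point for bond events relevant to LAG offload. */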
int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv,
				struct net_device *netdev,
				unsigned long event, void *ptr)
{
	struct nfp_fl_lag *lag = &priv->nfp_lag;
	int err;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		err = nfp_fl_lag_changeupper_event(lag, ptr);
		if (err)
			return NOTIFY_BAD;
		return NOTIFY_OK;
	case NETDEV_CHANGELOWERSTATE:
		nfp_fl_lag_changels_event(lag, netdev, ptr);
		return NOTIFY_OK;
	case NETDEV_UNREGISTER:
		nfp_fl_lag_schedule_group_delete(lag, netdev);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}
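
/* Request that the firmware resets its LAG config by sending a single
 * batch-ending reset message.
 */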
int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
{
	enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;

	lag->rst_cfg = true;
	return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
}
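
/* Initialise LAG state: config work, group list, lock, group ID
 * allocator and the cmsg retransmission queue.
 */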
void nfp_flower_lag_init(struct nfp_fl_lag *lag)
{
	INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
	INIT_LIST_HEAD(&lag->group_list);
	mutex_init(&lag->lock);
	ida_init(&lag->ida_handle);

	__skb_queue_head_init(&lag->retrans_skbs);

	/* 0 is a reserved batch version so increment to first valid value. */
	nfp_fl_increment_version(lag);
}
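
/* Tear down LAG state and free any remaining groups. */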
void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
{
	struct nfp_fl_lag_group *entry, *storage;

	cancel_delayed_work_sync(&lag->work);

	__skb_queue_purge(&lag->retrans_skbs);

	/* Remove all groups. */
	mutex_lock(&lag->lock);
	list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
		list_del(&entry->list);
		kfree(entry);
	}
	mutex_unlock(&lag->lock);
	mutex_destroy(&lag->lock);
	ida_destroy(&lag->ida_handle);
}