1 // SPDX-License-Identifier: GPL-2.0-or-later
/*
 * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team
 * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com>
 */
7 #include <linux/kernel.h>
8 #include <linux/types.h>
9 #include <linux/module.h>
10 #include <linux/init.h>
11 #include <linux/errno.h>
12 #include <linux/netdevice.h>
13 #include <linux/etherdevice.h>
14 #include <linux/filter.h>
15 #include <linux/if_team.h>
17 static rx_handler_result_t
lb_receive(struct team
*team
, struct team_port
*port
,
20 if (unlikely(skb
->protocol
== htons(ETH_P_SLOW
))) {
21 /* LACPDU packets should go to exact delivery */
22 const unsigned char *dest
= eth_hdr(skb
)->h_dest
;
24 if (is_link_local_ether_addr(dest
) && dest
[5] == 0x02)
25 return RX_HANDLER_EXACT
;
27 return RX_HANDLER_ANOTHER
;
/* Signature of a tx port selector: given the team and the 8-bit packet hash,
 * return the port to transmit on (may be NULL if none is usable).
 */
typedef struct team_port *lb_select_tx_port_func_t(struct team *,
						   unsigned char);

#define LB_TX_HASHTABLE_SIZE 256 /* hash is a char */
41 struct lb_pcpu_stats
{
42 struct lb_stats hash_stats
[LB_TX_HASHTABLE_SIZE
];
43 struct u64_stats_sync syncp
;
46 struct lb_stats_info
{
47 struct lb_stats stats
;
48 struct lb_stats last_stats
;
49 struct team_option_inst_info
*opt_inst_info
;
52 struct lb_port_mapping
{
53 struct team_port __rcu
*port
;
54 struct team_option_inst_info
*opt_inst_info
;
59 struct lb_port_mapping tx_hash_to_port_mapping
[LB_TX_HASHTABLE_SIZE
];
60 struct sock_fprog_kern
*orig_fprog
;
62 unsigned int refresh_interval
; /* in tenths of second */
63 struct delayed_work refresh_dw
;
64 struct lb_stats_info info
[LB_TX_HASHTABLE_SIZE
];
69 struct bpf_prog __rcu
*fp
;
70 lb_select_tx_port_func_t __rcu
*select_tx_port_func
;
71 struct lb_pcpu_stats __percpu
*pcpu_stats
;
72 struct lb_priv_ex
*ex
; /* priv extension */
75 static struct lb_priv
*get_lb_priv(struct team
*team
)
77 return (struct lb_priv
*) &team
->mode_priv
;
81 struct lb_stats __percpu
*pcpu_stats
;
82 struct lb_stats_info stats_info
;
85 static struct lb_port_priv
*get_lb_port_priv(struct team_port
*port
)
87 return (struct lb_port_priv
*) &port
->mode_priv
;
/* Accessors for the hash -> tx port mapping table kept in lb_priv->ex.
 *
 * Fix: the macro parameter was spelled "lp_priv" while the body expanded
 * "lb_priv", so the macros were unhygienic and only worked because every
 * caller happened to pass a variable literally named lb_priv. Name the
 * parameter consistently so the argument is actually substituted.
 */
#define LB_HTPM_PORT_BY_HASH(lb_priv, hash) \
	(lb_priv)->ex->tx_hash_to_port_mapping[hash].port

#define LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) \
	(lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info
96 static void lb_tx_hash_to_port_mapping_null_port(struct team
*team
,
97 struct team_port
*port
)
99 struct lb_priv
*lb_priv
= get_lb_priv(team
);
100 bool changed
= false;
103 for (i
= 0; i
< LB_TX_HASHTABLE_SIZE
; i
++) {
104 struct lb_port_mapping
*pm
;
106 pm
= &lb_priv
->ex
->tx_hash_to_port_mapping
[i
];
107 if (rcu_access_pointer(pm
->port
) == port
) {
108 RCU_INIT_POINTER(pm
->port
, NULL
);
109 team_option_inst_set_change(pm
->opt_inst_info
);
114 team_options_change_check(team
);
/* Basic tx selection based solely by hash */
static struct team_port *lb_hash_select_tx_port(struct team *team,
						unsigned char hash)
{
	int port_index = team_num_to_port_index(team, hash);

	return team_get_port_by_index_rcu(team, port_index);
}
/* Hash to port mapping select tx port */
static struct team_port *lb_htpm_select_tx_port(struct team *team,
						unsigned char hash)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	struct team_port *port;

	port = rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
	if (likely(port))
		return port;
	/* If no valid port in the table, fall back to simple hash */
	return lb_hash_select_tx_port(team, hash);
}
140 struct lb_select_tx_port
{
142 lb_select_tx_port_func_t
*func
;
145 static const struct lb_select_tx_port lb_select_tx_port_list
[] = {
148 .func
= lb_hash_select_tx_port
,
151 .name
= "hash_to_port_mapping",
152 .func
= lb_htpm_select_tx_port
,
155 #define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list)
157 static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t
*func
)
161 for (i
= 0; i
< LB_SELECT_TX_PORT_LIST_COUNT
; i
++) {
162 const struct lb_select_tx_port
*item
;
164 item
= &lb_select_tx_port_list
[i
];
165 if (item
->func
== func
)
171 static lb_select_tx_port_func_t
*lb_select_tx_port_get_func(const char *name
)
175 for (i
= 0; i
< LB_SELECT_TX_PORT_LIST_COUNT
; i
++) {
176 const struct lb_select_tx_port
*item
;
178 item
= &lb_select_tx_port_list
[i
];
179 if (!strcmp(item
->name
, name
))
185 static unsigned int lb_get_skb_hash(struct lb_priv
*lb_priv
,
192 fp
= rcu_dereference_bh(lb_priv
->fp
);
195 lhash
= bpf_prog_run(fp
, skb
);
197 return c
[0] ^ c
[1] ^ c
[2] ^ c
[3];
200 static void lb_update_tx_stats(unsigned int tx_bytes
, struct lb_priv
*lb_priv
,
201 struct lb_port_priv
*lb_port_priv
,
204 struct lb_pcpu_stats
*pcpu_stats
;
205 struct lb_stats
*port_stats
;
206 struct lb_stats
*hash_stats
;
208 pcpu_stats
= this_cpu_ptr(lb_priv
->pcpu_stats
);
209 port_stats
= this_cpu_ptr(lb_port_priv
->pcpu_stats
);
210 hash_stats
= &pcpu_stats
->hash_stats
[hash
];
211 u64_stats_update_begin(&pcpu_stats
->syncp
);
212 port_stats
->tx_bytes
+= tx_bytes
;
213 hash_stats
->tx_bytes
+= tx_bytes
;
214 u64_stats_update_end(&pcpu_stats
->syncp
);
217 static bool lb_transmit(struct team
*team
, struct sk_buff
*skb
)
219 struct lb_priv
*lb_priv
= get_lb_priv(team
);
220 lb_select_tx_port_func_t
*select_tx_port_func
;
221 struct team_port
*port
;
223 unsigned int tx_bytes
= skb
->len
;
225 hash
= lb_get_skb_hash(lb_priv
, skb
);
226 select_tx_port_func
= rcu_dereference_bh(lb_priv
->select_tx_port_func
);
227 port
= select_tx_port_func(team
, hash
);
230 if (team_dev_queue_xmit(team
, port
, skb
))
232 lb_update_tx_stats(tx_bytes
, lb_priv
, get_lb_port_priv(port
), hash
);
236 dev_kfree_skb_any(skb
);
240 static void lb_bpf_func_get(struct team
*team
, struct team_gsetter_ctx
*ctx
)
242 struct lb_priv
*lb_priv
= get_lb_priv(team
);
244 if (!lb_priv
->ex
->orig_fprog
) {
245 ctx
->data
.bin_val
.len
= 0;
246 ctx
->data
.bin_val
.ptr
= NULL
;
249 ctx
->data
.bin_val
.len
= lb_priv
->ex
->orig_fprog
->len
*
250 sizeof(struct sock_filter
);
251 ctx
->data
.bin_val
.ptr
= lb_priv
->ex
->orig_fprog
->filter
;
254 static int __fprog_create(struct sock_fprog_kern
**pfprog
, u32 data_len
,
257 struct sock_fprog_kern
*fprog
;
258 struct sock_filter
*filter
= (struct sock_filter
*) data
;
260 if (data_len
% sizeof(struct sock_filter
))
262 fprog
= kmalloc(sizeof(*fprog
), GFP_KERNEL
);
265 fprog
->filter
= kmemdup(filter
, data_len
, GFP_KERNEL
);
266 if (!fprog
->filter
) {
270 fprog
->len
= data_len
/ sizeof(struct sock_filter
);
275 static void __fprog_destroy(struct sock_fprog_kern
*fprog
)
277 kfree(fprog
->filter
);
281 static int lb_bpf_func_set(struct team
*team
, struct team_gsetter_ctx
*ctx
)
283 struct lb_priv
*lb_priv
= get_lb_priv(team
);
284 struct bpf_prog
*fp
= NULL
;
285 struct bpf_prog
*orig_fp
= NULL
;
286 struct sock_fprog_kern
*fprog
= NULL
;
289 if (ctx
->data
.bin_val
.len
) {
290 err
= __fprog_create(&fprog
, ctx
->data
.bin_val
.len
,
291 ctx
->data
.bin_val
.ptr
);
294 err
= bpf_prog_create(&fp
, fprog
);
296 __fprog_destroy(fprog
);
301 if (lb_priv
->ex
->orig_fprog
) {
302 /* Clear old filter data */
303 __fprog_destroy(lb_priv
->ex
->orig_fprog
);
304 orig_fp
= rcu_dereference_protected(lb_priv
->fp
,
305 lockdep_is_held(&team
->lock
));
308 rcu_assign_pointer(lb_priv
->fp
, fp
);
309 lb_priv
->ex
->orig_fprog
= fprog
;
313 bpf_prog_destroy(orig_fp
);
318 static void lb_bpf_func_free(struct team
*team
)
320 struct lb_priv
*lb_priv
= get_lb_priv(team
);
323 if (!lb_priv
->ex
->orig_fprog
)
326 __fprog_destroy(lb_priv
->ex
->orig_fprog
);
327 fp
= rcu_dereference_protected(lb_priv
->fp
,
328 lockdep_is_held(&team
->lock
));
329 bpf_prog_destroy(fp
);
332 static void lb_tx_method_get(struct team
*team
, struct team_gsetter_ctx
*ctx
)
334 struct lb_priv
*lb_priv
= get_lb_priv(team
);
335 lb_select_tx_port_func_t
*func
;
338 func
= rcu_dereference_protected(lb_priv
->select_tx_port_func
,
339 lockdep_is_held(&team
->lock
));
340 name
= lb_select_tx_port_get_name(func
);
342 ctx
->data
.str_val
= name
;
345 static int lb_tx_method_set(struct team
*team
, struct team_gsetter_ctx
*ctx
)
347 struct lb_priv
*lb_priv
= get_lb_priv(team
);
348 lb_select_tx_port_func_t
*func
;
350 func
= lb_select_tx_port_get_func(ctx
->data
.str_val
);
353 rcu_assign_pointer(lb_priv
->select_tx_port_func
, func
);
357 static void lb_tx_hash_to_port_mapping_init(struct team
*team
,
358 struct team_option_inst_info
*info
)
360 struct lb_priv
*lb_priv
= get_lb_priv(team
);
361 unsigned char hash
= info
->array_index
;
363 LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv
, hash
) = info
;
366 static void lb_tx_hash_to_port_mapping_get(struct team
*team
,
367 struct team_gsetter_ctx
*ctx
)
369 struct lb_priv
*lb_priv
= get_lb_priv(team
);
370 struct team_port
*port
;
371 unsigned char hash
= ctx
->info
->array_index
;
373 port
= LB_HTPM_PORT_BY_HASH(lb_priv
, hash
);
374 ctx
->data
.u32_val
= port
? port
->dev
->ifindex
: 0;
377 static int lb_tx_hash_to_port_mapping_set(struct team
*team
,
378 struct team_gsetter_ctx
*ctx
)
380 struct lb_priv
*lb_priv
= get_lb_priv(team
);
381 struct team_port
*port
;
382 unsigned char hash
= ctx
->info
->array_index
;
384 list_for_each_entry(port
, &team
->port_list
, list
) {
385 if (ctx
->data
.u32_val
== port
->dev
->ifindex
&&
386 team_port_enabled(port
)) {
387 rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv
, hash
),
395 static void lb_hash_stats_init(struct team
*team
,
396 struct team_option_inst_info
*info
)
398 struct lb_priv
*lb_priv
= get_lb_priv(team
);
399 unsigned char hash
= info
->array_index
;
401 lb_priv
->ex
->stats
.info
[hash
].opt_inst_info
= info
;
404 static void lb_hash_stats_get(struct team
*team
, struct team_gsetter_ctx
*ctx
)
406 struct lb_priv
*lb_priv
= get_lb_priv(team
);
407 unsigned char hash
= ctx
->info
->array_index
;
409 ctx
->data
.bin_val
.ptr
= &lb_priv
->ex
->stats
.info
[hash
].stats
;
410 ctx
->data
.bin_val
.len
= sizeof(struct lb_stats
);
413 static void lb_port_stats_init(struct team
*team
,
414 struct team_option_inst_info
*info
)
416 struct team_port
*port
= info
->port
;
417 struct lb_port_priv
*lb_port_priv
= get_lb_port_priv(port
);
419 lb_port_priv
->stats_info
.opt_inst_info
= info
;
422 static void lb_port_stats_get(struct team
*team
, struct team_gsetter_ctx
*ctx
)
424 struct team_port
*port
= ctx
->info
->port
;
425 struct lb_port_priv
*lb_port_priv
= get_lb_port_priv(port
);
427 ctx
->data
.bin_val
.ptr
= &lb_port_priv
->stats_info
.stats
;
428 ctx
->data
.bin_val
.len
= sizeof(struct lb_stats
);
431 static void __lb_stats_info_refresh_prepare(struct lb_stats_info
*s_info
)
433 memcpy(&s_info
->last_stats
, &s_info
->stats
, sizeof(struct lb_stats
));
434 memset(&s_info
->stats
, 0, sizeof(struct lb_stats
));
437 static bool __lb_stats_info_refresh_check(struct lb_stats_info
*s_info
,
440 if (memcmp(&s_info
->last_stats
, &s_info
->stats
,
441 sizeof(struct lb_stats
))) {
442 team_option_inst_set_change(s_info
->opt_inst_info
);
448 static void __lb_one_cpu_stats_add(struct lb_stats
*acc_stats
,
449 struct lb_stats
*cpu_stats
,
450 struct u64_stats_sync
*syncp
)
456 start
= u64_stats_fetch_begin(syncp
);
457 tmp
.tx_bytes
= cpu_stats
->tx_bytes
;
458 } while (u64_stats_fetch_retry(syncp
, start
));
459 acc_stats
->tx_bytes
+= tmp
.tx_bytes
;
462 static void lb_stats_refresh(struct work_struct
*work
)
465 struct lb_priv
*lb_priv
;
466 struct lb_priv_ex
*lb_priv_ex
;
467 struct lb_pcpu_stats
*pcpu_stats
;
468 struct lb_stats
*stats
;
469 struct lb_stats_info
*s_info
;
470 struct team_port
*port
;
471 bool changed
= false;
475 lb_priv_ex
= container_of(work
, struct lb_priv_ex
,
476 stats
.refresh_dw
.work
);
478 team
= lb_priv_ex
->team
;
479 lb_priv
= get_lb_priv(team
);
481 if (!mutex_trylock(&team
->lock
)) {
482 schedule_delayed_work(&lb_priv_ex
->stats
.refresh_dw
, 0);
486 for (j
= 0; j
< LB_TX_HASHTABLE_SIZE
; j
++) {
487 s_info
= &lb_priv
->ex
->stats
.info
[j
];
488 __lb_stats_info_refresh_prepare(s_info
);
489 for_each_possible_cpu(i
) {
490 pcpu_stats
= per_cpu_ptr(lb_priv
->pcpu_stats
, i
);
491 stats
= &pcpu_stats
->hash_stats
[j
];
492 __lb_one_cpu_stats_add(&s_info
->stats
, stats
,
495 changed
|= __lb_stats_info_refresh_check(s_info
, team
);
498 list_for_each_entry(port
, &team
->port_list
, list
) {
499 struct lb_port_priv
*lb_port_priv
= get_lb_port_priv(port
);
501 s_info
= &lb_port_priv
->stats_info
;
502 __lb_stats_info_refresh_prepare(s_info
);
503 for_each_possible_cpu(i
) {
504 pcpu_stats
= per_cpu_ptr(lb_priv
->pcpu_stats
, i
);
505 stats
= per_cpu_ptr(lb_port_priv
->pcpu_stats
, i
);
506 __lb_one_cpu_stats_add(&s_info
->stats
, stats
,
509 changed
|= __lb_stats_info_refresh_check(s_info
, team
);
513 team_options_change_check(team
);
515 schedule_delayed_work(&lb_priv_ex
->stats
.refresh_dw
,
516 (lb_priv_ex
->stats
.refresh_interval
* HZ
) / 10);
518 mutex_unlock(&team
->lock
);
521 static void lb_stats_refresh_interval_get(struct team
*team
,
522 struct team_gsetter_ctx
*ctx
)
524 struct lb_priv
*lb_priv
= get_lb_priv(team
);
526 ctx
->data
.u32_val
= lb_priv
->ex
->stats
.refresh_interval
;
529 static int lb_stats_refresh_interval_set(struct team
*team
,
530 struct team_gsetter_ctx
*ctx
)
532 struct lb_priv
*lb_priv
= get_lb_priv(team
);
533 unsigned int interval
;
535 interval
= ctx
->data
.u32_val
;
536 if (lb_priv
->ex
->stats
.refresh_interval
== interval
)
538 lb_priv
->ex
->stats
.refresh_interval
= interval
;
540 schedule_delayed_work(&lb_priv
->ex
->stats
.refresh_dw
, 0);
542 cancel_delayed_work(&lb_priv
->ex
->stats
.refresh_dw
);
546 static const struct team_option lb_options
[] = {
548 .name
= "bpf_hash_func",
549 .type
= TEAM_OPTION_TYPE_BINARY
,
550 .getter
= lb_bpf_func_get
,
551 .setter
= lb_bpf_func_set
,
554 .name
= "lb_tx_method",
555 .type
= TEAM_OPTION_TYPE_STRING
,
556 .getter
= lb_tx_method_get
,
557 .setter
= lb_tx_method_set
,
560 .name
= "lb_tx_hash_to_port_mapping",
561 .array_size
= LB_TX_HASHTABLE_SIZE
,
562 .type
= TEAM_OPTION_TYPE_U32
,
563 .init
= lb_tx_hash_to_port_mapping_init
,
564 .getter
= lb_tx_hash_to_port_mapping_get
,
565 .setter
= lb_tx_hash_to_port_mapping_set
,
568 .name
= "lb_hash_stats",
569 .array_size
= LB_TX_HASHTABLE_SIZE
,
570 .type
= TEAM_OPTION_TYPE_BINARY
,
571 .init
= lb_hash_stats_init
,
572 .getter
= lb_hash_stats_get
,
575 .name
= "lb_port_stats",
577 .type
= TEAM_OPTION_TYPE_BINARY
,
578 .init
= lb_port_stats_init
,
579 .getter
= lb_port_stats_get
,
582 .name
= "lb_stats_refresh_interval",
583 .type
= TEAM_OPTION_TYPE_U32
,
584 .getter
= lb_stats_refresh_interval_get
,
585 .setter
= lb_stats_refresh_interval_set
,
589 static int lb_init(struct team
*team
)
591 struct lb_priv
*lb_priv
= get_lb_priv(team
);
592 lb_select_tx_port_func_t
*func
;
595 /* set default tx port selector */
596 func
= lb_select_tx_port_get_func("hash");
598 rcu_assign_pointer(lb_priv
->select_tx_port_func
, func
);
600 lb_priv
->ex
= kzalloc(sizeof(*lb_priv
->ex
), GFP_KERNEL
);
603 lb_priv
->ex
->team
= team
;
605 lb_priv
->pcpu_stats
= alloc_percpu(struct lb_pcpu_stats
);
606 if (!lb_priv
->pcpu_stats
) {
608 goto err_alloc_pcpu_stats
;
611 for_each_possible_cpu(i
) {
612 struct lb_pcpu_stats
*team_lb_stats
;
613 team_lb_stats
= per_cpu_ptr(lb_priv
->pcpu_stats
, i
);
614 u64_stats_init(&team_lb_stats
->syncp
);
618 INIT_DELAYED_WORK(&lb_priv
->ex
->stats
.refresh_dw
, lb_stats_refresh
);
620 err
= team_options_register(team
, lb_options
, ARRAY_SIZE(lb_options
));
622 goto err_options_register
;
625 err_options_register
:
626 free_percpu(lb_priv
->pcpu_stats
);
627 err_alloc_pcpu_stats
:
632 static void lb_exit(struct team
*team
)
634 struct lb_priv
*lb_priv
= get_lb_priv(team
);
636 team_options_unregister(team
, lb_options
,
637 ARRAY_SIZE(lb_options
));
638 lb_bpf_func_free(team
);
639 cancel_delayed_work_sync(&lb_priv
->ex
->stats
.refresh_dw
);
640 free_percpu(lb_priv
->pcpu_stats
);
644 static int lb_port_enter(struct team
*team
, struct team_port
*port
)
646 struct lb_port_priv
*lb_port_priv
= get_lb_port_priv(port
);
648 lb_port_priv
->pcpu_stats
= alloc_percpu(struct lb_stats
);
649 if (!lb_port_priv
->pcpu_stats
)
654 static void lb_port_leave(struct team
*team
, struct team_port
*port
)
656 struct lb_port_priv
*lb_port_priv
= get_lb_port_priv(port
);
658 free_percpu(lb_port_priv
->pcpu_stats
);
/* A port got disabled: purge it from the hash -> port mapping table. */
static void lb_port_disabled(struct team *team, struct team_port *port)
{
	lb_tx_hash_to_port_mapping_null_port(team, port);
}
666 static const struct team_mode_ops lb_mode_ops
= {
669 .port_enter
= lb_port_enter
,
670 .port_leave
= lb_port_leave
,
671 .port_disabled
= lb_port_disabled
,
672 .receive
= lb_receive
,
673 .transmit
= lb_transmit
,
676 static const struct team_mode lb_mode
= {
677 .kind
= "loadbalance",
678 .owner
= THIS_MODULE
,
679 .priv_size
= sizeof(struct lb_priv
),
680 .port_priv_size
= sizeof(struct lb_port_priv
),
682 .lag_tx_type
= NETDEV_LAG_TX_TYPE_HASH
,
685 static int __init
lb_init_module(void)
687 return team_mode_register(&lb_mode
);
690 static void __exit
lb_cleanup_module(void)
692 team_mode_unregister(&lb_mode
);
/* Module boilerplate: entry/exit hooks and metadata. */
module_init(lb_init_module);
module_exit(lb_cleanup_module);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
MODULE_DESCRIPTION("Load-balancing mode for team");
MODULE_ALIAS_TEAM_MODE("loadbalance");