/*
 * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team
 * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_team.h>
struct lb_priv;

typedef struct team_port *lb_select_tx_port_func_t(struct team *,
						    struct lb_priv *,
						    struct sk_buff *,
						    unsigned char);

#define LB_TX_HASHTABLE_SIZE 256 /* hash is a char */
struct lb_stats {
	u64 tx_bytes;
};

struct lb_pcpu_stats {
	struct lb_stats hash_stats[LB_TX_HASHTABLE_SIZE];
	struct u64_stats_sync syncp;
};
struct lb_stats_info {
	struct lb_stats stats;
	struct lb_stats last_stats;
	struct team_option_inst_info *opt_inst_info;
};
struct lb_port_mapping {
	struct team_port __rcu *port;
	struct team_option_inst_info *opt_inst_info;
};
struct lb_priv_ex {
	struct team *team;
	struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE];
	struct sock_fprog *orig_fprog;
	struct {
		unsigned int refresh_interval; /* in tenths of second */
		struct delayed_work refresh_dw;
		struct lb_stats_info info[LB_TX_HASHTABLE_SIZE];
	} stats;
};

struct lb_priv {
	struct sk_filter __rcu *fp;
	lb_select_tx_port_func_t __rcu *select_tx_port_func;
	struct lb_pcpu_stats __percpu *pcpu_stats;
	struct lb_priv_ex *ex; /* priv extension */
};
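
/*
 * Mode-private and port-private data live in the mode_priv areas embedded
 * in struct team and struct team_port; their sizes are declared in lb_mode
 * at the bottom of this file.
 */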
static struct lb_priv *get_lb_priv(struct team *team)
{
	return (struct lb_priv *) &team->mode_priv;
}
struct lb_port_priv {
	struct lb_stats __percpu *pcpu_stats;
	struct lb_stats_info stats_info;
};
static struct lb_port_priv *get_lb_port_priv(struct team_port *port)
{
	return (struct lb_port_priv *) &port->mode_priv;
}
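
/*
 * Accessors for the hash-to-port mapping table kept in the priv extension;
 * the "hash_to_port_mapping" tx method looks up its egress port here.
 */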
#define LB_HTPM_PORT_BY_HASH(lb_priv, hash) \
	(lb_priv)->ex->tx_hash_to_port_mapping[hash].port

#define LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) \
	(lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info
static void lb_tx_hash_to_port_mapping_null_port(struct team *team,
						 struct team_port *port)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	bool changed = false;
	int i;

	for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) {
		struct lb_port_mapping *pm;

		pm = &lb_priv->ex->tx_hash_to_port_mapping[i];
		if (rcu_access_pointer(pm->port) == port) {
			RCU_INIT_POINTER(pm->port, NULL);
			team_option_inst_set_change(pm->opt_inst_info);
			changed = true;
		}
	}
	if (changed)
		team_options_change_check(team);
}
/* Basic tx selection based solely by hash */
static struct team_port *lb_hash_select_tx_port(struct team *team,
						struct lb_priv *lb_priv,
						struct sk_buff *skb,
						unsigned char hash)
{
	int port_index;

	port_index = hash % team->en_port_count;
	return team_get_port_by_index_rcu(team, port_index);
}
/* Hash to port mapping select tx port */
static struct team_port *lb_htpm_select_tx_port(struct team *team,
						struct lb_priv *lb_priv,
						struct sk_buff *skb,
						unsigned char hash)
{
	return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
}
struct lb_select_tx_port {
	char *name;
	lb_select_tx_port_func_t *func;
};

static const struct lb_select_tx_port lb_select_tx_port_list[] = {
	{
		.name = "hash",
		.func = lb_hash_select_tx_port,
	},
	{
		.name = "hash_to_port_mapping",
		.func = lb_htpm_select_tx_port,
	},
};

#define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list)
static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func)
{
	int i;

	for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
		const struct lb_select_tx_port *item;

		item = &lb_select_tx_port_list[i];
		if (item->func == func)
			return item->name;
	}
	return NULL;
}
static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name)
{
	int i;

	for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
		const struct lb_select_tx_port *item;

		item = &lb_select_tx_port_list[i];
		if (!strcmp(item->name, name))
			return item->func;
	}
	return NULL;
}
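
/*
 * Compute the tx hash for an skb by running the user-supplied BPF filter
 * (if one is attached) and folding its 32-bit result into a single byte.
 */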
static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
				    struct sk_buff *skb)
{
	struct sk_filter *fp;
	uint32_t lhash;
	unsigned char *c;

	fp = rcu_dereference_bh(lb_priv->fp);
	if (unlikely(!fp))
		return 0;
	lhash = SK_RUN_FILTER(fp, skb);
	c = (char *) &lhash;
	return c[0] ^ c[1] ^ c[2] ^ c[3];
}
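
/*
 * Account transmitted bytes in the per-CPU per-port and per-hash counters;
 * u64_stats_sync keeps 64-bit counter reads consistent on 32-bit machines.
 */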
static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv,
			       struct lb_port_priv *lb_port_priv,
			       unsigned char hash)
{
	struct lb_pcpu_stats *pcpu_stats;
	struct lb_stats *port_stats;
	struct lb_stats *hash_stats;

	pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats);
	port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats);
	hash_stats = &pcpu_stats->hash_stats[hash];
	u64_stats_update_begin(&pcpu_stats->syncp);
	port_stats->tx_bytes += tx_bytes;
	hash_stats->tx_bytes += tx_bytes;
	u64_stats_update_end(&pcpu_stats->syncp);
}
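
/*
 * Tx path: hash the skb, pick an egress port via the currently selected
 * method (read under RCU), transmit and account the bytes on success,
 * drop the skb otherwise.
 */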
static bool lb_transmit(struct team *team, struct sk_buff *skb)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	lb_select_tx_port_func_t *select_tx_port_func;
	struct team_port *port;
	unsigned char hash;
	unsigned int tx_bytes = skb->len;

	hash = lb_get_skb_hash(lb_priv, skb);
	select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func);
	port = select_tx_port_func(team, lb_priv, skb, hash);
	if (unlikely(!port))
		goto drop;
	if (team_dev_queue_xmit(team, port, skb))
		return false;
	lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash);
	return true;

drop:
	dev_kfree_skb_any(skb);
	return false;
}
static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);

	if (!lb_priv->ex->orig_fprog) {
		ctx->data.bin_val.len = 0;
		ctx->data.bin_val.ptr = NULL;
		return 0;
	}
	ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len *
				sizeof(struct sock_filter);
	ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter;
	return 0;
}
static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
			  const void *data)
{
	struct sock_fprog *fprog;
	struct sock_filter *filter = (struct sock_filter *) data;

	if (data_len % sizeof(struct sock_filter))
		return -EINVAL;
	fprog = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
	if (!fprog)
		return -ENOMEM;
	fprog->filter = kmemdup(filter, data_len, GFP_KERNEL);
	if (!fprog->filter) {
		kfree(fprog);
		return -ENOMEM;
	}
	fprog->len = data_len / sizeof(struct sock_filter);
	*pfprog = fprog;
	return 0;
}
static void __fprog_destroy(struct sock_fprog *fprog)
{
	kfree(fprog->filter);
	kfree(fprog);
}
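
/*
 * Install a new tx hashing BPF filter. Runs under team->lock via the
 * option setter path; the previous filter, if any, is released and the
 * new one is published with rcu_assign_pointer().
 */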
static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	struct sk_filter *fp = NULL;
	struct sk_filter *orig_fp;
	struct sock_fprog *fprog = NULL;
	int err;

	if (ctx->data.bin_val.len) {
		err = __fprog_create(&fprog, ctx->data.bin_val.len,
				     ctx->data.bin_val.ptr);
		if (err)
			return err;
		err = sk_unattached_filter_create(&fp, fprog);
		if (err) {
			__fprog_destroy(fprog);
			return err;
		}
	}

	if (lb_priv->ex->orig_fprog) {
		/* Clear old filter data */
		__fprog_destroy(lb_priv->ex->orig_fprog);
		orig_fp = rcu_dereference_protected(lb_priv->fp,
						lockdep_is_held(&team->lock));
		sk_unattached_filter_destroy(orig_fp);
	}

	rcu_assign_pointer(lb_priv->fp, fp);
	lb_priv->ex->orig_fprog = fprog;
	return 0;
}
static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	lb_select_tx_port_func_t *func;
	char *name;

	func = rcu_dereference_protected(lb_priv->select_tx_port_func,
					 lockdep_is_held(&team->lock));
	name = lb_select_tx_port_get_name(func);
	BUG_ON(!name);
	ctx->data.str_val = name;
	return 0;
}
static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	lb_select_tx_port_func_t *func;

	func = lb_select_tx_port_get_func(ctx->data.str_val);
	if (!func)
		return -EINVAL;
	rcu_assign_pointer(lb_priv->select_tx_port_func, func);
	return 0;
}
static int lb_tx_hash_to_port_mapping_init(struct team *team,
					   struct team_option_inst_info *info)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	unsigned char hash = info->array_index;

	LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info;
	return 0;
}
static int lb_tx_hash_to_port_mapping_get(struct team *team,
					  struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	struct team_port *port;
	unsigned char hash = ctx->info->array_index;

	port = LB_HTPM_PORT_BY_HASH(lb_priv, hash);
	ctx->data.u32_val = port ? port->dev->ifindex : 0;
	return 0;
}
static int lb_tx_hash_to_port_mapping_set(struct team *team,
					  struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	struct team_port *port;
	unsigned char hash = ctx->info->array_index;

	list_for_each_entry(port, &team->port_list, list) {
		if (ctx->data.u32_val == port->dev->ifindex &&
		    team_port_enabled(port)) {
			rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash),
					   port);
			return 0;
		}
	}
	return -ENODEV;
}
static int lb_hash_stats_init(struct team *team,
			      struct team_option_inst_info *info)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	unsigned char hash = info->array_index;

	lb_priv->ex->stats.info[hash].opt_inst_info = info;
	return 0;
}
static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	unsigned char hash = ctx->info->array_index;

	ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats;
	ctx->data.bin_val.len = sizeof(struct lb_stats);
	return 0;
}
static int lb_port_stats_init(struct team *team,
			      struct team_option_inst_info *info)
{
	struct team_port *port = info->port;
	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);

	lb_port_priv->stats_info.opt_inst_info = info;
	return 0;
}
static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
{
	struct team_port *port = ctx->info->port;
	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);

	ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats;
	ctx->data.bin_val.len = sizeof(struct lb_stats);
	return 0;
}
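
/*
 * Stats refresh helpers: snapshot the previously reported counters, rebuild
 * the totals from the per-CPU copies and mark the corresponding option
 * instance as changed when the value moved since the last refresh.
 */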
static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info)
{
	memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats));
	memset(&s_info->stats, 0, sizeof(struct lb_stats));
}
static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info,
					  struct team *team)
{
	if (memcmp(&s_info->last_stats, &s_info->stats,
		   sizeof(struct lb_stats))) {
		team_option_inst_set_change(s_info->opt_inst_info);
		return true;
	}
	return false;
}
static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats,
				   struct lb_stats *cpu_stats,
				   struct u64_stats_sync *syncp)
{
	unsigned int start;
	struct lb_stats tmp;

	do {
		start = u64_stats_fetch_begin_bh(syncp);
		tmp.tx_bytes = cpu_stats->tx_bytes;
	} while (u64_stats_fetch_retry_bh(syncp, start));
	acc_stats->tx_bytes += tmp.tx_bytes;
}
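
/*
 * Delayed work that periodically folds the per-CPU counters into the
 * option-visible stats and notifies userspace of changes; it re-arms itself
 * according to lb_stats_refresh_interval (in tenths of a second).
 */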
static void lb_stats_refresh(struct work_struct *work)
{
	struct team *team;
	struct lb_priv *lb_priv;
	struct lb_priv_ex *lb_priv_ex;
	struct lb_pcpu_stats *pcpu_stats;
	struct lb_stats *stats;
	struct lb_stats_info *s_info;
	struct team_port *port;
	bool changed = false;
	int i;
	int j;

	lb_priv_ex = container_of(work, struct lb_priv_ex,
				  stats.refresh_dw.work);

	team = lb_priv_ex->team;
	lb_priv = get_lb_priv(team);

	if (!mutex_trylock(&team->lock)) {
		schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0);
		return;
	}

	for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) {
		s_info = &lb_priv->ex->stats.info[j];
		__lb_stats_info_refresh_prepare(s_info);
		for_each_possible_cpu(i) {
			pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
			stats = &pcpu_stats->hash_stats[j];
			__lb_one_cpu_stats_add(&s_info->stats, stats,
					       &pcpu_stats->syncp);
		}
		changed |= __lb_stats_info_refresh_check(s_info, team);
	}

	list_for_each_entry(port, &team->port_list, list) {
		struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);

		s_info = &lb_port_priv->stats_info;
		__lb_stats_info_refresh_prepare(s_info);
		for_each_possible_cpu(i) {
			pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
			stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i);
			__lb_one_cpu_stats_add(&s_info->stats, stats,
					       &pcpu_stats->syncp);
		}
		changed |= __lb_stats_info_refresh_check(s_info, team);
	}

	if (changed)
		team_options_change_check(team);

	schedule_delayed_work(&lb_priv_ex->stats.refresh_dw,
			      (lb_priv_ex->stats.refresh_interval * HZ) / 10);

	mutex_unlock(&team->lock);
}
static int lb_stats_refresh_interval_get(struct team *team,
					 struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);

	ctx->data.u32_val = lb_priv->ex->stats.refresh_interval;
	return 0;
}
static int lb_stats_refresh_interval_set(struct team *team,
					 struct team_gsetter_ctx *ctx)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	unsigned int interval;

	interval = ctx->data.u32_val;
	if (lb_priv->ex->stats.refresh_interval == interval)
		return 0;
	lb_priv->ex->stats.refresh_interval = interval;
	if (interval)
		schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0);
	else
		cancel_delayed_work(&lb_priv->ex->stats.refresh_dw);
	return 0;
}
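
/*
 * Options exported through the team netlink interface. As an illustrative
 * example only (the exact userspace tooling is outside this driver), the
 * tx method could be switched with something like
 * "teamnl team0 setoption lb_tx_method hash_to_port_mapping".
 */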
static const struct team_option lb_options[] = {
	{
		.name = "bpf_hash_func",
		.type = TEAM_OPTION_TYPE_BINARY,
		.getter = lb_bpf_func_get,
		.setter = lb_bpf_func_set,
	},
	{
		.name = "lb_tx_method",
		.type = TEAM_OPTION_TYPE_STRING,
		.getter = lb_tx_method_get,
		.setter = lb_tx_method_set,
	},
	{
		.name = "lb_tx_hash_to_port_mapping",
		.array_size = LB_TX_HASHTABLE_SIZE,
		.type = TEAM_OPTION_TYPE_U32,
		.init = lb_tx_hash_to_port_mapping_init,
		.getter = lb_tx_hash_to_port_mapping_get,
		.setter = lb_tx_hash_to_port_mapping_set,
	},
	{
		.name = "lb_hash_stats",
		.array_size = LB_TX_HASHTABLE_SIZE,
		.type = TEAM_OPTION_TYPE_BINARY,
		.init = lb_hash_stats_init,
		.getter = lb_hash_stats_get,
	},
	{
		.name = "lb_port_stats",
		.per_port = true,
		.type = TEAM_OPTION_TYPE_BINARY,
		.init = lb_port_stats_init,
		.getter = lb_port_stats_get,
	},
	{
		.name = "lb_stats_refresh_interval",
		.type = TEAM_OPTION_TYPE_U32,
		.getter = lb_stats_refresh_interval_get,
		.setter = lb_stats_refresh_interval_set,
	},
};
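
/*
 * Mode init: default to the plain "hash" selector, allocate the priv
 * extension and per-CPU stats, then register the options above.
 */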
static int lb_init(struct team *team)
{
	struct lb_priv *lb_priv = get_lb_priv(team);
	lb_select_tx_port_func_t *func;
	int err;

	/* set default tx port selector */
	func = lb_select_tx_port_get_func("hash");
	BUG_ON(!func);
	rcu_assign_pointer(lb_priv->select_tx_port_func, func);

	lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL);
	if (!lb_priv->ex)
		return -ENOMEM;
	lb_priv->ex->team = team;

	lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats);
	if (!lb_priv->pcpu_stats) {
		err = -ENOMEM;
		goto err_alloc_pcpu_stats;
	}

	INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh);

	err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options));
	if (err)
		goto err_options_register;
	return 0;

err_options_register:
	free_percpu(lb_priv->pcpu_stats);
err_alloc_pcpu_stats:
	kfree(lb_priv->ex);
	return err;
}
static void lb_exit(struct team *team)
{
	struct lb_priv *lb_priv = get_lb_priv(team);

	team_options_unregister(team, lb_options,
				ARRAY_SIZE(lb_options));
	cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw);
	free_percpu(lb_priv->pcpu_stats);
	kfree(lb_priv->ex);
}
static int lb_port_enter(struct team *team, struct team_port *port)
{
	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);

	lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats);
	if (!lb_port_priv->pcpu_stats)
		return -ENOMEM;
	return 0;
}
static void lb_port_leave(struct team *team, struct team_port *port)
{
	struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);

	free_percpu(lb_port_priv->pcpu_stats);
}
static void lb_port_disabled(struct team *team, struct team_port *port)
{
	lb_tx_hash_to_port_mapping_null_port(team, port);
}
static const struct team_mode_ops lb_mode_ops = {
	.init = lb_init,
	.exit = lb_exit,
	.port_enter = lb_port_enter,
	.port_leave = lb_port_leave,
	.port_disabled = lb_port_disabled,
	.transmit = lb_transmit,
};

static const struct team_mode lb_mode = {
	.kind = "loadbalance",
	.owner = THIS_MODULE,
	.priv_size = sizeof(struct lb_priv),
	.port_priv_size = sizeof(struct lb_port_priv),
	.ops = &lb_mode_ops,
};
static int __init lb_init_module(void)
{
	return team_mode_register(&lb_mode);
}

static void __exit lb_cleanup_module(void)
{
	team_mode_unregister(&lb_mode);
}
module_init(lb_init_module);
module_exit(lb_cleanup_module);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
MODULE_DESCRIPTION("Load-balancing mode for team");
MODULE_ALIAS("team-mode-loadbalance");