/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/random.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib,
				     u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib);
static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}
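/* Usage sketch (added for illustration; not part of the original driver
 * source): a consumer such as the dpipe eRIF table would pair the calls
 * above as follows, with "rif" assumed to be a valid RIF:
 *
 *	u64 pkts;
 *
 *	if (!mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif,
 *					MLXSW_SP_RIF_COUNTER_EGRESS)) {
 *		mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif,
 *					       MLXSW_SP_RIF_COUNTER_EGRESS,
 *					       &pkts);
 *		mlxsw_sp_rif_counter_free(mlxsw_sp, rif,
 *					  MLXSW_SP_RIF_COUNTER_EGRESS);
 *	}
 */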
static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev);

#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
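/* Example (added for illustration, not from the original file): a FIB that
 * holds a default route and one /24 route is described by setting two bits
 * in the prefix usage bitmap:
 *
 *	struct mlxsw_sp_prefix_usage usage = {{ 0 }};
 *
 *	mlxsw_sp_prefix_usage_set(&usage, 0);	(0.0.0.0/0)
 *	mlxsw_sp_prefix_usage_set(&usage, 24);	(e.g. 192.0.2.0/24)
 *
 * FIBs with equal bitmaps can share one LPM tree, which is what
 * mlxsw_sp_prefix_usage_eq() is used to detect.
 */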
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage)) {
			mlxsw_sp_lpm_tree_hold(lpm_tree);
			return lpm_tree;
		}
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}
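/* Note (added commentary): LPM trees are a scarce hardware resource, so
 * they are shared by reference count. mlxsw_sp_lpm_tree_get() first looks
 * for an existing tree with the same protocol and prefix usage and only
 * creates a new one on a miss; mlxsw_sp_lpm_tree_put() frees the tree in
 * the device once the last user drops its reference.
 */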
#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					      sizeof(struct mlxsw_sp_lpm_tree),
					      GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	kfree(mlxsw_sp->router->lpm.trees);
}
static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
					const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}
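/* Example (added for illustration): with the squashing above, routes from
 * the kernel's local (255), main (254) and default (253) tables all land
 * in the same virtual router:
 *
 *	mlxsw_sp_fix_tb_id(RT_TABLE_LOCAL) == RT_TABLE_MAIN
 *	mlxsw_sp_fix_tb_id(RT_TABLE_DEFAULT) == RT_TABLE_MAIN
 *	mlxsw_sp_fix_tb_id(100) == 100	(VRF tables stay separate)
 */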
static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr_table_create;
	}
	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr4_table = mr4_table;
	vr->tb_id = tb_id;
	return vr;

err_mr_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr4_table);
	vr->mr4_table = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr4_table))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}
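/* Note (added commentary): virtual routers follow a get/put pattern rather
 * than an explicit reference count. mlxsw_sp_vr_get() reuses a VR already
 * bound to the same kernel table or creates one, and mlxsw_sp_vr_put()
 * destroys the VR only once nothing uses it anymore: no RIFs, no IPv4 or
 * IPv6 FIB nodes, and no multicast routes.
 */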
static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}

static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	enum mlxsw_sp_l3proto proto = fib->proto;
	struct mlxsw_sp_lpm_tree *old_tree;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
	       sizeof(new_tree->prefix_ref_count));
	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;
}
*mlxsw_sp
)
938 struct mlxsw_sp_vr
*vr
;
942 if (!MLXSW_CORE_RES_VALID(mlxsw_sp
->core
, MAX_VRS
))
945 max_vrs
= MLXSW_CORE_RES_GET(mlxsw_sp
->core
, MAX_VRS
);
946 mlxsw_sp
->router
->vrs
= kcalloc(max_vrs
, sizeof(struct mlxsw_sp_vr
),
948 if (!mlxsw_sp
->router
->vrs
)
951 for (i
= 0; i
< max_vrs
; i
++) {
952 vr
= &mlxsw_sp
->router
->vrs
[i
];
959 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp
*mlxsw_sp
);
961 static void mlxsw_sp_vrs_fini(struct mlxsw_sp
*mlxsw_sp
)
963 /* At this stage we're guaranteed not to have new incoming
964 * FIB notifications and the work queue is free from FIBs
965 * sitting on top of mlxsw netdevs. However, we can still
966 * have other FIBs queued. Flush the queue before flushing
967 * the device's tables. No need for locks, as we're the only
970 mlxsw_core_flush_owq();
971 mlxsw_sp_router_fib_flush(mlxsw_sp
);
972 kfree(mlxsw_sp
->router
->vrs
);
static struct net_device *
__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);
	struct net *net = dev_net(ol_dev);

	return __dev_get_by_index(net, tun->parms.link);
}

u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);

	if (d)
		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
}
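/* Example (illustrative, added commentary): for a tunnel created with an
 * explicit bound device that is a VRF (or a port of one), e.g.
 *
 *	ip link add gre1 type gre local 192.0.2.1 dev vrf-blue
 *
 * the underlay lookup is done in vrf-blue's table, so
 * mlxsw_sp_ipip_dev_ul_tb_id() returns that table's ID. Without a bound
 * device (tun->parms.link == 0) it falls back to RT_TABLE_MAIN.
 */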
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;
	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}
static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
}
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_fib_entry *fib_entry;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node || list_empty(&fib_node->entry_list))
		return NULL;

	fib_entry = list_first_entry(&fib_node->entry_list,
				     struct mlxsw_sp_fib_entry, list);
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_entry;
}
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	struct net_device *ipip_ul_dev;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry) &&
	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			return ipip_entry;

	return NULL;
}
static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}

bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}
static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
						const struct net_device *ol_dev,
						enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ops
		= mlxsw_sp->router->ipip_ops_arr[ipipt];

	/* For deciding whether decap should be offloaded, we don't care about
	 * overlay protocol, so ask whether either one is supported.
	 */
	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
}

static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	enum mlxsw_sp_ipip_type ipipt;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif);
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif);

/**
 * Update the offload related to an IPIP entry. This always updates decap, and
 * in addition to that it also:
 * @recreate_loopback: recreates the associated loopback RIF
 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
 *	relevant when recreate_loopback is true.
 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
 *	is only relevant when recreate_loopback is false.
 */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}
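/* Usage sketch (added for illustration): the callers below combine the
 * three flags according to what changed. Moving the overlay device to a
 * different VRF recreates the loopback RIF but drops the encapsulating
 * next hops, whereas an underlay up/down event only refreshes next hops:
 *
 *	__mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
 *					    true, false, false, extack);
 *
 *	__mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
 *					    false, false, true, NULL);
 */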
static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
static int
mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
					struct net_device *ol_dev,
					struct netlink_ext_ack *extack)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		/* A change might make a tunnel eligible for offloading, but
		 * that is currently not implemented. What falls to slow path
		 * stays there.
		 */
		return 0;

	/* A change might make a tunnel not eligible for offloading. */
	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
						 ipip_entry->ipipt)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
	return err;
}

void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in in the argument
 * `except'.
 */
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
				     enum mlxsw_sp_l3proto ul_proto,
				     union mlxsw_sp_l3addr saddr,
				     u32 ul_tb_id,
				     const struct mlxsw_sp_ipip_entry *except)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		if (ipip_entry != except &&
		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry)) {
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			return true;
		}
	}

	return false;
}

static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
						     struct net_device *ul_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
	}
}
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	}
	return 0;
}

static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;
	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}

int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err;

	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			return err;
		}
	}

	return 0;
}
{
1724 struct neighbour
*n
;
1727 struct mlxsw_sp_neigh_entry
{
1728 struct list_head rif_list_node
;
1729 struct rhash_head ht_node
;
1730 struct mlxsw_sp_neigh_key key
;
1733 unsigned char ha
[ETH_ALEN
];
1734 struct list_head nexthop_list
; /* list of nexthops using
1737 struct list_head nexthop_neighs_list_node
;
1738 unsigned int counter_index
;
1742 static const struct rhashtable_params mlxsw_sp_neigh_ht_params
= {
1743 .key_offset
= offsetof(struct mlxsw_sp_neigh_entry
, key
),
1744 .head_offset
= offsetof(struct mlxsw_sp_neigh_entry
, ht_node
),
1745 .key_len
= sizeof(struct mlxsw_sp_neigh_key
),
struct mlxsw_sp_neigh_entry *
mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
			struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry) {
		if (list_empty(&rif->neigh_list))
			return NULL;
		else
			return list_first_entry(&rif->neigh_list,
						typeof(*neigh_entry),
						rif_list_node);
	}
	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
		return NULL;
	return list_next_entry(neigh_entry, rif_list_node);
}

int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}

unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}

u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return ntohl(*((__be32 *) n->primary_key));
}

struct in6_addr *
mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return (struct in6_addr *) &n->primary_key;
}

int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct devlink *devlink;
	const char *table_name;

	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
	case AF_INET:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
		break;
	case AF_INET6:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
		break;
	default:
		WARN_ON(1);
		return false;
	}

	devlink = priv_to_devlink(mlxsw_sp->core);
	return devlink_dpipe_table_counter_enabled(devlink, table_name);
}

static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}

static void
mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp,
				   neigh_entry->counter_index);
	neigh_entry->counter_valid = false;
}
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
#else
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif

static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	u8 num_entries;
	int i;

	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								rec_index);
	/* Hardware starts counting at 0, so add 1. */
	num_entries++;

	/* Each record consists of several neighbour entries. */
	for (i = 0; i < num_entries; i++) {
		int ent_index;

		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
						       ent_index);
	}
}

static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* One record contains one entry. */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
					       rec_index);
}
*mlxsw_sp
,
2051 char *rauhtd_pl
, int rec_index
)
2053 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl
, rec_index
)) {
2054 case MLXSW_REG_RAUHTD_TYPE_IPV4
:
2055 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp
, rauhtd_pl
,
2058 case MLXSW_REG_RAUHTD_TYPE_IPV6
:
2059 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp
, rauhtd_pl
,
2065 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl
)
2067 u8 num_rec
, last_rec_index
, num_entries
;
2069 num_rec
= mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl
);
2070 last_rec_index
= num_rec
- 1;
2072 if (num_rec
< MLXSW_REG_RAUHTD_REC_MAX_NUM
)
2074 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl
, last_rec_index
) ==
2075 MLXSW_REG_RAUHTD_TYPE_IPV6
)
2078 num_entries
= mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl
,
2080 if (++num_entries
== MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC
)
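/* Worked example (added for illustration): a RAUHTD dump is full when all
 * MLXSW_REG_RAUHTD_REC_MAX_NUM records are used and the last record is
 * exhausted. An IPv6 record always holds exactly one entry, so its presence
 * as the last record means the dump is full. An IPv4 record holds up to
 * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC entries and reports num_entries
 * zero-based; e.g. a raw value of 3 means four valid entries, which with
 * four entries per record makes the record, and hence the dump, full.
 */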
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}

static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_rauhtd_type type;
	char *rauhtd_pl;
	int err;

	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
	if (!rauhtd_pl)
		return -ENOMEM;

	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
	if (err)
		goto out;

	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);

out:
	kfree(rauhtd_pl);
	return err;
}
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh have nexthops, make the kernel think this neigh
		 * is active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}

static void
mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval = mlxsw_sp->router->neighs_update.interval;

	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
			       msecs_to_jiffies(interval));
}

static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp_router *router;
	int err;

	router = container_of(work, struct mlxsw_sp_router,
			      neighs_update.dw.work);
	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
	if (err)
		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
}
2177 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct
*work
)
2179 struct mlxsw_sp_neigh_entry
*neigh_entry
;
2180 struct mlxsw_sp_router
*router
;
2182 router
= container_of(work
, struct mlxsw_sp_router
,
2183 nexthop_probe_dw
.work
);
2184 /* Iterate over nexthop neighbours, find those who are unresolved and
2185 * send arp on them. This solves the chicken-egg problem when
2186 * the nexthop wouldn't get offloaded until the neighbor is resolved
2187 * but it wouldn't get resolved ever in case traffic is flowing in HW
2188 * using different nexthop.
2190 * Take RTNL mutex here to prevent lists from changes.
2193 list_for_each_entry(neigh_entry
, &router
->nexthop_neighs_list
,
2194 nexthop_neighs_list_node
)
2195 if (!neigh_entry
->connected
)
2196 neigh_event_send(neigh_entry
->key
.n
, NULL
);
2199 mlxsw_core_schedule_dw(&router
->nexthop_probe_dw
,
2200 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL
);
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing);

static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
{
	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
}

static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

static void
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	const char *dip = n->primary_key;

	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}

bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n = neigh_entry->key.n;

	/* Packets with a link-local destination address are trapped
	 * after LPM lookup and never reach the neighbour table, so
	 * there is no need to program such neighbours to the device.
	 */
	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
	    IPV6_ADDR_LINKLOCAL)
		return true;
	return false;
}

static void
mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry,
			    bool adding)
{
	if (!adding && !neigh_entry->connected)
		return;
	neigh_entry->connected = adding;
	if (neigh_entry->key.n->tbl->family == AF_INET) {
		mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
			return;
		mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
						mlxsw_sp_rauht_op(adding));
	} else {
		WARN_ON_ONCE(1);
	}
}

void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry,
				    bool adding)
{
	if (adding)
		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	else
		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n;
};
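/* Netevent notifications arrive in atomic context, so the actual neighbour
 * programming is deferred to the work item below, which may take the RTNL
 * mutex and issue (sleeping) register writes.
 */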
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = net_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * know about it.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	neigh_release(n);
	kfree(net_work);
}

static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;

	mlxsw_sp_mp_hash_init(mlxsw_sp);
	kfree(net_work);
}

static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
					  unsigned long event, void *ptr)
{
	struct mlxsw_sp_netevent_work *net_work;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp_router *router;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct neigh_parms *p;
	struct neighbour *n;
	struct net *net;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || (p->tbl->family != AF_INET &&
				p->tbl->family != AF_INET6))
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router->neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;

		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_BAD;
		}

		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		net_work->n = n;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		mlxsw_core_schedule_work(&net_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_MULTIPATH_HASH_UPDATE:
		net = ptr;

		if (!net_eq(net, &init_net))
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work)
			return NOTIFY_BAD;

		router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
		INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
		net_work->mlxsw_sp = router->mlxsw_sp;
		mlxsw_core_schedule_work(&net_work->work);
		break;
	}

	return NOTIFY_DONE;
}
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity_update */
	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
	return 0;
}

static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}

static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
				 rif_list_node) {
		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
	}
}
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,
	MLXSW_SP_NEXTHOP_TYPE_IPIP,
};

struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};

struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	unsigned char gw_addr[sizeof(struct in6_addr)];
	int ifindex;
	int nh_weight;
	int norm_nh_weight;
	int num_adj_entries;
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry;
		struct mlxsw_sp_ipip_entry *ipip_entry;
	};
	unsigned int counter_index;
	bool counter_valid;
};

struct mlxsw_sp_nexthop_group {
	void *priv;
	struct rhash_head ht_node;
	struct list_head fib_list; /* list of fib entries that use this group */
	struct neigh_table *neigh_tbl;
	u8 adj_index_valid:1,
	   gateway:1; /* routes using the group use a gateway */
	u32 adj_index;
	u16 ecmp_size;
	u16 count;
	int sum_norm_weight;
	struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif	nexthops[0].rif
};
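/* Adjacency counters are only allocated when counters are enabled for the
 * dpipe adjacency table; a nexthop with no counter simply keeps
 * counter_valid clear and is skipped by the getter below.
 */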
void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
		return;

	nh->counter_valid = true;
}

void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	if (!nh->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
	nh->counter_valid = false;
}

int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
{
	if (!nh->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
					 p_counter);
}

struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
					       struct mlxsw_sp_nexthop *nh)
{
	if (!nh) {
		if (list_empty(&router->nexthop_list))
			return NULL;
		else
			return list_first_entry(&router->nexthop_list,
						typeof(*nh), router_list_node);
	}
	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
		return NULL;
	return list_next_entry(nh, router_list_node);
}

bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
{
	return nh->offloaded;
}

unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
{
	if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH || !nh->neigh_entry)
		return NULL;
	return nh->neigh_entry->ha;
}

int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
			     u32 *p_adj_size, u32 *p_adj_hash_index)
{
	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
	u32 adj_hash_index = 0;
	int i;

	if (!nh->offloaded || !nh_grp->adj_index_valid)
		return -EINVAL;

	*p_adj_index = nh_grp->adj_index;
	*p_adj_size = nh_grp->ecmp_size;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];

		if (nh_iter == nh)
			break;
		if (nh_iter->offloaded)
			adj_hash_index += nh_iter->num_adj_entries;
	}

	*p_adj_hash_index = adj_hash_index;
	return 0;
}

struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
{
	return nh->rif;
}
bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
	int i;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];

		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
			return true;
	}

	return false;
}

static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->priv;
}

struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;
		struct mlxsw_sp_fib6_entry *fib6_entry;
	};
};

static bool
mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
				    const struct in6_addr *gw, int ifindex,
				    int weight)
{
	int i;

	for (i = 0; i < nh_grp->count; i++) {
		const struct mlxsw_sp_nexthop *nh;

		nh = &nh_grp->nexthops[i];
		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
			return true;
	}

	return false;
}

static bool
mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
			    const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	if (nh_grp->count != fib6_entry->nrt6)
		return false;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct in6_addr *gw;
		int ifindex, weight;

		ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
		weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
		gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
							 weight))
			return false;
	}

	return true;
}

static int
mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
	case MLXSW_SP_L3_PROTO_IPV6:
		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
						    cmp_arg->fib6_entry);
	default:
		WARN_ON(1);
		return 1;
	}
}

static int
mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->neigh_tbl->family;
}
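/* The nexthop group hash table is keyed differently per protocol: IPv4
 * groups hash the fib_info pointer, while IPv6 groups hash the nexthop
 * count XORed with the ifindex of every nexthop. The object hash below
 * must produce the same value as the key hash (hashfn) further down for
 * lookups to succeed.
 */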
static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group *nh_grp = data;
	const struct mlxsw_sp_nexthop *nh;
	struct fib_info *fi;
	unsigned int val;
	int i;

	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
	case AF_INET:
		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
		return jhash(&fi, sizeof(fi), seed);
	case AF_INET6:
		val = nh_grp->count;
		for (i = 0; i < nh_grp->count; i++) {
			nh = &nh_grp->nexthops[i];
			val ^= nh->ifindex;
		}
		return jhash(&val, sizeof(val), seed);
	default:
		WARN_ON(1);
		return 0;
	}
}

static u32
mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
{
	unsigned int val = fib6_entry->nrt6;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct net_device *dev;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		dev = mlxsw_sp_rt6->rt->dst.dev;
		val ^= dev->ifindex;
	}

	return jhash(&val, sizeof(val), seed);
}

static u32
mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
	case MLXSW_SP_L3_PROTO_IPV6:
		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
	default:
		WARN_ON(1);
		return 0;
	}
}

static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn	     = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return 0;

	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &nh_grp->ht_node,
				      mlxsw_sp_nexthop_group_ht_params);
}

static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp)
{
	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
	    !nh_grp->gateway)
		return;

	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
			       &nh_grp->ht_node,
			       mlxsw_sp_nexthop_group_ht_params);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
			       struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;

	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
	cmp_arg.fi = fi;
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &cmp_arg,
				      mlxsw_sp_nexthop_group_ht_params);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;

	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
	cmp_arg.fib6_entry = fib6_entry;
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
				      &cmp_arg,
				      mlxsw_sp_nexthop_group_ht_params);
}

static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};

static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}

static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
			       mlxsw_sp_nexthop_ht_params);
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
			struct mlxsw_sp_nexthop_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
				      mlxsw_sp_nexthop_ht_params);
}
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     const struct mlxsw_sp_fib *fib,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}

static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib *fib = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (fib == fib_entry->fib_node->fib)
			continue;
		fib = fib_entry->fib_node->fib;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}

static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
				     struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	char ratr_pl[MLXSW_REG_RATR_LEN];

	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
			    adj_index, neigh_entry->rif);
	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
	if (nh->counter_valid)
		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
	else
		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}

int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
			    struct mlxsw_sp_nexthop *nh)
{
	int i;

	for (i = 0; i < nh->num_adj_entries; i++) {
		int err;

		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
		if (err)
			return err;
	}

	return 0;
}

static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
					  u32 adj_index,
					  struct mlxsw_sp_nexthop *nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
}

static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
					u32 adj_index,
					struct mlxsw_sp_nexthop *nh)
{
	int i;

	for (i = 0; i < nh->num_adj_entries; i++) {
		int err;

		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
						     nh);
		if (err)
			return err;
	}

	return 0;
}
static int
mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_nexthop_group *nh_grp,
			      bool reallocate)
{
	u32 adj_index = nh_grp->adj_index; /* base */
	struct mlxsw_sp_nexthop *nh;
	int i;
	int err;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload) {
			nh->offloaded = 0;
			continue;
		}

		if (nh->update || reallocate) {
			switch (nh->type) {
			case MLXSW_SP_NEXTHOP_TYPE_ETH:
				err = mlxsw_sp_nexthop_update
					    (mlxsw_sp, adj_index, nh);
				break;
			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
				err = mlxsw_sp_nexthop_ipip_update
					    (mlxsw_sp, adj_index, nh);
				break;
			}
			if (err)
				return err;
			nh->update = 0;
			nh->offloaded = 1;
		}
		adj_index += nh->num_adj_entries;
	}
	return 0;
}

static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry);

static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}

	return 0;
}

static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err);

static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
	}
}
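/* Example: a group whose normalized weights sum to 70 is first rounded up
 * to the next supported adjacency group size (512), and is later rounded
 * back down should the KVD allocator return fewer usable entries.
 */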
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
{
	/* Valid sizes for an adjacency group are:
	 * 1-64, 512, 1024, 2048 and 4096.
	 */
	if (*p_adj_grp_size <= 64)
		return;
	else if (*p_adj_grp_size <= 512)
		*p_adj_grp_size = 512;
	else if (*p_adj_grp_size <= 1024)
		*p_adj_grp_size = 1024;
	else if (*p_adj_grp_size <= 2048)
		*p_adj_grp_size = 2048;
	else
		*p_adj_grp_size = 4096;
}

static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
					     unsigned int alloc_size)
{
	if (alloc_size >= 4096)
		*p_adj_grp_size = 4096;
	else if (alloc_size >= 2048)
		*p_adj_grp_size = 2048;
	else if (alloc_size >= 1024)
		*p_adj_grp_size = 1024;
	else if (alloc_size >= 512)
		*p_adj_grp_size = 512;
}

static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
				     u16 *p_adj_grp_size)
{
	unsigned int alloc_size;
	int err;

	/* Round up the requested group size to the next size supported
	 * by the device and make sure the request can be satisfied.
	 */
	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
	err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
					     &alloc_size);
	if (err)
		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as much of them as
	 * possible.
	 */
	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);

	return 0;
}
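/* Weights are normalized by dividing out their greatest common divisor,
 * e.g. nexthop weights {2, 4} become {1, 2} with sum_norm_weight == 3, so
 * the smallest possible adjacency group still honours the ratio.
 */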
static void
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int i, g = 0, sum_norm_weight = 0;
	struct mlxsw_sp_nexthop *nh;

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		if (g > 0)
			g = gcd(nh->nh_weight, g);
		else
			g = nh->nh_weight;
	}

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		nh->norm_nh_weight = nh->nh_weight / g;
		sum_norm_weight += nh->norm_nh_weight;
	}

	nh_grp->sum_norm_weight = sum_norm_weight;
}
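/* Spread ecmp_size adjacency entries over the offloaded nexthops in
 * proportion to their normalized weights. With weights {1, 2} and an
 * ecmp_size of 3, the running upper bounds are 1 and 3, giving the
 * nexthops 1 and 2 adjacency entries respectively.
 */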
static void
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int total = nh_grp->sum_norm_weight;
	u16 ecmp_size = nh_grp->ecmp_size;
	int i, weight = 0, lower_bound = 0;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		int upper_bound;

		if (!nh->should_offload)
			continue;
		weight += nh->norm_nh_weight;
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
		nh->num_adj_entries = upper_bound - lower_bound;
		lower_bound = upper_bound;
	}
}
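/* Central fixup for a nexthop group: when the set of offloadable nexthops
 * changed, a new adjacency block is allocated, populated and re-pointed to
 * by all routes (the old block is then freed); when nothing can be
 * offloaded, the routes fall back to trapping packets to the kernel.
 */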
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everything flow through kernel.
		 */
		goto set_trap;

	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
}
static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
					    bool removing)
{
	if (!removing)
		nh->should_offload = 1;
	else
		nh->should_offload = 0;
	nh->update = 1;
}

static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing)
{
	struct mlxsw_sp_nexthop *nh;

	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}

static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
				      struct mlxsw_sp_rif *rif)
{
	if (nh->rif)
		return;

	nh->rif = rif;
	list_add(&nh->rif_list_node, &rif->nexthop_list);
}

static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
{
	if (!nh->rif)
		return;

	list_del(&nh->rif_list_node);
	nh->rif = NULL;
}
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n;
	u8 nud_state, dead;
	int err;

	if (!nh->nh_grp->gateway || nh->neigh_entry)
		return 0;

	/* Take a reference of neigh here ensuring that neigh would
	 * not be destructed before the nexthop entry is finished.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
	if (!n) {
		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
				 nh->rif->dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry)) {
			err = -EINVAL;
			goto err_neigh_entry_create;
		}
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router->nexthop_neighs_list);

	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

	return 0;

err_neigh_entry_create:
	neigh_release(n);
	return err;
}

static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	neigh_release(n);
}
static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
{
	struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);

	return ul_dev ? (ul_dev->flags & IFF_UP) : true;
}

static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	bool removing;

	if (!nh->nh_grp->gateway || nh->ipip_entry)
		return;

	nh->ipip_entry = ipip_entry;
	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
	__mlxsw_sp_nexthop_neigh_update(nh, removing);
	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
}

static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;

	if (!ipip_entry)
		return;

	__mlxsw_sp_nexthop_neigh_update(nh, true);
	nh->ipip_entry = NULL;
}

static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct fib_nh *fib_nh,
					enum mlxsw_sp_ipip_type *p_ipipt)
{
	struct net_device *dev = fib_nh->nh_dev;

	return dev &&
	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
}

static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	switch (nh->type) {
	case MLXSW_SP_NEXTHOP_TYPE_ETH:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
		break;
	}
}
static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct fib_nh *fib_nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct net_device *dev = fib_nh->nh_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV4)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	mlxsw_sp_nexthop_rif_init(nh, rif);
	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_neigh_init;

	return 0;

err_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}

static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  struct fib_nh *fib_nh)
{
	struct net_device *dev = fib_nh->nh_dev;
	struct in_device *in_dev;
	int err;

	nh->nh_grp = nh_grp;
	nh->key.fib_nh = fib_nh;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	nh->nh_weight = fib_nh->nh_weight;
#else
	nh->nh_weight = 1;
#endif
	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
	if (err)
		return err;

	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
		return 0;

	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
	return err;
}

static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
				    unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_nexthop *nh;

	if (mlxsw_sp->router->aborted)
		return;

	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	if (WARN_ON_ONCE(!nh))
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh;
	bool removing;

	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
		switch (nh->type) {
		case MLXSW_SP_NEXTHOP_TYPE_ETH:
			removing = false;
			break;
		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
			break;
		default:
			WARN_ON(1);
			continue;
		}

		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}

static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif)
{
	struct mlxsw_sp_nexthop *nh;

	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
		nh->rif = new_rif;
	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
}

static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
				   const struct fib_info *fi)
{
	return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
}
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
	kfree(nh_grp);
}
static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}
	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
	fib_entry->nh_group = nh_grp;
	return 0;
}

static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
}

static bool
mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
				  common);
	return !fib4_entry->tos;
}
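/* A route is only offloaded if its entry type can be expressed in hardware:
 * remote routes need a valid adjacency index, local routes need a RIF, and
 * IPv4 routes with a non-zero TOS are never offloaded.
 */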
static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nh_rif;
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return true;
	default:
		return false;
	}
}

static struct mlxsw_sp_nexthop *
mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	int i;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		if (nh->rif && nh->rif->dev == rt->dst.dev &&
		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
				    &rt->rt6i_gateway))
			return nh;
	}

	return NULL;
}
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (nh->offloaded)
			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}

static void
mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	if (!list_is_singular(&nh_grp->fib_list))
		return;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}

static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct mlxsw_sp_nexthop *nh;

		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
		else
			mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}

static void
mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);
	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_set(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_set(fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
{
	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_entry_offload_unset(fib_entry);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_entry_offload_unset(fib_entry);
		break;
	}
}

static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
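/* All route programming goes through the RALUE register; the helper below
 * packs the common part (protocol, operation, virtual router and prefix),
 * and the per-type op_* helpers that follow add the action: an adjacency
 * group for remote routes, a RIF for local ones, or a trap to the CPU.
 */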
static void
mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
			      const struct mlxsw_sp_fib_entry *fib_entry,
			      enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
	enum mlxsw_reg_ralxx_protocol proto;
	u32 *p_dip;

	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;

	switch (fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		p_dip = (u32 *) fib_entry->fib_node->key.addr;
		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      *p_dip);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      fib_entry->fib_node->key.addr);
		break;
	}
}
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id = 0;
	u16 rif_index = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
				       rif_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}

static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
				      fib_entry->decap.tunnel_index);
}

static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op)
{
	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
							fib_entry, op);
	}
	return -EINVAL;
}

static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);

	if (err)
		return err;

	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);

	return err;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		/* fall through */
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	fib4_entry->prio = fen_info->fi->fib_priority;
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_nexthop4_group_get:
err_fib4_entry_type_set:
	kfree(fib4_entry);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id == fen_info->tb_id &&
		    fib4_entry->tos == fen_info->tos &&
		    fib4_entry->type == fen_info->type &&
		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
		    fen_info->fi)
			return fib4_entry;
	}

	return NULL;
}
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};

static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}

static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	INIT_LIST_HEAD(&fib_node->entry_list);
	list_add(&fib_node->list, &fib->node_list);
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;

	return fib_node;
}

static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}

static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry)
{
	return list_first_entry(&fib_node->entry_list,
				struct mlxsw_sp_fib_entry, list) == fib_entry;
}
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}

static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}

static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
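/* Entries of a fib node are kept sorted by decreasing table ID, then
 * decreasing TOS and finally increasing priority; the lookup below returns
 * the entry in front of which a new entry should be inserted, or NULL if
 * the new entry should go last.
 */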
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}

static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}

static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}

static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}
4465 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp
*mlxsw_sp
,
4466 struct mlxsw_sp_fib_entry
*fib_entry
)
4468 struct mlxsw_sp_fib_node
*fib_node
= fib_entry
->fib_node
;
4470 if (!mlxsw_sp_fib_node_entry_is_first(fib_node
, fib_entry
))
4473 /* To prevent packet loss, overwrite the previously offloaded
4476 if (!list_is_singular(&fib_node
->entry_list
)) {
4477 enum mlxsw_reg_ralue_op op
= MLXSW_REG_RALUE_OP_WRITE_DELETE
;
4478 struct mlxsw_sp_fib_entry
*n
= list_next_entry(fib_entry
, list
);
4480 mlxsw_sp_fib_entry_offload_refresh(n
, op
, 0);
4483 return mlxsw_sp_fib_entry_update(mlxsw_sp
, fib_entry
);
4486 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp
*mlxsw_sp
,
4487 struct mlxsw_sp_fib_entry
*fib_entry
)
4489 struct mlxsw_sp_fib_node
*fib_node
= fib_entry
->fib_node
;
4491 if (!mlxsw_sp_fib_node_entry_is_first(fib_node
, fib_entry
))
4494 /* Promote the next entry by overwriting the deleted entry */
4495 if (!list_is_singular(&fib_node
->entry_list
)) {
4496 struct mlxsw_sp_fib_entry
*n
= list_next_entry(fib_entry
, list
);
4497 enum mlxsw_reg_ralue_op op
= MLXSW_REG_RALUE_OP_WRITE_DELETE
;
4499 mlxsw_sp_fib_entry_update(mlxsw_sp
, n
);
4500 mlxsw_sp_fib_entry_offload_refresh(fib_entry
, op
, 0);
4504 mlxsw_sp_fib_entry_del(mlxsw_sp
, fib_entry
);
4507 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp
*mlxsw_sp
,
4508 struct mlxsw_sp_fib4_entry
*fib4_entry
,
4509 bool replace
, bool append
)
4513 err
= mlxsw_sp_fib4_node_list_insert(fib4_entry
, replace
, append
);
4517 err
= mlxsw_sp_fib_node_entry_add(mlxsw_sp
, &fib4_entry
->common
);
4519 goto err_fib_node_entry_add
;
4523 err_fib_node_entry_add
:
4524 mlxsw_sp_fib4_node_list_remove(fib4_entry
);
4529 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp
*mlxsw_sp
,
4530 struct mlxsw_sp_fib4_entry
*fib4_entry
)
4532 mlxsw_sp_fib_node_entry_del(mlxsw_sp
, &fib4_entry
->common
);
4533 mlxsw_sp_fib4_node_list_remove(fib4_entry
);
4535 if (fib4_entry
->common
.type
== MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP
)
4536 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp
, &fib4_entry
->common
);
4539 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp
*mlxsw_sp
,
4540 struct mlxsw_sp_fib4_entry
*fib4_entry
,
4543 struct mlxsw_sp_fib_node
*fib_node
= fib4_entry
->common
.fib_node
;
4544 struct mlxsw_sp_fib4_entry
*replaced
;
4549 /* We inserted the new entry before replaced one */
4550 replaced
= list_next_entry(fib4_entry
, common
.list
);
4552 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp
, replaced
);
4553 mlxsw_sp_fib4_entry_destroy(mlxsw_sp
, replaced
);
4554 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
4558 mlxsw_sp_router_fib4_add(struct mlxsw_sp
*mlxsw_sp
,
4559 const struct fib_entry_notifier_info
*fen_info
,
4560 bool replace
, bool append
)
4562 struct mlxsw_sp_fib4_entry
*fib4_entry
;
4563 struct mlxsw_sp_fib_node
*fib_node
;
4566 if (mlxsw_sp
->router
->aborted
)
4569 fib_node
= mlxsw_sp_fib_node_get(mlxsw_sp
, fen_info
->tb_id
,
4570 &fen_info
->dst
, sizeof(fen_info
->dst
),
4572 MLXSW_SP_L3_PROTO_IPV4
);
4573 if (IS_ERR(fib_node
)) {
4574 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to get FIB node\n");
4575 return PTR_ERR(fib_node
);
4578 fib4_entry
= mlxsw_sp_fib4_entry_create(mlxsw_sp
, fib_node
, fen_info
);
4579 if (IS_ERR(fib4_entry
)) {
4580 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to create FIB entry\n");
4581 err
= PTR_ERR(fib4_entry
);
4582 goto err_fib4_entry_create
;
4585 err
= mlxsw_sp_fib4_node_entry_link(mlxsw_sp
, fib4_entry
, replace
,
4588 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to link FIB entry to node\n");
4589 goto err_fib4_node_entry_link
;
4592 mlxsw_sp_fib4_entry_replace(mlxsw_sp
, fib4_entry
, replace
);
4596 err_fib4_node_entry_link
:
4597 mlxsw_sp_fib4_entry_destroy(mlxsw_sp
, fib4_entry
);
4598 err_fib4_entry_create
:
4599 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
4603 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp
*mlxsw_sp
,
4604 struct fib_entry_notifier_info
*fen_info
)
4606 struct mlxsw_sp_fib4_entry
*fib4_entry
;
4607 struct mlxsw_sp_fib_node
*fib_node
;
4609 if (mlxsw_sp
->router
->aborted
)
4612 fib4_entry
= mlxsw_sp_fib4_entry_lookup(mlxsw_sp
, fen_info
);
4613 if (WARN_ON(!fib4_entry
))
4615 fib_node
= fib4_entry
->common
.fib_node
;
4617 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp
, fib4_entry
);
4618 mlxsw_sp_fib4_entry_destroy(mlxsw_sp
, fib4_entry
);
4619 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
4622 static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info
*rt
)
4624 /* Packets with link-local destination IP arriving to the router
4625 * are trapped to the CPU, so no need to program specific routes
4628 if (ipv6_addr_type(&rt
->rt6i_dst
.addr
) & IPV6_ADDR_LINKLOCAL
)
4631 /* Multicast routes aren't supported, so ignore them. Neighbour
4632 * Discovery packets are specifically trapped.
4634 if (ipv6_addr_type(&rt
->rt6i_dst
.addr
) & IPV6_ADDR_MULTICAST
)
4637 /* Cloned routes are irrelevant in the forwarding path. */
4638 if (rt
->rt6i_flags
& RTF_CACHE
)
4644 static struct mlxsw_sp_rt6
*mlxsw_sp_rt6_create(struct rt6_info
*rt
)
4646 struct mlxsw_sp_rt6
*mlxsw_sp_rt6
;
4648 mlxsw_sp_rt6
= kzalloc(sizeof(*mlxsw_sp_rt6
), GFP_KERNEL
);
4650 return ERR_PTR(-ENOMEM
);
4652 /* In case of route replace, replaced route is deleted with
4653 * no notification. Take reference to prevent accessing freed
4656 mlxsw_sp_rt6
->rt
= rt
;
4659 return mlxsw_sp_rt6
;
4662 #if IS_ENABLED(CONFIG_IPV6)
4663 static void mlxsw_sp_rt6_release(struct rt6_info
*rt
)
4668 static void mlxsw_sp_rt6_release(struct rt6_info
*rt
)
4673 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6
*mlxsw_sp_rt6
)
4675 mlxsw_sp_rt6_release(mlxsw_sp_rt6
->rt
);
4676 kfree(mlxsw_sp_rt6
);
4679 static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info
*rt
)
4681 /* RTF_CACHE routes are ignored */
4682 return (rt
->rt6i_flags
& (RTF_GATEWAY
| RTF_ADDRCONF
)) == RTF_GATEWAY
;
4685 static struct rt6_info
*
4686 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry
*fib6_entry
)
4688 return list_first_entry(&fib6_entry
->rt6_list
, struct mlxsw_sp_rt6
,
4692 static struct mlxsw_sp_fib6_entry
*
4693 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node
*fib_node
,
4694 const struct rt6_info
*nrt
, bool replace
)
4696 struct mlxsw_sp_fib6_entry
*fib6_entry
;
4698 if (!mlxsw_sp_fib6_rt_can_mp(nrt
) || replace
)
4701 list_for_each_entry(fib6_entry
, &fib_node
->entry_list
, common
.list
) {
4702 struct rt6_info
*rt
= mlxsw_sp_fib6_entry_rt(fib6_entry
);
4704 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
4707 if (rt
->rt6i_table
->tb6_id
> nrt
->rt6i_table
->tb6_id
)
4709 if (rt
->rt6i_table
->tb6_id
!= nrt
->rt6i_table
->tb6_id
)
4711 if (rt
->rt6i_metric
< nrt
->rt6i_metric
)
4713 if (rt
->rt6i_metric
== nrt
->rt6i_metric
&&
4714 mlxsw_sp_fib6_rt_can_mp(rt
))
4716 if (rt
->rt6i_metric
> nrt
->rt6i_metric
)
4723 static struct mlxsw_sp_rt6
*
4724 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry
*fib6_entry
,
4725 const struct rt6_info
*rt
)
4727 struct mlxsw_sp_rt6
*mlxsw_sp_rt6
;
4729 list_for_each_entry(mlxsw_sp_rt6
, &fib6_entry
->rt6_list
, list
) {
4730 if (mlxsw_sp_rt6
->rt
== rt
)
4731 return mlxsw_sp_rt6
;
4737 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp
*mlxsw_sp
,
4738 const struct rt6_info
*rt
,
4739 enum mlxsw_sp_ipip_type
*ret
)
4741 return rt
->dst
.dev
&&
4742 mlxsw_sp_netdev_ipip_type(mlxsw_sp
, rt
->dst
.dev
, ret
);
4745 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp
*mlxsw_sp
,
4746 struct mlxsw_sp_nexthop_group
*nh_grp
,
4747 struct mlxsw_sp_nexthop
*nh
,
4748 const struct rt6_info
*rt
)
4750 const struct mlxsw_sp_ipip_ops
*ipip_ops
;
4751 struct mlxsw_sp_ipip_entry
*ipip_entry
;
4752 struct net_device
*dev
= rt
->dst
.dev
;
4753 struct mlxsw_sp_rif
*rif
;
4756 ipip_entry
= mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp
, dev
);
4758 ipip_ops
= mlxsw_sp
->router
->ipip_ops_arr
[ipip_entry
->ipipt
];
4759 if (ipip_ops
->can_offload(mlxsw_sp
, dev
,
4760 MLXSW_SP_L3_PROTO_IPV6
)) {
4761 nh
->type
= MLXSW_SP_NEXTHOP_TYPE_IPIP
;
4762 mlxsw_sp_nexthop_ipip_init(mlxsw_sp
, nh
, ipip_entry
);
4767 nh
->type
= MLXSW_SP_NEXTHOP_TYPE_ETH
;
4768 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, dev
);
4771 mlxsw_sp_nexthop_rif_init(nh
, rif
);
4773 err
= mlxsw_sp_nexthop_neigh_init(mlxsw_sp
, nh
);
4775 goto err_nexthop_neigh_init
;
4779 err_nexthop_neigh_init
:
4780 mlxsw_sp_nexthop_rif_fini(nh
);
4784 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp
*mlxsw_sp
,
4785 struct mlxsw_sp_nexthop
*nh
)
4787 mlxsw_sp_nexthop_type_fini(mlxsw_sp
, nh
);
4790 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp
*mlxsw_sp
,
4791 struct mlxsw_sp_nexthop_group
*nh_grp
,
4792 struct mlxsw_sp_nexthop
*nh
,
4793 const struct rt6_info
*rt
)
4795 struct net_device
*dev
= rt
->dst
.dev
;
4797 nh
->nh_grp
= nh_grp
;
4798 nh
->nh_weight
= rt
->rt6i_nh_weight
;
4799 memcpy(&nh
->gw_addr
, &rt
->rt6i_gateway
, sizeof(nh
->gw_addr
));
4800 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp
, nh
);
4802 list_add_tail(&nh
->router_list_node
, &mlxsw_sp
->router
->nexthop_list
);
4806 nh
->ifindex
= dev
->ifindex
;
4808 return mlxsw_sp_nexthop6_type_init(mlxsw_sp
, nh_grp
, nh
, rt
);
4811 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp
*mlxsw_sp
,
4812 struct mlxsw_sp_nexthop
*nh
)
4814 mlxsw_sp_nexthop6_type_fini(mlxsw_sp
, nh
);
4815 list_del(&nh
->router_list_node
);
4816 mlxsw_sp_nexthop_counter_free(mlxsw_sp
, nh
);
4819 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp
*mlxsw_sp
,
4820 const struct rt6_info
*rt
)
4822 return rt
->rt6i_flags
& RTF_GATEWAY
||
4823 mlxsw_sp_nexthop6_ipip_type(mlxsw_sp
, rt
, NULL
);
4826 static struct mlxsw_sp_nexthop_group
*
4827 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp
*mlxsw_sp
,
4828 struct mlxsw_sp_fib6_entry
*fib6_entry
)
4830 struct mlxsw_sp_nexthop_group
*nh_grp
;
4831 struct mlxsw_sp_rt6
*mlxsw_sp_rt6
;
4832 struct mlxsw_sp_nexthop
*nh
;
4837 alloc_size
= sizeof(*nh_grp
) +
4838 fib6_entry
->nrt6
* sizeof(struct mlxsw_sp_nexthop
);
4839 nh_grp
= kzalloc(alloc_size
, GFP_KERNEL
);
4841 return ERR_PTR(-ENOMEM
);
4842 INIT_LIST_HEAD(&nh_grp
->fib_list
);
4843 #if IS_ENABLED(CONFIG_IPV6)
4844 nh_grp
->neigh_tbl
= &nd_tbl
;
4846 mlxsw_sp_rt6
= list_first_entry(&fib6_entry
->rt6_list
,
4847 struct mlxsw_sp_rt6
, list
);
4848 nh_grp
->gateway
= mlxsw_sp_rt6_is_gateway(mlxsw_sp
, mlxsw_sp_rt6
->rt
);
4849 nh_grp
->count
= fib6_entry
->nrt6
;
4850 for (i
= 0; i
< nh_grp
->count
; i
++) {
4851 struct rt6_info
*rt
= mlxsw_sp_rt6
->rt
;
4853 nh
= &nh_grp
->nexthops
[i
];
4854 err
= mlxsw_sp_nexthop6_init(mlxsw_sp
, nh_grp
, nh
, rt
);
4856 goto err_nexthop6_init
;
4857 mlxsw_sp_rt6
= list_next_entry(mlxsw_sp_rt6
, list
);
4860 err
= mlxsw_sp_nexthop_group_insert(mlxsw_sp
, nh_grp
);
4862 goto err_nexthop_group_insert
;
4864 mlxsw_sp_nexthop_group_refresh(mlxsw_sp
, nh_grp
);
4867 err_nexthop_group_insert
:
4869 for (i
--; i
>= 0; i
--) {
4870 nh
= &nh_grp
->nexthops
[i
];
4871 mlxsw_sp_nexthop6_fini(mlxsw_sp
, nh
);
4874 return ERR_PTR(err
);
4878 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp
*mlxsw_sp
,
4879 struct mlxsw_sp_nexthop_group
*nh_grp
)
4881 struct mlxsw_sp_nexthop
*nh
;
4882 int i
= nh_grp
->count
;
4884 mlxsw_sp_nexthop_group_remove(mlxsw_sp
, nh_grp
);
4885 for (i
--; i
>= 0; i
--) {
4886 nh
= &nh_grp
->nexthops
[i
];
4887 mlxsw_sp_nexthop6_fini(mlxsw_sp
, nh
);
4889 mlxsw_sp_nexthop_group_refresh(mlxsw_sp
, nh_grp
);
4890 WARN_ON(nh_grp
->adj_index_valid
);
4894 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp
*mlxsw_sp
,
4895 struct mlxsw_sp_fib6_entry
*fib6_entry
)
4897 struct mlxsw_sp_nexthop_group
*nh_grp
;
4899 nh_grp
= mlxsw_sp_nexthop6_group_lookup(mlxsw_sp
, fib6_entry
);
4901 nh_grp
= mlxsw_sp_nexthop6_group_create(mlxsw_sp
, fib6_entry
);
4903 return PTR_ERR(nh_grp
);
4906 list_add_tail(&fib6_entry
->common
.nexthop_group_node
,
4908 fib6_entry
->common
.nh_group
= nh_grp
;
4913 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp
*mlxsw_sp
,
4914 struct mlxsw_sp_fib_entry
*fib_entry
)
4916 struct mlxsw_sp_nexthop_group
*nh_grp
= fib_entry
->nh_group
;
4918 list_del(&fib_entry
->nexthop_group_node
);
4919 if (!list_empty(&nh_grp
->fib_list
))
4921 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp
, nh_grp
);
4925 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp
*mlxsw_sp
,
4926 struct mlxsw_sp_fib6_entry
*fib6_entry
)
4928 struct mlxsw_sp_nexthop_group
*old_nh_grp
= fib6_entry
->common
.nh_group
;
4931 fib6_entry
->common
.nh_group
= NULL
;
4932 list_del(&fib6_entry
->common
.nexthop_group_node
);
4934 err
= mlxsw_sp_nexthop6_group_get(mlxsw_sp
, fib6_entry
);
4936 goto err_nexthop6_group_get
;
4938 /* In case this entry is offloaded, then the adjacency index
4939 * currently associated with it in the device's table is that
4940 * of the old group. Start using the new one instead.
4942 err
= mlxsw_sp_fib_node_entry_add(mlxsw_sp
, &fib6_entry
->common
);
4944 goto err_fib_node_entry_add
;
4946 if (list_empty(&old_nh_grp
->fib_list
))
4947 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp
, old_nh_grp
);
4951 err_fib_node_entry_add
:
4952 mlxsw_sp_nexthop6_group_put(mlxsw_sp
, &fib6_entry
->common
);
4953 err_nexthop6_group_get
:
4954 list_add_tail(&fib6_entry
->common
.nexthop_group_node
,
4955 &old_nh_grp
->fib_list
);
4956 fib6_entry
->common
.nh_group
= old_nh_grp
;
4961 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp
*mlxsw_sp
,
4962 struct mlxsw_sp_fib6_entry
*fib6_entry
,
4963 struct rt6_info
*rt
)
4965 struct mlxsw_sp_rt6
*mlxsw_sp_rt6
;
4968 mlxsw_sp_rt6
= mlxsw_sp_rt6_create(rt
);
4969 if (IS_ERR(mlxsw_sp_rt6
))
4970 return PTR_ERR(mlxsw_sp_rt6
);
4972 list_add_tail(&mlxsw_sp_rt6
->list
, &fib6_entry
->rt6_list
);
4975 err
= mlxsw_sp_nexthop6_group_update(mlxsw_sp
, fib6_entry
);
4977 goto err_nexthop6_group_update
;
4981 err_nexthop6_group_update
:
4983 list_del(&mlxsw_sp_rt6
->list
);
4984 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6
);
4989 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp
*mlxsw_sp
,
4990 struct mlxsw_sp_fib6_entry
*fib6_entry
,
4991 struct rt6_info
*rt
)
4993 struct mlxsw_sp_rt6
*mlxsw_sp_rt6
;
4995 mlxsw_sp_rt6
= mlxsw_sp_fib6_entry_rt_find(fib6_entry
, rt
);
4996 if (WARN_ON(!mlxsw_sp_rt6
))
5000 list_del(&mlxsw_sp_rt6
->list
);
5001 mlxsw_sp_nexthop6_group_update(mlxsw_sp
, fib6_entry
);
5002 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6
);
5005 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp
*mlxsw_sp
,
5006 struct mlxsw_sp_fib_entry
*fib_entry
,
5007 const struct rt6_info
*rt
)
5009 /* Packets hitting RTF_REJECT routes need to be discarded by the
5010 * stack. We can rely on their destination device not having a
5011 * RIF (it's the loopback device) and can thus use action type
5012 * local, which will cause them to be trapped with a lower
5013 * priority than packets that need to be locally received.
5015 if (rt
->rt6i_flags
& (RTF_LOCAL
| RTF_ANYCAST
))
5016 fib_entry
->type
= MLXSW_SP_FIB_ENTRY_TYPE_TRAP
;
5017 else if (rt
->rt6i_flags
& RTF_REJECT
)
5018 fib_entry
->type
= MLXSW_SP_FIB_ENTRY_TYPE_LOCAL
;
5019 else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp
, rt
))
5020 fib_entry
->type
= MLXSW_SP_FIB_ENTRY_TYPE_REMOTE
;
5022 fib_entry
->type
= MLXSW_SP_FIB_ENTRY_TYPE_LOCAL
;
5026 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry
*fib6_entry
)
5028 struct mlxsw_sp_rt6
*mlxsw_sp_rt6
, *tmp
;
5030 list_for_each_entry_safe(mlxsw_sp_rt6
, tmp
, &fib6_entry
->rt6_list
,
5033 list_del(&mlxsw_sp_rt6
->list
);
5034 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6
);
5038 static struct mlxsw_sp_fib6_entry
*
5039 mlxsw_sp_fib6_entry_create(struct mlxsw_sp
*mlxsw_sp
,
5040 struct mlxsw_sp_fib_node
*fib_node
,
5041 struct rt6_info
*rt
)
5043 struct mlxsw_sp_fib6_entry
*fib6_entry
;
5044 struct mlxsw_sp_fib_entry
*fib_entry
;
5045 struct mlxsw_sp_rt6
*mlxsw_sp_rt6
;
5048 fib6_entry
= kzalloc(sizeof(*fib6_entry
), GFP_KERNEL
);
5050 return ERR_PTR(-ENOMEM
);
5051 fib_entry
= &fib6_entry
->common
;
5053 mlxsw_sp_rt6
= mlxsw_sp_rt6_create(rt
);
5054 if (IS_ERR(mlxsw_sp_rt6
)) {
5055 err
= PTR_ERR(mlxsw_sp_rt6
);
5056 goto err_rt6_create
;
5059 mlxsw_sp_fib6_entry_type_set(mlxsw_sp
, fib_entry
, mlxsw_sp_rt6
->rt
);
5061 INIT_LIST_HEAD(&fib6_entry
->rt6_list
);
5062 list_add_tail(&mlxsw_sp_rt6
->list
, &fib6_entry
->rt6_list
);
5063 fib6_entry
->nrt6
= 1;
5064 err
= mlxsw_sp_nexthop6_group_get(mlxsw_sp
, fib6_entry
);
5066 goto err_nexthop6_group_get
;
5068 fib_entry
->fib_node
= fib_node
;
5072 err_nexthop6_group_get
:
5073 list_del(&mlxsw_sp_rt6
->list
);
5074 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6
);
5077 return ERR_PTR(err
);
5080 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp
*mlxsw_sp
,
5081 struct mlxsw_sp_fib6_entry
*fib6_entry
)
5083 mlxsw_sp_nexthop6_group_put(mlxsw_sp
, &fib6_entry
->common
);
5084 mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry
);
5085 WARN_ON(fib6_entry
->nrt6
);
5089 static struct mlxsw_sp_fib6_entry
*
5090 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node
*fib_node
,
5091 const struct rt6_info
*nrt
, bool replace
)
5093 struct mlxsw_sp_fib6_entry
*fib6_entry
, *fallback
= NULL
;
5095 list_for_each_entry(fib6_entry
, &fib_node
->entry_list
, common
.list
) {
5096 struct rt6_info
*rt
= mlxsw_sp_fib6_entry_rt(fib6_entry
);
5098 if (rt
->rt6i_table
->tb6_id
> nrt
->rt6i_table
->tb6_id
)
5100 if (rt
->rt6i_table
->tb6_id
!= nrt
->rt6i_table
->tb6_id
)
5102 if (replace
&& rt
->rt6i_metric
== nrt
->rt6i_metric
) {
5103 if (mlxsw_sp_fib6_rt_can_mp(rt
) ==
5104 mlxsw_sp_fib6_rt_can_mp(nrt
))
5106 if (mlxsw_sp_fib6_rt_can_mp(nrt
))
5107 fallback
= fallback
?: fib6_entry
;
5109 if (rt
->rt6i_metric
> nrt
->rt6i_metric
)
5110 return fallback
?: fib6_entry
;
5117 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry
*new6_entry
,
5120 struct mlxsw_sp_fib_node
*fib_node
= new6_entry
->common
.fib_node
;
5121 struct rt6_info
*nrt
= mlxsw_sp_fib6_entry_rt(new6_entry
);
5122 struct mlxsw_sp_fib6_entry
*fib6_entry
;
5124 fib6_entry
= mlxsw_sp_fib6_node_entry_find(fib_node
, nrt
, replace
);
5126 if (replace
&& WARN_ON(!fib6_entry
))
5130 list_add_tail(&new6_entry
->common
.list
,
5131 &fib6_entry
->common
.list
);
5133 struct mlxsw_sp_fib6_entry
*last
;
5135 list_for_each_entry(last
, &fib_node
->entry_list
, common
.list
) {
5136 struct rt6_info
*rt
= mlxsw_sp_fib6_entry_rt(last
);
5138 if (nrt
->rt6i_table
->tb6_id
> rt
->rt6i_table
->tb6_id
)
5144 list_add(&new6_entry
->common
.list
,
5145 &fib6_entry
->common
.list
);
5147 list_add(&new6_entry
->common
.list
,
5148 &fib_node
->entry_list
);
5155 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry
*fib6_entry
)
5157 list_del(&fib6_entry
->common
.list
);
5160 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp
*mlxsw_sp
,
5161 struct mlxsw_sp_fib6_entry
*fib6_entry
,
5166 err
= mlxsw_sp_fib6_node_list_insert(fib6_entry
, replace
);
5170 err
= mlxsw_sp_fib_node_entry_add(mlxsw_sp
, &fib6_entry
->common
);
5172 goto err_fib_node_entry_add
;
5176 err_fib_node_entry_add
:
5177 mlxsw_sp_fib6_node_list_remove(fib6_entry
);
5182 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp
*mlxsw_sp
,
5183 struct mlxsw_sp_fib6_entry
*fib6_entry
)
5185 mlxsw_sp_fib_node_entry_del(mlxsw_sp
, &fib6_entry
->common
);
5186 mlxsw_sp_fib6_node_list_remove(fib6_entry
);
5189 static struct mlxsw_sp_fib6_entry
*
5190 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp
*mlxsw_sp
,
5191 const struct rt6_info
*rt
)
5193 struct mlxsw_sp_fib6_entry
*fib6_entry
;
5194 struct mlxsw_sp_fib_node
*fib_node
;
5195 struct mlxsw_sp_fib
*fib
;
5196 struct mlxsw_sp_vr
*vr
;
5198 vr
= mlxsw_sp_vr_find(mlxsw_sp
, rt
->rt6i_table
->tb6_id
);
5201 fib
= mlxsw_sp_vr_fib(vr
, MLXSW_SP_L3_PROTO_IPV6
);
5203 fib_node
= mlxsw_sp_fib_node_lookup(fib
, &rt
->rt6i_dst
.addr
,
5204 sizeof(rt
->rt6i_dst
.addr
),
5209 list_for_each_entry(fib6_entry
, &fib_node
->entry_list
, common
.list
) {
5210 struct rt6_info
*iter_rt
= mlxsw_sp_fib6_entry_rt(fib6_entry
);
5212 if (rt
->rt6i_table
->tb6_id
== iter_rt
->rt6i_table
->tb6_id
&&
5213 rt
->rt6i_metric
== iter_rt
->rt6i_metric
&&
5214 mlxsw_sp_fib6_entry_rt_find(fib6_entry
, rt
))
5221 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp
*mlxsw_sp
,
5222 struct mlxsw_sp_fib6_entry
*fib6_entry
,
5225 struct mlxsw_sp_fib_node
*fib_node
= fib6_entry
->common
.fib_node
;
5226 struct mlxsw_sp_fib6_entry
*replaced
;
5231 replaced
= list_next_entry(fib6_entry
, common
.list
);
5233 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp
, replaced
);
5234 mlxsw_sp_fib6_entry_destroy(mlxsw_sp
, replaced
);
5235 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
5238 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp
*mlxsw_sp
,
5239 struct rt6_info
*rt
, bool replace
)
5241 struct mlxsw_sp_fib6_entry
*fib6_entry
;
5242 struct mlxsw_sp_fib_node
*fib_node
;
5245 if (mlxsw_sp
->router
->aborted
)
5248 if (rt
->rt6i_src
.plen
)
5251 if (mlxsw_sp_fib6_rt_should_ignore(rt
))
5254 fib_node
= mlxsw_sp_fib_node_get(mlxsw_sp
, rt
->rt6i_table
->tb6_id
,
5256 sizeof(rt
->rt6i_dst
.addr
),
5258 MLXSW_SP_L3_PROTO_IPV6
);
5259 if (IS_ERR(fib_node
))
5260 return PTR_ERR(fib_node
);
5262 /* Before creating a new entry, try to append route to an existing
5265 fib6_entry
= mlxsw_sp_fib6_node_mp_entry_find(fib_node
, rt
, replace
);
5267 err
= mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp
, fib6_entry
, rt
);
5269 goto err_fib6_entry_nexthop_add
;
5273 fib6_entry
= mlxsw_sp_fib6_entry_create(mlxsw_sp
, fib_node
, rt
);
5274 if (IS_ERR(fib6_entry
)) {
5275 err
= PTR_ERR(fib6_entry
);
5276 goto err_fib6_entry_create
;
5279 err
= mlxsw_sp_fib6_node_entry_link(mlxsw_sp
, fib6_entry
, replace
);
5281 goto err_fib6_node_entry_link
;
5283 mlxsw_sp_fib6_entry_replace(mlxsw_sp
, fib6_entry
, replace
);
5287 err_fib6_node_entry_link
:
5288 mlxsw_sp_fib6_entry_destroy(mlxsw_sp
, fib6_entry
);
5289 err_fib6_entry_create
:
5290 err_fib6_entry_nexthop_add
:
5291 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
5295 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp
*mlxsw_sp
,
5296 struct rt6_info
*rt
)
5298 struct mlxsw_sp_fib6_entry
*fib6_entry
;
5299 struct mlxsw_sp_fib_node
*fib_node
;
5301 if (mlxsw_sp
->router
->aborted
)
5304 if (mlxsw_sp_fib6_rt_should_ignore(rt
))
5307 fib6_entry
= mlxsw_sp_fib6_entry_lookup(mlxsw_sp
, rt
);
5308 if (WARN_ON(!fib6_entry
))
5311 /* If route is part of a multipath entry, but not the last one
5312 * removed, then only reduce its nexthop group.
5314 if (!list_is_singular(&fib6_entry
->rt6_list
)) {
5315 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp
, fib6_entry
, rt
);
5319 fib_node
= fib6_entry
->common
.fib_node
;
5321 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp
, fib6_entry
);
5322 mlxsw_sp_fib6_entry_destroy(mlxsw_sp
, fib6_entry
);
5323 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
5326 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp
*mlxsw_sp
,
5327 enum mlxsw_reg_ralxx_protocol proto
,
5330 char ralta_pl
[MLXSW_REG_RALTA_LEN
];
5331 char ralst_pl
[MLXSW_REG_RALST_LEN
];
5334 mlxsw_reg_ralta_pack(ralta_pl
, true, proto
, tree_id
);
5335 err
= mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralta
), ralta_pl
);
5339 mlxsw_reg_ralst_pack(ralst_pl
, 0xff, tree_id
);
5340 err
= mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralst
), ralst_pl
);
5344 for (i
= 0; i
< MLXSW_CORE_RES_GET(mlxsw_sp
->core
, MAX_VRS
); i
++) {
5345 struct mlxsw_sp_vr
*vr
= &mlxsw_sp
->router
->vrs
[i
];
5346 char raltb_pl
[MLXSW_REG_RALTB_LEN
];
5347 char ralue_pl
[MLXSW_REG_RALUE_LEN
];
5349 mlxsw_reg_raltb_pack(raltb_pl
, vr
->id
, proto
, tree_id
);
5350 err
= mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(raltb
),
5355 mlxsw_reg_ralue_pack(ralue_pl
, proto
,
5356 MLXSW_REG_RALUE_OP_WRITE_WRITE
, vr
->id
, 0);
5357 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl
);
5358 err
= mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ralue
),
5367 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp
*mlxsw_sp
,
5368 struct mfc_entry_notifier_info
*men_info
,
5371 struct mlxsw_sp_vr
*vr
;
5373 if (mlxsw_sp
->router
->aborted
)
5376 vr
= mlxsw_sp_vr_get(mlxsw_sp
, men_info
->tb_id
, NULL
);
5380 return mlxsw_sp_mr_route4_add(vr
->mr4_table
, men_info
->mfc
, replace
);
5383 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp
*mlxsw_sp
,
5384 struct mfc_entry_notifier_info
*men_info
)
5386 struct mlxsw_sp_vr
*vr
;
5388 if (mlxsw_sp
->router
->aborted
)
5391 vr
= mlxsw_sp_vr_find(mlxsw_sp
, men_info
->tb_id
);
5395 mlxsw_sp_mr_route4_del(vr
->mr4_table
, men_info
->mfc
);
5396 mlxsw_sp_vr_put(mlxsw_sp
, vr
);
5400 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp
*mlxsw_sp
,
5401 struct vif_entry_notifier_info
*ven_info
)
5403 struct mlxsw_sp_rif
*rif
;
5404 struct mlxsw_sp_vr
*vr
;
5406 if (mlxsw_sp
->router
->aborted
)
5409 vr
= mlxsw_sp_vr_get(mlxsw_sp
, ven_info
->tb_id
, NULL
);
5413 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, ven_info
->dev
);
5414 return mlxsw_sp_mr_vif_add(vr
->mr4_table
, ven_info
->dev
,
5415 ven_info
->vif_index
,
5416 ven_info
->vif_flags
, rif
);
5420 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp
*mlxsw_sp
,
5421 struct vif_entry_notifier_info
*ven_info
)
5423 struct mlxsw_sp_vr
*vr
;
5425 if (mlxsw_sp
->router
->aborted
)
5428 vr
= mlxsw_sp_vr_find(mlxsw_sp
, ven_info
->tb_id
);
5432 mlxsw_sp_mr_vif_del(vr
->mr4_table
, ven_info
->vif_index
);
5433 mlxsw_sp_vr_put(mlxsw_sp
, vr
);
5436 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp
*mlxsw_sp
)
5438 enum mlxsw_reg_ralxx_protocol proto
= MLXSW_REG_RALXX_PROTOCOL_IPV4
;
5441 err
= __mlxsw_sp_router_set_abort_trap(mlxsw_sp
, proto
,
5442 MLXSW_SP_LPM_TREE_MIN
);
5446 /* The multicast router code does not need an abort trap as by default,
5447 * packets that don't match any routes are trapped to the CPU.
5450 proto
= MLXSW_REG_RALXX_PROTOCOL_IPV6
;
5451 return __mlxsw_sp_router_set_abort_trap(mlxsw_sp
, proto
,
5452 MLXSW_SP_LPM_TREE_MIN
+ 1);
5455 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp
*mlxsw_sp
,
5456 struct mlxsw_sp_fib_node
*fib_node
)
5458 struct mlxsw_sp_fib4_entry
*fib4_entry
, *tmp
;
5460 list_for_each_entry_safe(fib4_entry
, tmp
, &fib_node
->entry_list
,
5462 bool do_break
= &tmp
->common
.list
== &fib_node
->entry_list
;
5464 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp
, fib4_entry
);
5465 mlxsw_sp_fib4_entry_destroy(mlxsw_sp
, fib4_entry
);
5466 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
5467 /* Break when entry list is empty and node was freed.
5468 * Otherwise, we'll access freed memory in the next
5476 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp
*mlxsw_sp
,
5477 struct mlxsw_sp_fib_node
*fib_node
)
5479 struct mlxsw_sp_fib6_entry
*fib6_entry
, *tmp
;
5481 list_for_each_entry_safe(fib6_entry
, tmp
, &fib_node
->entry_list
,
5483 bool do_break
= &tmp
->common
.list
== &fib_node
->entry_list
;
5485 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp
, fib6_entry
);
5486 mlxsw_sp_fib6_entry_destroy(mlxsw_sp
, fib6_entry
);
5487 mlxsw_sp_fib_node_put(mlxsw_sp
, fib_node
);
5493 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp
*mlxsw_sp
,
5494 struct mlxsw_sp_fib_node
*fib_node
)
5496 switch (fib_node
->fib
->proto
) {
5497 case MLXSW_SP_L3_PROTO_IPV4
:
5498 mlxsw_sp_fib4_node_flush(mlxsw_sp
, fib_node
);
5500 case MLXSW_SP_L3_PROTO_IPV6
:
5501 mlxsw_sp_fib6_node_flush(mlxsw_sp
, fib_node
);
5506 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp
*mlxsw_sp
,
5507 struct mlxsw_sp_vr
*vr
,
5508 enum mlxsw_sp_l3proto proto
)
5510 struct mlxsw_sp_fib
*fib
= mlxsw_sp_vr_fib(vr
, proto
);
5511 struct mlxsw_sp_fib_node
*fib_node
, *tmp
;
5513 list_for_each_entry_safe(fib_node
, tmp
, &fib
->node_list
, list
) {
5514 bool do_break
= &tmp
->list
== &fib
->node_list
;
5516 mlxsw_sp_fib_node_flush(mlxsw_sp
, fib_node
);
5522 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp
*mlxsw_sp
)
5526 for (i
= 0; i
< MLXSW_CORE_RES_GET(mlxsw_sp
->core
, MAX_VRS
); i
++) {
5527 struct mlxsw_sp_vr
*vr
= &mlxsw_sp
->router
->vrs
[i
];
5529 if (!mlxsw_sp_vr_is_used(vr
))
5532 mlxsw_sp_mr_table_flush(vr
->mr4_table
);
5533 mlxsw_sp_vr_fib_flush(mlxsw_sp
, vr
, MLXSW_SP_L3_PROTO_IPV4
);
5535 /* If virtual router was only used for IPv4, then it's no
5538 if (!mlxsw_sp_vr_is_used(vr
))
5540 mlxsw_sp_vr_fib_flush(mlxsw_sp
, vr
, MLXSW_SP_L3_PROTO_IPV6
);
5544 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp
*mlxsw_sp
)
5548 if (mlxsw_sp
->router
->aborted
)
5550 dev_warn(mlxsw_sp
->bus_info
->dev
, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5551 mlxsw_sp_router_fib_flush(mlxsw_sp
);
5552 mlxsw_sp
->router
->aborted
= true;
5553 err
= mlxsw_sp_router_set_abort_trap(mlxsw_sp
);
5555 dev_warn(mlxsw_sp
->bus_info
->dev
, "Failed to set abort trap.\n");
5558 struct mlxsw_sp_fib_event_work
{
5559 struct work_struct work
;
5561 struct fib6_entry_notifier_info fen6_info
;
5562 struct fib_entry_notifier_info fen_info
;
5563 struct fib_rule_notifier_info fr_info
;
5564 struct fib_nh_notifier_info fnh_info
;
5565 struct mfc_entry_notifier_info men_info
;
5566 struct vif_entry_notifier_info ven_info
;
5568 struct mlxsw_sp
*mlxsw_sp
;
5569 unsigned long event
;
5572 static void mlxsw_sp_router_fib4_event_work(struct work_struct
*work
)
5574 struct mlxsw_sp_fib_event_work
*fib_work
=
5575 container_of(work
, struct mlxsw_sp_fib_event_work
, work
);
5576 struct mlxsw_sp
*mlxsw_sp
= fib_work
->mlxsw_sp
;
5577 bool replace
, append
;
5580 /* Protect internal structures from changes */
5582 switch (fib_work
->event
) {
5583 case FIB_EVENT_ENTRY_REPLACE
: /* fall through */
5584 case FIB_EVENT_ENTRY_APPEND
: /* fall through */
5585 case FIB_EVENT_ENTRY_ADD
:
5586 replace
= fib_work
->event
== FIB_EVENT_ENTRY_REPLACE
;
5587 append
= fib_work
->event
== FIB_EVENT_ENTRY_APPEND
;
5588 err
= mlxsw_sp_router_fib4_add(mlxsw_sp
, &fib_work
->fen_info
,
5591 mlxsw_sp_router_fib_abort(mlxsw_sp
);
5592 fib_info_put(fib_work
->fen_info
.fi
);
5594 case FIB_EVENT_ENTRY_DEL
:
5595 mlxsw_sp_router_fib4_del(mlxsw_sp
, &fib_work
->fen_info
);
5596 fib_info_put(fib_work
->fen_info
.fi
);
5598 case FIB_EVENT_RULE_ADD
:
5599 /* if we get here, a rule was added that we do not support.
5600 * just do the fib_abort
5602 mlxsw_sp_router_fib_abort(mlxsw_sp
);
5604 case FIB_EVENT_NH_ADD
: /* fall through */
5605 case FIB_EVENT_NH_DEL
:
5606 mlxsw_sp_nexthop4_event(mlxsw_sp
, fib_work
->event
,
5607 fib_work
->fnh_info
.fib_nh
);
5608 fib_info_put(fib_work
->fnh_info
.fib_nh
->nh_parent
);
5615 static void mlxsw_sp_router_fib6_event_work(struct work_struct
*work
)
5617 struct mlxsw_sp_fib_event_work
*fib_work
=
5618 container_of(work
, struct mlxsw_sp_fib_event_work
, work
);
5619 struct mlxsw_sp
*mlxsw_sp
= fib_work
->mlxsw_sp
;
5624 switch (fib_work
->event
) {
5625 case FIB_EVENT_ENTRY_REPLACE
: /* fall through */
5626 case FIB_EVENT_ENTRY_ADD
:
5627 replace
= fib_work
->event
== FIB_EVENT_ENTRY_REPLACE
;
5628 err
= mlxsw_sp_router_fib6_add(mlxsw_sp
,
5629 fib_work
->fen6_info
.rt
, replace
);
5631 mlxsw_sp_router_fib_abort(mlxsw_sp
);
5632 mlxsw_sp_rt6_release(fib_work
->fen6_info
.rt
);
5634 case FIB_EVENT_ENTRY_DEL
:
5635 mlxsw_sp_router_fib6_del(mlxsw_sp
, fib_work
->fen6_info
.rt
);
5636 mlxsw_sp_rt6_release(fib_work
->fen6_info
.rt
);
5638 case FIB_EVENT_RULE_ADD
:
5639 /* if we get here, a rule was added that we do not support.
5640 * just do the fib_abort
5642 mlxsw_sp_router_fib_abort(mlxsw_sp
);
5649 static void mlxsw_sp_router_fibmr_event_work(struct work_struct
*work
)
5651 struct mlxsw_sp_fib_event_work
*fib_work
=
5652 container_of(work
, struct mlxsw_sp_fib_event_work
, work
);
5653 struct mlxsw_sp
*mlxsw_sp
= fib_work
->mlxsw_sp
;
5658 switch (fib_work
->event
) {
5659 case FIB_EVENT_ENTRY_REPLACE
: /* fall through */
5660 case FIB_EVENT_ENTRY_ADD
:
5661 replace
= fib_work
->event
== FIB_EVENT_ENTRY_REPLACE
;
5663 err
= mlxsw_sp_router_fibmr_add(mlxsw_sp
, &fib_work
->men_info
,
5666 mlxsw_sp_router_fib_abort(mlxsw_sp
);
5667 ipmr_cache_put(fib_work
->men_info
.mfc
);
5669 case FIB_EVENT_ENTRY_DEL
:
5670 mlxsw_sp_router_fibmr_del(mlxsw_sp
, &fib_work
->men_info
);
5671 ipmr_cache_put(fib_work
->men_info
.mfc
);
5673 case FIB_EVENT_VIF_ADD
:
5674 err
= mlxsw_sp_router_fibmr_vif_add(mlxsw_sp
,
5675 &fib_work
->ven_info
);
5677 mlxsw_sp_router_fib_abort(mlxsw_sp
);
5678 dev_put(fib_work
->ven_info
.dev
);
5680 case FIB_EVENT_VIF_DEL
:
5681 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp
,
5682 &fib_work
->ven_info
);
5683 dev_put(fib_work
->ven_info
.dev
);
5685 case FIB_EVENT_RULE_ADD
:
5686 /* if we get here, a rule was added that we do not support.
5687 * just do the fib_abort
5689 mlxsw_sp_router_fib_abort(mlxsw_sp
);
5696 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work
*fib_work
,
5697 struct fib_notifier_info
*info
)
5699 struct fib_entry_notifier_info
*fen_info
;
5700 struct fib_nh_notifier_info
*fnh_info
;
5702 switch (fib_work
->event
) {
5703 case FIB_EVENT_ENTRY_REPLACE
: /* fall through */
5704 case FIB_EVENT_ENTRY_APPEND
: /* fall through */
5705 case FIB_EVENT_ENTRY_ADD
: /* fall through */
5706 case FIB_EVENT_ENTRY_DEL
:
5707 fen_info
= container_of(info
, struct fib_entry_notifier_info
,
5709 fib_work
->fen_info
= *fen_info
;
5710 /* Take reference on fib_info to prevent it from being
5711 * freed while work is queued. Release it afterwards.
5713 fib_info_hold(fib_work
->fen_info
.fi
);
5715 case FIB_EVENT_NH_ADD
: /* fall through */
5716 case FIB_EVENT_NH_DEL
:
5717 fnh_info
= container_of(info
, struct fib_nh_notifier_info
,
5719 fib_work
->fnh_info
= *fnh_info
;
5720 fib_info_hold(fib_work
->fnh_info
.fib_nh
->nh_parent
);
5725 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work
*fib_work
,
5726 struct fib_notifier_info
*info
)
5728 struct fib6_entry_notifier_info
*fen6_info
;
5730 switch (fib_work
->event
) {
5731 case FIB_EVENT_ENTRY_REPLACE
: /* fall through */
5732 case FIB_EVENT_ENTRY_ADD
: /* fall through */
5733 case FIB_EVENT_ENTRY_DEL
:
5734 fen6_info
= container_of(info
, struct fib6_entry_notifier_info
,
5736 fib_work
->fen6_info
= *fen6_info
;
5737 rt6_hold(fib_work
->fen6_info
.rt
);
5743 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work
*fib_work
,
5744 struct fib_notifier_info
*info
)
5746 switch (fib_work
->event
) {
5747 case FIB_EVENT_ENTRY_REPLACE
: /* fall through */
5748 case FIB_EVENT_ENTRY_ADD
: /* fall through */
5749 case FIB_EVENT_ENTRY_DEL
:
5750 memcpy(&fib_work
->men_info
, info
, sizeof(fib_work
->men_info
));
5751 ipmr_cache_hold(fib_work
->men_info
.mfc
);
5753 case FIB_EVENT_VIF_ADD
: /* fall through */
5754 case FIB_EVENT_VIF_DEL
:
5755 memcpy(&fib_work
->ven_info
, info
, sizeof(fib_work
->ven_info
));
5756 dev_hold(fib_work
->ven_info
.dev
);
5761 static int mlxsw_sp_router_fib_rule_event(unsigned long event
,
5762 struct fib_notifier_info
*info
,
5763 struct mlxsw_sp
*mlxsw_sp
)
5765 struct netlink_ext_ack
*extack
= info
->extack
;
5766 struct fib_rule_notifier_info
*fr_info
;
5767 struct fib_rule
*rule
;
5770 /* nothing to do at the moment */
5771 if (event
== FIB_EVENT_RULE_DEL
)
5774 if (mlxsw_sp
->router
->aborted
)
5777 fr_info
= container_of(info
, struct fib_rule_notifier_info
, info
);
5778 rule
= fr_info
->rule
;
5780 switch (info
->family
) {
5782 if (!fib4_rule_default(rule
) && !rule
->l3mdev
)
5786 if (!fib6_rule_default(rule
) && !rule
->l3mdev
)
5789 case RTNL_FAMILY_IPMR
:
5790 if (!ipmr_rule_default(rule
) && !rule
->l3mdev
)
5796 NL_SET_ERR_MSG(extack
, "spectrum: FIB rules not supported. Aborting offload");
5801 /* Called with rcu_read_lock() */
5802 static int mlxsw_sp_router_fib_event(struct notifier_block
*nb
,
5803 unsigned long event
, void *ptr
)
5805 struct mlxsw_sp_fib_event_work
*fib_work
;
5806 struct fib_notifier_info
*info
= ptr
;
5807 struct mlxsw_sp_router
*router
;
5810 if (!net_eq(info
->net
, &init_net
) ||
5811 (info
->family
!= AF_INET
&& info
->family
!= AF_INET6
&&
5812 info
->family
!= RTNL_FAMILY_IPMR
))
5815 router
= container_of(nb
, struct mlxsw_sp_router
, fib_nb
);
5818 case FIB_EVENT_RULE_ADD
: /* fall through */
5819 case FIB_EVENT_RULE_DEL
:
5820 err
= mlxsw_sp_router_fib_rule_event(event
, info
,
5826 fib_work
= kzalloc(sizeof(*fib_work
), GFP_ATOMIC
);
5827 if (WARN_ON(!fib_work
))
5830 fib_work
->mlxsw_sp
= router
->mlxsw_sp
;
5831 fib_work
->event
= event
;
5833 switch (info
->family
) {
5835 INIT_WORK(&fib_work
->work
, mlxsw_sp_router_fib4_event_work
);
5836 mlxsw_sp_router_fib4_event(fib_work
, info
);
5839 INIT_WORK(&fib_work
->work
, mlxsw_sp_router_fib6_event_work
);
5840 mlxsw_sp_router_fib6_event(fib_work
, info
);
5842 case RTNL_FAMILY_IPMR
:
5843 INIT_WORK(&fib_work
->work
, mlxsw_sp_router_fibmr_event_work
);
5844 mlxsw_sp_router_fibmr_event(fib_work
, info
);
5848 mlxsw_core_schedule_work(&fib_work
->work
);
5853 static struct mlxsw_sp_rif
*
5854 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp
*mlxsw_sp
,
5855 const struct net_device
*dev
)
5859 for (i
= 0; i
< MLXSW_CORE_RES_GET(mlxsw_sp
->core
, MAX_RIFS
); i
++)
5860 if (mlxsw_sp
->router
->rifs
[i
] &&
5861 mlxsw_sp
->router
->rifs
[i
]->dev
== dev
)
5862 return mlxsw_sp
->router
->rifs
[i
];
5867 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp
*mlxsw_sp
, u16 rif
)
5869 char ritr_pl
[MLXSW_REG_RITR_LEN
];
5872 mlxsw_reg_ritr_rif_pack(ritr_pl
, rif
);
5873 err
= mlxsw_reg_query(mlxsw_sp
->core
, MLXSW_REG(ritr
), ritr_pl
);
5874 if (WARN_ON_ONCE(err
))
5877 mlxsw_reg_ritr_enable_set(ritr_pl
, false);
5878 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ritr
), ritr_pl
);
5881 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp
*mlxsw_sp
,
5882 struct mlxsw_sp_rif
*rif
)
5884 mlxsw_sp_router_rif_disable(mlxsw_sp
, rif
->rif_index
);
5885 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp
, rif
);
5886 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp
, rif
);
5890 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif
*rif
, struct net_device
*dev
,
5891 unsigned long event
)
5893 struct inet6_dev
*inet6_dev
;
5894 bool addr_list_empty
= true;
5895 struct in_device
*idev
;
5901 idev
= __in_dev_get_rtnl(dev
);
5902 if (idev
&& idev
->ifa_list
)
5903 addr_list_empty
= false;
5905 inet6_dev
= __in6_dev_get(dev
);
5906 if (addr_list_empty
&& inet6_dev
&&
5907 !list_empty(&inet6_dev
->addr_list
))
5908 addr_list_empty
= false;
5910 if (rif
&& addr_list_empty
&&
5911 !netif_is_l3_slave(rif
->dev
))
5913 /* It is possible we already removed the RIF ourselves
5914 * if it was assigned to a netdev that is now a bridge
5923 static enum mlxsw_sp_rif_type
5924 mlxsw_sp_dev_rif_type(const struct mlxsw_sp
*mlxsw_sp
,
5925 const struct net_device
*dev
)
5927 enum mlxsw_sp_fid_type type
;
5929 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp
, dev
, NULL
))
5930 return MLXSW_SP_RIF_TYPE_IPIP_LB
;
5932 /* Otherwise RIF type is derived from the type of the underlying FID. */
5933 if (is_vlan_dev(dev
) && netif_is_bridge_master(vlan_dev_real_dev(dev
)))
5934 type
= MLXSW_SP_FID_TYPE_8021Q
;
5935 else if (netif_is_bridge_master(dev
) && br_vlan_enabled(dev
))
5936 type
= MLXSW_SP_FID_TYPE_8021Q
;
5937 else if (netif_is_bridge_master(dev
))
5938 type
= MLXSW_SP_FID_TYPE_8021D
;
5940 type
= MLXSW_SP_FID_TYPE_RFID
;
5942 return mlxsw_sp_fid_type_rif_type(mlxsw_sp
, type
);
5945 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp
*mlxsw_sp
, u16
*p_rif_index
)
5949 for (i
= 0; i
< MLXSW_CORE_RES_GET(mlxsw_sp
->core
, MAX_RIFS
); i
++) {
5950 if (!mlxsw_sp
->router
->rifs
[i
]) {
5959 static struct mlxsw_sp_rif
*mlxsw_sp_rif_alloc(size_t rif_size
, u16 rif_index
,
5961 struct net_device
*l3_dev
)
5963 struct mlxsw_sp_rif
*rif
;
5965 rif
= kzalloc(rif_size
, GFP_KERNEL
);
5969 INIT_LIST_HEAD(&rif
->nexthop_list
);
5970 INIT_LIST_HEAD(&rif
->neigh_list
);
5971 ether_addr_copy(rif
->addr
, l3_dev
->dev_addr
);
5972 rif
->mtu
= l3_dev
->mtu
;
5975 rif
->rif_index
= rif_index
;
5980 struct mlxsw_sp_rif
*mlxsw_sp_rif_by_index(const struct mlxsw_sp
*mlxsw_sp
,
5983 return mlxsw_sp
->router
->rifs
[rif_index
];
5986 u16
mlxsw_sp_rif_index(const struct mlxsw_sp_rif
*rif
)
5988 return rif
->rif_index
;
5991 u16
mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb
*lb_rif
)
5993 return lb_rif
->common
.rif_index
;
5996 u16
mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb
*lb_rif
)
5998 return lb_rif
->ul_vr_id
;
6001 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif
*rif
)
6003 return rif
->dev
->ifindex
;
6006 const struct net_device
*mlxsw_sp_rif_dev(const struct mlxsw_sp_rif
*rif
)
6011 static struct mlxsw_sp_rif
*
6012 mlxsw_sp_rif_create(struct mlxsw_sp
*mlxsw_sp
,
6013 const struct mlxsw_sp_rif_params
*params
,
6014 struct netlink_ext_ack
*extack
)
6016 u32 tb_id
= l3mdev_fib_table(params
->dev
);
6017 const struct mlxsw_sp_rif_ops
*ops
;
6018 struct mlxsw_sp_fid
*fid
= NULL
;
6019 enum mlxsw_sp_rif_type type
;
6020 struct mlxsw_sp_rif
*rif
;
6021 struct mlxsw_sp_vr
*vr
;
6025 type
= mlxsw_sp_dev_rif_type(mlxsw_sp
, params
->dev
);
6026 ops
= mlxsw_sp
->router
->rif_ops_arr
[type
];
6028 vr
= mlxsw_sp_vr_get(mlxsw_sp
, tb_id
? : RT_TABLE_MAIN
, extack
);
6030 return ERR_CAST(vr
);
6033 err
= mlxsw_sp_rif_index_alloc(mlxsw_sp
, &rif_index
);
6035 NL_SET_ERR_MSG(extack
, "spectrum: Exceeded number of supported router interfaces");
6036 goto err_rif_index_alloc
;
6039 rif
= mlxsw_sp_rif_alloc(ops
->rif_size
, rif_index
, vr
->id
, params
->dev
);
6044 rif
->mlxsw_sp
= mlxsw_sp
;
6048 fid
= ops
->fid_get(rif
);
6057 ops
->setup(rif
, params
);
6059 err
= ops
->configure(rif
);
6063 err
= mlxsw_sp_mr_rif_add(vr
->mr4_table
, rif
);
6065 goto err_mr_rif_add
;
6067 mlxsw_sp_rif_counters_alloc(rif
);
6068 mlxsw_sp
->router
->rifs
[rif_index
] = rif
;
6073 ops
->deconfigure(rif
);
6076 mlxsw_sp_fid_put(fid
);
6080 err_rif_index_alloc
:
6082 mlxsw_sp_vr_put(mlxsw_sp
, vr
);
6083 return ERR_PTR(err
);
6086 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif
*rif
)
6088 const struct mlxsw_sp_rif_ops
*ops
= rif
->ops
;
6089 struct mlxsw_sp
*mlxsw_sp
= rif
->mlxsw_sp
;
6090 struct mlxsw_sp_fid
*fid
= rif
->fid
;
6091 struct mlxsw_sp_vr
*vr
;
6093 mlxsw_sp_router_rif_gone_sync(mlxsw_sp
, rif
);
6094 vr
= &mlxsw_sp
->router
->vrs
[rif
->vr_id
];
6096 mlxsw_sp
->router
->rifs
[rif
->rif_index
] = NULL
;
6097 mlxsw_sp_rif_counters_free(rif
);
6098 mlxsw_sp_mr_rif_del(vr
->mr4_table
, rif
);
6099 ops
->deconfigure(rif
);
6101 /* Loopback RIFs are not associated with a FID. */
6102 mlxsw_sp_fid_put(fid
);
6105 mlxsw_sp_vr_put(mlxsw_sp
, vr
);
6109 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params
*params
,
6110 struct mlxsw_sp_port_vlan
*mlxsw_sp_port_vlan
)
6112 struct mlxsw_sp_port
*mlxsw_sp_port
= mlxsw_sp_port_vlan
->mlxsw_sp_port
;
6114 params
->vid
= mlxsw_sp_port_vlan
->vid
;
6115 params
->lag
= mlxsw_sp_port
->lagged
;
6117 params
->lag_id
= mlxsw_sp_port
->lag_id
;
6119 params
->system_port
= mlxsw_sp_port
->local_port
;
6123 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan
*mlxsw_sp_port_vlan
,
6124 struct net_device
*l3_dev
,
6125 struct netlink_ext_ack
*extack
)
6127 struct mlxsw_sp_port
*mlxsw_sp_port
= mlxsw_sp_port_vlan
->mlxsw_sp_port
;
6128 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_port
->mlxsw_sp
;
6129 u16 vid
= mlxsw_sp_port_vlan
->vid
;
6130 struct mlxsw_sp_rif
*rif
;
6131 struct mlxsw_sp_fid
*fid
;
6134 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, l3_dev
);
6136 struct mlxsw_sp_rif_params params
= {
6140 mlxsw_sp_rif_subport_params_init(¶ms
, mlxsw_sp_port_vlan
);
6141 rif
= mlxsw_sp_rif_create(mlxsw_sp
, ¶ms
, extack
);
6143 return PTR_ERR(rif
);
6146 /* FID was already created, just take a reference */
6147 fid
= rif
->ops
->fid_get(rif
);
6148 err
= mlxsw_sp_fid_port_vid_map(fid
, mlxsw_sp_port
, vid
);
6150 goto err_fid_port_vid_map
;
6152 err
= mlxsw_sp_port_vid_learning_set(mlxsw_sp_port
, vid
, false);
6154 goto err_port_vid_learning_set
;
6156 err
= mlxsw_sp_port_vid_stp_set(mlxsw_sp_port
, vid
,
6157 BR_STATE_FORWARDING
);
6159 goto err_port_vid_stp_set
;
6161 mlxsw_sp_port_vlan
->fid
= fid
;
6165 err_port_vid_stp_set
:
6166 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port
, vid
, true);
6167 err_port_vid_learning_set
:
6168 mlxsw_sp_fid_port_vid_unmap(fid
, mlxsw_sp_port
, vid
);
6169 err_fid_port_vid_map
:
6170 mlxsw_sp_fid_put(fid
);
6175 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan
*mlxsw_sp_port_vlan
)
6177 struct mlxsw_sp_port
*mlxsw_sp_port
= mlxsw_sp_port_vlan
->mlxsw_sp_port
;
6178 struct mlxsw_sp_fid
*fid
= mlxsw_sp_port_vlan
->fid
;
6179 u16 vid
= mlxsw_sp_port_vlan
->vid
;
6181 if (WARN_ON(mlxsw_sp_fid_type(fid
) != MLXSW_SP_FID_TYPE_RFID
))
6184 mlxsw_sp_port_vlan
->fid
= NULL
;
6185 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port
, vid
, BR_STATE_BLOCKING
);
6186 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port
, vid
, true);
6187 mlxsw_sp_fid_port_vid_unmap(fid
, mlxsw_sp_port
, vid
);
6188 /* If router port holds the last reference on the rFID, then the
6189 * associated Sub-port RIF will be destroyed.
6191 mlxsw_sp_fid_put(fid
);
6194 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device
*l3_dev
,
6195 struct net_device
*port_dev
,
6196 unsigned long event
, u16 vid
,
6197 struct netlink_ext_ack
*extack
)
6199 struct mlxsw_sp_port
*mlxsw_sp_port
= netdev_priv(port_dev
);
6200 struct mlxsw_sp_port_vlan
*mlxsw_sp_port_vlan
;
6202 mlxsw_sp_port_vlan
= mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port
, vid
);
6203 if (WARN_ON(!mlxsw_sp_port_vlan
))
6208 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan
,
6211 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan
);
6218 static int mlxsw_sp_inetaddr_port_event(struct net_device
*port_dev
,
6219 unsigned long event
,
6220 struct netlink_ext_ack
*extack
)
6222 if (netif_is_bridge_port(port_dev
) ||
6223 netif_is_lag_port(port_dev
) ||
6224 netif_is_ovs_port(port_dev
))
6227 return mlxsw_sp_inetaddr_port_vlan_event(port_dev
, port_dev
, event
, 1,
6231 static int __mlxsw_sp_inetaddr_lag_event(struct net_device
*l3_dev
,
6232 struct net_device
*lag_dev
,
6233 unsigned long event
, u16 vid
,
6234 struct netlink_ext_ack
*extack
)
6236 struct net_device
*port_dev
;
6237 struct list_head
*iter
;
6240 netdev_for_each_lower_dev(lag_dev
, port_dev
, iter
) {
6241 if (mlxsw_sp_port_dev_check(port_dev
)) {
6242 err
= mlxsw_sp_inetaddr_port_vlan_event(l3_dev
,
6254 static int mlxsw_sp_inetaddr_lag_event(struct net_device
*lag_dev
,
6255 unsigned long event
,
6256 struct netlink_ext_ack
*extack
)
6258 if (netif_is_bridge_port(lag_dev
))
6261 return __mlxsw_sp_inetaddr_lag_event(lag_dev
, lag_dev
, event
, 1,
6265 static int mlxsw_sp_inetaddr_bridge_event(struct net_device
*l3_dev
,
6266 unsigned long event
,
6267 struct netlink_ext_ack
*extack
)
6269 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_lower_get(l3_dev
);
6270 struct mlxsw_sp_rif_params params
= {
6273 struct mlxsw_sp_rif
*rif
;
6277 rif
= mlxsw_sp_rif_create(mlxsw_sp
, ¶ms
, extack
);
6279 return PTR_ERR(rif
);
6282 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, l3_dev
);
6283 mlxsw_sp_rif_destroy(rif
);
6290 static int mlxsw_sp_inetaddr_vlan_event(struct net_device
*vlan_dev
,
6291 unsigned long event
,
6292 struct netlink_ext_ack
*extack
)
6294 struct net_device
*real_dev
= vlan_dev_real_dev(vlan_dev
);
6295 u16 vid
= vlan_dev_vlan_id(vlan_dev
);
6297 if (netif_is_bridge_port(vlan_dev
))
6300 if (mlxsw_sp_port_dev_check(real_dev
))
6301 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev
, real_dev
,
6302 event
, vid
, extack
);
6303 else if (netif_is_lag_master(real_dev
))
6304 return __mlxsw_sp_inetaddr_lag_event(vlan_dev
, real_dev
, event
,
6306 else if (netif_is_bridge_master(real_dev
) && br_vlan_enabled(real_dev
))
6307 return mlxsw_sp_inetaddr_bridge_event(vlan_dev
, event
, extack
);
6312 static int __mlxsw_sp_inetaddr_event(struct net_device
*dev
,
6313 unsigned long event
,
6314 struct netlink_ext_ack
*extack
)
6316 if (mlxsw_sp_port_dev_check(dev
))
6317 return mlxsw_sp_inetaddr_port_event(dev
, event
, extack
);
6318 else if (netif_is_lag_master(dev
))
6319 return mlxsw_sp_inetaddr_lag_event(dev
, event
, extack
);
6320 else if (netif_is_bridge_master(dev
))
6321 return mlxsw_sp_inetaddr_bridge_event(dev
, event
, extack
);
6322 else if (is_vlan_dev(dev
))
6323 return mlxsw_sp_inetaddr_vlan_event(dev
, event
, extack
);
6328 int mlxsw_sp_inetaddr_event(struct notifier_block
*unused
,
6329 unsigned long event
, void *ptr
)
6331 struct in_ifaddr
*ifa
= (struct in_ifaddr
*) ptr
;
6332 struct net_device
*dev
= ifa
->ifa_dev
->dev
;
6333 struct mlxsw_sp
*mlxsw_sp
;
6334 struct mlxsw_sp_rif
*rif
;
6337 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6338 if (event
== NETDEV_UP
)
6341 mlxsw_sp
= mlxsw_sp_lower_get(dev
);
6345 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, dev
);
6346 if (!mlxsw_sp_rif_should_config(rif
, dev
, event
))
6349 err
= __mlxsw_sp_inetaddr_event(dev
, event
, NULL
);
6351 return notifier_from_errno(err
);
6354 int mlxsw_sp_inetaddr_valid_event(struct notifier_block
*unused
,
6355 unsigned long event
, void *ptr
)
6357 struct in_validator_info
*ivi
= (struct in_validator_info
*) ptr
;
6358 struct net_device
*dev
= ivi
->ivi_dev
->dev
;
6359 struct mlxsw_sp
*mlxsw_sp
;
6360 struct mlxsw_sp_rif
*rif
;
6363 mlxsw_sp
= mlxsw_sp_lower_get(dev
);
6367 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, dev
);
6368 if (!mlxsw_sp_rif_should_config(rif
, dev
, event
))
6371 err
= __mlxsw_sp_inetaddr_event(dev
, event
, ivi
->extack
);
6373 return notifier_from_errno(err
);
6376 struct mlxsw_sp_inet6addr_event_work
{
6377 struct work_struct work
;
6378 struct net_device
*dev
;
6379 unsigned long event
;
6382 static void mlxsw_sp_inet6addr_event_work(struct work_struct
*work
)
6384 struct mlxsw_sp_inet6addr_event_work
*inet6addr_work
=
6385 container_of(work
, struct mlxsw_sp_inet6addr_event_work
, work
);
6386 struct net_device
*dev
= inet6addr_work
->dev
;
6387 unsigned long event
= inet6addr_work
->event
;
6388 struct mlxsw_sp
*mlxsw_sp
;
6389 struct mlxsw_sp_rif
*rif
;
6392 mlxsw_sp
= mlxsw_sp_lower_get(dev
);
6396 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, dev
);
6397 if (!mlxsw_sp_rif_should_config(rif
, dev
, event
))
6400 __mlxsw_sp_inetaddr_event(dev
, event
, NULL
);
6404 kfree(inet6addr_work
);
6407 /* Called with rcu_read_lock() */
6408 int mlxsw_sp_inet6addr_event(struct notifier_block
*unused
,
6409 unsigned long event
, void *ptr
)
6411 struct inet6_ifaddr
*if6
= (struct inet6_ifaddr
*) ptr
;
6412 struct mlxsw_sp_inet6addr_event_work
*inet6addr_work
;
6413 struct net_device
*dev
= if6
->idev
->dev
;
6415 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6416 if (event
== NETDEV_UP
)
6419 if (!mlxsw_sp_port_dev_lower_find_rcu(dev
))
6422 inet6addr_work
= kzalloc(sizeof(*inet6addr_work
), GFP_ATOMIC
);
6423 if (!inet6addr_work
)
6426 INIT_WORK(&inet6addr_work
->work
, mlxsw_sp_inet6addr_event_work
);
6427 inet6addr_work
->dev
= dev
;
6428 inet6addr_work
->event
= event
;
6430 mlxsw_core_schedule_work(&inet6addr_work
->work
);
6435 int mlxsw_sp_inet6addr_valid_event(struct notifier_block
*unused
,
6436 unsigned long event
, void *ptr
)
6438 struct in6_validator_info
*i6vi
= (struct in6_validator_info
*) ptr
;
6439 struct net_device
*dev
= i6vi
->i6vi_dev
->dev
;
6440 struct mlxsw_sp
*mlxsw_sp
;
6441 struct mlxsw_sp_rif
*rif
;
6444 mlxsw_sp
= mlxsw_sp_lower_get(dev
);
6448 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, dev
);
6449 if (!mlxsw_sp_rif_should_config(rif
, dev
, event
))
6452 err
= __mlxsw_sp_inetaddr_event(dev
, event
, i6vi
->extack
);
6454 return notifier_from_errno(err
);
6457 static int mlxsw_sp_rif_edit(struct mlxsw_sp
*mlxsw_sp
, u16 rif_index
,
6458 const char *mac
, int mtu
)
6460 char ritr_pl
[MLXSW_REG_RITR_LEN
];
6463 mlxsw_reg_ritr_rif_pack(ritr_pl
, rif_index
);
6464 err
= mlxsw_reg_query(mlxsw_sp
->core
, MLXSW_REG(ritr
), ritr_pl
);
6468 mlxsw_reg_ritr_mtu_set(ritr_pl
, mtu
);
6469 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl
, mac
);
6470 mlxsw_reg_ritr_op_set(ritr_pl
, MLXSW_REG_RITR_RIF_CREATE
);
6471 return mlxsw_reg_write(mlxsw_sp
->core
, MLXSW_REG(ritr
), ritr_pl
);
6474 int mlxsw_sp_netdevice_router_port_event(struct net_device
*dev
)
6476 struct mlxsw_sp
*mlxsw_sp
;
6477 struct mlxsw_sp_rif
*rif
;
6481 mlxsw_sp
= mlxsw_sp_lower_get(dev
);
6485 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, dev
);
6488 fid_index
= mlxsw_sp_fid_index(rif
->fid
);
6490 err
= mlxsw_sp_rif_fdb_op(mlxsw_sp
, rif
->addr
, fid_index
, false);
6494 err
= mlxsw_sp_rif_edit(mlxsw_sp
, rif
->rif_index
, dev
->dev_addr
,
6499 err
= mlxsw_sp_rif_fdb_op(mlxsw_sp
, dev
->dev_addr
, fid_index
, true);
6501 goto err_rif_fdb_op
;
6503 if (rif
->mtu
!= dev
->mtu
) {
6504 struct mlxsw_sp_vr
*vr
;
6506 /* The RIF is relevant only to its mr_table instance, as unlike
6507 * unicast routing, in multicast routing a RIF cannot be shared
6508 * between several multicast routing tables.
6510 vr
= &mlxsw_sp
->router
->vrs
[rif
->vr_id
];
6511 mlxsw_sp_mr_rif_mtu_update(vr
->mr4_table
, rif
, dev
->mtu
);
6514 ether_addr_copy(rif
->addr
, dev
->dev_addr
);
6515 rif
->mtu
= dev
->mtu
;
6517 netdev_dbg(dev
, "Updated RIF=%d\n", rif
->rif_index
);
6522 mlxsw_sp_rif_edit(mlxsw_sp
, rif
->rif_index
, rif
->addr
, rif
->mtu
);
6524 mlxsw_sp_rif_fdb_op(mlxsw_sp
, rif
->addr
, fid_index
, true);
6528 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp
*mlxsw_sp
,
6529 struct net_device
*l3_dev
,
6530 struct netlink_ext_ack
*extack
)
6532 struct mlxsw_sp_rif
*rif
;
6534 /* If netdev is already associated with a RIF, then we need to
6535 * destroy it and create a new one with the new virtual router ID.
6537 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, l3_dev
);
6539 __mlxsw_sp_inetaddr_event(l3_dev
, NETDEV_DOWN
, extack
);
6541 return __mlxsw_sp_inetaddr_event(l3_dev
, NETDEV_UP
, extack
);
6544 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp
*mlxsw_sp
,
6545 struct net_device
*l3_dev
)
6547 struct mlxsw_sp_rif
*rif
;
6549 rif
= mlxsw_sp_rif_find_by_dev(mlxsw_sp
, l3_dev
);
6552 __mlxsw_sp_inetaddr_event(l3_dev
, NETDEV_DOWN
, NULL
);
6555 int mlxsw_sp_netdevice_vrf_event(struct net_device
*l3_dev
, unsigned long event
,
6556 struct netdev_notifier_changeupper_info
*info
)
6558 struct mlxsw_sp
*mlxsw_sp
= mlxsw_sp_lower_get(l3_dev
);
6565 case NETDEV_PRECHANGEUPPER
:
6567 case NETDEV_CHANGEUPPER
:
6568 if (info
->linking
) {
6569 struct netlink_ext_ack
*extack
;
6571 extack
= netdev_notifier_info_to_extack(&info
->info
);
6572 err
= mlxsw_sp_port_vrf_join(mlxsw_sp
, l3_dev
, extack
);
6574 mlxsw_sp_port_vrf_leave(mlxsw_sp
, l3_dev
);
6582 static struct mlxsw_sp_rif_subport
*
6583 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif
*rif
)
6585 return container_of(rif
, struct mlxsw_sp_rif_subport
, common
);
6588 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif
*rif
,
6589 const struct mlxsw_sp_rif_params
*params
)
6591 struct mlxsw_sp_rif_subport
*rif_subport
;
6593 rif_subport
= mlxsw_sp_rif_subport_rif(rif
);
6594 rif_subport
->vid
= params
->vid
;
6595 rif_subport
->lag
= params
->lag
;
6597 rif_subport
->lag_id
= params
->lag_id
;
6599 rif_subport
->system_port
= params
->system_port
;

static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_rif_subport *rif_subport;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	rif_subport = mlxsw_sp_rif_subport_rif(rif);
	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
			    rif->rif_index, rif->vr_id, rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
				  rif_subport->lag ? rif_subport->lag_id :
						     rif_subport->system_port,
				  rif_subport->vid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
	int err;

	err = mlxsw_sp_rif_subport_op(rif, true);
	if (err)
		return err;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_subport_op(rif, false);
	return err;
}

static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_subport_op(rif, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};

static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
				    enum mlxsw_reg_ritr_if_type type,
				    u16 vid_fid, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
			    rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
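
/* The "router port" is a virtual port one past the highest front-panel
 * port of the device. Adding it to a FID's multicast/broadcast flood
 * tables, as the configure() callbacks below do, is what makes flooded
 * traffic on that FID reach the router.
 */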
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}

static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}

static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
{
	u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;

	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
};
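
/* FID RIFs back 802.1D (non-VLAN-aware) bridges. Their configuration
 * mirrors the VLAN RIF above, except that the RITR interface is keyed
 * by FID index rather than by VID.
 */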
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
				       true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
	return err;
}

static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}

static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
{
	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
};
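
/* Loopback RIFs are used for IP-in-IP (GRE) tunnel termination. The RITR
 * loopback interface is packed with the tunnel's underlay virtual
 * router, source address and GRE key; only an IPv4 underlay is
 * supported, an IPv6 one is rejected with -EAFNOSUPPORT.
 */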
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static void
mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
			   const struct mlxsw_sp_rif_params *params)
{
	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
	struct mlxsw_sp_rif_ipip_lb *rif_lb;

	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
				 common);
	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
	rif_lb->lb_config = params_lb->lb_config;
}

static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
			struct mlxsw_sp_vr *ul_vr, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr->id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static int
mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;
	int err;

	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
	if (IS_ERR(ul_vr))
		return PTR_ERR(ul_vr);

	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
	if (err)
		goto err_loopback_op;

	lb_rif->ul_vr_id = ul_vr->id;
	++ul_vr->rif_count;
	return 0;

err_loopback_op:
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
	return err;
}

static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;

	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);

	--ul_vr->rif_count;
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}

static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup			= mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};

static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
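
/* The RIF table is a simple array of pointers indexed by RIF index,
 * sized by the MAX_RIFS resource queried from the device; a NULL entry
 * means the RIF index is free, which the fini path asserts below.
 */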
static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
{
	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

	mlxsw_sp->router->rifs = kcalloc(max_rifs,
					 sizeof(struct mlxsw_sp_rif *),
					 GFP_KERNEL);
	if (!mlxsw_sp->router->rifs)
		return -ENOMEM;

	mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;

	return 0;
}

static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);

	kfree(mlxsw_sp->router->rifs);
}

static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
	char tigcr_pl[MLXSW_REG_TIGCR_LEN];

	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}
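
/* IP-in-IP initialization: publish the tunnel ops array, start with an
 * empty tunnel list and set the device-global tunneling configuration
 * via the TIGCR register above.
 */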
static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
}

static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}

static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
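
/* ECMP hash configuration, compiled in only when the kernel itself can
 * do multipath routing. The RECR2 register selects which headers and
 * fields feed the router's ECMP hash; the IPv4 policy follows the
 * net.ipv4.fib_multipath_hash_policy sysctl of the initial namespace:
 * 0 hashes on L3 addresses only, otherwise the L4 ports and protocol
 * are mixed in as well.
 */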
#ifdef CONFIG_IP_ROUTE_MULTIPATH
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}

static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}

static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
{
	bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
	if (only_l3)
		return;
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
}

static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
}

static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	char recr2_pl[MLXSW_REG_RECR2_LEN];
	u32 seed;

	get_random_bytes(&seed, sizeof(seed));
	mlxsw_reg_recr2_pack(recr2_pl, seed);
	mlxsw_sp_mp4_hash_init(recr2_pl);
	mlxsw_sp_mp6_hash_init(recr2_pl);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
}
#else
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
#endif
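
/* Populate the DSCP-to-priority map. The kernel's ToS byte carries DSCP
 * in its upper six bits and ECN in the lower two, so DSCP value i
 * corresponds to ToS i << 2; e.g. DSCP 46 (EF) is looked up as ToS
 * 46 << 2 = 0xb8 when deriving the priority via rt_tos2priority().
 */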
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW is determining switch priority based on DSCP-bits, but the
	 * kernel is still doing that based on the ToS. Since there's a
	 * mismatch in bits we need to make sure to translate the right
	 * value ToS would observe, skipping the 2 least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}

static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];
	u64 max_rifs;
	int err;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
		return -EIO;
	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);

	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
	mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
	if (err)
		return err;

	return 0;
}

static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
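
/* Top-level router initialization. The ordering matters: the global
 * router block is enabled first (RGCR), then RIFs, tunnels, nexthop
 * hash tables, LPM trees, multicast routing, virtual routers and the
 * neighbour machinery, and only then are the netevent and FIB notifiers
 * registered, so that no event can arrive before the structures it
 * touches exist. The error path and mlxsw_sp_router_fini() unwind in
 * exactly the reverse order.
 */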
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = mlxsw_sp_ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	mlxsw_sp->router->netevent_nb.notifier_call =
		mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;

	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
err_dscp_init:
err_mp_hash_init:
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	kfree(mlxsw_sp->router);
	return err;
}

void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	kfree(mlxsw_sp->router);
}