/*
 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
 * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
 * Copyright (c) 2017 Petr Machata <petrm@mellanox.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/rhashtable.h>
#include <linux/bitops.h>
#include <linux/in6.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/socket.h>
#include <linux/route.h>
#include <linux/gcd.h>
#include <linux/random.h>
#include <net/netevent.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/fib_notifier.h>

#include "spectrum.h"
#include "core.h"
#include "reg.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
#include "spectrum_mr.h"
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"

struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;

struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif **rifs;
	struct mlxsw_sp_vr *vrs;
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		struct delayed_work dw;
		unsigned long interval;	/* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};

struct mlxsw_sp_rif {
	struct list_head nexthop_list;
	struct list_head neigh_list;
	struct net_device *dev;
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];
	int mtu;
	u16 rif_index;
	u16 vr_id;
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};

struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree);
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib,
				     u8 tree_id);
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib);

static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
			   enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return &rif->counter_egress;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return &rif->counter_ingress;
	}
	return NULL;
}

static bool
mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		return rif->counter_egress_valid;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		return rif->counter_ingress_valid;
	}
	return false;
}

static void
mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir,
			       bool valid)
{
	switch (dir) {
	case MLXSW_SP_RIF_COUNTER_EGRESS:
		rif->counter_egress_valid = valid;
		break;
	case MLXSW_SP_RIF_COUNTER_INGRESS:
		rif->counter_ingress_valid = valid;
		break;
	}
}

static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}

static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}

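/* Note: binding a counter to a RIF below is a three-step sequence: allocate
 * an index from the RIF counter sub-pool, clear the hardware counter at that
 * index (RICNT with the CLEAR opcode), and only then point the RIF at it via
 * RITR. The error path unwinds in the reverse order.
 */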
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}

void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}

static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct devlink *devlink;

	devlink = priv_to_devlink(mlxsw_sp->core);
	if (!devlink_dpipe_table_counter_enabled(devlink,
						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
		return;
	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;

	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev);

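/* An IPv6 address has 128 bits, so counting the zero-length (default route)
 * prefix there are 129 possible prefix lengths, which is what the define
 * below evaluates to. Prefix usage is tracked as a bitmap over them.
 */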
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)

static bool
mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
			 struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
			  struct mlxsw_sp_prefix_usage *prefix_usage2)
{
	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}

static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}

static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}

struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};

struct mlxsw_sp_nexthop_group;

struct mlxsw_sp_fib_node {
	struct list_head entry_list;
	struct list_head list;
	struct rhash_head ht_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;
	unsigned int nrt6;
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct rt6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

struct mlxsw_sp_fib {
	struct rhashtable ht;
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr4_table;
};

static const struct rhashtable_params mlxsw_sp_fib_ht_params;

static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count == 0)
			return lpm_tree;
	}
	return NULL;
}

static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}

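/* The function below programs the layout of an LPM tree: the longest used
 * prefix length becomes the root bin, and each other used prefix length
 * (except zero) gets the next shorter used one as its left child, with no
 * right children (MLXSW_REG_RALST_BIN_NO_CHILD). The result is a simple
 * left-leaning chain over the used prefix lengths.
 */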
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}

static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}

static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage)) {
			mlxsw_sp_lpm_tree_hold(lpm_tree);
			return lpm_tree;
		}
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}

static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}

static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}

#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */

static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					      sizeof(struct mlxsw_sp_lpm_tree),
					      GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}

static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	kfree(mlxsw_sp->router->lpm.trees);
}

static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
	return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}

static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}

static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
	/* For our purpose, squash main, default and local tables into one */
	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
		tb_id = RT_TABLE_MAIN;
	return tb_id;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}

static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
					    enum mlxsw_sp_l3proto proto)
{
	switch (proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return vr->fib4;
	case MLXSW_SP_L3_PROTO_IPV6:
		return vr->fib6;
	}
	return NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr_table_create;
	}
	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr4_table = mr4_table;
	vr->tb_id = tb_id;
	return vr;

err_mr_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}

static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr4_table);
	vr->mr4_table = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}

static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
					   struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_vr *vr;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);
	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
	if (!vr)
		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
	return vr;
}

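/* Virtual routers carry no explicit reference count; a VR stays alive as
 * long as a RIF uses it or one of its tables is non-empty, and
 * mlxsw_sp_vr_put() below destroys it once all of that is gone.
 */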
static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr4_table))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}

static bool
mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
				    enum mlxsw_sp_l3proto proto, u8 tree_id)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);

	if (!mlxsw_sp_vr_is_used(vr))
		return false;
	if (fib->lpm_tree->id == tree_id)
		return true;
	return false;
}

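/* Tree replacement below is make-before-break: the new tree is referenced
 * and bound first, and the old tree is released only once the bind
 * succeeded, so the VR never points at an invalid tree.
 */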
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}

static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib *fib,
					 struct mlxsw_sp_lpm_tree *new_tree)
{
	enum mlxsw_sp_l3proto proto = fib->proto;
	struct mlxsw_sp_lpm_tree *old_tree;
	u8 old_id, new_id = new_tree->id;
	struct mlxsw_sp_vr *vr;
	int i, err;

	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	old_id = old_tree->id;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
			continue;
		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
						   mlxsw_sp_vr_fib(vr, proto),
						   new_tree);
		if (err)
			goto err_tree_replace;
	}

	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
	       sizeof(new_tree->prefix_ref_count));
	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);

	return 0;

err_tree_replace:
	for (i--; i >= 0; i--) {
		/* Re-derive the VR for each rollback iteration; without this
		 * the loop would keep checking the VR that failed above.
		 */
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
			continue;
		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
					     mlxsw_sp_vr_fib(vr, proto),
					     old_tree);
	}
	return err;
}

static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	u64 max_vrs;
	int i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
		return -EIO;

	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
					GFP_KERNEL);
	if (!mlxsw_sp->router->vrs)
		return -ENOMEM;

	for (i = 0; i < max_vrs; i++) {
		vr = &mlxsw_sp->router->vrs[i];
		vr->id = i;
	}

	return 0;
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);

static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	kfree(mlxsw_sp->router->vrs);
}

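/* An IP-in-IP overlay netdevice may be bound to an underlay device through
 * tun->parms.link. The helpers below resolve that device and derive the
 * underlay table ID from its L3 master (VRF) if there is one, falling back
 * to the main table otherwise.
 */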
static struct net_device *
__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
{
	struct ip_tunnel *tun = netdev_priv(ol_dev);
	struct net *net = dev_net(ol_dev);

	return __dev_get_by_index(net, tun->parms.link);
}

u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
{
	struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);

	if (d)
		return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
	else
		return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack);

static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
				enum mlxsw_sp_ipip_type ipipt,
				struct net_device *ol_dev,
				struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_params_ipip_lb lb_params;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_rif *rif;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
		.common.dev = ol_dev,
		.common.lag = false,
		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
	};

	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
	if (IS_ERR(rif))
		return ERR_CAST(rif);
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
			  enum mlxsw_sp_ipip_type ipipt,
			  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_ipip_entry *ret = NULL;

	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
	if (!ipip_entry)
		return ERR_PTR(-ENOMEM);

	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
							    ol_dev, NULL);
	if (IS_ERR(ipip_entry->ol_lb)) {
		ret = ERR_CAST(ipip_entry->ol_lb);
		goto err_ol_ipip_lb_create;
	}

	ipip_entry->ipipt = ipipt;
	ipip_entry->ol_dev = ol_dev;
	ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev);

	return ipip_entry;

err_ol_ipip_lb_create:
	kfree(ipip_entry);
	return ret;
}

static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
				  const enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr saddr,
				  u32 ul_tb_id,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	union mlxsw_sp_l3addr tun_saddr;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
	return tun_ul_tb_id == ul_tb_id &&
	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
}

static int
mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_fib_entry *fib_entry,
			      struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 tunnel_index;
	int err;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index);
	if (err)
		return err;

	ipip_entry->decap_fib_entry = fib_entry;
	fib_entry->decap.ipip_entry = ipip_entry;
	fib_entry->decap.tunnel_index = tunnel_index;
	return 0;
}

static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_fib_entry *fib_entry)
{
	/* Unlink this node from the IPIP entry that it's the decap entry of. */
	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
	fib_entry->decap.ipip_entry = NULL;
	mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len);
static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry);

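/* A route covering a tunnel's underlay local address is offloaded either as
 * a plain trap (slow path) or as an IPIP decap entry. The two helpers below
 * flip a FIB entry between these two states and push the update to the
 * hardware.
 */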
static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void
mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry,
				  struct mlxsw_sp_fib_entry *decap_fib_entry)
{
	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
					  ipip_entry))
		return;
	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;

	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_node *fib_node;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_fib_entry *fib_entry;
	unsigned char saddr_prefix_len;
	union mlxsw_sp_l3addr saddr;
	struct mlxsw_sp_fib *ul_fib;
	struct mlxsw_sp_vr *ul_vr;
	const void *saddrp;
	size_t saddr_len;
	u32 ul_tb_id;
	u32 saddr4;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];

	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
	if (!ul_vr)
		return NULL;

	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
					   ipip_entry->ol_dev);

	switch (ipip_ops->ul_proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(saddr.addr4);
		saddrp = &saddr4;
		saddr_len = 4;
		saddr_prefix_len = 32;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		WARN_ON(1);
		return NULL;
	}

	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
					    saddr_prefix_len);
	if (!fib_node || list_empty(&fib_node->entry_list))
		return NULL;

	fib_entry = list_first_entry(&fib_node->entry_list,
				     struct mlxsw_sp_fib_entry, list);
	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
		return NULL;

	return fib_entry;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
			   enum mlxsw_sp_ipip_type ipipt,
			   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
	if (IS_ERR(ipip_entry))
		return ipip_entry;

	list_add_tail(&ipip_entry->ipip_list_node,
		      &mlxsw_sp->router->ipip_list);

	return ipip_entry;
}

static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}

static bool
mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
	struct net_device *ipip_ul_dev;

	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
		return false;

	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
						 ul_tb_id, ipip_entry) &&
	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
}

/* Given decap parameters, find the corresponding IPIP entry. */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
				  const struct net_device *ul_dev,
				  enum mlxsw_sp_l3proto ul_proto,
				  union mlxsw_sp_l3addr ul_dip)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
						      ul_proto, ul_dip,
						      ipip_entry))
			return ipip_entry;

	return NULL;
}

static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
				      const struct net_device *dev,
				      enum mlxsw_sp_ipip_type *p_type)
{
	struct mlxsw_sp_router *router = mlxsw_sp->router;
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	enum mlxsw_sp_ipip_type ipipt;

	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
		ipip_ops = router->ipip_ops_arr[ipipt];
		if (dev->type == ipip_ops->dev_type) {
			if (p_type)
				*p_type = ipipt;
			return true;
		}
	}
	return false;
}

bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
			    ipip_list_node)
		if (ipip_entry->ol_dev == ol_dev)
			return ipip_entry;

	return NULL;
}

static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}

bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
}

static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
						const struct net_device *ol_dev,
						enum mlxsw_sp_ipip_type ipipt)
{
	const struct mlxsw_sp_ipip_ops *ops
		= mlxsw_sp->router->ipip_ops_arr[ipipt];

	/* For deciding whether decap should be offloaded, we don't care about
	 * overlay protocol, so ask whether either one is supported.
	 */
	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
}

static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;
	enum mlxsw_sp_l3proto ul_proto;
	enum mlxsw_sp_ipip_type ipipt;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
							  saddr, ul_tb_id,
							  NULL)) {
			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
								ol_dev);
			if (IS_ERR(ipip_entry))
				return PTR_ERR(ipip_entry);
		}
	}

	return 0;
}

static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (decap_fib_entry)
		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
						  decap_fib_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}

static void
mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_ipip_entry *ipip_entry)
{
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (ipip_entry)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}

static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif);
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}

static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif);

/**
 * Update the offload related to an IPIP entry. This always updates decap, and
 * in addition to that it also:
 * @recreate_loopback: recreates the associated loopback RIF
 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
 *	relevant when recreate_loopback is true.
 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
 *	is only relevant when recreate_loopback is false.
 */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}

static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev,
						struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_ipip_entry *ipip_entry =
		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	enum mlxsw_sp_l3proto ul_proto;
	union mlxsw_sp_l3addr saddr;
	u32 ul_tb_id;

	if (!ipip_entry)
		return 0;

	/* For flat configuration cases, moving overlay to a different VRF might
	 * cause local address conflict, and the conflicting tunnels need to be
	 * demoted.
	 */
	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
						 saddr, ul_tb_id,
						 ipip_entry)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, false, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}

static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}

static int
mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
					struct net_device *ol_dev,
					struct netlink_ext_ack *extack)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		/* A change might make a tunnel eligible for offloading, but
		 * that is currently not implemented. What falls to slow path
		 * stays there.
		 */
		return 0;

	/* A change might make a tunnel not eligible for offloading. */
	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
						 ipip_entry->ipipt)) {
		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
		return 0;
	}

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
	return err;
}

void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}

/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in in the argument
 * `except'.
 */
bool
mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
				     enum mlxsw_sp_l3proto ul_proto,
				     union mlxsw_sp_l3addr saddr,
				     u32 ul_tb_id,
				     const struct mlxsw_sp_ipip_entry *except)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		if (ipip_entry != except &&
		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
						      ul_tb_id, ipip_entry)) {
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
			return true;
		}
	}

	return false;
}

static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
						     struct net_device *ul_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;

	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
				 ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
	}
}

int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	}
	return 0;
}

static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}

int
mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *ul_dev,
				 unsigned long event,
				 struct netdev_notifier_info *info)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
	int err;

	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
								ul_dev,
								ipip_entry))) {
		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
							 ul_dev, event, info);
		if (err) {
			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
								 ul_dev);
			return err;
		}
	}

	return 0;
}

struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};

struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node;
	struct rhash_head ht_node;
	struct mlxsw_sp_neigh_key key;
	u16 rif;
	bool connected;
	unsigned char ha[ETH_ALEN];
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index;
	bool counter_valid;
};

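/* Offloaded neighbour entries are keyed by the kernel's struct neighbour
 * pointer itself, so each neighbour maps to at most one entry and lookup is
 * a plain pointer comparison.
 */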
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};

struct mlxsw_sp_neigh_entry *
mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
			struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry) {
		if (list_empty(&rif->neigh_list))
			return NULL;
		else
			return list_first_entry(&rif->neigh_list,
						typeof(*neigh_entry),
						rif_list_node);
	}
	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
		return NULL;
	return list_next_entry(neigh_entry, rif_list_node);
}

int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->key.n->tbl->family;
}

unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}

u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return ntohl(*((__be32 *) n->primary_key));
}

struct in6_addr *
mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n;

	n = neigh_entry->key.n;
	return (struct in6_addr *) &n->primary_key;
}

int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_neigh_entry *neigh_entry,
			       u64 *p_counter)
{
	if (!neigh_entry->counter_valid)
		return -EINVAL;

	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
					 p_counter, NULL);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
			   u16 rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
	if (!neigh_entry)
		return NULL;

	neigh_entry->key.n = n;
	neigh_entry->rif = rif;
	INIT_LIST_HEAD(&neigh_entry->nexthop_list);

	return neigh_entry;
}

static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}

static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}

static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}

static bool
mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct devlink *devlink;
	const char *table_name;

	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
	case AF_INET:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
		break;
	case AF_INET6:
		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
		break;
	default:
		WARN_ON(1);
		return false;
	}

	devlink = priv_to_devlink(mlxsw_sp->core);
	return devlink_dpipe_table_counter_enabled(devlink, table_name);
}

static void
mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
		return;

	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
		return;

	neigh_entry->counter_valid = true;
}

static void
mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	if (!neigh_entry->counter_valid)
		return;
	mlxsw_sp_flow_counter_free(mlxsw_sp,
				   neigh_entry->counter_index);
	neigh_entry->counter_valid = false;
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}

static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}

static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_key key;

	key.n = n;
	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
				      &key, mlxsw_sp_neigh_ht_params);
}

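/* The periodic neighbour update interval below is derived from the kernel's
 * DELAY_PROBE_TIME. With IPv6 enabled, the shorter of the ARP and ND values
 * is used, presumably so that activity is reported to whichever table
 * probes more often.
 */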
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}

static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}

#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n);
}
#else
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif

2018 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2019 char *rauhtd_pl,
2020 int rec_index)
2022 u8 num_entries;
2023 int i;
2025 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2026 rec_index);
2027 /* Hardware starts counting at 0, so add 1. */
2028 num_entries++;
2030 /* Each record consists of several neighbour entries. */
2031 for (i = 0; i < num_entries; i++) {
2032 int ent_index;
2034 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2035 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2036 ent_index);
2041 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2042 char *rauhtd_pl,
2043 int rec_index)
2045 /* One record contains one entry. */
2046 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2047 rec_index);
2050 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2051 char *rauhtd_pl, int rec_index)
2053 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2054 case MLXSW_REG_RAUHTD_TYPE_IPV4:
2055 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2056 rec_index);
2057 break;
2058 case MLXSW_REG_RAUHTD_TYPE_IPV6:
2059 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2060 rec_index);
2061 break;
2065 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2067 u8 num_rec, last_rec_index, num_entries;
2069 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2070 last_rec_index = num_rec - 1;
2072 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2073 return false;
2074 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2075 MLXSW_REG_RAUHTD_TYPE_IPV6)
2076 return true;
2078 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2079 last_rec_index);
2080 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2081 return true;
2082 return false;
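/* Hedged explanation of the heuristic above: the RAUHTD response can
* carry at most MLXSW_REG_RAUHTD_REC_MAX_NUM records. Fewer records
* means the dump is complete. A full response whose last record is
* IPv6 may have more to give, since each IPv6 entry consumes a whole
* record; a full response ending in an IPv4 record only warrants
* another query if that record's entry count also reached
* MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC.
*/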
2085 static int
2086 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2087 char *rauhtd_pl,
2088 enum mlxsw_reg_rauhtd_type type)
2090 int i, num_rec;
2091 int err;
2093 /* Make sure the neighbour's netdev isn't removed in the
2094 * process.
2095 */
2096 rtnl_lock();
2097 do {
2098 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2099 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2100 rauhtd_pl);
2101 if (err) {
2102 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2103 break;
2105 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2106 for (i = 0; i < num_rec; i++)
2107 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2108 i);
2109 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2110 rtnl_unlock();
2112 return err;
2115 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2117 enum mlxsw_reg_rauhtd_type type;
2118 char *rauhtd_pl;
2119 int err;
2121 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2122 if (!rauhtd_pl)
2123 return -ENOMEM;
2125 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2126 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2127 if (err)
2128 goto out;
2130 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2131 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2132 out:
2133 kfree(rauhtd_pl);
2134 return err;
2137 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2139 struct mlxsw_sp_neigh_entry *neigh_entry;
2141 /* Take the RTNL mutex here to prevent the lists from changing */
2142 rtnl_lock();
2143 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2144 nexthop_neighs_list_node)
2145 /* If this neigh has nexthops, make the kernel think this neigh
2146 * is active regardless of the traffic.
2147 */
2148 neigh_event_send(neigh_entry->key.n, NULL);
2149 rtnl_unlock();
2152 static void
2153 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2155 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2157 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2158 msecs_to_jiffies(interval));
2161 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2163 struct mlxsw_sp_router *router;
2164 int err;
2166 router = container_of(work, struct mlxsw_sp_router,
2167 neighs_update.dw.work);
2168 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2169 if (err)
2170 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity\n");
2172 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2174 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2177 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2179 struct mlxsw_sp_neigh_entry *neigh_entry;
2180 struct mlxsw_sp_router *router;
2182 router = container_of(work, struct mlxsw_sp_router,
2183 nexthop_probe_dw.work);
2184 /* Iterate over nexthop neighbours, find those that are unresolved
2185 * and send ARP on them. This solves the chicken-and-egg problem
2186 * where a nexthop wouldn't get offloaded until the neighbour is
2187 * resolved, but the neighbour would never get resolved in case
2188 * traffic is flowing in HW using a different nexthop.
2189 *
2190 * Take the RTNL mutex here to prevent the lists from changing.
2191 */
2192 rtnl_lock();
2193 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2194 nexthop_neighs_list_node)
2195 if (!neigh_entry->connected)
2196 neigh_event_send(neigh_entry->key.n, NULL);
2197 rtnl_unlock();
2199 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2200 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2203 static void
2204 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2205 struct mlxsw_sp_neigh_entry *neigh_entry,
2206 bool removing);
2208 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2210 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2211 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2214 static void
2215 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2216 struct mlxsw_sp_neigh_entry *neigh_entry,
2217 enum mlxsw_reg_rauht_op op)
2219 struct neighbour *n = neigh_entry->key.n;
2220 u32 dip = ntohl(*((__be32 *) n->primary_key));
2221 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2223 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2224 dip);
2225 if (neigh_entry->counter_valid)
2226 mlxsw_reg_rauht_pack_counter(rauht_pl,
2227 neigh_entry->counter_index);
2228 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2231 static void
2232 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2233 struct mlxsw_sp_neigh_entry *neigh_entry,
2234 enum mlxsw_reg_rauht_op op)
2236 struct neighbour *n = neigh_entry->key.n;
2237 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2238 const char *dip = n->primary_key;
2240 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2241 dip);
2242 if (neigh_entry->counter_valid)
2243 mlxsw_reg_rauht_pack_counter(rauht_pl,
2244 neigh_entry->counter_index);
2245 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2248 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2250 struct neighbour *n = neigh_entry->key.n;
2252 /* Packets with a link-local destination address are trapped
2253 * after LPM lookup and never reach the neighbour table, so
2254 * there is no need to program such neighbours to the device.
2255 */
2256 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2257 IPV6_ADDR_LINKLOCAL)
2258 return true;
2259 return false;
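/* Example of the check above: fe80::1 is of type IPV6_ADDR_LINKLOCAL
* and is therefore never programmed to the device, whereas a global
* address such as 2001:db8::1 passes the check and is reflected to HW.
*/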
2262 static void
2263 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2264 struct mlxsw_sp_neigh_entry *neigh_entry,
2265 bool adding)
2267 if (!adding && !neigh_entry->connected)
2268 return;
2269 neigh_entry->connected = adding;
2270 if (neigh_entry->key.n->tbl->family == AF_INET) {
2271 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2272 mlxsw_sp_rauht_op(adding));
2273 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2274 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2275 return;
2276 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2277 mlxsw_sp_rauht_op(adding));
2278 } else {
2279 WARN_ON_ONCE(1);
2283 void
2284 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2285 struct mlxsw_sp_neigh_entry *neigh_entry,
2286 bool adding)
2288 if (adding)
2289 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2290 else
2291 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2292 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2295 struct mlxsw_sp_netevent_work {
2296 struct work_struct work;
2297 struct mlxsw_sp *mlxsw_sp;
2298 struct neighbour *n;
2301 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2303 struct mlxsw_sp_netevent_work *net_work =
2304 container_of(work, struct mlxsw_sp_netevent_work, work);
2305 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2306 struct mlxsw_sp_neigh_entry *neigh_entry;
2307 struct neighbour *n = net_work->n;
2308 unsigned char ha[ETH_ALEN];
2309 bool entry_connected;
2310 u8 nud_state, dead;
2312 /* If these parameters are changed after we release the lock,
2313 * then we are guaranteed to receive another event letting us
2314 * know about it.
2315 */
2316 read_lock_bh(&n->lock);
2317 memcpy(ha, n->ha, ETH_ALEN);
2318 nud_state = n->nud_state;
2319 dead = n->dead;
2320 read_unlock_bh(&n->lock);
2322 rtnl_lock();
2323 entry_connected = nud_state & NUD_VALID && !dead;
2324 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2325 if (!entry_connected && !neigh_entry)
2326 goto out;
2327 if (!neigh_entry) {
2328 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2329 if (IS_ERR(neigh_entry))
2330 goto out;
2333 memcpy(neigh_entry->ha, ha, ETH_ALEN);
2334 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2335 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
2337 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2338 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2340 out:
2341 rtnl_unlock();
2342 neigh_release(n);
2343 kfree(net_work);
2346 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2348 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2350 struct mlxsw_sp_netevent_work *net_work =
2351 container_of(work, struct mlxsw_sp_netevent_work, work);
2352 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2354 mlxsw_sp_mp_hash_init(mlxsw_sp);
2355 kfree(net_work);
2358 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2359 unsigned long event, void *ptr)
2361 struct mlxsw_sp_netevent_work *net_work;
2362 struct mlxsw_sp_port *mlxsw_sp_port;
2363 struct mlxsw_sp_router *router;
2364 struct mlxsw_sp *mlxsw_sp;
2365 unsigned long interval;
2366 struct neigh_parms *p;
2367 struct neighbour *n;
2368 struct net *net;
2370 switch (event) {
2371 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2372 p = ptr;
2374 /* We don't care about changes in the default table. */
2375 if (!p->dev || (p->tbl->family != AF_INET &&
2376 p->tbl->family != AF_INET6))
2377 return NOTIFY_DONE;
2379 /* We are in atomic context and can't take RTNL mutex,
2380 * so use RCU variant to walk the device chain.
2381 */
2382 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2383 if (!mlxsw_sp_port)
2384 return NOTIFY_DONE;
2386 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2387 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2388 mlxsw_sp->router->neighs_update.interval = interval;
2390 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2391 break;
2392 case NETEVENT_NEIGH_UPDATE:
2393 n = ptr;
2395 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2396 return NOTIFY_DONE;
2398 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2399 if (!mlxsw_sp_port)
2400 return NOTIFY_DONE;
2402 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2403 if (!net_work) {
2404 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2405 return NOTIFY_BAD;
2408 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2409 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2410 net_work->n = n;
2412 /* Take a reference to ensure the neighbour won't be
2413 * destroyed until we drop the reference in the delayed
2414 * work.
2415 */
2416 neigh_clone(n);
2417 mlxsw_core_schedule_work(&net_work->work);
2418 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2419 break;
2420 case NETEVENT_MULTIPATH_HASH_UPDATE:
2421 net = ptr;
2423 if (!net_eq(net, &init_net))
2424 return NOTIFY_DONE;
2426 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2427 if (!net_work)
2428 return NOTIFY_BAD;
2430 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2431 INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work);
2432 net_work->mlxsw_sp = router->mlxsw_sp;
2433 mlxsw_core_schedule_work(&net_work->work);
2434 break;
2437 return NOTIFY_DONE;
2440 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2442 int err;
2444 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2445 &mlxsw_sp_neigh_ht_params);
2446 if (err)
2447 return err;
2449 /* Initialize the polling interval according to the default
2450 * table.
2451 */
2452 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2454 /* Create the delayed works for neighbour activity update and nexthop probing */
2455 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2456 mlxsw_sp_router_neighs_update_work);
2457 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2458 mlxsw_sp_router_probe_unresolved_nexthops);
2459 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2460 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2461 return 0;
2464 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2466 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2467 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2468 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2471 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2472 struct mlxsw_sp_rif *rif)
2474 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2476 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2477 rif_list_node) {
2478 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2479 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2483 enum mlxsw_sp_nexthop_type {
2484 MLXSW_SP_NEXTHOP_TYPE_ETH,
2485 MLXSW_SP_NEXTHOP_TYPE_IPIP,
2488 struct mlxsw_sp_nexthop_key {
2489 struct fib_nh *fib_nh;
2492 struct mlxsw_sp_nexthop {
2493 struct list_head neigh_list_node; /* member of neigh entry list */
2494 struct list_head rif_list_node;
2495 struct list_head router_list_node;
2496 struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2497 * this belongs to
2498 */
2499 struct rhash_head ht_node;
2500 struct mlxsw_sp_nexthop_key key;
2501 unsigned char gw_addr[sizeof(struct in6_addr)];
2502 int ifindex;
2503 int nh_weight;
2504 int norm_nh_weight;
2505 int num_adj_entries;
2506 struct mlxsw_sp_rif *rif;
2507 u8 should_offload:1, /* set indicates this neigh is connected and
2508 * should be put into the KVD linear area of this group.
2509 */
2510 offloaded:1, /* set in case the neigh is actually put into
2511 * the KVD linear area of this group.
2512 */
2513 update:1; /* set indicates that the MAC of this neigh should be
2514 * updated in HW.
2515 */
2516 enum mlxsw_sp_nexthop_type type;
2517 union {
2518 struct mlxsw_sp_neigh_entry *neigh_entry;
2519 struct mlxsw_sp_ipip_entry *ipip_entry;
2521 unsigned int counter_index;
2522 bool counter_valid;
2525 struct mlxsw_sp_nexthop_group {
2526 void *priv;
2527 struct rhash_head ht_node;
2528 struct list_head fib_list; /* list of fib entries that use this group */
2529 struct neigh_table *neigh_tbl;
2530 u8 adj_index_valid:1,
2531 gateway:1; /* routes using the group use a gateway */
2532 u32 adj_index;
2533 u16 ecmp_size;
2534 u16 count;
2535 int sum_norm_weight;
2536 struct mlxsw_sp_nexthop nexthops[0];
2537 #define nh_rif nexthops[0].rif
2540 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2541 struct mlxsw_sp_nexthop *nh)
2543 struct devlink *devlink;
2545 devlink = priv_to_devlink(mlxsw_sp->core);
2546 if (!devlink_dpipe_table_counter_enabled(devlink,
2547 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2548 return;
2550 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2551 return;
2553 nh->counter_valid = true;
2556 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2557 struct mlxsw_sp_nexthop *nh)
2559 if (!nh->counter_valid)
2560 return;
2561 mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2562 nh->counter_valid = false;
2565 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2566 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2568 if (!nh->counter_valid)
2569 return -EINVAL;
2571 return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2572 p_counter, NULL);
2575 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2576 struct mlxsw_sp_nexthop *nh)
2578 if (!nh) {
2579 if (list_empty(&router->nexthop_list))
2580 return NULL;
2581 else
2582 return list_first_entry(&router->nexthop_list,
2583 typeof(*nh), router_list_node);
2585 if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2586 return NULL;
2587 return list_next_entry(nh, router_list_node);
2590 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2592 return nh->offloaded;
2595 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2597 if (!nh->offloaded)
2598 return NULL;
2599 return nh->neigh_entry->ha;
2602 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2603 u32 *p_adj_size, u32 *p_adj_hash_index)
2605 struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2606 u32 adj_hash_index = 0;
2607 int i;
2609 if (!nh->offloaded || !nh_grp->adj_index_valid)
2610 return -EINVAL;
2612 *p_adj_index = nh_grp->adj_index;
2613 *p_adj_size = nh_grp->ecmp_size;
2615 for (i = 0; i < nh_grp->count; i++) {
2616 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2618 if (nh_iter == nh)
2619 break;
2620 if (nh_iter->offloaded)
2621 adj_hash_index += nh_iter->num_adj_entries;
2624 *p_adj_hash_index = adj_hash_index;
2625 return 0;
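/* Worked example for the index math above (values assumed purely for
* illustration): a group with adj_index 1000, ecmp_size 4 and three
* offloaded nexthops occupying 1, 2 and 1 adjacency entries. Querying
* the third nexthop yields *p_adj_index = 1000, *p_adj_size = 4 and
* *p_adj_hash_index = 1 + 2 = 3, the offset past its predecessors'
* entries within the group.
*/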
2628 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2630 return nh->rif;
2633 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2635 struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2636 int i;
2638 for (i = 0; i < nh_grp->count; i++) {
2639 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2641 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2642 return true;
2644 return false;
2647 static struct fib_info *
2648 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2650 return nh_grp->priv;
2653 struct mlxsw_sp_nexthop_group_cmp_arg {
2654 enum mlxsw_sp_l3proto proto;
2655 union {
2656 struct fib_info *fi;
2657 struct mlxsw_sp_fib6_entry *fib6_entry;
2661 static bool
2662 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2663 const struct in6_addr *gw, int ifindex,
2664 int weight)
2666 int i;
2668 for (i = 0; i < nh_grp->count; i++) {
2669 const struct mlxsw_sp_nexthop *nh;
2671 nh = &nh_grp->nexthops[i];
2672 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2673 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2674 return true;
2677 return false;
2680 static bool
2681 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2682 const struct mlxsw_sp_fib6_entry *fib6_entry)
2684 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2686 if (nh_grp->count != fib6_entry->nrt6)
2687 return false;
2689 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2690 struct in6_addr *gw;
2691 int ifindex, weight;
2693 ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex;
2694 weight = mlxsw_sp_rt6->rt->rt6i_nh_weight;
2695 gw = &mlxsw_sp_rt6->rt->rt6i_gateway;
2696 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2697 weight))
2698 return false;
2701 return true;
2704 static int
2705 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2707 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2708 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2710 switch (cmp_arg->proto) {
2711 case MLXSW_SP_L3_PROTO_IPV4:
2712 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2713 case MLXSW_SP_L3_PROTO_IPV6:
2714 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2715 cmp_arg->fib6_entry);
2716 default:
2717 WARN_ON(1);
2718 return 1;
2722 static int
2723 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2725 return nh_grp->neigh_tbl->family;
2728 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2730 const struct mlxsw_sp_nexthop_group *nh_grp = data;
2731 const struct mlxsw_sp_nexthop *nh;
2732 struct fib_info *fi;
2733 unsigned int val;
2734 int i;
2736 switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2737 case AF_INET:
2738 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2739 return jhash(&fi, sizeof(fi), seed);
2740 case AF_INET6:
2741 val = nh_grp->count;
2742 for (i = 0; i < nh_grp->count; i++) {
2743 nh = &nh_grp->nexthops[i];
2744 val ^= nh->ifindex;
2746 return jhash(&val, sizeof(val), seed);
2747 default:
2748 WARN_ON(1);
2749 return 0;
2753 static u32
2754 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2756 unsigned int val = fib6_entry->nrt6;
2757 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2758 struct net_device *dev;
2760 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2761 dev = mlxsw_sp_rt6->rt->dst.dev;
2762 val ^= dev->ifindex;
2765 return jhash(&val, sizeof(val), seed);
2768 static u32
2769 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2771 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2773 switch (cmp_arg->proto) {
2774 case MLXSW_SP_L3_PROTO_IPV4:
2775 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2776 case MLXSW_SP_L3_PROTO_IPV6:
2777 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2778 default:
2779 WARN_ON(1);
2780 return 0;
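/* Note on the pair of hash functions above: the nexthop group table is
* keyed by a struct mlxsw_sp_nexthop_group_cmp_arg rather than by the
* stored object, so rhashtable needs both .hashfn (for the lookup key)
* and .obj_hashfn (for the stored group) to agree: a jhash of the
* fib_info pointer on both sides for IPv4, and the nexthop count XORed
* with the member ifindexes on both sides for IPv6.
*/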
2784 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2785 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2786 .hashfn = mlxsw_sp_nexthop_group_hash,
2787 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
2788 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
2791 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2792 struct mlxsw_sp_nexthop_group *nh_grp)
2794 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2795 !nh_grp->gateway)
2796 return 0;
2798 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2799 &nh_grp->ht_node,
2800 mlxsw_sp_nexthop_group_ht_params);
2803 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2804 struct mlxsw_sp_nexthop_group *nh_grp)
2806 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2807 !nh_grp->gateway)
2808 return;
2810 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2811 &nh_grp->ht_node,
2812 mlxsw_sp_nexthop_group_ht_params);
2815 static struct mlxsw_sp_nexthop_group *
2816 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2817 struct fib_info *fi)
2819 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2821 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2822 cmp_arg.fi = fi;
2823 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2824 &cmp_arg,
2825 mlxsw_sp_nexthop_group_ht_params);
2828 static struct mlxsw_sp_nexthop_group *
2829 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2830 struct mlxsw_sp_fib6_entry *fib6_entry)
2832 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2834 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2835 cmp_arg.fib6_entry = fib6_entry;
2836 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2837 &cmp_arg,
2838 mlxsw_sp_nexthop_group_ht_params);
2841 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2842 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2843 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2844 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
2847 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2848 struct mlxsw_sp_nexthop *nh)
2850 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2851 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2854 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2855 struct mlxsw_sp_nexthop *nh)
2857 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2858 mlxsw_sp_nexthop_ht_params);
2861 static struct mlxsw_sp_nexthop *
2862 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2863 struct mlxsw_sp_nexthop_key key)
2865 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2866 mlxsw_sp_nexthop_ht_params);
2869 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2870 const struct mlxsw_sp_fib *fib,
2871 u32 adj_index, u16 ecmp_size,
2872 u32 new_adj_index,
2873 u16 new_ecmp_size)
2875 char raleu_pl[MLXSW_REG_RALEU_LEN];
2877 mlxsw_reg_raleu_pack(raleu_pl,
2878 (enum mlxsw_reg_ralxx_protocol) fib->proto,
2879 fib->vr->id, adj_index, ecmp_size, new_adj_index,
2880 new_ecmp_size);
2881 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2884 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2885 struct mlxsw_sp_nexthop_group *nh_grp,
2886 u32 old_adj_index, u16 old_ecmp_size)
2888 struct mlxsw_sp_fib_entry *fib_entry;
2889 struct mlxsw_sp_fib *fib = NULL;
2890 int err;
2892 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2893 if (fib == fib_entry->fib_node->fib)
2894 continue;
2895 fib = fib_entry->fib_node->fib;
2896 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2897 old_adj_index,
2898 old_ecmp_size,
2899 nh_grp->adj_index,
2900 nh_grp->ecmp_size);
2901 if (err)
2902 return err;
2904 return 0;
2907 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2908 struct mlxsw_sp_nexthop *nh)
2910 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2911 char ratr_pl[MLXSW_REG_RATR_LEN];
2913 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2914 true, MLXSW_REG_RATR_TYPE_ETHERNET,
2915 adj_index, neigh_entry->rif);
2916 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2917 if (nh->counter_valid)
2918 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2919 else
2920 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2922 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2925 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2926 struct mlxsw_sp_nexthop *nh)
2928 int i;
2930 for (i = 0; i < nh->num_adj_entries; i++) {
2931 int err;
2933 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2934 if (err)
2935 return err;
2938 return 0;
2941 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2942 u32 adj_index,
2943 struct mlxsw_sp_nexthop *nh)
2945 const struct mlxsw_sp_ipip_ops *ipip_ops;
2947 ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
2948 return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
2951 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2952 u32 adj_index,
2953 struct mlxsw_sp_nexthop *nh)
2955 int i;
2957 for (i = 0; i < nh->num_adj_entries; i++) {
2958 int err;
2960 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
2961 nh);
2962 if (err)
2963 return err;
2966 return 0;
2969 static int
2970 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
2971 struct mlxsw_sp_nexthop_group *nh_grp,
2972 bool reallocate)
2974 u32 adj_index = nh_grp->adj_index; /* base */
2975 struct mlxsw_sp_nexthop *nh;
2976 int i;
2977 int err;
2979 for (i = 0; i < nh_grp->count; i++) {
2980 nh = &nh_grp->nexthops[i];
2982 if (!nh->should_offload) {
2983 nh->offloaded = 0;
2984 continue;
2987 if (nh->update || reallocate) {
2988 switch (nh->type) {
2989 case MLXSW_SP_NEXTHOP_TYPE_ETH:
2990 err = mlxsw_sp_nexthop_update
2991 (mlxsw_sp, adj_index, nh);
2992 break;
2993 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
2994 err = mlxsw_sp_nexthop_ipip_update
2995 (mlxsw_sp, adj_index, nh);
2996 break;
2998 if (err)
2999 return err;
3000 nh->update = 0;
3001 nh->offloaded = 1;
3003 adj_index += nh->num_adj_entries;
3005 return 0;
3008 static bool
3009 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3010 const struct mlxsw_sp_fib_entry *fib_entry);
3012 static int
3013 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3014 struct mlxsw_sp_nexthop_group *nh_grp)
3016 struct mlxsw_sp_fib_entry *fib_entry;
3017 int err;
3019 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3020 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3021 fib_entry))
3022 continue;
3023 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3024 if (err)
3025 return err;
3027 return 0;
3030 static void
3031 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3032 enum mlxsw_reg_ralue_op op, int err);
3034 static void
3035 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3037 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3038 struct mlxsw_sp_fib_entry *fib_entry;
3040 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3041 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3042 fib_entry))
3043 continue;
3044 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3048 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3050 /* Valid sizes for an adjacency group are:
3051 * 1-64, 512, 1024, 2048 and 4096.
3052 */
3053 if (*p_adj_grp_size <= 64)
3054 return;
3055 else if (*p_adj_grp_size <= 512)
3056 *p_adj_grp_size = 512;
3057 else if (*p_adj_grp_size <= 1024)
3058 *p_adj_grp_size = 1024;
3059 else if (*p_adj_grp_size <= 2048)
3060 *p_adj_grp_size = 2048;
3061 else
3062 *p_adj_grp_size = 4096;
3065 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3066 unsigned int alloc_size)
3068 if (alloc_size >= 4096)
3069 *p_adj_grp_size = 4096;
3070 else if (alloc_size >= 2048)
3071 *p_adj_grp_size = 2048;
3072 else if (alloc_size >= 1024)
3073 *p_adj_grp_size = 1024;
3074 else if (alloc_size >= 512)
3075 *p_adj_grp_size = 512;
3078 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3079 u16 *p_adj_grp_size)
3081 unsigned int alloc_size;
3082 int err;
3084 /* Round up the requested group size to the next size supported
3085 * by the device and make sure the request can be satisfied.
3086 */
3087 mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3088 err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size,
3089 &alloc_size);
3090 if (err)
3091 return err;
3092 /* It is possible the allocation results in more allocated
3093 * entries than requested. Try to use as many of them as
3094 * possible.
3095 */
3096 mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3098 return 0;
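/* Hedged walk-through of the rounding above (numbers assumed): a
* request for 300 adjacency entries is first rounded up to 512, the
* next size the device supports. If the KVD linear allocator reports
* it can satisfy, say, 600 contiguous entries, the size is rounded
* back down to 512, the largest supported size that fits; a request
* of 17 lies within 1-64 and is left untouched.
*/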
3101 static void
3102 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3104 int i, g = 0, sum_norm_weight = 0;
3105 struct mlxsw_sp_nexthop *nh;
3107 for (i = 0; i < nh_grp->count; i++) {
3108 nh = &nh_grp->nexthops[i];
3110 if (!nh->should_offload)
3111 continue;
3112 if (g > 0)
3113 g = gcd(nh->nh_weight, g);
3114 else
3115 g = nh->nh_weight;
3118 for (i = 0; i < nh_grp->count; i++) {
3119 nh = &nh_grp->nexthops[i];
3121 if (!nh->should_offload)
3122 continue;
3123 nh->norm_nh_weight = nh->nh_weight / g;
3124 sum_norm_weight += nh->norm_nh_weight;
3127 nh_grp->sum_norm_weight = sum_norm_weight;
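/* Compiled-out sketch (hypothetical helper, not part of the driver) of
* the normalization above, assuming a two-nexthop group with weights
* 3 and 6; gcd() comes from <linux/gcd.h>, already included here.
*/
#if 0
static void mlxsw_sp_nexthop_weights_example(void)
{
	unsigned long g = gcd(6, 3);		/* g = 3 */
	int norm0 = 3 / g;			/* normalized weight 1 */
	int norm1 = 6 / g;			/* normalized weight 2 */
	int sum_norm_weight = norm0 + norm1;	/* 3, the minimal group size */
}
#endif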
3130 static void
3131 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3133 int total = nh_grp->sum_norm_weight;
3134 u16 ecmp_size = nh_grp->ecmp_size;
3135 int i, weight = 0, lower_bound = 0;
3137 for (i = 0; i < nh_grp->count; i++) {
3138 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3139 int upper_bound;
3141 if (!nh->should_offload)
3142 continue;
3143 weight += nh->norm_nh_weight;
3144 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3145 nh->num_adj_entries = upper_bound - lower_bound;
3146 lower_bound = upper_bound;
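/* Compiled-out sketch (hypothetical helper, not part of the driver) of
* the rebalancing arithmetic above, assuming three nexthops of
* normalized weight 1 and an adjacency group of ecmp_size 4.
*/
#if 0
static void mlxsw_sp_nexthop_rebalance_example(void)
{
	int total = 3, ecmp_size = 4;
	int i, weight = 0, lower_bound = 0;
	int num_adj_entries[3];

	for (i = 0; i < 3; i++) {
		int upper_bound;

		weight += 1;	/* every norm_nh_weight is 1 */
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
		num_adj_entries[i] = upper_bound - lower_bound;
		lower_bound = upper_bound;
	}
	/* num_adj_entries is now { 1, 2, 1 }: the spare fourth entry is
	 * spread by the rounding instead of piling up on one member.
	 */
}
#endif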
3150 static void
3151 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3152 struct mlxsw_sp_nexthop_group *nh_grp)
3154 u16 ecmp_size, old_ecmp_size;
3155 struct mlxsw_sp_nexthop *nh;
3156 bool offload_change = false;
3157 u32 adj_index;
3158 bool old_adj_index_valid;
3159 u32 old_adj_index;
3160 int i;
3161 int err;
3163 if (!nh_grp->gateway) {
3164 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3165 return;
3168 for (i = 0; i < nh_grp->count; i++) {
3169 nh = &nh_grp->nexthops[i];
3171 if (nh->should_offload != nh->offloaded) {
3172 offload_change = true;
3173 if (nh->should_offload)
3174 nh->update = 1;
3177 if (!offload_change) {
3178 /* Nothing was added or removed, so no need to reallocate. Just
3179 * update the MAC on the existing adjacency indexes.
3180 */
3181 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3182 if (err) {
3183 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3184 goto set_trap;
3186 return;
3188 mlxsw_sp_nexthop_group_normalize(nh_grp);
3189 if (!nh_grp->sum_norm_weight)
3190 /* No neigh of this group is connected, so just set
3191 * the trap and let everything flow through the kernel.
3192 */
3193 goto set_trap;
3195 ecmp_size = nh_grp->sum_norm_weight;
3196 err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3197 if (err)
3198 /* No valid allocation size available. */
3199 goto set_trap;
3201 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index);
3202 if (err) {
3203 /* We ran out of KVD linear space, just set the
3204 * trap and let everything flow through the kernel.
3205 */
3206 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3207 goto set_trap;
3209 old_adj_index_valid = nh_grp->adj_index_valid;
3210 old_adj_index = nh_grp->adj_index;
3211 old_ecmp_size = nh_grp->ecmp_size;
3212 nh_grp->adj_index_valid = 1;
3213 nh_grp->adj_index = adj_index;
3214 nh_grp->ecmp_size = ecmp_size;
3215 mlxsw_sp_nexthop_group_rebalance(nh_grp);
3216 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3217 if (err) {
3218 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3219 goto set_trap;
3222 if (!old_adj_index_valid) {
3223 /* The trap was set for fib entries, so we have to call
3224 * fib entry update to unset it and use the adjacency index.
3225 */
3226 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3227 if (err) {
3228 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3229 goto set_trap;
3231 return;
3234 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3235 old_adj_index, old_ecmp_size);
3236 mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
3237 if (err) {
3238 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3239 goto set_trap;
3242 /* Offload state within the group changed, so update the flags. */
3243 mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3245 return;
3247 set_trap:
3248 old_adj_index_valid = nh_grp->adj_index_valid;
3249 nh_grp->adj_index_valid = 0;
3250 for (i = 0; i < nh_grp->count; i++) {
3251 nh = &nh_grp->nexthops[i];
3252 nh->offloaded = 0;
3254 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3255 if (err)
3256 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3257 if (old_adj_index_valid)
3258 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
3261 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3262 bool removing)
3264 if (!removing)
3265 nh->should_offload = 1;
3266 else
3267 nh->should_offload = 0;
3268 nh->update = 1;
3271 static void
3272 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3273 struct mlxsw_sp_neigh_entry *neigh_entry,
3274 bool removing)
3276 struct mlxsw_sp_nexthop *nh;
3278 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3279 neigh_list_node) {
3280 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3281 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3285 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3286 struct mlxsw_sp_rif *rif)
3288 if (nh->rif)
3289 return;
3291 nh->rif = rif;
3292 list_add(&nh->rif_list_node, &rif->nexthop_list);
3295 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3297 if (!nh->rif)
3298 return;
3300 list_del(&nh->rif_list_node);
3301 nh->rif = NULL;
3304 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3305 struct mlxsw_sp_nexthop *nh)
3307 struct mlxsw_sp_neigh_entry *neigh_entry;
3308 struct neighbour *n;
3309 u8 nud_state, dead;
3310 int err;
3312 if (!nh->nh_grp->gateway || nh->neigh_entry)
3313 return 0;
3315 /* Take a reference on the neigh here to ensure it is not
3316 * destroyed before the nexthop entry is finished with it.
3317 * The reference is taken either by neigh_lookup() or by
3318 * neigh_create() in case n is not found.
3319 */
3320 n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3321 if (!n) {
3322 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3323 nh->rif->dev);
3324 if (IS_ERR(n))
3325 return PTR_ERR(n);
3326 neigh_event_send(n, NULL);
3328 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3329 if (!neigh_entry) {
3330 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3331 if (IS_ERR(neigh_entry)) {
3332 err = -EINVAL;
3333 goto err_neigh_entry_create;
3337 /* If that is the first nexthop connected to that neigh, add to
3338 * nexthop_neighs_list.
3339 */
3340 if (list_empty(&neigh_entry->nexthop_list))
3341 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3342 &mlxsw_sp->router->nexthop_neighs_list);
3344 nh->neigh_entry = neigh_entry;
3345 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3346 read_lock_bh(&n->lock);
3347 nud_state = n->nud_state;
3348 dead = n->dead;
3349 read_unlock_bh(&n->lock);
3350 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3352 return 0;
3354 err_neigh_entry_create:
3355 neigh_release(n);
3356 return err;
3359 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3360 struct mlxsw_sp_nexthop *nh)
3362 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3363 struct neighbour *n;
3365 if (!neigh_entry)
3366 return;
3367 n = neigh_entry->key.n;
3369 __mlxsw_sp_nexthop_neigh_update(nh, true);
3370 list_del(&nh->neigh_list_node);
3371 nh->neigh_entry = NULL;
3373 /* If that is the last nexthop connected to that neigh, remove from
3374 * nexthop_neighs_list.
3375 */
3376 if (list_empty(&neigh_entry->nexthop_list))
3377 list_del(&neigh_entry->nexthop_neighs_list_node);
3379 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3380 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3382 neigh_release(n);
3385 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3387 struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3389 return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3392 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3393 struct mlxsw_sp_nexthop *nh,
3394 struct mlxsw_sp_ipip_entry *ipip_entry)
3396 bool removing;
3398 if (!nh->nh_grp->gateway || nh->ipip_entry)
3399 return;
3401 nh->ipip_entry = ipip_entry;
3402 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3403 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3404 mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3407 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3408 struct mlxsw_sp_nexthop *nh)
3410 struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3412 if (!ipip_entry)
3413 return;
3415 __mlxsw_sp_nexthop_neigh_update(nh, true);
3416 nh->ipip_entry = NULL;
3419 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3420 const struct fib_nh *fib_nh,
3421 enum mlxsw_sp_ipip_type *p_ipipt)
3423 struct net_device *dev = fib_nh->nh_dev;
3425 return dev &&
3426 fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3427 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3430 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3431 struct mlxsw_sp_nexthop *nh)
3433 switch (nh->type) {
3434 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3435 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3436 mlxsw_sp_nexthop_rif_fini(nh);
3437 break;
3438 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3439 mlxsw_sp_nexthop_rif_fini(nh);
3440 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3441 break;
3445 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3446 struct mlxsw_sp_nexthop *nh,
3447 struct fib_nh *fib_nh)
3449 const struct mlxsw_sp_ipip_ops *ipip_ops;
3450 struct net_device *dev = fib_nh->nh_dev;
3451 struct mlxsw_sp_ipip_entry *ipip_entry;
3452 struct mlxsw_sp_rif *rif;
3453 int err;
3455 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3456 if (ipip_entry) {
3457 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3458 if (ipip_ops->can_offload(mlxsw_sp, dev,
3459 MLXSW_SP_L3_PROTO_IPV4)) {
3460 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3461 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3462 return 0;
3466 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3467 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3468 if (!rif)
3469 return 0;
3471 mlxsw_sp_nexthop_rif_init(nh, rif);
3472 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3473 if (err)
3474 goto err_neigh_init;
3476 return 0;
3478 err_neigh_init:
3479 mlxsw_sp_nexthop_rif_fini(nh);
3480 return err;
3483 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3484 struct mlxsw_sp_nexthop *nh)
3486 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3489 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3490 struct mlxsw_sp_nexthop_group *nh_grp,
3491 struct mlxsw_sp_nexthop *nh,
3492 struct fib_nh *fib_nh)
3494 struct net_device *dev = fib_nh->nh_dev;
3495 struct in_device *in_dev;
3496 int err;
3498 nh->nh_grp = nh_grp;
3499 nh->key.fib_nh = fib_nh;
3500 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3501 nh->nh_weight = fib_nh->nh_weight;
3502 #else
3503 nh->nh_weight = 1;
3504 #endif
3505 memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3506 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3507 if (err)
3508 return err;
3510 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3511 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3513 if (!dev)
3514 return 0;
3516 in_dev = __in_dev_get_rtnl(dev);
3517 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3518 fib_nh->nh_flags & RTNH_F_LINKDOWN)
3519 return 0;
3521 err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3522 if (err)
3523 goto err_nexthop_neigh_init;
3525 return 0;
3527 err_nexthop_neigh_init:
3528 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3529 return err;
3532 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3533 struct mlxsw_sp_nexthop *nh)
3535 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3536 list_del(&nh->router_list_node);
3537 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3538 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3541 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3542 unsigned long event, struct fib_nh *fib_nh)
3544 struct mlxsw_sp_nexthop_key key;
3545 struct mlxsw_sp_nexthop *nh;
3547 if (mlxsw_sp->router->aborted)
3548 return;
3550 key.fib_nh = fib_nh;
3551 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3552 if (WARN_ON_ONCE(!nh))
3553 return;
3555 switch (event) {
3556 case FIB_EVENT_NH_ADD:
3557 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3558 break;
3559 case FIB_EVENT_NH_DEL:
3560 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3561 break;
3564 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3567 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3568 struct mlxsw_sp_rif *rif)
3570 struct mlxsw_sp_nexthop *nh;
3571 bool removing;
3573 list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3574 switch (nh->type) {
3575 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3576 removing = false;
3577 break;
3578 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3579 removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3580 break;
3581 default:
3582 WARN_ON(1);
3583 continue;
3586 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3587 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3591 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3592 struct mlxsw_sp_rif *old_rif,
3593 struct mlxsw_sp_rif *new_rif)
3595 struct mlxsw_sp_nexthop *nh;
3597 list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3598 list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3599 nh->rif = new_rif;
3600 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3603 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3604 struct mlxsw_sp_rif *rif)
3606 struct mlxsw_sp_nexthop *nh, *tmp;
3608 list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3609 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3610 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3614 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3615 const struct fib_info *fi)
3617 return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3618 mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3621 static struct mlxsw_sp_nexthop_group *
3622 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3624 struct mlxsw_sp_nexthop_group *nh_grp;
3625 struct mlxsw_sp_nexthop *nh;
3626 struct fib_nh *fib_nh;
3627 size_t alloc_size;
3628 int i;
3629 int err;
3631 alloc_size = sizeof(*nh_grp) +
3632 fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3633 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3634 if (!nh_grp)
3635 return ERR_PTR(-ENOMEM);
3636 nh_grp->priv = fi;
3637 INIT_LIST_HEAD(&nh_grp->fib_list);
3638 nh_grp->neigh_tbl = &arp_tbl;
3640 nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3641 nh_grp->count = fi->fib_nhs;
3642 fib_info_hold(fi);
3643 for (i = 0; i < nh_grp->count; i++) {
3644 nh = &nh_grp->nexthops[i];
3645 fib_nh = &fi->fib_nh[i];
3646 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3647 if (err)
3648 goto err_nexthop4_init;
3650 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3651 if (err)
3652 goto err_nexthop_group_insert;
3653 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3654 return nh_grp;
3656 err_nexthop_group_insert:
3657 err_nexthop4_init:
3658 for (i--; i >= 0; i--) {
3659 nh = &nh_grp->nexthops[i];
3660 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3662 fib_info_put(fi);
3663 kfree(nh_grp);
3664 return ERR_PTR(err);
3667 static void
3668 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3669 struct mlxsw_sp_nexthop_group *nh_grp)
3671 struct mlxsw_sp_nexthop *nh;
3672 int i;
3674 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3675 for (i = 0; i < nh_grp->count; i++) {
3676 nh = &nh_grp->nexthops[i];
3677 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3679 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3680 WARN_ON_ONCE(nh_grp->adj_index_valid);
3681 fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3682 kfree(nh_grp);
3685 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3686 struct mlxsw_sp_fib_entry *fib_entry,
3687 struct fib_info *fi)
3689 struct mlxsw_sp_nexthop_group *nh_grp;
3691 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3692 if (!nh_grp) {
3693 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3694 if (IS_ERR(nh_grp))
3695 return PTR_ERR(nh_grp);
3697 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3698 fib_entry->nh_group = nh_grp;
3699 return 0;
3702 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3703 struct mlxsw_sp_fib_entry *fib_entry)
3705 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3707 list_del(&fib_entry->nexthop_group_node);
3708 if (!list_empty(&nh_grp->fib_list))
3709 return;
3710 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3713 static bool
3714 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3716 struct mlxsw_sp_fib4_entry *fib4_entry;
3718 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3719 common);
3720 return !fib4_entry->tos;
3723 static bool
3724 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3726 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3728 switch (fib_entry->fib_node->fib->proto) {
3729 case MLXSW_SP_L3_PROTO_IPV4:
3730 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3731 return false;
3732 break;
3733 case MLXSW_SP_L3_PROTO_IPV6:
3734 break;
3737 switch (fib_entry->type) {
3738 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3739 return !!nh_group->adj_index_valid;
3740 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3741 return !!nh_group->nh_rif;
3742 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3743 return true;
3744 default:
3745 return false;
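/* Summary of the decision above: an IPv4 entry with a non-zero TOS is
* never offloaded. Past that gate, a remote entry needs a valid
* adjacency index, a local entry needs a RIF, an IPIP decap entry is
* always offloadable, and anything else (e.g. a trap entry) stays in
* the kernel.
*/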
3749 static struct mlxsw_sp_nexthop *
3750 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3751 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3753 int i;
3755 for (i = 0; i < nh_grp->count; i++) {
3756 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3757 struct rt6_info *rt = mlxsw_sp_rt6->rt;
3759 if (nh->rif && nh->rif->dev == rt->dst.dev &&
3760 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3761 &rt->rt6i_gateway))
3762 return nh;
3766 return NULL;
3769 static void
3770 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3772 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3773 int i;
3775 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3776 fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3777 nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3778 return;
3781 for (i = 0; i < nh_grp->count; i++) {
3782 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3784 if (nh->offloaded)
3785 nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3786 else
3787 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3791 static void
3792 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3794 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3795 int i;
3797 if (!list_is_singular(&nh_grp->fib_list))
3798 return;
3800 for (i = 0; i < nh_grp->count; i++) {
3801 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3803 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3807 static void
3808 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3810 struct mlxsw_sp_fib6_entry *fib6_entry;
3811 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3813 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3814 common);
3816 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3817 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3818 list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3819 return;
3822 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3823 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3824 struct mlxsw_sp_nexthop *nh;
3826 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3827 if (nh && nh->offloaded)
3828 mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD;
3829 else
3830 mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3834 static void
3835 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3837 struct mlxsw_sp_fib6_entry *fib6_entry;
3838 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3840 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3841 common);
3842 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3843 struct rt6_info *rt = mlxsw_sp_rt6->rt;
3845 rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD;
3849 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3851 switch (fib_entry->fib_node->fib->proto) {
3852 case MLXSW_SP_L3_PROTO_IPV4:
3853 mlxsw_sp_fib4_entry_offload_set(fib_entry);
3854 break;
3855 case MLXSW_SP_L3_PROTO_IPV6:
3856 mlxsw_sp_fib6_entry_offload_set(fib_entry);
3857 break;
3861 static void
3862 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3864 switch (fib_entry->fib_node->fib->proto) {
3865 case MLXSW_SP_L3_PROTO_IPV4:
3866 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3867 break;
3868 case MLXSW_SP_L3_PROTO_IPV6:
3869 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3870 break;
3874 static void
3875 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3876 enum mlxsw_reg_ralue_op op, int err)
3878 switch (op) {
3879 case MLXSW_REG_RALUE_OP_WRITE_DELETE:
3880 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
3881 case MLXSW_REG_RALUE_OP_WRITE_WRITE:
3882 if (err)
3883 return;
3884 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
3885 mlxsw_sp_fib_entry_offload_set(fib_entry);
3886 else
3887 mlxsw_sp_fib_entry_offload_unset(fib_entry);
3888 return;
3889 default:
3890 return;
3894 static void
3895 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
3896 const struct mlxsw_sp_fib_entry *fib_entry,
3897 enum mlxsw_reg_ralue_op op)
3899 struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
3900 enum mlxsw_reg_ralxx_protocol proto;
3901 u32 *p_dip;
3903 proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
3905 switch (fib->proto) {
3906 case MLXSW_SP_L3_PROTO_IPV4:
3907 p_dip = (u32 *) fib_entry->fib_node->key.addr;
3908 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
3909 fib_entry->fib_node->key.prefix_len,
3910 *p_dip);
3911 break;
3912 case MLXSW_SP_L3_PROTO_IPV6:
3913 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
3914 fib_entry->fib_node->key.prefix_len,
3915 fib_entry->fib_node->key.addr);
3916 break;
3920 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
3921 struct mlxsw_sp_fib_entry *fib_entry,
3922 enum mlxsw_reg_ralue_op op)
3924 char ralue_pl[MLXSW_REG_RALUE_LEN];
3925 enum mlxsw_reg_ralue_trap_action trap_action;
3926 u16 trap_id = 0;
3927 u32 adjacency_index = 0;
3928 u16 ecmp_size = 0;
3930 /* In case the nexthop group adjacency index is valid, use it
3931 * with the provided ECMP size. Otherwise, set up a trap and pass
3932 * the traffic to the kernel.
3933 */
3934 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3935 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3936 adjacency_index = fib_entry->nh_group->adj_index;
3937 ecmp_size = fib_entry->nh_group->ecmp_size;
3938 } else {
3939 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3940 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3943 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3944 mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
3945 adjacency_index, ecmp_size);
3946 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3949 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
3950 struct mlxsw_sp_fib_entry *fib_entry,
3951 enum mlxsw_reg_ralue_op op)
3953 struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
3954 enum mlxsw_reg_ralue_trap_action trap_action;
3955 char ralue_pl[MLXSW_REG_RALUE_LEN];
3956 u16 trap_id = 0;
3957 u16 rif_index = 0;
3959 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
3960 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
3961 rif_index = rif->rif_index;
3962 } else {
3963 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
3964 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
3967 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3968 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
3969 rif_index);
3970 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3973 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
3974 struct mlxsw_sp_fib_entry *fib_entry,
3975 enum mlxsw_reg_ralue_op op)
3977 char ralue_pl[MLXSW_REG_RALUE_LEN];
3979 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
3980 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
3981 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
3984 static int
3985 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
3986 struct mlxsw_sp_fib_entry *fib_entry,
3987 enum mlxsw_reg_ralue_op op)
3989 struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
3990 const struct mlxsw_sp_ipip_ops *ipip_ops;
3992 if (WARN_ON(!ipip_entry))
3993 return -EINVAL;
3995 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3996 return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
3997 fib_entry->decap.tunnel_index);
4000 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4001 struct mlxsw_sp_fib_entry *fib_entry,
4002 enum mlxsw_reg_ralue_op op)
4004 switch (fib_entry->type) {
4005 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4006 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4007 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4008 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4009 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4010 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4011 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4012 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4013 fib_entry, op);
4015 return -EINVAL;
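/* Write the entry to the device and refresh its offload indication
 * according to the result.
 */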
static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);

	mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);

	return err;
}

static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
}

static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_entry *fib_entry)
{
	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
}

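/* Map the kernel route type to a hardware FIB entry type. Local and
 * broadcast routes are trapped to the CPU, unless the local route is the
 * underlay decap route of an IP-in-IP tunnel. Blackhole-like routes are
 * trapped with a lower priority, and unicast routes are forwarded via a
 * nexthop group when they have a gateway.
 */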
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		/* fall through */
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}

static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	fib4_entry->prio = fen_info->fi->fib_priority;
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_nexthop4_group_get:
err_fib4_entry_type_set:
	kfree(fib4_entry);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}

static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id == fen_info->tb_id &&
		    fib4_entry->tos == fen_info->tos &&
		    fib4_entry->type == fen_info->type &&
		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
		    fen_info->fi) {
			return fib4_entry;
		}
	}

	return NULL;
}

static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};

static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
				    struct mlxsw_sp_fib_node *fib_node)
{
	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
				      mlxsw_sp_fib_ht_params);
}

static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
				     struct mlxsw_sp_fib_node *fib_node)
{
	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
			       mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	INIT_LIST_HEAD(&fib_node->entry_list);
	list_add(&fib_node->list, &fib->node_list);
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;

	return fib_node;
}

static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}

static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry)
{
	return list_first_entry(&fib_node->entry_list,
				struct mlxsw_sp_fib_entry, list) == fib_entry;
}

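/* Make sure the FIB's LPM tree covers the node's prefix length, extending
 * the tree and rebinding the virtual routers that use it when needed.
 */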
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}

static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}

static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}

static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}

static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

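/* Entries in a node's list are kept sorted by decreasing table ID, then
 * TOS, then priority. Find the entry a new one should be inserted before.
 */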
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}

static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}

static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}

static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}

static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}

static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}

static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib4_entry *fib4_entry,
					 bool replace, bool append)
{
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib4_entry);
	return err;
}

static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_node_list_remove(fib4_entry);

	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
}

static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib4_entry, common.list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}

static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}

static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib4_entry))
		return;
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}

static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt)
{
	/* Packets with link-local destination IP arriving to the router
	 * are trapped to the CPU, so no need to program specific routes
	 * for them.
	 */
	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL)
		return true;

	/* Multicast routes aren't supported, so ignore them. Neighbour
	 * Discovery packets are specifically trapped.
	 */
	if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST)
		return true;

	/* Cloned routes are irrelevant in the forwarding path. */
	if (rt->rt6i_flags & RTF_CACHE)
		return true;

	return false;
}

static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	rt6_hold(rt);

	return mlxsw_sp_rt6;
}

#if IS_ENABLED(CONFIG_IPV6)
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
	rt6_release(rt);
}
#else
static void mlxsw_sp_rt6_release(struct rt6_info *rt)
{
}
#endif

static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}

static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt)
{
	/* RTF_CACHE routes are ignored */
	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}

static struct rt6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				list)->rt;
}

static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
		 * virtual router.
		 */
		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (rt->rt6i_metric < nrt->rt6i_metric)
			continue;
		if (rt->rt6i_metric == nrt->rt6i_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		if (rt->rt6i_metric > nrt->rt6i_metric)
			break;
	}

	return NULL;
}

static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
			    const struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		if (mlxsw_sp_rt6->rt == rt)
			return mlxsw_sp_rt6;
	}

	return NULL;
}

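/* Return true if the route's nexthop device is an IP-in-IP tunnel the
 * driver can recognize; the tunnel type is reported via 'ret'.
 */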
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct rt6_info *rt,
					enum mlxsw_sp_ipip_type *ret)
{
	return rt->dst.dev &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret);
}

static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop_group *nh_grp,
				       struct mlxsw_sp_nexthop *nh,
				       const struct rt6_info *rt)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct net_device *dev = rt->dst.dev;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV6)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	mlxsw_sp_nexthop_rif_init(nh, rif);

	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}

static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}

static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	nh->nh_grp = nh_grp;
	nh->nh_weight = rt->rt6i_nh_weight;
	memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
}

static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}

static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
				    const struct rt6_info *rt)
{
	return rt->rt6i_flags & RTF_GATEWAY ||
	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}

static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i = 0;
	int err;

	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nh_grp->count = fib6_entry->nrt6;
	for (i = 0; i < nh_grp->count; i++) {
		struct rt6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}

static void
mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i = nh_grp->count;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	WARN_ON(nh_grp->adj_index_valid);
	kfree(nh_grp);
}

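/* Look up a nexthop group matching the entry's set of routes, or create
 * one; identical groups are shared between entries.
 */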
static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;

	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
	if (!nh_grp) {
		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
		if (IS_ERR(nh_grp))
			return PTR_ERR(nh_grp);
	}

	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &nh_grp->fib_list);
	fib6_entry->common.nh_group = nh_grp;

	return 0;
}

static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;

	list_del(&fib_entry->nexthop_group_node);
	if (!list_empty(&nh_grp->fib_list))
		return;
	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
}

static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	int err;

	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	return err;
}

static int
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6))
		return PTR_ERR(mlxsw_sp_rt6);

	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6++;

	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_update;

	return 0;

err_nexthop6_group_update:
	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	return err;
}

static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct rt6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;

	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}

static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 const struct rt6_info *rt)
{
	/* Packets hitting RTF_REJECT routes need to be discarded by the
	 * stack. We can rely on their destination device not having a
	 * RIF (it's the loopback device) and can thus use action type
	 * local, which will cause them to be trapped with a lower
	 * priority than packets that need to be locally received.
	 */
	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->rt6i_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
}

static void
mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;

	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
				 list) {
		fib6_entry->nrt6--;
		list_del(&mlxsw_sp_rt6->list);
		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
	}
}

static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6)) {
		err = PTR_ERR(mlxsw_sp_rt6);
		goto err_rt6_create;
	}

	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);

	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop6_group_get:
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}

static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}

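/* Find the entry a new IPv6 route should be inserted before. On replace,
 * prefer an entry with the same table ID and metric whose multipath
 * capability matches the new route; otherwise fall back to the first
 * same-metric candidate.
 */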
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct rt6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id)
			continue;
		if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id)
			break;
		if (replace && rt->rt6i_metric == nrt->rt6i_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->rt6i_metric > nrt->rt6i_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}

static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);

	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;

	if (fib6_entry) {
		list_add_tail(&new6_entry->common.list,
			      &fib6_entry->common.list);
	} else {
		struct mlxsw_sp_fib6_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last);

			if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id)
				break;
			fib6_entry = last;
		}

		if (fib6_entry)
			list_add(&new6_entry->common.list,
				 &fib6_entry->common.list);
		else
			list_add(&new6_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}

static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	list_del(&fib6_entry->common.list);
}

static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib6_entry *fib6_entry,
					 bool replace)
{
	int err;

	err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
	return err;
}

static void
mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
}

static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr,
					    sizeof(rt->rt6i_dst.addr),
					    rt->rt6i_dst.plen);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id &&
		    rt->rt6i_metric == iter_rt->rt6i_metric &&
		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
			return fib6_entry;
	}

	return NULL;
}

static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
	struct mlxsw_sp_fib6_entry *replaced;

	if (!replace)
		return;

	replaced = list_next_entry(fib6_entry, common.list);

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}

static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct rt6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	if (rt->rt6i_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id,
					 &rt->rt6i_dst.addr,
					 sizeof(rt->rt6i_dst.addr),
					 rt->rt6i_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	if (err)
		goto err_fib6_node_entry_link;

	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);

	return 0;

err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}

static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct rt6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}

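/* Bind every virtual router to an LPM tree and install a default
 * catch-all route that traps packets to the CPU. Used once offload is
 * aborted.
 */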
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}

static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
				     struct mfc_entry_notifier_info *men_info,
				     bool replace)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
}

static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

static int
mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return 0;

	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
	if (IS_ERR(vr))
		return PTR_ERR(vr);

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
	return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
				   ven_info->vif_index,
				   ven_info->vif_flags, rif);
}

static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}

static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
	int err;

	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
					       MLXSW_SP_LPM_TREE_MIN);
	if (err)
		return err;

	/* The multicast router code does not need an abort trap as by default,
	 * packets that don't match any routes are trapped to the CPU.
	 */

	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
						MLXSW_SP_LPM_TREE_MIN + 1);
}

static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;

	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}

static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;

	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}

static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_fib_node *fib_node)
{
	switch (fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
		break;
	}
}

static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_vr *vr,
				  enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
	struct mlxsw_sp_fib_node *fib_node, *tmp;

	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
		bool do_break = &tmp->list == &fib->node_list;

		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
		if (do_break)
			break;
	}
}

static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];

		if (!mlxsw_sp_vr_is_used(vr))
			continue;

		mlxsw_sp_mr_table_flush(vr->mr4_table);
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);

		/* If virtual router was only used for IPv4, then it's no
		 * longer used.
		 */
		if (!mlxsw_sp_vr_is_used(vr))
			continue;
		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	}
}

static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	if (mlxsw_sp->router->aborted)
		return;
	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	mlxsw_sp->router->aborted = true;
	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
}

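/* FIB notifications are received in atomic context, so the notifier only
 * copies the relevant info and schedules this work item to process the
 * event under RTNL.
 */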
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	unsigned long event;
};

static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}

static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}

static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;

		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
						replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
		ipmr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD:
		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
						    &fib_work->ven_info);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_VIF_DEL:
		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
					      &fib_work->ven_info);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}

static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
}

static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
				       struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen6_info;

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		fen6_info = container_of(info, struct fib6_entry_notifier_info,
					 info);
		fib_work->fen6_info = *fen6_info;
		rt6_hold(fib_work->fen6_info.rt);
		break;
	}
}

static void
mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
			    struct fib_notifier_info *info)
{
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD: /* fall through */
	case FIB_EVENT_ENTRY_DEL:
		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
		ipmr_cache_hold(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD: /* fall through */
	case FIB_EVENT_VIF_DEL:
		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
		dev_hold(fib_work->ven_info.dev);
		break;
	}
}

static int mlxsw_sp_router_fib_rule_event(unsigned long event,
					  struct fib_notifier_info *info,
					  struct mlxsw_sp *mlxsw_sp)
{
	struct netlink_ext_ack *extack = info->extack;
	struct fib_rule_notifier_info *fr_info;
	struct fib_rule *rule;
	int err = 0;

	/* nothing to do at the moment */
	if (event == FIB_EVENT_RULE_DEL)
		return 0;

	if (mlxsw_sp->router->aborted)
		return 0;

	fr_info = container_of(info, struct fib_rule_notifier_info, info);
	rule = fr_info->rule;

	switch (info->family) {
	case AF_INET:
		if (!fib4_rule_default(rule) && !rule->l3mdev)
			err = -1;
		break;
	case AF_INET6:
		if (!fib6_rule_default(rule) && !rule->l3mdev)
			err = -1;
		break;
	case RTNL_FAMILY_IPMR:
		if (!ipmr_rule_default(rule) && !rule->l3mdev)
			err = -1;
		break;
	}

	if (err < 0)
		NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload");

	return err;
}

/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err;

	if (!net_eq(info->net, &init_net) ||
	    (info->family != AF_INET && info->family != AF_INET6 &&
	     info->family != RTNL_FAMILY_IPMR))
		return NOTIFY_DONE;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);

	switch (event) {
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		err = mlxsw_sp_router_fib_rule_event(event, info,
						     router->mlxsw_sp);
		if (!err)
			return NOTIFY_DONE;
	}

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NOTIFY_BAD;

	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		break;
	case RTNL_FAMILY_IPMR:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
		mlxsw_sp_router_fibmr_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}

static struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
			 const struct net_device *dev)
{
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
		if (mlxsw_sp->router->rifs[i] &&
		    mlxsw_sp->router->rifs[i]->dev == dev)
			return mlxsw_sp->router->rifs[i];

	return NULL;
}

static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}

static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}

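/* Decide whether a netdevice event calls for RIF configuration: NETDEV_UP
 * creates a RIF if none exists, while NETDEV_DOWN destroys it once the
 * device has no IPv4 or IPv6 addresses left and is not an L3 slave.
 */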
5889 static bool
5890 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
5891 unsigned long event)
5893 struct inet6_dev *inet6_dev;
5894 bool addr_list_empty = true;
5895 struct in_device *idev;
5897 switch (event) {
5898 case NETDEV_UP:
5899 return rif == NULL;
5900 case NETDEV_DOWN:
5901 idev = __in_dev_get_rtnl(dev);
5902 if (idev && idev->ifa_list)
5903 addr_list_empty = false;
5905 inet6_dev = __in6_dev_get(dev);
5906 if (addr_list_empty && inet6_dev &&
5907 !list_empty(&inet6_dev->addr_list))
5908 addr_list_empty = false;
5910 if (rif && addr_list_empty &&
5911 !netif_is_l3_slave(rif->dev))
5912 return true;
5913 /* It is possible we already removed the RIF ourselves
5914 * if it was assigned to a netdev that is now a bridge
5915 * or LAG slave.
5917 return false;
5920 return false;
5923 static enum mlxsw_sp_rif_type
5924 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
5925 const struct net_device *dev)
5927 enum mlxsw_sp_fid_type type;
5929 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
5930 return MLXSW_SP_RIF_TYPE_IPIP_LB;
5932 /* Otherwise RIF type is derived from the type of the underlying FID. */
5933 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
5934 type = MLXSW_SP_FID_TYPE_8021Q;
5935 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
5936 type = MLXSW_SP_FID_TYPE_8021Q;
5937 else if (netif_is_bridge_master(dev))
5938 type = MLXSW_SP_FID_TYPE_8021D;
5939 else
5940 type = MLXSW_SP_FID_TYPE_RFID;
5942 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
5945 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
5947 int i;
5949 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
5950 if (!mlxsw_sp->router->rifs[i]) {
5951 *p_rif_index = i;
5952 return 0;
5956 return -ENOBUFS;
5959 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
5960 u16 vr_id,
5961 struct net_device *l3_dev)
5963 struct mlxsw_sp_rif *rif;
5965 rif = kzalloc(rif_size, GFP_KERNEL);
5966 if (!rif)
5967 return NULL;
5969 INIT_LIST_HEAD(&rif->nexthop_list);
5970 INIT_LIST_HEAD(&rif->neigh_list);
5971 ether_addr_copy(rif->addr, l3_dev->dev_addr);
5972 rif->mtu = l3_dev->mtu;
5973 rif->vr_id = vr_id;
5974 rif->dev = l3_dev;
5975 rif->rif_index = rif_index;
5977 return rif;
5980 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
5981 u16 rif_index)
5983 return mlxsw_sp->router->rifs[rif_index];
5986 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
5988 return rif->rif_index;
5991 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5993 return lb_rif->common.rif_index;
5996 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
5998 return lb_rif->ul_vr_id;
6001 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6003 return rif->dev->ifindex;
6006 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6008 return rif->dev;
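/* The accessors above are the public API for translating between RIF
 * objects, their indexes and their netdevices. An illustrative (not taken
 * from this file) lookup of the netdevice behind a RIF index would be:
 *
 *	rif = mlxsw_sp_rif_by_index(mlxsw_sp, rif_index);
 *	if (rif)
 *		dev = mlxsw_sp_rif_dev(rif);
 *
 * RIF creation below binds a virtual router, allocates a free RIF index,
 * takes a FID reference when the RIF type has one, and only then programs
 * the device via the per-type configure() callback. Each error label
 * unwinds exactly the steps that preceded it, in reverse order.
 */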
6011 static struct mlxsw_sp_rif *
6012 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6013 const struct mlxsw_sp_rif_params *params,
6014 struct netlink_ext_ack *extack)
6016 u32 tb_id = l3mdev_fib_table(params->dev);
6017 const struct mlxsw_sp_rif_ops *ops;
6018 struct mlxsw_sp_fid *fid = NULL;
6019 enum mlxsw_sp_rif_type type;
6020 struct mlxsw_sp_rif *rif;
6021 struct mlxsw_sp_vr *vr;
6022 u16 rif_index;
6023 int err;
6025 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6026 ops = mlxsw_sp->router->rif_ops_arr[type];
6028 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6029 if (IS_ERR(vr))
6030 return ERR_CAST(vr);
6031 vr->rif_count++;
6033 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6034 if (err) {
6035 NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
6036 goto err_rif_index_alloc;
6039 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6040 if (!rif) {
6041 err = -ENOMEM;
6042 goto err_rif_alloc;
6044 rif->mlxsw_sp = mlxsw_sp;
6045 rif->ops = ops;
6047 if (ops->fid_get) {
6048 fid = ops->fid_get(rif);
6049 if (IS_ERR(fid)) {
6050 err = PTR_ERR(fid);
6051 goto err_fid_get;
6053 rif->fid = fid;
6056 if (ops->setup)
6057 ops->setup(rif, params);
6059 err = ops->configure(rif);
6060 if (err)
6061 goto err_configure;
6063 err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
6064 if (err)
6065 goto err_mr_rif_add;
6067 mlxsw_sp_rif_counters_alloc(rif);
6068 mlxsw_sp->router->rifs[rif_index] = rif;
6070 return rif;
6072 err_mr_rif_add:
6073 ops->deconfigure(rif);
6074 err_configure:
6075 if (fid)
6076 mlxsw_sp_fid_put(fid);
6077 err_fid_get:
6078 kfree(rif);
6079 err_rif_alloc:
6080 err_rif_index_alloc:
6081 vr->rif_count--;
6082 mlxsw_sp_vr_put(mlxsw_sp, vr);
6083 return ERR_PTR(err);
6086 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6088 const struct mlxsw_sp_rif_ops *ops = rif->ops;
6089 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6090 struct mlxsw_sp_fid *fid = rif->fid;
6091 struct mlxsw_sp_vr *vr;
6093 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6094 vr = &mlxsw_sp->router->vrs[rif->vr_id];
6096 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6097 mlxsw_sp_rif_counters_free(rif);
6098 mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
6099 ops->deconfigure(rif);
6100 if (fid)
6101 /* Loopback RIFs are not associated with a FID. */
6102 mlxsw_sp_fid_put(fid);
6103 kfree(rif);
6104 vr->rif_count--;
6105 mlxsw_sp_vr_put(mlxsw_sp, vr);
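/* Destruction is the exact mirror of mlxsw_sp_rif_create(): the RIF is
 * unlinked, the counters and the multicast table binding are released, the
 * device is deprogrammed and the FID and virtual router references are
 * dropped last.
 */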
6108 static void
6109 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6110 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6112 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6114 params->vid = mlxsw_sp_port_vlan->vid;
6115 params->lag = mlxsw_sp_port->lagged;
6116 if (params->lag)
6117 params->lag_id = mlxsw_sp_port->lag_id;
6118 else
6119 params->system_port = mlxsw_sp_port->local_port;
6122 static int
6123 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6124 struct net_device *l3_dev,
6125 struct netlink_ext_ack *extack)
6127 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6128 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6129 u16 vid = mlxsw_sp_port_vlan->vid;
6130 struct mlxsw_sp_rif *rif;
6131 struct mlxsw_sp_fid *fid;
6132 int err;
6134 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6135 if (!rif) {
		struct mlxsw_sp_rif_params params = {
			.dev = l3_dev,
		};
6140 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6141 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6142 if (IS_ERR(rif))
6143 return PTR_ERR(rif);
6146 /* FID was already created, just take a reference */
6147 fid = rif->ops->fid_get(rif);
6148 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6149 if (err)
6150 goto err_fid_port_vid_map;
6152 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6153 if (err)
6154 goto err_port_vid_learning_set;
6156 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6157 BR_STATE_FORWARDING);
6158 if (err)
6159 goto err_port_vid_stp_set;
6161 mlxsw_sp_port_vlan->fid = fid;
6163 return 0;
6165 err_port_vid_stp_set:
6166 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6167 err_port_vid_learning_set:
6168 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6169 err_fid_port_vid_map:
6170 mlxsw_sp_fid_put(fid);
6171 return err;
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
	u16 vid = mlxsw_sp_port_vlan->vid;

	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
		return;

	mlxsw_sp_port_vlan->fid = NULL;
	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
	mlxsw_sp_fid_put(fid);
}
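/* The inetaddr handlers below fan out by netdevice type - physical port,
 * LAG, bridge or VLAN upper - and bottom out in the port-VLAN router
 * join/leave helpers above.
 */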
6194 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6195 struct net_device *port_dev,
6196 unsigned long event, u16 vid,
6197 struct netlink_ext_ack *extack)
6199 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6200 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6202 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6203 if (WARN_ON(!mlxsw_sp_port_vlan))
6204 return -EINVAL;
6206 switch (event) {
6207 case NETDEV_UP:
6208 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6209 l3_dev, extack);
6210 case NETDEV_DOWN:
6211 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6212 break;
6215 return 0;
6218 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6219 unsigned long event,
6220 struct netlink_ext_ack *extack)
6222 if (netif_is_bridge_port(port_dev) ||
6223 netif_is_lag_port(port_dev) ||
6224 netif_is_ovs_port(port_dev))
6225 return 0;
6227 return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6228 extack);
6231 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6232 struct net_device *lag_dev,
6233 unsigned long event, u16 vid,
6234 struct netlink_ext_ack *extack)
6236 struct net_device *port_dev;
6237 struct list_head *iter;
6238 int err;
6240 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6241 if (mlxsw_sp_port_dev_check(port_dev)) {
6242 err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6243 port_dev,
6244 event, vid,
6245 extack);
6246 if (err)
6247 return err;
6251 return 0;
6254 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6255 unsigned long event,
6256 struct netlink_ext_ack *extack)
6258 if (netif_is_bridge_port(lag_dev))
6259 return 0;
6261 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6262 extack);
6265 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6266 unsigned long event,
6267 struct netlink_ext_ack *extack)
6269 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	struct mlxsw_sp_rif_params params = {
		.dev = l3_dev,
	};
6273 struct mlxsw_sp_rif *rif;
6275 switch (event) {
6276 case NETDEV_UP:
6277 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6278 if (IS_ERR(rif))
6279 return PTR_ERR(rif);
6280 break;
6281 case NETDEV_DOWN:
6282 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6283 mlxsw_sp_rif_destroy(rif);
6284 break;
6287 return 0;
6290 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6291 unsigned long event,
6292 struct netlink_ext_ack *extack)
6294 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6295 u16 vid = vlan_dev_vlan_id(vlan_dev);
6297 if (netif_is_bridge_port(vlan_dev))
6298 return 0;
6300 if (mlxsw_sp_port_dev_check(real_dev))
6301 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6302 event, vid, extack);
6303 else if (netif_is_lag_master(real_dev))
6304 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6305 vid, extack);
6306 else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6307 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6309 return 0;
6312 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6313 unsigned long event,
6314 struct netlink_ext_ack *extack)
6316 if (mlxsw_sp_port_dev_check(dev))
6317 return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6318 else if (netif_is_lag_master(dev))
6319 return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6320 else if (netif_is_bridge_master(dev))
6321 return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6322 else if (is_vlan_dev(dev))
6323 return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6324 else
6325 return 0;
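/* NETDEV_UP is processed from the address validator notifier instead, so
 * that a configuration the device cannot support is vetoed with an extack
 * message before the address is installed. NETDEV_DOWN is handled here.
 */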
6328 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6329 unsigned long event, void *ptr)
6331 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6332 struct net_device *dev = ifa->ifa_dev->dev;
6333 struct mlxsw_sp *mlxsw_sp;
6334 struct mlxsw_sp_rif *rif;
6335 int err = 0;
6337 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6338 if (event == NETDEV_UP)
6339 goto out;
6341 mlxsw_sp = mlxsw_sp_lower_get(dev);
6342 if (!mlxsw_sp)
6343 goto out;
6345 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6346 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6347 goto out;
6349 err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6350 out:
6351 return notifier_from_errno(err);
6354 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6355 unsigned long event, void *ptr)
6357 struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6358 struct net_device *dev = ivi->ivi_dev->dev;
6359 struct mlxsw_sp *mlxsw_sp;
6360 struct mlxsw_sp_rif *rif;
6361 int err = 0;
6363 mlxsw_sp = mlxsw_sp_lower_get(dev);
6364 if (!mlxsw_sp)
6365 goto out;
6367 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6368 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6369 goto out;
6371 err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6372 out:
6373 return notifier_from_errno(err);
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;
	struct net_device *dev;
	unsigned long event;
};
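/* IPv6 address notifications arrive in atomic context, so the event is
 * recorded in a work item and processed under RTNL from process context;
 * the netdevice is held for the lifetime of the work item.
 */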
6382 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6384 struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6385 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6386 struct net_device *dev = inet6addr_work->dev;
6387 unsigned long event = inet6addr_work->event;
6388 struct mlxsw_sp *mlxsw_sp;
6389 struct mlxsw_sp_rif *rif;
6391 rtnl_lock();
6392 mlxsw_sp = mlxsw_sp_lower_get(dev);
6393 if (!mlxsw_sp)
6394 goto out;
6396 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6397 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6398 goto out;
6400 __mlxsw_sp_inetaddr_event(dev, event, NULL);
6401 out:
6402 rtnl_unlock();
6403 dev_put(dev);
6404 kfree(inet6addr_work);
6407 /* Called with rcu_read_lock() */
6408 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6409 unsigned long event, void *ptr)
6411 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6412 struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6413 struct net_device *dev = if6->idev->dev;
6415 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6416 if (event == NETDEV_UP)
6417 return NOTIFY_DONE;
6419 if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6420 return NOTIFY_DONE;
6422 inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6423 if (!inet6addr_work)
6424 return NOTIFY_BAD;
6426 INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6427 inet6addr_work->dev = dev;
6428 inet6addr_work->event = event;
6429 dev_hold(dev);
6430 mlxsw_core_schedule_work(&inet6addr_work->work);
6432 return NOTIFY_DONE;
6435 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6436 unsigned long event, void *ptr)
6438 struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6439 struct net_device *dev = i6vi->i6vi_dev->dev;
6440 struct mlxsw_sp *mlxsw_sp;
6441 struct mlxsw_sp_rif *rif;
6442 int err = 0;
6444 mlxsw_sp = mlxsw_sp_lower_get(dev);
6445 if (!mlxsw_sp)
6446 goto out;
6448 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6449 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6450 goto out;
6452 err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6453 out:
6454 return notifier_from_errno(err);
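/* Editing a RIF in place follows the same RITR query / modify / write
 * pattern used when disabling one, here to update the MAC and MTU.
 */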
6457 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6458 const char *mac, int mtu)
6460 char ritr_pl[MLXSW_REG_RITR_LEN];
6461 int err;
6463 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6464 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6465 if (err)
6466 return err;
6468 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6469 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6470 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6471 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6474 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6476 struct mlxsw_sp *mlxsw_sp;
6477 struct mlxsw_sp_rif *rif;
6478 u16 fid_index;
6479 int err;
6481 mlxsw_sp = mlxsw_sp_lower_get(dev);
6482 if (!mlxsw_sp)
6483 return 0;
6485 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6486 if (!rif)
6487 return 0;
6488 fid_index = mlxsw_sp_fid_index(rif->fid);
6490 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6491 if (err)
6492 return err;
6494 err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6495 dev->mtu);
6496 if (err)
6497 goto err_rif_edit;
6499 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6500 if (err)
6501 goto err_rif_fdb_op;
	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
	}
6514 ether_addr_copy(rif->addr, dev->dev_addr);
6515 rif->mtu = dev->mtu;
6517 netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6519 return 0;
6521 err_rif_fdb_op:
6522 mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6523 err_rif_edit:
6524 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6525 return err;
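/* VRF enslavement is modelled as a RIF replacement: the handlers below
 * replay NETDEV_DOWN and NETDEV_UP so that the RIF is recreated in the
 * FIB table of the new L3 master.
 */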
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
				  struct net_device *l3_dev,
				  struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif *rif;

	/* If netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (rif)
		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);

	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
}
6544 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6545 struct net_device *l3_dev)
6547 struct mlxsw_sp_rif *rif;
6549 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6550 if (!rif)
6551 return;
6552 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6555 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6556 struct netdev_notifier_changeupper_info *info)
6558 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6559 int err = 0;
6561 if (!mlxsw_sp)
6562 return 0;
6564 switch (event) {
6565 case NETDEV_PRECHANGEUPPER:
6566 return 0;
6567 case NETDEV_CHANGEUPPER:
6568 if (info->linking) {
6569 struct netlink_ext_ack *extack;
6571 extack = netdev_notifier_info_to_extack(&info->info);
6572 err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6573 } else {
6574 mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
6576 break;
6579 return err;
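/* What follows are the mlxsw_sp_rif_ops implementations - sub-port, VLAN,
 * FID and IP-in-IP loopback - that mlxsw_sp_rif_create() dispatches to
 * based on the type returned by mlxsw_sp_dev_rif_type().
 */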
6582 static struct mlxsw_sp_rif_subport *
6583 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6585 return container_of(rif, struct mlxsw_sp_rif_subport, common);
6588 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6589 const struct mlxsw_sp_rif_params *params)
6591 struct mlxsw_sp_rif_subport *rif_subport;
6593 rif_subport = mlxsw_sp_rif_subport_rif(rif);
6594 rif_subport->vid = params->vid;
6595 rif_subport->lag = params->lag;
6596 if (params->lag)
6597 rif_subport->lag_id = params->lag_id;
6598 else
6599 rif_subport->system_port = params->system_port;
6602 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6604 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6605 struct mlxsw_sp_rif_subport *rif_subport;
6606 char ritr_pl[MLXSW_REG_RITR_LEN];
6608 rif_subport = mlxsw_sp_rif_subport_rif(rif);
6609 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6610 rif->rif_index, rif->vr_id, rif->dev->mtu);
6611 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6612 mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6613 rif_subport->lag ? rif_subport->lag_id :
6614 rif_subport->system_port,
6615 rif_subport->vid);
6617 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6620 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6622 int err;
6624 err = mlxsw_sp_rif_subport_op(rif, true);
6625 if (err)
6626 return err;
6628 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6629 mlxsw_sp_fid_index(rif->fid), true);
6630 if (err)
6631 goto err_rif_fdb_op;
6633 mlxsw_sp_fid_rif_set(rif->fid, rif);
6634 return 0;
6636 err_rif_fdb_op:
6637 mlxsw_sp_rif_subport_op(rif, false);
6638 return err;
6641 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6643 struct mlxsw_sp_fid *fid = rif->fid;
6645 mlxsw_sp_fid_rif_set(fid, NULL);
6646 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6647 mlxsw_sp_fid_index(fid), false);
6648 mlxsw_sp_rif_subport_op(rif, false);
6651 static struct mlxsw_sp_fid *
6652 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif)
6654 return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
6666 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
6667 enum mlxsw_reg_ritr_if_type type,
6668 u16 vid_fid, bool enable)
6670 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6671 char ritr_pl[MLXSW_REG_RITR_LEN];
6673 mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
6674 rif->dev->mtu);
6675 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6676 mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
6678 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
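/* The "router port" is a reserved local port one past the last physical
 * port. It is added as a member to a FID's MC and BC flood tables so that
 * flooded packets also reach the router.
 */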
6681 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
6683 return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
6686 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
6688 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6689 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6690 int err;
6692 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
6693 if (err)
6694 return err;
6696 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6697 mlxsw_sp_router_port(mlxsw_sp), true);
6698 if (err)
6699 goto err_fid_mc_flood_set;
6701 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6702 mlxsw_sp_router_port(mlxsw_sp), true);
6703 if (err)
6704 goto err_fid_bc_flood_set;
6706 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6707 mlxsw_sp_fid_index(rif->fid), true);
6708 if (err)
6709 goto err_rif_fdb_op;
6711 mlxsw_sp_fid_rif_set(rif->fid, rif);
6712 return 0;
6714 err_rif_fdb_op:
6715 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6716 mlxsw_sp_router_port(mlxsw_sp), false);
6717 err_fid_bc_flood_set:
6718 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6719 mlxsw_sp_router_port(mlxsw_sp), false);
6720 err_fid_mc_flood_set:
6721 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6722 return err;
6725 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
6727 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6728 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6729 struct mlxsw_sp_fid *fid = rif->fid;
6731 mlxsw_sp_fid_rif_set(fid, NULL);
6732 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6733 mlxsw_sp_fid_index(fid), false);
6734 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6735 mlxsw_sp_router_port(mlxsw_sp), false);
6736 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6737 mlxsw_sp_router_port(mlxsw_sp), false);
6738 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
6741 static struct mlxsw_sp_fid *
6742 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif)
6744 u16 vid = is_vlan_dev(rif->dev) ? vlan_dev_vlan_id(rif->dev) : 1;
6746 return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
};
6757 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
6759 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6760 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6761 int err;
6763 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
6764 true);
6765 if (err)
6766 return err;
6768 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6769 mlxsw_sp_router_port(mlxsw_sp), true);
6770 if (err)
6771 goto err_fid_mc_flood_set;
6773 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6774 mlxsw_sp_router_port(mlxsw_sp), true);
6775 if (err)
6776 goto err_fid_bc_flood_set;
6778 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6779 mlxsw_sp_fid_index(rif->fid), true);
6780 if (err)
6781 goto err_rif_fdb_op;
6783 mlxsw_sp_fid_rif_set(rif->fid, rif);
6784 return 0;
6786 err_rif_fdb_op:
6787 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6788 mlxsw_sp_router_port(mlxsw_sp), false);
6789 err_fid_bc_flood_set:
6790 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6791 mlxsw_sp_router_port(mlxsw_sp), false);
6792 err_fid_mc_flood_set:
6793 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6794 return err;
6797 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
6799 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
6800 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6801 struct mlxsw_sp_fid *fid = rif->fid;
6803 mlxsw_sp_fid_rif_set(fid, NULL);
6804 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6805 mlxsw_sp_fid_index(fid), false);
6806 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
6807 mlxsw_sp_router_port(mlxsw_sp), false);
6808 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
6809 mlxsw_sp_router_port(mlxsw_sp), false);
6810 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
6813 static struct mlxsw_sp_fid *
6814 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif)
6816 return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
};
6827 static struct mlxsw_sp_rif_ipip_lb *
6828 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
6830 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
6833 static void
6834 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
6835 const struct mlxsw_sp_rif_params *params)
6837 struct mlxsw_sp_rif_params_ipip_lb *params_lb;
6838 struct mlxsw_sp_rif_ipip_lb *rif_lb;
6840 params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
6841 common);
6842 rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
6843 rif_lb->lb_config = params_lb->lb_config;
6846 static int
6847 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
6848 struct mlxsw_sp_vr *ul_vr, bool enable)
6850 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
6851 struct mlxsw_sp_rif *rif = &lb_rif->common;
6852 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6853 char ritr_pl[MLXSW_REG_RITR_LEN];
6854 u32 saddr4;
6856 switch (lb_cf.ul_protocol) {
6857 case MLXSW_SP_L3_PROTO_IPV4:
6858 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
6859 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
6860 rif->rif_index, rif->vr_id, rif->dev->mtu);
6861 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
6862 MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
6863 ul_vr->id, saddr4, lb_cf.okey);
6864 break;
6866 case MLXSW_SP_L3_PROTO_IPV6:
6867 return -EAFNOSUPPORT;
6870 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6873 static int
6874 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
6876 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6877 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
6878 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6879 struct mlxsw_sp_vr *ul_vr;
6880 int err;
6882 ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
6883 if (IS_ERR(ul_vr))
6884 return PTR_ERR(ul_vr);
6886 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
6887 if (err)
6888 goto err_loopback_op;
6890 lb_rif->ul_vr_id = ul_vr->id;
6891 ++ul_vr->rif_count;
6892 return 0;
6894 err_loopback_op:
6895 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
6896 return err;
6899 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
6901 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
6902 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6903 struct mlxsw_sp_vr *ul_vr;
6905 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
6906 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
6908 --ul_vr->rif_count;
6909 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup			= mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
6927 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
6929 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
6931 mlxsw_sp->router->rifs = kcalloc(max_rifs,
6932 sizeof(struct mlxsw_sp_rif *),
6933 GFP_KERNEL);
6934 if (!mlxsw_sp->router->rifs)
6935 return -ENOMEM;
6937 mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
6939 return 0;
6942 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
6944 int i;
6946 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6947 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
6949 kfree(mlxsw_sp->router->rifs);
6952 static int
6953 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
6955 char tigcr_pl[MLXSW_REG_TIGCR_LEN];
6957 mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
6958 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
6961 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
6963 mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
6964 INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
6965 return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
6968 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
6970 WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
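/* Multipath hashing is configured through the RECR2 register with a random
 * seed. For IPv4 the kernel's fib_multipath_hash_policy sysctl selects
 * between an L3-only hash and a 5-tuple hash; for IPv6 the addresses, flow
 * label and next header are always used.
 */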
6986 #ifdef CONFIG_IP_ROUTE_MULTIPATH
6987 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
6989 mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
6992 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
6994 mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
6997 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
6999 bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7001 mlxsw_sp_mp_hash_header_set(recr2_pl,
7002 MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7003 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7004 mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7005 mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7006 if (only_l3)
7007 return;
7008 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7009 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7010 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7011 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7014 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7016 mlxsw_sp_mp_hash_header_set(recr2_pl,
7017 MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7018 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7019 mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7020 mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7021 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7022 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7025 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7027 char recr2_pl[MLXSW_REG_RECR2_LEN];
7028 u32 seed;
7030 get_random_bytes(&seed, sizeof(seed));
7031 mlxsw_reg_recr2_pack(recr2_pl, seed);
7032 mlxsw_sp_mp4_hash_init(recr2_pl);
7033 mlxsw_sp_mp6_hash_init(recr2_pl);
7035 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7037 #else
7038 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7040 return 0;
7042 #endif
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW determines switch priority from the DSCP bits, but the kernel
	 * still derives it from the full ToS field. Since the bit layouts
	 * differ, translate to the value ToS would observe by skipping the
	 * two least-significant (ECN) bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
7062 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7064 char rgcr_pl[MLXSW_REG_RGCR_LEN];
7065 u64 max_rifs;
7066 int err;
7068 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7069 return -EIO;
7070 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7072 mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7073 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7074 mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
7075 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7076 if (err)
7077 return err;
7078 return 0;
7081 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7083 char rgcr_pl[MLXSW_REG_RGCR_LEN];
7085 mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7086 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
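/* Initialization is bottom-up: RIFs, tunnels, nexthop tables, LPM trees,
 * multicast routing, virtual routers and the neighbour code must all be
 * ready before the netevent and FIB notifiers are registered, since those
 * start delivering events immediately. mlxsw_sp_router_fini() tears the
 * same stack down in reverse.
 */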
7089 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7091 struct mlxsw_sp_router *router;
7092 int err;
7094 router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7095 if (!router)
7096 return -ENOMEM;
7097 mlxsw_sp->router = router;
7098 router->mlxsw_sp = mlxsw_sp;
7100 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7101 err = __mlxsw_sp_router_init(mlxsw_sp);
7102 if (err)
7103 goto err_router_init;
7105 err = mlxsw_sp_rifs_init(mlxsw_sp);
7106 if (err)
7107 goto err_rifs_init;
7109 err = mlxsw_sp_ipips_init(mlxsw_sp);
7110 if (err)
7111 goto err_ipips_init;
7113 err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7114 &mlxsw_sp_nexthop_ht_params);
7115 if (err)
7116 goto err_nexthop_ht_init;
7118 err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7119 &mlxsw_sp_nexthop_group_ht_params);
7120 if (err)
7121 goto err_nexthop_group_ht_init;
7123 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7124 err = mlxsw_sp_lpm_init(mlxsw_sp);
7125 if (err)
7126 goto err_lpm_init;
7128 err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7129 if (err)
7130 goto err_mr_init;
7132 err = mlxsw_sp_vrs_init(mlxsw_sp);
7133 if (err)
7134 goto err_vrs_init;
7136 err = mlxsw_sp_neigh_init(mlxsw_sp);
7137 if (err)
7138 goto err_neigh_init;
7140 mlxsw_sp->router->netevent_nb.notifier_call =
7141 mlxsw_sp_router_netevent_event;
7142 err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7143 if (err)
7144 goto err_register_netevent_notifier;
7146 err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7147 if (err)
7148 goto err_mp_hash_init;
7150 err = mlxsw_sp_dscp_init(mlxsw_sp);
7151 if (err)
7152 goto err_dscp_init;
7154 mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7155 err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7156 mlxsw_sp_router_fib_dump_flush);
7157 if (err)
7158 goto err_register_fib_notifier;
7160 return 0;
7162 err_register_fib_notifier:
7163 err_dscp_init:
7164 err_mp_hash_init:
7165 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7166 err_register_netevent_notifier:
7167 mlxsw_sp_neigh_fini(mlxsw_sp);
7168 err_neigh_init:
7169 mlxsw_sp_vrs_fini(mlxsw_sp);
7170 err_vrs_init:
7171 mlxsw_sp_mr_fini(mlxsw_sp);
7172 err_mr_init:
7173 mlxsw_sp_lpm_fini(mlxsw_sp);
7174 err_lpm_init:
7175 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7176 err_nexthop_group_ht_init:
7177 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7178 err_nexthop_ht_init:
7179 mlxsw_sp_ipips_fini(mlxsw_sp);
7180 err_ipips_init:
7181 mlxsw_sp_rifs_fini(mlxsw_sp);
7182 err_rifs_init:
7183 __mlxsw_sp_router_fini(mlxsw_sp);
7184 err_router_init:
7185 kfree(mlxsw_sp->router);
7186 return err;
7189 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7191 unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7192 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7193 mlxsw_sp_neigh_fini(mlxsw_sp);
7194 mlxsw_sp_vrs_fini(mlxsw_sp);
7195 mlxsw_sp_mr_fini(mlxsw_sp);
7196 mlxsw_sp_lpm_fini(mlxsw_sp);
7197 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7198 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7199 mlxsw_sp_ipips_fini(mlxsw_sp);
7200 mlxsw_sp_rifs_fini(mlxsw_sp);
7201 __mlxsw_sp_router_fini(mlxsw_sp);
7202 kfree(mlxsw_sp->router);