1 // SPDX-License-Identifier: GPL-2.0
2 /* Generic nexthop implementation
4 * Copyright (c) 2017-19 Cumulus Networks
5 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
8 #include <linux/nexthop.h>
9 #include <linux/rtnetlink.h>
10 #include <linux/slab.h>
12 #include <net/ipv6_stubs.h>
13 #include <net/lwtunnel.h>
14 #include <net/ndisc.h>
15 #include <net/nexthop.h>
16 #include <net/route.h>
19 static void remove_nexthop(struct net
*net
, struct nexthop
*nh
,
20 struct nl_info
*nlinfo
);
22 #define NH_DEV_HASHBITS 8
23 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
25 static const struct nla_policy rtm_nh_policy
[NHA_MAX
+ 1] = {
26 [NHA_ID
] = { .type
= NLA_U32
},
27 [NHA_GROUP
] = { .type
= NLA_BINARY
},
28 [NHA_GROUP_TYPE
] = { .type
= NLA_U16
},
29 [NHA_BLACKHOLE
] = { .type
= NLA_FLAG
},
30 [NHA_OIF
] = { .type
= NLA_U32
},
31 [NHA_GATEWAY
] = { .type
= NLA_BINARY
},
32 [NHA_ENCAP_TYPE
] = { .type
= NLA_U16
},
33 [NHA_ENCAP
] = { .type
= NLA_NESTED
},
34 [NHA_GROUPS
] = { .type
= NLA_FLAG
},
35 [NHA_MASTER
] = { .type
= NLA_U32
},
36 [NHA_FDB
] = { .type
= NLA_FLAG
},
39 static bool nexthop_notifiers_is_empty(struct net
*net
)
41 return !net
->nexthop
.notifier_chain
.head
;
45 __nh_notifier_single_info_init(struct nh_notifier_single_info
*nh_info
,
46 const struct nexthop
*nh
)
48 struct nh_info
*nhi
= rtnl_dereference(nh
->nh_info
);
50 nh_info
->dev
= nhi
->fib_nhc
.nhc_dev
;
51 nh_info
->gw_family
= nhi
->fib_nhc
.nhc_gw_family
;
52 if (nh_info
->gw_family
== AF_INET
)
53 nh_info
->ipv4
= nhi
->fib_nhc
.nhc_gw
.ipv4
;
54 else if (nh_info
->gw_family
== AF_INET6
)
55 nh_info
->ipv6
= nhi
->fib_nhc
.nhc_gw
.ipv6
;
57 nh_info
->is_reject
= nhi
->reject_nh
;
58 nh_info
->is_fdb
= nhi
->fdb_nh
;
59 nh_info
->has_encap
= !!nhi
->fib_nhc
.nhc_lwtstate
;
62 static int nh_notifier_single_info_init(struct nh_notifier_info
*info
,
63 const struct nexthop
*nh
)
65 info
->nh
= kzalloc(sizeof(*info
->nh
), GFP_KERNEL
);
69 __nh_notifier_single_info_init(info
->nh
, nh
);
74 static void nh_notifier_single_info_fini(struct nh_notifier_info
*info
)
79 static int nh_notifier_grp_info_init(struct nh_notifier_info
*info
,
80 const struct nexthop
*nh
)
82 struct nh_group
*nhg
= rtnl_dereference(nh
->nh_grp
);
83 u16 num_nh
= nhg
->num_nh
;
86 info
->nh_grp
= kzalloc(struct_size(info
->nh_grp
, nh_entries
, num_nh
),
91 info
->nh_grp
->num_nh
= num_nh
;
92 info
->nh_grp
->is_fdb
= nhg
->fdb_nh
;
94 for (i
= 0; i
< num_nh
; i
++) {
95 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
97 info
->nh_grp
->nh_entries
[i
].id
= nhge
->nh
->id
;
98 info
->nh_grp
->nh_entries
[i
].weight
= nhge
->weight
;
99 __nh_notifier_single_info_init(&info
->nh_grp
->nh_entries
[i
].nh
,
106 static void nh_notifier_grp_info_fini(struct nh_notifier_info
*info
)
111 static int nh_notifier_info_init(struct nh_notifier_info
*info
,
112 const struct nexthop
*nh
)
115 info
->is_grp
= nh
->is_group
;
118 return nh_notifier_grp_info_init(info
, nh
);
120 return nh_notifier_single_info_init(info
, nh
);
123 static void nh_notifier_info_fini(struct nh_notifier_info
*info
)
126 nh_notifier_grp_info_fini(info
);
128 nh_notifier_single_info_fini(info
);
131 static int call_nexthop_notifiers(struct net
*net
,
132 enum nexthop_event_type event_type
,
134 struct netlink_ext_ack
*extack
)
136 struct nh_notifier_info info
= {
144 if (nexthop_notifiers_is_empty(net
))
147 err
= nh_notifier_info_init(&info
, nh
);
149 NL_SET_ERR_MSG(extack
, "Failed to initialize nexthop notifier info");
153 err
= blocking_notifier_call_chain(&net
->nexthop
.notifier_chain
,
155 nh_notifier_info_fini(&info
);
157 return notifier_to_errno(err
);
160 static int call_nexthop_notifier(struct notifier_block
*nb
, struct net
*net
,
161 enum nexthop_event_type event_type
,
163 struct netlink_ext_ack
*extack
)
165 struct nh_notifier_info info
= {
171 err
= nh_notifier_info_init(&info
, nh
);
175 err
= nb
->notifier_call(nb
, event_type
, &info
);
176 nh_notifier_info_fini(&info
);
178 return notifier_to_errno(err
);
181 static unsigned int nh_dev_hashfn(unsigned int val
)
183 unsigned int mask
= NH_DEV_HASHSIZE
- 1;
186 (val
>> NH_DEV_HASHBITS
) ^
187 (val
>> (NH_DEV_HASHBITS
* 2))) & mask
;
190 static void nexthop_devhash_add(struct net
*net
, struct nh_info
*nhi
)
192 struct net_device
*dev
= nhi
->fib_nhc
.nhc_dev
;
193 struct hlist_head
*head
;
198 hash
= nh_dev_hashfn(dev
->ifindex
);
199 head
= &net
->nexthop
.devhash
[hash
];
200 hlist_add_head(&nhi
->dev_hash
, head
);
203 static void nexthop_free_mpath(struct nexthop
*nh
)
205 struct nh_group
*nhg
;
208 nhg
= rcu_dereference_raw(nh
->nh_grp
);
209 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
210 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
212 WARN_ON(!list_empty(&nhge
->nh_list
));
213 nexthop_put(nhge
->nh
);
216 WARN_ON(nhg
->spare
== nhg
);
222 static void nexthop_free_single(struct nexthop
*nh
)
226 nhi
= rcu_dereference_raw(nh
->nh_info
);
227 switch (nhi
->family
) {
229 fib_nh_release(nh
->net
, &nhi
->fib_nh
);
232 ipv6_stub
->fib6_nh_release(&nhi
->fib6_nh
);
238 void nexthop_free_rcu(struct rcu_head
*head
)
240 struct nexthop
*nh
= container_of(head
, struct nexthop
, rcu
);
243 nexthop_free_mpath(nh
);
245 nexthop_free_single(nh
);
249 EXPORT_SYMBOL_GPL(nexthop_free_rcu
);
251 static struct nexthop
*nexthop_alloc(void)
255 nh
= kzalloc(sizeof(struct nexthop
), GFP_KERNEL
);
257 INIT_LIST_HEAD(&nh
->fi_list
);
258 INIT_LIST_HEAD(&nh
->f6i_list
);
259 INIT_LIST_HEAD(&nh
->grp_list
);
260 INIT_LIST_HEAD(&nh
->fdb_list
);
265 static struct nh_group
*nexthop_grp_alloc(u16 num_nh
)
267 struct nh_group
*nhg
;
269 nhg
= kzalloc(struct_size(nhg
, nh_entries
, num_nh
), GFP_KERNEL
);
271 nhg
->num_nh
= num_nh
;
276 static void nh_base_seq_inc(struct net
*net
)
278 while (++net
->nexthop
.seq
== 0)
282 /* no reference taken; rcu lock or rtnl must be held */
283 struct nexthop
*nexthop_find_by_id(struct net
*net
, u32 id
)
285 struct rb_node
**pp
, *parent
= NULL
, *next
;
287 pp
= &net
->nexthop
.rb_root
.rb_node
;
291 next
= rcu_dereference_raw(*pp
);
296 nh
= rb_entry(parent
, struct nexthop
, rb_node
);
299 else if (id
> nh
->id
)
300 pp
= &next
->rb_right
;
306 EXPORT_SYMBOL_GPL(nexthop_find_by_id
);
308 /* used for auto id allocation; called with rtnl held */
309 static u32
nh_find_unused_id(struct net
*net
)
311 u32 id_start
= net
->nexthop
.last_id_allocated
;
314 net
->nexthop
.last_id_allocated
++;
315 if (net
->nexthop
.last_id_allocated
== id_start
)
318 if (!nexthop_find_by_id(net
, net
->nexthop
.last_id_allocated
))
319 return net
->nexthop
.last_id_allocated
;
324 static int nla_put_nh_group(struct sk_buff
*skb
, struct nh_group
*nhg
)
326 struct nexthop_grp
*p
;
327 size_t len
= nhg
->num_nh
* sizeof(*p
);
333 group_type
= NEXTHOP_GRP_TYPE_MPATH
;
335 if (nla_put_u16(skb
, NHA_GROUP_TYPE
, group_type
))
336 goto nla_put_failure
;
338 nla
= nla_reserve(skb
, NHA_GROUP
, len
);
340 goto nla_put_failure
;
343 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
344 p
->id
= nhg
->nh_entries
[i
].nh
->id
;
345 p
->weight
= nhg
->nh_entries
[i
].weight
- 1;
355 static int nh_fill_node(struct sk_buff
*skb
, struct nexthop
*nh
,
356 int event
, u32 portid
, u32 seq
, unsigned int nlflags
)
358 struct fib6_nh
*fib6_nh
;
359 struct fib_nh
*fib_nh
;
360 struct nlmsghdr
*nlh
;
364 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*nhm
), nlflags
);
368 nhm
= nlmsg_data(nlh
);
369 nhm
->nh_family
= AF_UNSPEC
;
370 nhm
->nh_flags
= nh
->nh_flags
;
371 nhm
->nh_protocol
= nh
->protocol
;
375 if (nla_put_u32(skb
, NHA_ID
, nh
->id
))
376 goto nla_put_failure
;
379 struct nh_group
*nhg
= rtnl_dereference(nh
->nh_grp
);
381 if (nhg
->fdb_nh
&& nla_put_flag(skb
, NHA_FDB
))
382 goto nla_put_failure
;
383 if (nla_put_nh_group(skb
, nhg
))
384 goto nla_put_failure
;
388 nhi
= rtnl_dereference(nh
->nh_info
);
389 nhm
->nh_family
= nhi
->family
;
390 if (nhi
->reject_nh
) {
391 if (nla_put_flag(skb
, NHA_BLACKHOLE
))
392 goto nla_put_failure
;
394 } else if (nhi
->fdb_nh
) {
395 if (nla_put_flag(skb
, NHA_FDB
))
396 goto nla_put_failure
;
398 const struct net_device
*dev
;
400 dev
= nhi
->fib_nhc
.nhc_dev
;
401 if (dev
&& nla_put_u32(skb
, NHA_OIF
, dev
->ifindex
))
402 goto nla_put_failure
;
405 nhm
->nh_scope
= nhi
->fib_nhc
.nhc_scope
;
406 switch (nhi
->family
) {
408 fib_nh
= &nhi
->fib_nh
;
409 if (fib_nh
->fib_nh_gw_family
&&
410 nla_put_be32(skb
, NHA_GATEWAY
, fib_nh
->fib_nh_gw4
))
411 goto nla_put_failure
;
415 fib6_nh
= &nhi
->fib6_nh
;
416 if (fib6_nh
->fib_nh_gw_family
&&
417 nla_put_in6_addr(skb
, NHA_GATEWAY
, &fib6_nh
->fib_nh_gw6
))
418 goto nla_put_failure
;
422 if (nhi
->fib_nhc
.nhc_lwtstate
&&
423 lwtunnel_fill_encap(skb
, nhi
->fib_nhc
.nhc_lwtstate
,
424 NHA_ENCAP
, NHA_ENCAP_TYPE
) < 0)
425 goto nla_put_failure
;
432 nlmsg_cancel(skb
, nlh
);
436 static size_t nh_nlmsg_size_grp(struct nexthop
*nh
)
438 struct nh_group
*nhg
= rtnl_dereference(nh
->nh_grp
);
439 size_t sz
= sizeof(struct nexthop_grp
) * nhg
->num_nh
;
441 return nla_total_size(sz
) +
442 nla_total_size(2); /* NHA_GROUP_TYPE */
445 static size_t nh_nlmsg_size_single(struct nexthop
*nh
)
447 struct nh_info
*nhi
= rtnl_dereference(nh
->nh_info
);
450 /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
451 * are mutually exclusive
453 sz
= nla_total_size(4); /* NHA_OIF */
455 switch (nhi
->family
) {
457 if (nhi
->fib_nh
.fib_nh_gw_family
)
458 sz
+= nla_total_size(4); /* NHA_GATEWAY */
463 if (nhi
->fib6_nh
.fib_nh_gw_family
)
464 sz
+= nla_total_size(sizeof(const struct in6_addr
));
468 if (nhi
->fib_nhc
.nhc_lwtstate
) {
469 sz
+= lwtunnel_get_encap_size(nhi
->fib_nhc
.nhc_lwtstate
);
470 sz
+= nla_total_size(2); /* NHA_ENCAP_TYPE */
476 static size_t nh_nlmsg_size(struct nexthop
*nh
)
478 size_t sz
= NLMSG_ALIGN(sizeof(struct nhmsg
));
480 sz
+= nla_total_size(4); /* NHA_ID */
483 sz
+= nh_nlmsg_size_grp(nh
);
485 sz
+= nh_nlmsg_size_single(nh
);
490 static void nexthop_notify(int event
, struct nexthop
*nh
, struct nl_info
*info
)
492 unsigned int nlflags
= info
->nlh
? info
->nlh
->nlmsg_flags
: 0;
493 u32 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
497 skb
= nlmsg_new(nh_nlmsg_size(nh
), gfp_any());
501 err
= nh_fill_node(skb
, nh
, event
, info
->portid
, seq
, nlflags
);
503 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
504 WARN_ON(err
== -EMSGSIZE
);
509 rtnl_notify(skb
, info
->nl_net
, info
->portid
, RTNLGRP_NEXTHOP
,
510 info
->nlh
, gfp_any());
514 rtnl_set_sk_err(info
->nl_net
, RTNLGRP_NEXTHOP
, err
);
517 static bool valid_group_nh(struct nexthop
*nh
, unsigned int npaths
,
518 bool *is_fdb
, struct netlink_ext_ack
*extack
)
521 struct nh_group
*nhg
= rtnl_dereference(nh
->nh_grp
);
523 /* nested multipath (group within a group) is not
527 NL_SET_ERR_MSG(extack
,
528 "Multipath group can not be a nexthop within a group");
531 *is_fdb
= nhg
->fdb_nh
;
533 struct nh_info
*nhi
= rtnl_dereference(nh
->nh_info
);
535 if (nhi
->reject_nh
&& npaths
> 1) {
536 NL_SET_ERR_MSG(extack
,
537 "Blackhole nexthop can not be used in a group with more than 1 path");
540 *is_fdb
= nhi
->fdb_nh
;
546 static int nh_check_attr_fdb_group(struct nexthop
*nh
, u8
*nh_family
,
547 struct netlink_ext_ack
*extack
)
551 nhi
= rtnl_dereference(nh
->nh_info
);
554 NL_SET_ERR_MSG(extack
, "FDB nexthop group can only have fdb nexthops");
558 if (*nh_family
== AF_UNSPEC
) {
559 *nh_family
= nhi
->family
;
560 } else if (*nh_family
!= nhi
->family
) {
561 NL_SET_ERR_MSG(extack
, "FDB nexthop group cannot have mixed family nexthops");
568 static int nh_check_attr_group(struct net
*net
, struct nlattr
*tb
[],
569 struct netlink_ext_ack
*extack
)
571 unsigned int len
= nla_len(tb
[NHA_GROUP
]);
572 u8 nh_family
= AF_UNSPEC
;
573 struct nexthop_grp
*nhg
;
577 if (!len
|| len
& (sizeof(struct nexthop_grp
) - 1)) {
578 NL_SET_ERR_MSG(extack
,
579 "Invalid length for nexthop group attribute");
583 /* convert len to number of nexthop ids */
586 nhg
= nla_data(tb
[NHA_GROUP
]);
587 for (i
= 0; i
< len
; ++i
) {
588 if (nhg
[i
].resvd1
|| nhg
[i
].resvd2
) {
589 NL_SET_ERR_MSG(extack
, "Reserved fields in nexthop_grp must be 0");
592 if (nhg
[i
].weight
> 254) {
593 NL_SET_ERR_MSG(extack
, "Invalid value for weight");
596 for (j
= i
+ 1; j
< len
; ++j
) {
597 if (nhg
[i
].id
== nhg
[j
].id
) {
598 NL_SET_ERR_MSG(extack
, "Nexthop id can not be used twice in a group");
606 nhg
= nla_data(tb
[NHA_GROUP
]);
607 for (i
= 0; i
< len
; ++i
) {
611 nh
= nexthop_find_by_id(net
, nhg
[i
].id
);
613 NL_SET_ERR_MSG(extack
, "Invalid nexthop id");
616 if (!valid_group_nh(nh
, len
, &is_fdb_nh
, extack
))
619 if (nhg_fdb
&& nh_check_attr_fdb_group(nh
, &nh_family
, extack
))
622 if (!nhg_fdb
&& is_fdb_nh
) {
623 NL_SET_ERR_MSG(extack
, "Non FDB nexthop group cannot have fdb nexthops");
627 for (i
= NHA_GROUP_TYPE
+ 1; i
< __NHA_MAX
; ++i
) {
632 NL_SET_ERR_MSG(extack
,
633 "No other attributes can be set in nexthop groups");
640 static bool ipv6_good_nh(const struct fib6_nh
*nh
)
642 int state
= NUD_REACHABLE
;
647 n
= __ipv6_neigh_lookup_noref_stub(nh
->fib_nh_dev
, &nh
->fib_nh_gw6
);
649 state
= n
->nud_state
;
651 rcu_read_unlock_bh();
653 return !!(state
& NUD_VALID
);
656 static bool ipv4_good_nh(const struct fib_nh
*nh
)
658 int state
= NUD_REACHABLE
;
663 n
= __ipv4_neigh_lookup_noref(nh
->fib_nh_dev
,
664 (__force u32
)nh
->fib_nh_gw4
);
666 state
= n
->nud_state
;
668 rcu_read_unlock_bh();
670 return !!(state
& NUD_VALID
);
673 struct nexthop
*nexthop_select_path(struct nexthop
*nh
, int hash
)
675 struct nexthop
*rc
= NULL
;
676 struct nh_group
*nhg
;
682 nhg
= rcu_dereference(nh
->nh_grp
);
683 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
684 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
687 if (hash
> atomic_read(&nhge
->upper_bound
))
690 nhi
= rcu_dereference(nhge
->nh
->nh_info
);
694 /* nexthops always check if it is good and does
695 * not rely on a sysctl for this behavior
697 switch (nhi
->family
) {
699 if (ipv4_good_nh(&nhi
->fib_nh
))
703 if (ipv6_good_nh(&nhi
->fib6_nh
))
714 EXPORT_SYMBOL_GPL(nexthop_select_path
);
716 int nexthop_for_each_fib6_nh(struct nexthop
*nh
,
717 int (*cb
)(struct fib6_nh
*nh
, void *arg
),
724 struct nh_group
*nhg
;
727 nhg
= rcu_dereference_rtnl(nh
->nh_grp
);
728 for (i
= 0; i
< nhg
->num_nh
; i
++) {
729 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
731 nhi
= rcu_dereference_rtnl(nhge
->nh
->nh_info
);
732 err
= cb(&nhi
->fib6_nh
, arg
);
737 nhi
= rcu_dereference_rtnl(nh
->nh_info
);
738 err
= cb(&nhi
->fib6_nh
, arg
);
745 EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh
);
747 static int check_src_addr(const struct in6_addr
*saddr
,
748 struct netlink_ext_ack
*extack
)
750 if (!ipv6_addr_any(saddr
)) {
751 NL_SET_ERR_MSG(extack
, "IPv6 routes using source address can not use nexthop objects");
757 int fib6_check_nexthop(struct nexthop
*nh
, struct fib6_config
*cfg
,
758 struct netlink_ext_ack
*extack
)
763 /* fib6_src is unique to a fib6_info and limits the ability to cache
764 * routes in fib6_nh within a nexthop that is potentially shared
765 * across multiple fib entries. If the config wants to use source
766 * routing it can not use nexthop objects. mlxsw also does not allow
767 * fib6_src on routes.
769 if (cfg
&& check_src_addr(&cfg
->fc_src
, extack
) < 0)
773 struct nh_group
*nhg
;
775 nhg
= rtnl_dereference(nh
->nh_grp
);
778 is_fdb_nh
= nhg
->fdb_nh
;
780 nhi
= rtnl_dereference(nh
->nh_info
);
781 if (nhi
->family
== AF_INET
)
783 is_fdb_nh
= nhi
->fdb_nh
;
787 NL_SET_ERR_MSG(extack
, "Route cannot point to a fdb nexthop");
793 NL_SET_ERR_MSG(extack
, "IPv6 routes can not use an IPv4 nexthop");
796 EXPORT_SYMBOL_GPL(fib6_check_nexthop
);
798 /* if existing nexthop has ipv6 routes linked to it, need
799 * to verify this new spec works with ipv6
801 static int fib6_check_nh_list(struct nexthop
*old
, struct nexthop
*new,
802 struct netlink_ext_ack
*extack
)
804 struct fib6_info
*f6i
;
806 if (list_empty(&old
->f6i_list
))
809 list_for_each_entry(f6i
, &old
->f6i_list
, nh_list
) {
810 if (check_src_addr(&f6i
->fib6_src
.addr
, extack
) < 0)
814 return fib6_check_nexthop(new, NULL
, extack
);
817 static int nexthop_check_scope(struct nh_info
*nhi
, u8 scope
,
818 struct netlink_ext_ack
*extack
)
820 if (scope
== RT_SCOPE_HOST
&& nhi
->fib_nhc
.nhc_gw_family
) {
821 NL_SET_ERR_MSG(extack
,
822 "Route with host scope can not have a gateway");
826 if (nhi
->fib_nhc
.nhc_flags
& RTNH_F_ONLINK
&& scope
>= RT_SCOPE_LINK
) {
827 NL_SET_ERR_MSG(extack
, "Scope mismatch with nexthop");
834 /* Invoked by fib add code to verify nexthop by id is ok with
835 * config for prefix; parts of fib_check_nh not done when nexthop
838 int fib_check_nexthop(struct nexthop
*nh
, u8 scope
,
839 struct netlink_ext_ack
*extack
)
845 struct nh_group
*nhg
;
847 nhg
= rtnl_dereference(nh
->nh_grp
);
849 NL_SET_ERR_MSG(extack
, "Route cannot point to a fdb nexthop");
854 if (scope
== RT_SCOPE_HOST
) {
855 NL_SET_ERR_MSG(extack
, "Route with host scope can not have multiple nexthops");
860 /* all nexthops in a group have the same scope */
861 nhi
= rtnl_dereference(nhg
->nh_entries
[0].nh
->nh_info
);
862 err
= nexthop_check_scope(nhi
, scope
, extack
);
864 nhi
= rtnl_dereference(nh
->nh_info
);
866 NL_SET_ERR_MSG(extack
, "Route cannot point to a fdb nexthop");
870 err
= nexthop_check_scope(nhi
, scope
, extack
);
877 static int fib_check_nh_list(struct nexthop
*old
, struct nexthop
*new,
878 struct netlink_ext_ack
*extack
)
882 list_for_each_entry(fi
, &old
->fi_list
, nh_list
) {
885 err
= fib_check_nexthop(new, fi
->fib_scope
, extack
);
892 static void nh_group_rebalance(struct nh_group
*nhg
)
898 for (i
= 0; i
< nhg
->num_nh
; ++i
)
899 total
+= nhg
->nh_entries
[i
].weight
;
901 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
902 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
906 upper_bound
= DIV_ROUND_CLOSEST_ULL((u64
)w
<< 31, total
) - 1;
907 atomic_set(&nhge
->upper_bound
, upper_bound
);
911 static void remove_nh_grp_entry(struct net
*net
, struct nh_grp_entry
*nhge
,
912 struct nl_info
*nlinfo
)
914 struct nh_grp_entry
*nhges
, *new_nhges
;
915 struct nexthop
*nhp
= nhge
->nh_parent
;
916 struct netlink_ext_ack extack
;
917 struct nexthop
*nh
= nhge
->nh
;
918 struct nh_group
*nhg
, *newg
;
923 nhg
= rtnl_dereference(nhp
->nh_grp
);
926 /* last entry, keep it visible and remove the parent */
927 if (nhg
->num_nh
== 1) {
928 remove_nexthop(net
, nhp
, nlinfo
);
932 newg
->has_v4
= false;
933 newg
->mpath
= nhg
->mpath
;
934 newg
->fdb_nh
= nhg
->fdb_nh
;
935 newg
->num_nh
= nhg
->num_nh
;
937 /* copy old entries to new except the one getting removed */
938 nhges
= nhg
->nh_entries
;
939 new_nhges
= newg
->nh_entries
;
940 for (i
= 0, j
= 0; i
< nhg
->num_nh
; ++i
) {
943 /* current nexthop getting removed */
944 if (nhg
->nh_entries
[i
].nh
== nh
) {
949 nhi
= rtnl_dereference(nhges
[i
].nh
->nh_info
);
950 if (nhi
->family
== AF_INET
)
953 list_del(&nhges
[i
].nh_list
);
954 new_nhges
[j
].nh_parent
= nhges
[i
].nh_parent
;
955 new_nhges
[j
].nh
= nhges
[i
].nh
;
956 new_nhges
[j
].weight
= nhges
[i
].weight
;
957 list_add(&new_nhges
[j
].nh_list
, &new_nhges
[j
].nh
->grp_list
);
961 nh_group_rebalance(newg
);
962 rcu_assign_pointer(nhp
->nh_grp
, newg
);
964 list_del(&nhge
->nh_list
);
965 nexthop_put(nhge
->nh
);
967 err
= call_nexthop_notifiers(net
, NEXTHOP_EVENT_REPLACE
, nhp
, &extack
);
969 pr_err("%s\n", extack
._msg
);
972 nexthop_notify(RTM_NEWNEXTHOP
, nhp
, nlinfo
);
975 static void remove_nexthop_from_groups(struct net
*net
, struct nexthop
*nh
,
976 struct nl_info
*nlinfo
)
978 struct nh_grp_entry
*nhge
, *tmp
;
980 list_for_each_entry_safe(nhge
, tmp
, &nh
->grp_list
, nh_list
)
981 remove_nh_grp_entry(net
, nhge
, nlinfo
);
983 /* make sure all see the newly published array before releasing rtnl */
987 static void remove_nexthop_group(struct nexthop
*nh
, struct nl_info
*nlinfo
)
989 struct nh_group
*nhg
= rcu_dereference_rtnl(nh
->nh_grp
);
990 int i
, num_nh
= nhg
->num_nh
;
992 for (i
= 0; i
< num_nh
; ++i
) {
993 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
995 if (WARN_ON(!nhge
->nh
))
998 list_del_init(&nhge
->nh_list
);
1002 /* not called for nexthop replace */
1003 static void __remove_nexthop_fib(struct net
*net
, struct nexthop
*nh
)
1005 struct fib6_info
*f6i
, *tmp
;
1006 bool do_flush
= false;
1007 struct fib_info
*fi
;
1009 list_for_each_entry(fi
, &nh
->fi_list
, nh_list
) {
1010 fi
->fib_flags
|= RTNH_F_DEAD
;
1016 /* ip6_del_rt removes the entry from this list hence the _safe */
1017 list_for_each_entry_safe(f6i
, tmp
, &nh
->f6i_list
, nh_list
) {
1018 /* __ip6_del_rt does a release, so do a hold here */
1019 fib6_info_hold(f6i
);
1020 ipv6_stub
->ip6_del_rt(net
, f6i
,
1021 !net
->ipv4
.sysctl_nexthop_compat_mode
);
1025 static void __remove_nexthop(struct net
*net
, struct nexthop
*nh
,
1026 struct nl_info
*nlinfo
)
1028 __remove_nexthop_fib(net
, nh
);
1031 remove_nexthop_group(nh
, nlinfo
);
1033 struct nh_info
*nhi
;
1035 nhi
= rtnl_dereference(nh
->nh_info
);
1036 if (nhi
->fib_nhc
.nhc_dev
)
1037 hlist_del(&nhi
->dev_hash
);
1039 remove_nexthop_from_groups(net
, nh
, nlinfo
);
1043 static void remove_nexthop(struct net
*net
, struct nexthop
*nh
,
1044 struct nl_info
*nlinfo
)
1046 call_nexthop_notifiers(net
, NEXTHOP_EVENT_DEL
, nh
, NULL
);
1048 /* remove from the tree */
1049 rb_erase(&nh
->rb_node
, &net
->nexthop
.rb_root
);
1052 nexthop_notify(RTM_DELNEXTHOP
, nh
, nlinfo
);
1054 __remove_nexthop(net
, nh
, nlinfo
);
1055 nh_base_seq_inc(net
);
1060 /* if any FIB entries reference this nexthop, any dst entries
1061 * need to be regenerated
1063 static void nh_rt_cache_flush(struct net
*net
, struct nexthop
*nh
)
1065 struct fib6_info
*f6i
;
1067 if (!list_empty(&nh
->fi_list
))
1068 rt_cache_flush(net
);
1070 list_for_each_entry(f6i
, &nh
->f6i_list
, nh_list
)
1071 ipv6_stub
->fib6_update_sernum(net
, f6i
);
1074 static int replace_nexthop_grp(struct net
*net
, struct nexthop
*old
,
1075 struct nexthop
*new,
1076 struct netlink_ext_ack
*extack
)
1078 struct nh_group
*oldg
, *newg
;
1081 if (!new->is_group
) {
1082 NL_SET_ERR_MSG(extack
, "Can not replace a nexthop group with a nexthop.");
1086 err
= call_nexthop_notifiers(net
, NEXTHOP_EVENT_REPLACE
, new, extack
);
1090 oldg
= rtnl_dereference(old
->nh_grp
);
1091 newg
= rtnl_dereference(new->nh_grp
);
1093 /* update parents - used by nexthop code for cleanup */
1094 for (i
= 0; i
< newg
->num_nh
; i
++)
1095 newg
->nh_entries
[i
].nh_parent
= old
;
1097 rcu_assign_pointer(old
->nh_grp
, newg
);
1099 for (i
= 0; i
< oldg
->num_nh
; i
++)
1100 oldg
->nh_entries
[i
].nh_parent
= new;
1102 rcu_assign_pointer(new->nh_grp
, oldg
);
1107 static void nh_group_v4_update(struct nh_group
*nhg
)
1109 struct nh_grp_entry
*nhges
;
1110 bool has_v4
= false;
1113 nhges
= nhg
->nh_entries
;
1114 for (i
= 0; i
< nhg
->num_nh
; i
++) {
1115 struct nh_info
*nhi
;
1117 nhi
= rtnl_dereference(nhges
[i
].nh
->nh_info
);
1118 if (nhi
->family
== AF_INET
)
1121 nhg
->has_v4
= has_v4
;
1124 static int replace_nexthop_single(struct net
*net
, struct nexthop
*old
,
1125 struct nexthop
*new,
1126 struct netlink_ext_ack
*extack
)
1128 u8 old_protocol
, old_nh_flags
;
1129 struct nh_info
*oldi
, *newi
;
1130 struct nh_grp_entry
*nhge
;
1133 if (new->is_group
) {
1134 NL_SET_ERR_MSG(extack
, "Can not replace a nexthop with a nexthop group.");
1138 err
= call_nexthop_notifiers(net
, NEXTHOP_EVENT_REPLACE
, new, extack
);
1142 /* Hardware flags were set on 'old' as 'new' is not in the red-black
1143 * tree. Therefore, inherit the flags from 'old' to 'new'.
1145 new->nh_flags
|= old
->nh_flags
& (RTNH_F_OFFLOAD
| RTNH_F_TRAP
);
1147 oldi
= rtnl_dereference(old
->nh_info
);
1148 newi
= rtnl_dereference(new->nh_info
);
1150 newi
->nh_parent
= old
;
1151 oldi
->nh_parent
= new;
1153 old_protocol
= old
->protocol
;
1154 old_nh_flags
= old
->nh_flags
;
1156 old
->protocol
= new->protocol
;
1157 old
->nh_flags
= new->nh_flags
;
1159 rcu_assign_pointer(old
->nh_info
, newi
);
1160 rcu_assign_pointer(new->nh_info
, oldi
);
1162 /* Send a replace notification for all the groups using the nexthop. */
1163 list_for_each_entry(nhge
, &old
->grp_list
, nh_list
) {
1164 struct nexthop
*nhp
= nhge
->nh_parent
;
1166 err
= call_nexthop_notifiers(net
, NEXTHOP_EVENT_REPLACE
, nhp
,
1172 /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
1173 * update IPv4 indication in all the groups using the nexthop.
1175 if (oldi
->family
== AF_INET
&& newi
->family
== AF_INET6
) {
1176 list_for_each_entry(nhge
, &old
->grp_list
, nh_list
) {
1177 struct nexthop
*nhp
= nhge
->nh_parent
;
1178 struct nh_group
*nhg
;
1180 nhg
= rtnl_dereference(nhp
->nh_grp
);
1181 nh_group_v4_update(nhg
);
1188 rcu_assign_pointer(new->nh_info
, newi
);
1189 rcu_assign_pointer(old
->nh_info
, oldi
);
1190 old
->nh_flags
= old_nh_flags
;
1191 old
->protocol
= old_protocol
;
1192 oldi
->nh_parent
= old
;
1193 newi
->nh_parent
= new;
1194 list_for_each_entry_continue_reverse(nhge
, &old
->grp_list
, nh_list
) {
1195 struct nexthop
*nhp
= nhge
->nh_parent
;
1197 call_nexthop_notifiers(net
, NEXTHOP_EVENT_REPLACE
, nhp
, extack
);
1199 call_nexthop_notifiers(net
, NEXTHOP_EVENT_REPLACE
, old
, extack
);
1203 static void __nexthop_replace_notify(struct net
*net
, struct nexthop
*nh
,
1204 struct nl_info
*info
)
1206 struct fib6_info
*f6i
;
1208 if (!list_empty(&nh
->fi_list
)) {
1209 struct fib_info
*fi
;
1211 /* expectation is a few fib_info per nexthop and then
1212 * a lot of routes per fib_info. So mark the fib_info
1213 * and then walk the fib tables once
1215 list_for_each_entry(fi
, &nh
->fi_list
, nh_list
)
1216 fi
->nh_updated
= true;
1218 fib_info_notify_update(net
, info
);
1220 list_for_each_entry(fi
, &nh
->fi_list
, nh_list
)
1221 fi
->nh_updated
= false;
1224 list_for_each_entry(f6i
, &nh
->f6i_list
, nh_list
)
1225 ipv6_stub
->fib6_rt_update(net
, f6i
, info
);
1228 /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
1229 * linked to this nexthop and for all groups that the nexthop
1232 static void nexthop_replace_notify(struct net
*net
, struct nexthop
*nh
,
1233 struct nl_info
*info
)
1235 struct nh_grp_entry
*nhge
;
1237 __nexthop_replace_notify(net
, nh
, info
);
1239 list_for_each_entry(nhge
, &nh
->grp_list
, nh_list
)
1240 __nexthop_replace_notify(net
, nhge
->nh_parent
, info
);
1243 static int replace_nexthop(struct net
*net
, struct nexthop
*old
,
1244 struct nexthop
*new, struct netlink_ext_ack
*extack
)
1246 bool new_is_reject
= false;
1247 struct nh_grp_entry
*nhge
;
1250 /* check that existing FIB entries are ok with the
1251 * new nexthop definition
1253 err
= fib_check_nh_list(old
, new, extack
);
1257 err
= fib6_check_nh_list(old
, new, extack
);
1261 if (!new->is_group
) {
1262 struct nh_info
*nhi
= rtnl_dereference(new->nh_info
);
1264 new_is_reject
= nhi
->reject_nh
;
1267 list_for_each_entry(nhge
, &old
->grp_list
, nh_list
) {
1268 /* if new nexthop is a blackhole, any groups using this
1269 * nexthop cannot have more than 1 path
1271 if (new_is_reject
&&
1272 nexthop_num_path(nhge
->nh_parent
) > 1) {
1273 NL_SET_ERR_MSG(extack
, "Blackhole nexthop can not be a member of a group with more than one path");
1277 err
= fib_check_nh_list(nhge
->nh_parent
, new, extack
);
1281 err
= fib6_check_nh_list(nhge
->nh_parent
, new, extack
);
1287 err
= replace_nexthop_grp(net
, old
, new, extack
);
1289 err
= replace_nexthop_single(net
, old
, new, extack
);
1292 nh_rt_cache_flush(net
, old
);
1294 __remove_nexthop(net
, new, NULL
);
1301 /* called with rtnl_lock held */
1302 static int insert_nexthop(struct net
*net
, struct nexthop
*new_nh
,
1303 struct nh_config
*cfg
, struct netlink_ext_ack
*extack
)
1305 struct rb_node
**pp
, *parent
= NULL
, *next
;
1306 struct rb_root
*root
= &net
->nexthop
.rb_root
;
1307 bool replace
= !!(cfg
->nlflags
& NLM_F_REPLACE
);
1308 bool create
= !!(cfg
->nlflags
& NLM_F_CREATE
);
1309 u32 new_id
= new_nh
->id
;
1310 int replace_notify
= 0;
1313 pp
= &root
->rb_node
;
1323 nh
= rb_entry(parent
, struct nexthop
, rb_node
);
1324 if (new_id
< nh
->id
) {
1325 pp
= &next
->rb_left
;
1326 } else if (new_id
> nh
->id
) {
1327 pp
= &next
->rb_right
;
1328 } else if (replace
) {
1329 rc
= replace_nexthop(net
, nh
, new_nh
, extack
);
1331 new_nh
= nh
; /* send notification with old nh */
1336 /* id already exists and not a replace */
1341 if (replace
&& !create
) {
1342 NL_SET_ERR_MSG(extack
, "Replace specified without create and no entry exists");
1347 rb_link_node_rcu(&new_nh
->rb_node
, parent
, pp
);
1348 rb_insert_color(&new_nh
->rb_node
, root
);
1350 rc
= call_nexthop_notifiers(net
, NEXTHOP_EVENT_REPLACE
, new_nh
, extack
);
1352 rb_erase(&new_nh
->rb_node
, &net
->nexthop
.rb_root
);
1356 nh_base_seq_inc(net
);
1357 nexthop_notify(RTM_NEWNEXTHOP
, new_nh
, &cfg
->nlinfo
);
1358 if (replace_notify
&& net
->ipv4
.sysctl_nexthop_compat_mode
)
1359 nexthop_replace_notify(net
, new_nh
, &cfg
->nlinfo
);
1366 /* remove all nexthops tied to a device being deleted */
1367 static void nexthop_flush_dev(struct net_device
*dev
)
1369 unsigned int hash
= nh_dev_hashfn(dev
->ifindex
);
1370 struct net
*net
= dev_net(dev
);
1371 struct hlist_head
*head
= &net
->nexthop
.devhash
[hash
];
1372 struct hlist_node
*n
;
1373 struct nh_info
*nhi
;
1375 hlist_for_each_entry_safe(nhi
, n
, head
, dev_hash
) {
1376 if (nhi
->fib_nhc
.nhc_dev
!= dev
)
1379 remove_nexthop(net
, nhi
->nh_parent
, NULL
);
1383 /* rtnl; called when net namespace is deleted */
1384 static void flush_all_nexthops(struct net
*net
)
1386 struct rb_root
*root
= &net
->nexthop
.rb_root
;
1387 struct rb_node
*node
;
1390 while ((node
= rb_first(root
))) {
1391 nh
= rb_entry(node
, struct nexthop
, rb_node
);
1392 remove_nexthop(net
, nh
, NULL
);
1397 static struct nexthop
*nexthop_create_group(struct net
*net
,
1398 struct nh_config
*cfg
)
1400 struct nlattr
*grps_attr
= cfg
->nh_grp
;
1401 struct nexthop_grp
*entry
= nla_data(grps_attr
);
1402 u16 num_nh
= nla_len(grps_attr
) / sizeof(*entry
);
1403 struct nh_group
*nhg
;
1407 if (WARN_ON(!num_nh
))
1408 return ERR_PTR(-EINVAL
);
1410 nh
= nexthop_alloc();
1412 return ERR_PTR(-ENOMEM
);
1416 nhg
= nexthop_grp_alloc(num_nh
);
1419 return ERR_PTR(-ENOMEM
);
1422 /* spare group used for removals */
1423 nhg
->spare
= nexthop_grp_alloc(num_nh
);
1427 return ERR_PTR(-ENOMEM
);
1429 nhg
->spare
->spare
= nhg
;
1431 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
1432 struct nexthop
*nhe
;
1433 struct nh_info
*nhi
;
1435 nhe
= nexthop_find_by_id(net
, entry
[i
].id
);
1436 if (!nexthop_get(nhe
))
1439 nhi
= rtnl_dereference(nhe
->nh_info
);
1440 if (nhi
->family
== AF_INET
)
1443 nhg
->nh_entries
[i
].nh
= nhe
;
1444 nhg
->nh_entries
[i
].weight
= entry
[i
].weight
+ 1;
1445 list_add(&nhg
->nh_entries
[i
].nh_list
, &nhe
->grp_list
);
1446 nhg
->nh_entries
[i
].nh_parent
= nh
;
1449 if (cfg
->nh_grp_type
== NEXTHOP_GRP_TYPE_MPATH
) {
1451 nh_group_rebalance(nhg
);
1457 rcu_assign_pointer(nh
->nh_grp
, nhg
);
1463 nexthop_put(nhg
->nh_entries
[i
].nh
);
1469 return ERR_PTR(-ENOENT
);
1472 static int nh_create_ipv4(struct net
*net
, struct nexthop
*nh
,
1473 struct nh_info
*nhi
, struct nh_config
*cfg
,
1474 struct netlink_ext_ack
*extack
)
1476 struct fib_nh
*fib_nh
= &nhi
->fib_nh
;
1477 struct fib_config fib_cfg
= {
1478 .fc_oif
= cfg
->nh_ifindex
,
1479 .fc_gw4
= cfg
->gw
.ipv4
,
1480 .fc_gw_family
= cfg
->gw
.ipv4
? AF_INET
: 0,
1481 .fc_flags
= cfg
->nh_flags
,
1482 .fc_encap
= cfg
->nh_encap
,
1483 .fc_encap_type
= cfg
->nh_encap_type
,
1485 u32 tb_id
= (cfg
->dev
? l3mdev_fib_table(cfg
->dev
) : RT_TABLE_MAIN
);
1488 err
= fib_nh_init(net
, fib_nh
, &fib_cfg
, 1, extack
);
1490 fib_nh_release(net
, fib_nh
);
1497 /* sets nh_dev if successful */
1498 err
= fib_check_nh(net
, fib_nh
, tb_id
, 0, extack
);
1500 nh
->nh_flags
= fib_nh
->fib_nh_flags
;
1501 fib_info_update_nhc_saddr(net
, &fib_nh
->nh_common
,
1502 fib_nh
->fib_nh_scope
);
1504 fib_nh_release(net
, fib_nh
);
/*
 * nh_create_ipv6 - set up the IPv6 fib6_nh embedded in @nhi from @cfg.
 *
 * Builds a struct fib6_config from the request (l3mdev table of cfg->dev,
 * ifindex, IPv6 gateway, flags, encap attribute/type and the fdb marker),
 * adds RTF_GATEWAY when the gateway is not the unspecified address, and
 * initialises nhi->fib6_nh through the ipv6_stub fib6_nh_init() hook.
 * The resulting fib_nh_flags are copied into @nh.
 *
 * NOTE(review): the error check around fib6_nh_init() and the return
 * statement are not part of this listing; the fib6_nh_release() call
 * presumably runs only on failure - confirm against the full source.
 */
1510 static int nh_create_ipv6(struct net
*net
, struct nexthop
*nh
,
1511 struct nh_info
*nhi
, struct nh_config
*cfg
,
1512 struct netlink_ext_ack
*extack
)
1514 struct fib6_nh
*fib6_nh
= &nhi
->fib6_nh
;
1515 struct fib6_config fib6_cfg
= {
1516 .fc_table
= l3mdev_fib_table(cfg
->dev
),
1517 .fc_ifindex
= cfg
->nh_ifindex
,
1518 .fc_gateway
= cfg
->gw
.ipv6
,
1519 .fc_flags
= cfg
->nh_flags
,
1520 .fc_encap
= cfg
->nh_encap
,
1521 .fc_encap_type
= cfg
->nh_encap_type
,
1522 .fc_is_fdb
= cfg
->nh_fdb
,
/* a non-zero gateway address marks this as a gateway nexthop */
1526 if (!ipv6_addr_any(&cfg
->gw
.ipv6
))
1527 fib6_cfg
.fc_flags
|= RTF_GATEWAY
;
1529 /* sets nh_dev if successful */
1530 err
= ipv6_stub
->fib6_nh_init(net
, fib6_nh
, &fib6_cfg
, GFP_KERNEL
,
1533 ipv6_stub
->fib6_nh_release(fib6_nh
);
1535 nh
->nh_flags
= fib6_nh
->fib_nh_flags
;
/*
 * nexthop_create - allocate and initialise a single (non-group) nexthop.
 *
 * Allocates the nexthop with nexthop_alloc() and its nh_info with
 * kzalloc(); the visible allocation error returns are ERR_PTR(-ENOMEM).
 * The nh_info is linked to its parent, takes the family from @cfg and
 * starts with link scope.  A blackhole request has its ifindex pointed at
 * the namespace loopback device.  The family-specific part is delegated
 * to nh_create_ipv4() / nh_create_ipv6(); an error from that step is
 * returned as ERR_PTR(err).  Finally the nh_info is added to the
 * per-device hash and published with rcu_assign_pointer().
 *
 * NOTE(review): the NULL checks, case labels, cleanup of partially built
 * objects and the final return are not part of this listing.
 */
1540 static struct nexthop
*nexthop_create(struct net
*net
, struct nh_config
*cfg
,
1541 struct netlink_ext_ack
*extack
)
1543 struct nh_info
*nhi
;
1547 nh
= nexthop_alloc();
1549 return ERR_PTR(-ENOMEM
);
1551 nhi
= kzalloc(sizeof(*nhi
), GFP_KERNEL
);
1554 return ERR_PTR(-ENOMEM
);
1557 nh
->nh_flags
= cfg
->nh_flags
;
1560 nhi
->nh_parent
= nh
;
1561 nhi
->family
= cfg
->nh_family
;
1562 nhi
->fib_nhc
.nhc_scope
= RT_SCOPE_LINK
;
/* blackhole nexthops are bound to the namespace's loopback device */
1567 if (cfg
->nh_blackhole
) {
1569 cfg
->nh_ifindex
= net
->loopback_dev
->ifindex
;
/* family-specific initialisation of the embedded fib_nh / fib6_nh */
1572 switch (cfg
->nh_family
) {
1574 err
= nh_create_ipv4(net
, nh
, nhi
, cfg
, extack
);
1577 err
= nh_create_ipv6(net
, nh
, nhi
, cfg
, extack
);
1584 return ERR_PTR(err
);
1587 /* add the entry to the device based hash */
1589 nexthop_devhash_add(net
, nhi
);
1591 rcu_assign_pointer(nh
->nh_info
, nhi
);
/*
 * nexthop_add - create a nexthop (single or group) from @cfg and insert it.
 *
 * Rejects NLM_F_REPLACE without an id ("Replace requires nexthop id").
 * nh_find_unused_id() is used to pick an id, with "No unused id" as the
 * failure message (NOTE(review): the guarding condition, presumably
 * "no id supplied", is not in this listing).  The nexthop is built by
 * nexthop_create_group() or nexthop_create(), gets an initial refcount of
 * 1 plus the id and protocol from @cfg, and is inserted with
 * insert_nexthop().  __remove_nexthop() with a NULL nlinfo is the visible
 * undo step - presumably taken when the insert fails; confirm.
 */
1596 /* called with rtnl lock held */
1597 static struct nexthop
*nexthop_add(struct net
*net
, struct nh_config
*cfg
,
1598 struct netlink_ext_ack
*extack
)
1603 if (cfg
->nlflags
& NLM_F_REPLACE
&& !cfg
->nh_id
) {
1604 NL_SET_ERR_MSG(extack
, "Replace requires nexthop id");
1605 return ERR_PTR(-EINVAL
);
1609 cfg
->nh_id
= nh_find_unused_id(net
);
1611 NL_SET_ERR_MSG(extack
, "No unused id");
1612 return ERR_PTR(-EINVAL
);
1617 nh
= nexthop_create_group(net
, cfg
);
1619 nh
= nexthop_create(net
, cfg
, extack
);
1624 refcount_set(&nh
->refcnt
, 1);
1625 nh
->id
= cfg
->nh_id
;
1626 nh
->protocol
= cfg
->nh_protocol
;
1629 err
= insert_nexthop(net
, nh
, cfg
, extack
);
1631 __remove_nexthop(net
, nh
, NULL
);
/*
 * rtm_to_nh_config - translate an RTM_NEWNEXTHOP request into @cfg.
 *
 * Parses the message attributes against rtm_nh_policy and validates the
 * ancillary header: resvd and nh_scope must be zero, nh_flags must stay
 * within NEXTHOP_VALID_USER_FLAGS and the family must be a supported one.
 * NHA_GROUPS and NHA_MASTER are rejected ("Invalid attributes in
 * request").  @cfg is then zeroed and filled from the header and from:
 *   - NHA_ID
 *   - NHA_FDB (not combinable with oif, blackhole or encap; no flags)
 *   - NHA_GROUP / NHA_GROUP_TYPE (family must be AF_UNSPEC, type at most
 *     NEXTHOP_GRP_TYPE_MAX, checked further by nh_check_attr_group())
 *   - NHA_BLACKHOLE (not combinable with gateway, oif, encap or fdb)
 *   - NHA_OIF (device looked up by index; must be up and have carrier)
 *   - NHA_GATEWAY (payload length must match the address family)
 *   - NHA_ENCAP / NHA_ENCAP_TYPE (must be supplied together)
 * Every rejection sets an extack message.
 *
 * NOTE(review): the return/goto statements, case labels and a few
 * conditions are not part of this listing; confirm the exact error codes
 * and control flow against the full source.
 */
1639 static int rtm_to_nh_config(struct net
*net
, struct sk_buff
*skb
,
1640 struct nlmsghdr
*nlh
, struct nh_config
*cfg
,
1641 struct netlink_ext_ack
*extack
)
1643 struct nhmsg
*nhm
= nlmsg_data(nlh
);
1644 struct nlattr
*tb
[NHA_MAX
+ 1];
1647 err
= nlmsg_parse(nlh
, sizeof(*nhm
), tb
, NHA_MAX
, rtm_nh_policy
,
/* ancillary header validation */
1653 if (nhm
->resvd
|| nhm
->nh_scope
) {
1654 NL_SET_ERR_MSG(extack
, "Invalid values in ancillary header");
1657 if (nhm
->nh_flags
& ~NEXTHOP_VALID_USER_FLAGS
) {
1658 NL_SET_ERR_MSG(extack
, "Invalid nexthop flags in ancillary header");
1662 switch (nhm
->nh_family
) {
1671 NL_SET_ERR_MSG(extack
, "Invalid address family");
1675 if (tb
[NHA_GROUPS
] || tb
[NHA_MASTER
]) {
1676 NL_SET_ERR_MSG(extack
, "Invalid attributes in request");
/* start from a clean config and copy the netlink/request metadata */
1680 memset(cfg
, 0, sizeof(*cfg
));
1681 cfg
->nlflags
= nlh
->nlmsg_flags
;
1682 cfg
->nlinfo
.portid
= NETLINK_CB(skb
).portid
;
1683 cfg
->nlinfo
.nlh
= nlh
;
1684 cfg
->nlinfo
.nl_net
= net
;
1686 cfg
->nh_family
= nhm
->nh_family
;
1687 cfg
->nh_protocol
= nhm
->nh_protocol
;
1688 cfg
->nh_flags
= nhm
->nh_flags
;
1691 cfg
->nh_id
= nla_get_u32(tb
[NHA_ID
]);
/* fdb nexthops: no device, blackhole, encap or header flags allowed */
1694 if (tb
[NHA_OIF
] || tb
[NHA_BLACKHOLE
] ||
1695 tb
[NHA_ENCAP
] || tb
[NHA_ENCAP_TYPE
]) {
1696 NL_SET_ERR_MSG(extack
, "Fdb attribute can not be used with encap, oif or blackhole");
1699 if (nhm
->nh_flags
) {
1700 NL_SET_ERR_MSG(extack
, "Unsupported nexthop flags in ancillary header");
1703 cfg
->nh_fdb
= nla_get_flag(tb
[NHA_FDB
]);
/* group nexthops: family must be unspecified, type defaults to mpath */
1706 if (tb
[NHA_GROUP
]) {
1707 if (nhm
->nh_family
!= AF_UNSPEC
) {
1708 NL_SET_ERR_MSG(extack
, "Invalid family for group");
1711 cfg
->nh_grp
= tb
[NHA_GROUP
];
1713 cfg
->nh_grp_type
= NEXTHOP_GRP_TYPE_MPATH
;
1714 if (tb
[NHA_GROUP_TYPE
])
1715 cfg
->nh_grp_type
= nla_get_u16(tb
[NHA_GROUP_TYPE
]);
1717 if (cfg
->nh_grp_type
> NEXTHOP_GRP_TYPE_MAX
) {
1718 NL_SET_ERR_MSG(extack
, "Invalid group type");
1721 err
= nh_check_attr_group(net
, tb
, extack
);
1723 /* no other attributes should be set */
1727 if (tb
[NHA_BLACKHOLE
]) {
1728 if (tb
[NHA_GATEWAY
] || tb
[NHA_OIF
] ||
1729 tb
[NHA_ENCAP
] || tb
[NHA_ENCAP_TYPE
] || tb
[NHA_FDB
]) {
1730 NL_SET_ERR_MSG(extack
, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
1734 cfg
->nh_blackhole
= 1;
/* regular nexthops need an egress device that is up with carrier */
1739 if (!cfg
->nh_fdb
&& !tb
[NHA_OIF
]) {
1740 NL_SET_ERR_MSG(extack
, "Device attribute required for non-blackhole and non-fdb nexthops");
1744 if (!cfg
->nh_fdb
&& tb
[NHA_OIF
]) {
1745 cfg
->nh_ifindex
= nla_get_u32(tb
[NHA_OIF
]);
1746 if (cfg
->nh_ifindex
)
1747 cfg
->dev
= __dev_get_by_index(net
, cfg
->nh_ifindex
);
1750 NL_SET_ERR_MSG(extack
, "Invalid device index");
1752 } else if (!(cfg
->dev
->flags
& IFF_UP
)) {
1753 NL_SET_ERR_MSG(extack
, "Nexthop device is not up");
1756 } else if (!netif_carrier_ok(cfg
->dev
)) {
1757 NL_SET_ERR_MSG(extack
, "Carrier for nexthop device is down");
/* gateway payload must be exactly one address of the nexthop family */
1764 if (tb
[NHA_GATEWAY
]) {
1765 struct nlattr
*gwa
= tb
[NHA_GATEWAY
];
1767 switch (cfg
->nh_family
) {
1769 if (nla_len(gwa
) != sizeof(u32
)) {
1770 NL_SET_ERR_MSG(extack
, "Invalid gateway");
1773 cfg
->gw
.ipv4
= nla_get_be32(gwa
);
1776 if (nla_len(gwa
) != sizeof(struct in6_addr
)) {
1777 NL_SET_ERR_MSG(extack
, "Invalid gateway");
1780 cfg
->gw
.ipv6
= nla_get_in6_addr(gwa
);
1783 NL_SET_ERR_MSG(extack
,
1784 "Unknown address family for gateway");
1788 /* device only nexthop (no gateway) */
1789 if (cfg
->nh_flags
& RTNH_F_ONLINK
) {
1790 NL_SET_ERR_MSG(extack
,
1791 "ONLINK flag can not be set for nexthop without a gateway");
/* encap payload and encap type are only valid as a pair */
1796 if (tb
[NHA_ENCAP
]) {
1797 cfg
->nh_encap
= tb
[NHA_ENCAP
];
1799 if (!tb
[NHA_ENCAP_TYPE
]) {
1800 NL_SET_ERR_MSG(extack
, "LWT encapsulation type is missing");
1804 cfg
->nh_encap_type
= nla_get_u16(tb
[NHA_ENCAP_TYPE
]);
1805 err
= lwtunnel_valid_encap_type(cfg
->nh_encap_type
, extack
);
1809 } else if (tb
[NHA_ENCAP_TYPE
]) {
1810 NL_SET_ERR_MSG(extack
, "LWT encapsulation attribute is missing");
/*
 * rtm_new_nexthop - rtnetlink handler for RTM_NEWNEXTHOP.
 *
 * Converts the request into a struct nh_config with rtm_to_nh_config()
 * and hands it to nexthop_add() in the socket's network namespace.
 *
 * NOTE(review): the error checks and the return value handling are not
 * part of this listing; nexthop_add() returns ERR_PTR() values on the
 * visible failure paths, so the caller presumably converts them.
 */
1821 static int rtm_new_nexthop(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1822 struct netlink_ext_ack
*extack
)
1824 struct net
*net
= sock_net(skb
->sk
);
1825 struct nh_config cfg
;
1829 err
= rtm_to_nh_config(net
, skb
, nlh
, &cfg
, extack
);
1831 nh
= nexthop_add(net
, &cfg
, extack
);
/*
 * nh_valid_get_del_req - validate a get/delete request and extract the id.
 *
 * Parses the attributes against rtm_nh_policy, walks the attribute table
 * and flags unexpected entries ("Unexpected attribute in request"),
 * requires nh_protocol, resvd, nh_scope and nh_flags in the header to be
 * zero, and stores the value of NHA_ID in *@id.  Missing and invalid ids
 * are reported through extack ("Nexthop id is missing" / "Invalid nexthop
 * id").
 *
 * NOTE(review): the per-attribute test inside the loop, the conditions
 * guarding the id messages and the return statements are not part of
 * this listing.  The loop bound here is "i < __NHA_MAX" while
 * nh_valid_dump_req() uses "i <= NHA_MAX"; presumably equivalent, verify
 * against the uapi header.
 */
1839 static int nh_valid_get_del_req(struct nlmsghdr
*nlh
, u32
*id
,
1840 struct netlink_ext_ack
*extack
)
1842 struct nhmsg
*nhm
= nlmsg_data(nlh
);
1843 struct nlattr
*tb
[NHA_MAX
+ 1];
1846 err
= nlmsg_parse(nlh
, sizeof(*nhm
), tb
, NHA_MAX
, rtm_nh_policy
,
1852 for (i
= 0; i
< __NHA_MAX
; ++i
) {
1860 NL_SET_ERR_MSG_ATTR(extack
, tb
[i
],
1861 "Unexpected attribute in request");
1865 if (nhm
->nh_protocol
|| nhm
->resvd
|| nhm
->nh_scope
|| nhm
->nh_flags
) {
1866 NL_SET_ERR_MSG(extack
, "Invalid values in header");
1871 NL_SET_ERR_MSG(extack
, "Nexthop id is missing");
1875 *id
= nla_get_u32(tb
[NHA_ID
]);
1877 NL_SET_ERR_MSG(extack
, "Invalid nexthop id");
/*
 * rtm_del_nexthop - rtnetlink handler for RTM_DELNEXTHOP.
 *
 * Validates the request with nh_valid_get_del_req(), looks the nexthop up
 * by id in the socket's namespace and removes it with remove_nexthop(),
 * passing an nl_info that carries the requester's portid.
 *
 * NOTE(review): the remaining nl_info initialisers, the error checks
 * (including the lookup-miss case) and the return statements are not part
 * of this listing.
 */
1885 static int rtm_del_nexthop(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1886 struct netlink_ext_ack
*extack
)
1888 struct net
*net
= sock_net(skb
->sk
);
1889 struct nl_info nlinfo
= {
1892 .portid
= NETLINK_CB(skb
).portid
,
1898 err
= nh_valid_get_del_req(nlh
, &id
, extack
);
1902 nh
= nexthop_find_by_id(net
, id
);
1906 remove_nexthop(net
, nh
, &nlinfo
);
/*
 * rtm_get_nexthop - rtnetlink handler for a single-nexthop RTM_GETNEXTHOP.
 *
 * Validates the request with nh_valid_get_del_req(), allocates a reply
 * skb of NLMSG_GOODSIZE, looks the nexthop up by id, serialises it as an
 * RTM_NEWNEXTHOP message with nh_fill_node() and unicasts the reply to
 * the requester's portid.  -EMSGSIZE from nh_fill_node() triggers a
 * WARN_ON, i.e. the code does not expect one nexthop to overflow the skb.
 *
 * NOTE(review): the error checks, the cleanup of the skb on failure and
 * the return statements are not part of this listing.
 */
1912 static int rtm_get_nexthop(struct sk_buff
*in_skb
, struct nlmsghdr
*nlh
,
1913 struct netlink_ext_ack
*extack
)
1915 struct net
*net
= sock_net(in_skb
->sk
);
1916 struct sk_buff
*skb
= NULL
;
1921 err
= nh_valid_get_del_req(nlh
, &id
, extack
);
1926 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1931 nh
= nexthop_find_by_id(net
, id
);
1935 err
= nh_fill_node(skb
, nh
, RTM_NEWNEXTHOP
, NETLINK_CB(in_skb
).portid
,
1938 WARN_ON(err
== -EMSGSIZE
);
1942 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).portid
);
/*
 * nh_dump_filtered - apply the dump filters to one nexthop.
 *
 * Tests @nh against the requested filters: group-only (@group_filter vs.
 * nh->is_group), address family, egress device index (@dev_idx compared
 * with the ifindex of the nexthop's nhc_dev) and master device index
 * (@master_idx compared with the ifindex of the device's upper master).
 * When no device, master or family filter is given the per-nexthop checks
 * are short-circuited by the "!dev_idx && !master_idx && !family" test.
 *
 * NOTE(review): the return statements are not part of this listing.
 * Judging by the name and by the caller in rtm_dump_nexthop(), a true
 * result presumably means "skip this nexthop" - confirm.
 */
1950 static bool nh_dump_filtered(struct nexthop
*nh
, int dev_idx
, int master_idx
,
1951 bool group_filter
, u8 family
)
1953 const struct net_device
*dev
;
1954 const struct nh_info
*nhi
;
1956 if (group_filter
&& !nh
->is_group
)
1959 if (!dev_idx
&& !master_idx
&& !family
)
1965 nhi
= rtnl_dereference(nh
->nh_info
);
1966 if (family
&& nhi
->family
!= family
)
1969 dev
= nhi
->fib_nhc
.nhc_dev
;
1970 if (dev_idx
&& (!dev
|| dev
->ifindex
!= dev_idx
))
1974 struct net_device
*master
;
/* the cast drops const so the device can be passed to the lookup helper */
1979 master
= netdev_master_upper_dev_get((struct net_device
*)dev
);
1980 if (!master
|| master
->ifindex
!= master_idx
)
/*
 * nh_valid_dump_req - validate a nexthop dump request and extract filters.
 *
 * Parses the attributes against rtm_nh_policy and walks the attribute
 * table.  Device and master device indices are read as u32 and rejected
 * when larger than INT_MAX; the group filter is reported through
 * *@group_filter.  Any other attribute is refused with "Unsupported
 * attribute in dump request".  Finally nh_protocol, resvd, nh_scope and
 * nh_flags in the header must all be zero.
 *
 * NOTE(review): the case labels, the stores to *@dev_idx, *@master_idx
 * and *@fdb_filter, and the return statements are not part of this
 * listing.
 */
1987 static int nh_valid_dump_req(const struct nlmsghdr
*nlh
, int *dev_idx
,
1988 int *master_idx
, bool *group_filter
,
1989 bool *fdb_filter
, struct netlink_callback
*cb
)
1991 struct netlink_ext_ack
*extack
= cb
->extack
;
1992 struct nlattr
*tb
[NHA_MAX
+ 1];
1997 err
= nlmsg_parse(nlh
, sizeof(*nhm
), tb
, NHA_MAX
, rtm_nh_policy
,
2002 for (i
= 0; i
<= NHA_MAX
; ++i
) {
/* indices arrive as u32 but are handed back through int pointers */
2008 idx
= nla_get_u32(tb
[i
]);
2009 if (idx
> INT_MAX
) {
2010 NL_SET_ERR_MSG(extack
, "Invalid device index");
2016 idx
= nla_get_u32(tb
[i
]);
2017 if (idx
> INT_MAX
) {
2018 NL_SET_ERR_MSG(extack
, "Invalid master device index");
2024 *group_filter
= true;
2030 NL_SET_ERR_MSG(extack
, "Unsupported attribute in dump request");
2035 nhm
= nlmsg_data(nlh
);
2036 if (nhm
->nh_protocol
|| nhm
->resvd
|| nhm
->nh_scope
|| nhm
->nh_flags
) {
2037 NL_SET_ERR_MSG(extack
, "Invalid values in header for nexthop dump request");
/*
 * rtm_dump_nexthop - rtnetlink dump handler for RTM_GETNEXTHOP.
 *
 * Extracts the filters with nh_valid_dump_req(), then walks the
 * namespace's nexthop rb-tree in order.  The resume position is read from
 * cb->args[0].  Every nexthop is tested with nh_dump_filtered() (device,
 * master, group and header family filters) and serialised with
 * nh_fill_node() as an RTM_NEWNEXTHOP message flagged NLM_F_MULTI.
 * Before returning, cb->seq is set from the namespace's nexthop sequence
 * counter and nl_dump_check_consistent() is called on the skb's header.
 *
 * NOTE(review): the index bookkeeping, the handling of a full skb and the
 * return statements are not part of this listing; fdb_filter is collected
 * here but its use is not visible either.
 */
2045 static int rtm_dump_nexthop(struct sk_buff
*skb
, struct netlink_callback
*cb
)
2047 bool group_filter
= false, fdb_filter
= false;
2048 struct nhmsg
*nhm
= nlmsg_data(cb
->nlh
);
2049 int dev_filter_idx
= 0, master_idx
= 0;
2050 struct net
*net
= sock_net(skb
->sk
);
2051 struct rb_root
*root
= &net
->nexthop
.rb_root
;
2052 struct rb_node
*node
;
2056 err
= nh_valid_dump_req(cb
->nlh
, &dev_filter_idx
, &master_idx
,
2057 &group_filter
, &fdb_filter
, cb
);
/* position saved by the previous invocation of this dump */
2061 s_idx
= cb
->args
[0];
2062 for (node
= rb_first(root
); node
; node
= rb_next(node
)) {
2068 nh
= rb_entry(node
, struct nexthop
, rb_node
);
2069 if (nh_dump_filtered(nh
, dev_filter_idx
, master_idx
,
2070 group_filter
, nhm
->nh_family
))
2073 err
= nh_fill_node(skb
, nh
, RTM_NEWNEXTHOP
,
2074 NETLINK_CB(cb
->skb
).portid
,
2075 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
);
2077 if (likely(skb
->len
))
2090 cb
->seq
= net
->nexthop
.seq
;
2091 nl_dump_check_consistent(cb
, nlmsg_hdr(skb
));
/*
 * nexthop_sync_mtu - propagate an MTU change on @dev to its nexthops.
 *
 * Walks the per-namespace device hash bucket selected by dev->ifindex and,
 * for every nh_info whose nexthop device is @dev and whose family is
 * AF_INET, calls fib_nhc_update_mtu() with the device's current MTU.
 *
 * NOTE(review): the last argument of fib_nhc_update_mtu() is not part of
 * this listing; presumably it is @orig_mtu - confirm.
 */
2096 static void nexthop_sync_mtu(struct net_device
*dev
, u32 orig_mtu
)
2098 unsigned int hash
= nh_dev_hashfn(dev
->ifindex
);
2099 struct net
*net
= dev_net(dev
);
2100 struct hlist_head
*head
= &net
->nexthop
.devhash
[hash
];
2101 struct hlist_node
*n
;
2102 struct nh_info
*nhi
;
2104 hlist_for_each_entry_safe(nhi
, n
, head
, dev_hash
) {
/* several devices can share a bucket, so match the device itself */
2105 if (nhi
->fib_nhc
.nhc_dev
== dev
) {
2106 if (nhi
->family
== AF_INET
)
2107 fib_nhc_update_mtu(&nhi
->fib_nhc
, dev
->mtu
,
/*
 * nh_netdev_event - netdevice notifier callback for the nexthop code.
 *
 * Visible handling:
 *   - NETDEV_UNREGISTER: flush the nexthops that use the device.
 *   - an event whose case label is not in this listing: flush the
 *     device's nexthops when neither IFF_RUNNING nor IFF_LOWER_UP is set.
 *   - NETDEV_CHANGEMTU: resync nexthop MTUs via nexthop_sync_mtu() using
 *     info_ext->ext.mtu, then flush the namespace's route cache.
 *
 * NOTE(review): the switch statement, the assignment of info_ext, the
 * break statements and the return value are not part of this listing.
 */
2114 static int nh_netdev_event(struct notifier_block
*this,
2115 unsigned long event
, void *ptr
)
2117 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
2118 struct netdev_notifier_info_ext
*info_ext
;
2122 case NETDEV_UNREGISTER
:
2123 nexthop_flush_dev(dev
);
2126 if (!(dev_get_flags(dev
) & (IFF_RUNNING
| IFF_LOWER_UP
)))
2127 nexthop_flush_dev(dev
);
2129 case NETDEV_CHANGEMTU
:
2131 nexthop_sync_mtu(dev
, info_ext
->ext
.mtu
);
2132 rt_cache_flush(dev_net(dev
));
/* Notifier block that routes netdevice events to nh_netdev_event(). */
2138 static struct notifier_block nh_netdev_notifier
= {
2139 .notifier_call
= nh_netdev_event
,
/*
 * nexthops_dump - replay all existing nexthops to one notifier block.
 *
 * Walks the namespace's nexthop rb-tree in order and invokes
 * call_nexthop_notifier() on @nb with NEXTHOP_EVENT_REPLACE for every
 * nexthop found.
 *
 * NOTE(review): the trailing argument(s) of call_nexthop_notifier(), the
 * handling of its result and the return statement are not part of this
 * listing.
 */
2142 static int nexthops_dump(struct net
*net
, struct notifier_block
*nb
,
2143 struct netlink_ext_ack
*extack
)
2145 struct rb_root
*root
= &net
->nexthop
.rb_root
;
2146 struct rb_node
*node
;
2149 for (node
= rb_first(root
); node
; node
= rb_next(node
)) {
2152 nh
= rb_entry(node
, struct nexthop
, rb_node
);
2153 err
= call_nexthop_notifier(nb
, net
, NEXTHOP_EVENT_REPLACE
, nh
,
/*
 * register_nexthop_notifier - subscribe @nb to nexthop events in @net.
 *
 * First replays the nexthops that already exist to @nb via
 * nexthops_dump(), then adds @nb to the namespace's blocking notifier
 * chain.  Exported for use by modules.
 *
 * NOTE(review): locking, error handling between the two steps and the
 * return statement are not part of this listing.
 */
2162 int register_nexthop_notifier(struct net
*net
, struct notifier_block
*nb
,
2163 struct netlink_ext_ack
*extack
)
2168 err
= nexthops_dump(net
, nb
, extack
);
2171 err
= blocking_notifier_chain_register(&net
->nexthop
.notifier_chain
,
2177 EXPORT_SYMBOL(register_nexthop_notifier
);
/*
 * unregister_nexthop_notifier - remove a notifier from @net's nexthop
 * notifier chain.
 *
 * Returns the result of blocking_notifier_chain_unregister().  Exported
 * for use by modules.
 */
2179 int unregister_nexthop_notifier(struct net
*net
, struct notifier_block
*nb
)
2181 return blocking_notifier_chain_unregister(&net
->nexthop
.notifier_chain
,
2184 EXPORT_SYMBOL(unregister_nexthop_notifier
);
/*
 * nexthop_set_hw_flags - update the hardware state flags of a nexthop.
 *
 * Looks the nexthop up by @id in @net, clears both RTNH_F_OFFLOAD and
 * RTNH_F_TRAP and then sets them again.  Exported for use by modules.
 *
 * NOTE(review): the conditions guarding the two "|=" statements
 * (presumably @offload and @trap), the lookup-miss handling and any
 * locking are not part of this listing - confirm.
 */
2186 void nexthop_set_hw_flags(struct net
*net
, u32 id
, bool offload
, bool trap
)
2188 struct nexthop
*nexthop
;
2192 nexthop
= nexthop_find_by_id(net
, id
);
/* start from a clean state, then re-apply the requested flags */
2196 nexthop
->nh_flags
&= ~(RTNH_F_OFFLOAD
| RTNH_F_TRAP
);
2198 nexthop
->nh_flags
|= RTNH_F_OFFLOAD
;
2200 nexthop
->nh_flags
|= RTNH_F_TRAP
;
2205 EXPORT_SYMBOL(nexthop_set_hw_flags
);
/*
 * nexthop_net_exit - per-namespace teardown.
 *
 * Flushes every nexthop of the namespace and frees the device hash table
 * allocated by nexthop_net_init().
 *
 * NOTE(review): any locking around flush_all_nexthops() is not part of
 * this listing.
 */
2207 static void __net_exit
nexthop_net_exit(struct net
*net
)
2210 flush_all_nexthops(net
);
2212 kfree(net
->nexthop
.devhash
);
/*
 * nexthop_net_init - per-namespace setup.
 *
 * Initialises the nexthop rb-tree root, allocates a zeroed device hash
 * table of NH_DEV_HASHSIZE list heads and initialises the blocking
 * notifier chain head.
 *
 * NOTE(review): the return statements are not part of this listing; the
 * "!devhash" test presumably returns an allocation error.
 */
2215 static int __net_init
nexthop_net_init(struct net
*net
)
2217 size_t sz
= sizeof(struct hlist_head
) * NH_DEV_HASHSIZE
;
2219 net
->nexthop
.rb_root
= RB_ROOT
;
2220 net
->nexthop
.devhash
= kzalloc(sz
, GFP_KERNEL
);
2221 if (!net
->nexthop
.devhash
)
2223 BLOCKING_INIT_NOTIFIER_HEAD(&net
->nexthop
.notifier_chain
);
/* Per-network-namespace init/exit hooks for the nexthop code. */
2228 static struct pernet_operations nexthop_net_ops
= {
2229 .init
= nexthop_net_init
,
2230 .exit
= nexthop_net_exit
,
/*
 * nexthop_init - subsystem initialisation, run through subsys_initcall().
 *
 * Registers the per-namespace operations and the netdevice notifier, then
 * the rtnetlink handlers:
 *   - PF_UNSPEC: RTM_NEWNEXTHOP, RTM_DELNEXTHOP and RTM_GETNEXTHOP
 *     (doit and dump).
 *   - PF_INET and PF_INET6: RTM_NEWNEXTHOP, plus RTM_GETNEXTHOP with a
 *     dump handler only.
 *
 * NOTE(review): as listed, the register_pernet_subsys() and
 * register_netdevice_notifier() calls discard their return values; the
 * function's own return statement is not part of this listing.
 */
2233 static int __init
nexthop_init(void)
2235 register_pernet_subsys(&nexthop_net_ops
);
2237 register_netdevice_notifier(&nh_netdev_notifier
);
2239 rtnl_register(PF_UNSPEC
, RTM_NEWNEXTHOP
, rtm_new_nexthop
, NULL
, 0);
2240 rtnl_register(PF_UNSPEC
, RTM_DELNEXTHOP
, rtm_del_nexthop
, NULL
, 0);
2241 rtnl_register(PF_UNSPEC
, RTM_GETNEXTHOP
, rtm_get_nexthop
,
2242 rtm_dump_nexthop
, 0);
2244 rtnl_register(PF_INET
, RTM_NEWNEXTHOP
, rtm_new_nexthop
, NULL
, 0);
2245 rtnl_register(PF_INET
, RTM_GETNEXTHOP
, NULL
, rtm_dump_nexthop
, 0);
2247 rtnl_register(PF_INET6
, RTM_NEWNEXTHOP
, rtm_new_nexthop
, NULL
, 0);
2248 rtnl_register(PF_INET6
, RTM_GETNEXTHOP
, NULL
, rtm_dump_nexthop
, 0);
2252 subsys_initcall(nexthop_init
);