// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/kconfig.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

#include <trace/events/mctp.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;
static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;

static void mctp_flow_prepare_output(struct sk_buff *skb,
				     struct mctp_dev *dev);

/* route output callbacks */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *mh;
	struct sock *sk;
	u8 type;

	WARN_ON(!rcu_read_lock_held());

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	sk_for_each_rcu(sk, &net->mctp.binds) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (!mctp_address_matches(msk->bind_addr, mh->dest))
			continue;

		return msk;
	}

	return NULL;
}

/* A note on the key allocations.
 *
 * struct net->mctp.keys contains our set of currently-allocated keys for
 * MCTP tag management. The lookup tuple for these is the peer EID,
 * local EID and MCTP tag.
 *
 * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
 * broadcast message is sent, we may receive responses from any peer EID.
 * Because the broadcast dest address is equivalent to ANY, we create
 * a key with (local = local-eid, peer = ANY). This allows a match on the
 * incoming broadcast responses from any peer.
 *
 * We perform lookups when packets are received, and when tags are allocated
 * in two scenarios:
 *
 *  - when a packet is sent, with a locally-owned tag: we need to find an
 *    unused tag value for the (local, peer) EID pair.
 *
 *  - when a tag is manually allocated: we need to find an unused tag value
 *    for the peer EID, but don't have a specific local EID at that stage.
 *
 * In the latter case, on successful allocation, we end up with a tag with
 * (local = ANY, peer = peer-eid).
 *
 * So, the key set allows both a local EID of ANY, as well as a peer EID of
 * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
 * The matching (in mctp_key_match()) during lookup allows the match value to
 * be ANY in either the dest or source addresses.
 *
 * When allocating (+ inserting) a tag, we need to check for conflicts amongst
 * the existing tag set. This requires matching either exactly on the local
 * and peer addresses, or either being ANY.
 */

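/*
 * Worked example for the note above (illustrative only; the EID and tag
 * values here are hypothetical): a request sent from local EID 8 to peer
 * EID 9 with a locally-owned tag of 0x3 creates a key of
 * (net, local = 8, peer = 9, tag = 0x3). An incoming response carrying
 * (dest = 8, src = 9, tag = 0x3, TO clear) then matches via:
 *
 *	mctp_key_match(key, netid, mh->dest, mh->src, tag);
 *
 * whereas a broadcast-request key of (local = 8, peer = ANY) matches the
 * same lookup for any src value.
 */
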
static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
			   mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
	if (key->net != net)
		return false;

	if (!mctp_address_matches(key->local_addr, local))
		return false;

	if (!mctp_address_matches(key->peer_addr, peer))
		return false;

	return key->tag == tag;
}

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   unsigned int netid, mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);
	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, netid, mh->dest, peer, tag))
			continue;

		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

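/*
 * Illustrative caller sketch (not part of this file): mctp_lookup_key()
 * returns with key->lock held and a reference taken, so a caller is
 * expected to unlock with the returned irqflags and drop its ref when done:
 *
 *	key = mctp_lookup_key(net, skb, netid, mh->src, &f);
 *	if (key) {
 *		... use key ...
 *		spin_unlock_irqrestore(&key->lock, f);
 *		mctp_key_unref(key);
 *	}
 */
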
static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  unsigned int net,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->peer_addr = peer;
	key->local_addr = local;
	key->net = net;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);

	return key;
}

void mctp_key_unref(struct mctp_sk_key *key)
{
	unsigned long flags;

	if (!refcount_dec_and_test(&key->refs))
		return;

	/* even though no refs exist here, the lock allows us to stay
	 * consistent with the locking requirement of mctp_dev_release_key
	 */
	spin_lock_irqsave(&key->lock, flags);
	mctp_dev_release_key(key->dev, key);
	spin_unlock_irqrestore(&key->lock, flags);

	kfree(key);
}

static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	if (sock_flag(&msk->sk, SOCK_DEAD)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->net, key->local_addr,
				   key->peer_addr, key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		refcount_inc(&key->refs);
		key->expiry = jiffies + mctp_key_lifetime;
		timer_reduce(&msk->key_expiry, key->expiry);

		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

out_unlock:
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}

/* Helper for mctp_route_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 */
static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
			       unsigned long flags, unsigned long reason)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	skb = key->reasm_head;
	key->reasm_head = NULL;

	if (!key->manual_alloc) {
		key->reasm_dead = true;
		key->valid = false;
		mctp_dev_release_key(key->dev, key);
	}
	spin_unlock_irqrestore(&key->lock, flags);

	if (!key->manual_alloc) {
		spin_lock_irqsave(&net->mctp.keys_lock, flags);
		if (!hlist_unhashed(&key->hlist)) {
			hlist_del_init(&key->hlist);
			hlist_del_init(&key->sklist);
			/* unref for the lists */
			mctp_key_unref(key);
		}
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	/* and one for the local reference */
	mctp_key_unref(key);

	kfree_skb(skb);
}

#ifdef CONFIG_MCTP_FLOWS
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
	struct mctp_flow *flow;

	flow = skb_ext_add(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	refcount_inc(&key->refs);
	flow->key = key;
}

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
	struct mctp_sk_key *key;
	struct mctp_flow *flow;

	flow = skb_ext_find(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	key = flow->key;

	if (WARN_ON(key->dev && key->dev != dev))
		return;

	mctp_dev_set_key(dev, key);
}
#else
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
#endif

static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		key->reasm_head = skb;
		key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
		key->last_seq = this_seq;
		return 0;
	}

	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		return -EINVAL;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		return -EINVAL;

	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;
}

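/*
 * Example of the sequence check above (illustrative): MCTP carries a
 * 2-bit sequence number, so with key->last_seq == 3 the next expected
 * value wraps:
 *
 *	exp_seq = (3 + 1) & MCTP_HDR_SEQ_MASK;	// == 0
 *
 * and a fragment arriving with any other this_seq is dropped with -EINVAL.
 */
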
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_sk_key *key, *any_key = NULL;
	struct net *net = dev_net(skb->dev);
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned int netid;
	unsigned long f;
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* we may be receiving a locally-routed packet; drop source sk
	 * accounting.
	 */
	skb_orphan(skb);

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	netid = mctp_cb(skb)->net;
	skb_pull(skb, sizeof(struct mctp_hdr));

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

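	/* Illustrative decode (hypothetical byte): flags_seq_tag == 0xc9
	 * gives SOM and EOM set, sequence 0, and tag 0x1 with the TO bit
	 * set - i.e. a single-fragment request addressed to a bound socket.
	 */
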
	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src,dest,tag).
	 * we hold a ref on the key, and key->lock held.
	 */
	key = mctp_lookup_key(net, skb, netid, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 *
			 * this lookup requires key->peer to be MCTP_ADDR_ANY,
			 * it doesn't match just any key->peer.
			 */
			any_key = mctp_lookup_key(net, skb, netid,
						  MCTP_ADDR_ANY, &f);
			if (any_key) {
				msk = container_of(any_key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&any_key->lock, f);
			}
		}

		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			sock_queue_rcv_skb(&msk->sk, skb);
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_REPLIED);
				key = NULL;
			}
			rc = 0;
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop; we'll free the skb on exit from
			 * this function.
			 */
			rc = mctp_key_add(key, msk);
			if (!rc)
				trace_mctp_key_acquire(key);

			/* we don't need to release key->lock on exit, so
			 * clean up here and suppress the unlock via
			 * setting to NULL
			 */
			mctp_key_unref(key);
			key = NULL;
		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_INVALIDATED);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
			}
		}
	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head)
			rc = -EINVAL;
		else
			rc = mctp_frag_queue(key, skb);

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (!rc && flags & MCTP_HDR_FLAG_EOM) {
			sock_queue_rcv_skb(key->sk, key->reasm_head);
			key->reasm_head = NULL;
			__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
			key = NULL;
		}
	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
	if (any_key)
		mctp_key_unref(any_key);
out:
	if (rc)
		kfree_skb(skb);
	return rc;
}

static unsigned int mctp_route_mtu(struct mctp_route *rt)
{
	return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
}

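/* The ?: above falls back to the device MTU when no route-specific MTU is
 * set; e.g. (illustrative values) rt->mtu == 0 on a device with an MTU of
 * 254 yields 254, while an explicit rt->mtu of 68 takes precedence.
 */
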
static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_hdr *hdr = mctp_hdr(skb);
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	unsigned int mtu;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);

	mtu = READ_ONCE(skb->dev->mtu);
	if (skb->len > mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (cb->ifindex) {
		/* direct route; use the hwaddr we stashed in sendmsg */
		if (cb->halen != skb->dev->addr_len) {
			/* sanity check, sendmsg should have already caught this */
			kfree_skb(skb);
			return -EMSGSIZE;
		}
		daddr = cb->haddr;
	} else {
		/* If lookup fails let the device handle daddr==NULL */
		if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
			daddr = daddr_buf;
	}

	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc < 0) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	mctp_flow_prepare_output(skb, route->dev);

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	rt->output = mctp_route_discard;

	return rt;
}

unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	key->expiry = jiffies + mctp_key_lifetime;
	timer_reduce(&msk->key_expiry, key->expiry);

	/* we hold the net->key_lock here, allowing updates to both
	 * key and socket lists
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	refcount_inc(&key->refs);
}

/* Allocate a locally-owned tag value for (local, peer), and reserve
 * it for the socket msk
 */
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
					 unsigned int netid,
					 mctp_eid_t local, mctp_eid_t peer,
					 bool manual, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (peer == MCTP_ADDR_NULL)
		peer = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* tags are net-specific */
		if (tmp->net != netid)
			continue;

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		/* Since we're avoiding conflicting entries, match peer and
		 * local addresses, including with a wildcard on ANY. See
		 * 'A note on key allocations' for background.
		 */
		if (peer != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->peer_addr, peer))
			continue;

		if (local != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->local_addr, local))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		trace_mctp_key_acquire(key);

		key->manual_alloc = manual;
		*tagp = key->tag;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits) {
		mctp_key_unref(key);
		return ERR_PTR(-EBUSY);
	}

	return key;
}

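/*
 * Illustrative allocation, with hypothetical values: if existing keys own
 * tags 0 and 3 for this (net, local, peer) pair, the walk above leaves
 *
 *	tagbits == 0xff & ~BIT(0) & ~BIT(3);	// == 0xf6
 *	key->tag == __ffs(0xf6);		// == 1, lowest free tag
 *
 * and only a fully-cleared tagbits results in -EBUSY.
 */
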
static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
						    unsigned int netid,
						    mctp_eid_t daddr,
						    u8 req_tag, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;

	req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
	key = NULL;

	spin_lock_irqsave(&mns->keys_lock, flags);

	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		if (tmp->net != netid)
			continue;

		if (tmp->tag != req_tag)
			continue;

		if (!mctp_address_matches(tmp->peer_addr, daddr))
			continue;

		if (!tmp->manual_alloc)
			continue;

		spin_lock(&tmp->lock);
		if (tmp->valid) {
			key = tmp;
			refcount_inc(&key->refs);
			spin_unlock(&tmp->lock);
			break;
		}
		spin_unlock(&tmp->lock);
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!key)
		return ERR_PTR(-ENOENT);

	if (tagp)
		*tagp = key->tag;

	return key;
}

/* routing lookups */
static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return READ_ONCE(rt->dev->net) == net &&
		rt->min <= eid && rt->max >= eid;
}

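/* e.g. (illustrative values): a route with min == 8 and max == 10 matches
 * EIDs 8, 9 and 10 on its network, and nothing else.
 */
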
/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	return rt1->dev->net == rt2->dev->net &&
		rt1->min == rt2->min &&
		rt1->max == rt2->max;
}

struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
				     mctp_eid_t daddr)
{
	struct mctp_route *tmp, *rt = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		/* TODO: add metrics */
		if (mctp_rt_match_eid(tmp, dnet, daddr)) {
			if (refcount_inc_not_zero(&tmp->refs)) {
				rt = tmp;
				break;
			}
		}
	}

	rcu_read_unlock();

	return rt;
}

static struct mctp_route *mctp_route_lookup_null(struct net *net,
						 struct net_device *dev)
{
	struct mctp_route *tmp, *rt = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
		if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL &&
		    refcount_inc_not_zero(&tmp->refs)) {
			rt = tmp;
			break;
		}
	}

	rcu_read_unlock();

	return rt;
}

static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size, headroom;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* keep same headroom as the original skb */
	headroom = skb_headroom(skb);

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, headroom);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* we need to copy the extensions, for MCTP flow data */
		skb_ext_copy(skb2, skb);

		/* do route */
		rc = rt->output(rt, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	consume_skb(skb);
	return rc;
}

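/*
 * Worked example (illustrative sizes): fragmenting a 200-byte message with
 * mtu == 68 and hlen == 4 gives 64 bytes of payload per fragment:
 *
 *	size = min(68 - 4, 200 - pos);
 *
 * so four fragments of 64, 64, 64 and 8 bytes, carrying seq 0, 1, 2, 3;
 * SOM is set on the first and EOM on the last.
 */
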
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct mctp_route tmp_rt = {0};
	struct mctp_sk_key *key;
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int netid;
	unsigned int mtu;
	mctp_eid_t saddr;
	bool ext_rt;
	int rc;
	u8 tag;

	rc = -ENODEV;

	if (rt) {
		ext_rt = false;
		if (WARN_ON(!rt->dev))
			goto out_release;

	} else if (cb->ifindex) {
		struct net_device *dev;

		ext_rt = true;
		rt = &tmp_rt;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
		if (!dev) {
			rcu_read_unlock();
			goto out_free;
		}
		rt->dev = __mctp_dev_get(dev);
		rcu_read_unlock();

		if (!rt->dev)
			goto out_release;

		/* establish temporary route - we set up enough to keep
		 * mctp_route_output happy
		 */
		rt->output = mctp_route_output;
		rt->mtu = 0;

	} else {
		rc = -EINVAL;
		goto out_free;
	}

	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
	if (rt->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = rt->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
	netid = READ_ONCE(rt->dev->net);

	if (rc)
		goto out_release;

	if (req_tag & MCTP_TAG_OWNER) {
		if (req_tag & MCTP_TAG_PREALLOC)
			key = mctp_lookup_prealloc_tag(msk, netid, daddr,
						       req_tag, &tag);
		else
			key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
						   false, &tag);

		if (IS_ERR(key)) {
			rc = PTR_ERR(key);
			goto out_release;
		}
		mctp_skb_set_flow(skb, key);
		/* done with the key in this scope */
		mctp_key_unref(key);
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		key = NULL;
		tag = req_tag & MCTP_TAG_MASK;
	}

	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = rt->dev->dev;

	/* cb->net will have been set on initial ingress */
	cb->net = netid;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = mctp_route_mtu(rt);

	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
			MCTP_HDR_FLAG_EOM | tag;
		rc = rt->output(rt, skb);
	} else {
		rc = mctp_do_fragment_route(rt, skb, mtu, tag);
	}

	/* route output functions consume the skb, even on error */
	skb = NULL;

out_release:
	if (!ext_rt)
		mctp_route_release(rt);

	mctp_dev_put(tmp_rt.dev);

out_free:
	kfree_skb(skb);

	return rc;
}

/* route management */
static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			  unsigned int daddr_extent, unsigned int mtu,
			  unsigned char type)
{
	int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *ert;

	if (!mctp_address_unicast(daddr_start))
		return -EINVAL;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	switch (type) {
	case RTN_LOCAL:
		rtfn = mctp_route_input;
		break;
	case RTN_UNICAST:
		rtfn = mctp_route_output;
		break;
	default:
		return -EINVAL;
	}

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	rt->dev = mdev;
	mctp_dev_hold(rt->dev);
	rt->type = type;
	rt->output = rtfn;

	ASSERT_RTNL();
	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert)) {
			mctp_route_release(rt);
			return -EEXIST;
		}
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}

static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
			     unsigned int daddr_extent, unsigned char type)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev &&
		    rt->min == daddr_start && rt->max == daddr_end &&
		    rt->type == type) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}

/* Incoming packet-handling */
static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_route *rt;
	struct mctp_hdr *mh;

	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	/* source must be valid unicast or null; drop reserved ranges and
	 * broadcast
	 */
	if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
		goto err_drop;

	/* dest address: as above, but allow broadcast */
	if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
	      mctp_address_broadcast(mh->dest)))
		goto err_drop;

	/* MCTP drivers must populate halen/haddr */
	if (dev->type == ARPHRD_MCTP) {
		cb = mctp_cb(skb);
	} else {
		cb = __mctp_cb(skb);
		cb->halen = 0;
	}
	cb->net = READ_ONCE(mdev->net);
	cb->ifindex = dev->ifindex;

	rt = mctp_route_lookup(net, cb->net, mh->dest);

	/* NULL EID, but addressed to our physical address */
	if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rt = mctp_route_lookup_null(net, dev);

	if (!rt)
		goto err_drop;

	rt->output(rt, skb);
	mctp_route_release(rt);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}

static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};

/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST] = { .type = NLA_U8 },
	[RTA_METRICS] = { .type = NLA_NESTED },
	[RTA_OIF] = { .type = NLA_U32 },
};

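/*
 * Illustrative only: a minimal RTM_NEWROUTE request accepted by the
 * handlers below carries a struct rtmsg with rtm_family == AF_MCTP and
 * rtm_type == RTN_UNICAST, plus an RTA_DST attribute (u8 EID), an RTA_OIF
 * attribute (u32 ifindex), and optionally an RTAX_MTU nested under
 * RTA_METRICS.
 */
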
/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
 * tb must hold RTA_MAX+1 elements.
 */
static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct netlink_ext_ack *extack,
			      struct nlattr **tb, struct rtmsg **rtm,
			      struct mctp_dev **mdev, mctp_eid_t *daddr_start)
{
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	unsigned int ifindex;
	int rc;

	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
			 rta_mctp_policy, extack);
	if (rc < 0) {
		NL_SET_ERR_MSG(extack, "incorrect format");
		return rc;
	}

	if (!tb[RTA_DST]) {
		NL_SET_ERR_MSG(extack, "dst EID missing");
		return -EINVAL;
	}
	*daddr_start = nla_get_u8(tb[RTA_DST]);

	if (!tb[RTA_OIF]) {
		NL_SET_ERR_MSG(extack, "ifindex missing");
		return -EINVAL;
	}
	ifindex = nla_get_u32(tb[RTA_OIF]);

	*rtm = nlmsg_data(nlh);
	if ((*rtm)->rtm_family != AF_MCTP) {
		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
		return -EINVAL;
	}

	dev = __dev_get_by_index(net, ifindex);
	if (!dev) {
		NL_SET_ERR_MSG(extack, "bad ifindex");
		return -ENODEV;
	}
	*mdev = mctp_dev_get_rtnl(dev);
	if (!*mdev)
		return -ENODEV;

	if (dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack, "no routes to loopback");
		return -EINVAL;
	}

	return 0;
}

static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
	[RTAX_MTU] = { .type = NLA_U32 },
};

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	struct nlattr *tbx[RTAX_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	unsigned int mtu;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	if (rtm->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
		return -EINVAL;
	}

	mtu = 0;
	if (tb[RTA_METRICS]) {
		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
				      rta_metrics_policy, NULL);
		if (rc < 0)
			return rc;
		if (tbx[RTAX_MTU])
			mtu = nla_get_u32(tbx[RTAX_MTU]);
	}

	rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
			    rtm->rtm_type);
	return rc;
}

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RTA_MAX + 1];
	mctp_eid_t daddr_start;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	int rc;

	rc = mctp_route_nlparse(skb, nlh, extack, tb,
				&rtm, &mdev, &daddr_start);
	if (rc < 0)
		return rc;

	/* we only have unicast routes */
	if (rtm->rtm_type != RTN_UNICAST)
		return -EINVAL;

	rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
	return rc;
}

static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *hdr;
	void *metrics;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->rtm_family = AF_MCTP;

	/* we use the _len fields as a number of EIDs, rather than
	 * a number of bits in the address
	 */
	hdr->rtm_dst_len = rt->max - rt->min;
	hdr->rtm_src_len = 0;
	hdr->rtm_tos = 0;
	hdr->rtm_table = RT_TABLE_DEFAULT;
	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
	hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
		goto cancel;

	/* TODO: conditional neighbour physaddr? */

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

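/* Encoding example (illustrative): a route spanning EIDs 8..10 is dumped
 * with RTA_DST == 8 and rtm_dst_len == 2, since the _len fields count
 * EIDs past the start rather than prefix bits.
 */
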
static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}
	rcu_read_unlock();

	cb->args[0] = idx;

	return skb->len;
}

/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	INIT_HLIST_HEAD(&ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));

	return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
};

int __init mctp_routes_init(void)
{
	int err;

	dev_add_pack(&mctp_packet_type);

	err = register_pernet_subsys(&mctp_net_ops);
	if (err)
		goto err_pernet;

	err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
	if (err)
		goto err_rtnl;

	return 0;

err_rtnl:
	unregister_pernet_subsys(&mctp_net_ops);
err_pernet:
	dev_remove_pack(&mctp_packet_type);
	return err;
}

void mctp_routes_exit(void)
{
	rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
	unregister_pernet_subsys(&mctp_net_ops);
	dev_remove_pack(&mctp_packet_type);
}

#if IS_ENABLED(CONFIG_MCTP_TEST)
#include "test/route-test.c"
#endif