1 // SPDX-License-Identifier: GPL-2.0
3 * Management Component Transport Protocol (MCTP)
5 * Copyright (c) 2021 Code Construct
6 * Copyright (c) 2021 Google
9 #include <linux/compat.h>
10 #include <linux/if_arp.h>
11 #include <linux/net.h>
12 #include <linux/mctp.h>
13 #include <linux/module.h>
14 #include <linux/socket.h>
17 #include <net/mctpdevice.h>
20 #define CREATE_TRACE_POINTS
21 #include <trace/events/mctp.h>
23 /* socket implementation */
/* Forward declaration of the key-expiry timer callback; the definition
 * appears further down in this file.
 */
25 static void mctp_sk_expire_keys(struct timer_list
*timer
);
/* Socket release handler (installed as .release in mctp_dgram_ops).
 *
 * Hands the struct sock behind @sock to its protocol close() callback with a
 * timeout argument of 0.
 *
 * NOTE(review): the embedded line numbers jump from 29 to 33 and stop there,
 * so any guard around the close() call and the return value are not visible
 * in this listing.
 */
27 static int mctp_release(struct socket
*sock
)
29 struct sock
*sk
= sock
->sk
;
33 sk
->sk_prot
->close(sk
, 0);
39 /* Generic sockaddr checks, padding checks only so far */
/* Returns true when both padding fields of @addr (__smctp_pad0 and
 * __smctp_pad1) are zero, false otherwise.
 */
40 static bool mctp_sockaddr_is_ok(const struct sockaddr_mctp
*addr
)
42 return !addr
->__smctp_pad0
&& !addr
->__smctp_pad1
;
/* Padding check for the extended sockaddr: returns true when the three
 * bytes __smctp_pad0[0], [1] and [2] of @addr are all zero.
 */
45 static bool mctp_sockaddr_ext_is_ok(const struct sockaddr_mctp_ext
*addr
)
47 return !addr
->__smctp_pad0
[0] &&
48 !addr
->__smctp_pad0
[1] &&
49 !addr
->__smctp_pad0
[2];
/* Bind handler for MCTP sockets.
 *
 * Visible validation, in order: @addrlen must cover a struct sockaddr_mctp,
 * the family must be AF_MCTP, the caller must hold CAP_NET_BIND_SERVICE, and
 * the sockaddr padding must pass mctp_sockaddr_is_ok(). The network, address
 * and type (masked with 0x7f so the IC bit is ignored) are then stored in
 * the mctp_sock, and the socket is added through the protocol hash()
 * callback.
 *
 * NOTE(review): the statements that follow each check (error returns, any
 * socket locking, the handling under the rebind TODO) are missing from this
 * listing, so the exact error codes cannot be confirmed from here.
 */
52 static int mctp_bind(struct socket
*sock
, struct sockaddr
*addr
, int addrlen
)
54 struct sock
*sk
= sock
->sk
;
55 struct mctp_sock
*msk
= container_of(sk
, struct mctp_sock
, sk
);
56 struct sockaddr_mctp
*smctp
;
59 if (addrlen
< sizeof(*smctp
))
62 if (addr
->sa_family
!= AF_MCTP
)
65 if (!capable(CAP_NET_BIND_SERVICE
))
68 /* it's a valid sockaddr for MCTP, cast and do protocol checks */
69 smctp
= (struct sockaddr_mctp
*)addr
;
71 if (!mctp_sockaddr_is_ok(smctp
))
76 /* TODO: allow rebind */
81 msk
->bind_net
= smctp
->smctp_network
;
82 msk
->bind_addr
= smctp
->smctp_addr
.s_addr
;
83 msk
->bind_type
= smctp
->smctp_type
& 0x7f; /* ignore the IC bit */
85 rc
= sk
->sk_prot
->hash(sk
);
/* Send handler: builds an skb holding the message type byte followed by the
 * user payload and passes it to mctp_local_output().
 *
 * The destination is taken from msg->msg_name. Visible checks on it: minimum
 * length, AF_MCTP family, zero padding, no tag bits outside tagbits, and
 * MCTP_TAG_PREALLOC only in combination with MCTP_TAG_OWNER. CAP_NET_RAW is
 * also tested. A network of MCTP_NET_ANY is replaced by the default net of
 * the socket namespace.
 *
 * The headroom (hlen) comes either from the device named by the extended
 * sockaddr (when msk->addr_ext is set and the address is long enough, and
 * the supplied halen equals the device addr_len) or from the device of the
 * route returned by mctp_route_lookup().
 *
 * NOTE(review): error returns, the condition leading to -EDESTADDRREQ, any
 * RCU locking around dev_get_by_index_rcu() and the cleanup paths are
 * missing from this listing.
 */
93 static int mctp_sendmsg(struct socket
*sock
, struct msghdr
*msg
, size_t len
)
95 DECLARE_SOCKADDR(struct sockaddr_mctp
*, addr
, msg
->msg_name
);
96 int rc
, addrlen
= msg
->msg_namelen
;
97 struct sock
*sk
= sock
->sk
;
98 struct mctp_sock
*msk
= container_of(sk
, struct mctp_sock
, sk
);
99 struct mctp_skb_cb
*cb
;
100 struct mctp_route
*rt
;
101 struct sk_buff
*skb
= NULL
;
105 const u8 tagbits
= MCTP_TAG_MASK
| MCTP_TAG_OWNER
|
108 if (addrlen
< sizeof(struct sockaddr_mctp
))
110 if (addr
->smctp_family
!= AF_MCTP
)
112 if (!mctp_sockaddr_is_ok(addr
))
114 if (addr
->smctp_tag
& ~tagbits
)
116 /* can't preallocate a non-owned tag */
117 if (addr
->smctp_tag
& MCTP_TAG_PREALLOC
&&
118 !(addr
->smctp_tag
& MCTP_TAG_OWNER
))
122 /* TODO: connect()ed sockets */
123 return -EDESTADDRREQ
;
126 if (!capable(CAP_NET_RAW
))
129 if (addr
->smctp_network
== MCTP_NET_ANY
)
130 addr
->smctp_network
= mctp_default_net(sock_net(sk
));
132 /* direct addressing */
133 if (msk
->addr_ext
&& addrlen
>= sizeof(struct sockaddr_mctp_ext
)) {
134 DECLARE_SOCKADDR(struct sockaddr_mctp_ext
*,
135 extaddr
, msg
->msg_name
);
136 struct net_device
*dev
;
140 dev
= dev_get_by_index_rcu(sock_net(sk
), extaddr
->smctp_ifindex
);
141 /* check for correct halen */
142 if (dev
&& extaddr
->smctp_halen
== dev
->addr_len
) {
143 hlen
= LL_RESERVED_SPACE(dev
) + sizeof(struct mctp_hdr
);
151 rt
= mctp_route_lookup(sock_net(sk
), addr
->smctp_network
,
152 addr
->smctp_addr
.s_addr
);
157 hlen
= LL_RESERVED_SPACE(rt
->dev
->dev
) + sizeof(struct mctp_hdr
);
/* allocation covers the reserved headroom, one type byte and the payload */
160 skb
= sock_alloc_send_skb(sk
, hlen
+ 1 + len
,
161 msg
->msg_flags
& MSG_DONTWAIT
, &rc
);
165 skb_reserve(skb
, hlen
);
167 /* set type as first byte in payload */
168 *(u8
*)skb_put(skb
, 1) = addr
->smctp_type
;
170 rc
= memcpy_from_msg((void *)skb_put(skb
, len
), msg
, len
);
176 cb
->net
= addr
->smctp_network
;
179 /* fill extended address in cb */
180 DECLARE_SOCKADDR(struct sockaddr_mctp_ext
*,
181 extaddr
, msg
->msg_name
);
183 if (!mctp_sockaddr_ext_is_ok(extaddr
) ||
184 extaddr
->smctp_halen
> sizeof(cb
->haddr
)) {
189 cb
->ifindex
= extaddr
->smctp_ifindex
;
190 /* smctp_halen is checked above */
191 cb
->halen
= extaddr
->smctp_halen
;
192 memcpy(cb
->haddr
, extaddr
->smctp_haddr
, cb
->halen
);
195 rc
= mctp_local_output(sk
, rt
, skb
, addr
->smctp_addr
.s_addr
,
/* Receive handler: dequeues one datagram with skb_recv_datagram(), reads the
 * leading message-type byte and copies the rest of the message to the
 * caller.
 *
 * Only MSG_DONTWAIT, MSG_TRUNC and MSG_PEEK are accepted in flags. The copy
 * starts at offset 1 of the skb because byte 0 holds the type; msglen is
 * therefore skb->len - 1. The source address written to msg->msg_name is
 * assembled from the skb control block (net) and the MCTP header (src, tag
 * and tag-owner bits); the extended fields (ifindex, halen, haddr) are
 * written through a sockaddr_mctp_ext view of the same buffer.
 *
 * NOTE(review): the conditions guarding the MSG_TRUNC flag, the address
 * fill-in and the extended-address section, as well as the return value,
 * are missing from this listing.
 */
205 static int mctp_recvmsg(struct socket
*sock
, struct msghdr
*msg
, size_t len
,
208 DECLARE_SOCKADDR(struct sockaddr_mctp
*, addr
, msg
->msg_name
);
209 struct sock
*sk
= sock
->sk
;
210 struct mctp_sock
*msk
= container_of(sk
, struct mctp_sock
, sk
);
216 if (flags
& ~(MSG_DONTWAIT
| MSG_TRUNC
| MSG_PEEK
))
219 skb
= skb_recv_datagram(sk
, flags
, &rc
);
228 /* extract message type, remove from data */
229 type
= *((u8
*)skb
->data
);
230 msglen
= skb
->len
- 1;
233 msg
->msg_flags
|= MSG_TRUNC
;
237 rc
= skb_copy_datagram_msg(skb
, 1, msg
, len
);
241 sock_recv_cmsgs(msg
, sk
, skb
);
244 struct mctp_skb_cb
*cb
= mctp_cb(skb
);
245 /* TODO: expand mctp_skb_cb for header fields? */
246 struct mctp_hdr
*hdr
= mctp_hdr(skb
);
248 addr
= msg
->msg_name
;
249 addr
->smctp_family
= AF_MCTP
;
250 addr
->__smctp_pad0
= 0;
251 addr
->smctp_network
= cb
->net
;
252 addr
->smctp_addr
.s_addr
= hdr
->src
;
253 addr
->smctp_type
= type
;
254 addr
->smctp_tag
= hdr
->flags_seq_tag
&
255 (MCTP_HDR_TAG_MASK
| MCTP_HDR_FLAG_TO
);
256 addr
->__smctp_pad1
= 0;
257 msg
->msg_namelen
= sizeof(*addr
);
260 DECLARE_SOCKADDR(struct sockaddr_mctp_ext
*, ae
,
262 msg
->msg_namelen
= sizeof(*ae
);
263 ae
->smctp_ifindex
= cb
->ifindex
;
264 ae
->smctp_halen
= cb
->halen
;
265 memset(ae
->__smctp_pad0
, 0x0, sizeof(ae
->__smctp_pad0
));
/* zero the full haddr field, then copy only the cb->halen valid bytes */
266 memset(ae
->smctp_haddr
, 0x0, sizeof(ae
->smctp_haddr
));
267 memcpy(ae
->smctp_haddr
, cb
->haddr
, cb
->halen
);
273 if (flags
& MSG_TRUNC
)
277 skb_free_datagram(sk
, skb
);
/* Tear down @key: emits the key-release tracepoint with @reason, detaches
 * any partially reassembled skb (reasm_head) and marks reassembly dead,
 * releases the key from its device, then drops key->lock using @flags. If
 * the key is still hashed it is unlinked from both its hlist and sklist
 * lists.
 *
 * Locking, per the sparse annotations: entered with key->lock held (it is
 * released here) and with net->mctp.keys_lock held (it stays held).
 *
 * NOTE(review): what is done with the detached skb and with the list
 * reference is missing from this listing.
 */
281 /* We're done with the key; invalidate, stop reassembly, and remove from lists.
283 static void __mctp_key_remove(struct mctp_sk_key
*key
, struct net
*net
,
284 unsigned long flags
, unsigned long reason
)
285 __releases(&key
->lock
)
286 __must_hold(&net
->mctp
.keys_lock
)
290 trace_mctp_key_release(key
, reason
);
291 skb
= key
->reasm_head
;
292 key
->reasm_head
= NULL
;
293 key
->reasm_dead
= true;
295 mctp_dev_release_key(key
->dev
, key
);
296 spin_unlock_irqrestore(&key
->lock
, flags
);
298 if (!hlist_unhashed(&key
->hlist
)) {
299 hlist_del_init(&key
->hlist
);
300 hlist_del_init(&key
->sklist
);
301 /* unref for the lists */
/* setsockopt handler. The level is compared against SOL_MCTP; for
 * MCTP_OPT_ADDR_EXT the option length must be exactly sizeof(int) and the
 * value is copied in from @optval with copy_from_sockptr().
 *
 * NOTE(review): the error codes and the statement that consumes the copied
 * value are missing from this listing.
 */
308 static int mctp_setsockopt(struct socket
*sock
, int level
, int optname
,
309 sockptr_t optval
, unsigned int optlen
)
311 struct mctp_sock
*msk
= container_of(sock
->sk
, struct mctp_sock
, sk
);
314 if (level
!= SOL_MCTP
)
317 if (optname
== MCTP_OPT_ADDR_EXT
) {
318 if (optlen
!= sizeof(int))
320 if (copy_from_sockptr(&val
, optval
, sizeof(int)))
/* getsockopt handler. The level is compared against SOL_MCTP and the
 * caller-supplied length is fetched from @optlen. For MCTP_OPT_ADDR_EXT that
 * length must equal sizeof(int); the value copied out is msk->addr_ext
 * normalised to 0 or 1.
 *
 * NOTE(review): the error codes returned by the individual checks are
 * missing from this listing.
 */
329 static int mctp_getsockopt(struct socket
*sock
, int level
, int optname
,
330 char __user
*optval
, int __user
*optlen
)
332 struct mctp_sock
*msk
= container_of(sock
->sk
, struct mctp_sock
, sk
);
335 if (level
!= SOL_MCTP
)
338 if (get_user(len
, optlen
))
341 if (optname
== MCTP_OPT_ADDR_EXT
) {
342 if (len
!= sizeof(int))
344 val
= !!msk
->addr_ext
;
345 if (copy_to_user(optval
, &val
, len
))
/* Read a tag-control ioctl argument from the user pointer in @arg into @ctl.
 *
 * A struct mctp_ioc_tag_ctl is declared locally for the compat layout. On
 * that path peer_addr, flags and tag are carried over from it, while net is
 * set to MCTP_INITIAL_DEFAULT_NET and local_addr to MCTP_ADDR_ANY. The final
 * visible check singles out a local_addr that is neither MCTP_ADDR_ANY nor
 * MCTP_ADDR_NULL.
 *
 * NOTE(review): the third parameter, the selection between the two layouts
 * and all error returns are missing from this listing.
 */
353 /* helpers for reading/writing the tag ioc, handling compatibility across the
354 * two versions, and some basic API error checking
356 static int mctp_ioctl_tag_copy_from_user(unsigned long arg
,
357 struct mctp_ioc_tag_ctl2
*ctl
,
360 struct mctp_ioc_tag_ctl ctl_compat
;
369 size
= sizeof(ctl_compat
);
373 rc
= copy_from_user(ptr
, (void __user
*)arg
, size
);
378 /* compat, using defaults for new fields */
379 ctl
->net
= MCTP_INITIAL_DEFAULT_NET
;
380 ctl
->peer_addr
= ctl_compat
.peer_addr
;
381 ctl
->local_addr
= MCTP_ADDR_ANY
;
382 ctl
->flags
= ctl_compat
.flags
;
383 ctl
->tag
= ctl_compat
.tag
;
389 if (ctl
->local_addr
!= MCTP_ADDR_ANY
&&
390 ctl
->local_addr
!= MCTP_ADDR_NULL
)
/* Write @ctl back to the user pointer in @arg with copy_to_user().
 *
 * For the compat layout, peer_addr, tag and flags are copied into a local
 * struct mctp_ioc_tag_ctl and size is set to sizeof(ctl_compat).
 *
 * NOTE(review): the third parameter, the selection of ptr and size for the
 * non-compat layout and the return value are missing from this listing.
 */
396 static int mctp_ioctl_tag_copy_to_user(unsigned long arg
,
397 struct mctp_ioc_tag_ctl2
*ctl
,
400 struct mctp_ioc_tag_ctl ctl_compat
;
409 ctl_compat
.peer_addr
= ctl
->peer_addr
;
410 ctl_compat
.tag
= ctl
->tag
;
411 ctl_compat
.flags
= ctl
->flags
;
414 size
= sizeof(ctl_compat
);
417 rc
= copy_to_user((void __user
*)arg
, ptr
, size
);
/* Tag-allocation ioctl. Reads the request via
 * mctp_ioctl_tag_copy_from_user(), allocates a local tag for ctl.peer_addr
 * on ctl.net with mctp_alloc_local_tag(), and reports it back to user space
 * with MCTP_TAG_OWNER and MCTP_TAG_PREALLOC set in ctl.tag.
 *
 * The trailing section removes the freshly allocated key again under
 * net->mctp.keys_lock and key->lock, using MCTP_TRACE_KEY_DROPPED.
 *
 * NOTE(review): the condition that triggers that unwind (presumably a
 * failed copy back to user space - confirm), the validation between the
 * visible calls and the return values are missing from this listing.
 */
424 static int mctp_ioctl_alloctag(struct mctp_sock
*msk
, bool tagv2
,
427 struct net
*net
= sock_net(&msk
->sk
);
428 struct mctp_sk_key
*key
= NULL
;
429 struct mctp_ioc_tag_ctl2 ctl
;
434 rc
= mctp_ioctl_tag_copy_from_user(arg
, &ctl
, tagv2
);
441 key
= mctp_alloc_local_tag(msk
, ctl
.net
, MCTP_ADDR_ANY
,
442 ctl
.peer_addr
, true, &tag
);
446 ctl
.tag
= tag
| MCTP_TAG_OWNER
| MCTP_TAG_PREALLOC
;
447 rc
= mctp_ioctl_tag_copy_to_user(arg
, &ctl
, tagv2
);
450 /* Unwind our key allocation: the keys list lock needs to be
451 * taken before the individual key locks, and we need a valid
452 * flags value (fl2) to pass to __mctp_key_remove, hence the
453 * second spin_lock_irqsave() rather than a plain spin_lock().
455 spin_lock_irqsave(&net
->mctp
.keys_lock
, flags
);
456 spin_lock_irqsave(&key
->lock
, fl2
);
457 __mctp_key_remove(key
, net
, fl2
, MCTP_TRACE_KEY_DROPPED
);
459 spin_unlock_irqrestore(&net
->mctp
.keys_lock
, flags
);
// Tag-release ioctl. Reads the request via mctp_ioctl_tag_copy_from_user()
// and removes matching keys from the key list of this socket.
//
// Outside the MCTP_TAG_MASK bits the tag must carry exactly
// MCTP_TAG_OWNER | MCTP_TAG_PREALLOC. A peer_addr of MCTP_ADDR_NULL is
// replaced by MCTP_ADDR_ANY before matching. The keys on msk->keys are
// walked under net->mctp.keys_lock and, per key, key->lock; the visible
// match terms are manual_alloc, net and peer_addr. A matching key is removed
// with MCTP_TRACE_KEY_DROPPED.
//
// NOTE(review): the remaining match term(s), the statements between the
// remove and the per-key unlock, and the return value are missing from this
// listing.
467 static int mctp_ioctl_droptag(struct mctp_sock
*msk
, bool tagv2
,
470 struct net
*net
= sock_net(&msk
->sk
);
471 struct mctp_ioc_tag_ctl2 ctl
;
472 unsigned long flags
, fl2
;
473 struct mctp_sk_key
*key
;
474 struct hlist_node
*tmp
;
478 rc
= mctp_ioctl_tag_copy_from_user(arg
, &ctl
, tagv2
);
482 /* Must be a local tag, TO set, preallocated */
483 if ((ctl
.tag
& ~MCTP_TAG_MASK
) != (MCTP_TAG_OWNER
| MCTP_TAG_PREALLOC
))
486 tag
= ctl
.tag
& MCTP_TAG_MASK
;
489 if (ctl
.peer_addr
== MCTP_ADDR_NULL
)
490 ctl
.peer_addr
= MCTP_ADDR_ANY
;
492 spin_lock_irqsave(&net
->mctp
.keys_lock
, flags
);
493 hlist_for_each_entry_safe(key
, tmp
, &msk
->keys
, sklist
) {
494 /* we do an irqsave here, even though we know the irq state,
495 * so we have the flags to pass to __mctp_key_remove
497 spin_lock_irqsave(&key
->lock
, fl2
);
498 if (key
->manual_alloc
&&
499 ctl
.net
== key
->net
&&
500 ctl
.peer_addr
== key
->peer_addr
&&
502 __mctp_key_remove(key
, net
, fl2
,
503 MCTP_TRACE_KEY_DROPPED
);
506 spin_unlock_irqrestore(&key
->lock
, fl2
);
509 spin_unlock_irqrestore(&net
->mctp
.keys_lock
, flags
);
// ioctl dispatcher. SIOCMCTPALLOCTAG and SIOCMCTPALLOCTAG2 go to
// mctp_ioctl_alloctag(); SIOCMCTPDROPTAG and SIOCMCTPDROPTAG2 go to
// mctp_ioctl_droptag(). tagv2 is true when the command is the ...2 variant.
//
// NOTE(review): the switch statement itself and the result for any other
// command are missing from this listing.
514 static int mctp_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
)
516 struct mctp_sock
*msk
= container_of(sock
->sk
, struct mctp_sock
, sk
);
520 case SIOCMCTPALLOCTAG2
:
521 case SIOCMCTPALLOCTAG
:
522 tagv2
= cmd
== SIOCMCTPALLOCTAG2
;
523 return mctp_ioctl_alloctag(msk
, tagv2
, arg
);
524 case SIOCMCTPDROPTAG
:
525 case SIOCMCTPDROPTAG2
:
526 tagv2
= cmd
== SIOCMCTPDROPTAG2
;
527 return mctp_ioctl_droptag(msk
, tagv2
, arg
);
// Compat ioctl entry point. Converts the argument with compat_ptr() and
// forwards SIOCMCTPALLOCTAG and SIOCMCTPDROPTAG to mctp_ioctl() with the
// converted pointer.
//
// NOTE(review): the handling of any other command is missing from this
// listing.
534 static int mctp_compat_ioctl(struct socket
*sock
, unsigned int cmd
,
537 void __user
*argp
= compat_ptr(arg
);
540 /* These have compatible ptr layouts */
541 case SIOCMCTPALLOCTAG
:
542 case SIOCMCTPDROPTAG
:
543 return mctp_ioctl(sock
, cmd
, (unsigned long)argp
);
/* proto_ops table for MCTP datagram sockets. release, setsockopt,
 * getsockopt, sendmsg, recvmsg and compat_ioctl use the handlers defined in
 * this file; connect, socketpair, accept, getname, listen, shutdown and mmap
 * point at the generic sock_no_ stubs; poll uses datagram_poll and gettstamp
 * uses sock_gettstamp.
 *
 * NOTE(review): the initialisers on embedded lines 551, 553, 559 and 568
 * are missing from this listing.
 */
550 static const struct proto_ops mctp_dgram_ops
= {
552 .release
= mctp_release
,
554 .connect
= sock_no_connect
,
555 .socketpair
= sock_no_socketpair
,
556 .accept
= sock_no_accept
,
557 .getname
= sock_no_getname
,
558 .poll
= datagram_poll
,
560 .gettstamp
= sock_gettstamp
,
561 .listen
= sock_no_listen
,
562 .shutdown
= sock_no_shutdown
,
563 .setsockopt
= mctp_setsockopt
,
564 .getsockopt
= mctp_getsockopt
,
565 .sendmsg
= mctp_sendmsg
,
566 .recvmsg
= mctp_recvmsg
,
567 .mmap
= sock_no_mmap
,
569 .compat_ioctl
= mctp_compat_ioctl
,
/* Timer callback for the per-socket key_expiry timer (set up in
 * mctp_sk_init()).
 *
 * Walks msk->keys under net->mctp.keys_lock. Keys with manual_alloc set are
 * tested before key->lock is taken and, per the in-line comment, are not
 * expired. For the others, a key whose expiry lies before the current
 * jiffies is removed with MCTP_TRACE_KEY_TIMEOUT; otherwise its expiry is
 * folded into next_expiry, which tracks the earliest pending expiry. After
 * the walk the timer is re-armed for next_expiry if any key supplied one.
 *
 * NOTE(review): the loop-control statements after the manual_alloc test and
 * after the remove call are missing from this listing.
 */
573 static void mctp_sk_expire_keys(struct timer_list
*timer
)
575 struct mctp_sock
*msk
= container_of(timer
, struct mctp_sock
,
577 struct net
*net
= sock_net(&msk
->sk
);
578 unsigned long next_expiry
, flags
, fl2
;
579 struct mctp_sk_key
*key
;
580 struct hlist_node
*tmp
;
581 bool next_expiry_valid
= false;
583 spin_lock_irqsave(&net
->mctp
.keys_lock
, flags
);
585 hlist_for_each_entry_safe(key
, tmp
, &msk
->keys
, sklist
) {
586 /* don't expire. manual_alloc is immutable, no locking
589 if (key
->manual_alloc
)
592 spin_lock_irqsave(&key
->lock
, fl2
);
593 if (!time_after_eq(key
->expiry
, jiffies
)) {
594 __mctp_key_remove(key
, net
, fl2
,
595 MCTP_TRACE_KEY_TIMEOUT
);
599 if (next_expiry_valid
) {
600 if (time_before(key
->expiry
, next_expiry
))
601 next_expiry
= key
->expiry
;
603 next_expiry
= key
->expiry
;
604 next_expiry_valid
= true;
606 spin_unlock_irqrestore(&key
->lock
, fl2
);
609 spin_unlock_irqrestore(&net
->mctp
.keys_lock
, flags
);
611 if (next_expiry_valid
)
612 mod_timer(timer
, next_expiry
);
// Per-socket init hook (installed as .init in mctp_proto). Initialises the
// key list head of the mctp_sock and sets up the key_expiry timer with
// mctp_sk_expire_keys as its callback.
//
// NOTE(review): the return statement is missing from this listing.
615 static int mctp_sk_init(struct sock
*sk
)
617 struct mctp_sock
*msk
= container_of(sk
, struct mctp_sock
, sk
);
619 INIT_HLIST_HEAD(&msk
->keys
);
620 timer_setup(&msk
->key_expiry
, mctp_sk_expire_keys
, 0);
// Protocol close hook (installed as .close in mctp_proto). Delegates to
// sk_common_release(); the timeout argument is not used in the visible code.
624 static void mctp_sk_close(struct sock
*sk
, long timeout
)
626 sk_common_release(sk
);
// Protocol hash hook (installed as .hash in mctp_proto). Adds the socket to
// the net->mctp.binds list with sk_add_node_rcu() while holding the
// net->mctp.bind_lock mutex.
//
// NOTE(review): the return statement is missing from this listing.
629 static int mctp_sk_hash(struct sock
*sk
)
631 struct net
*net
= sock_net(sk
);
633 mutex_lock(&net
->mctp
.bind_lock
);
634 sk_add_node_rcu(sk
, &net
->mctp
.binds
);
635 mutex_unlock(&net
->mctp
.bind_lock
);
// Protocol unhash hook (installed as .unhash in mctp_proto).
//
// Steps, in order: remove the socket from the bind list under bind_lock;
// walk msk->keys under net->mctp.keys_lock and remove every key with
// MCTP_TRACE_KEY_CLOSED; set SOCK_DEAD on the sock while keys_lock is still
// held; finally call del_timer_sync() on the key_expiry timer.
640 static void mctp_sk_unhash(struct sock
*sk
)
642 struct mctp_sock
*msk
= container_of(sk
, struct mctp_sock
, sk
);
643 struct net
*net
= sock_net(sk
);
644 unsigned long flags
, fl2
;
645 struct mctp_sk_key
*key
;
646 struct hlist_node
*tmp
;
648 /* remove from any type-based binds */
649 mutex_lock(&net
->mctp
.bind_lock
);
650 sk_del_node_init_rcu(sk
);
651 mutex_unlock(&net
->mctp
.bind_lock
);
653 /* remove tag allocations */
654 spin_lock_irqsave(&net
->mctp
.keys_lock
, flags
);
655 hlist_for_each_entry_safe(key
, tmp
, &msk
->keys
, sklist
) {
656 spin_lock_irqsave(&key
->lock
, fl2
);
657 __mctp_key_remove(key
, net
, fl2
, MCTP_TRACE_KEY_CLOSED
);
659 sock_set_flag(sk
, SOCK_DEAD
);
660 spin_unlock_irqrestore(&net
->mctp
.keys_lock
, flags
);
662 /* Since there are no more tag allocations (we have removed all of the
663 * keys), stop any pending expiry events. the timer cannot be re-queued
664 * as the sk is no longer observable
666 del_timer_sync(&msk
->key_expiry
);
// Destructor installed as sk->sk_destruct by mctp_pf_create(). Purges any
// skbs still queued on the receive queue of the sock.
669 static void mctp_sk_destruct(struct sock
*sk
)
671 skb_queue_purge(&sk
->sk_receive_queue
);
// struct proto for MCTP sockets. The object size is sizeof(struct mctp_sock)
// and the init, close, hash and unhash hooks are the mctp_sk_ functions
// defined in this file.
//
// NOTE(review): the initialiser on embedded line 675 is missing from this
// listing.
674 static struct proto mctp_proto
= {
676 .owner
= THIS_MODULE
,
677 .obj_size
= sizeof(struct mctp_sock
),
678 .init
= mctp_sk_init
,
679 .close
= mctp_sk_close
,
680 .hash
= mctp_sk_hash
,
681 .unhash
= mctp_sk_unhash
,
// Socket-family create hook (installed as .create in mctp_pf).
//
// Only SOCK_DGRAM sockets are accepted; other types get -ESOCKTNOSUPPORT.
// The socket is given the mctp_dgram_ops table and state SS_UNCONNECTED, a
// sock is allocated with sk_alloc() for PF_MCTP and initialised with
// sock_init_data(), mctp_sk_destruct is installed as destructor, and the
// protocol init hook is called when one is set.
//
// NOTE(review): the condition that leads to -EPROTONOSUPPORT, the assignment
// of ops to the socket, the allocation-failure handling and the final return
// are missing from this listing.
684 static int mctp_pf_create(struct net
*net
, struct socket
*sock
,
685 int protocol
, int kern
)
687 const struct proto_ops
*ops
;
693 return -EPROTONOSUPPORT
;
695 /* only datagram sockets are supported */
696 if (sock
->type
!= SOCK_DGRAM
)
697 return -ESOCKTNOSUPPORT
;
700 ops
= &mctp_dgram_ops
;
702 sock
->state
= SS_UNCONNECTED
;
705 sk
= sk_alloc(net
, PF_MCTP
, GFP_KERNEL
, proto
, kern
);
709 sock_init_data(sock
, sk
);
710 sk
->sk_destruct
= mctp_sk_destruct
;
713 if (sk
->sk_prot
->init
)
714 rc
= sk
->sk_prot
->init(sk
);
/* net_proto_family record passed to sock_register() in mctp_init(); new
 * sockets of this family are created through mctp_pf_create().
 *
 * NOTE(review): the initialiser on embedded line 728 is missing from this
 * listing.
 */
727 static struct net_proto_family mctp_pf
= {
729 .create
= mctp_pf_create
,
730 .owner
= THIS_MODULE
,
/* Subsystem init. Two BUILD_BUG_ON() checks pin the uapi tag constants to
 * the header flag and mask values at compile time. The function then
 * registers the socket family and the proto, and initialises routes,
 * neighbours and devices in that order.
 *
 * The goto targets (err_unreg_proto, err_unreg_routes, err_unreg_neigh)
 * indicate that a failure in a later step unwinds the earlier ones; the
 * visible tail of that unwind unregisters the proto and then the socket
 * family.
 *
 * NOTE(review): the rc checks, the labels themselves and the route and
 * neighbour teardown calls are missing from this listing.
 */
733 static __init
int mctp_init(void)
737 /* ensure our uapi tag definitions match the header format */
738 BUILD_BUG_ON(MCTP_TAG_OWNER
!= MCTP_HDR_FLAG_TO
);
739 BUILD_BUG_ON(MCTP_TAG_MASK
!= MCTP_HDR_TAG_MASK
);
741 pr_info("mctp: management component transport protocol core\n");
743 rc
= sock_register(&mctp_pf
);
747 rc
= proto_register(&mctp_proto
, 0);
751 rc
= mctp_routes_init();
753 goto err_unreg_proto
;
755 rc
= mctp_neigh_init();
757 goto err_unreg_routes
;
759 rc
= mctp_device_init();
761 goto err_unreg_neigh
;
770 proto_unregister(&mctp_proto
);
772 sock_unregister(PF_MCTP
);
/* Module exit. The visible part unregisters the proto and then the PF_MCTP
 * socket family.
 *
 * NOTE(review): embedded lines 779 to 781 are missing from this listing, so
 * any teardown performed before these two calls cannot be confirmed here.
 */
777 static __exit
void mctp_exit(void)
782 proto_unregister(&mctp_proto
);
783 sock_unregister(PF_MCTP
);
/* Registration: mctp_init is run as a subsys_initcall and mctp_exit is the
 * module exit hook. The remaining macros supply the module description and
 * author and declare the net-protocol alias for PF_MCTP.
 */
786 subsys_initcall(mctp_init
);
787 module_exit(mctp_exit
);
789 MODULE_DESCRIPTION("MCTP core");
790 MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
792 MODULE_ALIAS_NETPROTO(PF_MCTP
);