1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * inet_diag.c Module for monitoring INET transport protocols sockets.
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/fcntl.h>
12 #include <linux/random.h>
13 #include <linux/slab.h>
14 #include <linux/cache.h>
15 #include <linux/init.h>
16 #include <linux/time.h>
21 #include <net/inet_common.h>
22 #include <net/inet_connection_sock.h>
23 #include <net/inet_hashtables.h>
24 #include <net/inet_timewait_sock.h>
25 #include <net/inet6_hashtables.h>
26 #include <net/bpf_sk_storage.h>
27 #include <net/netlink.h>
29 #include <linux/inet.h>
30 #include <linux/stddef.h>
32 #include <linux/inet_diag.h>
33 #include <linux/sock_diag.h>
35 static const struct inet_diag_handler
**inet_diag_table
;
37 struct inet_diag_entry
{
46 #ifdef CONFIG_SOCK_CGROUP_DATA
51 static DEFINE_MUTEX(inet_diag_table_mutex
);
53 static const struct inet_diag_handler
*inet_diag_lock_handler(int proto
)
55 if (!inet_diag_table
[proto
])
56 sock_load_diag_module(AF_INET
, proto
);
58 mutex_lock(&inet_diag_table_mutex
);
59 if (!inet_diag_table
[proto
])
60 return ERR_PTR(-ENOENT
);
62 return inet_diag_table
[proto
];
65 static void inet_diag_unlock_handler(const struct inet_diag_handler
*handler
)
67 mutex_unlock(&inet_diag_table_mutex
);
70 void inet_diag_msg_common_fill(struct inet_diag_msg
*r
, struct sock
*sk
)
72 r
->idiag_family
= sk
->sk_family
;
74 r
->id
.idiag_sport
= htons(sk
->sk_num
);
75 r
->id
.idiag_dport
= sk
->sk_dport
;
76 r
->id
.idiag_if
= sk
->sk_bound_dev_if
;
77 sock_diag_save_cookie(sk
, r
->id
.idiag_cookie
);
79 #if IS_ENABLED(CONFIG_IPV6)
80 if (sk
->sk_family
== AF_INET6
) {
81 *(struct in6_addr
*)r
->id
.idiag_src
= sk
->sk_v6_rcv_saddr
;
82 *(struct in6_addr
*)r
->id
.idiag_dst
= sk
->sk_v6_daddr
;
86 memset(&r
->id
.idiag_src
, 0, sizeof(r
->id
.idiag_src
));
87 memset(&r
->id
.idiag_dst
, 0, sizeof(r
->id
.idiag_dst
));
89 r
->id
.idiag_src
[0] = sk
->sk_rcv_saddr
;
90 r
->id
.idiag_dst
[0] = sk
->sk_daddr
;
93 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill
);
95 static size_t inet_sk_attr_size(struct sock
*sk
,
96 const struct inet_diag_req_v2
*req
,
99 const struct inet_diag_handler
*handler
;
102 handler
= inet_diag_table
[req
->sdiag_protocol
];
103 if (handler
&& handler
->idiag_get_aux_size
)
104 aux
= handler
->idiag_get_aux_size(sk
, net_admin
);
106 return nla_total_size(sizeof(struct tcp_info
))
107 + nla_total_size(sizeof(struct inet_diag_msg
))
108 + inet_diag_msg_attrs_size()
109 + nla_total_size(sizeof(struct inet_diag_meminfo
))
110 + nla_total_size(SK_MEMINFO_VARS
* sizeof(u32
))
111 + nla_total_size(TCP_CA_NAME_MAX
)
112 + nla_total_size(sizeof(struct tcpvegas_info
))
117 int inet_diag_msg_attrs_fill(struct sock
*sk
, struct sk_buff
*skb
,
118 struct inet_diag_msg
*r
, int ext
,
119 struct user_namespace
*user_ns
,
122 const struct inet_sock
*inet
= inet_sk(sk
);
124 if (nla_put_u8(skb
, INET_DIAG_SHUTDOWN
, sk
->sk_shutdown
))
127 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
128 * hence this needs to be included regardless of socket family.
130 if (ext
& (1 << (INET_DIAG_TOS
- 1)))
131 if (nla_put_u8(skb
, INET_DIAG_TOS
, inet
->tos
) < 0)
134 #if IS_ENABLED(CONFIG_IPV6)
135 if (r
->idiag_family
== AF_INET6
) {
136 if (ext
& (1 << (INET_DIAG_TCLASS
- 1)))
137 if (nla_put_u8(skb
, INET_DIAG_TCLASS
,
138 inet6_sk(sk
)->tclass
) < 0)
141 if (((1 << sk
->sk_state
) & (TCPF_LISTEN
| TCPF_CLOSE
)) &&
142 nla_put_u8(skb
, INET_DIAG_SKV6ONLY
, ipv6_only_sock(sk
)))
147 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
, sk
->sk_mark
))
150 if (ext
& (1 << (INET_DIAG_CLASS_ID
- 1)) ||
151 ext
& (1 << (INET_DIAG_TCLASS
- 1))) {
154 #ifdef CONFIG_SOCK_CGROUP_DATA
155 classid
= sock_cgroup_classid(&sk
->sk_cgrp_data
);
157 /* Fallback to socket priority if class id isn't set.
158 * Classful qdiscs use it as direct reference to class.
159 * For cgroup2 classid is always zero.
162 classid
= sk
->sk_priority
;
164 if (nla_put_u32(skb
, INET_DIAG_CLASS_ID
, classid
))
168 #ifdef CONFIG_SOCK_CGROUP_DATA
169 if (nla_put_u64_64bit(skb
, INET_DIAG_CGROUP_ID
,
170 cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
)),
175 r
->idiag_uid
= from_kuid_munged(user_ns
, sock_i_uid(sk
));
176 r
->idiag_inode
= sock_i_ino(sk
);
182 EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill
);
184 #define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
186 int inet_sk_diag_fill(struct sock
*sk
, struct inet_connection_sock
*icsk
,
187 struct sk_buff
*skb
, struct netlink_callback
*cb
,
188 const struct inet_diag_req_v2
*req
,
189 u16 nlmsg_flags
, bool net_admin
)
191 const struct tcp_congestion_ops
*ca_ops
;
192 const struct inet_diag_handler
*handler
;
193 struct inet_diag_dump_data
*cb_data
;
194 int ext
= req
->idiag_ext
;
195 struct inet_diag_msg
*r
;
196 struct nlmsghdr
*nlh
;
201 handler
= inet_diag_table
[req
->sdiag_protocol
];
204 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
205 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
210 BUG_ON(!sk_fullsock(sk
));
212 inet_diag_msg_common_fill(r
, sk
);
213 r
->idiag_state
= sk
->sk_state
;
215 r
->idiag_retrans
= 0;
217 if (inet_diag_msg_attrs_fill(sk
, skb
, r
, ext
,
218 sk_user_ns(NETLINK_CB(cb
->skb
).sk
),
222 if (ext
& (1 << (INET_DIAG_MEMINFO
- 1))) {
223 struct inet_diag_meminfo minfo
= {
224 .idiag_rmem
= sk_rmem_alloc_get(sk
),
225 .idiag_wmem
= READ_ONCE(sk
->sk_wmem_queued
),
226 .idiag_fmem
= sk
->sk_forward_alloc
,
227 .idiag_tmem
= sk_wmem_alloc_get(sk
),
230 if (nla_put(skb
, INET_DIAG_MEMINFO
, sizeof(minfo
), &minfo
) < 0)
234 if (ext
& (1 << (INET_DIAG_SKMEMINFO
- 1)))
235 if (sock_diag_put_meminfo(sk
, skb
, INET_DIAG_SKMEMINFO
))
239 * RAW sockets might have user-defined protocols assigned,
240 * so report the one supplied on socket creation.
242 if (sk
->sk_type
== SOCK_RAW
) {
243 if (nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))
248 handler
->idiag_get_info(sk
, r
, NULL
);
252 if (icsk
->icsk_pending
== ICSK_TIME_RETRANS
||
253 icsk
->icsk_pending
== ICSK_TIME_REO_TIMEOUT
||
254 icsk
->icsk_pending
== ICSK_TIME_LOSS_PROBE
) {
256 r
->idiag_retrans
= icsk
->icsk_retransmits
;
258 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
259 } else if (icsk
->icsk_pending
== ICSK_TIME_PROBE0
) {
261 r
->idiag_retrans
= icsk
->icsk_probes_out
;
263 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
264 } else if (timer_pending(&sk
->sk_timer
)) {
266 r
->idiag_retrans
= icsk
->icsk_probes_out
;
268 jiffies_delta_to_msecs(sk
->sk_timer
.expires
- jiffies
);
271 r
->idiag_expires
= 0;
274 if ((ext
& (1 << (INET_DIAG_INFO
- 1))) && handler
->idiag_info_size
) {
275 attr
= nla_reserve_64bit(skb
, INET_DIAG_INFO
,
276 handler
->idiag_info_size
,
281 info
= nla_data(attr
);
284 if (ext
& (1 << (INET_DIAG_CONG
- 1))) {
288 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
290 err
= nla_put_string(skb
, INET_DIAG_CONG
, ca_ops
->name
);
296 handler
->idiag_get_info(sk
, r
, info
);
298 if (ext
& (1 << (INET_DIAG_INFO
- 1)) && handler
->idiag_get_aux
)
299 if (handler
->idiag_get_aux(sk
, net_admin
, skb
) < 0)
302 if (sk
->sk_state
< TCP_TIME_WAIT
) {
303 union tcp_cc_info info
;
308 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
309 if (ca_ops
&& ca_ops
->get_info
)
310 sz
= ca_ops
->get_info(sk
, ext
, &attr
, &info
);
312 if (sz
&& nla_put(skb
, attr
, sz
, &info
) < 0)
316 /* Keep it at the end for potential retry with a larger skb,
317 * or else do best-effort fitting, which is only done for the
320 if (cb_data
->bpf_stg_diag
) {
321 bool first_nlmsg
= ((unsigned char *)nlh
== skb
->data
);
322 unsigned int prev_min_dump_alloc
;
323 unsigned int total_nla_size
= 0;
324 unsigned int msg_len
;
327 msg_len
= skb_tail_pointer(skb
) - (unsigned char *)nlh
;
328 err
= bpf_sk_storage_diag_put(cb_data
->bpf_stg_diag
, sk
, skb
,
329 INET_DIAG_SK_BPF_STORAGES
,
335 total_nla_size
+= msg_len
;
336 prev_min_dump_alloc
= cb
->min_dump_alloc
;
337 if (total_nla_size
> prev_min_dump_alloc
)
338 cb
->min_dump_alloc
= min_t(u32
, total_nla_size
,
339 MAX_DUMP_ALLOC_SIZE
);
344 if (cb
->min_dump_alloc
> prev_min_dump_alloc
)
345 /* Retry with pskb_expand_head() with
346 * __GFP_DIRECT_RECLAIM
350 WARN_ON_ONCE(total_nla_size
<= prev_min_dump_alloc
);
352 /* Send what we have for this sk
353 * and move on to the next sk in the following
363 nlmsg_cancel(skb
, nlh
);
366 EXPORT_SYMBOL_GPL(inet_sk_diag_fill
);
368 static int inet_twsk_diag_fill(struct sock
*sk
,
370 struct netlink_callback
*cb
,
373 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
374 struct inet_diag_msg
*r
;
375 struct nlmsghdr
*nlh
;
378 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
379 cb
->nlh
->nlmsg_seq
, cb
->nlh
->nlmsg_type
,
380 sizeof(*r
), nlmsg_flags
);
385 BUG_ON(tw
->tw_state
!= TCP_TIME_WAIT
);
387 inet_diag_msg_common_fill(r
, sk
);
388 r
->idiag_retrans
= 0;
390 r
->idiag_state
= tw
->tw_substate
;
392 tmo
= tw
->tw_timer
.expires
- jiffies
;
393 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
403 static int inet_req_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
404 struct netlink_callback
*cb
,
405 u16 nlmsg_flags
, bool net_admin
)
407 struct request_sock
*reqsk
= inet_reqsk(sk
);
408 struct inet_diag_msg
*r
;
409 struct nlmsghdr
*nlh
;
412 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
413 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
418 inet_diag_msg_common_fill(r
, sk
);
419 r
->idiag_state
= TCP_SYN_RECV
;
421 r
->idiag_retrans
= reqsk
->num_retrans
;
423 BUILD_BUG_ON(offsetof(struct inet_request_sock
, ir_cookie
) !=
424 offsetof(struct sock
, sk_cookie
));
426 tmo
= inet_reqsk(sk
)->rsk_timer
.expires
- jiffies
;
427 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
433 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
,
434 inet_rsk(reqsk
)->ir_mark
))
441 static int sk_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
442 struct netlink_callback
*cb
,
443 const struct inet_diag_req_v2
*r
,
444 u16 nlmsg_flags
, bool net_admin
)
446 if (sk
->sk_state
== TCP_TIME_WAIT
)
447 return inet_twsk_diag_fill(sk
, skb
, cb
, nlmsg_flags
);
449 if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
450 return inet_req_diag_fill(sk
, skb
, cb
, nlmsg_flags
, net_admin
);
452 return inet_sk_diag_fill(sk
, inet_csk(sk
), skb
, cb
, r
, nlmsg_flags
,
456 struct sock
*inet_diag_find_one_icsk(struct net
*net
,
457 struct inet_hashinfo
*hashinfo
,
458 const struct inet_diag_req_v2
*req
)
463 if (req
->sdiag_family
== AF_INET
)
464 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[0],
465 req
->id
.idiag_dport
, req
->id
.idiag_src
[0],
466 req
->id
.idiag_sport
, req
->id
.idiag_if
);
467 #if IS_ENABLED(CONFIG_IPV6)
468 else if (req
->sdiag_family
== AF_INET6
) {
469 if (ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_dst
) &&
470 ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_src
))
471 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[3],
472 req
->id
.idiag_dport
, req
->id
.idiag_src
[3],
473 req
->id
.idiag_sport
, req
->id
.idiag_if
);
475 sk
= inet6_lookup(net
, hashinfo
, NULL
, 0,
476 (struct in6_addr
*)req
->id
.idiag_dst
,
478 (struct in6_addr
*)req
->id
.idiag_src
,
485 return ERR_PTR(-EINVAL
);
489 return ERR_PTR(-ENOENT
);
491 if (sock_diag_check_cookie(sk
, req
->id
.idiag_cookie
)) {
493 return ERR_PTR(-ENOENT
);
498 EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk
);
500 int inet_diag_dump_one_icsk(struct inet_hashinfo
*hashinfo
,
501 struct netlink_callback
*cb
,
502 const struct inet_diag_req_v2
*req
)
504 struct sk_buff
*in_skb
= cb
->skb
;
505 bool net_admin
= netlink_net_capable(in_skb
, CAP_NET_ADMIN
);
506 struct net
*net
= sock_net(in_skb
->sk
);
511 sk
= inet_diag_find_one_icsk(net
, hashinfo
, req
);
515 rep
= nlmsg_new(inet_sk_attr_size(sk
, req
, net_admin
), GFP_KERNEL
);
521 err
= sk_diag_fill(sk
, rep
, cb
, req
, 0, net_admin
);
523 WARN_ON(err
== -EMSGSIZE
);
527 err
= netlink_unicast(net
->diag_nlsk
, rep
, NETLINK_CB(in_skb
).portid
,
538 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk
);
540 static int inet_diag_cmd_exact(int cmd
, struct sk_buff
*in_skb
,
541 const struct nlmsghdr
*nlh
,
542 const struct inet_diag_req_v2
*req
)
544 const struct inet_diag_handler
*handler
;
547 handler
= inet_diag_lock_handler(req
->sdiag_protocol
);
548 if (IS_ERR(handler
)) {
549 err
= PTR_ERR(handler
);
550 } else if (cmd
== SOCK_DIAG_BY_FAMILY
) {
551 struct inet_diag_dump_data empty_dump_data
= {};
552 struct netlink_callback cb
= {
555 .data
= &empty_dump_data
,
557 err
= handler
->dump_one(&cb
, req
);
558 } else if (cmd
== SOCK_DESTROY
&& handler
->destroy
) {
559 err
= handler
->destroy(in_skb
, req
);
563 inet_diag_unlock_handler(handler
);
568 static int bitstring_match(const __be32
*a1
, const __be32
*a2
, int bits
)
570 int words
= bits
>> 5;
575 if (memcmp(a1
, a2
, words
<< 2))
585 mask
= htonl((0xffffffff) << (32 - bits
));
587 if ((w1
^ w2
) & mask
)
594 static int inet_diag_bc_run(const struct nlattr
*_bc
,
595 const struct inet_diag_entry
*entry
)
597 const void *bc
= nla_data(_bc
);
598 int len
= nla_len(_bc
);
602 const struct inet_diag_bc_op
*op
= bc
;
605 case INET_DIAG_BC_NOP
:
607 case INET_DIAG_BC_JMP
:
610 case INET_DIAG_BC_S_EQ
:
611 yes
= entry
->sport
== op
[1].no
;
613 case INET_DIAG_BC_S_GE
:
614 yes
= entry
->sport
>= op
[1].no
;
616 case INET_DIAG_BC_S_LE
:
617 yes
= entry
->sport
<= op
[1].no
;
619 case INET_DIAG_BC_D_EQ
:
620 yes
= entry
->dport
== op
[1].no
;
622 case INET_DIAG_BC_D_GE
:
623 yes
= entry
->dport
>= op
[1].no
;
625 case INET_DIAG_BC_D_LE
:
626 yes
= entry
->dport
<= op
[1].no
;
628 case INET_DIAG_BC_AUTO
:
629 yes
= !(entry
->userlocks
& SOCK_BINDPORT_LOCK
);
631 case INET_DIAG_BC_S_COND
:
632 case INET_DIAG_BC_D_COND
: {
633 const struct inet_diag_hostcond
*cond
;
636 cond
= (const struct inet_diag_hostcond
*)(op
+ 1);
637 if (cond
->port
!= -1 &&
638 cond
->port
!= (op
->code
== INET_DIAG_BC_S_COND
?
639 entry
->sport
: entry
->dport
)) {
644 if (op
->code
== INET_DIAG_BC_S_COND
)
649 if (cond
->family
!= AF_UNSPEC
&&
650 cond
->family
!= entry
->family
) {
651 if (entry
->family
== AF_INET6
&&
652 cond
->family
== AF_INET
) {
653 if (addr
[0] == 0 && addr
[1] == 0 &&
654 addr
[2] == htonl(0xffff) &&
655 bitstring_match(addr
+ 3,
664 if (cond
->prefix_len
== 0)
666 if (bitstring_match(addr
, cond
->addr
,
672 case INET_DIAG_BC_DEV_COND
: {
675 ifindex
= *((const u32
*)(op
+ 1));
676 if (ifindex
!= entry
->ifindex
)
680 case INET_DIAG_BC_MARK_COND
: {
681 struct inet_diag_markcond
*cond
;
683 cond
= (struct inet_diag_markcond
*)(op
+ 1);
684 if ((entry
->mark
& cond
->mask
) != cond
->mark
)
688 #ifdef CONFIG_SOCK_CGROUP_DATA
689 case INET_DIAG_BC_CGROUP_COND
: {
692 cgroup_id
= get_unaligned((const u64
*)(op
+ 1));
693 if (cgroup_id
!= entry
->cgroup_id
)
711 /* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
713 static void entry_fill_addrs(struct inet_diag_entry
*entry
,
714 const struct sock
*sk
)
716 #if IS_ENABLED(CONFIG_IPV6)
717 if (sk
->sk_family
== AF_INET6
) {
718 entry
->saddr
= sk
->sk_v6_rcv_saddr
.s6_addr32
;
719 entry
->daddr
= sk
->sk_v6_daddr
.s6_addr32
;
723 entry
->saddr
= &sk
->sk_rcv_saddr
;
724 entry
->daddr
= &sk
->sk_daddr
;
728 int inet_diag_bc_sk(const struct nlattr
*bc
, struct sock
*sk
)
730 struct inet_sock
*inet
= inet_sk(sk
);
731 struct inet_diag_entry entry
;
736 entry
.family
= sk
->sk_family
;
737 entry_fill_addrs(&entry
, sk
);
738 entry
.sport
= inet
->inet_num
;
739 entry
.dport
= ntohs(inet
->inet_dport
);
740 entry
.ifindex
= sk
->sk_bound_dev_if
;
741 entry
.userlocks
= sk_fullsock(sk
) ? sk
->sk_userlocks
: 0;
743 entry
.mark
= sk
->sk_mark
;
744 else if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
745 entry
.mark
= inet_rsk(inet_reqsk(sk
))->ir_mark
;
748 #ifdef CONFIG_SOCK_CGROUP_DATA
749 entry
.cgroup_id
= cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
));
752 return inet_diag_bc_run(bc
, &entry
);
754 EXPORT_SYMBOL_GPL(inet_diag_bc_sk
);
756 static int valid_cc(const void *bc
, int len
, int cc
)
759 const struct inet_diag_bc_op
*op
= bc
;
765 if (op
->yes
< 4 || op
->yes
& 3)
773 /* data is u32 ifindex */
774 static bool valid_devcond(const struct inet_diag_bc_op
*op
, int len
,
777 /* Check ifindex space. */
778 *min_len
+= sizeof(u32
);
784 /* Validate an inet_diag_hostcond. */
785 static bool valid_hostcond(const struct inet_diag_bc_op
*op
, int len
,
788 struct inet_diag_hostcond
*cond
;
791 /* Check hostcond space. */
792 *min_len
+= sizeof(struct inet_diag_hostcond
);
795 cond
= (struct inet_diag_hostcond
*)(op
+ 1);
797 /* Check address family and address length. */
798 switch (cond
->family
) {
803 addr_len
= sizeof(struct in_addr
);
806 addr_len
= sizeof(struct in6_addr
);
811 *min_len
+= addr_len
;
815 /* Check prefix length (in bits) vs address length (in bytes). */
816 if (cond
->prefix_len
> 8 * addr_len
)
822 /* Validate a port comparison operator. */
823 static bool valid_port_comparison(const struct inet_diag_bc_op
*op
,
824 int len
, int *min_len
)
826 /* Port comparisons put the port in a follow-on inet_diag_bc_op. */
827 *min_len
+= sizeof(struct inet_diag_bc_op
);
833 static bool valid_markcond(const struct inet_diag_bc_op
*op
, int len
,
836 *min_len
+= sizeof(struct inet_diag_markcond
);
837 return len
>= *min_len
;
840 #ifdef CONFIG_SOCK_CGROUP_DATA
841 static bool valid_cgroupcond(const struct inet_diag_bc_op
*op
, int len
,
844 *min_len
+= sizeof(u64
);
845 return len
>= *min_len
;
849 static int inet_diag_bc_audit(const struct nlattr
*attr
,
850 const struct sk_buff
*skb
)
852 bool net_admin
= netlink_net_capable(skb
, CAP_NET_ADMIN
);
853 const void *bytecode
, *bc
;
854 int bytecode_len
, len
;
856 if (!attr
|| nla_len(attr
) < sizeof(struct inet_diag_bc_op
))
859 bytecode
= bc
= nla_data(attr
);
860 len
= bytecode_len
= nla_len(attr
);
863 int min_len
= sizeof(struct inet_diag_bc_op
);
864 const struct inet_diag_bc_op
*op
= bc
;
867 case INET_DIAG_BC_S_COND
:
868 case INET_DIAG_BC_D_COND
:
869 if (!valid_hostcond(bc
, len
, &min_len
))
872 case INET_DIAG_BC_DEV_COND
:
873 if (!valid_devcond(bc
, len
, &min_len
))
876 case INET_DIAG_BC_S_EQ
:
877 case INET_DIAG_BC_S_GE
:
878 case INET_DIAG_BC_S_LE
:
879 case INET_DIAG_BC_D_EQ
:
880 case INET_DIAG_BC_D_GE
:
881 case INET_DIAG_BC_D_LE
:
882 if (!valid_port_comparison(bc
, len
, &min_len
))
885 case INET_DIAG_BC_MARK_COND
:
888 if (!valid_markcond(bc
, len
, &min_len
))
891 #ifdef CONFIG_SOCK_CGROUP_DATA
892 case INET_DIAG_BC_CGROUP_COND
:
893 if (!valid_cgroupcond(bc
, len
, &min_len
))
897 case INET_DIAG_BC_AUTO
:
898 case INET_DIAG_BC_JMP
:
899 case INET_DIAG_BC_NOP
:
905 if (op
->code
!= INET_DIAG_BC_NOP
) {
906 if (op
->no
< min_len
|| op
->no
> len
+ 4 || op
->no
& 3)
909 !valid_cc(bytecode
, bytecode_len
, len
- op
->no
))
913 if (op
->yes
< min_len
|| op
->yes
> len
+ 4 || op
->yes
& 3)
918 return len
== 0 ? 0 : -EINVAL
;
921 static void twsk_build_assert(void)
923 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_family
) !=
924 offsetof(struct sock
, sk_family
));
926 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_num
) !=
927 offsetof(struct inet_sock
, inet_num
));
929 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_dport
) !=
930 offsetof(struct inet_sock
, inet_dport
));
932 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_rcv_saddr
) !=
933 offsetof(struct inet_sock
, inet_rcv_saddr
));
935 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_daddr
) !=
936 offsetof(struct inet_sock
, inet_daddr
));
938 #if IS_ENABLED(CONFIG_IPV6)
939 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_rcv_saddr
) !=
940 offsetof(struct sock
, sk_v6_rcv_saddr
));
942 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_daddr
) !=
943 offsetof(struct sock
, sk_v6_daddr
));
947 void inet_diag_dump_icsk(struct inet_hashinfo
*hashinfo
, struct sk_buff
*skb
,
948 struct netlink_callback
*cb
,
949 const struct inet_diag_req_v2
*r
)
951 bool net_admin
= netlink_net_capable(cb
->skb
, CAP_NET_ADMIN
);
952 struct inet_diag_dump_data
*cb_data
= cb
->data
;
953 struct net
*net
= sock_net(skb
->sk
);
954 u32 idiag_states
= r
->idiag_states
;
955 int i
, num
, s_i
, s_num
;
959 bc
= cb_data
->inet_diag_nla_bc
;
960 if (idiag_states
& TCPF_SYN_RECV
)
961 idiag_states
|= TCPF_NEW_SYN_RECV
;
963 s_num
= num
= cb
->args
[2];
965 if (cb
->args
[0] == 0) {
966 if (!(idiag_states
& TCPF_LISTEN
) || r
->id
.idiag_dport
)
969 for (i
= s_i
; i
< INET_LHTABLE_SIZE
; i
++) {
970 struct inet_listen_hashbucket
*ilb
;
971 struct hlist_nulls_node
*node
;
974 ilb
= &hashinfo
->listening_hash
[i
];
975 spin_lock(&ilb
->lock
);
976 sk_nulls_for_each(sk
, node
, &ilb
->nulls_head
) {
977 struct inet_sock
*inet
= inet_sk(sk
);
979 if (!net_eq(sock_net(sk
), net
))
987 if (r
->sdiag_family
!= AF_UNSPEC
&&
988 sk
->sk_family
!= r
->sdiag_family
)
991 if (r
->id
.idiag_sport
!= inet
->inet_sport
&&
995 if (!inet_diag_bc_sk(bc
, sk
))
998 if (inet_sk_diag_fill(sk
, inet_csk(sk
), skb
,
1001 spin_unlock(&ilb
->lock
);
1008 spin_unlock(&ilb
->lock
);
1014 s_i
= num
= s_num
= 0;
1017 if (!(idiag_states
& ~TCPF_LISTEN
))
1021 for (i
= s_i
; i
<= hashinfo
->ehash_mask
; i
++) {
1022 struct inet_ehash_bucket
*head
= &hashinfo
->ehash
[i
];
1023 spinlock_t
*lock
= inet_ehash_lockp(hashinfo
, i
);
1024 struct hlist_nulls_node
*node
;
1025 struct sock
*sk_arr
[SKARR_SZ
];
1026 int num_arr
[SKARR_SZ
];
1027 int idx
, accum
, res
;
1029 if (hlist_nulls_empty(&head
->chain
))
1039 sk_nulls_for_each(sk
, node
, &head
->chain
) {
1042 if (!net_eq(sock_net(sk
), net
))
1046 state
= (sk
->sk_state
== TCP_TIME_WAIT
) ?
1047 inet_twsk(sk
)->tw_substate
: sk
->sk_state
;
1048 if (!(idiag_states
& (1 << state
)))
1050 if (r
->sdiag_family
!= AF_UNSPEC
&&
1051 sk
->sk_family
!= r
->sdiag_family
)
1053 if (r
->id
.idiag_sport
!= htons(sk
->sk_num
) &&
1056 if (r
->id
.idiag_dport
!= sk
->sk_dport
&&
1059 twsk_build_assert();
1061 if (!inet_diag_bc_sk(bc
, sk
))
1064 if (!refcount_inc_not_zero(&sk
->sk_refcnt
))
1067 num_arr
[accum
] = num
;
1069 if (++accum
== SKARR_SZ
)
1074 spin_unlock_bh(lock
);
1076 for (idx
= 0; idx
< accum
; idx
++) {
1078 res
= sk_diag_fill(sk_arr
[idx
], skb
, cb
, r
,
1079 NLM_F_MULTI
, net_admin
);
1083 sock_gen_put(sk_arr
[idx
]);
1088 if (accum
== SKARR_SZ
) {
1100 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk
);
1102 static int __inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
,
1103 const struct inet_diag_req_v2
*r
)
1105 const struct inet_diag_handler
*handler
;
1106 u32 prev_min_dump_alloc
;
1110 prev_min_dump_alloc
= cb
->min_dump_alloc
;
1111 handler
= inet_diag_lock_handler(r
->sdiag_protocol
);
1112 if (!IS_ERR(handler
))
1113 handler
->dump(skb
, cb
, r
);
1115 err
= PTR_ERR(handler
);
1116 inet_diag_unlock_handler(handler
);
1118 /* The skb is not large enough to fit one sk info and
1119 * inet_sk_diag_fill() has requested for a larger skb.
1121 if (!skb
->len
&& cb
->min_dump_alloc
> prev_min_dump_alloc
) {
1122 err
= pskb_expand_head(skb
, 0, cb
->min_dump_alloc
, GFP_KERNEL
);
1127 return err
? : skb
->len
;
1130 static int inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1132 return __inet_diag_dump(skb
, cb
, nlmsg_data(cb
->nlh
));
1135 static int __inet_diag_dump_start(struct netlink_callback
*cb
, int hdrlen
)
1137 const struct nlmsghdr
*nlh
= cb
->nlh
;
1138 struct inet_diag_dump_data
*cb_data
;
1139 struct sk_buff
*skb
= cb
->skb
;
1143 cb_data
= kzalloc(sizeof(*cb_data
), GFP_KERNEL
);
1147 nla_for_each_attr(nla
, nlmsg_attrdata(nlh
, hdrlen
),
1148 nlmsg_attrlen(nlh
, hdrlen
), rem
) {
1149 int type
= nla_type(nla
);
1151 if (type
< __INET_DIAG_REQ_MAX
)
1152 cb_data
->req_nlas
[type
] = nla
;
1155 nla
= cb_data
->inet_diag_nla_bc
;
1157 err
= inet_diag_bc_audit(nla
, skb
);
1164 nla
= cb_data
->inet_diag_nla_bpf_stgs
;
1166 struct bpf_sk_storage_diag
*bpf_stg_diag
;
1168 bpf_stg_diag
= bpf_sk_storage_diag_alloc(nla
);
1169 if (IS_ERR(bpf_stg_diag
)) {
1171 return PTR_ERR(bpf_stg_diag
);
1173 cb_data
->bpf_stg_diag
= bpf_stg_diag
;
1180 static int inet_diag_dump_start(struct netlink_callback
*cb
)
1182 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req_v2
));
1185 static int inet_diag_dump_start_compat(struct netlink_callback
*cb
)
1187 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req
));
1190 static int inet_diag_dump_done(struct netlink_callback
*cb
)
1192 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1194 bpf_sk_storage_diag_free(cb_data
->bpf_stg_diag
);
1200 static int inet_diag_type2proto(int type
)
1203 case TCPDIAG_GETSOCK
:
1205 case DCCPDIAG_GETSOCK
:
1206 return IPPROTO_DCCP
;
1212 static int inet_diag_dump_compat(struct sk_buff
*skb
,
1213 struct netlink_callback
*cb
)
1215 struct inet_diag_req
*rc
= nlmsg_data(cb
->nlh
);
1216 struct inet_diag_req_v2 req
;
1218 req
.sdiag_family
= AF_UNSPEC
; /* compatibility */
1219 req
.sdiag_protocol
= inet_diag_type2proto(cb
->nlh
->nlmsg_type
);
1220 req
.idiag_ext
= rc
->idiag_ext
;
1221 req
.idiag_states
= rc
->idiag_states
;
1224 return __inet_diag_dump(skb
, cb
, &req
);
1227 static int inet_diag_get_exact_compat(struct sk_buff
*in_skb
,
1228 const struct nlmsghdr
*nlh
)
1230 struct inet_diag_req
*rc
= nlmsg_data(nlh
);
1231 struct inet_diag_req_v2 req
;
1233 req
.sdiag_family
= rc
->idiag_family
;
1234 req
.sdiag_protocol
= inet_diag_type2proto(nlh
->nlmsg_type
);
1235 req
.idiag_ext
= rc
->idiag_ext
;
1236 req
.idiag_states
= rc
->idiag_states
;
1239 return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY
, in_skb
, nlh
, &req
);
1242 static int inet_diag_rcv_msg_compat(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
1244 int hdrlen
= sizeof(struct inet_diag_req
);
1245 struct net
*net
= sock_net(skb
->sk
);
1247 if (nlh
->nlmsg_type
>= INET_DIAG_GETSOCK_MAX
||
1248 nlmsg_len(nlh
) < hdrlen
)
1251 if (nlh
->nlmsg_flags
& NLM_F_DUMP
) {
1252 struct netlink_dump_control c
= {
1253 .start
= inet_diag_dump_start_compat
,
1254 .done
= inet_diag_dump_done
,
1255 .dump
= inet_diag_dump_compat
,
1257 return netlink_dump_start(net
->diag_nlsk
, skb
, nlh
, &c
);
1260 return inet_diag_get_exact_compat(skb
, nlh
);
1263 static int inet_diag_handler_cmd(struct sk_buff
*skb
, struct nlmsghdr
*h
)
1265 int hdrlen
= sizeof(struct inet_diag_req_v2
);
1266 struct net
*net
= sock_net(skb
->sk
);
1268 if (nlmsg_len(h
) < hdrlen
)
1271 if (h
->nlmsg_type
== SOCK_DIAG_BY_FAMILY
&&
1272 h
->nlmsg_flags
& NLM_F_DUMP
) {
1273 struct netlink_dump_control c
= {
1274 .start
= inet_diag_dump_start
,
1275 .done
= inet_diag_dump_done
,
1276 .dump
= inet_diag_dump
,
1278 return netlink_dump_start(net
->diag_nlsk
, skb
, h
, &c
);
1281 return inet_diag_cmd_exact(h
->nlmsg_type
, skb
, h
, nlmsg_data(h
));
1285 int inet_diag_handler_get_info(struct sk_buff
*skb
, struct sock
*sk
)
1287 const struct inet_diag_handler
*handler
;
1288 struct nlmsghdr
*nlh
;
1289 struct nlattr
*attr
;
1290 struct inet_diag_msg
*r
;
1294 nlh
= nlmsg_put(skb
, 0, 0, SOCK_DIAG_BY_FAMILY
, sizeof(*r
), 0);
1298 r
= nlmsg_data(nlh
);
1299 memset(r
, 0, sizeof(*r
));
1300 inet_diag_msg_common_fill(r
, sk
);
1301 if (sk
->sk_type
== SOCK_DGRAM
|| sk
->sk_type
== SOCK_STREAM
)
1302 r
->id
.idiag_sport
= inet_sk(sk
)->inet_sport
;
1303 r
->idiag_state
= sk
->sk_state
;
1305 if ((err
= nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))) {
1306 nlmsg_cancel(skb
, nlh
);
1310 handler
= inet_diag_lock_handler(sk
->sk_protocol
);
1311 if (IS_ERR(handler
)) {
1312 inet_diag_unlock_handler(handler
);
1313 nlmsg_cancel(skb
, nlh
);
1314 return PTR_ERR(handler
);
1317 attr
= handler
->idiag_info_size
1318 ? nla_reserve_64bit(skb
, INET_DIAG_INFO
,
1319 handler
->idiag_info_size
,
1323 info
= nla_data(attr
);
1325 handler
->idiag_get_info(sk
, r
, info
);
1326 inet_diag_unlock_handler(handler
);
1328 nlmsg_end(skb
, nlh
);
1332 static const struct sock_diag_handler inet_diag_handler
= {
1334 .dump
= inet_diag_handler_cmd
,
1335 .get_info
= inet_diag_handler_get_info
,
1336 .destroy
= inet_diag_handler_cmd
,
1339 static const struct sock_diag_handler inet6_diag_handler
= {
1341 .dump
= inet_diag_handler_cmd
,
1342 .get_info
= inet_diag_handler_get_info
,
1343 .destroy
= inet_diag_handler_cmd
,
1346 int inet_diag_register(const struct inet_diag_handler
*h
)
1348 const __u16 type
= h
->idiag_type
;
1351 if (type
>= IPPROTO_MAX
)
1354 mutex_lock(&inet_diag_table_mutex
);
1356 if (!inet_diag_table
[type
]) {
1357 inet_diag_table
[type
] = h
;
1360 mutex_unlock(&inet_diag_table_mutex
);
1364 EXPORT_SYMBOL_GPL(inet_diag_register
);
1366 void inet_diag_unregister(const struct inet_diag_handler
*h
)
1368 const __u16 type
= h
->idiag_type
;
1370 if (type
>= IPPROTO_MAX
)
1373 mutex_lock(&inet_diag_table_mutex
);
1374 inet_diag_table
[type
] = NULL
;
1375 mutex_unlock(&inet_diag_table_mutex
);
1377 EXPORT_SYMBOL_GPL(inet_diag_unregister
);
1379 static int __init
inet_diag_init(void)
1381 const int inet_diag_table_size
= (IPPROTO_MAX
*
1382 sizeof(struct inet_diag_handler
*));
1385 inet_diag_table
= kzalloc(inet_diag_table_size
, GFP_KERNEL
);
1386 if (!inet_diag_table
)
1389 err
= sock_diag_register(&inet_diag_handler
);
1393 err
= sock_diag_register(&inet6_diag_handler
);
1397 sock_diag_register_inet_compat(inet_diag_rcv_msg_compat
);
1402 sock_diag_unregister(&inet_diag_handler
);
1404 kfree(inet_diag_table
);
1408 static void __exit
inet_diag_exit(void)
1410 sock_diag_unregister(&inet6_diag_handler
);
1411 sock_diag_unregister(&inet_diag_handler
);
1412 sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat
);
1413 kfree(inet_diag_table
);
1416 module_init(inet_diag_init
);
1417 module_exit(inet_diag_exit
);
1418 MODULE_LICENSE("GPL");
1419 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 2 /* AF_INET */);
1420 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 10 /* AF_INET6 */);