1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * inet_diag.c Module for monitoring INET transport protocols sockets.
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/fcntl.h>
12 #include <linux/random.h>
13 #include <linux/slab.h>
14 #include <linux/cache.h>
15 #include <linux/init.h>
16 #include <linux/time.h>
21 #include <net/inet_common.h>
22 #include <net/inet_connection_sock.h>
23 #include <net/inet_hashtables.h>
24 #include <net/inet_timewait_sock.h>
25 #include <net/inet6_hashtables.h>
26 #include <net/bpf_sk_storage.h>
27 #include <net/netlink.h>
29 #include <linux/inet.h>
30 #include <linux/stddef.h>
32 #include <linux/inet_diag.h>
33 #include <linux/sock_diag.h>
35 static const struct inet_diag_handler __rcu
**inet_diag_table
;
37 struct inet_diag_entry
{
46 #ifdef CONFIG_SOCK_CGROUP_DATA
51 static const struct inet_diag_handler
*inet_diag_lock_handler(int proto
)
53 const struct inet_diag_handler
*handler
;
55 if (proto
< 0 || proto
>= IPPROTO_MAX
)
58 if (!READ_ONCE(inet_diag_table
[proto
]))
59 sock_load_diag_module(AF_INET
, proto
);
62 handler
= rcu_dereference(inet_diag_table
[proto
]);
63 if (handler
&& !try_module_get(handler
->owner
))
70 static void inet_diag_unlock_handler(const struct inet_diag_handler
*handler
)
72 module_put(handler
->owner
);
75 void inet_diag_msg_common_fill(struct inet_diag_msg
*r
, struct sock
*sk
)
77 r
->idiag_family
= sk
->sk_family
;
79 r
->id
.idiag_sport
= htons(sk
->sk_num
);
80 r
->id
.idiag_dport
= sk
->sk_dport
;
81 r
->id
.idiag_if
= sk
->sk_bound_dev_if
;
82 sock_diag_save_cookie(sk
, r
->id
.idiag_cookie
);
84 #if IS_ENABLED(CONFIG_IPV6)
85 if (sk
->sk_family
== AF_INET6
) {
86 *(struct in6_addr
*)r
->id
.idiag_src
= sk
->sk_v6_rcv_saddr
;
87 *(struct in6_addr
*)r
->id
.idiag_dst
= sk
->sk_v6_daddr
;
91 memset(&r
->id
.idiag_src
, 0, sizeof(r
->id
.idiag_src
));
92 memset(&r
->id
.idiag_dst
, 0, sizeof(r
->id
.idiag_dst
));
94 r
->id
.idiag_src
[0] = sk
->sk_rcv_saddr
;
95 r
->id
.idiag_dst
[0] = sk
->sk_daddr
;
98 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill
);
100 static size_t inet_sk_attr_size(struct sock
*sk
,
101 const struct inet_diag_req_v2
*req
,
104 const struct inet_diag_handler
*handler
;
108 handler
= rcu_dereference(inet_diag_table
[req
->sdiag_protocol
]);
109 DEBUG_NET_WARN_ON_ONCE(!handler
);
110 if (handler
&& handler
->idiag_get_aux_size
)
111 aux
= handler
->idiag_get_aux_size(sk
, net_admin
);
114 return nla_total_size(sizeof(struct tcp_info
))
115 + nla_total_size(sizeof(struct inet_diag_msg
))
116 + inet_diag_msg_attrs_size()
117 + nla_total_size(sizeof(struct inet_diag_meminfo
))
118 + nla_total_size(SK_MEMINFO_VARS
* sizeof(u32
))
119 + nla_total_size(TCP_CA_NAME_MAX
)
120 + nla_total_size(sizeof(struct tcpvegas_info
))
125 int inet_diag_msg_attrs_fill(struct sock
*sk
, struct sk_buff
*skb
,
126 struct inet_diag_msg
*r
, int ext
,
127 struct user_namespace
*user_ns
,
130 const struct inet_sock
*inet
= inet_sk(sk
);
131 struct inet_diag_sockopt inet_sockopt
;
133 if (nla_put_u8(skb
, INET_DIAG_SHUTDOWN
, sk
->sk_shutdown
))
136 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
137 * hence this needs to be included regardless of socket family.
139 if (ext
& (1 << (INET_DIAG_TOS
- 1)))
140 if (nla_put_u8(skb
, INET_DIAG_TOS
, READ_ONCE(inet
->tos
)) < 0)
143 #if IS_ENABLED(CONFIG_IPV6)
144 if (r
->idiag_family
== AF_INET6
) {
145 if (ext
& (1 << (INET_DIAG_TCLASS
- 1)))
146 if (nla_put_u8(skb
, INET_DIAG_TCLASS
,
147 inet6_sk(sk
)->tclass
) < 0)
150 if (((1 << sk
->sk_state
) & (TCPF_LISTEN
| TCPF_CLOSE
)) &&
151 nla_put_u8(skb
, INET_DIAG_SKV6ONLY
, ipv6_only_sock(sk
)))
156 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
, READ_ONCE(sk
->sk_mark
)))
159 if (ext
& (1 << (INET_DIAG_CLASS_ID
- 1)) ||
160 ext
& (1 << (INET_DIAG_TCLASS
- 1))) {
163 #ifdef CONFIG_SOCK_CGROUP_DATA
164 classid
= sock_cgroup_classid(&sk
->sk_cgrp_data
);
166 /* Fallback to socket priority if class id isn't set.
167 * Classful qdiscs use it as direct reference to class.
168 * For cgroup2 classid is always zero.
171 classid
= READ_ONCE(sk
->sk_priority
);
173 if (nla_put_u32(skb
, INET_DIAG_CLASS_ID
, classid
))
177 #ifdef CONFIG_SOCK_CGROUP_DATA
178 if (nla_put_u64_64bit(skb
, INET_DIAG_CGROUP_ID
,
179 cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
)),
184 r
->idiag_uid
= from_kuid_munged(user_ns
, sock_i_uid(sk
));
185 r
->idiag_inode
= sock_i_ino(sk
);
187 memset(&inet_sockopt
, 0, sizeof(inet_sockopt
));
188 inet_sockopt
.recverr
= inet_test_bit(RECVERR
, sk
);
189 inet_sockopt
.is_icsk
= inet_test_bit(IS_ICSK
, sk
);
190 inet_sockopt
.freebind
= inet_test_bit(FREEBIND
, sk
);
191 inet_sockopt
.hdrincl
= inet_test_bit(HDRINCL
, sk
);
192 inet_sockopt
.mc_loop
= inet_test_bit(MC_LOOP
, sk
);
193 inet_sockopt
.transparent
= inet_test_bit(TRANSPARENT
, sk
);
194 inet_sockopt
.mc_all
= inet_test_bit(MC_ALL
, sk
);
195 inet_sockopt
.nodefrag
= inet_test_bit(NODEFRAG
, sk
);
196 inet_sockopt
.bind_address_no_port
= inet_test_bit(BIND_ADDRESS_NO_PORT
, sk
);
197 inet_sockopt
.recverr_rfc4884
= inet_test_bit(RECVERR_RFC4884
, sk
);
198 inet_sockopt
.defer_connect
= inet_test_bit(DEFER_CONNECT
, sk
);
199 if (nla_put(skb
, INET_DIAG_SOCKOPT
, sizeof(inet_sockopt
),
207 EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill
);
209 static int inet_diag_parse_attrs(const struct nlmsghdr
*nlh
, int hdrlen
,
210 struct nlattr
**req_nlas
)
215 nlmsg_for_each_attr(nla
, nlh
, hdrlen
, remaining
) {
216 int type
= nla_type(nla
);
218 if (type
== INET_DIAG_REQ_PROTOCOL
&& nla_len(nla
) != sizeof(u32
))
221 if (type
< __INET_DIAG_REQ_MAX
)
222 req_nlas
[type
] = nla
;
227 static int inet_diag_get_protocol(const struct inet_diag_req_v2
*req
,
228 const struct inet_diag_dump_data
*data
)
230 if (data
->req_nlas
[INET_DIAG_REQ_PROTOCOL
])
231 return nla_get_u32(data
->req_nlas
[INET_DIAG_REQ_PROTOCOL
]);
232 return req
->sdiag_protocol
;
235 #define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
237 int inet_sk_diag_fill(struct sock
*sk
, struct inet_connection_sock
*icsk
,
238 struct sk_buff
*skb
, struct netlink_callback
*cb
,
239 const struct inet_diag_req_v2
*req
,
240 u16 nlmsg_flags
, bool net_admin
)
242 const struct tcp_congestion_ops
*ca_ops
;
243 const struct inet_diag_handler
*handler
;
244 struct inet_diag_dump_data
*cb_data
;
245 int ext
= req
->idiag_ext
;
246 struct inet_diag_msg
*r
;
247 struct nlmsghdr
*nlh
;
254 protocol
= inet_diag_get_protocol(req
, cb_data
);
256 /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */
257 handler
= rcu_dereference_protected(inet_diag_table
[protocol
], 1);
258 DEBUG_NET_WARN_ON_ONCE(!handler
);
262 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
263 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
268 BUG_ON(!sk_fullsock(sk
));
270 inet_diag_msg_common_fill(r
, sk
);
271 r
->idiag_state
= sk
->sk_state
;
273 r
->idiag_retrans
= 0;
274 r
->idiag_expires
= 0;
276 if (inet_diag_msg_attrs_fill(sk
, skb
, r
, ext
,
277 sk_user_ns(NETLINK_CB(cb
->skb
).sk
),
281 if (ext
& (1 << (INET_DIAG_MEMINFO
- 1))) {
282 struct inet_diag_meminfo minfo
= {
283 .idiag_rmem
= sk_rmem_alloc_get(sk
),
284 .idiag_wmem
= READ_ONCE(sk
->sk_wmem_queued
),
285 .idiag_fmem
= sk_forward_alloc_get(sk
),
286 .idiag_tmem
= sk_wmem_alloc_get(sk
),
289 if (nla_put(skb
, INET_DIAG_MEMINFO
, sizeof(minfo
), &minfo
) < 0)
293 if (ext
& (1 << (INET_DIAG_SKMEMINFO
- 1)))
294 if (sock_diag_put_meminfo(sk
, skb
, INET_DIAG_SKMEMINFO
))
298 * RAW sockets might have user-defined protocols assigned,
299 * so report the one supplied on socket creation.
301 if (sk
->sk_type
== SOCK_RAW
) {
302 if (nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))
307 handler
->idiag_get_info(sk
, r
, NULL
);
311 icsk_pending
= smp_load_acquire(&icsk
->icsk_pending
);
312 if (icsk_pending
== ICSK_TIME_RETRANS
||
313 icsk_pending
== ICSK_TIME_REO_TIMEOUT
||
314 icsk_pending
== ICSK_TIME_LOSS_PROBE
) {
316 r
->idiag_retrans
= icsk
->icsk_retransmits
;
318 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
319 } else if (icsk_pending
== ICSK_TIME_PROBE0
) {
321 r
->idiag_retrans
= icsk
->icsk_probes_out
;
323 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
324 } else if (timer_pending(&sk
->sk_timer
)) {
326 r
->idiag_retrans
= icsk
->icsk_probes_out
;
328 jiffies_delta_to_msecs(sk
->sk_timer
.expires
- jiffies
);
331 if ((ext
& (1 << (INET_DIAG_INFO
- 1))) && handler
->idiag_info_size
) {
332 attr
= nla_reserve_64bit(skb
, INET_DIAG_INFO
,
333 handler
->idiag_info_size
,
338 info
= nla_data(attr
);
341 if (ext
& (1 << (INET_DIAG_CONG
- 1))) {
345 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
347 err
= nla_put_string(skb
, INET_DIAG_CONG
, ca_ops
->name
);
353 handler
->idiag_get_info(sk
, r
, info
);
355 if (ext
& (1 << (INET_DIAG_INFO
- 1)) && handler
->idiag_get_aux
)
356 if (handler
->idiag_get_aux(sk
, net_admin
, skb
) < 0)
359 if (sk
->sk_state
< TCP_TIME_WAIT
) {
360 union tcp_cc_info info
;
365 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
366 if (ca_ops
&& ca_ops
->get_info
)
367 sz
= ca_ops
->get_info(sk
, ext
, &attr
, &info
);
369 if (sz
&& nla_put(skb
, attr
, sz
, &info
) < 0)
373 /* Keep it at the end for potential retry with a larger skb,
374 * or else do best-effort fitting, which is only done for the
377 if (cb_data
->bpf_stg_diag
) {
378 bool first_nlmsg
= ((unsigned char *)nlh
== skb
->data
);
379 unsigned int prev_min_dump_alloc
;
380 unsigned int total_nla_size
= 0;
381 unsigned int msg_len
;
384 msg_len
= skb_tail_pointer(skb
) - (unsigned char *)nlh
;
385 err
= bpf_sk_storage_diag_put(cb_data
->bpf_stg_diag
, sk
, skb
,
386 INET_DIAG_SK_BPF_STORAGES
,
392 total_nla_size
+= msg_len
;
393 prev_min_dump_alloc
= cb
->min_dump_alloc
;
394 if (total_nla_size
> prev_min_dump_alloc
)
395 cb
->min_dump_alloc
= min_t(u32
, total_nla_size
,
396 MAX_DUMP_ALLOC_SIZE
);
401 if (cb
->min_dump_alloc
> prev_min_dump_alloc
)
402 /* Retry with pskb_expand_head() with
403 * __GFP_DIRECT_RECLAIM
407 WARN_ON_ONCE(total_nla_size
<= prev_min_dump_alloc
);
409 /* Send what we have for this sk
410 * and move on to the next sk in the following
420 nlmsg_cancel(skb
, nlh
);
423 EXPORT_SYMBOL_GPL(inet_sk_diag_fill
);
425 static int inet_twsk_diag_fill(struct sock
*sk
,
427 struct netlink_callback
*cb
,
428 u16 nlmsg_flags
, bool net_admin
)
430 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
431 struct inet_diag_msg
*r
;
432 struct nlmsghdr
*nlh
;
435 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
436 cb
->nlh
->nlmsg_seq
, cb
->nlh
->nlmsg_type
,
437 sizeof(*r
), nlmsg_flags
);
442 BUG_ON(tw
->tw_state
!= TCP_TIME_WAIT
);
444 inet_diag_msg_common_fill(r
, sk
);
445 r
->idiag_retrans
= 0;
447 r
->idiag_state
= READ_ONCE(tw
->tw_substate
);
449 tmo
= tw
->tw_timer
.expires
- jiffies
;
450 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
456 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
,
458 nlmsg_cancel(skb
, nlh
);
466 static int inet_req_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
467 struct netlink_callback
*cb
,
468 u16 nlmsg_flags
, bool net_admin
)
470 struct request_sock
*reqsk
= inet_reqsk(sk
);
471 struct inet_diag_msg
*r
;
472 struct nlmsghdr
*nlh
;
475 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
476 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
481 inet_diag_msg_common_fill(r
, sk
);
482 r
->idiag_state
= TCP_SYN_RECV
;
484 r
->idiag_retrans
= reqsk
->num_retrans
;
486 BUILD_BUG_ON(offsetof(struct inet_request_sock
, ir_cookie
) !=
487 offsetof(struct sock
, sk_cookie
));
489 tmo
= inet_reqsk(sk
)->rsk_timer
.expires
- jiffies
;
490 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
496 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
,
497 inet_rsk(reqsk
)->ir_mark
)) {
498 nlmsg_cancel(skb
, nlh
);
506 static int sk_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
507 struct netlink_callback
*cb
,
508 const struct inet_diag_req_v2
*r
,
509 u16 nlmsg_flags
, bool net_admin
)
511 if (sk
->sk_state
== TCP_TIME_WAIT
)
512 return inet_twsk_diag_fill(sk
, skb
, cb
, nlmsg_flags
, net_admin
);
514 if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
515 return inet_req_diag_fill(sk
, skb
, cb
, nlmsg_flags
, net_admin
);
517 return inet_sk_diag_fill(sk
, inet_csk(sk
), skb
, cb
, r
, nlmsg_flags
,
521 struct sock
*inet_diag_find_one_icsk(struct net
*net
,
522 struct inet_hashinfo
*hashinfo
,
523 const struct inet_diag_req_v2
*req
)
528 if (req
->sdiag_family
== AF_INET
)
529 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[0],
530 req
->id
.idiag_dport
, req
->id
.idiag_src
[0],
531 req
->id
.idiag_sport
, req
->id
.idiag_if
);
532 #if IS_ENABLED(CONFIG_IPV6)
533 else if (req
->sdiag_family
== AF_INET6
) {
534 if (ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_dst
) &&
535 ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_src
))
536 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[3],
537 req
->id
.idiag_dport
, req
->id
.idiag_src
[3],
538 req
->id
.idiag_sport
, req
->id
.idiag_if
);
540 sk
= inet6_lookup(net
, hashinfo
, NULL
, 0,
541 (struct in6_addr
*)req
->id
.idiag_dst
,
543 (struct in6_addr
*)req
->id
.idiag_src
,
550 return ERR_PTR(-EINVAL
);
554 return ERR_PTR(-ENOENT
);
556 if (sock_diag_check_cookie(sk
, req
->id
.idiag_cookie
)) {
558 return ERR_PTR(-ENOENT
);
563 EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk
);
565 int inet_diag_dump_one_icsk(struct inet_hashinfo
*hashinfo
,
566 struct netlink_callback
*cb
,
567 const struct inet_diag_req_v2
*req
)
569 struct sk_buff
*in_skb
= cb
->skb
;
570 bool net_admin
= netlink_net_capable(in_skb
, CAP_NET_ADMIN
);
571 struct net
*net
= sock_net(in_skb
->sk
);
576 sk
= inet_diag_find_one_icsk(net
, hashinfo
, req
);
580 rep
= nlmsg_new(inet_sk_attr_size(sk
, req
, net_admin
), GFP_KERNEL
);
586 err
= sk_diag_fill(sk
, rep
, cb
, req
, 0, net_admin
);
588 WARN_ON(err
== -EMSGSIZE
);
592 err
= nlmsg_unicast(net
->diag_nlsk
, rep
, NETLINK_CB(in_skb
).portid
);
600 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk
);
602 static int inet_diag_cmd_exact(int cmd
, struct sk_buff
*in_skb
,
603 const struct nlmsghdr
*nlh
,
605 const struct inet_diag_req_v2
*req
)
607 const struct inet_diag_handler
*handler
;
608 struct inet_diag_dump_data dump_data
;
611 memset(&dump_data
, 0, sizeof(dump_data
));
612 err
= inet_diag_parse_attrs(nlh
, hdrlen
, dump_data
.req_nlas
);
616 protocol
= inet_diag_get_protocol(req
, &dump_data
);
618 handler
= inet_diag_lock_handler(protocol
);
622 if (cmd
== SOCK_DIAG_BY_FAMILY
) {
623 struct netlink_callback cb
= {
628 err
= handler
->dump_one(&cb
, req
);
629 } else if (cmd
== SOCK_DESTROY
&& handler
->destroy
) {
630 err
= handler
->destroy(in_skb
, req
);
634 inet_diag_unlock_handler(handler
);
639 static int bitstring_match(const __be32
*a1
, const __be32
*a2
, int bits
)
641 int words
= bits
>> 5;
646 if (memcmp(a1
, a2
, words
<< 2))
656 mask
= htonl((0xffffffff) << (32 - bits
));
658 if ((w1
^ w2
) & mask
)
665 static int inet_diag_bc_run(const struct nlattr
*_bc
,
666 const struct inet_diag_entry
*entry
)
668 const void *bc
= nla_data(_bc
);
669 int len
= nla_len(_bc
);
673 const struct inet_diag_bc_op
*op
= bc
;
676 case INET_DIAG_BC_NOP
:
678 case INET_DIAG_BC_JMP
:
681 case INET_DIAG_BC_S_EQ
:
682 yes
= entry
->sport
== op
[1].no
;
684 case INET_DIAG_BC_S_GE
:
685 yes
= entry
->sport
>= op
[1].no
;
687 case INET_DIAG_BC_S_LE
:
688 yes
= entry
->sport
<= op
[1].no
;
690 case INET_DIAG_BC_D_EQ
:
691 yes
= entry
->dport
== op
[1].no
;
693 case INET_DIAG_BC_D_GE
:
694 yes
= entry
->dport
>= op
[1].no
;
696 case INET_DIAG_BC_D_LE
:
697 yes
= entry
->dport
<= op
[1].no
;
699 case INET_DIAG_BC_AUTO
:
700 yes
= !(entry
->userlocks
& SOCK_BINDPORT_LOCK
);
702 case INET_DIAG_BC_S_COND
:
703 case INET_DIAG_BC_D_COND
: {
704 const struct inet_diag_hostcond
*cond
;
707 cond
= (const struct inet_diag_hostcond
*)(op
+ 1);
708 if (cond
->port
!= -1 &&
709 cond
->port
!= (op
->code
== INET_DIAG_BC_S_COND
?
710 entry
->sport
: entry
->dport
)) {
715 if (op
->code
== INET_DIAG_BC_S_COND
)
720 if (cond
->family
!= AF_UNSPEC
&&
721 cond
->family
!= entry
->family
) {
722 if (entry
->family
== AF_INET6
&&
723 cond
->family
== AF_INET
) {
724 if (addr
[0] == 0 && addr
[1] == 0 &&
725 addr
[2] == htonl(0xffff) &&
726 bitstring_match(addr
+ 3,
735 if (cond
->prefix_len
== 0)
737 if (bitstring_match(addr
, cond
->addr
,
743 case INET_DIAG_BC_DEV_COND
: {
746 ifindex
= *((const u32
*)(op
+ 1));
747 if (ifindex
!= entry
->ifindex
)
751 case INET_DIAG_BC_MARK_COND
: {
752 struct inet_diag_markcond
*cond
;
754 cond
= (struct inet_diag_markcond
*)(op
+ 1);
755 if ((entry
->mark
& cond
->mask
) != cond
->mark
)
759 #ifdef CONFIG_SOCK_CGROUP_DATA
760 case INET_DIAG_BC_CGROUP_COND
: {
763 cgroup_id
= get_unaligned((const u64
*)(op
+ 1));
764 if (cgroup_id
!= entry
->cgroup_id
)
782 /* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
784 static void entry_fill_addrs(struct inet_diag_entry
*entry
,
785 const struct sock
*sk
)
787 #if IS_ENABLED(CONFIG_IPV6)
788 if (sk
->sk_family
== AF_INET6
) {
789 entry
->saddr
= sk
->sk_v6_rcv_saddr
.s6_addr32
;
790 entry
->daddr
= sk
->sk_v6_daddr
.s6_addr32
;
794 entry
->saddr
= &sk
->sk_rcv_saddr
;
795 entry
->daddr
= &sk
->sk_daddr
;
799 int inet_diag_bc_sk(const struct nlattr
*bc
, struct sock
*sk
)
801 struct inet_sock
*inet
= inet_sk(sk
);
802 struct inet_diag_entry entry
;
807 entry
.family
= sk
->sk_family
;
808 entry_fill_addrs(&entry
, sk
);
809 entry
.sport
= inet
->inet_num
;
810 entry
.dport
= ntohs(inet
->inet_dport
);
811 entry
.ifindex
= sk
->sk_bound_dev_if
;
812 entry
.userlocks
= sk_fullsock(sk
) ? sk
->sk_userlocks
: 0;
814 entry
.mark
= READ_ONCE(sk
->sk_mark
);
815 else if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
816 entry
.mark
= inet_rsk(inet_reqsk(sk
))->ir_mark
;
817 else if (sk
->sk_state
== TCP_TIME_WAIT
)
818 entry
.mark
= inet_twsk(sk
)->tw_mark
;
821 #ifdef CONFIG_SOCK_CGROUP_DATA
822 entry
.cgroup_id
= sk_fullsock(sk
) ?
823 cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
)) : 0;
826 return inet_diag_bc_run(bc
, &entry
);
828 EXPORT_SYMBOL_GPL(inet_diag_bc_sk
);
830 static int valid_cc(const void *bc
, int len
, int cc
)
833 const struct inet_diag_bc_op
*op
= bc
;
839 if (op
->yes
< 4 || op
->yes
& 3)
847 /* data is u32 ifindex */
848 static bool valid_devcond(const struct inet_diag_bc_op
*op
, int len
,
851 /* Check ifindex space. */
852 *min_len
+= sizeof(u32
);
858 /* Validate an inet_diag_hostcond. */
859 static bool valid_hostcond(const struct inet_diag_bc_op
*op
, int len
,
862 struct inet_diag_hostcond
*cond
;
865 /* Check hostcond space. */
866 *min_len
+= sizeof(struct inet_diag_hostcond
);
869 cond
= (struct inet_diag_hostcond
*)(op
+ 1);
871 /* Check address family and address length. */
872 switch (cond
->family
) {
877 addr_len
= sizeof(struct in_addr
);
880 addr_len
= sizeof(struct in6_addr
);
885 *min_len
+= addr_len
;
889 /* Check prefix length (in bits) vs address length (in bytes). */
890 if (cond
->prefix_len
> 8 * addr_len
)
896 /* Validate a port comparison operator. */
897 static bool valid_port_comparison(const struct inet_diag_bc_op
*op
,
898 int len
, int *min_len
)
900 /* Port comparisons put the port in a follow-on inet_diag_bc_op. */
901 *min_len
+= sizeof(struct inet_diag_bc_op
);
907 static bool valid_markcond(const struct inet_diag_bc_op
*op
, int len
,
910 *min_len
+= sizeof(struct inet_diag_markcond
);
911 return len
>= *min_len
;
#ifdef CONFIG_SOCK_CGROUP_DATA
/* Validate the space needed by a cgroup condition.
 *
 * Grows *@min_len by sizeof(u64), the size of the cgroup id operand, and
 * reports whether the remaining bytecode length @len can hold it.
 * @op is not inspected.
 */
static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
			     int *min_len)
{
	*min_len += sizeof(u64);

	return *min_len <= len;
}
#endif
923 static int inet_diag_bc_audit(const struct nlattr
*attr
,
924 const struct sk_buff
*skb
)
926 bool net_admin
= netlink_net_capable(skb
, CAP_NET_ADMIN
);
927 const void *bytecode
, *bc
;
928 int bytecode_len
, len
;
930 if (!attr
|| nla_len(attr
) < sizeof(struct inet_diag_bc_op
))
933 bytecode
= bc
= nla_data(attr
);
934 len
= bytecode_len
= nla_len(attr
);
937 int min_len
= sizeof(struct inet_diag_bc_op
);
938 const struct inet_diag_bc_op
*op
= bc
;
941 case INET_DIAG_BC_S_COND
:
942 case INET_DIAG_BC_D_COND
:
943 if (!valid_hostcond(bc
, len
, &min_len
))
946 case INET_DIAG_BC_DEV_COND
:
947 if (!valid_devcond(bc
, len
, &min_len
))
950 case INET_DIAG_BC_S_EQ
:
951 case INET_DIAG_BC_S_GE
:
952 case INET_DIAG_BC_S_LE
:
953 case INET_DIAG_BC_D_EQ
:
954 case INET_DIAG_BC_D_GE
:
955 case INET_DIAG_BC_D_LE
:
956 if (!valid_port_comparison(bc
, len
, &min_len
))
959 case INET_DIAG_BC_MARK_COND
:
962 if (!valid_markcond(bc
, len
, &min_len
))
965 #ifdef CONFIG_SOCK_CGROUP_DATA
966 case INET_DIAG_BC_CGROUP_COND
:
967 if (!valid_cgroupcond(bc
, len
, &min_len
))
971 case INET_DIAG_BC_AUTO
:
972 case INET_DIAG_BC_JMP
:
973 case INET_DIAG_BC_NOP
:
979 if (op
->code
!= INET_DIAG_BC_NOP
) {
980 if (op
->no
< min_len
|| op
->no
> len
+ 4 || op
->no
& 3)
983 !valid_cc(bytecode
, bytecode_len
, len
- op
->no
))
987 if (op
->yes
< min_len
|| op
->yes
> len
+ 4 || op
->yes
& 3)
992 return len
== 0 ? 0 : -EINVAL
;
995 static void twsk_build_assert(void)
997 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_family
) !=
998 offsetof(struct sock
, sk_family
));
1000 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_num
) !=
1001 offsetof(struct inet_sock
, inet_num
));
1003 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_dport
) !=
1004 offsetof(struct inet_sock
, inet_dport
));
1006 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_rcv_saddr
) !=
1007 offsetof(struct inet_sock
, inet_rcv_saddr
));
1009 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_daddr
) !=
1010 offsetof(struct inet_sock
, inet_daddr
));
1012 #if IS_ENABLED(CONFIG_IPV6)
1013 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_rcv_saddr
) !=
1014 offsetof(struct sock
, sk_v6_rcv_saddr
));
1016 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_daddr
) !=
1017 offsetof(struct sock
, sk_v6_daddr
));
1021 void inet_diag_dump_icsk(struct inet_hashinfo
*hashinfo
, struct sk_buff
*skb
,
1022 struct netlink_callback
*cb
,
1023 const struct inet_diag_req_v2
*r
)
1025 bool net_admin
= netlink_net_capable(cb
->skb
, CAP_NET_ADMIN
);
1026 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1027 struct net
*net
= sock_net(skb
->sk
);
1028 u32 idiag_states
= r
->idiag_states
;
1029 int i
, num
, s_i
, s_num
;
1033 bc
= cb_data
->inet_diag_nla_bc
;
1034 if (idiag_states
& TCPF_SYN_RECV
)
1035 idiag_states
|= TCPF_NEW_SYN_RECV
;
1037 s_num
= num
= cb
->args
[2];
1039 if (cb
->args
[0] == 0) {
1040 if (!(idiag_states
& TCPF_LISTEN
) || r
->id
.idiag_dport
)
1041 goto skip_listen_ht
;
1043 for (i
= s_i
; i
<= hashinfo
->lhash2_mask
; i
++) {
1044 struct inet_listen_hashbucket
*ilb
;
1045 struct hlist_nulls_node
*node
;
1048 ilb
= &hashinfo
->lhash2
[i
];
1050 if (hlist_nulls_empty(&ilb
->nulls_head
)) {
1054 spin_lock(&ilb
->lock
);
1055 sk_nulls_for_each(sk
, node
, &ilb
->nulls_head
) {
1056 struct inet_sock
*inet
= inet_sk(sk
);
1058 if (!net_eq(sock_net(sk
), net
))
1066 if (r
->sdiag_family
!= AF_UNSPEC
&&
1067 sk
->sk_family
!= r
->sdiag_family
)
1070 if (r
->id
.idiag_sport
!= inet
->inet_sport
&&
1074 if (!inet_diag_bc_sk(bc
, sk
))
1077 if (inet_sk_diag_fill(sk
, inet_csk(sk
), skb
,
1080 spin_unlock(&ilb
->lock
);
1087 spin_unlock(&ilb
->lock
);
1093 s_i
= num
= s_num
= 0;
1096 /* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
1101 /* Dump bound but inactive (not listening, connecting, etc.) sockets */
1102 if (cb
->args
[0] == 1) {
1103 if (!(idiag_states
& TCPF_BOUND_INACTIVE
))
1106 for (i
= s_i
; i
< hashinfo
->bhash_size
; i
++) {
1107 struct inet_bind_hashbucket
*ibb
;
1108 struct inet_bind2_bucket
*tb2
;
1109 struct sock
*sk_arr
[SKARR_SZ
];
1110 int num_arr
[SKARR_SZ
];
1111 int idx
, accum
, res
;
1116 ibb
= &hashinfo
->bhash2
[i
];
1118 if (hlist_empty(&ibb
->chain
)) {
1122 spin_lock_bh(&ibb
->lock
);
1123 inet_bind_bucket_for_each(tb2
, &ibb
->chain
) {
1124 if (!net_eq(ib2_net(tb2
), net
))
1127 sk_for_each_bound(sk
, &tb2
->owners
) {
1128 struct inet_sock
*inet
= inet_sk(sk
);
1133 if (sk
->sk_state
!= TCP_CLOSE
||
1137 if (r
->sdiag_family
!= AF_UNSPEC
&&
1138 r
->sdiag_family
!= sk
->sk_family
)
1141 if (!inet_diag_bc_sk(bc
, sk
))
1145 num_arr
[accum
] = num
;
1147 if (++accum
== SKARR_SZ
)
1148 goto pause_bind_walk
;
1154 spin_unlock_bh(&ibb
->lock
);
1157 for (idx
= 0; idx
< accum
; idx
++) {
1159 res
= inet_sk_diag_fill(sk_arr
[idx
],
1166 sock_put(sk_arr
[idx
]);
1173 if (accum
== SKARR_SZ
) {
1175 goto resume_bind_walk
;
1182 s_i
= num
= s_num
= 0;
1185 if (!(idiag_states
& ~TCPF_LISTEN
))
1188 for (i
= s_i
; i
<= hashinfo
->ehash_mask
; i
++) {
1189 struct inet_ehash_bucket
*head
= &hashinfo
->ehash
[i
];
1190 spinlock_t
*lock
= inet_ehash_lockp(hashinfo
, i
);
1191 struct hlist_nulls_node
*node
;
1192 struct sock
*sk_arr
[SKARR_SZ
];
1193 int num_arr
[SKARR_SZ
];
1194 int idx
, accum
, res
;
1196 if (hlist_nulls_empty(&head
->chain
))
1206 sk_nulls_for_each(sk
, node
, &head
->chain
) {
1209 if (!net_eq(sock_net(sk
), net
))
1213 state
= (sk
->sk_state
== TCP_TIME_WAIT
) ?
1214 READ_ONCE(inet_twsk(sk
)->tw_substate
) : sk
->sk_state
;
1215 if (!(idiag_states
& (1 << state
)))
1217 if (r
->sdiag_family
!= AF_UNSPEC
&&
1218 sk
->sk_family
!= r
->sdiag_family
)
1220 if (r
->id
.idiag_sport
!= htons(sk
->sk_num
) &&
1223 if (r
->id
.idiag_dport
!= sk
->sk_dport
&&
1226 twsk_build_assert();
1228 if (!inet_diag_bc_sk(bc
, sk
))
1231 if (!refcount_inc_not_zero(&sk
->sk_refcnt
))
1234 num_arr
[accum
] = num
;
1236 if (++accum
== SKARR_SZ
)
1241 spin_unlock_bh(lock
);
1243 for (idx
= 0; idx
< accum
; idx
++) {
1245 res
= sk_diag_fill(sk_arr
[idx
], skb
, cb
, r
,
1246 NLM_F_MULTI
, net_admin
);
1250 sock_gen_put(sk_arr
[idx
]);
1255 if (accum
== SKARR_SZ
) {
1267 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk
);
1269 static int __inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
,
1270 const struct inet_diag_req_v2
*r
)
1272 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1273 const struct inet_diag_handler
*handler
;
1274 u32 prev_min_dump_alloc
;
1275 int protocol
, err
= 0;
1277 protocol
= inet_diag_get_protocol(r
, cb_data
);
1280 prev_min_dump_alloc
= cb
->min_dump_alloc
;
1281 handler
= inet_diag_lock_handler(protocol
);
1283 handler
->dump(skb
, cb
, r
);
1284 inet_diag_unlock_handler(handler
);
1288 /* The skb is not large enough to fit one sk info and
1289 * inet_sk_diag_fill() has requested for a larger skb.
1291 if (!skb
->len
&& cb
->min_dump_alloc
> prev_min_dump_alloc
) {
1292 err
= pskb_expand_head(skb
, 0, cb
->min_dump_alloc
, GFP_KERNEL
);
1297 return err
? : skb
->len
;
1300 static int inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1302 return __inet_diag_dump(skb
, cb
, nlmsg_data(cb
->nlh
));
1305 static int __inet_diag_dump_start(struct netlink_callback
*cb
, int hdrlen
)
1307 const struct nlmsghdr
*nlh
= cb
->nlh
;
1308 struct inet_diag_dump_data
*cb_data
;
1309 struct sk_buff
*skb
= cb
->skb
;
1313 cb_data
= kzalloc(sizeof(*cb_data
), GFP_KERNEL
);
1317 err
= inet_diag_parse_attrs(nlh
, hdrlen
, cb_data
->req_nlas
);
1322 nla
= cb_data
->inet_diag_nla_bc
;
1324 err
= inet_diag_bc_audit(nla
, skb
);
1331 nla
= cb_data
->inet_diag_nla_bpf_stgs
;
1333 struct bpf_sk_storage_diag
*bpf_stg_diag
;
1335 bpf_stg_diag
= bpf_sk_storage_diag_alloc(nla
);
1336 if (IS_ERR(bpf_stg_diag
)) {
1338 return PTR_ERR(bpf_stg_diag
);
1340 cb_data
->bpf_stg_diag
= bpf_stg_diag
;
1347 static int inet_diag_dump_start(struct netlink_callback
*cb
)
1349 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req_v2
));
1352 static int inet_diag_dump_start_compat(struct netlink_callback
*cb
)
1354 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req
));
1357 static int inet_diag_dump_done(struct netlink_callback
*cb
)
1359 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1361 bpf_sk_storage_diag_free(cb_data
->bpf_stg_diag
);
1367 static int inet_diag_type2proto(int type
)
1370 case TCPDIAG_GETSOCK
:
1372 case DCCPDIAG_GETSOCK
:
1373 return IPPROTO_DCCP
;
1379 static int inet_diag_dump_compat(struct sk_buff
*skb
,
1380 struct netlink_callback
*cb
)
1382 struct inet_diag_req
*rc
= nlmsg_data(cb
->nlh
);
1383 struct inet_diag_req_v2 req
;
1385 req
.sdiag_family
= AF_UNSPEC
; /* compatibility */
1386 req
.sdiag_protocol
= inet_diag_type2proto(cb
->nlh
->nlmsg_type
);
1387 req
.idiag_ext
= rc
->idiag_ext
;
1389 req
.idiag_states
= rc
->idiag_states
;
1392 return __inet_diag_dump(skb
, cb
, &req
);
1395 static int inet_diag_get_exact_compat(struct sk_buff
*in_skb
,
1396 const struct nlmsghdr
*nlh
)
1398 struct inet_diag_req
*rc
= nlmsg_data(nlh
);
1399 struct inet_diag_req_v2 req
;
1401 req
.sdiag_family
= rc
->idiag_family
;
1402 req
.sdiag_protocol
= inet_diag_type2proto(nlh
->nlmsg_type
);
1403 req
.idiag_ext
= rc
->idiag_ext
;
1405 req
.idiag_states
= rc
->idiag_states
;
1408 return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY
, in_skb
, nlh
,
1409 sizeof(struct inet_diag_req
), &req
);
1412 static int inet_diag_rcv_msg_compat(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
1414 int hdrlen
= sizeof(struct inet_diag_req
);
1415 struct net
*net
= sock_net(skb
->sk
);
1417 if (nlh
->nlmsg_type
>= INET_DIAG_GETSOCK_MAX
||
1418 nlmsg_len(nlh
) < hdrlen
)
1421 if (nlh
->nlmsg_flags
& NLM_F_DUMP
) {
1422 struct netlink_dump_control c
= {
1423 .start
= inet_diag_dump_start_compat
,
1424 .done
= inet_diag_dump_done
,
1425 .dump
= inet_diag_dump_compat
,
1427 return netlink_dump_start(net
->diag_nlsk
, skb
, nlh
, &c
);
1430 return inet_diag_get_exact_compat(skb
, nlh
);
1433 static int inet_diag_handler_cmd(struct sk_buff
*skb
, struct nlmsghdr
*h
)
1435 int hdrlen
= sizeof(struct inet_diag_req_v2
);
1436 struct net
*net
= sock_net(skb
->sk
);
1438 if (nlmsg_len(h
) < hdrlen
)
1441 if (h
->nlmsg_type
== SOCK_DIAG_BY_FAMILY
&&
1442 h
->nlmsg_flags
& NLM_F_DUMP
) {
1443 struct netlink_dump_control c
= {
1444 .start
= inet_diag_dump_start
,
1445 .done
= inet_diag_dump_done
,
1446 .dump
= inet_diag_dump
,
1448 return netlink_dump_start(net
->diag_nlsk
, skb
, h
, &c
);
1451 return inet_diag_cmd_exact(h
->nlmsg_type
, skb
, h
, hdrlen
,
1456 int inet_diag_handler_get_info(struct sk_buff
*skb
, struct sock
*sk
)
1458 const struct inet_diag_handler
*handler
;
1459 struct nlmsghdr
*nlh
;
1460 struct nlattr
*attr
;
1461 struct inet_diag_msg
*r
;
1465 nlh
= nlmsg_put(skb
, 0, 0, SOCK_DIAG_BY_FAMILY
, sizeof(*r
), 0);
1469 r
= nlmsg_data(nlh
);
1470 memset(r
, 0, sizeof(*r
));
1471 inet_diag_msg_common_fill(r
, sk
);
1472 if (sk
->sk_type
== SOCK_DGRAM
|| sk
->sk_type
== SOCK_STREAM
)
1473 r
->id
.idiag_sport
= inet_sk(sk
)->inet_sport
;
1474 r
->idiag_state
= sk
->sk_state
;
1476 if ((err
= nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))) {
1477 nlmsg_cancel(skb
, nlh
);
1481 handler
= inet_diag_lock_handler(sk
->sk_protocol
);
1483 nlmsg_cancel(skb
, nlh
);
1487 attr
= handler
->idiag_info_size
1488 ? nla_reserve_64bit(skb
, INET_DIAG_INFO
,
1489 handler
->idiag_info_size
,
1493 info
= nla_data(attr
);
1495 handler
->idiag_get_info(sk
, r
, info
);
1496 inet_diag_unlock_handler(handler
);
1498 nlmsg_end(skb
, nlh
);
1502 static const struct sock_diag_handler inet_diag_handler
= {
1503 .owner
= THIS_MODULE
,
1505 .dump
= inet_diag_handler_cmd
,
1506 .get_info
= inet_diag_handler_get_info
,
1507 .destroy
= inet_diag_handler_cmd
,
1510 static const struct sock_diag_handler inet6_diag_handler
= {
1511 .owner
= THIS_MODULE
,
1513 .dump
= inet_diag_handler_cmd
,
1514 .get_info
= inet_diag_handler_get_info
,
1515 .destroy
= inet_diag_handler_cmd
,
1518 int inet_diag_register(const struct inet_diag_handler
*h
)
1520 const __u16 type
= h
->idiag_type
;
1522 if (type
>= IPPROTO_MAX
)
1525 return !cmpxchg((const struct inet_diag_handler
**)&inet_diag_table
[type
],
1526 NULL
, h
) ? 0 : -EEXIST
;
1528 EXPORT_SYMBOL_GPL(inet_diag_register
);
1530 void inet_diag_unregister(const struct inet_diag_handler
*h
)
1532 const __u16 type
= h
->idiag_type
;
1534 if (type
>= IPPROTO_MAX
)
1537 xchg((const struct inet_diag_handler
**)&inet_diag_table
[type
],
1540 EXPORT_SYMBOL_GPL(inet_diag_unregister
);
1542 static const struct sock_diag_inet_compat inet_diag_compat
= {
1543 .owner
= THIS_MODULE
,
1544 .fn
= inet_diag_rcv_msg_compat
,
1547 static int __init
inet_diag_init(void)
1549 const int inet_diag_table_size
= (IPPROTO_MAX
*
1550 sizeof(struct inet_diag_handler
*));
1553 inet_diag_table
= kzalloc(inet_diag_table_size
, GFP_KERNEL
);
1554 if (!inet_diag_table
)
1557 err
= sock_diag_register(&inet_diag_handler
);
1561 err
= sock_diag_register(&inet6_diag_handler
);
1565 sock_diag_register_inet_compat(&inet_diag_compat
);
1570 sock_diag_unregister(&inet_diag_handler
);
1572 kfree(inet_diag_table
);
1576 static void __exit
inet_diag_exit(void)
1578 sock_diag_unregister(&inet6_diag_handler
);
1579 sock_diag_unregister(&inet_diag_handler
);
1580 sock_diag_unregister_inet_compat(&inet_diag_compat
);
1581 kfree(inet_diag_table
);
1584 module_init(inet_diag_init
);
1585 module_exit(inet_diag_exit
);
1586 MODULE_LICENSE("GPL");
1587 MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG");
1588 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 2 /* AF_INET */);
1589 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 10 /* AF_INET6 */);