1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * inet_diag.c Module for monitoring INET transport protocols sockets.
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/types.h>
11 #include <linux/fcntl.h>
12 #include <linux/random.h>
13 #include <linux/slab.h>
14 #include <linux/cache.h>
15 #include <linux/init.h>
16 #include <linux/time.h>
21 #include <net/inet_common.h>
22 #include <net/inet_connection_sock.h>
23 #include <net/inet_hashtables.h>
24 #include <net/inet_timewait_sock.h>
25 #include <net/inet6_hashtables.h>
26 #include <net/bpf_sk_storage.h>
27 #include <net/netlink.h>
29 #include <linux/inet.h>
30 #include <linux/stddef.h>
32 #include <linux/inet_diag.h>
33 #include <linux/sock_diag.h>
35 static const struct inet_diag_handler
**inet_diag_table
;
37 struct inet_diag_entry
{
46 #ifdef CONFIG_SOCK_CGROUP_DATA
51 static DEFINE_MUTEX(inet_diag_table_mutex
);
53 static const struct inet_diag_handler
*inet_diag_lock_handler(int proto
)
55 if (proto
< 0 || proto
>= IPPROTO_MAX
) {
56 mutex_lock(&inet_diag_table_mutex
);
57 return ERR_PTR(-ENOENT
);
60 if (!inet_diag_table
[proto
])
61 sock_load_diag_module(AF_INET
, proto
);
63 mutex_lock(&inet_diag_table_mutex
);
64 if (!inet_diag_table
[proto
])
65 return ERR_PTR(-ENOENT
);
67 return inet_diag_table
[proto
];
70 static void inet_diag_unlock_handler(const struct inet_diag_handler
*handler
)
72 mutex_unlock(&inet_diag_table_mutex
);
75 void inet_diag_msg_common_fill(struct inet_diag_msg
*r
, struct sock
*sk
)
77 r
->idiag_family
= sk
->sk_family
;
79 r
->id
.idiag_sport
= htons(sk
->sk_num
);
80 r
->id
.idiag_dport
= sk
->sk_dport
;
81 r
->id
.idiag_if
= sk
->sk_bound_dev_if
;
82 sock_diag_save_cookie(sk
, r
->id
.idiag_cookie
);
84 #if IS_ENABLED(CONFIG_IPV6)
85 if (sk
->sk_family
== AF_INET6
) {
86 *(struct in6_addr
*)r
->id
.idiag_src
= sk
->sk_v6_rcv_saddr
;
87 *(struct in6_addr
*)r
->id
.idiag_dst
= sk
->sk_v6_daddr
;
91 memset(&r
->id
.idiag_src
, 0, sizeof(r
->id
.idiag_src
));
92 memset(&r
->id
.idiag_dst
, 0, sizeof(r
->id
.idiag_dst
));
94 r
->id
.idiag_src
[0] = sk
->sk_rcv_saddr
;
95 r
->id
.idiag_dst
[0] = sk
->sk_daddr
;
98 EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill
);
100 static size_t inet_sk_attr_size(struct sock
*sk
,
101 const struct inet_diag_req_v2
*req
,
104 const struct inet_diag_handler
*handler
;
107 handler
= inet_diag_table
[req
->sdiag_protocol
];
108 if (handler
&& handler
->idiag_get_aux_size
)
109 aux
= handler
->idiag_get_aux_size(sk
, net_admin
);
111 return nla_total_size(sizeof(struct tcp_info
))
112 + nla_total_size(sizeof(struct inet_diag_msg
))
113 + inet_diag_msg_attrs_size()
114 + nla_total_size(sizeof(struct inet_diag_meminfo
))
115 + nla_total_size(SK_MEMINFO_VARS
* sizeof(u32
))
116 + nla_total_size(TCP_CA_NAME_MAX
)
117 + nla_total_size(sizeof(struct tcpvegas_info
))
122 int inet_diag_msg_attrs_fill(struct sock
*sk
, struct sk_buff
*skb
,
123 struct inet_diag_msg
*r
, int ext
,
124 struct user_namespace
*user_ns
,
127 const struct inet_sock
*inet
= inet_sk(sk
);
128 struct inet_diag_sockopt inet_sockopt
;
130 if (nla_put_u8(skb
, INET_DIAG_SHUTDOWN
, sk
->sk_shutdown
))
133 /* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
134 * hence this needs to be included regardless of socket family.
136 if (ext
& (1 << (INET_DIAG_TOS
- 1)))
137 if (nla_put_u8(skb
, INET_DIAG_TOS
, inet
->tos
) < 0)
140 #if IS_ENABLED(CONFIG_IPV6)
141 if (r
->idiag_family
== AF_INET6
) {
142 if (ext
& (1 << (INET_DIAG_TCLASS
- 1)))
143 if (nla_put_u8(skb
, INET_DIAG_TCLASS
,
144 inet6_sk(sk
)->tclass
) < 0)
147 if (((1 << sk
->sk_state
) & (TCPF_LISTEN
| TCPF_CLOSE
)) &&
148 nla_put_u8(skb
, INET_DIAG_SKV6ONLY
, ipv6_only_sock(sk
)))
153 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
, sk
->sk_mark
))
156 if (ext
& (1 << (INET_DIAG_CLASS_ID
- 1)) ||
157 ext
& (1 << (INET_DIAG_TCLASS
- 1))) {
160 #ifdef CONFIG_SOCK_CGROUP_DATA
161 classid
= sock_cgroup_classid(&sk
->sk_cgrp_data
);
163 /* Fallback to socket priority if class id isn't set.
164 * Classful qdiscs use it as direct reference to class.
165 * For cgroup2 classid is always zero.
168 classid
= sk
->sk_priority
;
170 if (nla_put_u32(skb
, INET_DIAG_CLASS_ID
, classid
))
174 #ifdef CONFIG_SOCK_CGROUP_DATA
175 if (nla_put_u64_64bit(skb
, INET_DIAG_CGROUP_ID
,
176 cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
)),
181 r
->idiag_uid
= from_kuid_munged(user_ns
, sock_i_uid(sk
));
182 r
->idiag_inode
= sock_i_ino(sk
);
184 memset(&inet_sockopt
, 0, sizeof(inet_sockopt
));
185 inet_sockopt
.recverr
= inet
->recverr
;
186 inet_sockopt
.is_icsk
= inet
->is_icsk
;
187 inet_sockopt
.freebind
= inet
->freebind
;
188 inet_sockopt
.hdrincl
= inet
->hdrincl
;
189 inet_sockopt
.mc_loop
= inet
->mc_loop
;
190 inet_sockopt
.transparent
= inet
->transparent
;
191 inet_sockopt
.mc_all
= inet
->mc_all
;
192 inet_sockopt
.nodefrag
= inet
->nodefrag
;
193 inet_sockopt
.bind_address_no_port
= inet
->bind_address_no_port
;
194 inet_sockopt
.recverr_rfc4884
= inet
->recverr_rfc4884
;
195 inet_sockopt
.defer_connect
= inet
->defer_connect
;
196 if (nla_put(skb
, INET_DIAG_SOCKOPT
, sizeof(inet_sockopt
),
204 EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill
);
206 static int inet_diag_parse_attrs(const struct nlmsghdr
*nlh
, int hdrlen
,
207 struct nlattr
**req_nlas
)
212 nlmsg_for_each_attr(nla
, nlh
, hdrlen
, remaining
) {
213 int type
= nla_type(nla
);
215 if (type
== INET_DIAG_REQ_PROTOCOL
&& nla_len(nla
) != sizeof(u32
))
218 if (type
< __INET_DIAG_REQ_MAX
)
219 req_nlas
[type
] = nla
;
224 static int inet_diag_get_protocol(const struct inet_diag_req_v2
*req
,
225 const struct inet_diag_dump_data
*data
)
227 if (data
->req_nlas
[INET_DIAG_REQ_PROTOCOL
])
228 return nla_get_u32(data
->req_nlas
[INET_DIAG_REQ_PROTOCOL
]);
229 return req
->sdiag_protocol
;
232 #define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
234 int inet_sk_diag_fill(struct sock
*sk
, struct inet_connection_sock
*icsk
,
235 struct sk_buff
*skb
, struct netlink_callback
*cb
,
236 const struct inet_diag_req_v2
*req
,
237 u16 nlmsg_flags
, bool net_admin
)
239 const struct tcp_congestion_ops
*ca_ops
;
240 const struct inet_diag_handler
*handler
;
241 struct inet_diag_dump_data
*cb_data
;
242 int ext
= req
->idiag_ext
;
243 struct inet_diag_msg
*r
;
244 struct nlmsghdr
*nlh
;
249 handler
= inet_diag_table
[inet_diag_get_protocol(req
, cb_data
)];
252 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
253 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
258 BUG_ON(!sk_fullsock(sk
));
260 inet_diag_msg_common_fill(r
, sk
);
261 r
->idiag_state
= sk
->sk_state
;
263 r
->idiag_retrans
= 0;
265 if (inet_diag_msg_attrs_fill(sk
, skb
, r
, ext
,
266 sk_user_ns(NETLINK_CB(cb
->skb
).sk
),
270 if (ext
& (1 << (INET_DIAG_MEMINFO
- 1))) {
271 struct inet_diag_meminfo minfo
= {
272 .idiag_rmem
= sk_rmem_alloc_get(sk
),
273 .idiag_wmem
= READ_ONCE(sk
->sk_wmem_queued
),
274 .idiag_fmem
= sk
->sk_forward_alloc
,
275 .idiag_tmem
= sk_wmem_alloc_get(sk
),
278 if (nla_put(skb
, INET_DIAG_MEMINFO
, sizeof(minfo
), &minfo
) < 0)
282 if (ext
& (1 << (INET_DIAG_SKMEMINFO
- 1)))
283 if (sock_diag_put_meminfo(sk
, skb
, INET_DIAG_SKMEMINFO
))
287 * RAW sockets might have user-defined protocols assigned,
288 * so report the one supplied on socket creation.
290 if (sk
->sk_type
== SOCK_RAW
) {
291 if (nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))
296 handler
->idiag_get_info(sk
, r
, NULL
);
300 if (icsk
->icsk_pending
== ICSK_TIME_RETRANS
||
301 icsk
->icsk_pending
== ICSK_TIME_REO_TIMEOUT
||
302 icsk
->icsk_pending
== ICSK_TIME_LOSS_PROBE
) {
304 r
->idiag_retrans
= icsk
->icsk_retransmits
;
306 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
307 } else if (icsk
->icsk_pending
== ICSK_TIME_PROBE0
) {
309 r
->idiag_retrans
= icsk
->icsk_probes_out
;
311 jiffies_delta_to_msecs(icsk
->icsk_timeout
- jiffies
);
312 } else if (timer_pending(&sk
->sk_timer
)) {
314 r
->idiag_retrans
= icsk
->icsk_probes_out
;
316 jiffies_delta_to_msecs(sk
->sk_timer
.expires
- jiffies
);
319 r
->idiag_expires
= 0;
322 if ((ext
& (1 << (INET_DIAG_INFO
- 1))) && handler
->idiag_info_size
) {
323 attr
= nla_reserve_64bit(skb
, INET_DIAG_INFO
,
324 handler
->idiag_info_size
,
329 info
= nla_data(attr
);
332 if (ext
& (1 << (INET_DIAG_CONG
- 1))) {
336 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
338 err
= nla_put_string(skb
, INET_DIAG_CONG
, ca_ops
->name
);
344 handler
->idiag_get_info(sk
, r
, info
);
346 if (ext
& (1 << (INET_DIAG_INFO
- 1)) && handler
->idiag_get_aux
)
347 if (handler
->idiag_get_aux(sk
, net_admin
, skb
) < 0)
350 if (sk
->sk_state
< TCP_TIME_WAIT
) {
351 union tcp_cc_info info
;
356 ca_ops
= READ_ONCE(icsk
->icsk_ca_ops
);
357 if (ca_ops
&& ca_ops
->get_info
)
358 sz
= ca_ops
->get_info(sk
, ext
, &attr
, &info
);
360 if (sz
&& nla_put(skb
, attr
, sz
, &info
) < 0)
364 /* Keep it at the end for potential retry with a larger skb,
365 * or else do best-effort fitting, which is only done for the
368 if (cb_data
->bpf_stg_diag
) {
369 bool first_nlmsg
= ((unsigned char *)nlh
== skb
->data
);
370 unsigned int prev_min_dump_alloc
;
371 unsigned int total_nla_size
= 0;
372 unsigned int msg_len
;
375 msg_len
= skb_tail_pointer(skb
) - (unsigned char *)nlh
;
376 err
= bpf_sk_storage_diag_put(cb_data
->bpf_stg_diag
, sk
, skb
,
377 INET_DIAG_SK_BPF_STORAGES
,
383 total_nla_size
+= msg_len
;
384 prev_min_dump_alloc
= cb
->min_dump_alloc
;
385 if (total_nla_size
> prev_min_dump_alloc
)
386 cb
->min_dump_alloc
= min_t(u32
, total_nla_size
,
387 MAX_DUMP_ALLOC_SIZE
);
392 if (cb
->min_dump_alloc
> prev_min_dump_alloc
)
393 /* Retry with pskb_expand_head() with
394 * __GFP_DIRECT_RECLAIM
398 WARN_ON_ONCE(total_nla_size
<= prev_min_dump_alloc
);
400 /* Send what we have for this sk
401 * and move on to the next sk in the following
411 nlmsg_cancel(skb
, nlh
);
414 EXPORT_SYMBOL_GPL(inet_sk_diag_fill
);
416 static int inet_twsk_diag_fill(struct sock
*sk
,
418 struct netlink_callback
*cb
,
421 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
422 struct inet_diag_msg
*r
;
423 struct nlmsghdr
*nlh
;
426 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
,
427 cb
->nlh
->nlmsg_seq
, cb
->nlh
->nlmsg_type
,
428 sizeof(*r
), nlmsg_flags
);
433 BUG_ON(tw
->tw_state
!= TCP_TIME_WAIT
);
435 inet_diag_msg_common_fill(r
, sk
);
436 r
->idiag_retrans
= 0;
438 r
->idiag_state
= tw
->tw_substate
;
440 tmo
= tw
->tw_timer
.expires
- jiffies
;
441 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
451 static int inet_req_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
452 struct netlink_callback
*cb
,
453 u16 nlmsg_flags
, bool net_admin
)
455 struct request_sock
*reqsk
= inet_reqsk(sk
);
456 struct inet_diag_msg
*r
;
457 struct nlmsghdr
*nlh
;
460 nlh
= nlmsg_put(skb
, NETLINK_CB(cb
->skb
).portid
, cb
->nlh
->nlmsg_seq
,
461 cb
->nlh
->nlmsg_type
, sizeof(*r
), nlmsg_flags
);
466 inet_diag_msg_common_fill(r
, sk
);
467 r
->idiag_state
= TCP_SYN_RECV
;
469 r
->idiag_retrans
= reqsk
->num_retrans
;
471 BUILD_BUG_ON(offsetof(struct inet_request_sock
, ir_cookie
) !=
472 offsetof(struct sock
, sk_cookie
));
474 tmo
= inet_reqsk(sk
)->rsk_timer
.expires
- jiffies
;
475 r
->idiag_expires
= jiffies_delta_to_msecs(tmo
);
481 if (net_admin
&& nla_put_u32(skb
, INET_DIAG_MARK
,
482 inet_rsk(reqsk
)->ir_mark
)) {
483 nlmsg_cancel(skb
, nlh
);
491 static int sk_diag_fill(struct sock
*sk
, struct sk_buff
*skb
,
492 struct netlink_callback
*cb
,
493 const struct inet_diag_req_v2
*r
,
494 u16 nlmsg_flags
, bool net_admin
)
496 if (sk
->sk_state
== TCP_TIME_WAIT
)
497 return inet_twsk_diag_fill(sk
, skb
, cb
, nlmsg_flags
);
499 if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
500 return inet_req_diag_fill(sk
, skb
, cb
, nlmsg_flags
, net_admin
);
502 return inet_sk_diag_fill(sk
, inet_csk(sk
), skb
, cb
, r
, nlmsg_flags
,
506 struct sock
*inet_diag_find_one_icsk(struct net
*net
,
507 struct inet_hashinfo
*hashinfo
,
508 const struct inet_diag_req_v2
*req
)
513 if (req
->sdiag_family
== AF_INET
)
514 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[0],
515 req
->id
.idiag_dport
, req
->id
.idiag_src
[0],
516 req
->id
.idiag_sport
, req
->id
.idiag_if
);
517 #if IS_ENABLED(CONFIG_IPV6)
518 else if (req
->sdiag_family
== AF_INET6
) {
519 if (ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_dst
) &&
520 ipv6_addr_v4mapped((struct in6_addr
*)req
->id
.idiag_src
))
521 sk
= inet_lookup(net
, hashinfo
, NULL
, 0, req
->id
.idiag_dst
[3],
522 req
->id
.idiag_dport
, req
->id
.idiag_src
[3],
523 req
->id
.idiag_sport
, req
->id
.idiag_if
);
525 sk
= inet6_lookup(net
, hashinfo
, NULL
, 0,
526 (struct in6_addr
*)req
->id
.idiag_dst
,
528 (struct in6_addr
*)req
->id
.idiag_src
,
535 return ERR_PTR(-EINVAL
);
539 return ERR_PTR(-ENOENT
);
541 if (sock_diag_check_cookie(sk
, req
->id
.idiag_cookie
)) {
543 return ERR_PTR(-ENOENT
);
548 EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk
);
550 int inet_diag_dump_one_icsk(struct inet_hashinfo
*hashinfo
,
551 struct netlink_callback
*cb
,
552 const struct inet_diag_req_v2
*req
)
554 struct sk_buff
*in_skb
= cb
->skb
;
555 bool net_admin
= netlink_net_capable(in_skb
, CAP_NET_ADMIN
);
556 struct net
*net
= sock_net(in_skb
->sk
);
561 sk
= inet_diag_find_one_icsk(net
, hashinfo
, req
);
565 rep
= nlmsg_new(inet_sk_attr_size(sk
, req
, net_admin
), GFP_KERNEL
);
571 err
= sk_diag_fill(sk
, rep
, cb
, req
, 0, net_admin
);
573 WARN_ON(err
== -EMSGSIZE
);
577 err
= netlink_unicast(net
->diag_nlsk
, rep
, NETLINK_CB(in_skb
).portid
,
588 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk
);
590 static int inet_diag_cmd_exact(int cmd
, struct sk_buff
*in_skb
,
591 const struct nlmsghdr
*nlh
,
593 const struct inet_diag_req_v2
*req
)
595 const struct inet_diag_handler
*handler
;
596 struct inet_diag_dump_data dump_data
;
599 memset(&dump_data
, 0, sizeof(dump_data
));
600 err
= inet_diag_parse_attrs(nlh
, hdrlen
, dump_data
.req_nlas
);
604 protocol
= inet_diag_get_protocol(req
, &dump_data
);
606 handler
= inet_diag_lock_handler(protocol
);
607 if (IS_ERR(handler
)) {
608 err
= PTR_ERR(handler
);
609 } else if (cmd
== SOCK_DIAG_BY_FAMILY
) {
610 struct netlink_callback cb
= {
615 err
= handler
->dump_one(&cb
, req
);
616 } else if (cmd
== SOCK_DESTROY
&& handler
->destroy
) {
617 err
= handler
->destroy(in_skb
, req
);
621 inet_diag_unlock_handler(handler
);
626 static int bitstring_match(const __be32
*a1
, const __be32
*a2
, int bits
)
628 int words
= bits
>> 5;
633 if (memcmp(a1
, a2
, words
<< 2))
643 mask
= htonl((0xffffffff) << (32 - bits
));
645 if ((w1
^ w2
) & mask
)
652 static int inet_diag_bc_run(const struct nlattr
*_bc
,
653 const struct inet_diag_entry
*entry
)
655 const void *bc
= nla_data(_bc
);
656 int len
= nla_len(_bc
);
660 const struct inet_diag_bc_op
*op
= bc
;
663 case INET_DIAG_BC_NOP
:
665 case INET_DIAG_BC_JMP
:
668 case INET_DIAG_BC_S_EQ
:
669 yes
= entry
->sport
== op
[1].no
;
671 case INET_DIAG_BC_S_GE
:
672 yes
= entry
->sport
>= op
[1].no
;
674 case INET_DIAG_BC_S_LE
:
675 yes
= entry
->sport
<= op
[1].no
;
677 case INET_DIAG_BC_D_EQ
:
678 yes
= entry
->dport
== op
[1].no
;
680 case INET_DIAG_BC_D_GE
:
681 yes
= entry
->dport
>= op
[1].no
;
683 case INET_DIAG_BC_D_LE
:
684 yes
= entry
->dport
<= op
[1].no
;
686 case INET_DIAG_BC_AUTO
:
687 yes
= !(entry
->userlocks
& SOCK_BINDPORT_LOCK
);
689 case INET_DIAG_BC_S_COND
:
690 case INET_DIAG_BC_D_COND
: {
691 const struct inet_diag_hostcond
*cond
;
694 cond
= (const struct inet_diag_hostcond
*)(op
+ 1);
695 if (cond
->port
!= -1 &&
696 cond
->port
!= (op
->code
== INET_DIAG_BC_S_COND
?
697 entry
->sport
: entry
->dport
)) {
702 if (op
->code
== INET_DIAG_BC_S_COND
)
707 if (cond
->family
!= AF_UNSPEC
&&
708 cond
->family
!= entry
->family
) {
709 if (entry
->family
== AF_INET6
&&
710 cond
->family
== AF_INET
) {
711 if (addr
[0] == 0 && addr
[1] == 0 &&
712 addr
[2] == htonl(0xffff) &&
713 bitstring_match(addr
+ 3,
722 if (cond
->prefix_len
== 0)
724 if (bitstring_match(addr
, cond
->addr
,
730 case INET_DIAG_BC_DEV_COND
: {
733 ifindex
= *((const u32
*)(op
+ 1));
734 if (ifindex
!= entry
->ifindex
)
738 case INET_DIAG_BC_MARK_COND
: {
739 struct inet_diag_markcond
*cond
;
741 cond
= (struct inet_diag_markcond
*)(op
+ 1);
742 if ((entry
->mark
& cond
->mask
) != cond
->mark
)
746 #ifdef CONFIG_SOCK_CGROUP_DATA
747 case INET_DIAG_BC_CGROUP_COND
: {
750 cgroup_id
= get_unaligned((const u64
*)(op
+ 1));
751 if (cgroup_id
!= entry
->cgroup_id
)
769 /* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
771 static void entry_fill_addrs(struct inet_diag_entry
*entry
,
772 const struct sock
*sk
)
774 #if IS_ENABLED(CONFIG_IPV6)
775 if (sk
->sk_family
== AF_INET6
) {
776 entry
->saddr
= sk
->sk_v6_rcv_saddr
.s6_addr32
;
777 entry
->daddr
= sk
->sk_v6_daddr
.s6_addr32
;
781 entry
->saddr
= &sk
->sk_rcv_saddr
;
782 entry
->daddr
= &sk
->sk_daddr
;
786 int inet_diag_bc_sk(const struct nlattr
*bc
, struct sock
*sk
)
788 struct inet_sock
*inet
= inet_sk(sk
);
789 struct inet_diag_entry entry
;
794 entry
.family
= sk
->sk_family
;
795 entry_fill_addrs(&entry
, sk
);
796 entry
.sport
= inet
->inet_num
;
797 entry
.dport
= ntohs(inet
->inet_dport
);
798 entry
.ifindex
= sk
->sk_bound_dev_if
;
799 entry
.userlocks
= sk_fullsock(sk
) ? sk
->sk_userlocks
: 0;
801 entry
.mark
= sk
->sk_mark
;
802 else if (sk
->sk_state
== TCP_NEW_SYN_RECV
)
803 entry
.mark
= inet_rsk(inet_reqsk(sk
))->ir_mark
;
806 #ifdef CONFIG_SOCK_CGROUP_DATA
807 entry
.cgroup_id
= sk_fullsock(sk
) ?
808 cgroup_id(sock_cgroup_ptr(&sk
->sk_cgrp_data
)) : 0;
811 return inet_diag_bc_run(bc
, &entry
);
813 EXPORT_SYMBOL_GPL(inet_diag_bc_sk
);
815 static int valid_cc(const void *bc
, int len
, int cc
)
818 const struct inet_diag_bc_op
*op
= bc
;
824 if (op
->yes
< 4 || op
->yes
& 3)
832 /* data is u32 ifindex */
833 static bool valid_devcond(const struct inet_diag_bc_op
*op
, int len
,
836 /* Check ifindex space. */
837 *min_len
+= sizeof(u32
);
843 /* Validate an inet_diag_hostcond. */
844 static bool valid_hostcond(const struct inet_diag_bc_op
*op
, int len
,
847 struct inet_diag_hostcond
*cond
;
850 /* Check hostcond space. */
851 *min_len
+= sizeof(struct inet_diag_hostcond
);
854 cond
= (struct inet_diag_hostcond
*)(op
+ 1);
856 /* Check address family and address length. */
857 switch (cond
->family
) {
862 addr_len
= sizeof(struct in_addr
);
865 addr_len
= sizeof(struct in6_addr
);
870 *min_len
+= addr_len
;
874 /* Check prefix length (in bits) vs address length (in bytes). */
875 if (cond
->prefix_len
> 8 * addr_len
)
881 /* Validate a port comparison operator. */
882 static bool valid_port_comparison(const struct inet_diag_bc_op
*op
,
883 int len
, int *min_len
)
885 /* Port comparisons put the port in a follow-on inet_diag_bc_op. */
886 *min_len
+= sizeof(struct inet_diag_bc_op
);
892 static bool valid_markcond(const struct inet_diag_bc_op
*op
, int len
,
895 *min_len
+= sizeof(struct inet_diag_markcond
);
896 return len
>= *min_len
;
899 #ifdef CONFIG_SOCK_CGROUP_DATA
900 static bool valid_cgroupcond(const struct inet_diag_bc_op
*op
, int len
,
903 *min_len
+= sizeof(u64
);
904 return len
>= *min_len
;
908 static int inet_diag_bc_audit(const struct nlattr
*attr
,
909 const struct sk_buff
*skb
)
911 bool net_admin
= netlink_net_capable(skb
, CAP_NET_ADMIN
);
912 const void *bytecode
, *bc
;
913 int bytecode_len
, len
;
915 if (!attr
|| nla_len(attr
) < sizeof(struct inet_diag_bc_op
))
918 bytecode
= bc
= nla_data(attr
);
919 len
= bytecode_len
= nla_len(attr
);
922 int min_len
= sizeof(struct inet_diag_bc_op
);
923 const struct inet_diag_bc_op
*op
= bc
;
926 case INET_DIAG_BC_S_COND
:
927 case INET_DIAG_BC_D_COND
:
928 if (!valid_hostcond(bc
, len
, &min_len
))
931 case INET_DIAG_BC_DEV_COND
:
932 if (!valid_devcond(bc
, len
, &min_len
))
935 case INET_DIAG_BC_S_EQ
:
936 case INET_DIAG_BC_S_GE
:
937 case INET_DIAG_BC_S_LE
:
938 case INET_DIAG_BC_D_EQ
:
939 case INET_DIAG_BC_D_GE
:
940 case INET_DIAG_BC_D_LE
:
941 if (!valid_port_comparison(bc
, len
, &min_len
))
944 case INET_DIAG_BC_MARK_COND
:
947 if (!valid_markcond(bc
, len
, &min_len
))
950 #ifdef CONFIG_SOCK_CGROUP_DATA
951 case INET_DIAG_BC_CGROUP_COND
:
952 if (!valid_cgroupcond(bc
, len
, &min_len
))
956 case INET_DIAG_BC_AUTO
:
957 case INET_DIAG_BC_JMP
:
958 case INET_DIAG_BC_NOP
:
964 if (op
->code
!= INET_DIAG_BC_NOP
) {
965 if (op
->no
< min_len
|| op
->no
> len
+ 4 || op
->no
& 3)
968 !valid_cc(bytecode
, bytecode_len
, len
- op
->no
))
972 if (op
->yes
< min_len
|| op
->yes
> len
+ 4 || op
->yes
& 3)
977 return len
== 0 ? 0 : -EINVAL
;
980 static void twsk_build_assert(void)
982 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_family
) !=
983 offsetof(struct sock
, sk_family
));
985 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_num
) !=
986 offsetof(struct inet_sock
, inet_num
));
988 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_dport
) !=
989 offsetof(struct inet_sock
, inet_dport
));
991 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_rcv_saddr
) !=
992 offsetof(struct inet_sock
, inet_rcv_saddr
));
994 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_daddr
) !=
995 offsetof(struct inet_sock
, inet_daddr
));
997 #if IS_ENABLED(CONFIG_IPV6)
998 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_rcv_saddr
) !=
999 offsetof(struct sock
, sk_v6_rcv_saddr
));
1001 BUILD_BUG_ON(offsetof(struct inet_timewait_sock
, tw_v6_daddr
) !=
1002 offsetof(struct sock
, sk_v6_daddr
));
1006 void inet_diag_dump_icsk(struct inet_hashinfo
*hashinfo
, struct sk_buff
*skb
,
1007 struct netlink_callback
*cb
,
1008 const struct inet_diag_req_v2
*r
)
1010 bool net_admin
= netlink_net_capable(cb
->skb
, CAP_NET_ADMIN
);
1011 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1012 struct net
*net
= sock_net(skb
->sk
);
1013 u32 idiag_states
= r
->idiag_states
;
1014 int i
, num
, s_i
, s_num
;
1018 bc
= cb_data
->inet_diag_nla_bc
;
1019 if (idiag_states
& TCPF_SYN_RECV
)
1020 idiag_states
|= TCPF_NEW_SYN_RECV
;
1022 s_num
= num
= cb
->args
[2];
1024 if (cb
->args
[0] == 0) {
1025 if (!(idiag_states
& TCPF_LISTEN
) || r
->id
.idiag_dport
)
1026 goto skip_listen_ht
;
1028 for (i
= s_i
; i
< INET_LHTABLE_SIZE
; i
++) {
1029 struct inet_listen_hashbucket
*ilb
;
1030 struct hlist_nulls_node
*node
;
1033 ilb
= &hashinfo
->listening_hash
[i
];
1034 spin_lock(&ilb
->lock
);
1035 sk_nulls_for_each(sk
, node
, &ilb
->nulls_head
) {
1036 struct inet_sock
*inet
= inet_sk(sk
);
1038 if (!net_eq(sock_net(sk
), net
))
1046 if (r
->sdiag_family
!= AF_UNSPEC
&&
1047 sk
->sk_family
!= r
->sdiag_family
)
1050 if (r
->id
.idiag_sport
!= inet
->inet_sport
&&
1054 if (!inet_diag_bc_sk(bc
, sk
))
1057 if (inet_sk_diag_fill(sk
, inet_csk(sk
), skb
,
1060 spin_unlock(&ilb
->lock
);
1067 spin_unlock(&ilb
->lock
);
1073 s_i
= num
= s_num
= 0;
1076 if (!(idiag_states
& ~TCPF_LISTEN
))
1080 for (i
= s_i
; i
<= hashinfo
->ehash_mask
; i
++) {
1081 struct inet_ehash_bucket
*head
= &hashinfo
->ehash
[i
];
1082 spinlock_t
*lock
= inet_ehash_lockp(hashinfo
, i
);
1083 struct hlist_nulls_node
*node
;
1084 struct sock
*sk_arr
[SKARR_SZ
];
1085 int num_arr
[SKARR_SZ
];
1086 int idx
, accum
, res
;
1088 if (hlist_nulls_empty(&head
->chain
))
1098 sk_nulls_for_each(sk
, node
, &head
->chain
) {
1101 if (!net_eq(sock_net(sk
), net
))
1105 state
= (sk
->sk_state
== TCP_TIME_WAIT
) ?
1106 inet_twsk(sk
)->tw_substate
: sk
->sk_state
;
1107 if (!(idiag_states
& (1 << state
)))
1109 if (r
->sdiag_family
!= AF_UNSPEC
&&
1110 sk
->sk_family
!= r
->sdiag_family
)
1112 if (r
->id
.idiag_sport
!= htons(sk
->sk_num
) &&
1115 if (r
->id
.idiag_dport
!= sk
->sk_dport
&&
1118 twsk_build_assert();
1120 if (!inet_diag_bc_sk(bc
, sk
))
1123 if (!refcount_inc_not_zero(&sk
->sk_refcnt
))
1126 num_arr
[accum
] = num
;
1128 if (++accum
== SKARR_SZ
)
1133 spin_unlock_bh(lock
);
1135 for (idx
= 0; idx
< accum
; idx
++) {
1137 res
= sk_diag_fill(sk_arr
[idx
], skb
, cb
, r
,
1138 NLM_F_MULTI
, net_admin
);
1142 sock_gen_put(sk_arr
[idx
]);
1147 if (accum
== SKARR_SZ
) {
1159 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk
);
1161 static int __inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
,
1162 const struct inet_diag_req_v2
*r
)
1164 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1165 const struct inet_diag_handler
*handler
;
1166 u32 prev_min_dump_alloc
;
1167 int protocol
, err
= 0;
1169 protocol
= inet_diag_get_protocol(r
, cb_data
);
1172 prev_min_dump_alloc
= cb
->min_dump_alloc
;
1173 handler
= inet_diag_lock_handler(protocol
);
1174 if (!IS_ERR(handler
))
1175 handler
->dump(skb
, cb
, r
);
1177 err
= PTR_ERR(handler
);
1178 inet_diag_unlock_handler(handler
);
1180 /* The skb is not large enough to fit one sk info and
1181 * inet_sk_diag_fill() has requested for a larger skb.
1183 if (!skb
->len
&& cb
->min_dump_alloc
> prev_min_dump_alloc
) {
1184 err
= pskb_expand_head(skb
, 0, cb
->min_dump_alloc
, GFP_KERNEL
);
1189 return err
? : skb
->len
;
1192 static int inet_diag_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1194 return __inet_diag_dump(skb
, cb
, nlmsg_data(cb
->nlh
));
1197 static int __inet_diag_dump_start(struct netlink_callback
*cb
, int hdrlen
)
1199 const struct nlmsghdr
*nlh
= cb
->nlh
;
1200 struct inet_diag_dump_data
*cb_data
;
1201 struct sk_buff
*skb
= cb
->skb
;
1205 cb_data
= kzalloc(sizeof(*cb_data
), GFP_KERNEL
);
1209 err
= inet_diag_parse_attrs(nlh
, hdrlen
, cb_data
->req_nlas
);
1214 nla
= cb_data
->inet_diag_nla_bc
;
1216 err
= inet_diag_bc_audit(nla
, skb
);
1223 nla
= cb_data
->inet_diag_nla_bpf_stgs
;
1225 struct bpf_sk_storage_diag
*bpf_stg_diag
;
1227 bpf_stg_diag
= bpf_sk_storage_diag_alloc(nla
);
1228 if (IS_ERR(bpf_stg_diag
)) {
1230 return PTR_ERR(bpf_stg_diag
);
1232 cb_data
->bpf_stg_diag
= bpf_stg_diag
;
1239 static int inet_diag_dump_start(struct netlink_callback
*cb
)
1241 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req_v2
));
1244 static int inet_diag_dump_start_compat(struct netlink_callback
*cb
)
1246 return __inet_diag_dump_start(cb
, sizeof(struct inet_diag_req
));
1249 static int inet_diag_dump_done(struct netlink_callback
*cb
)
1251 struct inet_diag_dump_data
*cb_data
= cb
->data
;
1253 bpf_sk_storage_diag_free(cb_data
->bpf_stg_diag
);
1259 static int inet_diag_type2proto(int type
)
1262 case TCPDIAG_GETSOCK
:
1264 case DCCPDIAG_GETSOCK
:
1265 return IPPROTO_DCCP
;
1271 static int inet_diag_dump_compat(struct sk_buff
*skb
,
1272 struct netlink_callback
*cb
)
1274 struct inet_diag_req
*rc
= nlmsg_data(cb
->nlh
);
1275 struct inet_diag_req_v2 req
;
1277 req
.sdiag_family
= AF_UNSPEC
; /* compatibility */
1278 req
.sdiag_protocol
= inet_diag_type2proto(cb
->nlh
->nlmsg_type
);
1279 req
.idiag_ext
= rc
->idiag_ext
;
1280 req
.idiag_states
= rc
->idiag_states
;
1283 return __inet_diag_dump(skb
, cb
, &req
);
1286 static int inet_diag_get_exact_compat(struct sk_buff
*in_skb
,
1287 const struct nlmsghdr
*nlh
)
1289 struct inet_diag_req
*rc
= nlmsg_data(nlh
);
1290 struct inet_diag_req_v2 req
;
1292 req
.sdiag_family
= rc
->idiag_family
;
1293 req
.sdiag_protocol
= inet_diag_type2proto(nlh
->nlmsg_type
);
1294 req
.idiag_ext
= rc
->idiag_ext
;
1295 req
.idiag_states
= rc
->idiag_states
;
1298 return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY
, in_skb
, nlh
,
1299 sizeof(struct inet_diag_req
), &req
);
1302 static int inet_diag_rcv_msg_compat(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
1304 int hdrlen
= sizeof(struct inet_diag_req
);
1305 struct net
*net
= sock_net(skb
->sk
);
1307 if (nlh
->nlmsg_type
>= INET_DIAG_GETSOCK_MAX
||
1308 nlmsg_len(nlh
) < hdrlen
)
1311 if (nlh
->nlmsg_flags
& NLM_F_DUMP
) {
1312 struct netlink_dump_control c
= {
1313 .start
= inet_diag_dump_start_compat
,
1314 .done
= inet_diag_dump_done
,
1315 .dump
= inet_diag_dump_compat
,
1317 return netlink_dump_start(net
->diag_nlsk
, skb
, nlh
, &c
);
1320 return inet_diag_get_exact_compat(skb
, nlh
);
1323 static int inet_diag_handler_cmd(struct sk_buff
*skb
, struct nlmsghdr
*h
)
1325 int hdrlen
= sizeof(struct inet_diag_req_v2
);
1326 struct net
*net
= sock_net(skb
->sk
);
1328 if (nlmsg_len(h
) < hdrlen
)
1331 if (h
->nlmsg_type
== SOCK_DIAG_BY_FAMILY
&&
1332 h
->nlmsg_flags
& NLM_F_DUMP
) {
1333 struct netlink_dump_control c
= {
1334 .start
= inet_diag_dump_start
,
1335 .done
= inet_diag_dump_done
,
1336 .dump
= inet_diag_dump
,
1338 return netlink_dump_start(net
->diag_nlsk
, skb
, h
, &c
);
1341 return inet_diag_cmd_exact(h
->nlmsg_type
, skb
, h
, hdrlen
,
1346 int inet_diag_handler_get_info(struct sk_buff
*skb
, struct sock
*sk
)
1348 const struct inet_diag_handler
*handler
;
1349 struct nlmsghdr
*nlh
;
1350 struct nlattr
*attr
;
1351 struct inet_diag_msg
*r
;
1355 nlh
= nlmsg_put(skb
, 0, 0, SOCK_DIAG_BY_FAMILY
, sizeof(*r
), 0);
1359 r
= nlmsg_data(nlh
);
1360 memset(r
, 0, sizeof(*r
));
1361 inet_diag_msg_common_fill(r
, sk
);
1362 if (sk
->sk_type
== SOCK_DGRAM
|| sk
->sk_type
== SOCK_STREAM
)
1363 r
->id
.idiag_sport
= inet_sk(sk
)->inet_sport
;
1364 r
->idiag_state
= sk
->sk_state
;
1366 if ((err
= nla_put_u8(skb
, INET_DIAG_PROTOCOL
, sk
->sk_protocol
))) {
1367 nlmsg_cancel(skb
, nlh
);
1371 handler
= inet_diag_lock_handler(sk
->sk_protocol
);
1372 if (IS_ERR(handler
)) {
1373 inet_diag_unlock_handler(handler
);
1374 nlmsg_cancel(skb
, nlh
);
1375 return PTR_ERR(handler
);
1378 attr
= handler
->idiag_info_size
1379 ? nla_reserve_64bit(skb
, INET_DIAG_INFO
,
1380 handler
->idiag_info_size
,
1384 info
= nla_data(attr
);
1386 handler
->idiag_get_info(sk
, r
, info
);
1387 inet_diag_unlock_handler(handler
);
1389 nlmsg_end(skb
, nlh
);
1393 static const struct sock_diag_handler inet_diag_handler
= {
1395 .dump
= inet_diag_handler_cmd
,
1396 .get_info
= inet_diag_handler_get_info
,
1397 .destroy
= inet_diag_handler_cmd
,
1400 static const struct sock_diag_handler inet6_diag_handler
= {
1402 .dump
= inet_diag_handler_cmd
,
1403 .get_info
= inet_diag_handler_get_info
,
1404 .destroy
= inet_diag_handler_cmd
,
1407 int inet_diag_register(const struct inet_diag_handler
*h
)
1409 const __u16 type
= h
->idiag_type
;
1412 if (type
>= IPPROTO_MAX
)
1415 mutex_lock(&inet_diag_table_mutex
);
1417 if (!inet_diag_table
[type
]) {
1418 inet_diag_table
[type
] = h
;
1421 mutex_unlock(&inet_diag_table_mutex
);
1425 EXPORT_SYMBOL_GPL(inet_diag_register
);
1427 void inet_diag_unregister(const struct inet_diag_handler
*h
)
1429 const __u16 type
= h
->idiag_type
;
1431 if (type
>= IPPROTO_MAX
)
1434 mutex_lock(&inet_diag_table_mutex
);
1435 inet_diag_table
[type
] = NULL
;
1436 mutex_unlock(&inet_diag_table_mutex
);
1438 EXPORT_SYMBOL_GPL(inet_diag_unregister
);
1440 static int __init
inet_diag_init(void)
1442 const int inet_diag_table_size
= (IPPROTO_MAX
*
1443 sizeof(struct inet_diag_handler
*));
1446 inet_diag_table
= kzalloc(inet_diag_table_size
, GFP_KERNEL
);
1447 if (!inet_diag_table
)
1450 err
= sock_diag_register(&inet_diag_handler
);
1454 err
= sock_diag_register(&inet6_diag_handler
);
1458 sock_diag_register_inet_compat(inet_diag_rcv_msg_compat
);
1463 sock_diag_unregister(&inet_diag_handler
);
1465 kfree(inet_diag_table
);
1469 static void __exit
inet_diag_exit(void)
1471 sock_diag_unregister(&inet6_diag_handler
);
1472 sock_diag_unregister(&inet_diag_handler
);
1473 sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat
);
1474 kfree(inet_diag_table
);
1477 module_init(inet_diag_init
);
1478 module_exit(inet_diag_exit
);
1479 MODULE_LICENSE("GPL");
1480 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 2 /* AF_INET */);
1481 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK
, NETLINK_SOCK_DIAG
, 10 /* AF_INET6 */);