// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet_diag.c	Module for monitoring INET transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/time.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/inet6_hashtables.h>
#include <net/bpf_sk_storage.h>
#include <net/netlink.h>

#include <linux/inet.h>
#include <linux/stddef.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
static const struct inet_diag_handler **inet_diag_table;

struct inet_diag_entry {
	const __be32 *saddr;
	const __be32 *daddr;
	u16 sport;
	u16 dport;
	u16 family;
	u16 userlocks;
	u32 ifindex;
	u32 mark;
#ifdef CONFIG_SOCK_CGROUP_DATA
	u64 cgroup_id;
#endif
};

static DEFINE_MUTEX(inet_diag_table_mutex);
static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
	if (!inet_diag_table[proto])
		sock_load_diag_module(AF_INET, proto);

	mutex_lock(&inet_diag_table_mutex);
	if (!inet_diag_table[proto])
		return ERR_PTR(-ENOENT);

	return inet_diag_table[proto];
}

static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
{
	mutex_unlock(&inet_diag_table_mutex);
}
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
{
	r->idiag_family = sk->sk_family;

	r->id.idiag_sport = htons(sk->sk_num);
	r->id.idiag_dport = sk->sk_dport;
	r->id.idiag_if = sk->sk_bound_dev_if;
	sock_diag_save_cookie(sk, r->id.idiag_cookie);

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
		*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
	} else
#endif
	{
	memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
	memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));

	r->id.idiag_src[0] = sk->sk_rcv_saddr;
	r->id.idiag_dst[0] = sk->sk_daddr;
	}
}
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
static size_t inet_sk_attr_size(struct sock *sk,
				const struct inet_diag_req_v2 *req,
				bool net_admin)
{
	const struct inet_diag_handler *handler;
	size_t aux = 0;

	handler = inet_diag_table[req->sdiag_protocol];
	if (handler && handler->idiag_get_aux_size)
		aux = handler->idiag_get_aux_size(sk, net_admin);

	return	  nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ inet_diag_msg_attrs_size()
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ aux
		+ 64;
}
int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
			     struct inet_diag_msg *r, int ext,
			     struct user_namespace *user_ns,
			     bool net_admin)
{
	const struct inet_sock *inet = inet_sk(sk);

	if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
		goto errout;

	/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
	 * hence this needs to be included regardless of socket family.
	 */
	if (ext & (1 << (INET_DIAG_TOS - 1)))
		if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
			goto errout;

#if IS_ENABLED(CONFIG_IPV6)
	if (r->idiag_family == AF_INET6) {
		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
			if (nla_put_u8(skb, INET_DIAG_TCLASS,
				       inet6_sk(sk)->tclass) < 0)
				goto errout;

		if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
			goto errout;
	}
#endif

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
		goto errout;

	if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
	    ext & (1 << (INET_DIAG_TCLASS - 1))) {
		u32 classid = 0;

#ifdef CONFIG_SOCK_CGROUP_DATA
		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif
		/* Fallback to socket priority if class id isn't set.
		 * Classful qdiscs use it as direct reference to class.
		 * For cgroup2 classid is always zero.
		 */
		if (!classid)
			classid = sk->sk_priority;

		if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
			goto errout;
	}

#ifdef CONFIG_SOCK_CGROUP_DATA
	if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID,
			      cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)),
			      INET_DIAG_PAD))
		goto errout;
#endif

	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
	r->idiag_inode = sock_i_ino(sk);

	return 0;
errout:
	return 1;
}
EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill);
#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
		      struct sk_buff *skb, struct netlink_callback *cb,
		      const struct inet_diag_req_v2 *req,
		      u16 nlmsg_flags, bool net_admin)
{
	const struct tcp_congestion_ops *ca_ops;
	const struct inet_diag_handler *handler;
	struct inet_diag_dump_data *cb_data;
	int ext = req->idiag_ext;
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	void *info = NULL;

	cb_data = cb->data;
	handler = inet_diag_table[req->sdiag_protocol];
	BUG_ON(!handler);

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;

	if (inet_diag_msg_attrs_fill(sk, skb, r, ext,
				     sk_user_ns(NETLINK_CB(cb->skb).sk),
				     net_admin))
		goto errout;

	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
		struct inet_diag_meminfo minfo = {
			.idiag_rmem = sk_rmem_alloc_get(sk),
			.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
			.idiag_fmem = sk->sk_forward_alloc,
			.idiag_tmem = sk_wmem_alloc_get(sk),
		};

		if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
			goto errout;

	/*
	 * RAW sockets might have user-defined protocols assigned,
	 * so report the one supplied on socket creation.
	 */
	if (sk->sk_type == SOCK_RAW) {
		if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
			goto errout;
	}

	if (!icsk) {
		handler->idiag_get_info(sk, r, NULL);
		goto out;
	}

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires =
			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
	} else {
		r->idiag_timer = 0;
		r->idiag_expires = 0;
	}

	if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
		attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
					 handler->idiag_info_size,
					 INET_DIAG_PAD);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	if (ext & (1 << (INET_DIAG_CONG - 1))) {
		int err = 0;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops)
			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
		rcu_read_unlock();
		if (err < 0)
			goto errout;
	}

	handler->idiag_get_info(sk, r, info);

	if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
		if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
			goto errout;

	if (sk->sk_state < TCP_TIME_WAIT) {
		union tcp_cc_info info;
		size_t sz = 0;
		int attr;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ext, &attr, &info);
		rcu_read_unlock();
		if (sz && nla_put(skb, attr, sz, &info) < 0)
			goto errout;
	}

	/* Keep it at the end for potential retry with a larger skb,
	 * or else do best-effort fitting, which is only done for the
	 * first_nlmsg.
	 */
	if (cb_data->bpf_stg_diag) {
		bool first_nlmsg = ((unsigned char *)nlh == skb->data);
		unsigned int prev_min_dump_alloc;
		unsigned int total_nla_size = 0;
		unsigned int msg_len;
		int err;

		msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
		err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb,
					      INET_DIAG_SK_BPF_STORAGES,
					      &total_nla_size);

		if (!err)
			goto out;

		total_nla_size += msg_len;
		prev_min_dump_alloc = cb->min_dump_alloc;
		if (total_nla_size > prev_min_dump_alloc)
			cb->min_dump_alloc = min_t(u32, total_nla_size,
						   MAX_DUMP_ALLOC_SIZE);

		if (!first_nlmsg)
			goto errout;

		if (cb->min_dump_alloc > prev_min_dump_alloc)
			/* Retry with pskb_expand_head() with
			 * __GFP_DIRECT_RECLAIM
			 */
			goto errout;

		WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc);

		/* Send what we have for this sk
		 * and move on to the next sk in the following
		 * dump()
		 */
	}

out:
	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
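
/* Usage note (illustrative sketch, not part of this module): userspace
 * reaches inet_sk_diag_fill() by sending SOCK_DIAG_BY_FAMILY with
 * NLM_F_DUMP on a NETLINK_SOCK_DIAG socket. A minimal request builder,
 * assuming the standard uapi headers; error handling elided:
 *
 *	#include <linux/inet_diag.h>
 *	#include <linux/netlink.h>
 *	#include <linux/sock_diag.h>
 *	#include <netinet/in.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	// fd is socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG)
 *	int send_tcp_dump_req(int fd)
 *	{
 *		struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
 *		struct {
 *			struct nlmsghdr nlh;
 *			struct inet_diag_req_v2 req;
 *		} msg = {
 *			.nlh = {
 *				.nlmsg_len = sizeof(msg),
 *				.nlmsg_type = SOCK_DIAG_BY_FAMILY,
 *				.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *			},
 *			.req = {
 *				.sdiag_family = AF_INET,
 *				.sdiag_protocol = IPPROTO_TCP,
 *				.idiag_states = ~0U,	// all TCP states
 *				.idiag_ext = 1 << (INET_DIAG_INFO - 1),
 *			},
 *		};
 *
 *		return sendto(fd, &msg, sizeof(msg), 0,
 *			      (struct sockaddr *)&nladdr, sizeof(nladdr));
 *	}
 *
 * Each reply then carries a struct inet_diag_msg plus the attributes
 * requested via idiag_ext, filled in by the function above.
 */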
static int inet_twsk_diag_fill(struct sock *sk,
			       struct sk_buff *skb,
			       struct netlink_callback *cb,
			       u16 nlmsg_flags)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
			sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans = 0;

	r->idiag_state = tw->tw_substate;
	r->idiag_timer = 3;
	tmo = tw->tw_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	nlmsg_end(skb, nlh);
	return 0;
}
static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      u16 nlmsg_flags, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = reqsk->num_retrans;

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark))
		return -EMSGSIZE;

	nlmsg_end(skb, nlh);
	return 0;
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			u16 nlmsg_flags, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
				 net_admin);
}
struct sock *inet_diag_find_one_icsk(struct net *net,
				     struct inet_hashinfo *hashinfo,
				     const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET)
		sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, hashinfo, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
	}
#endif
	else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
			    struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *req)
{
	struct sk_buff *in_skb = cb->skb;
	bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	sk = inet_diag_find_one_icsk(net, hashinfo, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
			      MSG_DONTWAIT);
	if (err > 0)
		err = 0;

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
			       const struct nlmsghdr *nlh,
			       const struct inet_diag_req_v2 *req)
{
	const struct inet_diag_handler *handler;
	int err;

	handler = inet_diag_lock_handler(req->sdiag_protocol);
	if (IS_ERR(handler)) {
		err = PTR_ERR(handler);
	} else if (cmd == SOCK_DIAG_BY_FAMILY) {
		struct inet_diag_dump_data empty_dump_data = {};
		struct netlink_callback cb = {
			.nlh = nlh,
			.skb = in_skb,
			.data = &empty_dump_data,
		};
		err = handler->dump_one(&cb, req);
	} else if (cmd == SOCK_DESTROY && handler->destroy) {
		err = handler->destroy(in_skb, req);
	} else {
		err = -EOPNOTSUPP;
	}
	inet_diag_unlock_handler(handler);

	return err;
}
static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
	int words = bits >> 5;

	bits &= 0x1f;

	if (words) {
		if (memcmp(a1, a2, words << 2))
			return 0;
	}
	if (bits) {
		__be32 w1, w2;
		__be32 mask;

		w1 = a1[words];
		w2 = a2[words];

		mask = htonl((0xffffffff) << (32 - bits));

		if ((w1 ^ w2) & mask)
			return 0;
	}

	return 1;
}
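
/* Worked example (illustrative): for bits = 20, words = bits >> 5 == 0,
 * so no full words are memcmp'd and the tail compare uses
 * mask = htonl(0xffffffff << 12), i.e. the top 20 bits. Against a
 * 10.1.240.0/20 prefix, 10.1.240.1 and 10.1.255.254 match while
 * 10.1.16.1 does not.
 */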
static int inet_diag_bc_run(const struct nlattr *_bc,
			    const struct inet_diag_entry *entry)
{
	const void *bc = nla_data(_bc);
	int len = nla_len(_bc);

	while (len > 0) {
		int yes = 1;
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_NOP:
			break;
		case INET_DIAG_BC_JMP:
			yes = 0;
			break;
		case INET_DIAG_BC_S_EQ:
			yes = entry->sport == op[1].no;
			break;
		case INET_DIAG_BC_S_GE:
			yes = entry->sport >= op[1].no;
			break;
		case INET_DIAG_BC_S_LE:
			yes = entry->sport <= op[1].no;
			break;
		case INET_DIAG_BC_D_EQ:
			yes = entry->dport == op[1].no;
			break;
		case INET_DIAG_BC_D_GE:
			yes = entry->dport >= op[1].no;
			break;
		case INET_DIAG_BC_D_LE:
			yes = entry->dport <= op[1].no;
			break;
		case INET_DIAG_BC_AUTO:
			yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
			break;
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND: {
			const struct inet_diag_hostcond *cond;
			const __be32 *addr;

			cond = (const struct inet_diag_hostcond *)(op + 1);
			if (cond->port != -1 &&
			    cond->port != (op->code == INET_DIAG_BC_S_COND ?
					   entry->sport : entry->dport)) {
				yes = 0;
				break;
			}

			if (op->code == INET_DIAG_BC_S_COND)
				addr = entry->saddr;
			else
				addr = entry->daddr;

			if (cond->family != AF_UNSPEC &&
			    cond->family != entry->family) {
				if (entry->family == AF_INET6 &&
				    cond->family == AF_INET) {
					if (addr[0] == 0 && addr[1] == 0 &&
					    addr[2] == htonl(0xffff) &&
					    bitstring_match(addr + 3,
							    cond->addr,
							    cond->prefix_len))
						break;
				}
				yes = 0;
				break;
			}

			if (cond->prefix_len == 0)
				break;
			if (bitstring_match(addr, cond->addr,
					    cond->prefix_len))
				break;
			yes = 0;
			break;
		}
		case INET_DIAG_BC_DEV_COND: {
			u32 ifindex;

			ifindex = *((const u32 *)(op + 1));
			if (ifindex != entry->ifindex)
				yes = 0;
			break;
		}
		case INET_DIAG_BC_MARK_COND: {
			struct inet_diag_markcond *cond;

			cond = (struct inet_diag_markcond *)(op + 1);
			if ((entry->mark & cond->mask) != cond->mark)
				yes = 0;
			break;
		}
#ifdef CONFIG_SOCK_CGROUP_DATA
		case INET_DIAG_BC_CGROUP_COND: {
			u64 cgroup_id;

			cgroup_id = get_unaligned((const u64 *)(op + 1));
			if (cgroup_id != entry->cgroup_id)
				yes = 0;
			break;
		}
#endif
		}

		if (yes) {
			len -= op->yes;
			bc += op->yes;
		} else {
			len -= op->no;
			bc += op->no;
		}
	}
	return len == 0;
}
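
/* Bytecode layout example (illustrative): a filter matching sport >= 1024
 * occupies two inet_diag_bc_op slots (4 bytes each), the second carrying
 * the port operand in its ->no field, as read via op[1].no above:
 *
 *	struct inet_diag_bc_op prog[2] = {
 *		[0] = {
 *			.code = INET_DIAG_BC_S_GE,
 *			.yes  = 8,	// on match: step past both slots
 *			.no   = 12,	// on mismatch: jump past the end
 *		},
 *		[1] = { .no = 1024 },	// comparison operand
 *	};
 *
 * Walking exactly off the end (len reaches 0) accepts the socket; a
 * mismatch here jumps to len == -4 and rejects it, which is why the
 * audit pass below allows jump targets up to len + 4.
 */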
/* This helper is available for all sockets (ESTABLISH, TIMEWAIT, SYN_RECV)
 */
static void entry_fill_addrs(struct inet_diag_entry *entry,
			     const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6) {
		entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
		entry->daddr = sk->sk_v6_daddr.s6_addr32;
	} else
#endif
	{
		entry->saddr = &sk->sk_rcv_saddr;
		entry->daddr = &sk->sk_daddr;
	}
}
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_entry entry;

	if (!bc)
		return 1;

	entry.family = sk->sk_family;
	entry_fill_addrs(&entry, sk);
	entry.sport = inet->inet_num;
	entry.dport = ntohs(inet->inet_dport);
	entry.ifindex = sk->sk_bound_dev_if;
	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
	if (sk_fullsock(sk))
		entry.mark = sk->sk_mark;
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
	else
		entry.mark = 0;
#ifdef CONFIG_SOCK_CGROUP_DATA
	entry.cgroup_id = cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data));
#endif

	return inet_diag_bc_run(bc, &entry);
}
EXPORT_SYMBOL_GPL(inet_diag_bc_sk);
static int valid_cc(const void *bc, int len, int cc)
{
	while (len >= 0) {
		const struct inet_diag_bc_op *op = bc;

		if (cc > len)
			return 0;
		if (cc == len)
			return 1;
		if (op->yes < 4 || op->yes & 3)
			return 0;
		len -= op->yes;
		bc  += op->yes;
	}
	return 0;
}
/* data is u32 ifindex */
static bool valid_devcond(const struct inet_diag_bc_op *op, int len,
			  int *min_len)
{
	/* Check ifindex space. */
	*min_len += sizeof(u32);
	if (len < *min_len)
		return false;

	return true;
}
/* Validate an inet_diag_hostcond. */
static bool valid_hostcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	struct inet_diag_hostcond *cond;
	int addr_len;

	/* Check hostcond space. */
	*min_len += sizeof(struct inet_diag_hostcond);
	if (len < *min_len)
		return false;
	cond = (struct inet_diag_hostcond *)(op + 1);

	/* Check address family and address length. */
	switch (cond->family) {
	case AF_UNSPEC:
		addr_len = 0;
		break;
	case AF_INET:
		addr_len = sizeof(struct in_addr);
		break;
	case AF_INET6:
		addr_len = sizeof(struct in6_addr);
		break;
	default:
		return false;
	}
	*min_len += addr_len;
	if (len < *min_len)
		return false;

	/* Check prefix length (in bits) vs address length (in bytes). */
	if (cond->prefix_len > 8 * addr_len)
		return false;

	return true;
}
/* Validate a port comparison operator. */
static bool valid_port_comparison(const struct inet_diag_bc_op *op,
				  int len, int *min_len)
{
	/* Port comparisons put the port in a follow-on inet_diag_bc_op. */
	*min_len += sizeof(struct inet_diag_bc_op);
	if (len < *min_len)
		return false;
	return true;
}

static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
			   int *min_len)
{
	*min_len += sizeof(struct inet_diag_markcond);
	return len >= *min_len;
}

#ifdef CONFIG_SOCK_CGROUP_DATA
static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
			     int *min_len)
{
	*min_len += sizeof(u64);
	return len >= *min_len;
}
#endif
static int inet_diag_bc_audit(const struct nlattr *attr,
			      const struct sk_buff *skb)
{
	bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
	const void *bytecode, *bc;
	int bytecode_len, len;

	if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
		return -EINVAL;

	bytecode = bc = nla_data(attr);
	len = bytecode_len = nla_len(attr);

	while (len > 0) {
		int min_len = sizeof(struct inet_diag_bc_op);
		const struct inet_diag_bc_op *op = bc;

		switch (op->code) {
		case INET_DIAG_BC_S_COND:
		case INET_DIAG_BC_D_COND:
			if (!valid_hostcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_DEV_COND:
			if (!valid_devcond(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_S_EQ:
		case INET_DIAG_BC_S_GE:
		case INET_DIAG_BC_S_LE:
		case INET_DIAG_BC_D_EQ:
		case INET_DIAG_BC_D_GE:
		case INET_DIAG_BC_D_LE:
			if (!valid_port_comparison(bc, len, &min_len))
				return -EINVAL;
			break;
		case INET_DIAG_BC_MARK_COND:
			if (!net_admin)
				return -EPERM;
			if (!valid_markcond(bc, len, &min_len))
				return -EINVAL;
			break;
#ifdef CONFIG_SOCK_CGROUP_DATA
		case INET_DIAG_BC_CGROUP_COND:
			if (!valid_cgroupcond(bc, len, &min_len))
				return -EINVAL;
			break;
#endif
		case INET_DIAG_BC_AUTO:
		case INET_DIAG_BC_JMP:
		case INET_DIAG_BC_NOP:
			break;
		default:
			return -EINVAL;
		}

		if (op->code != INET_DIAG_BC_NOP) {
			if (op->no < min_len || op->no > len + 4 || op->no & 3)
				return -EINVAL;
			if (op->no < len &&
			    !valid_cc(bytecode, bytecode_len, len - op->no))
				return -EINVAL;
		}

		if (op->yes < min_len || op->yes > len + 4 || op->yes & 3)
			return -EINVAL;
		bc  += op->yes;
		len -= op->yes;
	}
	return len == 0 ? 0 : -EINVAL;
}
static void twsk_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
		     offsetof(struct sock, sk_family));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
		     offsetof(struct inet_sock, inet_num));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
		     offsetof(struct inet_sock, inet_dport));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
		     offsetof(struct inet_sock, inet_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
		     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
		     offsetof(struct sock, sk_v6_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
		     offsetof(struct sock, sk_v6_daddr));
#endif
}
void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
			 struct netlink_callback *cb,
			 const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct inet_diag_dump_data *cb_data = cb->data;
	struct net *net = sock_net(skb->sk);
	u32 idiag_states = r->idiag_states;
	int i, num, s_i, s_num;
	struct nlattr *bc;
	struct sock *sk;

	bc = cb_data->inet_diag_nla_bc;
	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
	s_i = cb->args[1];
	s_num = num = cb->args[2];

	if (cb->args[0] == 0) {
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
			goto skip_listen_ht;

		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
			struct inet_listen_hashbucket *ilb;
			struct hlist_nulls_node *node;

			num = 0;
			ilb = &hashinfo->listening_hash[i];
			spin_lock(&ilb->lock);
			sk_nulls_for_each(sk, node, &ilb->nulls_head) {
				struct inet_sock *inet = inet_sk(sk);

				if (!net_eq(sock_net(sk), net))
					continue;

				if (num < s_num) {
					num++;
					continue;
				}

				if (r->sdiag_family != AF_UNSPEC &&
				    sk->sk_family != r->sdiag_family)
					goto next_listen;

				if (r->id.idiag_sport != inet->inet_sport &&
				    r->id.idiag_sport)
					goto next_listen;

				if (!inet_diag_bc_sk(bc, sk))
					goto next_listen;

				if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
						      cb, r, NLM_F_MULTI,
						      net_admin) < 0) {
					spin_unlock(&ilb->lock);
					goto done;
				}

next_listen:
				++num;
			}
			spin_unlock(&ilb->lock);

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

	if (!(idiag_states & ~TCPF_LISTEN))
		goto out;

#define SKARR_SZ 16
	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct hlist_nulls_node *node;
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;

		if (hlist_nulls_empty(&head->chain))
			continue;

		if (i > s_i)
			s_num = 0;

next_chunk:
		num = 0;
		accum = 0;
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &head->chain) {
			int state;

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next_normal;
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				inet_twsk(sk)->tw_substate : sk->sk_state;
			if (!(idiag_states & (1 << state)))
				goto next_normal;
			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_normal;
			if (r->id.idiag_sport != htons(sk->sk_num) &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != sk->sk_dport &&
			    r->id.idiag_dport)
				goto next_normal;
			twsk_build_assert();

			if (!inet_diag_bc_sk(bc, sk))
				goto next_normal;

			if (!refcount_inc_not_zero(&sk->sk_refcnt))
				goto next_normal;

			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);
		res = 0;
		for (idx = 0; idx < accum; idx++) {
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, cb, r,
						   NLM_F_MULTI, net_admin);
				if (res < 0)
					num = num_arr[idx];
			}
			sock_gen_put(sk_arr[idx]);
		}
		if (res < 0)
			break;
		cond_resched();
		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
	}

done:
	cb->args[1] = i;
	cb->args[2] = num;
out:
	;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    const struct inet_diag_req_v2 *r)
{
	const struct inet_diag_handler *handler;
	u32 prev_min_dump_alloc;
	int err = 0;

again:
	prev_min_dump_alloc = cb->min_dump_alloc;
	handler = inet_diag_lock_handler(r->sdiag_protocol);
	if (!IS_ERR(handler))
		handler->dump(skb, cb, r);
	else
		err = PTR_ERR(handler);
	inet_diag_unlock_handler(handler);

	/* The skb is not large enough to fit one sk info and
	 * inet_sk_diag_fill() has requested for a larger skb.
	 */
	if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) {
		err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL);
		if (!err)
			goto again;
	}

	return err ? : skb->len;
}

static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh));
}
static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct inet_diag_dump_data *cb_data;
	struct sk_buff *skb = cb->skb;
	struct nlattr *nla;
	int rem, err;

	cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
	if (!cb_data)
		return -ENOMEM;

	nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen),
			  nlmsg_attrlen(nlh, hdrlen), rem) {
		int type = nla_type(nla);

		if (type < __INET_DIAG_REQ_MAX)
			cb_data->req_nlas[type] = nla;
	}

	nla = cb_data->inet_diag_nla_bc;
	if (nla) {
		err = inet_diag_bc_audit(nla, skb);
		if (err) {
			kfree(cb_data);
			return err;
		}
	}

	nla = cb_data->inet_diag_nla_bpf_stgs;
	if (nla) {
		struct bpf_sk_storage_diag *bpf_stg_diag;

		bpf_stg_diag = bpf_sk_storage_diag_alloc(nla);
		if (IS_ERR(bpf_stg_diag)) {
			kfree(cb_data);
			return PTR_ERR(bpf_stg_diag);
		}
		cb_data->bpf_stg_diag = bpf_stg_diag;
	}

	cb->data = cb_data;
	return 0;
}

static int inet_diag_dump_start(struct netlink_callback *cb)
{
	return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2));
}

static int inet_diag_dump_start_compat(struct netlink_callback *cb)
{
	return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req));
}

static int inet_diag_dump_done(struct netlink_callback *cb)
{
	struct inet_diag_dump_data *cb_data = cb->data;

	bpf_sk_storage_diag_free(cb_data->bpf_stg_diag);
	kfree(cb->data);

	return 0;
}
static int inet_diag_type2proto(int type)
{
	switch (type) {
	case TCPDIAG_GETSOCK:
		return IPPROTO_TCP;
	case DCCPDIAG_GETSOCK:
		return IPPROTO_DCCP;
	default:
		return 0;
	}
}

static int inet_diag_dump_compat(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct inet_diag_req *rc = nlmsg_data(cb->nlh);
	struct inet_diag_req_v2 req;

	req.sdiag_family = AF_UNSPEC; /* compatibility */
	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	return __inet_diag_dump(skb, cb, &req);
}

static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
				      const struct nlmsghdr *nlh)
{
	struct inet_diag_req *rc = nlmsg_data(nlh);
	struct inet_diag_req_v2 req;

	req.sdiag_family = rc->idiag_family;
	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
	req.idiag_ext = rc->idiag_ext;
	req.idiag_states = rc->idiag_states;
	req.id = rc->id;

	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
}
static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int hdrlen = sizeof(struct inet_diag_req);
	struct net *net = sock_net(skb->sk);

	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
	    nlmsg_len(nlh) < hdrlen)
		return -EINVAL;

	if (nlh->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.start = inet_diag_dump_start_compat,
			.done = inet_diag_dump_done,
			.dump = inet_diag_dump_compat,
		};
		return netlink_dump_start(net->diag_nlsk, skb, nlh, &c);
	}

	return inet_diag_get_exact_compat(skb, nlh);
}

static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
{
	int hdrlen = sizeof(struct inet_diag_req_v2);
	struct net *net = sock_net(skb->sk);

	if (nlmsg_len(h) < hdrlen)
		return -EINVAL;

	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
	    h->nlmsg_flags & NLM_F_DUMP) {
		struct netlink_dump_control c = {
			.start = inet_diag_dump_start,
			.done = inet_diag_dump_done,
			.dump = inet_diag_dump,
		};
		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
	}

	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
}
static
int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
{
	const struct inet_diag_handler *handler;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	struct inet_diag_msg *r;
	void *info = NULL;
	int err = 0;

	nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
	if (!nlh)
		return -ENOMEM;

	r = nlmsg_data(nlh);
	memset(r, 0, sizeof(*r));
	inet_diag_msg_common_fill(r, sk);
	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_STREAM)
		r->id.idiag_sport = inet_sk(sk)->inet_sport;
	r->idiag_state = sk->sk_state;

	if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
		nlmsg_cancel(skb, nlh);
		return err;
	}

	handler = inet_diag_lock_handler(sk->sk_protocol);
	if (IS_ERR(handler)) {
		inet_diag_unlock_handler(handler);
		nlmsg_cancel(skb, nlh);
		return PTR_ERR(handler);
	}

	attr = handler->idiag_info_size
		? nla_reserve_64bit(skb, INET_DIAG_INFO,
				    handler->idiag_info_size,
				    INET_DIAG_PAD)
		: NULL;
	if (attr)
		info = nla_data(attr);

	handler->idiag_get_info(sk, r, info);
	inet_diag_unlock_handler(handler);

	nlmsg_end(skb, nlh);
	return 0;
}
static const struct sock_diag_handler inet_diag_handler = {
	.family = AF_INET,
	.dump = inet_diag_handler_cmd,
	.get_info = inet_diag_handler_get_info,
	.destroy = inet_diag_handler_cmd,
};

static const struct sock_diag_handler inet6_diag_handler = {
	.family = AF_INET6,
	.dump = inet_diag_handler_cmd,
	.get_info = inet_diag_handler_get_info,
	.destroy = inet_diag_handler_cmd,
};
int inet_diag_register(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;
	int err = -EINVAL;

	if (type >= IPPROTO_MAX)
		goto out;

	mutex_lock(&inet_diag_table_mutex);
	err = -EEXIST;
	if (!inet_diag_table[type]) {
		inet_diag_table[type] = h;
		err = 0;
	}
	mutex_unlock(&inet_diag_table_mutex);
out:
	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_register);

void inet_diag_unregister(const struct inet_diag_handler *h)
{
	const __u16 type = h->idiag_type;

	if (type >= IPPROTO_MAX)
		return;

	mutex_lock(&inet_diag_table_mutex);
	inet_diag_table[type] = NULL;
	mutex_unlock(&inet_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);
static int __init inet_diag_init(void)
{
	const int inet_diag_table_size = (IPPROTO_MAX *
					  sizeof(struct inet_diag_handler *));
	int err = -ENOMEM;

	inet_diag_table = kzalloc(inet_diag_table_size, GFP_KERNEL);
	if (!inet_diag_table)
		goto out;

	err = sock_diag_register(&inet_diag_handler);
	if (err)
		goto out_free_nl;

	err = sock_diag_register(&inet6_diag_handler);
	if (err)
		goto out_free_inet;

	sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
out:
	return err;

out_free_inet:
	sock_diag_unregister(&inet_diag_handler);
out_free_nl:
	kfree(inet_diag_table);
	goto out;
}

static void __exit inet_diag_exit(void)
{
	sock_diag_unregister(&inet6_diag_handler);
	sock_diag_unregister(&inet_diag_handler);
	sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
	kfree(inet_diag_table);
}

module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);