// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/psample/psample.c - Netlink channel for packet sampling
 * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
 */
7 #include <linux/types.h>
8 #include <linux/kernel.h>
9 #include <linux/skbuff.h>
10 #include <linux/module.h>
11 #include <net/net_namespace.h>
13 #include <net/netlink.h>
14 #include <net/genetlink.h>
15 #include <net/psample.h>
16 #include <linux/spinlock.h>
17 #include <net/ip_tunnels.h>
18 #include <net/dst_metadata.h>
20 #define PSAMPLE_MAX_PACKET_SIZE 0xffff
/* Every sampling group created by this module is linked on this list. */
static LIST_HEAD(psample_groups_list);
/*
 * Taken with spin_lock_bh() by the code in this file that walks or modifies
 * psample_groups_list and by the reference-count helpers.
 */
static DEFINE_SPINLOCK(psample_groups_lock);
/*
 * Multicast groups of the psample generic netlink family.  The enumerators
 * index psample_nl_mcgrps[] and are passed as the group argument to
 * genlmsg_multicast_netns().
 */
enum psample_nl_multicast_groups {
	PSAMPLE_NL_MCGRP_CONFIG,	/* group new/delete notifications */
	PSAMPLE_NL_MCGRP_SAMPLE,	/* sampled packets */
};
31 static const struct genl_multicast_group psample_nl_mcgrps
[] = {
32 [PSAMPLE_NL_MCGRP_CONFIG
] = { .name
= PSAMPLE_NL_MCGRP_CONFIG_NAME
},
33 [PSAMPLE_NL_MCGRP_SAMPLE
] = { .name
= PSAMPLE_NL_MCGRP_SAMPLE_NAME
},
36 static struct genl_family psample_nl_family __ro_after_init
;
38 static int psample_group_nl_fill(struct sk_buff
*msg
,
39 struct psample_group
*group
,
40 enum psample_command cmd
, u32 portid
, u32 seq
,
46 hdr
= genlmsg_put(msg
, portid
, seq
, &psample_nl_family
, flags
, cmd
);
50 ret
= nla_put_u32(msg
, PSAMPLE_ATTR_SAMPLE_GROUP
, group
->group_num
);
54 ret
= nla_put_u32(msg
, PSAMPLE_ATTR_GROUP_REFCOUNT
, group
->refcount
);
58 ret
= nla_put_u32(msg
, PSAMPLE_ATTR_GROUP_SEQ
, group
->seq
);
62 genlmsg_end(msg
, hdr
);
66 genlmsg_cancel(msg
, hdr
);
70 static int psample_nl_cmd_get_group_dumpit(struct sk_buff
*msg
,
71 struct netlink_callback
*cb
)
73 struct psample_group
*group
;
74 int start
= cb
->args
[0];
78 spin_lock_bh(&psample_groups_lock
);
79 list_for_each_entry(group
, &psample_groups_list
, list
) {
80 if (!net_eq(group
->net
, sock_net(msg
->sk
)))
86 err
= psample_group_nl_fill(msg
, group
, PSAMPLE_CMD_NEW_GROUP
,
87 NETLINK_CB(cb
->skb
).portid
,
88 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
);
94 spin_unlock_bh(&psample_groups_lock
);
99 static const struct genl_small_ops psample_nl_ops
[] = {
101 .cmd
= PSAMPLE_CMD_GET_GROUP
,
102 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
103 .dumpit
= psample_nl_cmd_get_group_dumpit
,
104 /* can be retrieved by unprivileged users */
108 static struct genl_family psample_nl_family __ro_after_init
= {
109 .name
= PSAMPLE_GENL_NAME
,
110 .version
= PSAMPLE_GENL_VERSION
,
111 .maxattr
= PSAMPLE_ATTR_MAX
,
113 .module
= THIS_MODULE
,
114 .mcgrps
= psample_nl_mcgrps
,
115 .small_ops
= psample_nl_ops
,
116 .n_small_ops
= ARRAY_SIZE(psample_nl_ops
),
117 .n_mcgrps
= ARRAY_SIZE(psample_nl_mcgrps
),
120 static void psample_group_notify(struct psample_group
*group
,
121 enum psample_command cmd
)
126 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_ATOMIC
);
130 err
= psample_group_nl_fill(msg
, group
, cmd
, 0, 0, NLM_F_MULTI
);
132 genlmsg_multicast_netns(&psample_nl_family
, group
->net
, msg
, 0,
133 PSAMPLE_NL_MCGRP_CONFIG
, GFP_ATOMIC
);
138 static struct psample_group
*psample_group_create(struct net
*net
,
141 struct psample_group
*group
;
143 group
= kzalloc(sizeof(*group
), GFP_ATOMIC
);
148 group
->group_num
= group_num
;
149 list_add_tail(&group
->list
, &psample_groups_list
);
151 psample_group_notify(group
, PSAMPLE_CMD_NEW_GROUP
);
155 static void psample_group_destroy(struct psample_group
*group
)
157 psample_group_notify(group
, PSAMPLE_CMD_DEL_GROUP
);
158 list_del(&group
->list
);
159 kfree_rcu(group
, rcu
);
162 static struct psample_group
*
163 psample_group_lookup(struct net
*net
, u32 group_num
)
165 struct psample_group
*group
;
167 list_for_each_entry(group
, &psample_groups_list
, list
)
168 if ((group
->group_num
== group_num
) && (group
->net
== net
))
173 struct psample_group
*psample_group_get(struct net
*net
, u32 group_num
)
175 struct psample_group
*group
;
177 spin_lock_bh(&psample_groups_lock
);
179 group
= psample_group_lookup(net
, group_num
);
181 group
= psample_group_create(net
, group_num
);
188 spin_unlock_bh(&psample_groups_lock
);
191 EXPORT_SYMBOL_GPL(psample_group_get
);
193 void psample_group_take(struct psample_group
*group
)
195 spin_lock_bh(&psample_groups_lock
);
197 spin_unlock_bh(&psample_groups_lock
);
199 EXPORT_SYMBOL_GPL(psample_group_take
);
201 void psample_group_put(struct psample_group
*group
)
203 spin_lock_bh(&psample_groups_lock
);
205 if (--group
->refcount
== 0)
206 psample_group_destroy(group
);
208 spin_unlock_bh(&psample_groups_lock
);
210 EXPORT_SYMBOL_GPL(psample_group_put
);
213 static int __psample_ip_tun_to_nlattr(struct sk_buff
*skb
,
214 struct ip_tunnel_info
*tun_info
)
216 unsigned short tun_proto
= ip_tunnel_info_af(tun_info
);
217 const void *tun_opts
= ip_tunnel_info_opts(tun_info
);
218 const struct ip_tunnel_key
*tun_key
= &tun_info
->key
;
219 int tun_opts_len
= tun_info
->options_len
;
221 if (tun_key
->tun_flags
& TUNNEL_KEY
&&
222 nla_put_be64(skb
, PSAMPLE_TUNNEL_KEY_ATTR_ID
, tun_key
->tun_id
,
223 PSAMPLE_TUNNEL_KEY_ATTR_PAD
))
226 if (tun_info
->mode
& IP_TUNNEL_INFO_BRIDGE
&&
227 nla_put_flag(skb
, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE
))
232 if (tun_key
->u
.ipv4
.src
&&
233 nla_put_in_addr(skb
, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC
,
234 tun_key
->u
.ipv4
.src
))
236 if (tun_key
->u
.ipv4
.dst
&&
237 nla_put_in_addr(skb
, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST
,
238 tun_key
->u
.ipv4
.dst
))
242 if (!ipv6_addr_any(&tun_key
->u
.ipv6
.src
) &&
243 nla_put_in6_addr(skb
, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC
,
244 &tun_key
->u
.ipv6
.src
))
246 if (!ipv6_addr_any(&tun_key
->u
.ipv6
.dst
) &&
247 nla_put_in6_addr(skb
, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST
,
248 &tun_key
->u
.ipv6
.dst
))
253 nla_put_u8(skb
, PSAMPLE_TUNNEL_KEY_ATTR_TOS
, tun_key
->tos
))
255 if (nla_put_u8(skb
, PSAMPLE_TUNNEL_KEY_ATTR_TTL
, tun_key
->ttl
))
257 if ((tun_key
->tun_flags
& TUNNEL_DONT_FRAGMENT
) &&
258 nla_put_flag(skb
, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT
))
260 if ((tun_key
->tun_flags
& TUNNEL_CSUM
) &&
261 nla_put_flag(skb
, PSAMPLE_TUNNEL_KEY_ATTR_CSUM
))
263 if (tun_key
->tp_src
&&
264 nla_put_be16(skb
, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC
, tun_key
->tp_src
))
266 if (tun_key
->tp_dst
&&
267 nla_put_be16(skb
, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST
, tun_key
->tp_dst
))
269 if ((tun_key
->tun_flags
& TUNNEL_OAM
) &&
270 nla_put_flag(skb
, PSAMPLE_TUNNEL_KEY_ATTR_OAM
))
273 if (tun_key
->tun_flags
& TUNNEL_GENEVE_OPT
&&
274 nla_put(skb
, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS
,
275 tun_opts_len
, tun_opts
))
277 else if (tun_key
->tun_flags
& TUNNEL_ERSPAN_OPT
&&
278 nla_put(skb
, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS
,
279 tun_opts_len
, tun_opts
))
286 static int psample_ip_tun_to_nlattr(struct sk_buff
*skb
,
287 struct ip_tunnel_info
*tun_info
)
292 nla
= nla_nest_start_noflag(skb
, PSAMPLE_ATTR_TUNNEL
);
296 err
= __psample_ip_tun_to_nlattr(skb
, tun_info
);
298 nla_nest_cancel(skb
, nla
);
302 nla_nest_end(skb
, nla
);
307 static int psample_tunnel_meta_len(struct ip_tunnel_info
*tun_info
)
309 unsigned short tun_proto
= ip_tunnel_info_af(tun_info
);
310 const struct ip_tunnel_key
*tun_key
= &tun_info
->key
;
311 int tun_opts_len
= tun_info
->options_len
;
314 if (tun_key
->tun_flags
& TUNNEL_KEY
)
315 sum
+= nla_total_size(sizeof(u64
));
317 if (tun_info
->mode
& IP_TUNNEL_INFO_BRIDGE
)
318 sum
+= nla_total_size(0);
322 if (tun_key
->u
.ipv4
.src
)
323 sum
+= nla_total_size(sizeof(u32
));
324 if (tun_key
->u
.ipv4
.dst
)
325 sum
+= nla_total_size(sizeof(u32
));
328 if (!ipv6_addr_any(&tun_key
->u
.ipv6
.src
))
329 sum
+= nla_total_size(sizeof(struct in6_addr
));
330 if (!ipv6_addr_any(&tun_key
->u
.ipv6
.dst
))
331 sum
+= nla_total_size(sizeof(struct in6_addr
));
335 sum
+= nla_total_size(sizeof(u8
));
336 sum
+= nla_total_size(sizeof(u8
)); /* TTL */
337 if (tun_key
->tun_flags
& TUNNEL_DONT_FRAGMENT
)
338 sum
+= nla_total_size(0);
339 if (tun_key
->tun_flags
& TUNNEL_CSUM
)
340 sum
+= nla_total_size(0);
342 sum
+= nla_total_size(sizeof(u16
));
344 sum
+= nla_total_size(sizeof(u16
));
345 if (tun_key
->tun_flags
& TUNNEL_OAM
)
346 sum
+= nla_total_size(0);
348 if (tun_key
->tun_flags
& TUNNEL_GENEVE_OPT
)
349 sum
+= nla_total_size(tun_opts_len
);
350 else if (tun_key
->tun_flags
& TUNNEL_ERSPAN_OPT
)
351 sum
+= nla_total_size(tun_opts_len
);
358 void psample_sample_packet(struct psample_group
*group
, struct sk_buff
*skb
,
359 u32 trunc_size
, int in_ifindex
, int out_ifindex
,
363 struct ip_tunnel_info
*tun_info
;
365 struct sk_buff
*nl_skb
;
371 meta_len
= (in_ifindex
? nla_total_size(sizeof(u16
)) : 0) +
372 (out_ifindex
? nla_total_size(sizeof(u16
)) : 0) +
373 nla_total_size(sizeof(u32
)) + /* sample_rate */
374 nla_total_size(sizeof(u32
)) + /* orig_size */
375 nla_total_size(sizeof(u32
)) + /* group_num */
376 nla_total_size(sizeof(u32
)); /* seq */
379 tun_info
= skb_tunnel_info(skb
);
381 meta_len
+= psample_tunnel_meta_len(tun_info
);
384 data_len
= min(skb
->len
, trunc_size
);
385 if (meta_len
+ nla_total_size(data_len
) > PSAMPLE_MAX_PACKET_SIZE
)
386 data_len
= PSAMPLE_MAX_PACKET_SIZE
- meta_len
- NLA_HDRLEN
389 nl_skb
= genlmsg_new(meta_len
+ nla_total_size(data_len
), GFP_ATOMIC
);
390 if (unlikely(!nl_skb
))
393 data
= genlmsg_put(nl_skb
, 0, 0, &psample_nl_family
, 0,
399 ret
= nla_put_u16(nl_skb
, PSAMPLE_ATTR_IIFINDEX
, in_ifindex
);
400 if (unlikely(ret
< 0))
405 ret
= nla_put_u16(nl_skb
, PSAMPLE_ATTR_OIFINDEX
, out_ifindex
);
406 if (unlikely(ret
< 0))
410 ret
= nla_put_u32(nl_skb
, PSAMPLE_ATTR_SAMPLE_RATE
, sample_rate
);
411 if (unlikely(ret
< 0))
414 ret
= nla_put_u32(nl_skb
, PSAMPLE_ATTR_ORIGSIZE
, skb
->len
);
415 if (unlikely(ret
< 0))
418 ret
= nla_put_u32(nl_skb
, PSAMPLE_ATTR_SAMPLE_GROUP
, group
->group_num
);
419 if (unlikely(ret
< 0))
422 ret
= nla_put_u32(nl_skb
, PSAMPLE_ATTR_GROUP_SEQ
, group
->seq
++);
423 if (unlikely(ret
< 0))
427 int nla_len
= nla_total_size(data_len
);
430 nla
= skb_put(nl_skb
, nla_len
);
431 nla
->nla_type
= PSAMPLE_ATTR_DATA
;
432 nla
->nla_len
= nla_attr_size(data_len
);
434 if (skb_copy_bits(skb
, 0, nla_data(nla
), data_len
))
440 ret
= psample_ip_tun_to_nlattr(nl_skb
, tun_info
);
441 if (unlikely(ret
< 0))
446 genlmsg_end(nl_skb
, data
);
447 genlmsg_multicast_netns(&psample_nl_family
, group
->net
, nl_skb
, 0,
448 PSAMPLE_NL_MCGRP_SAMPLE
, GFP_ATOMIC
);
452 pr_err_ratelimited("Could not create psample log message\n");
455 EXPORT_SYMBOL_GPL(psample_sample_packet
);
457 static int __init
psample_module_init(void)
459 return genl_register_family(&psample_nl_family
);
462 static void __exit
psample_module_exit(void)
464 genl_unregister_family(&psample_nl_family
);
467 module_init(psample_module_init
);
468 module_exit(psample_module_exit
);
470 MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
471 MODULE_DESCRIPTION("netlink channel for packet sampling");
472 MODULE_LICENSE("GPL v2");