/*
 * count the number of connections matching an arbitrary key.
 *
 * (C) 2017 Red Hat GmbH
 * Author: Florian Westphal <fw@strlen.de>
 *
 * split from xt_connlimit.c:
 *   (c) 2000 Gerd Knorr <kraxel@bytesex.org>
 *   Nov 2002: Martin Bene <martin.bene@icomedias.com>:
 *		only ignore TIME_WAIT or gone connections
 *   (C) CC Computer Consultants GmbH, 2007
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_count.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>
#define CONNCOUNT_SLOTS		256U

/* Fewer lock slots under lockdep so its per-lock bookkeeping stays cheap. */
#ifdef CONFIG_LOCKDEP
#define CONNCOUNT_LOCK_SLOTS	8U
#else
#define CONNCOUNT_LOCK_SLOTS	256U
#endif

#define CONNCOUNT_GC_MAX_NODES	8
#define MAX_KEYLEN		5
45 /* we will save the tuples of all connections we care about */
46 struct nf_conncount_tuple
{
47 struct hlist_node node
;
48 struct nf_conntrack_tuple tuple
;
51 struct nf_conncount_rb
{
53 struct hlist_head hhead
; /* connections/hosts in same subnet */
57 static spinlock_t nf_conncount_locks
[CONNCOUNT_LOCK_SLOTS
] __cacheline_aligned_in_smp
;
59 struct nf_conncount_data
{
61 struct rb_root root
[CONNCOUNT_SLOTS
];
64 static u_int32_t conncount_rnd __read_mostly
;
65 static struct kmem_cache
*conncount_rb_cachep __read_mostly
;
66 static struct kmem_cache
*conncount_conn_cachep __read_mostly
;
68 static inline bool already_closed(const struct nf_conn
*conn
)
70 if (nf_ct_protonum(conn
) == IPPROTO_TCP
)
71 return conn
->proto
.tcp
.state
== TCP_CONNTRACK_TIME_WAIT
||
72 conn
->proto
.tcp
.state
== TCP_CONNTRACK_CLOSE
;
77 static int key_diff(const u32
*a
, const u32
*b
, unsigned int klen
)
79 return memcmp(a
, b
, klen
* sizeof(u32
));
82 static bool add_hlist(struct hlist_head
*head
,
83 const struct nf_conntrack_tuple
*tuple
)
85 struct nf_conncount_tuple
*conn
;
87 conn
= kmem_cache_alloc(conncount_conn_cachep
, GFP_ATOMIC
);
91 hlist_add_head(&conn
->node
, head
);
95 static unsigned int check_hlist(struct net
*net
,
96 struct hlist_head
*head
,
97 const struct nf_conntrack_tuple
*tuple
,
98 const struct nf_conntrack_zone
*zone
,
101 const struct nf_conntrack_tuple_hash
*found
;
102 struct nf_conncount_tuple
*conn
;
103 struct hlist_node
*n
;
104 struct nf_conn
*found_ct
;
105 unsigned int length
= 0;
109 /* check the saved connections */
110 hlist_for_each_entry_safe(conn
, n
, head
, node
) {
111 found
= nf_conntrack_find_get(net
, zone
, &conn
->tuple
);
113 hlist_del(&conn
->node
);
114 kmem_cache_free(conncount_conn_cachep
, conn
);
118 found_ct
= nf_ct_tuplehash_to_ctrack(found
);
120 if (nf_ct_tuple_equal(&conn
->tuple
, tuple
)) {
122 * Just to be sure we have it only once in the list.
123 * We should not see tuples twice unless someone hooks
124 * this into a table without "-p tcp --syn".
127 } else if (already_closed(found_ct
)) {
129 * we do not care about connections which are
130 * closed already -> ditch it
133 hlist_del(&conn
->node
);
134 kmem_cache_free(conncount_conn_cachep
, conn
);
145 static void tree_nodes_free(struct rb_root
*root
,
146 struct nf_conncount_rb
*gc_nodes
[],
147 unsigned int gc_count
)
149 struct nf_conncount_rb
*rbconn
;
152 rbconn
= gc_nodes
[--gc_count
];
153 rb_erase(&rbconn
->node
, root
);
154 kmem_cache_free(conncount_rb_cachep
, rbconn
);
159 count_tree(struct net
*net
, struct rb_root
*root
,
160 const u32
*key
, u8 keylen
,
162 const struct nf_conntrack_tuple
*tuple
,
163 const struct nf_conntrack_zone
*zone
)
165 struct nf_conncount_rb
*gc_nodes
[CONNCOUNT_GC_MAX_NODES
];
166 struct rb_node
**rbnode
, *parent
;
167 struct nf_conncount_rb
*rbconn
;
168 struct nf_conncount_tuple
*conn
;
169 unsigned int gc_count
;
175 rbnode
= &(root
->rb_node
);
180 rbconn
= rb_entry(*rbnode
, struct nf_conncount_rb
, node
);
183 diff
= key_diff(key
, rbconn
->key
, keylen
);
185 rbnode
= &((*rbnode
)->rb_left
);
186 } else if (diff
> 0) {
187 rbnode
= &((*rbnode
)->rb_right
);
189 /* same source network -> be counted! */
191 count
= check_hlist(net
, &rbconn
->hhead
, tuple
, zone
, &addit
);
193 tree_nodes_free(root
, gc_nodes
, gc_count
);
197 if (!add_hlist(&rbconn
->hhead
, tuple
))
198 return 0; /* hotdrop */
203 if (no_gc
|| gc_count
>= ARRAY_SIZE(gc_nodes
))
206 /* only used for GC on hhead, retval and 'addit' ignored */
207 check_hlist(net
, &rbconn
->hhead
, tuple
, zone
, &addit
);
208 if (hlist_empty(&rbconn
->hhead
))
209 gc_nodes
[gc_count
++] = rbconn
;
214 tree_nodes_free(root
, gc_nodes
, gc_count
);
215 /* tree_node_free before new allocation permits
216 * allocator to re-use newly free'd object.
218 * This is a rare event; in most cases we will find
219 * existing node to re-use. (or gc_count is 0).
224 /* no match, need to insert new node */
225 rbconn
= kmem_cache_alloc(conncount_rb_cachep
, GFP_ATOMIC
);
229 conn
= kmem_cache_alloc(conncount_conn_cachep
, GFP_ATOMIC
);
231 kmem_cache_free(conncount_rb_cachep
, rbconn
);
235 conn
->tuple
= *tuple
;
236 memcpy(rbconn
->key
, key
, sizeof(u32
) * keylen
);
238 INIT_HLIST_HEAD(&rbconn
->hhead
);
239 hlist_add_head(&conn
->node
, &rbconn
->hhead
);
241 rb_link_node(&rbconn
->node
, parent
, rbnode
);
242 rb_insert_color(&rbconn
->node
, root
);
246 unsigned int nf_conncount_count(struct net
*net
,
247 struct nf_conncount_data
*data
,
250 const struct nf_conntrack_tuple
*tuple
,
251 const struct nf_conntrack_zone
*zone
)
253 struct rb_root
*root
;
257 hash
= jhash2(key
, data
->keylen
, conncount_rnd
) % CONNCOUNT_SLOTS
;
258 root
= &data
->root
[hash
];
260 spin_lock_bh(&nf_conncount_locks
[hash
% CONNCOUNT_LOCK_SLOTS
]);
262 count
= count_tree(net
, root
, key
, data
->keylen
, family
, tuple
, zone
);
264 spin_unlock_bh(&nf_conncount_locks
[hash
% CONNCOUNT_LOCK_SLOTS
]);
268 EXPORT_SYMBOL_GPL(nf_conncount_count
);
270 struct nf_conncount_data
*nf_conncount_init(struct net
*net
, unsigned int family
,
273 struct nf_conncount_data
*data
;
276 if (keylen
% sizeof(u32
) ||
277 keylen
/ sizeof(u32
) > MAX_KEYLEN
||
279 return ERR_PTR(-EINVAL
);
281 net_get_random_once(&conncount_rnd
, sizeof(conncount_rnd
));
283 data
= kmalloc(sizeof(*data
), GFP_KERNEL
);
285 return ERR_PTR(-ENOMEM
);
287 ret
= nf_ct_netns_get(net
, family
);
293 for (i
= 0; i
< ARRAY_SIZE(data
->root
); ++i
)
294 data
->root
[i
] = RB_ROOT
;
296 data
->keylen
= keylen
/ sizeof(u32
);
300 EXPORT_SYMBOL_GPL(nf_conncount_init
);
302 static void destroy_tree(struct rb_root
*r
)
304 struct nf_conncount_tuple
*conn
;
305 struct nf_conncount_rb
*rbconn
;
306 struct hlist_node
*n
;
307 struct rb_node
*node
;
309 while ((node
= rb_first(r
)) != NULL
) {
310 rbconn
= rb_entry(node
, struct nf_conncount_rb
, node
);
314 hlist_for_each_entry_safe(conn
, n
, &rbconn
->hhead
, node
)
315 kmem_cache_free(conncount_conn_cachep
, conn
);
317 kmem_cache_free(conncount_rb_cachep
, rbconn
);
321 void nf_conncount_destroy(struct net
*net
, unsigned int family
,
322 struct nf_conncount_data
*data
)
326 nf_ct_netns_put(net
, family
);
328 for (i
= 0; i
< ARRAY_SIZE(data
->root
); ++i
)
329 destroy_tree(&data
->root
[i
]);
333 EXPORT_SYMBOL_GPL(nf_conncount_destroy
);
335 static int __init
nf_conncount_modinit(void)
339 BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS
> CONNCOUNT_SLOTS
);
340 BUILD_BUG_ON((CONNCOUNT_SLOTS
% CONNCOUNT_LOCK_SLOTS
) != 0);
342 for (i
= 0; i
< CONNCOUNT_LOCK_SLOTS
; ++i
)
343 spin_lock_init(&nf_conncount_locks
[i
]);
345 conncount_conn_cachep
= kmem_cache_create("nf_conncount_tuple",
346 sizeof(struct nf_conncount_tuple
),
348 if (!conncount_conn_cachep
)
351 conncount_rb_cachep
= kmem_cache_create("nf_conncount_rb",
352 sizeof(struct nf_conncount_rb
),
354 if (!conncount_rb_cachep
) {
355 kmem_cache_destroy(conncount_conn_cachep
);
362 static void __exit
nf_conncount_modexit(void)
364 kmem_cache_destroy(conncount_conn_cachep
);
365 kmem_cache_destroy(conncount_rb_cachep
);
368 module_init(nf_conncount_modinit
);
369 module_exit(nf_conncount_modexit
);
370 MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
371 MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
372 MODULE_DESCRIPTION("netfilter: count number of connections matching a key");
373 MODULE_LICENSE("GPL");