/*
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */
11 #include <linux/kernel.h>
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/list.h>
15 #include <linux/log2.h>
16 #include <linux/jhash.h>
17 #include <linux/netlink.h>
18 #include <linux/vmalloc.h>
19 #include <linux/netfilter.h>
20 #include <linux/netfilter/nf_tables.h>
21 #include <net/netfilter/nf_tables.h>
23 #define NFT_HASH_MIN_SIZE 4UL
26 struct nft_hash_table __rcu
*tbl
;
29 struct nft_hash_table
{
31 struct nft_hash_elem __rcu
*buckets
[];
34 struct nft_hash_elem
{
35 struct nft_hash_elem __rcu
*next
;
37 struct nft_data data
[];
40 #define nft_hash_for_each_entry(i, head) \
41 for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
42 #define nft_hash_for_each_entry_rcu(i, head) \
43 for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))
45 static u32 nft_hash_rnd __read_mostly
;
46 static bool nft_hash_rnd_initted __read_mostly
;
48 static unsigned int nft_hash_data(const struct nft_data
*data
,
49 unsigned int hsize
, unsigned int len
)
53 h
= jhash(data
->data
, len
, nft_hash_rnd
);
54 return h
& (hsize
- 1);
57 static bool nft_hash_lookup(const struct nft_set
*set
,
58 const struct nft_data
*key
,
59 struct nft_data
*data
)
61 const struct nft_hash
*priv
= nft_set_priv(set
);
62 const struct nft_hash_table
*tbl
= rcu_dereference(priv
->tbl
);
63 const struct nft_hash_elem
*he
;
66 h
= nft_hash_data(key
, tbl
->size
, set
->klen
);
67 nft_hash_for_each_entry_rcu(he
, tbl
->buckets
[h
]) {
68 if (nft_data_cmp(&he
->key
, key
, set
->klen
))
70 if (set
->flags
& NFT_SET_MAP
)
71 nft_data_copy(data
, he
->data
);
/*
 * Free a bucket table from nft_hash_tbl_alloc(). The table may live in
 * either kmalloc or vmalloc memory (large tables fall back to vzalloc),
 * so the matching free routine must be chosen at run time.
 */
static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
	if (is_vmalloc_addr(tbl))
		vfree(tbl);
	else
		kfree(tbl);
}
82 static unsigned int nft_hash_tbl_size(unsigned int nelem
)
84 return max(roundup_pow_of_two(nelem
* 4 / 3), NFT_HASH_MIN_SIZE
);
87 static struct nft_hash_table
*nft_hash_tbl_alloc(unsigned int nbuckets
)
89 struct nft_hash_table
*tbl
;
92 size
= sizeof(*tbl
) + nbuckets
* sizeof(tbl
->buckets
[0]);
93 tbl
= kzalloc(size
, GFP_KERNEL
| __GFP_REPEAT
| __GFP_NOWARN
);
103 static void nft_hash_chain_unzip(const struct nft_set
*set
,
104 const struct nft_hash_table
*ntbl
,
105 struct nft_hash_table
*tbl
, unsigned int n
)
107 struct nft_hash_elem
*he
, *last
, *next
;
110 he
= nft_dereference(tbl
->buckets
[n
]);
113 h
= nft_hash_data(&he
->key
, ntbl
->size
, set
->klen
);
115 /* Find last element of first chain hashing to bucket h */
117 nft_hash_for_each_entry(he
, he
->next
) {
118 if (nft_hash_data(&he
->key
, ntbl
->size
, set
->klen
) != h
)
123 /* Unlink first chain from the old table */
124 RCU_INIT_POINTER(tbl
->buckets
[n
], last
->next
);
126 /* If end of chain reached, done */
130 /* Find first element of second chain hashing to bucket h */
132 nft_hash_for_each_entry(he
, he
->next
) {
133 if (nft_hash_data(&he
->key
, ntbl
->size
, set
->klen
) != h
)
139 /* Link the two chains */
140 RCU_INIT_POINTER(last
->next
, next
);
143 static int nft_hash_tbl_expand(const struct nft_set
*set
, struct nft_hash
*priv
)
145 struct nft_hash_table
*tbl
= nft_dereference(priv
->tbl
), *ntbl
;
146 struct nft_hash_elem
*he
;
150 ntbl
= nft_hash_tbl_alloc(tbl
->size
* 2);
154 /* Link new table's buckets to first element in the old table
155 * hashing to the new bucket.
157 for (i
= 0; i
< ntbl
->size
; i
++) {
158 h
= i
< tbl
->size
? i
: i
- tbl
->size
;
159 nft_hash_for_each_entry(he
, tbl
->buckets
[h
]) {
160 if (nft_hash_data(&he
->key
, ntbl
->size
, set
->klen
) != i
)
162 RCU_INIT_POINTER(ntbl
->buckets
[i
], he
);
167 /* Publish new table */
168 rcu_assign_pointer(priv
->tbl
, ntbl
);
170 /* Unzip interleaved hash chains */
172 /* Wait for readers to use new table/unzipped chains */
176 for (i
= 0; i
< tbl
->size
; i
++) {
177 nft_hash_chain_unzip(set
, ntbl
, tbl
, i
);
178 if (tbl
->buckets
[i
] != NULL
)
183 nft_hash_tbl_free(tbl
);
187 static int nft_hash_tbl_shrink(const struct nft_set
*set
, struct nft_hash
*priv
)
189 struct nft_hash_table
*tbl
= nft_dereference(priv
->tbl
), *ntbl
;
190 struct nft_hash_elem __rcu
**pprev
;
193 ntbl
= nft_hash_tbl_alloc(tbl
->size
/ 2);
197 for (i
= 0; i
< ntbl
->size
; i
++) {
198 ntbl
->buckets
[i
] = tbl
->buckets
[i
];
200 for (pprev
= &ntbl
->buckets
[i
]; *pprev
!= NULL
;
201 pprev
= &nft_dereference(*pprev
)->next
)
203 RCU_INIT_POINTER(*pprev
, tbl
->buckets
[i
+ ntbl
->size
]);
206 /* Publish new table */
207 rcu_assign_pointer(priv
->tbl
, ntbl
);
210 nft_hash_tbl_free(tbl
);
214 static int nft_hash_insert(const struct nft_set
*set
,
215 const struct nft_set_elem
*elem
)
217 struct nft_hash
*priv
= nft_set_priv(set
);
218 struct nft_hash_table
*tbl
= nft_dereference(priv
->tbl
);
219 struct nft_hash_elem
*he
;
220 unsigned int size
, h
;
222 if (elem
->flags
!= 0)
226 if (set
->flags
& NFT_SET_MAP
)
227 size
+= sizeof(he
->data
[0]);
229 he
= kzalloc(size
, GFP_KERNEL
);
233 nft_data_copy(&he
->key
, &elem
->key
);
234 if (set
->flags
& NFT_SET_MAP
)
235 nft_data_copy(he
->data
, &elem
->data
);
237 h
= nft_hash_data(&he
->key
, tbl
->size
, set
->klen
);
238 RCU_INIT_POINTER(he
->next
, tbl
->buckets
[h
]);
239 rcu_assign_pointer(tbl
->buckets
[h
], he
);
241 /* Expand table when exceeding 75% load */
242 if (set
->nelems
+ 1 > tbl
->size
/ 4 * 3)
243 nft_hash_tbl_expand(set
, priv
);
248 static void nft_hash_elem_destroy(const struct nft_set
*set
,
249 struct nft_hash_elem
*he
)
251 nft_data_uninit(&he
->key
, NFT_DATA_VALUE
);
252 if (set
->flags
& NFT_SET_MAP
)
253 nft_data_uninit(he
->data
, set
->dtype
);
257 static void nft_hash_remove(const struct nft_set
*set
,
258 const struct nft_set_elem
*elem
)
260 struct nft_hash
*priv
= nft_set_priv(set
);
261 struct nft_hash_table
*tbl
= nft_dereference(priv
->tbl
);
262 struct nft_hash_elem
*he
, __rcu
**pprev
;
264 pprev
= elem
->cookie
;
265 he
= nft_dereference((*pprev
));
267 RCU_INIT_POINTER(*pprev
, he
->next
);
271 /* Shrink table beneath 30% load */
272 if (set
->nelems
- 1 < tbl
->size
* 3 / 10 &&
273 tbl
->size
> NFT_HASH_MIN_SIZE
)
274 nft_hash_tbl_shrink(set
, priv
);
277 static int nft_hash_get(const struct nft_set
*set
, struct nft_set_elem
*elem
)
279 const struct nft_hash
*priv
= nft_set_priv(set
);
280 const struct nft_hash_table
*tbl
= nft_dereference(priv
->tbl
);
281 struct nft_hash_elem __rcu
* const *pprev
;
282 struct nft_hash_elem
*he
;
285 h
= nft_hash_data(&elem
->key
, tbl
->size
, set
->klen
);
286 pprev
= &tbl
->buckets
[h
];
287 nft_hash_for_each_entry(he
, tbl
->buckets
[h
]) {
288 if (nft_data_cmp(&he
->key
, &elem
->key
, set
->klen
)) {
293 elem
->cookie
= (void *)pprev
;
295 if (set
->flags
& NFT_SET_MAP
)
296 nft_data_copy(&elem
->data
, he
->data
);
302 static void nft_hash_walk(const struct nft_ctx
*ctx
, const struct nft_set
*set
,
303 struct nft_set_iter
*iter
)
305 const struct nft_hash
*priv
= nft_set_priv(set
);
306 const struct nft_hash_table
*tbl
= nft_dereference(priv
->tbl
);
307 const struct nft_hash_elem
*he
;
308 struct nft_set_elem elem
;
311 for (i
= 0; i
< tbl
->size
; i
++) {
312 nft_hash_for_each_entry(he
, tbl
->buckets
[i
]) {
313 if (iter
->count
< iter
->skip
)
316 memcpy(&elem
.key
, &he
->key
, sizeof(elem
.key
));
317 if (set
->flags
& NFT_SET_MAP
)
318 memcpy(&elem
.data
, he
->data
, sizeof(elem
.data
));
321 iter
->err
= iter
->fn(ctx
, set
, iter
, &elem
);
330 static unsigned int nft_hash_privsize(const struct nlattr
* const nla
[])
332 return sizeof(struct nft_hash
);
335 static int nft_hash_init(const struct nft_set
*set
,
336 const struct nft_set_desc
*desc
,
337 const struct nlattr
* const tb
[])
339 struct nft_hash
*priv
= nft_set_priv(set
);
340 struct nft_hash_table
*tbl
;
343 if (unlikely(!nft_hash_rnd_initted
)) {
344 get_random_bytes(&nft_hash_rnd
, 4);
345 nft_hash_rnd_initted
= true;
348 size
= NFT_HASH_MIN_SIZE
;
350 size
= nft_hash_tbl_size(desc
->size
);
352 tbl
= nft_hash_tbl_alloc(size
);
355 RCU_INIT_POINTER(priv
->tbl
, tbl
);
359 static void nft_hash_destroy(const struct nft_set
*set
)
361 const struct nft_hash
*priv
= nft_set_priv(set
);
362 const struct nft_hash_table
*tbl
= nft_dereference(priv
->tbl
);
363 struct nft_hash_elem
*he
, *next
;
366 for (i
= 0; i
< tbl
->size
; i
++) {
367 for (he
= nft_dereference(tbl
->buckets
[i
]); he
!= NULL
;
369 next
= nft_dereference(he
->next
);
370 nft_hash_elem_destroy(set
, he
);
376 static bool nft_hash_estimate(const struct nft_set_desc
*desc
, u32 features
,
377 struct nft_set_estimate
*est
)
381 esize
= sizeof(struct nft_hash_elem
);
382 if (features
& NFT_SET_MAP
)
383 esize
+= FIELD_SIZEOF(struct nft_hash_elem
, data
[0]);
386 est
->size
= sizeof(struct nft_hash
) +
387 nft_hash_tbl_size(desc
->size
) *
388 sizeof(struct nft_hash_elem
*) +
391 /* Resizing happens when the load drops below 30% or goes
392 * above 75%. The average of 52.5% load (approximated by 50%)
393 * is used for the size estimation of the hash buckets,
394 * meaning we calculate two buckets per element.
396 est
->size
= esize
+ 2 * sizeof(struct nft_hash_elem
*);
399 est
->class = NFT_SET_CLASS_O_1
;
404 static struct nft_set_ops nft_hash_ops __read_mostly
= {
405 .privsize
= nft_hash_privsize
,
406 .estimate
= nft_hash_estimate
,
407 .init
= nft_hash_init
,
408 .destroy
= nft_hash_destroy
,
410 .insert
= nft_hash_insert
,
411 .remove
= nft_hash_remove
,
412 .lookup
= nft_hash_lookup
,
413 .walk
= nft_hash_walk
,
414 .features
= NFT_SET_MAP
,
415 .owner
= THIS_MODULE
,
418 static int __init
nft_hash_module_init(void)
420 return nft_register_set(&nft_hash_ops
);
423 static void __exit
nft_hash_module_exit(void)
425 nft_unregister_set(&nft_hash_ops
);
428 module_init(nft_hash_module_init
);
429 module_exit(nft_hash_module_exit
);
431 MODULE_LICENSE("GPL");
432 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
433 MODULE_ALIAS_NFT_SET();