// SPDX-License-Identifier: GPL-2.0-only
/*
 * xfrm_state.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */
#include <linux/compat.h>
#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/audit.h>
#include <linux/uaccess.h>
#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>

#include <crypto/aead.h>

#include "xfrm_hash.h"
#define xfrm_state_deref_prot(table, net) \
	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))

static void xfrm_state_gc_task(struct work_struct *work);

/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static struct kmem_cache *xfrm_state_cache __ro_after_init;

static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
static HLIST_HEAD(xfrm_state_dev_gc_list);

static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
{
	return refcount_inc_not_zero(&x->refcnt);
}
static inline unsigned int xfrm_dst_hash(struct net *net,
					 const xfrm_address_t *daddr,
					 const xfrm_address_t *saddr,
					 u32 reqid,
					 unsigned short family)
{
	return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
}

static inline unsigned int xfrm_src_hash(struct net *net,
					 const xfrm_address_t *daddr,
					 const xfrm_address_t *saddr,
					 unsigned short family)
{
	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
}

static inline unsigned int
xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
	      __be32 spi, u8 proto, unsigned short family)
{
	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
}

static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
{
	return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
}
#define XFRM_STATE_INSERT(by, _n, _h, _type)				\
	{								\
		struct xfrm_state *_x = NULL;				\
									\
		if (_type != XFRM_DEV_OFFLOAD_PACKET) {			\
			hlist_for_each_entry_rcu(_x, _h, by) {		\
				if (_x->xso.type == XFRM_DEV_OFFLOAD_PACKET) \
					continue;			\
				break;					\
			}						\
		}							\
									\
		if (!_x || _x->xso.type == XFRM_DEV_OFFLOAD_PACKET)	\
			/* SAD is empty or consists of HW SAs only */	\
			hlist_add_head_rcu(_n, _h);			\
		else							\
			hlist_add_before_rcu(_n, &_x->by);		\
	}
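/*
 * Note: every SADB insertion in this file goes through XFRM_STATE_INSERT
 * so that packet-offload (HW) states always sit at the head of each hash
 * chain. The lookup paths below rely on this ordering: a HW lookup can
 * stop at the first SW entry it meets, and a SW lookup simply skips the
 * leading HW entries.
 */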
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       struct hlist_head *nseqtable,
			       unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		XFRM_STATE_INSERT(bydst, &x->bydst, ndsttable + h, x->xso.type);

		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		XFRM_STATE_INSERT(bysrc, &x->bysrc, nsrctable + h, x->xso.type);

		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			XFRM_STATE_INSERT(byspi, &x->byspi, nspitable + h,
					  x->xso.type);
		}

		if (x->km.seq) {
			h = __xfrm_seq_hash(x->km.seq, nhashmask);
			XFRM_STATE_INSERT(byseq, &x->byseq, nseqtable + h,
					  x->xso.type);
		}
	}
}
static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
{
	return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
}
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.state_hash_work);
	struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
	ndst = xfrm_hash_alloc(nsize);
	if (!ndst)
		return;
	nsrc = xfrm_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_hash_free(ndst, nsize);
		return;
	}
	nspi = xfrm_hash_alloc(nsize);
	if (!nspi) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		return;
	}
	nseq = xfrm_hash_alloc(nsize);
	if (!nseq) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		xfrm_hash_free(nspi, nsize);
		return;
	}

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);

	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
	for (i = net->xfrm.state_hmask; i >= 0; i--)
		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);

	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
	oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
	ohashmask = net->xfrm.state_hmask;

	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
	rcu_assign_pointer(net->xfrm.state_byseq, nseq);
	net->xfrm.state_hmask = nhashmask;

	write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	osize = (ohashmask + 1) * sizeof(struct hlist_head);

	synchronize_rcu();

	xfrm_hash_free(odst, osize);
	xfrm_hash_free(osrc, osize);
	xfrm_hash_free(ospi, osize);
	xfrm_hash_free(oseq, osize);
}
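/*
 * The resize above is a classic RCU table swap: the new tables are
 * populated and published with rcu_assign_pointer() under
 * xfrm_state_lock, concurrent lookups detect the flip through the
 * xfrm_state_hash_generation seqcount, and the old tables are freed
 * only after synchronize_rcu() guarantees no reader still walks them.
 */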
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];

static DEFINE_SPINLOCK(xfrm_state_gc_lock);
static DEFINE_SPINLOCK(xfrm_state_dev_gc_lock);

int __xfrm_state_delete(struct xfrm_state *x);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
static bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	int err = 0;

	if (!afinfo)
		return -EAFNOSUPPORT;

#define X(afi, T, name) do {			\
		WARN_ON((afi)->type_ ## name);	\
		(afi)->type_ ## name = (T);	\
	} while (0)

	switch (type->proto) {
	case IPPROTO_COMP:
		X(afinfo, type, comp);
		break;
	case IPPROTO_AH:
		X(afinfo, type, ah);
		break;
	case IPPROTO_ESP:
		X(afinfo, type, esp);
		break;
	case IPPROTO_IPIP:
		X(afinfo, type, ipip);
		break;
	case IPPROTO_DSTOPTS:
		X(afinfo, type, dstopts);
		break;
	case IPPROTO_ROUTING:
		X(afinfo, type, routing);
		break;
	case IPPROTO_IPV6:
		X(afinfo, type, ipip6);
		break;
	default:
		WARN_ON(1);
		err = -EPROTONOSUPPORT;
		break;
	}
#undef X
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(xfrm_register_type);
void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return;

#define X(afi, T, name) do {				\
		WARN_ON((afi)->type_ ## name != (T));	\
		(afi)->type_ ## name = NULL;		\
	} while (0)

	switch (type->proto) {
	case IPPROTO_COMP:
		X(afinfo, type, comp);
		break;
	case IPPROTO_AH:
		X(afinfo, type, ah);
		break;
	case IPPROTO_ESP:
		X(afinfo, type, esp);
		break;
	case IPPROTO_IPIP:
		X(afinfo, type, ipip);
		break;
	case IPPROTO_DSTOPTS:
		X(afinfo, type, dstopts);
		break;
	case IPPROTO_ROUTING:
		X(afinfo, type, routing);
		break;
	case IPPROTO_IPV6:
		X(afinfo, type, ipip6);
		break;
	default:
		WARN_ON(1);
		break;
	}
#undef X
	rcu_read_unlock();
}
EXPORT_SYMBOL(xfrm_unregister_type);
static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
	const struct xfrm_type *type = NULL;
	struct xfrm_state_afinfo *afinfo;
	int modload_attempted = 0;

retry:
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;

	switch (proto) {
	case IPPROTO_COMP:
		type = afinfo->type_comp;
		break;
	case IPPROTO_AH:
		type = afinfo->type_ah;
		break;
	case IPPROTO_ESP:
		type = afinfo->type_esp;
		break;
	case IPPROTO_IPIP:
		type = afinfo->type_ipip;
		break;
	case IPPROTO_DSTOPTS:
		type = afinfo->type_dstopts;
		break;
	case IPPROTO_ROUTING:
		type = afinfo->type_routing;
		break;
	case IPPROTO_IPV6:
		type = afinfo->type_ipip6;
		break;
	default:
		break;
	}

	if (unlikely(type && !try_module_get(type->owner)))
		type = NULL;

	rcu_read_unlock();

	if (!type && !modload_attempted) {
		request_module("xfrm-type-%d-%d", family, proto);
		modload_attempted = 1;
		goto retry;
	}

	return type;
}

static void xfrm_put_type(const struct xfrm_type *type)
{
	module_put(type->owner);
}
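/*
 * The request_module() calls here and in xfrm_get_type_offload() use the
 * "xfrm-type-<family>-<proto>" / "xfrm-offload-<family>-<proto>" aliases;
 * e.g. ESP over IPv4 resolves to "xfrm-type-2-50" (AF_INET == 2,
 * IPPROTO_ESP == 50), an alias the protocol modules declare via
 * MODULE_ALIAS_XFRM_TYPE().
 */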
int xfrm_register_type_offload(const struct xfrm_type_offload *type,
			       unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EAFNOSUPPORT;

	switch (type->proto) {
	case IPPROTO_ESP:
		WARN_ON(afinfo->type_offload_esp);
		afinfo->type_offload_esp = type;
		break;
	default:
		WARN_ON(1);
		err = -EPROTONOSUPPORT;
		break;
	}

	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(xfrm_register_type_offload);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
				  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);

	if (unlikely(afinfo == NULL))
		return;

	switch (type->proto) {
	case IPPROTO_ESP:
		WARN_ON(afinfo->type_offload_esp != type);
		afinfo->type_offload_esp = NULL;
		break;
	default:
		WARN_ON(1);
		break;
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(xfrm_unregister_type_offload);
static const struct xfrm_type_offload *
xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
{
	const struct xfrm_type_offload *type = NULL;
	struct xfrm_state_afinfo *afinfo;

retry:
	afinfo = xfrm_state_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return NULL;

	switch (proto) {
	case IPPROTO_ESP:
		type = afinfo->type_offload_esp;
		break;
	default:
		break;
	}

	if ((type && !try_module_get(type->owner)))
		type = NULL;

	rcu_read_unlock();

	if (!type && try_load) {
		request_module("xfrm-offload-%d-%d", family, proto);
		try_load = false;
		goto retry;
	}

	return type;
}

static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
{
	module_put(type->owner);
}
static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
	[XFRM_MODE_BEET] = {
		.encap = XFRM_MODE_BEET,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET,
	},
	[XFRM_MODE_TRANSPORT] = {
		.encap = XFRM_MODE_TRANSPORT,
		.family = AF_INET,
	},
	[XFRM_MODE_TUNNEL] = {
		.encap = XFRM_MODE_TUNNEL,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET,
	},
};

static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
	[XFRM_MODE_BEET] = {
		.encap = XFRM_MODE_BEET,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET6,
	},
	[XFRM_MODE_ROUTEOPTIMIZATION] = {
		.encap = XFRM_MODE_ROUTEOPTIMIZATION,
		.family = AF_INET6,
	},
	[XFRM_MODE_TRANSPORT] = {
		.encap = XFRM_MODE_TRANSPORT,
		.family = AF_INET6,
	},
	[XFRM_MODE_TUNNEL] = {
		.encap = XFRM_MODE_TUNNEL,
		.flags = XFRM_MODE_FLAG_TUNNEL,
		.family = AF_INET6,
	},
};
static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
{
	const struct xfrm_mode *mode;

	if (unlikely(encap >= XFRM_MODE_MAX))
		return NULL;

	switch (family) {
	case AF_INET:
		mode = &xfrm4_mode_map[encap];
		if (mode->family == family)
			return mode;
		break;
	case AF_INET6:
		mode = &xfrm6_mode_map[encap];
		if (mode->family == family)
			return mode;
		break;
	default:
		break;
	}

	return NULL;
}
void xfrm_state_free(struct xfrm_state *x)
{
	kmem_cache_free(xfrm_state_cache, x);
}
EXPORT_SYMBOL(xfrm_state_free);
static void ___xfrm_state_destroy(struct xfrm_state *x)
{
	hrtimer_cancel(&x->mtimer);
	del_timer_sync(&x->rtimer);
	kfree(x->aead);
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	kfree(x->replay_esn);
	kfree(x->preplay_esn);
	if (x->type_offload)
		xfrm_put_type_offload(x->type_offload);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	if (x->xfrag.page)
		put_page(x->xfrag.page);
	xfrm_dev_state_free(x);
	security_xfrm_state_free(x);
	xfrm_state_free(x);
}
static void xfrm_state_gc_task(struct work_struct *work)
{
	struct xfrm_state *x;
	struct hlist_node *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_move_list(&xfrm_state_gc_list, &gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	synchronize_rcu();

	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
		___xfrm_state_destroy(x);
}
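/*
 * Deferred destruction: __xfrm_state_destroy() queues dead states on
 * xfrm_state_gc_list and this worker splices the list, waits one RCU
 * grace period, then runs the real destructors, so RCU-protected
 * lookups that still hold a pointer to a dying state finish safely.
 */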
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
{
	struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
	enum hrtimer_restart ret = HRTIMER_NORESTART;
	time64_t now = ktime_get_real_seconds();
	time64_t next = TIME64_MAX;
	int warn = 0;
	int err = 0;

	spin_lock(&x->lock);
	xfrm_dev_state_update_stats(x);

	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		time64_t tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0) {
			if (x->xflags & XFRM_SOFT_EXPIRE) {
				/* enter hard expire without soft expire first?!
				 * setting a new date could trigger this.
				 * workaround: fix x->curlft.add_time by below:
				 */
				x->curlft.add_time = now - x->saved_tmo - 1;
				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
			} else
				goto expired;
		}
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		time64_t tmo = x->lft.hard_use_expires_seconds +
			(READ_ONCE(x->curlft.use_time) ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		time64_t tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			x->xflags &= ~XFRM_SOFT_EXPIRE;
		} else if (tmo < next) {
			next = tmo;
			x->xflags |= XFRM_SOFT_EXPIRE;
			x->saved_tmo = tmo;
		}
	}
	if (x->lft.soft_use_expires_seconds) {
		time64_t tmo = x->lft.soft_use_expires_seconds +
			(READ_ONCE(x->curlft.use_time) ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != TIME64_MAX) {
		hrtimer_forward_now(&x->mtimer, ktime_set(next, 0));
		ret = HRTIMER_RESTART;
	}

	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
		x->km.state = XFRM_STATE_EXPIRED;

	err = __xfrm_state_delete(x);
	if (!err)
		km_state_expired(x, 1, 0);

	xfrm_audit_state_delete(x, err ? 0 : 1, true);

out:
	spin_unlock(&x->lock);
	return ret;
}
static void xfrm_replay_timer_handler(struct timer_list *t);

struct xfrm_state *xfrm_state_alloc(struct net *net)
{
	struct xfrm_state *x;

	x = kmem_cache_zalloc(xfrm_state_cache, GFP_ATOMIC);

	if (x) {
		write_pnet(&x->xs_net, net);
		refcount_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_LIST_HEAD(&x->km.all);
		INIT_HLIST_NODE(&x->state_cache);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		INIT_HLIST_NODE(&x->byseq);
		hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
		x->mtimer.function = xfrm_timer_handler;
		timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
		x->curlft.add_time = ktime_get_real_seconds();
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		x->pcpu_num = UINT_MAX;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
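/*
 * Usage sketch (roughly how a key manager such as xfrm_user installs a
 * new SA; error handling abbreviated):
 *
 *	struct xfrm_state *x = xfrm_state_alloc(net);
 *
 *	if (!x)
 *		return -ENOMEM;
 *	// ... fill in x->id, x->props, algorithms ...
 *	if (xfrm_state_add(x)) {
 *		x->km.state = XFRM_STATE_DEAD;
 *		xfrm_state_put(x);
 *	}
 */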
#ifdef CONFIG_XFRM_OFFLOAD
void xfrm_dev_state_delete(struct xfrm_state *x)
{
	struct xfrm_dev_offload *xso = &x->xso;
	struct net_device *dev = READ_ONCE(xso->dev);

	if (dev) {
		dev->xfrmdev_ops->xdo_dev_state_delete(x);
		spin_lock_bh(&xfrm_state_dev_gc_lock);
		hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list);
		spin_unlock_bh(&xfrm_state_dev_gc_lock);
	}
}
EXPORT_SYMBOL_GPL(xfrm_dev_state_delete);

void xfrm_dev_state_free(struct xfrm_state *x)
{
	struct xfrm_dev_offload *xso = &x->xso;
	struct net_device *dev = READ_ONCE(xso->dev);

	if (dev && dev->xfrmdev_ops) {
		spin_lock_bh(&xfrm_state_dev_gc_lock);
		if (!hlist_unhashed(&x->dev_gclist))
			hlist_del(&x->dev_gclist);
		spin_unlock_bh(&xfrm_state_dev_gc_lock);

		if (dev->xfrmdev_ops->xdo_dev_state_free)
			dev->xfrmdev_ops->xdo_dev_state_free(x);
		WRITE_ONCE(xso->dev, NULL);
		xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
		netdev_put(dev, &xso->dev_tracker);
	}
}
#endif
void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
{
	WARN_ON(x->km.state != XFRM_STATE_DEAD);

	if (sync) {
		synchronize_rcu();
		___xfrm_state_destroy(x);
	} else {
		spin_lock_bh(&xfrm_state_gc_lock);
		hlist_add_head(&x->gclist, &xfrm_state_gc_list);
		spin_unlock_bh(&xfrm_state_gc_lock);
		schedule_work(&xfrm_state_gc_work);
	}
}
EXPORT_SYMBOL(__xfrm_state_destroy);
int __xfrm_state_delete(struct xfrm_state *x)
{
	struct net *net = xs_net(x);
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;

		spin_lock(&net->xfrm.xfrm_state_lock);
		list_del(&x->km.all);
		hlist_del_rcu(&x->bydst);
		hlist_del_rcu(&x->bysrc);
		if (x->km.seq)
			hlist_del_rcu(&x->byseq);
		if (!hlist_unhashed(&x->state_cache))
			hlist_del_rcu(&x->state_cache);
		if (!hlist_unhashed(&x->state_cache_input))
			hlist_del_rcu(&x->state_cache_input);

		if (x->id.spi)
			hlist_del_rcu(&x->byspi);
		net->xfrm.state_num--;
		xfrm_nat_keepalive_state_updated(x);
		spin_unlock(&net->xfrm.xfrm_state_lock);

		if (x->encap_sk)
			sock_put(rcu_dereference_raw(x->encap_sk));

		xfrm_dev_state_delete(x);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
int xfrm_state_delete(struct xfrm_state *x)
{
	int err;

	spin_lock_bh(&x->lock);
	err = __xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_state_delete);
#ifdef CONFIG_SECURITY_NETWORK_XFRM
static inline int
xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
	int i, err = 0;

	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;

		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto) &&
			   (err = security_xfrm_state_delete(x)) != 0) {
				xfrm_audit_state_delete(x, 0, task_valid);
				return err;
			}
		}
	}

	return err;
}

static inline int
xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
{
	int i, err = 0;

	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;
		struct xfrm_dev_offload *xso;

		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			xso = &x->xso;

			if (xso->dev == dev &&
			   (err = security_xfrm_state_delete(x)) != 0) {
				xfrm_audit_state_delete(x, 0, task_valid);
				return err;
			}
		}
	}

	return err;
}
#else
static inline int
xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
{
	return 0;
}

static inline int
xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
{
	return 0;
}
#endif
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
{
	int i, err = 0, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	err = xfrm_state_flush_secctx_check(net, proto, task_valid);
	if (err)
		goto out;

	err = -ESRCH;
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&net->xfrm.xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_state_delete(x, err ? 0 : 1,
							task_valid);
				if (sync)
					xfrm_state_put_sync(x);
				else
					xfrm_state_put(x);
				if (!err)
					cnt++;

				spin_lock_bh(&net->xfrm.xfrm_state_lock);
				goto restart;
			}
		}
	}
out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	if (cnt)
		err = 0;

	return err;
}
EXPORT_SYMBOL(xfrm_state_flush);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
{
	struct xfrm_state *x;
	struct hlist_node *tmp;
	struct xfrm_dev_offload *xso;
	int i, err = 0, cnt = 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
	if (err)
		goto out;

	err = -ESRCH;
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
restart:
		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
			xso = &x->xso;

			if (!xfrm_state_kern(x) && xso->dev == dev) {
				xfrm_state_hold(x);
				spin_unlock_bh(&net->xfrm.xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_dev_state_free(x);

				xfrm_audit_state_delete(x, err ? 0 : 1,
							task_valid);
				xfrm_state_put(x);
				if (!err)
					cnt++;

				spin_lock_bh(&net->xfrm.xfrm_state_lock);
				goto restart;
			}
		}
	}
	if (cnt)
		err = 0;

out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	spin_lock_bh(&xfrm_state_dev_gc_lock);
restart_gc:
	hlist_for_each_entry_safe(x, tmp, &xfrm_state_dev_gc_list, dev_gclist) {
		xso = &x->xso;

		if (xso->dev == dev) {
			spin_unlock_bh(&xfrm_state_dev_gc_lock);
			xfrm_dev_state_free(x);
			spin_lock_bh(&xfrm_state_dev_gc_lock);
			goto restart_gc;
		}
	}
	spin_unlock_bh(&xfrm_state_dev_gc_lock);

	xfrm_flush_gc();

	return err;
}
EXPORT_SYMBOL(xfrm_dev_state_flush);
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
{
	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	si->sadcnt = net->xfrm.state_num;
	si->sadhcnt = net->xfrm.state_hmask + 1;
	si->sadhmcnt = xfrm_state_hashmax;
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_sad_getinfo);
static void
__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi4 *fl4 = &fl->u.ip4;

	sel->daddr.a4 = fl4->daddr;
	sel->saddr.a4 = fl4->saddr;
	sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
	sel->dport_mask = htons(0xffff);
	sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
	sel->sport_mask = htons(0xffff);
	sel->family = AF_INET;
	sel->prefixlen_d = 32;
	sel->prefixlen_s = 32;
	sel->proto = fl4->flowi4_proto;
	sel->ifindex = fl4->flowi4_oif;
}
static void
__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
{
	const struct flowi6 *fl6 = &fl->u.ip6;

	/* Initialize temporary selector matching only to current session. */
	*(struct in6_addr *)&sel->daddr = fl6->daddr;
	*(struct in6_addr *)&sel->saddr = fl6->saddr;
	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
	sel->dport_mask = htons(0xffff);
	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
	sel->sport_mask = htons(0xffff);
	sel->family = AF_INET6;
	sel->prefixlen_d = 128;
	sel->prefixlen_s = 128;
	sel->proto = fl6->flowi6_proto;
	sel->ifindex = fl6->flowi6_oif;
}
static void
xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
		    const struct xfrm_tmpl *tmpl,
		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
		    unsigned short family)
{
	switch (family) {
	case AF_INET:
		__xfrm4_init_tempsel(&x->sel, fl);
		break;
	case AF_INET6:
		__xfrm6_init_tempsel(&x->sel, fl);
		break;
	}

	x->id = tmpl->id;

	switch (tmpl->encap_family) {
	case AF_INET:
		if (x->id.daddr.a4 == 0)
			x->id.daddr.a4 = daddr->a4;
		x->props.saddr = tmpl->saddr;
		if (x->props.saddr.a4 == 0)
			x->props.saddr.a4 = saddr->a4;
		break;
	case AF_INET6:
		if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
			memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
		memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
		if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
			memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
		break;
	}

	x->props.mode = tmpl->mode;
	x->props.reqid = tmpl->reqid;
	x->props.family = tmpl->encap_family;
}
static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
						  const xfrm_address_t *daddr,
						  __be32 spi, u8 proto,
						  unsigned short family,
						  struct xfrm_dev_offload *xdo)
{
	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
	struct xfrm_state *x;

	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
#ifdef CONFIG_XFRM_OFFLOAD
		if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
				/* HW states are in the head of list, there is
				 * no need to iterate further.
				 */
				break;

			/* Packet offload: both policy and SA should
			 * have same device.
			 */
			if (xdo->dev != x->xso.dev)
				continue;
		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
			/* Skip HW policy for SW lookups */
			continue;
#endif
		if (x->props.family != family ||
		    x->id.spi != spi ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		return x;
	}

	return NULL;
}
static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
					      const xfrm_address_t *daddr,
					      __be32 spi, u8 proto,
					      unsigned short family)
{
	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
	struct xfrm_state *x;

	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
		if (x->props.family != family ||
		    x->id.spi != spi ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		return x;
	}

	return NULL;
}
struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
					   const xfrm_address_t *daddr,
					   __be32 spi, u8 proto,
					   unsigned short family)
{
	struct hlist_head *state_cache_input;
	struct xfrm_state *x = NULL;
	int cpu = get_cpu();

	state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);

	rcu_read_lock();
	hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
		if (x->props.family != family ||
		    x->id.spi != spi ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		goto out;
	}

	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);

	if (x && x->km.state == XFRM_STATE_VALID) {
		spin_lock_bh(&net->xfrm.xfrm_state_lock);
		if (hlist_unhashed(&x->state_cache_input)) {
			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
		} else {
			hlist_del_rcu(&x->state_cache_input);
			hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
		}
		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	}

out:
	rcu_read_unlock();
	put_cpu();
	return x;
}
EXPORT_SYMBOL(xfrm_input_state_lookup);
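/*
 * The input path above is a two-level lookup: a per-CPU cache of recently
 * used inbound states is tried first, and only on a miss does it fall
 * back to the global byspi hash, promoting any VALID result to the head
 * of this CPU's cache for the next packet.
 */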
static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
						     const xfrm_address_t *daddr,
						     const xfrm_address_t *saddr,
						     u8 proto, unsigned short family)
{
	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
	struct xfrm_state *x;

	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
		if (x->props.family != family ||
		    x->id.proto != proto ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
			continue;

		if ((mark & x->mark.m) != x->mark.v)
			continue;
		if (!xfrm_state_hold_rcu(x))
			continue;
		return x;
	}

	return NULL;
}
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
	struct net *net = xs_net(x);
	u32 mark = x->mark.v & x->mark.m;

	if (use_spi)
		return __xfrm_state_lookup(net, mark, &x->id.daddr,
					   x->id.spi, x->id.proto, family);
	else
		return __xfrm_state_lookup_byaddr(net, mark,
						  &x->id.daddr,
						  &x->props.saddr,
						  x->id.proto, family);
}
static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
{
	if (have_hash_collision &&
	    (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
	    net->xfrm.state_num > net->xfrm.state_hmask)
		schedule_work(&net->xfrm.state_hash_work);
}
static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
			       const struct flowi *fl, unsigned short family,
			       struct xfrm_state **best, int *acq_in_progress,
			       int *error)
{
	/* We need the cpu id just as a lookup key,
	 * we don't require it to be stable.
	 */
	unsigned int pcpu_id = get_cpu();
	put_cpu();

	/* Resolution logic:
	 * 1. There is a valid state with matching selector. Done.
	 * 2. Valid state with inappropriate selector. Skip.
	 *
	 * Entering area of "sysdeps".
	 *
	 * 3. If state is not valid, selector is temporary, it selects
	 *    only session which triggered previous resolution. Key
	 *    manager will do something to install a state with proper
	 *    selector.
	 */
	if (x->km.state == XFRM_STATE_VALID) {
		if ((x->sel.family &&
		     (x->sel.family != family ||
		      !xfrm_selector_match(&x->sel, fl, family))) ||
		    !security_xfrm_state_pol_flow_match(x, pol,
							&fl->u.__fl_common))
			return;

		if (x->pcpu_num != UINT_MAX && x->pcpu_num != pcpu_id)
			return;

		if (!*best ||
		    ((*best)->pcpu_num == UINT_MAX && x->pcpu_num == pcpu_id) ||
		    (*best)->km.dying > x->km.dying ||
		    ((*best)->km.dying == x->km.dying &&
		     (*best)->curlft.add_time < x->curlft.add_time))
			*best = x;
	} else if (x->km.state == XFRM_STATE_ACQ) {
		if (!*best || x->pcpu_num == pcpu_id)
			*acq_in_progress = 1;
	} else if (x->km.state == XFRM_STATE_ERROR ||
		   x->km.state == XFRM_STATE_EXPIRED) {
		if ((!x->sel.family ||
		     (x->sel.family == family &&
		      xfrm_selector_match(&x->sel, fl, family))) &&
		    security_xfrm_state_pol_flow_match(x, pol,
						       &fl->u.__fl_common))
			*error = -ESRCH;
	}
}
struct xfrm_state *
xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
		const struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family, u32 if_id)
{
	static xfrm_address_t saddr_wildcard = { };
	struct net *net = xp_net(pol);
	unsigned int h, h_wildcard;
	struct xfrm_state *x, *x0, *to_put;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;
	u32 mark = pol->mark.v & pol->mark.m;
	unsigned short encap_family = tmpl->encap_family;
	unsigned int sequence;
	struct km_event c;
	unsigned int pcpu_id;
	bool cached = false;

	/* We need the cpu id just as a lookup key,
	 * we don't require it to be stable.
	 */
	pcpu_id = get_cpu();
	put_cpu();

	to_put = NULL;

	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);

	rcu_read_lock();
	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, encap_family,
					   &best, &acquire_in_progress, &error);
	}

	if (best)
		goto cached;

	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, family,
					   &best, &acquire_in_progress, &error);
	}

cached:
	cached = true;
	if (best)
		goto found;
	else if (error)
		best = NULL;
	else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
		WARN_ON(1);
	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
				/* HW states are in the head of list, there is
				 * no need to iterate further.
				 */
				break;

			/* Packet offload: both policy and SA should
			 * have same device.
			 */
			if (pol->xdo.dev != x->xso.dev)
				continue;
		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
			/* Skip HW policy for SW lookups */
			continue;
#endif
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, family,
					   &best, &acquire_in_progress, &error);
	}
	if (best || acquire_in_progress)
		goto found;

	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
				/* HW states are in the head of list, there is
				 * no need to iterate further.
				 */
				break;

			/* Packet offload: both policy and SA should
			 * have same device.
			 */
			if (pol->xdo.dev != x->xso.dev)
				continue;
		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
			/* Skip HW policy for SW lookups */
			continue;
#endif
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, family,
					   &best, &acquire_in_progress, &error);
	}
found:
	if (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) ||
	    (best && (best->pcpu_num == pcpu_id)))
		x = best;

	if (!x && !error && !acquire_in_progress) {
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup_all(net, mark, daddr,
						  tmpl->id.spi, tmpl->id.proto,
						  encap_family,
						  &pol->xdo)) != NULL) {
			to_put = x0;
			error = -EEXIST;
			goto out;
		}

		c.net = net;
		/* If the KMs have no listeners (yet...), avoid allocating an SA
		 * for each and every packet - garbage collection might not
		 * handle the flood.
		 */
		if (!km_is_alive(&c)) {
			error = -ESRCH;
			goto out;
		}

		x = xfrm_state_alloc(net);
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary state matching only
		 * to current session. */
		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
		x->if_id = if_id;
		if ((pol->flags & XFRM_POLICY_CPU_ACQUIRE) && best)
			x->pcpu_num = pcpu_id;

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			to_put = x;
			x = NULL;
			goto out;
		}
#ifdef CONFIG_XFRM_OFFLOAD
		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
			struct xfrm_dev_offload *xdo = &pol->xdo;
			struct xfrm_dev_offload *xso = &x->xso;

			xso->type = XFRM_DEV_OFFLOAD_PACKET;
			xso->dir = xdo->dir;
			xso->dev = xdo->dev;
			xso->real_dev = xdo->real_dev;
			xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ;
			netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC);
			error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL);
			if (error) {
				xso->dir = 0;
				netdev_put(xso->dev, &xso->dev_tracker);
				xso->dev = NULL;
				xso->real_dev = NULL;
				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
				x->km.state = XFRM_STATE_DEAD;
				to_put = x;
				x = NULL;
				goto out;
			}
		}
#endif
		if (km_query(x, tmpl, pol) == 0) {
			spin_lock_bh(&net->xfrm.xfrm_state_lock);
			x->km.state = XFRM_STATE_ACQ;
			x->dir = XFRM_SA_DIR_OUT;
			list_add(&x->km.all, &net->xfrm.state_all);
			h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
			XFRM_STATE_INSERT(bydst, &x->bydst,
					  net->xfrm.state_bydst + h,
					  x->xso.type);
			h = xfrm_src_hash(net, daddr, saddr, encap_family);
			XFRM_STATE_INSERT(bysrc, &x->bysrc,
					  net->xfrm.state_bysrc + h,
					  x->xso.type);
			INIT_HLIST_NODE(&x->state_cache);
			if (x->id.spi) {
				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
				XFRM_STATE_INSERT(byspi, &x->byspi,
						  net->xfrm.state_byspi + h,
						  x->xso.type);
			}
			if (x->km.seq) {
				h = xfrm_seq_hash(net, x->km.seq);
				XFRM_STATE_INSERT(byseq, &x->byseq,
						  net->xfrm.state_byseq + h,
						  x->xso.type);
			}
			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
			hrtimer_start(&x->mtimer,
				      ktime_set(net->xfrm.sysctl_acq_expires, 0),
				      HRTIMER_MODE_REL_SOFT);
			net->xfrm.state_num++;
			xfrm_hash_grow_check(net, x->bydst.next != NULL);
			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
		} else {
#ifdef CONFIG_XFRM_OFFLOAD
			struct xfrm_dev_offload *xso = &x->xso;

			if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
				xfrm_dev_state_delete(x);
				xfrm_dev_state_free(x);
			}
#endif
			x->km.state = XFRM_STATE_DEAD;
			to_put = x;
			x = NULL;
			error = -ESRCH;
		}

		/* Use the already installed 'fallback' while the CPU-specific
		 * SA acquire is handled */
		if (best)
			x = best;
	}
out:
	if (x) {
		if (!xfrm_state_hold_rcu(x)) {
			*err = -EAGAIN;
			x = NULL;
		}
	} else {
		*err = acquire_in_progress ? -EAGAIN : error;
	}

	if (x && x->km.state == XFRM_STATE_VALID && !cached &&
	    (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
		spin_lock_bh(&net->xfrm.xfrm_state_lock);
		if (hlist_unhashed(&x->state_cache))
			hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	}

	rcu_read_unlock();
	if (to_put)
		xfrm_state_put(to_put);

	if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) {
		*err = -EAGAIN;
		if (x) {
			xfrm_state_put(x);
			x = NULL;
		}
	}

	return x;
}
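/*
 * Summary of the resolution order in xfrm_state_find(): (1) the policy's
 * state_cache_list, (2) the bydst hash keyed by (daddr, saddr, reqid),
 * (3) the same hash probed with a wildcard source address, and finally
 * (4) an ACQ "larval" state is created and the key manager is asked via
 * km_query() to negotiate a real SA.
 */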
struct xfrm_state *
xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
		    xfrm_address_t *daddr, xfrm_address_t *saddr,
		    unsigned short family, u8 mode, u8 proto, u32 reqid)
{
	unsigned int h;
	struct xfrm_state *rx = NULL, *x = NULL;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->if_id == if_id &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    mode == x->props.mode &&
		    proto == x->id.proto &&
		    x->km.state == XFRM_STATE_VALID) {
			rx = x;
			break;
		}
	}

	if (rx)
		xfrm_state_hold(rx);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	return rx;
}
EXPORT_SYMBOL(xfrm_stateonly_find);
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
					   unsigned short family)
{
	struct xfrm_state *x;
	struct xfrm_state_walk *w;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	list_for_each_entry(w, &net->xfrm.state_all, all) {
		x = container_of(w, struct xfrm_state, km);
		if (x->props.family != family ||
		    x->id.spi != spi)
			continue;

		xfrm_state_hold(x);
		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
		return x;
	}
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	return NULL;
}
EXPORT_SYMBOL(xfrm_state_lookup_byspi);
static void __xfrm_state_insert(struct xfrm_state *x)
{
	struct net *net = xs_net(x);
	unsigned int h;

	list_add(&x->km.all, &net->xfrm.state_all);

	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
			  x->xso.type);

	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
	XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
			  x->xso.type);

	if (x->id.spi) {
		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
				  x->xso.type);
	}

	if (x->km.seq) {
		h = xfrm_seq_hash(net, x->km.seq);

		XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h,
				  x->xso.type);
	}

	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	net->xfrm.state_num++;

	xfrm_hash_grow_check(net, x->bydst.next != NULL);
	xfrm_nat_keepalive_state_updated(x);
}
/* net->xfrm.xfrm_state_lock is held */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
	struct net *net = xs_net(xnew);
	unsigned short family = xnew->props.family;
	u32 reqid = xnew->props.reqid;
	struct xfrm_state *x;
	unsigned int h;
	u32 mark = xnew->mark.v & xnew->mark.m;
	u32 if_id = xnew->if_id;
	u32 cpu_id = xnew->pcpu_num;

	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == reqid &&
		    x->if_id == if_id &&
		    x->pcpu_num == cpu_id &&
		    (mark & x->mark.m) == x->mark.v &&
		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
			x->genid++;
	}
}
void xfrm_state_insert(struct xfrm_state *x)
{
	struct net *net = xs_net(x);

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_insert);
/* net->xfrm.xfrm_state_lock is held */
static struct xfrm_state *__find_acq_core(struct net *net,
					  const struct xfrm_mark *m,
					  unsigned short family, u8 mode,
					  u32 reqid, u32 if_id, u32 pcpu_num, u8 proto,
					  const xfrm_address_t *daddr,
					  const xfrm_address_t *saddr,
					  int create)
{
	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
	struct xfrm_state *x;
	u32 mark = m->v & m->m;

	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
		if (x->props.reqid != reqid ||
		    x->props.mode != mode ||
		    x->props.family != family ||
		    x->km.state != XFRM_STATE_ACQ ||
		    x->id.spi != 0 ||
		    x->id.proto != proto ||
		    (mark & x->mark.m) != x->mark.v ||
		    x->pcpu_num != pcpu_num ||
		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
			continue;

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc(net);
	if (likely(x)) {
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			x->sel.daddr.in6 = daddr->in6;
			x->sel.saddr.in6 = saddr->in6;
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			x->props.saddr.in6 = saddr->in6;
			x->id.daddr.in6 = daddr->in6;
			break;
		}

		x->pcpu_num = pcpu_num;
		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->if_id = if_id;
		x->mark.v = m->v;
		x->mark.m = m->m;
		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
		xfrm_state_hold(x);
		hrtimer_start(&x->mtimer,
			      ktime_set(net->xfrm.sysctl_acq_expires, 0),
			      HRTIMER_MODE_REL_SOFT);
		list_add(&x->km.all, &net->xfrm.state_all);
		XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
				  x->xso.type);
		h = xfrm_src_hash(net, daddr, saddr, family);
		XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
				  x->xso.type);

		net->xfrm.state_num++;

		xfrm_hash_grow_check(net, x->bydst.next != NULL);
	}

	return x;
}
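/*
 * ACQ ("larval") states found or created above are placeholders with
 * spi == 0: they pin the (daddr, saddr, reqid) slot while a key manager
 * negotiates the real SA, and self-destruct after
 * net->xfrm.sysctl_acq_expires seconds if nothing replaces them.
 */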
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num);

int xfrm_state_add(struct xfrm_state *x)
{
	struct net *net = xs_net(x);
	struct xfrm_state *x1, *to_put;
	int family;
	int err;
	u32 mark = x->mark.v & x->mark.m;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	to_put = NULL;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		to_put = x1;
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq, x->pcpu_num);
		if (x1 && ((x1->id.proto != x->id.proto) ||
		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
			to_put = x1;
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
				     x->props.reqid, x->if_id, x->pcpu_num, x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	if (to_put)
		xfrm_state_put(to_put);

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
#ifdef CONFIG_XFRM_MIGRATE
static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security)
{
	struct xfrm_user_sec_ctx *uctx;
	int size = sizeof(*uctx) + security->ctx_len;
	int err;

	uctx = kmalloc(size, GFP_KERNEL);
	if (!uctx)
		return -ENOMEM;

	uctx->exttype = XFRMA_SEC_CTX;
	uctx->len = size;
	uctx->ctx_doi = security->ctx_doi;
	uctx->ctx_alg = security->ctx_alg;
	uctx->ctx_len = security->ctx_len;
	memcpy(uctx + 1, security->ctx_str, security->ctx_len);
	err = security_xfrm_state_alloc(x, uctx);
	kfree(uctx);
	if (err)
		return err;

	return 0;
}
static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
					   struct xfrm_encap_tmpl *encap)
{
	struct net *net = xs_net(orig);
	struct xfrm_state *x = xfrm_state_alloc(net);

	if (!x)
		goto out;

	memcpy(&x->id, &orig->id, sizeof(x->id));
	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
	x->props.mode = orig->props.mode;
	x->props.replay_window = orig->props.replay_window;
	x->props.reqid = orig->props.reqid;
	x->props.family = orig->props.family;
	x->props.saddr = orig->props.saddr;

	if (orig->aalg) {
		x->aalg = xfrm_algo_auth_clone(orig->aalg);
		if (!x->aalg)
			goto error;
	}
	x->props.aalgo = orig->props.aalgo;

	if (orig->aead) {
		x->aead = xfrm_algo_aead_clone(orig->aead);
		x->geniv = orig->geniv;
		if (!x->aead)
			goto error;
	}
	if (orig->ealg) {
		x->ealg = xfrm_algo_clone(orig->ealg);
		if (!x->ealg)
			goto error;
	}
	x->props.ealgo = orig->props.ealgo;

	if (orig->calg) {
		x->calg = xfrm_algo_clone(orig->calg);
		if (!x->calg)
			goto error;
	}
	x->props.calgo = orig->props.calgo;

	if (encap || orig->encap) {
		if (encap)
			x->encap = kmemdup(encap, sizeof(*x->encap),
					   GFP_KERNEL);
		else
			x->encap = kmemdup(orig->encap, sizeof(*x->encap),
					   GFP_KERNEL);

		if (!x->encap)
			goto error;
	}

	if (orig->security)
		if (clone_security(x, orig->security))
			goto error;

	if (orig->coaddr) {
		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
				    GFP_KERNEL);
		if (!x->coaddr)
			goto error;
	}

	if (orig->replay_esn) {
		if (xfrm_replay_clone(x, orig))
			goto error;
	}

	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
	memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));

	x->props.flags = orig->props.flags;
	x->props.extra_flags = orig->props.extra_flags;

	x->pcpu_num = orig->pcpu_num;
	x->if_id = orig->if_id;
	x->tfcpad = orig->tfcpad;
	x->replay_maxdiff = orig->replay_maxdiff;
	x->replay_maxage = orig->replay_maxage;
	memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft));
	x->km.state = orig->km.state;
	x->km.seq = orig->km.seq;
	x->replay = orig->replay;
	x->preplay = orig->preplay;
	x->mapping_maxage = orig->mapping_maxage;
	x->lastused = orig->lastused;
	x->new_mapping = 0;
	x->new_mapping_sport = 0;
	x->dir = orig->dir;

	return x;

error:
	xfrm_state_put(x);
out:
	return NULL;
}
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
					   u32 if_id)
{
	unsigned int h;
	struct xfrm_state *x = NULL;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);

	if (m->reqid) {
		h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
				  m->reqid, m->old_family);
		hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
			if (x->props.mode != m->mode ||
			    x->id.proto != m->proto)
				continue;
			if (m->reqid && x->props.reqid != m->reqid)
				continue;
			if (if_id != 0 && x->if_id != if_id)
				continue;
			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
					     m->old_family) ||
			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
					     m->old_family))
				continue;
			xfrm_state_hold(x);
			break;
		}
	} else {
		h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
				  m->old_family);
		hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
			if (x->props.mode != m->mode ||
			    x->id.proto != m->proto)
				continue;
			if (if_id != 0 && x->if_id != if_id)
				continue;
			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
					     m->old_family) ||
			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
					     m->old_family))
				continue;
			xfrm_state_hold(x);
			break;
		}
	}

	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	return x;
}
EXPORT_SYMBOL(xfrm_migrate_state_find);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
				      struct xfrm_migrate *m,
				      struct xfrm_encap_tmpl *encap)
{
	struct xfrm_state *xc;

	xc = xfrm_state_clone(x, encap);
	if (!xc)
		return NULL;

	xc->props.family = m->new_family;

	if (xfrm_init_state(xc) < 0)
		goto error;

	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));

	/* add state */
	if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
		/* care is needed when the destination address of the
		   state is to be updated, as it is part of the lookup
		   triplet */
		xfrm_state_insert(xc);
	} else {
		if (xfrm_state_add(xc) < 0)
			goto error;
	}

	return xc;
error:
	xfrm_state_put(xc);
	return NULL;
}
EXPORT_SYMBOL(xfrm_state_migrate);
#endif
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1, *to_put;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
	struct net *net = xs_net(x);

	to_put = NULL;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		to_put = x1;
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		if (x->dir && x1->dir != x->dir)
			goto out;

		__xfrm_state_insert(x);
		x = NULL;
	} else {
		if (x1->dir != x->dir)
			goto out;
	}
	err = 0;

out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	if (to_put)
		xfrm_state_put(to_put);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap &&
		    x->encap->encap_type == x1->encap->encap_type)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		else if (x->encap || x1->encap)
			goto fail;

		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		hrtimer_start(&x1->mtimer, ktime_set(1, 0),
			      HRTIMER_MODE_REL_SOFT);
		if (READ_ONCE(x1->curlft.use_time))
			xfrm_state_check_expire(x1);

		if (x->props.smark.m || x->props.smark.v || x->if_id) {
			spin_lock_bh(&net->xfrm.xfrm_state_lock);

			if (x->props.smark.m || x->props.smark.v)
				x1->props.smark = x->props.smark;

			if (x->if_id)
				x1->if_id = x->if_id;

			__xfrm_state_bump_genids(x1);
			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
		}

		err = 0;
		x->km.state = XFRM_STATE_DEAD;
		__xfrm_state_put(x);
	}

fail:
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
	xfrm_dev_state_update_stats(x);

	if (!READ_ONCE(x->curlft.use_time))
		WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
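/*
 * Lifetime model: soft limits (bytes/packets here, seconds in
 * xfrm_timer_handler()) only notify the key manager via
 * km_state_expired(x, 0, ...) so it can rekey in advance; hard limits
 * mark the state EXPIRED/deleted and report with hard == 1.
 */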
void xfrm_state_update_stats(struct net *net)
{
	struct xfrm_state *x;
	int i;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	for (i = 0; i <= net->xfrm.state_hmask; i++) {
		hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst)
			xfrm_dev_state_update_stats(x);
	}
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
struct xfrm_state *
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
		  u8 proto, unsigned short family)
{
	struct xfrm_state *x;

	rcu_read_lock();
	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
	rcu_read_unlock();
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);
struct xfrm_state *
xfrm_state_lookup_byaddr(struct net *net, u32 mark,
			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
			 u8 proto, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
	      u32 if_id, u32 pcpu_num, u8 proto, const xfrm_address_t *daddr,
	      const xfrm_address_t *saddr, int create, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	x = __find_acq_core(net, mark, family, mode, reqid, if_id, pcpu_num,
			    proto, daddr, saddr, create);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);

	return x;
}
EXPORT_SYMBOL(xfrm_find_acq);
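
/*
 * Illustrative only (names hypothetical): during output policy
 * resolution an ACQUIRE can be materialized as
 *
 *	x = xfrm_find_acq(net, &mark, XFRM_MODE_TUNNEL, reqid, if_id,
 *			  pcpu_num, IPPROTO_ESP, &daddr, &saddr, 1,
 *			  AF_INET);
 *
 * With create == 1 a larval state is inserted when no match exists, so
 * a key manager can later complete it via xfrm_alloc_spi() and
 * xfrm_state_update().
 */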
#ifdef CONFIG_XFRM_SUB_POLICY
#if IS_ENABLED(CONFIG_IPV6)
/* distribution counting sort function for xfrm_state and xfrm_tmpl */
static void
__xfrm6_sort(void **dst, void **src, int n,
	     int (*cmp)(const void *p), int maxclass)
{
	int count[XFRM_MAX_DEPTH] = { };
	int class[XFRM_MAX_DEPTH];
	int i;

	for (i = 0; i < n; i++) {
		int c = cmp(src[i]);

		class[i] = c;
		count[c]++;
	}

	for (i = 2; i < maxclass; i++)
		count[i] += count[i - 1];

	for (i = 0; i < n; i++) {
		dst[count[class[i] - 1]++] = src[i];
		src[i] = NULL;
	}
}
/* Rule for xfrm_state:
 *
 * rule 1: select IPsec transport except AH
 * rule 2: select MIPv6 RO or inbound trigger
 * rule 3: select IPsec transport AH
 * rule 4: select IPsec tunnel
 * rule 5: others
 */
static int __xfrm6_state_sort_cmp(const void *p)
{
	const struct xfrm_state *v = p;

	switch (v->props.mode) {
	case XFRM_MODE_TRANSPORT:
		if (v->id.proto != IPPROTO_AH)
			return 1;
		else
			return 3;
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case XFRM_MODE_ROUTEOPTIMIZATION:
	case XFRM_MODE_IN_TRIGGER:
		return 2;
#endif
	case XFRM_MODE_TUNNEL:
	case XFRM_MODE_BEET:
		return 4;
	}
	return 5;
}
/* Rule for xfrm_tmpl:
 *
 * rule 1: select IPsec transport
 * rule 2: select MIPv6 RO or inbound trigger
 * rule 3: select IPsec tunnel
 * rule 4: others
 */
static int __xfrm6_tmpl_sort_cmp(const void *p)
{
	const struct xfrm_tmpl *v = p;

	switch (v->mode) {
	case XFRM_MODE_TRANSPORT:
		return 1;
#if IS_ENABLED(CONFIG_IPV6_MIP6)
	case XFRM_MODE_ROUTEOPTIMIZATION:
	case XFRM_MODE_IN_TRIGGER:
		return 2;
#endif
	case XFRM_MODE_TUNNEL:
	case XFRM_MODE_BEET:
		return 3;
	}
	return 4;
}
#else
static inline int __xfrm6_state_sort_cmp(const void *p) { return 5; }
static inline int __xfrm6_tmpl_sort_cmp(const void *p) { return 4; }

static inline void
__xfrm6_sort(void **dst, void **src, int n,
	     int (*cmp)(const void *p), int maxclass)
{
	int i;

	for (i = 0; i < n; i++)
		dst[i] = src[i];
}
#endif /* CONFIG_IPV6 */
void
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
	       unsigned short family)
{
	int i;

	if (family == AF_INET6)
		__xfrm6_sort((void **)dst, (void **)src, n,
			     __xfrm6_tmpl_sort_cmp, 5);
	else
		for (i = 0; i < n; i++)
			dst[i] = src[i];
}

void
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
		unsigned short family)
{
	int i;

	if (family == AF_INET6)
		__xfrm6_sort((void **)dst, (void **)src, n,
			     __xfrm6_state_sort_cmp, 6);
	else
		for (i = 0; i < n; i++)
			dst[i] = src[i];
}
#endif
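
/*
 * Worked example (illustrative only): three IPv6 states classed as
 * transport-ESP (1), tunnel (4) and transport-AH (3) give counts
 * {1, 0, 1, 1}; after the prefix sums the class offsets are 0, 1 and
 * 2, so dst[] ends up ESP, AH, tunnel, i.e. exactly the rule order
 * documented above, in one stable O(n) pass.
 */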
/* Crude, but a full resolution list is not worth building here. */
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
	unsigned int h = xfrm_seq_hash(net, seq);
	struct xfrm_state *x;

	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
		if (x->km.seq == seq &&
		    (mark & x->mark.m) == x->mark.v &&
		    x->pcpu_num == pcpu_num &&
		    x->km.state == XFRM_STATE_ACQ) {
			xfrm_state_hold(x);
			return x;
		}
	}

	return NULL;
}
struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq, u32 pcpu_num)
{
	struct xfrm_state *x;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	x = __xfrm_find_acq_byseq(net, mark, seq, pcpu_num);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);
u32 xfrm_get_acqseq(void)
{
	u32 res;
	static atomic_t acqseq;

	do {
		res = atomic_inc_return(&acqseq);
	} while (!res);

	return res;
}
EXPORT_SYMBOL(xfrm_get_acqseq);
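
/*
 * Note: atomic_inc_return() eventually wraps to 0, and 0 is reserved
 * to mean "no acquire sequence number", so the do/while above skips
 * it; callers can rely on a non-zero return value.
 */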
int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack)
{
	switch (proto) {
	case IPPROTO_AH:
	case IPPROTO_ESP:
		break;

	case IPPROTO_COMP:
		/* IPCOMP spi is 16-bits. */
		if (max >= 0x10000) {
			NL_SET_ERR_MSG(extack, "IPCOMP SPI must be <= 65535");
			return -EINVAL;
		}
		break;

	default:
		NL_SET_ERR_MSG(extack, "Invalid protocol, must be one of AH, ESP, IPCOMP");
		return -EINVAL;
	}

	if (min > max) {
		NL_SET_ERR_MSG(extack, "Invalid SPI range: min > max");
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL(verify_spi_info);
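
/*
 * Illustrative only (extack assumed to be the caller's
 * struct netlink_ext_ack): IPComp CPIs are 16 bits wide, so
 *
 *	err = verify_spi_info(IPPROTO_COMP, 0x100, 0xffff, extack);
 *
 * succeeds, while the same call with max = 0x10000 fails with -EINVAL,
 * as does any range with min > max.
 */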
int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
		   struct netlink_ext_ack *extack)
{
	struct net *net = xs_net(x);
	unsigned int h;
	struct xfrm_state *x0;
	int err = -ENOENT;
	__be32 minspi = htonl(low);
	__be32 maxspi = htonl(high);
	__be32 newspi = 0;
	u32 mark = x->mark.v & x->mark.m;

	spin_lock_bh(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD) {
		NL_SET_ERR_MSG(extack, "Target ACQUIRE is in DEAD state");
		goto unlock;
	}

	err = 0;
	if (x->id.spi)
		goto unlock;

	err = -ENOENT;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			NL_SET_ERR_MSG(extack, "Requested SPI is already in use");
			xfrm_state_put(x0);
			goto unlock;
		}
		newspi = minspi;
	} else {
		u32 spi = 0;
		for (h = 0; h < high-low+1; h++) {
			spi = get_random_u32_inclusive(low, high);
			x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				newspi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (newspi) {
		spin_lock_bh(&net->xfrm.xfrm_state_lock);
		x->id.spi = newspi;
		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
				  x->xso.type);
		spin_unlock_bh(&net->xfrm.xfrm_state_lock);

		err = 0;
	} else {
		NL_SET_ERR_MSG(extack, "No SPI available in the requested range");
	}

unlock:
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_alloc_spi);
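
/*
 * Illustrative only (names hypothetical): a key manager completing a
 * larval ACQUIRE with a random SPI from a private range:
 *
 *	err = xfrm_alloc_spi(x, 0x00000100, 0x0fffffff, extack);
 *
 * On success x->id.spi holds the network-byte-order SPI and x has been
 * hashed into state_byspi; if x already owned an SPI the call is a
 * no-op returning 0.
 */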
static bool __xfrm_state_filter_match(struct xfrm_state *x,
				      struct xfrm_address_filter *filter)
{
	if (filter) {
		if ((filter->family == AF_INET ||
		     filter->family == AF_INET6) &&
		    x->props.family != filter->family)
			return false;

		return addr_match(&x->props.saddr, &filter->saddr,
				  filter->splen) &&
		       addr_match(&x->id.daddr, &filter->daddr,
				  filter->dplen);
	}
	return true;
}
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
		    int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	struct xfrm_state *state;
	struct xfrm_state_walk *x;
	int err = 0;

	if (walk->seq != 0 && list_empty(&walk->all))
		return 0;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	if (list_empty(&walk->all))
		x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
	else
		x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
	list_for_each_entry_from(x, &net->xfrm.state_all, all) {
		if (x->state == XFRM_STATE_DEAD)
			continue;
		state = container_of(x, struct xfrm_state, km);
		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
			continue;
		if (!__xfrm_state_filter_match(state, walk->filter))
			continue;
		err = func(state, walk->seq, data);
		if (err) {
			list_move_tail(&walk->all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		err = -ENOENT;
		goto out;
	}
	list_del_init(&walk->all);
out:
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
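
/*
 * Illustrative sketch of the walker contract (callback and ctx are
 * hypothetical):
 *
 *	struct xfrm_state_walk walk;
 *
 *	xfrm_state_walk_init(&walk, IPSEC_PROTO_ANY, NULL);
 *	err = xfrm_state_walk(net, &walk, dump_one_state, &ctx);
 *	xfrm_state_walk_done(&walk, net);
 *
 * Because the walker is itself linked into net->xfrm.state_all, an
 * interrupted dump (non-zero callback return) resumes exactly where it
 * stopped on the next call.
 */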
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
			  struct xfrm_address_filter *filter)
{
	INIT_LIST_HEAD(&walk->all);
	walk->proto = proto;
	walk->state = XFRM_STATE_DEAD;
	walk->seq = 0;
	walk->filter = filter;
}
EXPORT_SYMBOL(xfrm_state_walk_init);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
{
	kfree(walk->filter);

	if (list_empty(&walk->all))
		return;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	list_del(&walk->all);
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
EXPORT_SYMBOL(xfrm_state_walk_done);
static void xfrm_replay_timer_handler(struct timer_list *t)
{
	struct xfrm_state *x = from_timer(x, t, rtimer);

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on(xs_net(x)))
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
}
static LIST_HEAD(xfrm_km_list);

void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
{
	struct xfrm_mgr *km;

	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(xp, dir, c);
	rcu_read_unlock();
}

void km_state_notify(struct xfrm_state *x, const struct km_event *c)
{
	struct xfrm_mgr *km;
	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list)
		if (km->notify)
			km->notify(x, c);
	rcu_read_unlock();
}

EXPORT_SYMBOL(km_policy_notify);
EXPORT_SYMBOL(km_state_notify);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
{
	struct km_event c;

	c.data.hard = hard;
	c.portid = portid;
	c.event = XFRM_MSG_EXPIRE;
	km_state_notify(x, &c);
}

EXPORT_SYMBOL(km_state_expired);
/*
 * We send to all registered managers regardless of failure
 * We are happy with one success
 */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL, acqret;
	struct xfrm_mgr *km;

	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
		acqret = km->acquire(x, t, pol);
		if (!acqret)
			err = acqret;
	}
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(km_query);
static int __km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	rcu_read_unlock();
	return err;
}
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
{
	int ret = 0;

	if (x->mapping_maxage) {
		if ((jiffies / HZ - x->new_mapping) > x->mapping_maxage ||
		    x->new_mapping_sport != sport) {
			x->new_mapping_sport = sport;
			x->new_mapping = jiffies / HZ;
			ret = __km_new_mapping(x, ipaddr, sport);
		}
	} else {
		ret = __km_new_mapping(x, ipaddr, sport);
	}

	return ret;
}
EXPORT_SYMBOL(km_new_mapping);
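
/*
 * Worked example (illustrative only): with x->mapping_maxage = 10, a
 * changed NAT mapping is forwarded to the key managers at most once
 * per 10 seconds unless the peer's source port also changed; with
 * mapping_maxage == 0 every mapping change is forwarded.
 */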
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
{
	struct km_event c;

	c.data.hard = hard;
	c.portid = portid;
	c.event = XFRM_MSG_POLEXPIRE;
	km_policy_notify(pol, dir, &c);
}
EXPORT_SYMBOL(km_policy_expired);
#ifdef CONFIG_XFRM_MIGRATE
int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
	       const struct xfrm_migrate *m, int num_migrate,
	       const struct xfrm_kmaddress *k,
	       const struct xfrm_encap_tmpl *encap)
{
	int err = -EINVAL;
	int ret;
	struct xfrm_mgr *km;

	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
		if (km->migrate) {
			ret = km->migrate(sel, dir, type, m, num_migrate, k,
					  encap);
			if (!ret)
				err = ret;
		}
	}
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(km_migrate);
#endif
int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
{
	int err = -EINVAL;
	int ret;
	struct xfrm_mgr *km;

	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
		if (km->report) {
			ret = km->report(net, proto, sel, addr);
			if (!ret)
				err = ret;
		}
	}
	rcu_read_unlock();
	return err;
}
EXPORT_SYMBOL(km_report);
static bool km_is_alive(const struct km_event *c)
{
	struct xfrm_mgr *km;
	bool is_alive = false;

	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
		if (km->is_alive && km->is_alive(c)) {
			is_alive = true;
			break;
		}
	}
	rcu_read_unlock();

	return is_alive;
}
#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
static DEFINE_SPINLOCK(xfrm_translator_lock);
static struct xfrm_translator __rcu *xfrm_translator;

struct xfrm_translator *xfrm_get_translator(void)
{
	struct xfrm_translator *xtr;

	rcu_read_lock();
	xtr = rcu_dereference(xfrm_translator);
	if (unlikely(!xtr))
		goto out;
	if (!try_module_get(xtr->owner))
		xtr = NULL;
out:
	rcu_read_unlock();
	return xtr;
}
EXPORT_SYMBOL_GPL(xfrm_get_translator);

void xfrm_put_translator(struct xfrm_translator *xtr)
{
	module_put(xtr->owner);
}
EXPORT_SYMBOL_GPL(xfrm_put_translator);
int xfrm_register_translator(struct xfrm_translator *xtr)
{
	int err = 0;

	spin_lock_bh(&xfrm_translator_lock);
	if (unlikely(xfrm_translator != NULL))
		err = -EEXIST;
	else
		rcu_assign_pointer(xfrm_translator, xtr);
	spin_unlock_bh(&xfrm_translator_lock);

	return err;
}
EXPORT_SYMBOL_GPL(xfrm_register_translator);

int xfrm_unregister_translator(struct xfrm_translator *xtr)
{
	int err = 0;

	spin_lock_bh(&xfrm_translator_lock);
	if (likely(xfrm_translator != NULL)) {
		if (rcu_access_pointer(xfrm_translator) != xtr)
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_translator, NULL);
	}
	spin_unlock_bh(&xfrm_translator_lock);
	synchronize_rcu();

	return err;
}
EXPORT_SYMBOL_GPL(xfrm_unregister_translator);
#endif
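
/*
 * Illustrative only: the single translator slot is intended for the
 * xfrm_compat module; a registration sketch (ops beyond .owner are
 * abbreviated, the callback name is taken from the use above):
 *
 *	static struct xfrm_translator xtr = {
 *		.owner = THIS_MODULE,
 *		.xlate_user_policy_sockptr = ...,
 *	};
 *
 *	err = xfrm_register_translator(&xtr);
 *
 * A second registration fails with -EEXIST until the first owner
 * unregisters.
 */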
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (sockptr_is_null(optval) && !optlen) {
		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
		__sk_dst_reset(sk);
		return 0;
	}

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = memdup_sockptr(optval, optlen);
	if (IS_ERR(data))
		return PTR_ERR(data);

	if (in_compat_syscall()) {
		struct xfrm_translator *xtr = xfrm_get_translator();

		if (!xtr) {
			kfree(data);
			return -EOPNOTSUPP;
		}

		err = xtr->xlate_user_policy_sockptr(&data, optlen);
		xfrm_put_translator(xtr);
		if (err) {
			kfree(data);
			return err;
		}
	}

	err = -EINVAL;
	rcu_read_lock();
	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	rcu_read_unlock();

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		__sk_dst_reset(sk);
		err = 0;
	}

	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
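
/*
 * Illustrative only: this implements the kernel side of the per-socket
 * policy option; a userspace sketch for an IPv4 socket would be
 *
 *	struct xfrm_userpolicy_info pol = { ... };
 *
 *	setsockopt(fd, SOL_IP, IP_XFRM_POLICY, &pol, sizeof(pol));
 *
 * and a NULL optval with optlen == 0 detaches the socket's IN and OUT
 * policies again.
 */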
static DEFINE_SPINLOCK(xfrm_km_lock);

void xfrm_register_km(struct xfrm_mgr *km)
{
	spin_lock_bh(&xfrm_km_lock);
	list_add_tail_rcu(&km->list, &xfrm_km_list);
	spin_unlock_bh(&xfrm_km_lock);
}
EXPORT_SYMBOL(xfrm_register_km);

void xfrm_unregister_km(struct xfrm_mgr *km)
{
	spin_lock_bh(&xfrm_km_lock);
	list_del_rcu(&km->list);
	spin_unlock_bh(&xfrm_km_lock);
	synchronize_rcu();
}
EXPORT_SYMBOL(xfrm_unregister_km);
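
/*
 * Illustrative only: af_key and xfrm_user each register exactly one
 * struct xfrm_mgr at module init, roughly
 *
 *	static struct xfrm_mgr netlink_mgr = {
 *		.notify = ...,
 *		.acquire = ...,
 *	};
 *
 *	xfrm_register_km(&netlink_mgr);
 *
 * xfrm_unregister_km() ends with synchronize_rcu(), so the km_*
 * walkers above are guaranteed to have finished before the manager's
 * memory may be reused.
 */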
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;

	if (WARN_ON(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -EEXIST;
	else
		rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
	spin_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);

int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0, family = afinfo->family;

	if (WARN_ON(family >= NPROTO))
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
	}
	spin_unlock_bh(&xfrm_state_afinfo_lock);
	synchronize_rcu();
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
{
	if (unlikely(family >= NPROTO))
		return NULL;

	return rcu_dereference(xfrm_state_afinfo[family]);
}
EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);

struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_state_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

void xfrm_flush_gc(void)
{
	flush_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(xfrm_flush_gc);
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put_sync(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	const struct xfrm_type *type = READ_ONCE(x->type);
	struct crypto_aead *aead;
	u32 blksize, net_adj = 0;

	if (x->km.state != XFRM_STATE_VALID ||
	    !type || type->proto != IPPROTO_ESP)
		return mtu - x->props.header_len;

	aead = x->data;
	blksize = ALIGN(crypto_aead_blocksize(aead), 4);

	switch (x->props.mode) {
	case XFRM_MODE_TRANSPORT:
	case XFRM_MODE_BEET:
		if (x->props.family == AF_INET)
			net_adj = sizeof(struct iphdr);
		else if (x->props.family == AF_INET6)
			net_adj = sizeof(struct ipv6hdr);
		break;
	case XFRM_MODE_TUNNEL:
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
		 net_adj) & ~(blksize - 1)) + net_adj - 2;
}
EXPORT_SYMBOL_GPL(xfrm_state_mtu);
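
/*
 * Worked example (illustrative only): transport-mode ESP over IPv4
 * with props.header_len = 24 (SPI, sequence number and a 16-byte IV),
 * a 16-byte AEAD block size and a 16-byte ICV, starting from
 * mtu = 1500:
 *
 *	1500 - 24 - 16 - 20 = 1440, already a multiple of 16,
 *	then 1440 + 20 - 2 = 1458
 *
 * so 1458 bytes of payload fit; the final "- 2" reserves the ESP
 * trailer's pad-length and next-header bytes.
 */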
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
		      struct netlink_ext_ack *extack)
{
	const struct xfrm_mode *inner_mode;
	const struct xfrm_mode *outer_mode;
	int family = x->props.family;
	int err;

	if (family == AF_INET &&
	    READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
		x->props.flags |= XFRM_STATE_NOPMTUDISC;

	err = -EPROTONOSUPPORT;

	if (x->sel.family != AF_UNSPEC) {
		inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
		if (inner_mode == NULL) {
			NL_SET_ERR_MSG(extack, "Requested mode not found");
			goto error;
		}

		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
		    family != x->sel.family) {
			NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate a change of family");
			goto error;
		}

		x->inner_mode = *inner_mode;
	} else {
		const struct xfrm_mode *inner_mode_iaf;
		int iafamily = AF_INET;

		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
		if (inner_mode == NULL) {
			NL_SET_ERR_MSG(extack, "Requested mode not found");
			goto error;
		}

		x->inner_mode = *inner_mode;

		if (x->props.family == AF_INET)
			iafamily = AF_INET6;

		inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
		if (inner_mode_iaf) {
			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
				x->inner_mode_iaf = *inner_mode_iaf;
		}
	}

	x->type = xfrm_get_type(x->id.proto, family);
	if (x->type == NULL) {
		NL_SET_ERR_MSG(extack, "Requested type not found");
		goto error;
	}

	x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);

	err = x->type->init_state(x, extack);
	if (err)
		goto error;

	outer_mode = xfrm_get_mode(x->props.mode, family);
	if (!outer_mode) {
		NL_SET_ERR_MSG(extack, "Requested mode not found");
		err = -EPROTONOSUPPORT;
		goto error;
	}

	x->outer_mode = *outer_mode;
	if (init_replay) {
		err = xfrm_init_replay(x, extack);
		if (err)
			goto error;
	}

	if (x->nat_keepalive_interval) {
		if (x->dir != XFRM_SA_DIR_OUT) {
			NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs");
			err = -EINVAL;
			goto error;
		}

		if (!x->encap || x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
			NL_SET_ERR_MSG(extack,
				       "NAT keepalive is only supported for UDP encapsulation");
			err = -EINVAL;
			goto error;
		}
	}

error:
	return err;
}
EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
	int err;

	err = __xfrm_init_state(x, true, false, NULL);
	if (!err)
		x->km.state = XFRM_STATE_VALID;

	return err;
}

EXPORT_SYMBOL(xfrm_init_state);
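
/*
 * Illustrative only (setup fields abbreviated): in-kernel users such
 * as ipcomp construct a state and validate it in one step:
 *
 *	x = xfrm_state_alloc(net);
 *	...
 *	err = xfrm_init_state(x);
 *
 * which runs __xfrm_init_state() with replay initialization enabled,
 * no offload and no extack, and marks the state VALID on success.
 */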
int __net_init xfrm_state_init(struct net *net)
{
	unsigned int sz;

	if (net_eq(net, &init_net))
		xfrm_state_cache = KMEM_CACHE(xfrm_state,
					      SLAB_HWCACHE_ALIGN | SLAB_PANIC);

	INIT_LIST_HEAD(&net->xfrm.state_all);

	sz = sizeof(struct hlist_head) * 8;

	net->xfrm.state_bydst = xfrm_hash_alloc(sz);
	if (!net->xfrm.state_bydst)
		goto out_bydst;
	net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
	if (!net->xfrm.state_bysrc)
		goto out_bysrc;
	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
	if (!net->xfrm.state_byspi)
		goto out_byspi;
	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
	if (!net->xfrm.state_byseq)
		goto out_byseq;

	net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
	if (!net->xfrm.state_cache_input)
		goto out_state_cache_input;

	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	net->xfrm.state_num = 0;
	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
	spin_lock_init(&net->xfrm.xfrm_state_lock);
	seqcount_spinlock_init(&net->xfrm.xfrm_state_hash_generation,
			       &net->xfrm.xfrm_state_lock);
	return 0;

out_state_cache_input:
	xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
	xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
	xfrm_hash_free(net->xfrm.state_bysrc, sz);
out_bysrc:
	xfrm_hash_free(net->xfrm.state_bydst, sz);
out_bydst:
	return -ENOMEM;
}
void xfrm_state_fini(struct net *net)
{
	unsigned int sz;

	flush_work(&net->xfrm.state_hash_work);
	flush_work(&xfrm_state_gc_work);
	xfrm_state_flush(net, 0, false, true);

	WARN_ON(!list_empty(&net->xfrm.state_all));

	sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.state_byseq));
	xfrm_hash_free(net->xfrm.state_byseq, sz);
	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
	xfrm_hash_free(net->xfrm.state_byspi, sz);
	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
	xfrm_hash_free(net->xfrm.state_bysrc, sz);
	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
	xfrm_hash_free(net->xfrm.state_bydst, sz);
	free_percpu(net->xfrm.state_cache_input);
}
#ifdef CONFIG_AUDITSYSCALL
static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
				     struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = x->security;
	u32 spi = ntohl(x->id.spi);

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (x->props.family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
				 &x->props.saddr.a4, &x->id.daddr.a4);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
				 x->props.saddr.a6, x->id.daddr.a6);
		break;
	}

	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
}
static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
				      struct audit_buffer *audit_buf)
{
	const struct iphdr *iph4;
	const struct ipv6hdr *iph6;

	switch (family) {
	case AF_INET:
		iph4 = ip_hdr(skb);
		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
				 &iph4->saddr, &iph4->daddr);
		break;
	case AF_INET6:
		iph6 = ipv6_hdr(skb);
		audit_log_format(audit_buf,
				 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
				 &iph6->saddr, &iph6->daddr,
				 iph6->flow_lbl[0] & 0x0f,
				 iph6->flow_lbl[1],
				 iph6->flow_lbl[2]);
		break;
	}
}
void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SAD-add");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	xfrm_audit_helper_sainfo(x, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_add);

void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SAD-delete");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
	xfrm_audit_helper_sainfo(x, audit_buf);
	audit_log_format(audit_buf, " res=%u", result);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
				      struct sk_buff *skb)
{
	struct audit_buffer *audit_buf;
	u32 spi;

	audit_buf = xfrm_audit_start("SA-replay-overflow");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
	/* don't record the sequence number because it's inherent in this kind
	 * of audit message */
	spi = ntohl(x->id.spi);
	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);

void xfrm_audit_state_replay(struct xfrm_state *x,
			     struct sk_buff *skb, __be32 net_seq)
{
	struct audit_buffer *audit_buf;
	u32 spi;

	audit_buf = xfrm_audit_start("SA-replayed-pkt");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
	spi = ntohl(x->id.spi);
	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
			 spi, spi, ntohl(net_seq));
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
{
	struct audit_buffer *audit_buf;

	audit_buf = xfrm_audit_start("SA-notfound");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);

void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
			       __be32 net_spi, __be32 net_seq)
{
	struct audit_buffer *audit_buf;
	u32 spi;

	audit_buf = xfrm_audit_start("SA-notfound");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
	spi = ntohl(net_spi);
	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
			 spi, spi, ntohl(net_seq));
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
void xfrm_audit_state_icvfail(struct xfrm_state *x,
			      struct sk_buff *skb, u8 proto)
{
	struct audit_buffer *audit_buf;
	__be32 net_spi;
	__be32 net_seq;

	audit_buf = xfrm_audit_start("SA-icv-failure");
	if (audit_buf == NULL)
		return;
	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
	if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
		u32 spi = ntohl(net_spi);
		audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
				 spi, spi, ntohl(net_seq));
	}
	audit_log_end(audit_buf);
}
EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
#endif /* CONFIG_AUDITSYSCALL */