// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xfrm_output.c - Common IPsec encapsulation code.
 *
 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
 */

#include <linux/errno.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>

#include "xfrm_inout.h"

/* Forward declarations: both helpers are defined further down in this file
 * but are already called by functions that appear before them.
 */
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);

24 static int xfrm_skb_check_space(struct sk_buff
*skb
)
26 struct dst_entry
*dst
= skb_dst(skb
);
27 int nhead
= dst
->header_len
+ LL_RESERVED_SPACE(dst
->dev
)
29 int ntail
= dst
->dev
->needed_tailroom
- skb_tailroom(skb
);
38 return pskb_expand_head(skb
, nhead
, ntail
, GFP_ATOMIC
);
/* Children define the path of the packet through the
 * Linux networking.  Thus, destinations are stackable.
 */

/* Detach the current dst from @skb and hand back its child.
 *
 * The child is dst_clone()'d before the skb's own dst is dropped, so the
 * returned pointer stays usable after skb_dst_drop().  The caller is
 * expected to attach it to the skb again (or otherwise dispose of it).
 * May return NULL if there is no child.
 */
static struct dst_entry *skb_dst_pop(struct sk_buff *skb)
{
	struct dst_entry *child = dst_clone(xfrm_dst_child(skb_dst(skb)));

	skb_dst_drop(skb);
	return child;
}

53 /* Add encapsulation header.
55 * The IP header will be moved forward to make space for the encapsulation
58 static int xfrm4_transport_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
60 struct iphdr
*iph
= ip_hdr(skb
);
61 int ihl
= iph
->ihl
* 4;
63 skb_set_inner_transport_header(skb
, skb_transport_offset(skb
));
65 skb_set_network_header(skb
, -x
->props
.header_len
);
66 skb
->mac_header
= skb
->network_header
+
67 offsetof(struct iphdr
, protocol
);
68 skb
->transport_header
= skb
->network_header
+ ihl
;
70 memmove(skb_network_header(skb
), iph
, ihl
);
74 /* Add encapsulation header.
76 * The IP header and mutable extension headers will be moved forward to make
77 * space for the encapsulation header.
79 static int xfrm6_transport_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
81 #if IS_ENABLED(CONFIG_IPV6)
87 skb_set_inner_transport_header(skb
, skb_transport_offset(skb
));
89 hdr_len
= x
->type
->hdr_offset(x
, skb
, &prevhdr
);
92 skb_set_mac_header(skb
,
93 (prevhdr
- x
->props
.header_len
) - skb
->data
);
94 skb_set_network_header(skb
, -x
->props
.header_len
);
95 skb
->transport_header
= skb
->network_header
+ hdr_len
;
96 __skb_pull(skb
, hdr_len
);
97 memmove(ipv6_hdr(skb
), iph
, hdr_len
);
101 return -EAFNOSUPPORT
;
105 /* Add route optimization header space.
107 * The IP header and mutable extension headers will be moved forward to make
108 * space for the route optimization header.
110 static int xfrm6_ro_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
112 #if IS_ENABLED(CONFIG_IPV6)
119 hdr_len
= x
->type
->hdr_offset(x
, skb
, &prevhdr
);
122 skb_set_mac_header(skb
,
123 (prevhdr
- x
->props
.header_len
) - skb
->data
);
124 skb_set_network_header(skb
, -x
->props
.header_len
);
125 skb
->transport_header
= skb
->network_header
+ hdr_len
;
126 __skb_pull(skb
, hdr_len
);
127 memmove(ipv6_hdr(skb
), iph
, hdr_len
);
129 x
->lastused
= ktime_get_real_seconds();
134 return -EAFNOSUPPORT
;
138 /* Add encapsulation header.
140 * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
142 static int xfrm4_beet_encap_add(struct xfrm_state
*x
, struct sk_buff
*skb
)
144 struct ip_beet_phdr
*ph
;
145 struct iphdr
*top_iph
;
149 optlen
= XFRM_MODE_SKB_CB(skb
)->optlen
;
150 if (unlikely(optlen
))
151 hdrlen
+= IPV4_BEET_PHMAXLEN
- (optlen
& 4);
153 skb_set_network_header(skb
, -x
->props
.header_len
- hdrlen
+
154 (XFRM_MODE_SKB_CB(skb
)->ihl
- sizeof(*top_iph
)));
155 if (x
->sel
.family
!= AF_INET6
)
156 skb
->network_header
+= IPV4_BEET_PHMAXLEN
;
157 skb
->mac_header
= skb
->network_header
+
158 offsetof(struct iphdr
, protocol
);
159 skb
->transport_header
= skb
->network_header
+ sizeof(*top_iph
);
161 xfrm4_beet_make_header(skb
);
163 ph
= __skb_pull(skb
, XFRM_MODE_SKB_CB(skb
)->ihl
- hdrlen
);
165 top_iph
= ip_hdr(skb
);
167 if (unlikely(optlen
)) {
168 if (WARN_ON(optlen
< 0))
171 ph
->padlen
= 4 - (optlen
& 4);
172 ph
->hdrlen
= optlen
/ 8;
173 ph
->nexthdr
= top_iph
->protocol
;
175 memset(ph
+ 1, IPOPT_NOP
, ph
->padlen
);
177 top_iph
->protocol
= IPPROTO_BEETPH
;
178 top_iph
->ihl
= sizeof(struct iphdr
) / 4;
181 top_iph
->saddr
= x
->props
.saddr
.a4
;
182 top_iph
->daddr
= x
->id
.daddr
.a4
;
187 /* Add encapsulation header.
189 * The top IP header will be constructed per RFC 2401.
191 static int xfrm4_tunnel_encap_add(struct xfrm_state
*x
, struct sk_buff
*skb
)
193 struct dst_entry
*dst
= skb_dst(skb
);
194 struct iphdr
*top_iph
;
197 skb_set_inner_network_header(skb
, skb_network_offset(skb
));
198 skb_set_inner_transport_header(skb
, skb_transport_offset(skb
));
200 skb_set_network_header(skb
, -x
->props
.header_len
);
201 skb
->mac_header
= skb
->network_header
+
202 offsetof(struct iphdr
, protocol
);
203 skb
->transport_header
= skb
->network_header
+ sizeof(*top_iph
);
204 top_iph
= ip_hdr(skb
);
207 top_iph
->version
= 4;
209 top_iph
->protocol
= xfrm_af2proto(skb_dst(skb
)->ops
->family
);
211 /* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
212 if (x
->props
.extra_flags
& XFRM_SA_XFLAG_DONT_ENCAP_DSCP
)
215 top_iph
->tos
= XFRM_MODE_SKB_CB(skb
)->tos
;
216 top_iph
->tos
= INET_ECN_encapsulate(top_iph
->tos
,
217 XFRM_MODE_SKB_CB(skb
)->tos
);
219 flags
= x
->props
.flags
;
220 if (flags
& XFRM_STATE_NOECN
)
221 IP_ECN_clear(top_iph
);
223 top_iph
->frag_off
= (flags
& XFRM_STATE_NOPMTUDISC
) ?
224 0 : (XFRM_MODE_SKB_CB(skb
)->frag_off
& htons(IP_DF
));
226 top_iph
->ttl
= ip4_dst_hoplimit(xfrm_dst_child(dst
));
228 top_iph
->saddr
= x
->props
.saddr
.a4
;
229 top_iph
->daddr
= x
->id
.daddr
.a4
;
230 ip_select_ident(dev_net(dst
->dev
), skb
, NULL
);
235 #if IS_ENABLED(CONFIG_IPV6)
236 static int xfrm6_tunnel_encap_add(struct xfrm_state
*x
, struct sk_buff
*skb
)
238 struct dst_entry
*dst
= skb_dst(skb
);
239 struct ipv6hdr
*top_iph
;
242 skb_set_inner_network_header(skb
, skb_network_offset(skb
));
243 skb_set_inner_transport_header(skb
, skb_transport_offset(skb
));
245 skb_set_network_header(skb
, -x
->props
.header_len
);
246 skb
->mac_header
= skb
->network_header
+
247 offsetof(struct ipv6hdr
, nexthdr
);
248 skb
->transport_header
= skb
->network_header
+ sizeof(*top_iph
);
249 top_iph
= ipv6_hdr(skb
);
251 top_iph
->version
= 6;
253 memcpy(top_iph
->flow_lbl
, XFRM_MODE_SKB_CB(skb
)->flow_lbl
,
254 sizeof(top_iph
->flow_lbl
));
255 top_iph
->nexthdr
= xfrm_af2proto(skb_dst(skb
)->ops
->family
);
257 if (x
->props
.extra_flags
& XFRM_SA_XFLAG_DONT_ENCAP_DSCP
)
260 dsfield
= XFRM_MODE_SKB_CB(skb
)->tos
;
261 dsfield
= INET_ECN_encapsulate(dsfield
, XFRM_MODE_SKB_CB(skb
)->tos
);
262 if (x
->props
.flags
& XFRM_STATE_NOECN
)
263 dsfield
&= ~INET_ECN_MASK
;
264 ipv6_change_dsfield(top_iph
, 0, dsfield
);
265 top_iph
->hop_limit
= ip6_dst_hoplimit(xfrm_dst_child(dst
));
266 top_iph
->saddr
= *(struct in6_addr
*)&x
->props
.saddr
;
267 top_iph
->daddr
= *(struct in6_addr
*)&x
->id
.daddr
;
271 static int xfrm6_beet_encap_add(struct xfrm_state
*x
, struct sk_buff
*skb
)
273 struct ipv6hdr
*top_iph
;
274 struct ip_beet_phdr
*ph
;
278 optlen
= XFRM_MODE_SKB_CB(skb
)->optlen
;
279 if (unlikely(optlen
))
280 hdr_len
+= IPV4_BEET_PHMAXLEN
- (optlen
& 4);
282 skb_set_network_header(skb
, -x
->props
.header_len
- hdr_len
);
283 if (x
->sel
.family
!= AF_INET6
)
284 skb
->network_header
+= IPV4_BEET_PHMAXLEN
;
285 skb
->mac_header
= skb
->network_header
+
286 offsetof(struct ipv6hdr
, nexthdr
);
287 skb
->transport_header
= skb
->network_header
+ sizeof(*top_iph
);
288 ph
= __skb_pull(skb
, XFRM_MODE_SKB_CB(skb
)->ihl
- hdr_len
);
290 xfrm6_beet_make_header(skb
);
292 top_iph
= ipv6_hdr(skb
);
293 if (unlikely(optlen
)) {
294 if (WARN_ON(optlen
< 0))
297 ph
->padlen
= 4 - (optlen
& 4);
298 ph
->hdrlen
= optlen
/ 8;
299 ph
->nexthdr
= top_iph
->nexthdr
;
301 memset(ph
+ 1, IPOPT_NOP
, ph
->padlen
);
303 top_iph
->nexthdr
= IPPROTO_BEETPH
;
306 top_iph
->saddr
= *(struct in6_addr
*)&x
->props
.saddr
;
307 top_iph
->daddr
= *(struct in6_addr
*)&x
->id
.daddr
;
312 /* Add encapsulation header.
314 * On exit, the transport header will be set to the start of the
315 * encapsulation header to be filled in by x->type->output and the mac
316 * header will be set to the nextheader (protocol for IPv4) field of the
317 * extension header directly preceding the encapsulation header, or in
318 * its absence, that of the top IP header.
319 * The value of the network header will always point to the top IP header
320 * while skb->data will point to the payload.
322 static int xfrm4_prepare_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
326 err
= xfrm_inner_extract_output(x
, skb
);
330 IPCB(skb
)->flags
|= IPSKB_XFRM_TUNNEL_SIZE
;
331 skb
->protocol
= htons(ETH_P_IP
);
333 switch (x
->outer_mode
.encap
) {
335 return xfrm4_beet_encap_add(x
, skb
);
336 case XFRM_MODE_TUNNEL
:
337 return xfrm4_tunnel_encap_add(x
, skb
);
344 static int xfrm6_prepare_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
346 #if IS_ENABLED(CONFIG_IPV6)
349 err
= xfrm_inner_extract_output(x
, skb
);
354 skb
->protocol
= htons(ETH_P_IPV6
);
356 switch (x
->outer_mode
.encap
) {
358 return xfrm6_beet_encap_add(x
, skb
);
359 case XFRM_MODE_TUNNEL
:
360 return xfrm6_tunnel_encap_add(x
, skb
);
367 return -EAFNOSUPPORT
;
370 static int xfrm_outer_mode_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
372 switch (x
->outer_mode
.encap
) {
374 case XFRM_MODE_TUNNEL
:
375 if (x
->outer_mode
.family
== AF_INET
)
376 return xfrm4_prepare_output(x
, skb
);
377 if (x
->outer_mode
.family
== AF_INET6
)
378 return xfrm6_prepare_output(x
, skb
);
380 case XFRM_MODE_TRANSPORT
:
381 if (x
->outer_mode
.family
== AF_INET
)
382 return xfrm4_transport_output(x
, skb
);
383 if (x
->outer_mode
.family
== AF_INET6
)
384 return xfrm6_transport_output(x
, skb
);
386 case XFRM_MODE_ROUTEOPTIMIZATION
:
387 if (x
->outer_mode
.family
== AF_INET6
)
388 return xfrm6_ro_output(x
, skb
);
399 #if IS_ENABLED(CONFIG_NET_PKTGEN)
400 int pktgen_xfrm_outer_mode_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
402 return xfrm_outer_mode_output(x
, skb
);
404 EXPORT_SYMBOL_GPL(pktgen_xfrm_outer_mode_output
);
407 static int xfrm_output_one(struct sk_buff
*skb
, int err
)
409 struct dst_entry
*dst
= skb_dst(skb
);
410 struct xfrm_state
*x
= dst
->xfrm
;
411 struct net
*net
= xs_net(x
);
417 err
= xfrm_skb_check_space(skb
);
419 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTERROR
);
423 skb
->mark
= xfrm_smark_get(skb
->mark
, x
);
425 err
= xfrm_outer_mode_output(x
, skb
);
427 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTSTATEMODEERROR
);
431 spin_lock_bh(&x
->lock
);
433 if (unlikely(x
->km
.state
!= XFRM_STATE_VALID
)) {
434 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTSTATEINVALID
);
439 err
= xfrm_state_check_expire(x
);
441 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTSTATEEXPIRED
);
445 err
= x
->repl
->overflow(x
, skb
);
447 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTSTATESEQERROR
);
451 x
->curlft
.bytes
+= skb
->len
;
454 spin_unlock_bh(&x
->lock
);
458 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTERROR
);
463 if (xfrm_offload(skb
)) {
464 x
->type_offload
->encap(x
, skb
);
466 /* Inner headers are invalid now. */
467 skb
->encapsulation
= 0;
469 err
= x
->type
->output(x
, skb
);
470 if (err
== -EINPROGRESS
)
476 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTSTATEPROTOERROR
);
480 dst
= skb_dst_pop(skb
);
482 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTERROR
);
486 skb_dst_set(skb
, dst
);
488 } while (x
&& !(x
->outer_mode
.flags
& XFRM_MODE_FLAG_TUNNEL
));
493 spin_unlock_bh(&x
->lock
);
500 int xfrm_output_resume(struct sk_buff
*skb
, int err
)
502 struct net
*net
= xs_net(skb_dst(skb
)->xfrm
);
504 while (likely((err
= xfrm_output_one(skb
, err
)) == 0)) {
507 err
= skb_dst(skb
)->ops
->local_out(net
, skb
->sk
, skb
);
508 if (unlikely(err
!= 1))
511 if (!skb_dst(skb
)->xfrm
)
512 return dst_output(net
, skb
->sk
, skb
);
514 err
= nf_hook(skb_dst(skb
)->ops
->family
,
515 NF_INET_POST_ROUTING
, net
, skb
->sk
, skb
,
516 NULL
, skb_dst(skb
)->dev
, xfrm_output2
);
517 if (unlikely(err
!= 1))
521 if (err
== -EINPROGRESS
)
527 EXPORT_SYMBOL_GPL(xfrm_output_resume
);
/* Start transformation of @skb from the top.
 *
 * Has the (net, sk, skb) signature required of an nf_hook() continuation;
 * @net and @sk are unused here.
 */
static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return xfrm_output_resume(skb, 1);
}

534 static int xfrm_output_gso(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
536 struct sk_buff
*segs
, *nskb
;
538 BUILD_BUG_ON(sizeof(*IPCB(skb
)) > SKB_GSO_CB_OFFSET
);
539 BUILD_BUG_ON(sizeof(*IP6CB(skb
)) > SKB_GSO_CB_OFFSET
);
540 segs
= skb_gso_segment(skb
, 0);
543 return PTR_ERR(segs
);
547 skb_list_walk_safe(segs
, segs
, nskb
) {
550 skb_mark_not_on_list(segs
);
551 err
= xfrm_output2(net
, sk
, segs
);
554 kfree_skb_list(nskb
);
562 int xfrm_output(struct sock
*sk
, struct sk_buff
*skb
)
564 struct net
*net
= dev_net(skb_dst(skb
)->dev
);
565 struct xfrm_state
*x
= skb_dst(skb
)->xfrm
;
570 if (xfrm_dev_offload_ok(skb
, x
)) {
573 sp
= secpath_set(skb
);
575 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTERROR
);
579 skb
->encapsulation
= 1;
582 sp
->xvec
[sp
->len
++] = x
;
585 if (skb_is_gso(skb
)) {
586 if (skb
->inner_protocol
)
587 return xfrm_output_gso(net
, sk
, skb
);
589 skb_shinfo(skb
)->gso_type
|= SKB_GSO_ESP
;
593 if (x
->xso
.dev
&& x
->xso
.dev
->features
& NETIF_F_HW_ESP_TX_CSUM
)
597 return xfrm_output_gso(net
, sk
, skb
);
600 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
601 err
= skb_checksum_help(skb
);
603 XFRM_INC_STATS(net
, LINUX_MIB_XFRMOUTERROR
);
610 return xfrm_output2(net
, sk
, skb
);
612 EXPORT_SYMBOL_GPL(xfrm_output
);
614 static int xfrm_inner_extract_output(struct xfrm_state
*x
, struct sk_buff
*skb
)
616 const struct xfrm_state_afinfo
*afinfo
;
617 const struct xfrm_mode
*inner_mode
;
618 int err
= -EAFNOSUPPORT
;
620 if (x
->sel
.family
== AF_UNSPEC
)
621 inner_mode
= xfrm_ip2inner_mode(x
,
622 xfrm_af2proto(skb_dst(skb
)->ops
->family
));
624 inner_mode
= &x
->inner_mode
;
626 if (inner_mode
== NULL
)
627 return -EAFNOSUPPORT
;
630 afinfo
= xfrm_state_afinfo_get_rcu(inner_mode
->family
);
632 err
= afinfo
->extract_output(x
, skb
);
638 void xfrm_local_error(struct sk_buff
*skb
, int mtu
)
641 struct xfrm_state_afinfo
*afinfo
;
643 if (skb
->protocol
== htons(ETH_P_IP
))
645 else if (skb
->protocol
== htons(ETH_P_IPV6
) &&
646 skb
->sk
->sk_family
== AF_INET6
)
651 afinfo
= xfrm_state_get_afinfo(proto
);
653 afinfo
->local_error(skb
, mtu
);
657 EXPORT_SYMBOL_GPL(xfrm_local_error
);