// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      IPV4 GSO/GRO offload support
 *      Linux INET implementation
 *
 *      TCPv4 GSO/GRO support
 */

#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/gro.h>
#include <net/gso.h>
#include <net/tcp.h>
#include <net/protocol.h>

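/* Propagate the SW timestamp request from the original GSO skb to the
 * segment that covers the requested tskey sequence number.
 */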
static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
                           unsigned int seq, unsigned int mss)
{
        while (skb) {
                if (before(ts_seq, seq + mss)) {
                        skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP;
                        skb_shinfo(skb)->tskey = ts_seq;
                        return;
                }

                skb = skb->next;
                seq += mss;
        }
}

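/* Rewrite one segment's IP address and TCP port to the new values and patch
 * the TCP and IP checksums incrementally instead of recomputing them.
 */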
static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
                                     __be32 *oldip, __be32 newip,
                                     __be16 *oldport, __be16 newport)
{
        struct tcphdr *th;
        struct iphdr *iph;

        if (*oldip == newip && *oldport == newport)
                return;

        th = tcp_hdr(seg);
        iph = ip_hdr(seg);

        inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true);
        inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
        *oldport = newport;

        csum_replace4(&iph->check, *oldip, newip);
        *oldip = newip;
}

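/* After list segmentation only the head skb carries the (possibly rewritten)
 * addresses and ports; copy them to every following segment unless they
 * already match.
 */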
static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
{
        const struct tcphdr *th;
        const struct iphdr *iph;
        struct sk_buff *seg;
        struct tcphdr *th2;
        struct iphdr *iph2;

        seg = segs;
        th = tcp_hdr(seg);
        iph = ip_hdr(seg);
        th2 = tcp_hdr(seg->next);
        iph2 = ip_hdr(seg->next);

        if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
            iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
                return segs;

        while ((seg = seg->next)) {
                th2 = tcp_hdr(seg);
                iph2 = ip_hdr(seg);

                __tcpv4_gso_segment_csum(seg,
                                         &iph2->saddr, iph->saddr,
                                         &th2->source, th->source);
                __tcpv4_gso_segment_csum(seg,
                                         &iph2->daddr, iph->daddr,
                                         &th2->dest, th->dest);
        }

        return segs;
}

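/* Turn a fraglist GSO skb into a list of segments and make every segment
 * carry the head skb's addresses and ports, fixing checksums as we go.
 */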
static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
                                               netdev_features_t features)
{
        skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
        if (IS_ERR(skb))
                return skb;

        return __tcpv4_gso_segment_list_csum(skb);
}

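/* GSO callback for IPv4/TCP: validate the gso_type, take the fraglist path
 * when applicable, make sure the checksum pseudo header is set up, then let
 * the protocol-independent tcp_gso_segment() do the actual splitting.
 */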
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
                                        netdev_features_t features)
{
        if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
                return ERR_PTR(-EINVAL);

        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                return ERR_PTR(-EINVAL);

        if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) {
                struct tcphdr *th = tcp_hdr(skb);

                if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size)
                        return __tcp4_gso_segment_list(skb, features);

                skb->ip_summed = CHECKSUM_NONE;
        }

        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                const struct iphdr *iph = ip_hdr(skb);
                struct tcphdr *th = tcp_hdr(skb);

                /* Set up checksum pseudo header, usually expect stack to
                 * have done this already.
                 */

                th->check = 0;
                skb->ip_summed = CHECKSUM_PARTIAL;
                __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
        }

        return tcp_gso_segment(skb, features);
}

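/* Protocol-independent TCP GSO: split a large TCP skb into mss-sized
 * segments, fix up sequence numbers and checksums on each of them, and move
 * the tcp_wfree destructor (TSQ accounting) to the last segment.
 */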
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
                                netdev_features_t features)
{
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        unsigned int sum_truesize = 0;
        struct tcphdr *th;
        unsigned int thlen;
        unsigned int seq;
        unsigned int oldlen;
        unsigned int mss;
        struct sk_buff *gso_skb = skb;
        __sum16 newcheck;
        bool ooo_okay, copy_destructor;
        __wsum delta;

        th = tcp_hdr(skb);
        thlen = th->doff * 4;
        if (thlen < sizeof(*th))
                goto out;

        if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
                goto out;

        if (!pskb_may_pull(skb, thlen))
                goto out;

        oldlen = ~skb->len;
        __skb_pull(skb, thlen);

        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(skb->len <= mss))
                goto out;

        if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
                /* Packet is from an untrusted source, reset gso_segs. */

                skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

                segs = NULL;
                goto out;
        }

        copy_destructor = gso_skb->destructor == tcp_wfree;
        ooo_okay = gso_skb->ooo_okay;
        /* All segments but the first should have ooo_okay cleared */
        skb->ooo_okay = 0;

        segs = skb_segment(skb, features);
        if (IS_ERR(segs))
                goto out;

        /* Only first segment might have ooo_okay set */
        segs->ooo_okay = ooo_okay;

        /* GSO partial and frag_list segmentation only requires splitting
         * the frame into an MSS multiple and possibly a remainder, both
         * cases return a GSO skb. So update the mss now.
         */
        if (skb_is_gso(segs))
                mss *= skb_shinfo(segs)->gso_segs;

        delta = (__force __wsum)htonl(oldlen + thlen + mss);

        skb = segs;
        th = tcp_hdr(skb);
        seq = ntohl(th->seq);

        if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
                tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);

        newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta));

        while (skb->next) {
                th->fin = th->psh = 0;
                th->check = newcheck;

                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        gso_reset_checksum(skb, ~th->check);
                else
                        th->check = gso_make_checksum(skb, ~th->check);

                seq += mss;
                if (copy_destructor) {
                        skb->destructor = gso_skb->destructor;
                        skb->sk = gso_skb->sk;
                        sum_truesize += skb->truesize;
                }
                skb = skb->next;
                th = tcp_hdr(skb);

                th->seq = htonl(seq);
                th->cwr = 0;
        }

        /* Following permits TCP Small Queues to work well with GSO :
         * The callback to TCP stack will be called at the time last frag
         * is freed at TX completion, and not right now when gso_skb
         * is freed by GSO engine
         */
        if (copy_destructor) {
                int delta;

                swap(gso_skb->sk, skb->sk);
                swap(gso_skb->destructor, skb->destructor);
                sum_truesize += skb->truesize;
                delta = sum_truesize - gso_skb->truesize;
                /* In some pathological cases, delta can be negative.
                 * We need to either use refcount_add() or refcount_sub_and_test()
                 */
                if (likely(delta >= 0))
                        refcount_add(delta, &skb->sk->sk_wmem_alloc);
                else
                        WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc));
        }

        delta = (__force __wsum)htonl(oldlen +
                                      (skb_tail_pointer(skb) -
                                       skb_transport_header(skb)) +
                                      skb->data_len);
        th->check = ~csum_fold(csum_add(csum_unfold(th->check), delta));
        if (skb->ip_summed == CHECKSUM_PARTIAL)
                gso_reset_checksum(skb, ~th->check);
        else
                th->check = gso_make_checksum(skb, ~th->check);
out:
        return segs;
}

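/* Look up an on-going GRO flow on @head matching the TCP ports in @th.
 * Entries whose ports differ get same_flow cleared so they are not
 * considered again for this skb.
 */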
struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
{
        struct tcphdr *th2;
        struct sk_buff *p;

        list_for_each_entry(p, head, list) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;

                th2 = tcp_hdr(p);
                if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
                }

                return p;
        }

        return NULL;
}

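/* Make sure the full TCP header is available in the GRO header area and
 * advance the GRO offset past it; returns NULL if the header cannot be
 * pulled.
 */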
struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
{
        unsigned int thlen, hlen, off;
        struct tcphdr *th;

        off = skb_gro_offset(skb);
        hlen = off + sizeof(*th);
        th = skb_gro_header(skb, hlen, off);
        if (unlikely(!th))
                return NULL;

        thlen = th->doff * 4;
        if (thlen < sizeof(*th))
                return NULL;

        hlen = off + thlen;
        if (!skb_gro_may_pull(skb, hlen)) {
                th = skb_gro_header_slow(skb, hlen, off);
                if (unlikely(!th))
                        return NULL;
        }

        skb_gro_pull(skb, thlen);

        return th;
}

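/* Protocol-independent TCP GRO receive: find a matching flow, compare TCP
 * headers and flags to decide between aggregation and flush, and return the
 * packet that must be completed (if any).
 */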
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
                                struct tcphdr *th)
{
        unsigned int thlen = th->doff * 4;
        struct sk_buff *pp = NULL;
        struct sk_buff *p;
        struct tcphdr *th2;
        unsigned int len;
        __be32 flags;
        unsigned int mss = 1;
        int flush = 1;
        int i;

        len = skb_gro_len(skb);
        flags = tcp_flag_word(th);

        p = tcp_gro_lookup(head, th);
        if (!p)
                goto out_check_final;

        th2 = tcp_hdr(p);
        flush = (__force int)(flags & TCP_FLAG_CWR);
        flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
                  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
        flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
        for (i = sizeof(*th); i < thlen; i += 4)
                flush |= *(u32 *)((u8 *)th + i) ^
                         *(u32 *)((u8 *)th2 + i);

        flush |= gro_receive_network_flush(th, th2, p);

        mss = skb_shinfo(p)->gso_size;

        /* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
         * If it is a single frame, do not aggregate it if its length
         * is bigger than our mss.
         */
        if (unlikely(skb_is_gso(skb)))
                flush |= (mss != skb_shinfo(skb)->gso_size);
        else
                flush |= (len - 1) >= mss;

        flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
        flush |= skb_cmp_decrypted(p, skb);

        if (unlikely(NAPI_GRO_CB(p)->is_flist)) {
                flush |= (__force int)(flags ^ tcp_flag_word(th2));
                flush |= skb->ip_summed != p->ip_summed;
                flush |= skb->csum_level != p->csum_level;
                flush |= NAPI_GRO_CB(p)->count >= 64;

                if (flush || skb_gro_receive_list(p, skb))
                        mss = 1;

                goto out_check_final;
        }

        if (flush || skb_gro_receive(p, skb)) {
                mss = 1;
                goto out_check_final;
        }

        tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);

out_check_final:
        /* Force a flush if last segment is smaller than mss. */
        if (unlikely(skb_is_gso(skb)))
                flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
        else
                flush = len < mss;

        flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
                                        TCP_FLAG_RST | TCP_FLAG_SYN |
                                        TCP_FLAG_FIN));

        if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
                pp = p;

        NAPI_GRO_CB(skb)->flush |= (flush != 0);

        return pp;
}

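/* Finish a GRO'd TCP skb: restore CHECKSUM_PARTIAL offsets and fill in
 * gso_segs (and SKB_GSO_TCP_ECN if CWR was observed) so the rest of the
 * stack sees a regular GSO packet.
 */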
void tcp_gro_complete(struct sk_buff *skb)
{
        struct tcphdr *th = tcp_hdr(skb);
        struct skb_shared_info *shinfo;

        if (skb->encapsulation)
                skb->inner_transport_header = skb->transport_header;

        skb->csum_start = (unsigned char *)th - skb->head;
        skb->csum_offset = offsetof(struct tcphdr, check);
        skb->ip_summed = CHECKSUM_PARTIAL;

        shinfo = skb_shinfo(skb);
        shinfo->gso_segs = NAPI_GRO_CB(skb)->count;

        if (th->cwr)
                shinfo->gso_type |= SKB_GSO_TCP_ECN;
}
EXPORT_SYMBOL(tcp_gro_complete);

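/* Decide whether this flow should use fraglist GRO: only when the device has
 * NETIF_F_GRO_FRAGLIST enabled and no established local socket matches the
 * flow, i.e. the packet is presumably going to be forwarded.
 */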
static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
                                    struct tcphdr *th)
{
        const struct iphdr *iph;
        struct sk_buff *p;
        struct sock *sk;
        struct net *net;
        int iif, sdif;

        if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
                return;

        p = tcp_gro_lookup(head, th);
        if (p) {
                NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
                return;
        }

        inet_get_iif_sdif(skb, &iif, &sdif);
        iph = skb_gro_network_header(skb);
        net = dev_net(skb->dev);
        sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
                                       iph->saddr, th->source,
                                       iph->daddr, ntohs(th->dest),
                                       iif, sdif);
        NAPI_GRO_CB(skb)->is_flist = !sk;
        if (sk)
                sock_put(sk);
}

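/* IPv4 GRO receive callback: validate the TCP checksum unless a flush is
 * already pending, pull the TCP header and hand the skb to tcp_gro_receive().
 */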
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
        struct tcphdr *th;

        /* Don't bother verifying checksum if we're going to flush anyway. */
        if (!NAPI_GRO_CB(skb)->flush &&
            skb_gro_checksum_validate(skb, IPPROTO_TCP,
                                      inet_gro_compute_pseudo))
                goto flush;

        th = tcp_gro_pull_header(skb);
        if (!th)
                goto flush;

        tcp4_check_fraglist_gro(head, skb, th);

        return tcp_gro_receive(head, skb, th);

flush:
        NAPI_GRO_CB(skb)->flush = 1;
        return NULL;
}

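/* IPv4 GRO complete callback: either mark the aggregate as fraglist GSO or
 * recompute the TCP pseudo-header checksum and finish via tcp_gro_complete().
 */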
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
        const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
        const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
        struct tcphdr *th = tcp_hdr(skb);

        if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
                skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
                skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;

                __skb_incr_checksum_unnecessary(skb);

                return 0;
        }

        th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
                                  iph->daddr, 0);

        skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4 |
                        (NAPI_GRO_CB(skb)->ip_fixedid * SKB_GSO_TCP_FIXEDID);

        tcp_gro_complete(skb);
        return 0;
}

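/* Register the TCPv4 GSO/GRO callbacks for IPPROTO_TCP; presumably invoked
 * once at boot from the IPv4 offload initialization path.
 */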
int __init tcpv4_offload_init(void)
{
        net_hotdata.tcpv4_offload = (struct net_offload) {
                .callbacks = {
                        .gso_segment    =       tcp4_gso_segment,
                        .gro_receive    =       tcp4_gro_receive,
                        .gro_complete   =       tcp4_gro_complete,
                },
        };
        return inet_add_offload(&net_hotdata.tcpv4_offload, IPPROTO_TCP);
}