/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BPF_TCP_HELPERS_H
#define __BPF_TCP_HELPERS_H

#include <stdbool.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_trace_helpers.h"

#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
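
/* Example (hypothetical hook name, for illustration only): BPF_STRUCT_OPS()
 * expands to a SEC("struct_ops/...") annotation plus a BPF_PROG() wrapper,
 * so a struct_ops program is written as if it took the kernel's typed
 * arguments directly:
 *
 *	void BPF_STRUCT_OPS(bpf_example_init, struct sock *sk)
 *	{
 *		struct tcp_sock *tp = tcp_sk(sk);
 *
 *		tp->snd_ssthresh = 0x7fffffff;
 *	}
 */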

#define tcp_jiffies32 ((__u32)bpf_jiffies64())

struct sock_common {
	unsigned char	skc_state;
} __attribute__((preserve_access_index));

enum sk_pacing {
	SK_PACING_NONE		= 0,
	SK_PACING_NEEDED	= 1,
	SK_PACING_FQ		= 2,
};

struct sock {
	struct sock_common	__sk_common;
	unsigned long		sk_pacing_rate;
	__u32			sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));

struct inet_sock {
	struct sock		sk;
} __attribute__((preserve_access_index));

struct inet_connection_sock {
	struct inet_sock	  icsk_inet;
	__u8			  icsk_ca_state:6,
				  icsk_ca_setsockopt:1,
				  icsk_ca_dst_locked:1;
	struct {
		__u8		  pending;
	} icsk_ack;
	__u64			  icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));

struct tcp_sock {
	struct inet_connection_sock	inet_conn;

	__u32	rcv_nxt;
	__u32	snd_nxt;
	__u32	snd_una;
	__u8	ecn_flags;
	__u32	delivered;
	__u32	delivered_ce;
	__u32	snd_cwnd;
	__u32	snd_cwnd_cnt;
	__u32	snd_cwnd_clamp;
	__u32	snd_ssthresh;
	__u8	syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		save_syn:1,	/* Save headers of SYN packet */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		syn_smc:1;	/* SYN includes SMC */
	__u32	max_packets_out;
	__u32	lsndtime;
	__u32	prior_cwnd;
	__u64	tcp_mstamp;	/* most recent packet received/sent */
} __attribute__((preserve_access_index));

static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	return (struct inet_connection_sock *)sk;
}

static __always_inline void *inet_csk_ca(const struct sock *sk)
{
	return (void *)inet_csk(sk)->icsk_ca_priv;
}

static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	return (struct tcp_sock *)sk;
}

static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)
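
/* Worked example: the cast to __s32 makes the comparison robust to 32-bit
 * wraparound. With seq1 = 0xffffffff and seq2 = 0x1, seq1 - seq2 is
 * 0xfffffffe, which is negative as __s32, so before(0xffffffff, 0x1) is
 * true: 0xffffffff sits "before" 0x1 across the wrap point.
 */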

#define TCP_ECN_OK		1
#define TCP_ECN_QUEUE_CWR	2
#define TCP_ECN_DEMAND_CWR	4
#define TCP_ECN_SEEN		8

enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	= 1,
	ICSK_ACK_TIMER	= 2,
	ICSK_ACK_PUSHED	= 4,
	ICSK_ACK_PUSHED2 = 8,
	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
};

enum tcp_ca_event {
	CA_EVENT_TX_START = 0,
	CA_EVENT_CWND_RESTART = 1,
	CA_EVENT_COMPLETE_CWR = 2,
	CA_EVENT_LOSS = 3,
	CA_EVENT_ECN_NO_CE = 4,
	CA_EVENT_ECN_IS_CE = 5,
};

enum tcp_ca_state {
	TCP_CA_Open = 0,
	TCP_CA_Disorder = 1,
	TCP_CA_CWR = 2,
	TCP_CA_Recovery = 3,
	TCP_CA_Loss = 4
};

struct ack_sample {
	__u32 pkts_acked;
	__s32 rtt_us;
	__u32 in_flight;
} __attribute__((preserve_access_index));

struct rate_sample {
	__u64  prior_mstamp;	/* starting timestamp for interval */
	__u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
	__s32  delivered;	/* number of packets delivered over interval */
	long interval_us;	/* time for tp->delivered to incr "delivered" */
	__u32 snd_interval_us;	/* snd interval for delivered packets */
	__u32 rcv_interval_us;	/* rcv interval for delivered packets */
	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
	int  losses;		/* number of packets marked lost upon ACK */
	__u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
	__u32  prior_in_flight;	/* in flight before this ACK */
	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
	bool is_retrans;	/* is sample from retransmission? */
	bool is_ack_delayed;	/* is this (likely) a delayed ACK? */
} __attribute__((preserve_access_index));

#define TCP_CA_NAME_MAX		16
#define TCP_CONG_NEEDS_ECN	0x2

struct tcp_congestion_ops {
	char name[TCP_CA_NAME_MAX];
	__u32 flags;

	/* initialize private data (optional) */
	void (*init)(struct sock *sk);
	/* cleanup private data (optional) */
	void (*release)(struct sock *sk);

	/* return slow start threshold (required) */
	__u32 (*ssthresh)(struct sock *sk);
	/* do new cwnd calculation (required) */
	void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
	/* call before changing ca_state (optional) */
	void (*set_state)(struct sock *sk, __u8 new_state);
	/* call when cwnd event occurs (optional) */
	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
	/* call when ack arrives (optional) */
	void (*in_ack_event)(struct sock *sk, __u32 flags);
	/* new value of cwnd after loss (required) */
	__u32 (*undo_cwnd)(struct sock *sk);
	/* hook for packet ack accounting (optional) */
	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
	/* override sysctl_tcp_min_tso_segs */
	__u32 (*min_tso_segs)(struct sock *sk);
	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
	__u32 (*sndbuf_expand)(struct sock *sk);
	/* call when packets are delivered to update cwnd and pacing rate,
	 * after all the ca_state processing. (optional)
	 */
	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
};
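
/* Usage sketch (names are illustrative, not part of this header): a BPF
 * congestion control fills in the required hooks and registers them via
 * a ".struct_ops" map, e.g.:
 *
 *	SEC(".struct_ops")
 *	struct tcp_congestion_ops example = {
 *		.init		= (void *)bpf_example_init,
 *		.ssthresh	= (void *)bpf_example_ssthresh,
 *		.cong_avoid	= (void *)bpf_example_cong_avoid,
 *		.undo_cwnd	= (void *)bpf_example_undo_cwnd,
 *		.name		= "bpf_example",
 *	};
 */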

#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({			\
	typeof(x) __x = (x);			\
	typeof(y) __y = (y);			\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
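
/* Note: min_not_zero() treats zero as "unset" rather than as a minimum:
 * min_not_zero(0, 10) == 10, min_not_zero(4, 0) == 4, min_not_zero(4, 10) == 4.
 */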

static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
{
	__u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);

	acked -= cwnd - tp->snd_cwnd;
	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);

	return acked;
}
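
/* Worked example: with snd_cwnd = 10, snd_ssthresh = 12 and acked = 5,
 * cwnd is capped at 12, so the window grows by 2 and the remaining 3
 * ACKs are returned for use in congestion avoidance (assuming
 * snd_cwnd_clamp does not bind).
 */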

static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
	return tp->snd_cwnd < tp->snd_ssthresh;
}

static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
	if (tcp_in_slow_start(tp))
		return tp->snd_cwnd < 2 * tp->max_packets_out;

	return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}

static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
{
	/* If credits accumulated at a higher w, apply them gently now. */
	if (tp->snd_cwnd_cnt >= w) {
		tp->snd_cwnd_cnt = 0;
		tp->snd_cwnd++;
	}

	tp->snd_cwnd_cnt += acked;
	if (tp->snd_cwnd_cnt >= w) {
		__u32 delta = tp->snd_cwnd_cnt / w;

		tp->snd_cwnd_cnt -= delta * w;
		tp->snd_cwnd += delta;
	}
	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
}
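
/* Illustrative sketch (hypothetical function, not part of the original
 * header): the helpers above compose into a Reno-style cong_avoid hook,
 * mirroring the kernel's tcp_reno_cong_avoid().
 */
static __always_inline void example_reno_cong_avoid(struct sock *sk, __u32 ack,
						    __u32 acked)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk))
		return;

	/* In slow start, grow exponentially; any leftover ACK credit
	 * feeds the additive-increase phase below.
	 */
	if (tcp_in_slow_start(tp)) {
		acked = tcp_slow_start(tp, acked);
		if (!acked)
			return;
	}
	tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}

#endif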