1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __BPF_TCP_HELPERS_H
3 #define __BPF_TCP_HELPERS_H
6 #include <linux/types.h>
7 #include <bpf/bpf_helpers.h>
8 #include <bpf/bpf_core_read.h>
9 #include <bpf/bpf_tracing.h>
11 #define BPF_STRUCT_OPS(name, args...) \
12 SEC("struct_ops/"#name) \
/* The value returned by bpf_jiffies64(), truncated to 32 bits by the __u32 cast. */
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
18 unsigned char skc_state
;
20 } __attribute__((preserve_access_index
));
29 struct sock_common __sk_common
;
30 unsigned long sk_pacing_rate
;
31 __u32 sk_pacing_status
; /* see enum sk_pacing */
32 } __attribute__((preserve_access_index
));
36 } __attribute__((preserve_access_index
));
38 struct inet_connection_sock
{
39 struct inet_sock icsk_inet
;
46 __u64 icsk_ca_priv
[104 / sizeof(__u64
)];
47 } __attribute__((preserve_access_index
));
50 struct sock_common __req_common
;
51 } __attribute__((preserve_access_index
));
54 struct inet_connection_sock inet_conn
;
67 __u8 syn_data
:1, /* SYN includes data */
68 syn_fastopen
:1, /* SYN includes Fast Open option */
69 syn_fastopen_exp
:1,/* SYN includes Fast Open exp. option */
70 syn_fastopen_ch
:1, /* Active TFO re-enabling probe */
71 syn_data_acked
:1,/* data in SYN is acked by SYN-ACK */
72 save_syn
:1, /* Save headers of SYN packet */
73 is_cwnd_limited
:1,/* forward progress limited by snd_cwnd? */
74 syn_smc
:1; /* SYN includes SMC */
75 __u32 max_packets_out
;
78 __u64 tcp_mstamp
; /* most recent packet received/sent */
79 } __attribute__((preserve_access_index
));
/*
 * View a socket pointer as a pointer to struct inet_connection_sock.
 *
 * This is a plain pointer cast: the address is unchanged, the const
 * qualifier of @sk is dropped, and nothing verifies that @sk actually
 * refers to a connection socket.
 */
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
	return (struct inet_connection_sock *)sk;
}
86 static __always_inline
void *inet_csk_ca(const struct sock
*sk
)
88 return (void *)inet_csk(sk
)->icsk_ca_priv
;
/*
 * View a socket pointer as a pointer to struct tcp_sock.
 *
 * This is a plain pointer cast: the address is unchanged, the const
 * qualifier of @sk is dropped, and nothing verifies that @sk actually
 * refers to a TCP socket.
 */
static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
{
	return (struct tcp_sock *)sk;
}
/*
 * Wraparound-tolerant ordering test for 32-bit sequence numbers.
 *
 * The subtraction is done in unsigned 32-bit arithmetic (so it wraps modulo
 * 2^32) and the result is then reinterpreted as signed. The function returns
 * true when that signed difference is negative, i.e. when @seq1 is less than
 * 2^31 steps "behind" @seq2 on the sequence circle. Equal values yield false.
 */
static __always_inline bool before(__u32 seq1, __u32 seq2)
{
	return (__s32)(seq1 - seq2) < 0;
}

/* after(seq2, seq1) is before() with its two arguments swapped. */
#define after(seq2, seq1)	before(seq1, seq2)
/*
 * TCP ECN flag bits. Each value is a distinct power of two, so the flags can
 * be OR-ed together and tested individually.
 * NOTE(review): the field these bits are stored in is not visible in this
 * part of the file - confirm against the users of these constants.
 */
#define TCP_ECN_QUEUE_CWR	2
#define TCP_ECN_DEMAND_CWR	4
#define TCP_ECN_SEEN		8
107 enum inet_csk_ack_state_t
{
111 ICSK_ACK_PUSHED2
= 8,
112 ICSK_ACK_NOW
= 16 /* Send the next ACK immediately (once) */
116 CA_EVENT_TX_START
= 0,
117 CA_EVENT_CWND_RESTART
= 1,
118 CA_EVENT_COMPLETE_CWR
= 2,
120 CA_EVENT_ECN_NO_CE
= 4,
121 CA_EVENT_ECN_IS_CE
= 5,
128 } __attribute__((preserve_access_index
));
131 __u64 prior_mstamp
; /* starting timestamp for interval */
132 __u32 prior_delivered
; /* tp->delivered at "prior_mstamp" */
133 __s32 delivered
; /* number of packets delivered over interval */
134 long interval_us
; /* time for tp->delivered to incr "delivered" */
135 __u32 snd_interval_us
; /* snd interval for delivered packets */
136 __u32 rcv_interval_us
; /* rcv interval for delivered packets */
137 long rtt_us
; /* RTT of last (S)ACKed packet (or -1) */
138 int losses
; /* number of packets marked lost upon ACK */
139 __u32 acked_sacked
; /* number of packets newly (S)ACKed upon ACK */
140 __u32 prior_in_flight
; /* in flight before this ACK */
141 bool is_app_limited
; /* is sample from packet with bubble in pipe? */
142 bool is_retrans
; /* is sample from retransmission? */
143 bool is_ack_delayed
; /* is this (likely) a delayed ACK? */
144 } __attribute__((preserve_access_index
));
/* Size, in bytes, of the name[] array in struct tcp_congestion_ops. */
#define TCP_CA_NAME_MAX		16
/*
 * Flag bit with value 0x2.
 * NOTE(review): presumably set in a flags member of struct
 * tcp_congestion_ops; that member is not visible here - confirm.
 */
#define TCP_CONG_NEEDS_ECN	0x2
149 struct tcp_congestion_ops
{
150 char name
[TCP_CA_NAME_MAX
];
153 /* initialize private data (optional) */
154 void (*init
)(struct sock
*sk
);
155 /* cleanup private data (optional) */
156 void (*release
)(struct sock
*sk
);
158 /* return slow start threshold (required) */
159 __u32 (*ssthresh
)(struct sock
*sk
);
160 /* do new cwnd calculation (required) */
161 void (*cong_avoid
)(struct sock
*sk
, __u32 ack
, __u32 acked
);
162 /* call before changing ca_state (optional) */
163 void (*set_state
)(struct sock
*sk
, __u8 new_state
);
164 /* call when cwnd event occurs (optional) */
165 void (*cwnd_event
)(struct sock
*sk
, enum tcp_ca_event ev
);
166 /* call when ack arrives (optional) */
167 void (*in_ack_event
)(struct sock
*sk
, __u32 flags
);
168 /* new value of cwnd after loss (required) */
169 __u32 (*undo_cwnd
)(struct sock
*sk
);
170 /* hook for packet ack accounting (optional) */
171 void (*pkts_acked
)(struct sock
*sk
, const struct ack_sample
*sample
);
172 /* override sysctl_tcp_min_tso_segs */
173 __u32 (*min_tso_segs
)(struct sock
*sk
);
174 /* returns the multiplier used in tcp_sndbuf_expand (optional) */
175 __u32 (*sndbuf_expand
)(struct sock
*sk
);
176 /* call when packets are delivered to update cwnd and pacing rate,
177 * after all the ca_state processing. (optional)
179 void (*cong_control
)(struct sock
*sk
, const struct rate_sample
*rs
);
/*
 * Smaller / larger of two values.
 *
 * These are plain textual macros: the argument that ends up selected is
 * evaluated twice, so do not pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))

/*
 * Smaller of @x and @y, except that a zero operand is ignored: if one of
 * them is zero the other is returned, and if both are zero the result is
 * zero.
 *
 * Each argument is evaluated exactly once (it is copied into a local before
 * use). __typeof__ is used instead of typeof so the macro also builds when
 * the compiler runs in a strict ISO mode such as -std=c11.
 */
#define min_not_zero(x, y) ({			\
	__typeof__(x) __x = (x);		\
	__typeof__(y) __y = (y);		\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
189 static __always_inline __u32
tcp_slow_start(struct tcp_sock
*tp
, __u32 acked
)
191 __u32 cwnd
= min(tp
->snd_cwnd
+ acked
, tp
->snd_ssthresh
);
193 acked
-= cwnd
- tp
->snd_cwnd
;
194 tp
->snd_cwnd
= min(cwnd
, tp
->snd_cwnd_clamp
);
199 static __always_inline
bool tcp_in_slow_start(const struct tcp_sock
*tp
)
201 return tp
->snd_cwnd
< tp
->snd_ssthresh
;
204 static __always_inline
bool tcp_is_cwnd_limited(const struct sock
*sk
)
206 const struct tcp_sock
*tp
= tcp_sk(sk
);
208 /* If in slow start, ensure cwnd grows to twice what was ACKed. */
209 if (tcp_in_slow_start(tp
))
210 return tp
->snd_cwnd
< 2 * tp
->max_packets_out
;
212 return !!BPF_CORE_READ_BITFIELD(tp
, is_cwnd_limited
);
215 static __always_inline
void tcp_cong_avoid_ai(struct tcp_sock
*tp
, __u32 w
, __u32 acked
)
217 /* If credits accumulated at a higher w, apply them gently now. */
218 if (tp
->snd_cwnd_cnt
>= w
) {
219 tp
->snd_cwnd_cnt
= 0;
223 tp
->snd_cwnd_cnt
+= acked
;
224 if (tp
->snd_cwnd_cnt
>= w
) {
225 __u32 delta
= tp
->snd_cwnd_cnt
/ w
;
227 tp
->snd_cwnd_cnt
-= delta
* w
;
228 tp
->snd_cwnd
+= delta
;
230 tp
->snd_cwnd
= min(tp
->snd_cwnd
, tp
->snd_cwnd_clamp
);