3 * SPDX-License-Identifier: BSD-3-Clause
5 * Copyright (c) 2018-2020
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * Author: Randall Stewart <rrs@netflix.com>
33 #ifndef __tcp_ratelimit_h__
34 #define __tcp_ratelimit_h__
38 #define RL_MIN_DIVISOR 50 /* Divisors below this are replaced by RL_DEFAULT_DIVISOR in tcp_get_pacing_burst_size_w_divisor() */
39 #define RL_DEFAULT_DIVISOR 1000 /* Fallback divisor; the burst-size comment notes this value gives the "google formula" */
41 /* Flags on an individual rate */
42 #define HDWRPACE_INITED 0x0001
43 #define HDWRPACE_TAGPRESENT 0x0002
44 #define HDWRPACE_IFPDEPARTED 0x0004
45 struct tcp_hwrate_limit_table
{
46 const struct tcp_rate_set
*ptbl
; /* Pointer to parent table */
47 struct m_snd_tag
*tag
; /* Send tag if needed (chelsio) */
48 long rate
; /* Rate we get in Bytes per second (Bps) */
49 long using; /* How many flows are using this hdwr rate. */
51 uint32_t time_between
; /* Time-Gap between packets at this rate */
56 #define RS_IS_DEFF 0x0001 /* It's a lagg, do a double lookup */
57 #define RS_IS_INTF 0x0002 /* It's a plain interface */
58 #define RS_NO_PRE 0x0004 /* The interface has set rates */
59 #define RS_INT_TBL 0x0010 /*
60 * The table is the internal version
61 * which has special setup requirements.
63 #define RS_IS_DEAD 0x0020 /* The RS is on the dead list */
64 #define RS_FUNERAL_SCHD 0x0040 /* Is an epoch call scheduled to bury this guy? */
65 #define RS_INTF_NO_SUP 0x0100 /* The interface does not support rate limiting */
68 struct sysctl_ctx_list sysctl_ctx
;
69 CK_LIST_ENTRY(tcp_rate_set
) next
;
71 struct tcp_hwrate_limit_table
*rs_rlt
;
72 uint64_t rs_flows_using
;
73 uint64_t rs_flow_limit
;
81 struct epoch_context rs_epoch_ctx
;
/* Declares struct head_tcp_rate_set, the CK list head type for struct tcp_rate_set entries. */
84 CK_LIST_HEAD(head_tcp_rate_set
, tcp_rate_set
);
87 #define RS_PACING_EXACT_MATCH 0x0001 /* Need an exact match for rate */
88 #define RS_PACING_GT 0x0002 /* Greater than requested */
89 #define RS_PACING_GEQ 0x0004 /* Greater than or equal to */
90 #define RS_PACING_LT 0x0008 /* Less than requested rate */
91 #define RS_PACING_SUB_OK 0x0010 /* If a rate can't be found get the
92 * next best rate (highest or lowest). */
94 #ifndef ETHERNET_SEGMENT_SIZE
95 #define ETHERNET_SEGMENT_SIZE 1514
100 #define DETAILED_RATELIMIT_SYSCTL 1 /*
101 * Undefine this if you don't want
102 * detailed rates to appear in
104 * With the definition each rate
105 * shows up in your sysctl tree
109 tcp_hw_highest_rate(const struct tcp_hwrate_limit_table
*rle
)
111 return (rle
->ptbl
->rs_rlt
[rle
->ptbl
->rs_highest_valid
].rate
);
115 tcp_hw_highest_rate_ifp(struct ifnet
*ifp
, struct inpcb
*inp
);
/*
 * Prototype only; no body for this declaration is visible here.
 * Returns a pointer to a const struct tcp_hwrate_limit_table.
 * NOTE(review): flags presumably takes the RS_PACING_* bits, and *error /
 * *lower_rate look like out-parameters; confirm against the implementation.
 */
117 const struct tcp_hwrate_limit_table
*
118 tcp_set_pacing_rate(struct tcpcb
*tp
, struct ifnet
*ifp
,
119 uint64_t bytes_per_sec
, int flags
, int *error
, uint64_t *lower_rate
);
/*
 * Prototype only; no body for this declaration is visible here.
 * Takes an existing rate-table entry (crte) plus the same arguments as
 * tcp_set_pacing_rate() and returns a pointer to a const
 * struct tcp_hwrate_limit_table.
 * NOTE(review): presumably crte is the entry currently in use and the return
 * value is the entry to use afterwards; confirm against the implementation.
 */
121 const struct tcp_hwrate_limit_table
*
122 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table
*crte
,
123 struct tcpcb
*tp
, struct ifnet
*ifp
,
124 uint64_t bytes_per_sec
, int flags
, int *error
, uint64_t *lower_rate
);
126 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table
*crte
,
130 tcp_get_pacing_burst_size_w_divisor(struct tcpcb
*tp
, uint64_t bw
, uint32_t segsiz
, int can_use_1mss
,
131 const struct tcp_hwrate_limit_table
*te
, int *err
, int divisor
);
134 tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table
*rte
);
137 tcp_rl_release_ifnet(struct ifnet
*ifp
);
140 static inline const struct tcp_hwrate_limit_table
*
141 tcp_set_pacing_rate(struct tcpcb
*tp
, struct ifnet
*ifp
,
142 uint64_t bytes_per_sec
, int flags
, int *error
, uint64_t *lower_rate
)
149 static inline const struct tcp_hwrate_limit_table
*
150 tcp_chg_pacing_rate(const struct tcp_hwrate_limit_table
*crte
,
151 struct tcpcb
*tp
, struct ifnet
*ifp
,
152 uint64_t bytes_per_sec
, int flags
, int *error
, uint64_t *lower_rate
)
160 tcp_rel_pacing_rate(const struct tcp_hwrate_limit_table
*crte
,
166 static uint64_t inline
167 tcp_hw_highest_rate(const struct tcp_hwrate_limit_table
*rle
)
172 static uint64_t inline
173 tcp_hw_highest_rate_ifp(struct ifnet
*ifp
, struct inpcb
*inp
)
178 static inline uint32_t
179 tcp_get_pacing_burst_size_w_divisor(struct tcpcb
*tp
, uint64_t bw
, uint32_t segsiz
, int can_use_1mss
,
180 const struct tcp_hwrate_limit_table
*te
, int *err
, int divisor
)
183 * We use the google formula to calculate the
188 * tso = min(bw/(div=1000), 64k)
190 * Note for these calculations we ignore the
191 * packet overhead (enet hdr, ip hdr and tcp hdr).
192 * We only get the google formula when we have
193 * divisor = 1000, which is the default for now.
196 uint32_t new_tso
, min_tso_segs
;
198 /* It can't be zero */
199 if ((divisor
== 0) ||
200 (divisor
< RL_MIN_DIVISOR
)) {
201 bytes
= bw
/ RL_DEFAULT_DIVISOR
;
203 bytes
= bw
/ divisor
;
204 /* We can't ever send more than 65k in a TSO */
205 if (bytes
> 0xffff) {
209 new_tso
= (bytes
+ segsiz
- 1) / segsiz
;
214 if (new_tso
< min_tso_segs
)
215 new_tso
= min_tso_segs
;
220 /* Do nothing if RATELIMIT is not defined */
222 tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table
*rte
)
227 tcp_rl_release_ifnet(struct ifnet
*ifp
)
233 * Given a b/w and a segsiz, and optional hardware
234 * rate limit, return the ideal size to burst
235 * out at once. Note the parameter can_use_1mss
236 * dictates if the transport will tolerate a 1mss
237 * limit, if not it will bottom out at 2mss (think
240 static inline uint32_t
241 tcp_get_pacing_burst_size(struct tcpcb
*tp
, uint64_t bw
, uint32_t segsiz
, int can_use_1mss
,
242 const struct tcp_hwrate_limit_table
*te
, int *err
)
245 return (tcp_get_pacing_burst_size_w_divisor(tp
, bw
, segsiz
,