/*
 * H-TCP congestion control. The algorithm is detailed in:
 * R.N.Shorten, D.J.Leith:
 *   "H-TCP: TCP for high-speed and long-distance networks"
 *   Proc. PFLDnet, Argonne, 2004.
 * http://www.hamilton.ie/net/htcp3.pdf
 */
#include <linux/module.h>
#include <net/tcp.h>
/*
 * alpha and beta are kept as fixed-point values scaled by 1 << 7,
 * so 128 represents 1.0.
 */
#define ALPHA_BASE	(1<<7)	/* 1.0 with shift << 7 */
#define BETA_MIN	(1<<6)	/* 0.5 with shift << 7 */
#define BETA_MAX	102	/* 0.8 with shift << 7 */
17 static int use_rtt_scaling __read_mostly
= 1;
18 module_param(use_rtt_scaling
, int, 0644);
19 MODULE_PARM_DESC(use_rtt_scaling
, "turn on/off RTT scaling");
21 static int use_bandwidth_switch __read_mostly
= 1;
22 module_param(use_bandwidth_switch
, int, 0644);
23 MODULE_PARM_DESC(use_bandwidth_switch
, "turn on/off bandwidth switcher");
26 u32 alpha
; /* Fixed point arith, << 7 */
27 u8 beta
; /* Fixed point arith, << 7 */
28 u8 modeswitch
; /* Delay modeswitch
29 until we had at least one congestion event */
34 u32 last_cong
; /* Time since last congestion event end */
40 /* Bandwidth estimation */
48 static inline u32
htcp_cong_time(const struct htcp
*ca
)
50 return jiffies
- ca
->last_cong
;
53 static inline u32
htcp_ccount(const struct htcp
*ca
)
55 return htcp_cong_time(ca
) / ca
->minRTT
;
58 static inline void htcp_reset(struct htcp
*ca
)
60 ca
->undo_last_cong
= ca
->last_cong
;
61 ca
->undo_maxRTT
= ca
->maxRTT
;
62 ca
->undo_old_maxB
= ca
->old_maxB
;
64 ca
->last_cong
= jiffies
;
67 static u32
htcp_cwnd_undo(struct sock
*sk
)
69 const struct tcp_sock
*tp
= tcp_sk(sk
);
70 struct htcp
*ca
= inet_csk_ca(sk
);
72 if (ca
->undo_last_cong
) {
73 ca
->last_cong
= ca
->undo_last_cong
;
74 ca
->maxRTT
= ca
->undo_maxRTT
;
75 ca
->old_maxB
= ca
->undo_old_maxB
;
76 ca
->undo_last_cong
= 0;
79 return max(tp
->snd_cwnd
, (tp
->snd_ssthresh
<< 7) / ca
->beta
);
82 static inline void measure_rtt(struct sock
*sk
, u32 srtt
)
84 const struct inet_connection_sock
*icsk
= inet_csk(sk
);
85 struct htcp
*ca
= inet_csk_ca(sk
);
87 /* keep track of minimum RTT seen so far, minRTT is zero at first */
88 if (ca
->minRTT
> srtt
|| !ca
->minRTT
)
92 if (icsk
->icsk_ca_state
== TCP_CA_Open
) {
93 if (ca
->maxRTT
< ca
->minRTT
)
94 ca
->maxRTT
= ca
->minRTT
;
95 if (ca
->maxRTT
< srtt
&&
96 srtt
<= ca
->maxRTT
+ msecs_to_jiffies(20))
101 static void measure_achieved_throughput(struct sock
*sk
,
102 const struct ack_sample
*sample
)
104 const struct inet_connection_sock
*icsk
= inet_csk(sk
);
105 const struct tcp_sock
*tp
= tcp_sk(sk
);
106 struct htcp
*ca
= inet_csk_ca(sk
);
107 u32 now
= tcp_time_stamp
;
109 if (icsk
->icsk_ca_state
== TCP_CA_Open
)
110 ca
->pkts_acked
= sample
->pkts_acked
;
112 if (sample
->rtt_us
> 0)
113 measure_rtt(sk
, usecs_to_jiffies(sample
->rtt_us
));
115 if (!use_bandwidth_switch
)
118 /* achieved throughput calculations */
119 if (!((1 << icsk
->icsk_ca_state
) & (TCPF_CA_Open
| TCPF_CA_Disorder
))) {
125 ca
->packetcount
+= sample
->pkts_acked
;
127 if (ca
->packetcount
>= tp
->snd_cwnd
- (ca
->alpha
>> 7 ? : 1) &&
128 now
- ca
->lasttime
>= ca
->minRTT
&&
130 __u32 cur_Bi
= ca
->packetcount
* HZ
/ (now
- ca
->lasttime
);
132 if (htcp_ccount(ca
) <= 3) {
133 /* just after backoff */
134 ca
->minB
= ca
->maxB
= ca
->Bi
= cur_Bi
;
136 ca
->Bi
= (3 * ca
->Bi
+ cur_Bi
) / 4;
137 if (ca
->Bi
> ca
->maxB
)
139 if (ca
->minB
> ca
->maxB
)
147 static inline void htcp_beta_update(struct htcp
*ca
, u32 minRTT
, u32 maxRTT
)
149 if (use_bandwidth_switch
) {
151 u32 old_maxB
= ca
->old_maxB
;
153 ca
->old_maxB
= ca
->maxB
;
154 if (!between(5 * maxB
, 4 * old_maxB
, 6 * old_maxB
)) {
161 if (ca
->modeswitch
&& minRTT
> msecs_to_jiffies(10) && maxRTT
) {
162 ca
->beta
= (minRTT
<< 7) / maxRTT
;
163 if (ca
->beta
< BETA_MIN
)
165 else if (ca
->beta
> BETA_MAX
)
173 static inline void htcp_alpha_update(struct htcp
*ca
)
175 u32 minRTT
= ca
->minRTT
;
177 u32 diff
= htcp_cong_time(ca
);
181 factor
= 1 + (10 * diff
+ ((diff
/ 2) * (diff
/ 2) / HZ
)) / HZ
;
184 if (use_rtt_scaling
&& minRTT
) {
185 u32 scale
= (HZ
<< 3) / (10 * minRTT
);
187 /* clamping ratio to interval [0.5,10]<<3 */
188 scale
= min(max(scale
, 1U << 2), 10U << 3);
189 factor
= (factor
<< 3) / scale
;
194 ca
->alpha
= 2 * factor
* ((1 << 7) - ca
->beta
);
196 ca
->alpha
= ALPHA_BASE
;
200 * After we have the rtt data to calculate beta, we'd still prefer to wait one
201 * rtt before we adjust our beta to ensure we are working from a consistent
204 * This function should be called when we hit a congestion event since only at
205 * that point do we really have a real sense of maxRTT (the queues en route
206 * were getting just too full now).
208 static void htcp_param_update(struct sock
*sk
)
210 struct htcp
*ca
= inet_csk_ca(sk
);
211 u32 minRTT
= ca
->minRTT
;
212 u32 maxRTT
= ca
->maxRTT
;
214 htcp_beta_update(ca
, minRTT
, maxRTT
);
215 htcp_alpha_update(ca
);
217 /* add slowly fading memory for maxRTT to accommodate routing changes */
218 if (minRTT
> 0 && maxRTT
> minRTT
)
219 ca
->maxRTT
= minRTT
+ ((maxRTT
- minRTT
) * 95) / 100;
222 static u32
htcp_recalc_ssthresh(struct sock
*sk
)
224 const struct tcp_sock
*tp
= tcp_sk(sk
);
225 const struct htcp
*ca
= inet_csk_ca(sk
);
227 htcp_param_update(sk
);
228 return max((tp
->snd_cwnd
* ca
->beta
) >> 7, 2U);
231 static void htcp_cong_avoid(struct sock
*sk
, u32 ack
, u32 acked
)
233 struct tcp_sock
*tp
= tcp_sk(sk
);
234 struct htcp
*ca
= inet_csk_ca(sk
);
236 if (!tcp_is_cwnd_limited(sk
))
239 if (tcp_in_slow_start(tp
))
240 tcp_slow_start(tp
, acked
);
242 /* In dangerous area, increase slowly.
243 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
245 if ((tp
->snd_cwnd_cnt
* ca
->alpha
)>>7 >= tp
->snd_cwnd
) {
246 if (tp
->snd_cwnd
< tp
->snd_cwnd_clamp
)
248 tp
->snd_cwnd_cnt
= 0;
249 htcp_alpha_update(ca
);
251 tp
->snd_cwnd_cnt
+= ca
->pkts_acked
;
257 static void htcp_init(struct sock
*sk
)
259 struct htcp
*ca
= inet_csk_ca(sk
);
261 memset(ca
, 0, sizeof(struct htcp
));
262 ca
->alpha
= ALPHA_BASE
;
265 ca
->last_cong
= jiffies
;
268 static void htcp_state(struct sock
*sk
, u8 new_state
)
273 struct htcp
*ca
= inet_csk_ca(sk
);
275 if (ca
->undo_last_cong
) {
276 ca
->last_cong
= jiffies
;
277 ca
->undo_last_cong
= 0;
282 case TCP_CA_Recovery
:
284 htcp_reset(inet_csk_ca(sk
));
289 static struct tcp_congestion_ops htcp __read_mostly
= {
291 .ssthresh
= htcp_recalc_ssthresh
,
292 .cong_avoid
= htcp_cong_avoid
,
293 .set_state
= htcp_state
,
294 .undo_cwnd
= htcp_cwnd_undo
,
295 .pkts_acked
= measure_achieved_throughput
,
296 .owner
= THIS_MODULE
,
300 static int __init
htcp_register(void)
302 BUILD_BUG_ON(sizeof(struct htcp
) > ICSK_CA_PRIV_SIZE
);
303 BUILD_BUG_ON(BETA_MIN
>= BETA_MAX
);
304 return tcp_register_congestion_control(&htcp
);
307 static void __exit
htcp_unregister(void)
309 tcp_unregister_congestion_control(&htcp
);
312 module_init(htcp_register
);
313 module_exit(htcp_unregister
);
315 MODULE_AUTHOR("Baruch Even");
316 MODULE_LICENSE("GPL");
317 MODULE_DESCRIPTION("H-TCP");