/*	$NetBSD: tcp_congctl.c,v 1.14 2008/02/29 07:39:17 matt Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 1999, 2001, 2005, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
 * Facility, NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Rui Paulo.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * @(#)COPYRIGHT	1.1 (NRL) 17 January 1995
 *
 * NRL grants permission for redistribution and use in source and binary
 * forms, with or without modification, of the software and documentation
 * created at NRL provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgements:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed at the Information
 *	Technology Division, US Naval Research Laboratory.
 * 4. Neither the name of the NRL nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation
 * are those of the authors and should not be interpreted as representing
 * official policies, either expressed or implied, of the US Naval
 * Research Laboratory (NRL).
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_congctl.c,v 1.14 2008/02/29 07:39:17 matt Exp $");

#include "opt_inet.h"
#include "opt_tcp_debug.h"
#include "opt_tcp_congctl.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/pool.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/mutex.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#ifdef INET6
#ifndef INET
#include <netinet/in.h>
#endif
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#endif

#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_congctl.h>
#ifdef TCP_DEBUG
#include <netinet/tcp_debug.h>
#endif
/*
 * TODO:
 * consider separating the actual implementations in another file.
 */

static int tcp_reno_fast_retransmit(struct tcpcb *, const struct tcphdr *);
static void tcp_reno_slow_retransmit(struct tcpcb *);
static void tcp_reno_fast_retransmit_newack(struct tcpcb *,
    const struct tcphdr *);
static void tcp_reno_newack(struct tcpcb *, const struct tcphdr *);
static void tcp_reno_congestion_exp(struct tcpcb *tp);

static int tcp_newreno_fast_retransmit(struct tcpcb *, const struct tcphdr *);
static void tcp_newreno_fast_retransmit_newack(struct tcpcb *,
    const struct tcphdr *);
static void tcp_newreno_newack(struct tcpcb *, const struct tcphdr *);

static void tcp_congctl_fillnames(void);

extern int tcprexmtthresh;

MALLOC_DEFINE(M_TCPCONGCTL, "tcpcongctl", "TCP congestion control structures");

/* currently selected global congestion control */
char tcp_congctl_global_name[TCPCC_MAXLEN];

/* available global congestion control algorithms */
char tcp_congctl_avail[10 * TCPCC_MAXLEN];

/*
 * Used to list the available congestion control algorithms.
 */
TAILQ_HEAD(, tcp_congctlent) tcp_congctlhd =
    TAILQ_HEAD_INITIALIZER(tcp_congctlhd);

static struct tcp_congctlent * tcp_congctl_global;

static kmutex_t tcp_congctl_mtx;
void
tcp_congctl_init(void)
{
	int r;

	mutex_init(&tcp_congctl_mtx, MUTEX_DEFAULT, IPL_NONE);

	/* Base algorithms. */
	r = tcp_congctl_register("reno", &tcp_reno_ctl);
	KASSERT(r == 0);
	r = tcp_congctl_register("newreno", &tcp_newreno_ctl);
	KASSERT(r == 0);

	/* NewReno is the default. */
#ifndef TCP_CONGCTL_DEFAULT
#define TCP_CONGCTL_DEFAULT "newreno"
#endif

	r = tcp_congctl_select(NULL, TCP_CONGCTL_DEFAULT);
	KASSERT(r == 0);
}
/*
 * Register a congestion algorithm and select it if we have none.
 */
int
tcp_congctl_register(const char *name, const struct tcp_congctl *tcc)
{
	struct tcp_congctlent *ntcc, *tccp;

	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent)
		if (!strcmp(name, tccp->congctl_name)) {
			/* name already registered */
			return EEXIST;
		}

	ntcc = malloc(sizeof(*ntcc), M_TCPCONGCTL, M_WAITOK|M_ZERO);

	strlcpy(ntcc->congctl_name, name, sizeof(ntcc->congctl_name) - 1);
	ntcc->congctl_ctl = tcc;

	TAILQ_INSERT_TAIL(&tcp_congctlhd, ntcc, congctl_ent);
	tcp_congctl_fillnames();

	if (TAILQ_FIRST(&tcp_congctlhd) == ntcc)
		tcp_congctl_select(NULL, name);

	return 0;
}
int
tcp_congctl_unregister(const char *name)
{
	struct tcp_congctlent *tccp, *rtccp;
	unsigned int size;

	rtccp = NULL;
	size = 0;
	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
		if (!strcmp(name, tccp->congctl_name))
			rtccp = tccp;
		size++;
	}

	if (!rtccp)
		return ENOENT;

	if (size <= 1 || tcp_congctl_global == rtccp || rtccp->congctl_refcnt)
		return EBUSY;

	TAILQ_REMOVE(&tcp_congctlhd, rtccp, congctl_ent);
	free(rtccp, M_TCPCONGCTL);
	tcp_congctl_fillnames();

	return 0;
}
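
/*
 * Usage sketch (illustrative, not part of the original sources): a module
 * providing a hypothetical "vegas" algorithm would plug into this API with
 *
 *	static const struct tcp_congctl tcp_vegas_ctl = { ... };
 *	error = tcp_congctl_register("vegas", &tcp_vegas_ctl);
 *
 * and would call tcp_congctl_unregister("vegas") on detach.  Unregistering
 * fails with EBUSY while the algorithm is the global default or is still
 * referenced by a connection, and with ENOENT if the name was never
 * registered.
 */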
/*
 * Select a congestion algorithm by name.
 */
int
tcp_congctl_select(struct tcpcb *tp, const char *name)
{
	struct tcp_congctlent *tccp, *old_tccp, *new_tccp;
	bool old_found, new_found;

	KASSERT(name);

	old_found = (tp == NULL || tp->t_congctl == NULL);
	old_tccp = NULL;
	new_found = false;
	new_tccp = NULL;

	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
		if (!old_found && tccp->congctl_ctl == tp->t_congctl) {
			old_tccp = tccp;
			old_found = true;
		}

		if (!new_found && !strcmp(name, tccp->congctl_name)) {
			new_tccp = tccp;
			new_found = true;
		}

		if (new_found && old_found) {
			if (tp) {
				mutex_enter(&tcp_congctl_mtx);
				if (old_tccp)
					old_tccp->congctl_refcnt--;
				tp->t_congctl = new_tccp->congctl_ctl;
				new_tccp->congctl_refcnt++;
				mutex_exit(&tcp_congctl_mtx);
			} else {
				tcp_congctl_global = new_tccp;
				strlcpy(tcp_congctl_global_name,
				    new_tccp->congctl_name,
				    sizeof(tcp_congctl_global_name) - 1);
			}
			return 0;
		}
	}

	return EINVAL;
}
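
/*
 * For example, tcp_congctl_select(NULL, "reno") changes the system-wide
 * default recorded in tcp_congctl_global and tcp_congctl_global_name,
 * while tcp_congctl_select(tp, "reno") switches a single connection and
 * moves the reference count from its old algorithm to the new one under
 * tcp_congctl_mtx.
 */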
void
tcp_congctl_release(struct tcpcb *tp)
{
	struct tcp_congctlent *tccp;

	KASSERT(tp->t_congctl);

	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
		if (tccp->congctl_ctl == tp->t_congctl) {
			tccp->congctl_refcnt--;
			return;
		}
	}
}
/*
 * Returns the name of a congestion algorithm.
 */
const char *
tcp_congctl_bystruct(const struct tcp_congctl *tcc)
{
	struct tcp_congctlent *tccp;

	KASSERT(tcc);

	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent)
		if (tccp->congctl_ctl == tcc)
			return tccp->congctl_name;

	return NULL;
}
static void
tcp_congctl_fillnames(void)
{
	struct tcp_congctlent *tccp;
	const char *delim = " ";

	tcp_congctl_avail[0] = '\0';
	TAILQ_FOREACH(tccp, &tcp_congctlhd, congctl_ent) {
		strlcat(tcp_congctl_avail, tccp->congctl_name,
		    sizeof(tcp_congctl_avail) - 1);
		if (TAILQ_NEXT(tccp, congctl_ent))
			strlcat(tcp_congctl_avail, delim,
			    sizeof(tcp_congctl_avail) - 1);
	}
}
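
/*
 * With only the base algorithms registered (see tcp_congctl_init above),
 * tcp_congctl_avail ends up holding the space-separated string
 * "reno newreno".
 */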
/* ------------------------------------------------------------------------ */

/*
 * TCP/Reno congestion control.
 */
static void
tcp_reno_congestion_exp(struct tcpcb *tp)
{
	u_int win;

	/*
	 * Halve the congestion window and reduce the
	 * slow start threshold.
	 */
	win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz;
	if (win < 2)
		win = 2;

	tp->snd_ssthresh = win * tp->t_segsz;
	tp->snd_recover = tp->snd_max;
	tp->snd_cwnd = tp->snd_ssthresh;

	/*
	 * When using TCP ECN, notify the peer that
	 * we reduced the cwnd.
	 */
	if (TCP_ECN_ALLOWED(tp))
		tp->t_flags |= TF_ECN_SND_CWR;
}
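
/*
 * Worked example (illustrative): with t_segsz = 1460 and
 * min(snd_wnd, snd_cwnd) = 14600 (ten segments), win = 14600 / 2 / 1460 = 5,
 * so both snd_ssthresh and snd_cwnd drop to 7300 bytes (five segments).
 * The "win < 2" clamp keeps the result at two segments whenever the
 * effective window was already below four segments.
 */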
static int
tcp_reno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
{
	/*
	 * We know we're losing at the current
	 * window size so do congestion avoidance
	 * (set ssthresh to half the current window
	 * and pull our congestion window back to
	 * the new ssthresh).
	 *
	 * Dup acks mean that packets have left the
	 * network (they're now cached at the receiver)
	 * so bump cwnd by the amount in the receiver
	 * to keep a constant cwnd packets in the
	 * network.
	 *
	 * If we are using TCP/SACK, then enter
	 * Fast Recovery if the receiver SACKs
	 * data that is tcprexmtthresh * MSS
	 * bytes past the last ACKed segment,
	 * irrespective of the number of DupAcks.
	 */
	tcp_seq onxt;

	onxt = tp->snd_nxt;
	tcp_reno_congestion_exp(tp);
	tp->t_partialacks = 0;
	TCP_TIMER_DISARM(tp, TCPT_REXMT);
	tp->t_rtttime = 0;
	if (TCP_SACK_ENABLED(tp)) {
		tp->t_dupacks = tcprexmtthresh;
		tp->sack_newdata = tp->snd_nxt;
		tp->snd_cwnd = tp->t_segsz;
		(void) tcp_output(tp);
		return 0;
	}
	tp->snd_nxt = th->th_ack;
	tp->snd_cwnd = tp->t_segsz;
	(void) tcp_output(tp);
	tp->snd_cwnd = tp->snd_ssthresh + tp->t_segsz * tp->t_dupacks;
	if (SEQ_GT(onxt, tp->snd_nxt))
		tp->snd_nxt = onxt;

	return 0;
}
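
/*
 * Example of the non-SACK path (illustrative): if snd_ssthresh has just
 * been halved to 5 * t_segsz and t_dupacks is 3, the segment at th_ack is
 * retransmitted with snd_cwnd forced to a single segment, and snd_cwnd is
 * then re-inflated to 5 * t_segsz + 3 * t_segsz = 8 segments, one extra
 * segment for each duplicate ACK that signalled a packet leaving the
 * network.
 */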
static void
tcp_reno_slow_retransmit(struct tcpcb *tp)
{
	u_int win;

	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (the minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */

	win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_segsz;
	if (win < 2)
		win = 2;
	/* Loss Window MUST be one segment. */
	tp->snd_cwnd = tp->t_segsz;
	tp->snd_ssthresh = win * tp->t_segsz;
	tp->t_partialacks = -1;
	tp->t_dupacks = 0;
	tp->t_bytes_acked = 0;
}
static void
tcp_reno_fast_retransmit_newack(struct tcpcb *tp,
    const struct tcphdr *th)
{
	if (tp->t_partialacks < 0) {
		/*
		 * We were not in fast recovery.  Reset the duplicate ack
		 * counter.
		 */
		tp->t_dupacks = 0;
	} else {
		/*
		 * Clamp the congestion window to the crossover point and
		 * exit fast recovery.
		 */
		if (tp->snd_cwnd > tp->snd_ssthresh)
			tp->snd_cwnd = tp->snd_ssthresh;
		tp->t_partialacks = -1;
		tp->t_dupacks = 0;
		tp->t_bytes_acked = 0;
	}
}
static void
tcp_reno_newack(struct tcpcb *tp, const struct tcphdr *th)
{
	/*
	 * When new data is acked, open the congestion window.
	 */

	u_int cw = tp->snd_cwnd;
	u_int incr = tp->t_segsz;

	if (tcp_do_abc) {
		/*
		 * RFC 3465 Appropriate Byte Counting (ABC)
		 */
		int acked = th->th_ack - tp->snd_una;

		if (cw >= tp->snd_ssthresh) {
			tp->t_bytes_acked += acked;
			if (tp->t_bytes_acked >= cw) {
				/* Time to increase the window. */
				tp->t_bytes_acked -= cw;
			} else {
				/* No need to increase yet. */
				incr = 0;
			}
		} else {
			/*
			 * use 2*SMSS or 1*SMSS for the "L" param,
			 * depending on sysctl setting.
			 *
			 * (See RFC 3465 2.3 Choosing the Limit)
			 */
			u_int abc_lim;

			abc_lim = (tcp_abc_aggressive == 0 ||
			    tp->snd_nxt != tp->snd_max) ? incr : incr * 2;
			incr = min(acked, abc_lim);
		}
	} else {
		/*
		 * If the window gives us less than ssthresh packets
		 * in flight, open exponentially (segsz per packet).
		 * Otherwise open linearly: segsz per window
		 * (segsz^2 / cwnd per packet).
		 */
		if (cw >= tp->snd_ssthresh) {
			incr = incr * incr / cw;
		}
	}

	tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
}
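
/*
 * Worked example (illustrative): with t_segsz = 1460 and cw = 14600 in
 * congestion avoidance, the classic (non-ABC) increment per ACK is
 * 1460 * 1460 / 14600 = 146 bytes, i.e. roughly one segment per window of
 * ACKs.  With ABC enabled, the window instead grows by one full segment
 * only once a whole cwnd's worth of bytes has been acknowledged.
 */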
const struct tcp_congctl tcp_reno_ctl = {
	.fast_retransmit = tcp_reno_fast_retransmit,
	.slow_retransmit = tcp_reno_slow_retransmit,
	.fast_retransmit_newack = tcp_reno_fast_retransmit_newack,
	.newack = tcp_reno_newack,
	.cong_exp = tcp_reno_congestion_exp,
};
/*
 * TCP/NewReno Congestion control.
 */
static int
tcp_newreno_fast_retransmit(struct tcpcb *tp, const struct tcphdr *th)
{
	if (SEQ_LT(th->th_ack, tp->snd_high)) {
		/*
		 * False fast retransmit after timeout.
		 * Do not enter fast recovery
		 */
		tp->t_dupacks = 0;
		return 1;
	} else {
		/*
		 * Fast retransmit is same as reno.
		 */
		return tcp_reno_fast_retransmit(tp, th);
	}

	return 0;
}
/*
 * Implement the NewReno response to a new ack, checking for partial acks in
 * fast recovery.
 */
static void
tcp_newreno_fast_retransmit_newack(struct tcpcb *tp, const struct tcphdr *th)
{
	if (tp->t_partialacks < 0) {
		/*
		 * We were not in fast recovery.  Reset the duplicate ack
		 * counter.
		 */
		tp->t_dupacks = 0;
	} else if (SEQ_LT(th->th_ack, tp->snd_recover)) {
		/*
		 * This is a partial ack.  Retransmit the first unacknowledged
		 * segment and deflate the congestion window by the amount of
		 * acknowledged data.  Do not exit fast recovery.
		 */
		tcp_seq onxt = tp->snd_nxt;
		u_long ocwnd = tp->snd_cwnd;

		/*
		 * snd_una has not yet been updated and the socket's send
		 * buffer has not yet drained off the ACK'd data, so we
		 * have to leave snd_una as it was to get the correct data
		 * offset in tcp_output().
		 */
		if (++tp->t_partialacks == 1)
			TCP_TIMER_DISARM(tp, TCPT_REXMT);
		tp->t_rtttime = 0;
		tp->snd_nxt = th->th_ack;
		/*
		 * Set snd_cwnd to one segment beyond ACK'd offset.  snd_una
		 * is not yet updated when we're called.
		 */
		tp->snd_cwnd = tp->t_segsz + (th->th_ack - tp->snd_una);
		(void) tcp_output(tp);
		tp->snd_cwnd = ocwnd;
		if (SEQ_GT(onxt, tp->snd_nxt))
			tp->snd_nxt = onxt;
		/*
		 * Partial window deflation.  Relies on fact that tp->snd_una
		 * not updated yet.
		 */
		tp->snd_cwnd -= (th->th_ack - tp->snd_una - tp->t_segsz);
	} else {
		/*
		 * Complete ack.  Inflate the congestion window to ssthresh
		 * and exit fast recovery.
		 *
		 * Window inflation should have left us with approx.
		 * snd_ssthresh outstanding data.  But in case we
		 * would be inclined to send a burst, better to do
		 * it via the slow start mechanism.
		 */
		if (SEQ_SUB(tp->snd_max, th->th_ack) < tp->snd_ssthresh)
			tp->snd_cwnd = SEQ_SUB(tp->snd_max, th->th_ack)
			    + tp->t_segsz;
		else
			tp->snd_cwnd = tp->snd_ssthresh;
		tp->t_partialacks = -1;
		tp->t_dupacks = 0;
		tp->t_bytes_acked = 0;
	}
}
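
/*
 * Partial-ack example (illustrative): suppose th_ack acknowledges exactly
 * 2 * t_segsz bytes beyond snd_una while snd_recover is still ahead.  The
 * first unacknowledged segment is retransmitted with snd_cwnd temporarily
 * set to t_segsz + 2 * t_segsz, the old cwnd is then restored, and finally
 * deflated by (2 * t_segsz - t_segsz) = t_segsz, so roughly one new segment
 * can be sent for the two that were acknowledged.
 */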
static void
tcp_newreno_newack(struct tcpcb *tp, const struct tcphdr *th)
{
	/*
	 * If we are still in fast recovery (meaning we are using
	 * NewReno and we have only received partial acks), do not
	 * inflate the window yet.
	 */
	if (tp->t_partialacks < 0)
		tcp_reno_newack(tp, th);
}
const struct tcp_congctl tcp_newreno_ctl = {
	.fast_retransmit = tcp_newreno_fast_retransmit,
	.slow_retransmit = tcp_reno_slow_retransmit,
	.fast_retransmit_newack = tcp_newreno_fast_retransmit_newack,
	.newack = tcp_newreno_newack,
	.cong_exp = tcp_reno_congestion_exp,
};
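
/*
 * Dispatch sketch (an assumption about the callers, which are not shown in
 * this file): TCP input and timer processing are expected to reach the
 * selected algorithm through the tcpcb rather than by name, e.g.
 *
 *	tp->t_congctl->newack(tp, th);
 *	tp->t_congctl->slow_retransmit(tp);
 */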