net/dccp/ccids/lib/packet_history.c

   1 /*
   2  *  Copyright (c) 2007   The University of Aberdeen, Scotland, UK
   3  *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
   4  *
   5  *  An implementation of the DCCP protocol
   6  *
   7  *  This code has been developed by the University of Waikato WAND
   8  *  research group. For further information please see http://www.wand.net.nz/
   9  *  or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
  10  *
  11  *  This code also uses code from Lulea University, rereleased as GPL by its
  12  *  authors:
  13  *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
  14  *
  15  *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
  16  *  and to make it work as a loadable module in the DCCP stack written by
  17  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
  18  *
  19  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  20  *
  21  *  This program is free software; you can redistribute it and/or modify
  22  *  it under the terms of the GNU General Public License as published by
  23  *  the Free Software Foundation; either version 2 of the License, or
  24  *  (at your option) any later version.
  25  *
  26  *  This program is distributed in the hope that it will be useful,
  27  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  28  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  29  *  GNU General Public License for more details.
  30  *
  31  *  You should have received a copy of the GNU General Public License
  32  *  along with this program; if not, write to the Free Software
  33  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  34  */
  35
  36 #include <linux/string.h>
  37 #include <linux/slab.h>
  38 #include "packet_history.h"
  39 #include "../../dccp.h"
  40
  41 /**
  42  *  tfrc_tx_hist_entry  -  Simple singly-linked TX history list
  43  *  @next:  next oldest entry (LIFO order)
  44  *  @seqno: sequence number of this entry
  45  *  @stamp: send time of packet with sequence number @seqno
  46  */
  47 struct tfrc_tx_hist_entry {
  48         struct tfrc_tx_hist_entry *next;
  49         u64                       seqno;
  50         ktime_t                   stamp;
  51 };
  52
  53 /*
  54  * Transmitter History Routines
  55  */
  56 static struct kmem_cache *tfrc_tx_hist_slab;
  57
  58 int __init tfrc_tx_packet_history_init(void)
  59 {
  60         tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
  61                                               sizeof(struct tfrc_tx_hist_entry),
  62                                               0, SLAB_HWCACHE_ALIGN, NULL);
  63         return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
  64 }
  65
  66 void tfrc_tx_packet_history_exit(void)
  67 {
  68         if (tfrc_tx_hist_slab != NULL) {
  69                 kmem_cache_destroy(tfrc_tx_hist_slab);
  70                 tfrc_tx_hist_slab = NULL;
  71         }
  72 }
  73
  74 static struct tfrc_tx_hist_entry *
  75         tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
  76 {
  77         while (head != NULL && head->seqno != seqno)
  78                 head = head->next;
  79
  80         return head;
  81 }
  82
  83 int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
  84 {
  85         struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
  86
  87         if (entry == NULL)
  88                 return -ENOBUFS;
  89         entry->seqno = seqno;
  90         entry->stamp = ktime_get_real();
  91         entry->next  = *headp;
  92         *headp       = entry;
  93         return 0;
  94 }
  95
  96 void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
  97 {
  98         struct tfrc_tx_hist_entry *head = *headp;
  99
 100         while (head != NULL) {
 101                 struct tfrc_tx_hist_entry *next = head->next;
 102
 103                 kmem_cache_free(tfrc_tx_hist_slab, head);
 104                 head = next;
 105         }
 106
 107         *headp = NULL;
 108 }
 109
 110 u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
 111                      const ktime_t now)
 112 {
 113         u32 rtt = 0;
 114         struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
 115
 116         if (packet != NULL) {
 117                 rtt = ktime_us_delta(now, packet->stamp);
 118                 /*
 119                  * Garbage-collect older (irrelevant) entries:
 120                  */
 121                 tfrc_tx_hist_purge(&packet->next);
 122         }
 123
 124         return rtt;
 125 }
 126
 127
 128 /*
 129  *      Receiver History Routines
 130  */
 131 static struct kmem_cache *tfrc_rx_hist_slab;
 132
 133 int __init tfrc_rx_packet_history_init(void)
 134 {
 135         tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache",
 136                                               sizeof(struct tfrc_rx_hist_entry),
 137                                               0, SLAB_HWCACHE_ALIGN, NULL);
 138         return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0;
 139 }
 140
 141 void tfrc_rx_packet_history_exit(void)
 142 {
 143         if (tfrc_rx_hist_slab != NULL) {
 144                 kmem_cache_destroy(tfrc_rx_hist_slab);
 145                 tfrc_rx_hist_slab = NULL;
 146         }
 147 }
 148
 149 static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
 150                                                const struct sk_buff *skb,
 151                                                const u64 ndp)
 152 {
 153         const struct dccp_hdr *dh = dccp_hdr(skb);
 154
 155         entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
 156         entry->tfrchrx_ccval = dh->dccph_ccval;
 157         entry->tfrchrx_type  = dh->dccph_type;
 158         entry->tfrchrx_ndp   = ndp;
 159         entry->tfrchrx_tstamp = ktime_get_real();
 160 }
 161
 162 void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
 163                              const struct sk_buff *skb,
 164                              const u64 ndp)
 165 {
 166         struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
 167
 168         tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
 169 }
 170
 171 /* has the packet contained in skb been seen before? */
 172 int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
 173 {
 174         const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
 175         int i;
 176
 177         if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
 178                 return 1;
 179
 180         for (i = 1; i <= h->loss_count; i++)
 181                 if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
 182                         return 1;
 183
 184         return 0;
 185 }
 186
 187 static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
 188 {
 189         const u8 idx_a = tfrc_rx_hist_index(h, a),
 190                  idx_b = tfrc_rx_hist_index(h, b);
 191         struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
 192
 193         h->ring[idx_a] = h->ring[idx_b];
 194         h->ring[idx_b] = tmp;
 195 }
 196
 197 /*
 198  * Private helper functions for loss detection.
 199  *
 200  * In the descriptions, `Si' refers to the sequence number of entry number i,
 201  * whose NDP count is `Ni' (lower case is used for variables).
 202  * Note: All __xxx_loss functions expect that a test against duplicates has been
 203  *       performed already: the seqno of the skb must not be less than the seqno
 204  *       of loss_prev; and it must not equal that of any valid history entry.
 205  */
 206 static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
 207 {
 208         u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 209             s1 = DCCP_SKB_CB(skb)->dccpd_seq;
 210
 211         if (!dccp_loss_free(s0, s1, n1)) {      /* gap between S0 and S1 */
 212                 h->loss_count = 1;
 213                 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
 214         }
 215 }
 216
 217 static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
 218 {
 219         u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 220             s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
 221             s2 = DCCP_SKB_CB(skb)->dccpd_seq;
 222
 223         if (likely(dccp_delta_seqno(s1, s2) > 0)) {     /* S1  <  S2 */
 224                 h->loss_count = 2;
 225                 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
 226                 return;
 227         }
 228
 229         /* S0  <  S2  <  S1 */
 230
 231         if (dccp_loss_free(s0, s2, n2)) {
 232                 u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
 233
 234                 if (dccp_loss_free(s2, s1, n1)) {
 235                         /* hole is filled: S0, S2, and S1 are consecutive */
 236                         h->loss_count = 0;
 237                         h->loss_start = tfrc_rx_hist_index(h, 1);
 238                 } else
 239                         /* gap between S2 and S1: just update loss_prev */
 240                         tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
 241
 242         } else {        /* gap between S0 and S2 */
 243                 /*
 244                  * Reorder history to insert S2 between S0 and S1
 245                  */
 246                 tfrc_rx_hist_swap(h, 0, 3);
 247                 h->loss_start = tfrc_rx_hist_index(h, 3);
 248                 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2);
 249                 h->loss_count = 2;
 250         }
 251 }
 252
 253 /* return 1 if a new loss event has been identified */
 254 static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
 255 {
 256         u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
 257             s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
 258             s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
 259             s3 = DCCP_SKB_CB(skb)->dccpd_seq;
 260
 261         if (likely(dccp_delta_seqno(s2, s3) > 0)) {     /* S2  <  S3 */
 262                 h->loss_count = 3;
 263                 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
 264                 return 1;
 265         }
 266
 267         /* S3  <  S2 */
 268
 269         if (dccp_delta_seqno(s1, s3) > 0) {             /* S1  <  S3  <  S2 */
 270                 /*
 271                  * Reorder history to insert S3 between S1 and S2
 272                  */
 273                 tfrc_rx_hist_swap(h, 2, 3);
 274                 tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
 275                 h->loss_count = 3;
 276                 return 1;
 277         }
 278
 279         /* S0  <  S3  <  S1 */
 280
 281         if (dccp_loss_free(s0, s3, n3)) {
 282                 u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
 283
 284                 if (dccp_loss_free(s3, s1, n1)) {
 285                         /* hole between S0 and S1 filled by S3 */
 286                         u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
 287
 288                         if (dccp_loss_free(s1, s2, n2)) {
 289                                 /* entire hole filled by S0, S3, S1, S2 */
 290                                 h->loss_start = tfrc_rx_hist_index(h, 2);
 291                                 h->loss_count = 0;
 292                         } else {
 293                                 /* gap remains between S1 and S2 */
 294                                 h->loss_start = tfrc_rx_hist_index(h, 1);
 295                                 h->loss_count = 1;
 296                         }
 297
 298                 } else /* gap exists between S3 and S1, loss_count stays at 2 */
 299                         tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3);
 300
 301                 return 0;
 302         }
 303
 304         /*
 305          * The remaining case:  S0  <  S3  <  S1  <  S2;  gap between S0 and S3
 306          * Reorder history to insert S3 between S0 and S1.
 307          */
 308         tfrc_rx_hist_swap(h, 0, 3);
 309         h->loss_start = tfrc_rx_hist_index(h, 3);
 310         tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3);
 311         h->loss_count = 3;
 312
 313         return 1;
 314 }
 315
 316 /* recycle RX history records to continue loss detection if necessary */
 317 static void __three_after_loss(struct tfrc_rx_hist *h)
 318 {
 319         /*
 320          * At this stage we know already that there is a gap between S0 and S1
 321          * (since S0 was the highest sequence number received before detecting
 322          * the loss). To recycle the loss record, it is thus only necessary to
 323          * check for other possible gaps between S1/S2 and between S2/S3.
 324          */
 325         u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
 326             s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
 327             s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno;
 328         u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
 329             n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;
 330
 331         if (dccp_loss_free(s1, s2, n2)) {
 332
 333                 if (dccp_loss_free(s2, s3, n3)) {
 334                         /* no gap between S2 and S3: entire hole is filled */
 335                         h->loss_start = tfrc_rx_hist_index(h, 3);
 336                         h->loss_count = 0;
 337                 } else {
 338                         /* gap between S2 and S3 */
 339                         h->loss_start = tfrc_rx_hist_index(h, 2);
 340                         h->loss_count = 1;
 341                 }
 342
 343         } else {        /* gap between S1 and S2 */
 344                 h->loss_start = tfrc_rx_hist_index(h, 1);
 345                 h->loss_count = 2;
 346         }
 347 }
 348
 349 /**
 350  *  tfrc_rx_handle_loss  -  Loss detection and further processing
 351  *  @h:             The non-empty RX history object
 352  *  @lh:            Loss Intervals database to update
 353  *  @skb:           Currently received packet
 354  *  @ndp:           The NDP count belonging to @skb
 355  *  @calc_first_li: Caller-dependent computation of first loss interval in @lh
 356  *  @sk:            Used by @calc_first_li (see tfrc_lh_interval_add)
 357  *  Chooses action according to pending loss, updates LI database when a new
 358  *  loss was detected, and does required post-processing. Returns 1 when caller
 359  *  should send feedback, 0 otherwise.
 360  *  Since it also takes care of reordering during loss detection and updates the
 361  *  records accordingly, the caller should not perform any more RX history
 362  *  operations when loss_count is greater than 0 after calling this function.
 363  */
 364 int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
 365                         struct tfrc_loss_hist *lh,
 366                         struct sk_buff *skb, const u64 ndp,
 367                         u32 (*calc_first_li)(struct sock *), struct sock *sk)
 368 {
 369         int is_new_loss = 0;
 370
 371         if (h->loss_count == 0) {
 372                 __do_track_loss(h, skb, ndp);
 373         } else if (h->loss_count == 1) {
 374                 __one_after_loss(h, skb, ndp);
 375         } else if (h->loss_count != 2) {
 376                 DCCP_BUG("invalid loss_count %d", h->loss_count);
 377         } else if (__two_after_loss(h, skb, ndp)) {
 378                 /*
 379                  * Update Loss Interval database and recycle RX records
 380                  */
 381                 is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
 382                 __three_after_loss(h);
 383         }
 384         return is_new_loss;
 385 }
 386
 387 int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
 388 {
 389         int i;
 390
 391         for (i = 0; i <= TFRC_NDUPACK; i++) {
 392                 h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
 393                 if (h->ring[i] == NULL)
 394                         goto out_free;
 395         }
 396
 397         h->loss_count = h->loss_start = 0;
 398         return 0;
 399
 400 out_free:
 401         while (i-- != 0) {
 402                 kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
 403                 h->ring[i] = NULL;
 404         }
 405         return -ENOBUFS;
 406 }
 407
 408 void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
 409 {
 410         int i;
 411
 412         for (i = 0; i <= TFRC_NDUPACK; ++i)
 413                 if (h->ring[i] != NULL) {
 414                         kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
 415                         h->ring[i] = NULL;
 416                 }
 417 }
 418
 419 /**
 420  * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
 421  */
 422 static inline struct tfrc_rx_hist_entry *
 423                         tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
 424 {
 425         return h->ring[0];
 426 }
 427
 428 /**
 429  * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
 430  */
 431 static inline struct tfrc_rx_hist_entry *
 432                         tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
 433 {
 434         return h->ring[h->rtt_sample_prev];
 435 }
 436
 437 /**
 438  * tfrc_rx_hist_sample_rtt  -  Sample RTT from timestamp / CCVal
 439  * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
 440  * to compute a sample with given data - calling function should check this.
 441  */
 442 u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
 443 {
 444         u32 sample = 0,
 445             delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
 446                             tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
 447
 448         if (delta_v < 1 || delta_v > 4) {       /* unsuitable CCVal delta */
 449                 if (h->rtt_sample_prev == 2) {  /* previous candidate stored */
 450                         sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
 451                                        tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
 452                         if (sample)
 453                                 sample = 4 / sample *
 454                                          ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
 455                                                         tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
 456                         else    /*
 457                                  * FIXME: This condition is in principle not
 458                                  * possible but occurs when CCID is used for
 459                                  * two-way data traffic. I have tried to trace
 460                                  * it, but the cause does not seem to be here.
 461                                  */
 462                                 DCCP_BUG("please report to dccp@vger.kernel.org"
 463                                          " => prev = %u, last = %u",
 464                                          tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
 465                                          tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
 466                 } else if (delta_v < 1) {
 467                         h->rtt_sample_prev = 1;
 468                         goto keep_ref_for_next_time;
 469                 }
 470
 471         } else if (delta_v == 4) /* optimal match */
 472                 sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
 473         else {                   /* suboptimal match */
 474                 h->rtt_sample_prev = 2;
 475                 goto keep_ref_for_next_time;
 476         }
 477
 478         if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
 479                 DCCP_WARN("RTT sample %u too large, using max\n", sample);
 480                 sample = DCCP_SANE_RTT_MAX;
 481         }
 482
 483         h->rtt_sample_prev = 0;        /* use current entry as next reference */
 484 keep_ref_for_next_time:
 485
 486         return sample;
 487 }