lib/proportions.c

   1 /*
   2  * Floating proportions
   3  *
   4  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
   5  *
   6  * Description:
   7  *
   8  * The floating proportion is a time derivative with an exponentially decaying
   9  * history:
  10  *
  11  *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
  12  *
  13  * Where j is an element from {prop_local}, x_{j} is j's number of events,
  14  * and i the time period over which the differential is taken. So d/dt_{-i} is
  15  * the differential over the i-th last period.
  16  *
  17  * The decaying history gives smooth transitions. The time differential carries
  18  * the notion of speed.
  19  *
  20  * The denominator is 2^(1+i) because we want the series to be normalised, ie.
  21  *
  22  *   \Sum_{i=0} 1/2^(1+i) = 1
  23  *
  24  * Further more, if we measure time (t) in the same events as x; so that:
  25  *
  26  *   t = \Sum_{j} x_{j}
  27  *
  28  * we get that:
  29  *
  30  *   \Sum_{j} p_{j} = 1
  31  *
  32  * Writing this in an iterative fashion we get (dropping the 'd's):
  33  *
  34  *   if (++x_{j}, ++t > period)
  35  *     t /= 2;
  36  *     for_each (j)
  37  *       x_{j} /= 2;
  38  *
  39  * so that:
  40  *
  41  *   p_{j} = x_{j} / t;
  42  *
  43  * We optimize away the '/= 2' for the global time delta by noting that:
  44  *
  45  *   if (++t > period) t /= 2:
  46  *
  47  * Can be approximated by:
  48  *
  49  *   period/2 + (++t % period/2)
  50  *
  51  * [ Furthermore, when we choose period to be 2^n it can be written in terms of
  52  *   binary operations and wraparound artefacts disappear. ]
  53  *
  54  * Also note that this yields a natural counter of the elapsed periods:
  55  *
  56  *   c = t / (period/2)
  57  *
  58  * [ Its monotonic increasing property can be applied to mitigate the wrap-
  59  *   around issue. ]
  60  *
  61  * This allows us to do away with the loop over all prop_locals on each period
  62  * expiration. By remembering the period count under which it was last accessed
  63  * as c_{j}, we can obtain the number of 'missed' cycles from:
  64  *
  65  *   c - c_{j}
  66  *
  67  * We can then lazily catch up to the global period count every time we are
  68  * going to use x_{j}, by doing:
  69  *
  70  *   x_{j} /= 2^(c - c_{j}), c_{j} = c
  71  */
  72
  73 #include <linux/proportions.h>
  74 #include <linux/rcupdate.h>
  75
  76 int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
  77 {
  78         int err;
  79
  80         if (shift > PROP_MAX_SHIFT)
  81                 shift = PROP_MAX_SHIFT;
  82
  83         pd->index = 0;
  84         pd->pg[0].shift = shift;
  85         mutex_init(&pd->mutex);
  86         err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
  87         if (err)
  88                 goto out;
  89
  90         err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
  91         if (err)
  92                 percpu_counter_destroy(&pd->pg[0].events);
  93
  94 out:
  95         return err;
  96 }
  97
  98 /*
  99  * We have two copies, and flip between them to make it seem like an atomic
 100  * update. The update is not really atomic wrt the events counter, but
 101  * it is internally consistent with the bit layout depending on shift.
 102  *
 103  * We copy the events count, move the bits around and flip the index.
 104  */
 105 void prop_change_shift(struct prop_descriptor *pd, int shift)
 106 {
 107         int index;
 108         int offset;
 109         u64 events;
 110         unsigned long flags;
 111
 112         if (shift > PROP_MAX_SHIFT)
 113                 shift = PROP_MAX_SHIFT;
 114
 115         mutex_lock(&pd->mutex);
 116
 117         index = pd->index ^ 1;
 118         offset = pd->pg[pd->index].shift - shift;
 119         if (!offset)
 120                 goto out;
 121
 122         pd->pg[index].shift = shift;
 123
 124         local_irq_save(flags);
 125         events = percpu_counter_sum(&pd->pg[pd->index].events);
 126         if (offset < 0)
 127                 events <<= -offset;
 128         else
 129                 events >>= offset;
 130         percpu_counter_set(&pd->pg[index].events, events);
 131
 132         /*
 133          * ensure the new pg is fully written before the switch
 134          */
 135         smp_wmb();
 136         pd->index = index;
 137         local_irq_restore(flags);
 138
 139         synchronize_rcu();
 140
 141 out:
 142         mutex_unlock(&pd->mutex);
 143 }
 144
 145 /*
 146  * wrap the access to the data in an rcu_read_lock() section;
 147  * this is used to track the active references.
 148  */
 149 static struct prop_global *prop_get_global(struct prop_descriptor *pd)
 150 __acquires(RCU)
 151 {
 152         int index;
 153
 154         rcu_read_lock();
 155         index = pd->index;
 156         /*
 157          * match the wmb from vcd_flip()
 158          */
 159         smp_rmb();
 160         return &pd->pg[index];
 161 }
 162
 163 static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
 164 __releases(RCU)
 165 {
 166         rcu_read_unlock();
 167 }
 168
 169 static void
 170 prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
 171 {
 172         int offset = *pl_shift - new_shift;
 173
 174         if (!offset)
 175                 return;
 176
 177         if (offset < 0)
 178                 *pl_period <<= -offset;
 179         else
 180                 *pl_period >>= offset;
 181
 182         *pl_shift = new_shift;
 183 }
 184
 185 /*
 186  * PERCPU
 187  */
 188
 189 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 190
 191 int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
 192 {
 193         raw_spin_lock_init(&pl->lock);
 194         pl->shift = 0;
 195         pl->period = 0;
 196         return percpu_counter_init(&pl->events, 0, gfp);
 197 }
 198
 199 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
 200 {
 201         percpu_counter_destroy(&pl->events);
 202 }
 203
 204 /*
 205  * Catch up with missed period expirations.
 206  *
 207  *   until (c_{j} == c)
 208  *     x_{j} -= x_{j}/2;
 209  *     c_{j}++;
 210  */
 211 static
 212 void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
 213 {
 214         unsigned long period = 1UL << (pg->shift - 1);
 215         unsigned long period_mask = ~(period - 1);
 216         unsigned long global_period;
 217         unsigned long flags;
 218
 219         global_period = percpu_counter_read(&pg->events);
 220         global_period &= period_mask;
 221
 222         /*
 223          * Fast path - check if the local and global period count still match
 224          * outside of the lock.
 225          */
 226         if (pl->period == global_period)
 227                 return;
 228
 229         raw_spin_lock_irqsave(&pl->lock, flags);
 230         prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
 231
 232         /*
 233          * For each missed period, we half the local counter.
 234          * basically:
 235          *   pl->events >> (global_period - pl->period);
 236          */
 237         period = (global_period - pl->period) >> (pg->shift - 1);
 238         if (period < BITS_PER_LONG) {
 239                 s64 val = percpu_counter_read(&pl->events);
 240
 241                 if (val < (nr_cpu_ids * PROP_BATCH))
 242                         val = percpu_counter_sum(&pl->events);
 243
 244                 __percpu_counter_add(&pl->events, -val + (val >> period),
 245                                         PROP_BATCH);
 246         } else
 247                 percpu_counter_set(&pl->events, 0);
 248
 249         pl->period = global_period;
 250         raw_spin_unlock_irqrestore(&pl->lock, flags);
 251 }
 252
 253 /*
 254  *   ++x_{j}, ++t
 255  */
 256 void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
 257 {
 258         struct prop_global *pg = prop_get_global(pd);
 259
 260         prop_norm_percpu(pg, pl);
 261         __percpu_counter_add(&pl->events, 1, PROP_BATCH);
 262         percpu_counter_add(&pg->events, 1);
 263         prop_put_global(pd, pg);
 264 }
 265
 266 /*
 267  * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 268  * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 269  */
 270 void __prop_inc_percpu_max(struct prop_descriptor *pd,
 271                            struct prop_local_percpu *pl, long frac)
 272 {
 273         struct prop_global *pg = prop_get_global(pd);
 274
 275         prop_norm_percpu(pg, pl);
 276
 277         if (unlikely(frac != PROP_FRAC_BASE)) {
 278                 unsigned long period_2 = 1UL << (pg->shift - 1);
 279                 unsigned long counter_mask = period_2 - 1;
 280                 unsigned long global_count;
 281                 long numerator, denominator;
 282
 283                 numerator = percpu_counter_read_positive(&pl->events);
 284                 global_count = percpu_counter_read(&pg->events);
 285                 denominator = period_2 + (global_count & counter_mask);
 286
 287                 if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
 288                         goto out_put;
 289         }
 290
 291         percpu_counter_add(&pl->events, 1);
 292         percpu_counter_add(&pg->events, 1);
 293
 294 out_put:
 295         prop_put_global(pd, pg);
 296 }
 297
 298 /*
 299  * Obtain a fraction of this proportion
 300  *
 301  *   p_{j} = x_{j} / (period/2 + t % period/2)
 302  */
 303 void prop_fraction_percpu(struct prop_descriptor *pd,
 304                 struct prop_local_percpu *pl,
 305                 long *numerator, long *denominator)
 306 {
 307         struct prop_global *pg = prop_get_global(pd);
 308         unsigned long period_2 = 1UL << (pg->shift - 1);
 309         unsigned long counter_mask = period_2 - 1;
 310         unsigned long global_count;
 311
 312         prop_norm_percpu(pg, pl);
 313         *numerator = percpu_counter_read_positive(&pl->events);
 314
 315         global_count = percpu_counter_read(&pg->events);
 316         *denominator = period_2 + (global_count & counter_mask);
 317
 318         prop_put_global(pd, pg);
 319 }
 320
 321 /*
 322  * SINGLE
 323  */
 324
 325 int prop_local_init_single(struct prop_local_single *pl)
 326 {
 327         raw_spin_lock_init(&pl->lock);
 328         pl->shift = 0;
 329         pl->period = 0;
 330         pl->events = 0;
 331         return 0;
 332 }
 333
 334 void prop_local_destroy_single(struct prop_local_single *pl)
 335 {
 336 }
 337
 338 /*
 339  * Catch up with missed period expirations.
 340  */
 341 static
 342 void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
 343 {
 344         unsigned long period = 1UL << (pg->shift - 1);
 345         unsigned long period_mask = ~(period - 1);
 346         unsigned long global_period;
 347         unsigned long flags;
 348
 349         global_period = percpu_counter_read(&pg->events);
 350         global_period &= period_mask;
 351
 352         /*
 353          * Fast path - check if the local and global period count still match
 354          * outside of the lock.
 355          */
 356         if (pl->period == global_period)
 357                 return;
 358
 359         raw_spin_lock_irqsave(&pl->lock, flags);
 360         prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
 361         /*
 362          * For each missed period, we half the local counter.
 363          */
 364         period = (global_period - pl->period) >> (pg->shift - 1);
 365         if (likely(period < BITS_PER_LONG))
 366                 pl->events >>= period;
 367         else
 368                 pl->events = 0;
 369         pl->period = global_period;
 370         raw_spin_unlock_irqrestore(&pl->lock, flags);
 371 }
 372
 373 /*
 374  *   ++x_{j}, ++t
 375  */
 376 void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
 377 {
 378         struct prop_global *pg = prop_get_global(pd);
 379
 380         prop_norm_single(pg, pl);
 381         pl->events++;
 382         percpu_counter_add(&pg->events, 1);
 383         prop_put_global(pd, pg);
 384 }
 385
 386 /*
 387  * Obtain a fraction of this proportion
 388  *
 389  *   p_{j} = x_{j} / (period/2 + t % period/2)
 390  */
 391 void prop_fraction_single(struct prop_descriptor *pd,
 392                 struct prop_local_single *pl,
 393                 long *numerator, long *denominator)
 394 {
 395         struct prop_global *pg = prop_get_global(pd);
 396         unsigned long period_2 = 1UL << (pg->shift - 1);
 397         unsigned long counter_mask = period_2 - 1;
 398         unsigned long global_count;
 399
 400         prop_norm_single(pg, pl);
 401         *numerator = pl->events;
 402
 403         global_count = percpu_counter_read(&pg->events);
 404         *denominator = period_2 + (global_count & counter_mask);
 405
 406         prop_put_global(pd, pg);
 407 }