include: replace linux/module.h with "struct module" wherever possible
[linux-2.6/next.git] / lib / proportions.c
blobd50746a79de2a846187137a2ffb11993c23127e8
1 /*
2 * Floating proportions
4 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
6 * Description:
8 * The floating proportion is a time derivative with an exponentially decaying
9 * history:
11 * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
13 * Where j is an element from {prop_local}, x_{j} is j's number of events,
14 * and i the time period over which the differential is taken. So d/dt_{-i} is
15 * the differential over the i-th last period.
17 * The decaying history gives smooth transitions. The time differential carries
18 * the notion of speed.
20 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
22 * \Sum_{i=0} 1/2^(1+i) = 1
24 * Furthermore, if we measure time (t) in the same events as x, so that:
26 * t = \Sum_{j} x_{j}
28 * we get that:
30 * \Sum_{j} p_{j} = 1
32 * Writing this in an iterative fashion we get (dropping the 'd's):
34 * if (++x_{j}, ++t > period)
35 * t /= 2;
36 * for_each (j)
37 * x_{j} /= 2;
39 * so that:
41 * p_{j} = x_{j} / t;
43 * We optimize away the '/= 2' for the global time delta by noting that:
45 * if (++t > period) t /= 2:
47 * Can be approximated by:
49 * period/2 + (++t % (period/2))
51 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
52 * binary operations and wraparound artefacts disappear. ]
54 * Also note that this yields a natural counter of the elapsed periods:
56 * c = t / (period/2)
58 * [ Its monotonically increasing property can be applied to mitigate the wrap-
59 * around issue. ]
61 * This allows us to do away with the loop over all prop_locals on each period
62 * expiration. By remembering the period count under which it was last accessed
63 * as c_{j}, we can obtain the number of 'missed' cycles from:
65 * c - c_{j}
67 * We can then lazily catch up to the global period count every time we are
68 * going to use x_{j}, by doing:
70 * x_{j} /= 2^(c - c_{j}), c_{j} = c
73 #include <linux/proportions.h>
74 #include <linux/rcupdate.h>
76 int prop_descriptor_init(struct prop_descriptor *pd, int shift)
78 int err;
80 if (shift > PROP_MAX_SHIFT)
81 shift = PROP_MAX_SHIFT;
83 pd->index = 0;
84 pd->pg[0].shift = shift;
85 mutex_init(&pd->mutex);
86 err = percpu_counter_init(&pd->pg[0].events, 0);
87 if (err)
88 goto out;
90 err = percpu_counter_init(&pd->pg[1].events, 0);
91 if (err)
92 percpu_counter_destroy(&pd->pg[0].events);
94 out:
95 return err;
99 * We have two copies, and flip between them to make it seem like an atomic
100 * update. The update is not really atomic wrt the events counter, but
101 * it is internally consistent with the bit layout depending on shift.
103 * We copy the events count, move the bits around and flip the index.
105 void prop_change_shift(struct prop_descriptor *pd, int shift)
107 int index;
108 int offset;
109 u64 events;
110 unsigned long flags;
112 if (shift > PROP_MAX_SHIFT)
113 shift = PROP_MAX_SHIFT;
115 mutex_lock(&pd->mutex);
117 index = pd->index ^ 1;
118 offset = pd->pg[pd->index].shift - shift;
119 if (!offset)
120 goto out;
122 pd->pg[index].shift = shift;
124 local_irq_save(flags);
125 events = percpu_counter_sum(&pd->pg[pd->index].events);
126 if (offset < 0)
127 events <<= -offset;
128 else
129 events >>= offset;
130 percpu_counter_set(&pd->pg[index].events, events);
133 * ensure the new pg is fully written before the switch
135 smp_wmb();
136 pd->index = index;
137 local_irq_restore(flags);
139 synchronize_rcu();
141 out:
142 mutex_unlock(&pd->mutex);
146 * wrap the access to the data in an rcu_read_lock() section;
147 * this is used to track the active references.
149 static struct prop_global *prop_get_global(struct prop_descriptor *pd)
150 __acquires(RCU)
152 int index;
154 rcu_read_lock();
155 index = pd->index;
157 * match the wmb from vcd_flip()
159 smp_rmb();
160 return &pd->pg[index];
163 static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
164 __releases(RCU)
166 rcu_read_unlock();
/*
 * Rescale a local period value from its current shift to @new_shift.
 *
 * *pl_period is shifted left when the shift grows and right when it shrinks,
 * by the difference between the two shifts; *pl_shift is then set to
 * @new_shift. When the shifts already match, nothing is written.
 */
static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int delta = *pl_shift - new_shift;

	if (delta > 0)
		*pl_period >>= delta;
	else if (delta < 0)
		*pl_period <<= -delta;
	else
		return;

	*pl_shift = new_shift;
}
/*
 * PERCPU
 */

/*
 * Batch size handed to __percpu_counter_add() for the local event counters;
 * it grows with the logarithm of nr_cpu_ids.
 */
#define PROP_BATCH	(8 * (ilog2(nr_cpu_ids) + 1))
191 int prop_local_init_percpu(struct prop_local_percpu *pl)
193 spin_lock_init(&pl->lock);
194 pl->shift = 0;
195 pl->period = 0;
196 return percpu_counter_init(&pl->events, 0);
199 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
201 percpu_counter_destroy(&pl->events);
205 * Catch up with missed period expirations.
207 * until (c_{j} == c)
208 * x_{j} -= x_{j}/2;
209 * c_{j}++;
211 static
212 void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
214 unsigned long period = 1UL << (pg->shift - 1);
215 unsigned long period_mask = ~(period - 1);
216 unsigned long global_period;
217 unsigned long flags;
219 global_period = percpu_counter_read(&pg->events);
220 global_period &= period_mask;
223 * Fast path - check if the local and global period count still match
224 * outside of the lock.
226 if (pl->period == global_period)
227 return;
229 spin_lock_irqsave(&pl->lock, flags);
230 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
233 * For each missed period, we half the local counter.
234 * basically:
235 * pl->events >> (global_period - pl->period);
237 period = (global_period - pl->period) >> (pg->shift - 1);
238 if (period < BITS_PER_LONG) {
239 s64 val = percpu_counter_read(&pl->events);
241 if (val < (nr_cpu_ids * PROP_BATCH))
242 val = percpu_counter_sum(&pl->events);
244 __percpu_counter_add(&pl->events, -val + (val >> period),
245 PROP_BATCH);
246 } else
247 percpu_counter_set(&pl->events, 0);
249 pl->period = global_period;
250 spin_unlock_irqrestore(&pl->lock, flags);
254 * ++x_{j}, ++t
256 void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
258 struct prop_global *pg = prop_get_global(pd);
260 prop_norm_percpu(pg, pl);
261 __percpu_counter_add(&pl->events, 1, PROP_BATCH);
262 percpu_counter_add(&pg->events, 1);
263 prop_put_global(pd, pg);
267 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
268 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
270 void __prop_inc_percpu_max(struct prop_descriptor *pd,
271 struct prop_local_percpu *pl, long frac)
273 struct prop_global *pg = prop_get_global(pd);
275 prop_norm_percpu(pg, pl);
277 if (unlikely(frac != PROP_FRAC_BASE)) {
278 unsigned long period_2 = 1UL << (pg->shift - 1);
279 unsigned long counter_mask = period_2 - 1;
280 unsigned long global_count;
281 long numerator, denominator;
283 numerator = percpu_counter_read_positive(&pl->events);
284 global_count = percpu_counter_read(&pg->events);
285 denominator = period_2 + (global_count & counter_mask);
287 if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
288 goto out_put;
291 percpu_counter_add(&pl->events, 1);
292 percpu_counter_add(&pg->events, 1);
294 out_put:
295 prop_put_global(pd, pg);
299 * Obtain a fraction of this proportion
301 * p_{j} = x_{j} / (period/2 + t % period/2)
303 void prop_fraction_percpu(struct prop_descriptor *pd,
304 struct prop_local_percpu *pl,
305 long *numerator, long *denominator)
307 struct prop_global *pg = prop_get_global(pd);
308 unsigned long period_2 = 1UL << (pg->shift - 1);
309 unsigned long counter_mask = period_2 - 1;
310 unsigned long global_count;
312 prop_norm_percpu(pg, pl);
313 *numerator = percpu_counter_read_positive(&pl->events);
315 global_count = percpu_counter_read(&pg->events);
316 *denominator = period_2 + (global_count & counter_mask);
318 prop_put_global(pd, pg);
322 * SINGLE
325 int prop_local_init_single(struct prop_local_single *pl)
327 spin_lock_init(&pl->lock);
328 pl->shift = 0;
329 pl->period = 0;
330 pl->events = 0;
331 return 0;
/*
 * Counterpart of prop_local_init_single(). The body is empty:
 * prop_local_init_single() only sets plain fields and initialises the lock,
 * so there is nothing to release here.
 */
void prop_local_destroy_single(struct prop_local_single *pl)
{
}
339 * Catch up with missed period expirations.
341 static
342 void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
344 unsigned long period = 1UL << (pg->shift - 1);
345 unsigned long period_mask = ~(period - 1);
346 unsigned long global_period;
347 unsigned long flags;
349 global_period = percpu_counter_read(&pg->events);
350 global_period &= period_mask;
353 * Fast path - check if the local and global period count still match
354 * outside of the lock.
356 if (pl->period == global_period)
357 return;
359 spin_lock_irqsave(&pl->lock, flags);
360 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
362 * For each missed period, we half the local counter.
364 period = (global_period - pl->period) >> (pg->shift - 1);
365 if (likely(period < BITS_PER_LONG))
366 pl->events >>= period;
367 else
368 pl->events = 0;
369 pl->period = global_period;
370 spin_unlock_irqrestore(&pl->lock, flags);
374 * ++x_{j}, ++t
376 void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
378 struct prop_global *pg = prop_get_global(pd);
380 prop_norm_single(pg, pl);
381 pl->events++;
382 percpu_counter_add(&pg->events, 1);
383 prop_put_global(pd, pg);
387 * Obtain a fraction of this proportion
389 * p_{j} = x_{j} / (period/2 + t % period/2)
391 void prop_fraction_single(struct prop_descriptor *pd,
392 struct prop_local_single *pl,
393 long *numerator, long *denominator)
395 struct prop_global *pg = prop_get_global(pd);
396 unsigned long period_2 = 1UL << (pg->shift - 1);
397 unsigned long counter_mask = period_2 - 1;
398 unsigned long global_count;
400 prop_norm_single(pg, pl);
401 *numerator = pl->events;
403 global_count = percpu_counter_read(&pg->events);
404 *denominator = period_2 + (global_count & counter_mask);
406 prop_put_global(pd, pg);