1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
4 #include <linux/kernel.h>
5 #include <linux/percpu.h>
6 #include <linux/slab.h>
7 #include <linux/static_key.h>
8 #include <linux/interrupt.h>
10 #include <linux/irq.h>
11 #include <linux/math64.h>
13 #include <trace/events/irq.h>
15 #include "internals.h"
17 DEFINE_STATIC_KEY_FALSE(irq_timing_enabled
);
19 DEFINE_PER_CPU(struct irq_timings
, irq_timings
);
31 static DEFINE_IDR(irqt_stats
);
33 void irq_timings_enable(void)
35 static_branch_enable(&irq_timing_enabled
);
38 void irq_timings_disable(void)
40 static_branch_disable(&irq_timing_enabled
);
44 * irqs_update - update the irq timing statistics with a new timestamp
46 * @irqs: an irqt_stat struct pointer
47 * @ts: the new timestamp
49 * The statistics are computed online, in other words, the code is
50 * designed to compute the statistics on a stream of values rather
51 * than doing multiple passes on the values to compute the average,
52 * then the variance. The integer division introduces a loss of
53 * precision but with an acceptable error margin regarding the results
54 * we would have with the double floating precision: we are dealing
55 * with nanosec, so big numbers, consequently the mantisse is
56 * negligeable, especially when converting the time in usec
59 * The computation happens at idle time. When the CPU is not idle, the
60 * interrupts' timestamps are stored in the circular buffer, when the
61 * CPU goes idle and this routine is called, all the buffer's values
62 * are injected in the statistical model continuying to extend the
63 * statistics from the previous busy-idle cycle.
65 * The observations showed a device will trigger a burst of periodic
66 * interrupts followed by one or two peaks of longer time, for
67 * instance when a SD card device flushes its cache, then the periodic
68 * intervals occur again. A one second inactivity period resets the
69 * stats, that gives us the certitude the statistical values won't
70 * exceed 1x10^9, thus the computation won't overflow.
72 * Basically, the purpose of the algorithm is to watch the periodic
73 * interrupts and eliminate the peaks.
75 * An interrupt is considered periodically stable if the interval of
76 * its occurences follow the normal distribution, thus the values
79 * avg - 3 x stddev < value < avg + 3 x stddev
81 * Which can be simplified to:
83 * -3 x stddev < value - avg < 3 x stddev
85 * abs(value - avg) < 3 x stddev
87 * In order to save a costly square root computation, we use the
88 * variance. For the record, stddev = sqrt(variance). The equation
91 * abs(value - avg) < 3 x sqrt(variance)
93 * And finally we square it:
95 * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
97 * (value - avg) x (value - avg) < 9 x variance
99 * Statistically speaking, any values out of this interval is
100 * considered as an anomaly and is discarded. However, a normal
101 * distribution appears when the number of samples is 30 (it is the
102 * rule of thumb in statistics, cf. "30 samples" on Internet). When
103 * there are three consecutive anomalies, the statistics are resetted.
106 static void irqs_update(struct irqt_stat
*irqs
, u64 ts
)
108 u64 old_ts
= irqs
->last_ts
;
114 * The timestamps are absolute time values, we need to compute
115 * the timing interval between two interrupts.
120 * The interval type is u64 in order to deal with the same
121 * type in our computation, that prevent mindfuck issues with
122 * overflow, sign and division.
124 interval
= ts
- old_ts
;
127 * The interrupt triggered more than one second apart, that
128 * ends the sequence as predictible for our purpose. In this
129 * case, assume we have the beginning of a sequence and the
130 * timestamp is the first value. As it is impossible to
131 * predict anything at this point, return.
133 * Note the first timestamp of the sequence will always fall
134 * in this test because the old_ts is zero. That is what we
135 * want as we need another timestamp to compute an interval.
137 if (interval
>= NSEC_PER_SEC
) {
138 memset(irqs
, 0, sizeof(*irqs
));
144 * Pre-compute the delta with the average as the result is
145 * used several times in this function.
147 diff
= interval
- irqs
->avg
;
150 * Increment the number of samples.
155 * Online variance divided by the number of elements if there
156 * is more than one sample. Normally the formula is division
157 * by nr_samples - 1 but we assume the number of element will be
158 * more than 32 and dividing by 32 instead of 31 is enough
161 if (likely(irqs
->nr_samples
> 1))
162 variance
= irqs
->variance
>> IRQ_TIMINGS_SHIFT
;
165 * The rule of thumb in statistics for the normal distribution
166 * is having at least 30 samples in order to have the model to
167 * apply. Values outside the interval are considered as an
170 if ((irqs
->nr_samples
>= 30) && ((diff
* diff
) > (9 * variance
))) {
172 * After three consecutive anomalies, we reset the
173 * stats as it is no longer stable enough.
175 if (irqs
->anomalies
++ >= 3) {
176 memset(irqs
, 0, sizeof(*irqs
));
182 * The anomalies must be consecutives, so at this
183 * point, we reset the anomalies counter.
189 * The interrupt is considered stable enough to try to predict
190 * the next event on it.
195 * Online average algorithm:
197 * new_average = average + ((value - average) / count)
199 * The variance computation depends on the new average
200 * to be computed here first.
203 irqs
->avg
= irqs
->avg
+ (diff
>> IRQ_TIMINGS_SHIFT
);
206 * Online variance algorithm:
208 * new_variance = variance + (value - average) x (value - new_average)
210 * Warning: irqs->avg is updated with the line above, hence
211 * 'interval - irqs->avg' is no longer equal to 'diff'
213 irqs
->variance
= irqs
->variance
+ (diff
* (interval
- irqs
->avg
));
216 * Update the next event
218 irqs
->next_evt
= ts
+ irqs
->avg
;
222 * irq_timings_next_event - Return when the next event is supposed to arrive
224 * During the last busy cycle, the number of interrupts is incremented
225 * and stored in the irq_timings structure. This information is
228 * - know if the index in the table wrapped up:
230 * If more than the array size interrupts happened during the
231 * last busy/idle cycle, the index wrapped up and we have to
232 * begin with the next element in the array which is the last one
233 * in the sequence, otherwise it is a the index 0.
235 * - have an indication of the interrupts activity on this CPU
238 * The values are 'consumed' after inserting in the statistical model,
239 * thus the count is reinitialized.
241 * The array of values **must** be browsed in the time direction, the
242 * timestamp must increase between an element and the next one.
244 * Returns a nanosec time based estimation of the earliest interrupt,
247 u64
irq_timings_next_event(u64 now
)
249 struct irq_timings
*irqts
= this_cpu_ptr(&irq_timings
);
250 struct irqt_stat
*irqs
;
251 struct irqt_stat __percpu
*s
;
252 u64 ts
, next_evt
= U64_MAX
;
256 * This function must be called with the local irq disabled in
257 * order to prevent the timings circular buffer to be updated
258 * while we are reading it.
260 lockdep_assert_irqs_disabled();
263 * Number of elements in the circular buffer: If it happens it
264 * was flushed before, then the number of elements could be
265 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
266 * otherwise the array size is used as we wrapped. The index
267 * begins from zero when we did not wrap. That could be done
268 * in a nicer way with the proper circular array structure
269 * type but with the cost of extra computation in the
270 * interrupt handler hot path. We choose efficiency.
272 * Inject measured irq/timestamp to the statistical model
273 * while decrementing the counter because we consume the data
274 * from our circular buffer.
276 for (i
= irqts
->count
& IRQ_TIMINGS_MASK
,
277 irqts
->count
= min(IRQ_TIMINGS_SIZE
, irqts
->count
);
278 irqts
->count
> 0; irqts
->count
--, i
= (i
+ 1) & IRQ_TIMINGS_MASK
) {
280 irq
= irq_timing_decode(irqts
->values
[i
], &ts
);
282 s
= idr_find(&irqt_stats
, irq
);
284 irqs
= this_cpu_ptr(s
);
285 irqs_update(irqs
, ts
);
290 * Look in the list of interrupts' statistics, the earliest
293 idr_for_each_entry(&irqt_stats
, s
, i
) {
295 irqs
= this_cpu_ptr(s
);
300 if (irqs
->next_evt
<= now
) {
305 * This interrupt mustn't use in the future
306 * until new events occur and update the
313 if (irqs
->next_evt
< next_evt
) {
315 next_evt
= irqs
->next_evt
;
322 void irq_timings_free(int irq
)
324 struct irqt_stat __percpu
*s
;
326 s
= idr_find(&irqt_stats
, irq
);
329 idr_remove(&irqt_stats
, irq
);
333 int irq_timings_alloc(int irq
)
335 struct irqt_stat __percpu
*s
;
339 * Some platforms can have the same private interrupt per cpu,
340 * so this function may be be called several times with the
341 * same interrupt number. Just bail out in case the per cpu
342 * stat structure is already allocated.
344 s
= idr_find(&irqt_stats
, irq
);
348 s
= alloc_percpu(*s
);
352 idr_preload(GFP_KERNEL
);
353 id
= idr_alloc(&irqt_stats
, s
, irq
, irq
+ 1, GFP_NOWAIT
);