On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
drivers/misc/hwlat_detector.c (from mmotm.git):
/*
 * hwlat_detector.c - A simple Hardware Latency detector.
 *
 * Use this module to detect large system latencies induced by the behavior of
 * certain underlying system hardware or firmware, independent of Linux itself.
 * The code was developed originally to detect the presence of SMIs on Intel
 * and AMD systems, although there is no dependency upon x86 herein.
 *
 * The classical example usage of this module is in detecting the presence of
 * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
 * somewhat special form of hardware interrupt spawned from earlier CPU debug
 * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
 * LPC (or other device) to generate a special interrupt under certain
 * circumstances, for example, upon expiration of a special SMI timer device,
 * due to certain external thermal readings, on certain I/O address accesses,
 * and other situations. An SMI hits a special CPU pin, triggers a special
 * SMI mode (complete with special memory map), and the OS is unaware.
 *
 * Although certain hardware-induced latencies are necessary (for example,
 * a modern system often requires an SMI handler for correct thermal control
 * and remote management) they can wreak havoc upon any OS-level performance
 * guarantees toward low-latency, especially when the OS is not even made
 * aware of the presence of these interrupts. For this reason, we need a
 * somewhat brute force mechanism to detect these interrupts. In this case,
 * we do it by hogging all of the CPU(s) for configurable timer intervals,
 * sampling the built-in CPU timer, looking for discontiguous readings.
 *
 * WARNING: This implementation necessarily introduces latencies. Therefore,
 *          you should NEVER use this module in a production environment
 *          requiring any kind of low-latency performance guarantee(s).
 *
 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *
 * Includes useful feedback from Clark Williams <clark@redhat.com>
 *
 * This file is licensed under the terms of the GNU General Public
 * License version 2. This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 */
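
/*
 * Overview of the control interface created below (an orientation comment;
 * paths assume debugfs mounted at the usual /sys/kernel/debug location,
 * which may differ on a given system):
 *
 *   hwlat_detector/enable    - write 1/0 to start/stop the sampling kthread
 *   hwlat_detector/threshold - minimum latency (us) worth recording
 *   hwlat_detector/window    - total length (us) of one sampling period
 *   hwlat_detector/width     - portion (us) of each window spent sampling
 *   hwlat_detector/count     - samples recorded since the last reset
 *   hwlat_detector/max       - largest latency (us) seen since the last reset
 *   hwlat_detector/sample    - raw sample stream (blocking read by default)
 */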
#include <linux/module.h>
#include <linux/init.h>
#include <linux/ring_buffer.h>
#include <linux/stop_machine.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/version.h>
#include <linux/delay.h>

#define BUF_SIZE_DEFAULT	262144UL		/* 8K*(sizeof(entry)) */
#define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
#define U64STR_SIZE		22			/* 20 digits max */

#define VERSION			"1.0.0"
#define BANNER			"hwlat_detector: "
#define DRVNAME			"hwlat_detector"
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */
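
/*
 * Note on units (a summary of the defaults above): window, width and
 * threshold are all in microseconds, so out of the box the detector
 * busy-samples for 0.5s of every 1s window and records any timer
 * discontinuity larger than 10us. BUF_SIZE_DEFAULT of 262144 bytes holds
 * 8192 entries, assuming the 32-byte struct sample that the
 * 8K*(sizeof(entry)) comment above implies on a 64-bit build.
 */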
/* Module metadata */

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jon Masters <jcm@redhat.com>");
MODULE_DESCRIPTION("A simple hardware latency detector");
MODULE_VERSION(VERSION);

/* Module parameters */

static int debug;
static int enabled;
static int threshold;

module_param(debug, int, 0);			/* enable debug */
module_param(enabled, int, 0);			/* enable detector */
module_param(threshold, int, 0);		/* latency threshold */

/* Buffering and sampling */

static struct ring_buffer *ring_buffer;		/* sample buffer */
static DEFINE_MUTEX(ring_buffer_mutex);		/* lock changes */
static unsigned long buf_size = BUF_SIZE_DEFAULT;
static struct task_struct *kthread;		/* sampling thread */

/* DebugFS filesystem entries */

static struct dentry *debug_dir;		/* debugfs directory */
static struct dentry *debug_max;		/* maximum TSC delta */
static struct dentry *debug_count;		/* total detect count */
static struct dentry *debug_sample_width;	/* sample width us */
static struct dentry *debug_sample_window;	/* sample window us */
static struct dentry *debug_sample;		/* raw samples us */
static struct dentry *debug_threshold;		/* threshold us */
static struct dentry *debug_enable;		/* enable/disable */

/* Individual samples and global state */

struct sample;					/* latency sample */
struct data;					/* Global state */

/* Sampling functions */
static int __buffer_add_sample(struct sample *sample);
static struct sample *buffer_get_sample(struct sample *sample);
static int get_sample(void *unused);

/* Threading and state */
static int kthread_fn(void *unused);
static int start_kthread(void);
static int stop_kthread(void);
static void __reset_stats(void);
static int init_stats(void);

/* Debugfs interface */
static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry);
static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry);
static int debug_sample_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
static int debug_sample_release(struct inode *inode, struct file *filp);
static int debug_enable_fopen(struct inode *inode, struct file *filp);
static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos);
static ssize_t debug_enable_fwrite(struct file *file,
				   const char __user *user_buffer,
				   size_t user_size, loff_t *offset);

/* Initialization functions */
static int init_debugfs(void);
static void free_debugfs(void);
static int detector_init(void);
static void detector_exit(void);
/* Individual latency samples are stored here when detected and packed into
 * the ring_buffer circular buffer, where they are overwritten when
 * more than buf_size/sizeof(sample) samples are received. */
struct sample {
	u64		seqnum;		/* unique sequence */
	u64		duration;	/* ktime delta */
	struct timespec	timestamp;	/* wall time */
};
/* keep the global state somewhere. Mostly used under stop_machine. */
static struct data {

	struct mutex lock;		/* protect changes */

	u64	count;			/* total since reset */
	u64	max_sample;		/* max hardware latency */
	u64	threshold;		/* sample threshold level */

	u64	sample_window;		/* total sampling window (on+off) */
	u64	sample_width;		/* active sampling portion of window */

	atomic_t sample_open;		/* whether the sample file is open */

	wait_queue_head_t wq;		/* waitqueue for new sample values */

} data;
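
/*
 * A short note on how the members above are used for synchronization:
 * data.lock is taken by the sampling kthread around each stop_machine run
 * and by the debugfs read/write wrappers; sample_open ensures only one
 * reader has the "sample" file open at a time; wq blocks "sample" readers
 * until a new latency value is recorded (or the detector is disabled).
 */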
/**
 * __buffer_add_sample - add a new latency sample recording to the ring buffer
 * @sample: The new latency sample value
 *
 * This receives a new latency sample and records it in a global ring buffer.
 * No additional locking is used in this case - suited for stop_machine use.
 */
static int __buffer_add_sample(struct sample *sample)
{
	return ring_buffer_write(ring_buffer,
				 sizeof(struct sample), sample);
}

/**
 * buffer_get_sample - remove a hardware latency sample from the ring buffer
 * @sample: Pre-allocated storage for the sample
 *
 * This retrieves a hardware latency sample from the global circular buffer
 */
static struct sample *buffer_get_sample(struct sample *sample)
{
	struct ring_buffer_event *e = NULL;
	struct sample *s = NULL;
	unsigned int cpu = 0;

	if (!sample)
		return NULL;

	/* ring_buffers are per-cpu but we just want any value */
	/* so we'll start with this cpu and try others if not */
	/* Steven is planning to add a generic mechanism */
	mutex_lock(&ring_buffer_mutex);
	e = ring_buffer_consume(ring_buffer, smp_processor_id(), NULL);
	if (!e) {
		for_each_online_cpu(cpu) {
			e = ring_buffer_consume(ring_buffer, cpu, NULL);
			if (e)
				break;
		}
	}

	if (e) {
		s = ring_buffer_event_data(e);
		memcpy(sample, s, sizeof(struct sample));
	} else
		sample = NULL;
	mutex_unlock(&ring_buffer_mutex);

	return sample;
}
/**
 * get_sample - sample the CPU TSC and look for likely hardware latencies
 * @unused: This is not used but is a part of the stop_machine API
 *
 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
 * hardware-induced latency. Called under stop_machine, with data.lock held.
 */
static int get_sample(void *unused)
{
	ktime_t start, t1, t2;
	s64 diff, total = 0;
	u64 sample = 0;
	int ret = 1;

	start = ktime_get();	/* start timestamp */

	do {

		t1 = ktime_get();	/* we'll look for a discontinuity */
		t2 = ktime_get();

		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
		diff = ktime_to_us(ktime_sub(t2, t1));     /* current diff */

		/* This shouldn't happen */
		if (diff < 0) {
			printk(KERN_ERR BANNER "time running backwards\n");
			goto out;
		}

		if (diff > sample)
			sample = diff; /* only want highest value */

	} while (total <= data.sample_width);

	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > data.threshold) {
		struct sample s;

		data.count++;
		s.seqnum = data.count;
		s.duration = sample;
		s.timestamp = CURRENT_TIME;
		__buffer_add_sample(&s);

		/* Keep a running maximum ever recorded hardware latency */
		if (sample > data.max_sample)
			data.max_sample = sample;

		wake_up(&data.wq); /* wake up reader(s) */
	}

	ret = 0;
out:
	return ret;
}
/**
 * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
 * @unused: A required part of the kthread API.
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
 * use stop_machine, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus pre-empting).
 * Obviously this should never be used in production environments.
 *
 * stop_machine will schedule us typically only on CPU0 which is fine for
 * almost every real-world hardware latency situation - but we might later
 * generalize this if we find there are any actual systems with alternate
 * SMI delivery or other non CPU0 hardware latencies.
 */
static int kthread_fn(void *unused)
{
	int err = 0;
	u64 interval = 0;

	while (!kthread_should_stop()) {

		mutex_lock(&data.lock);

		err = stop_machine(get_sample, unused, 0);
		if (err) {
			/* Houston, we have a problem */
			mutex_unlock(&data.lock);
			goto err_out;
		}

		interval = data.sample_window - data.sample_width;
		do_div(interval, USEC_PER_MSEC); /* modifies interval value */

		mutex_unlock(&data.lock);

		if (msleep_interruptible(interval))
			goto out;
	}

	goto out;
err_out:
	printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
	enabled = 0;
out:
	return err;
}
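
/*
 * Illustration of the sleep interval computed in kthread_fn above: with the
 * default 1000000us window and 500000us width, the thread samples (under
 * stop_machine) for roughly 0.5s and then sleeps for the remaining
 * (1000000 - 500000) / 1000 = 500 milliseconds before the next window.
 */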
/**
 * start_kthread - Kick off the hardware latency sampling/detector kthread
 *
 * This starts a kernel thread that will sit and sample the CPU timestamp
 * counter (TSC or similar) and look for potential hardware latencies.
 */
static int start_kthread(void)
{
	kthread = kthread_run(kthread_fn, NULL,
			      DRVNAME);
	if (IS_ERR(kthread)) {
		printk(KERN_ERR BANNER "could not start sampling thread\n");
		enabled = 0;
		return -ENOMEM;
	}

	return 0;
}
/**
 * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
 *
 * This kicks the running hardware latency sampling/detector kernel thread and
 * tells it to stop sampling now. Use this on unload and at system shutdown.
 */
static int stop_kthread(void)
{
	int ret;

	ret = kthread_stop(kthread);

	return ret;
}
/**
 * __reset_stats - Reset statistics for the hardware latency detector
 *
 * We use data to store various statistics and global state. We call this
 * function in order to reset those when "enable" is toggled on or off, and
 * also at initialization. Should be called with data.lock held.
 */
static void __reset_stats(void)
{
	data.count = 0;
	data.max_sample = 0;
	ring_buffer_reset(ring_buffer); /* flush out old sample entries */
}

/**
 * init_stats - Setup global state statistics for the hardware latency detector
 *
 * We use data to store various statistics and global state. We also use
 * a global ring buffer (ring_buffer) to keep raw samples of detected hardware
 * induced system latencies. This function initializes these structures and
 * allocates the global ring buffer also.
 */
static int init_stats(void)
{
	int ret = -ENOMEM;

	mutex_init(&data.lock);
	init_waitqueue_head(&data.wq);
	atomic_set(&data.sample_open, 0);

	ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS);

	if (WARN(!ring_buffer, KERN_ERR BANNER
		 "failed to allocate ring buffer!\n"))
		goto out;

	__reset_stats();
	data.threshold = DEFAULT_LAT_THRESHOLD;	    /* threshold us */
	data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
	data.sample_width = DEFAULT_SAMPLE_WIDTH;   /* width us */

	ret = 0;

out:
	return ret;
}
/**
 * simple_data_read - Wrapper read function for global state debugfs entries
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 * @entry: The entry to read from
 *
 * This function provides a generic read implementation for the global state
 * "data" structure debugfs filesystem entries. It would be nice to use
 * simple_attr_read directly, but we need to make sure that the data.lock
 * mutex is held during the actual read (even though we likely won't ever
 * actually race here as the updater runs under a stop_machine context).
 */
static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry)
{
	char buf[U64STR_SIZE];
	u64 val = 0;
	int len = 0;

	memset(buf, 0, sizeof(buf));

	if (!entry)
		return -EFAULT;

	mutex_lock(&data.lock);
	val = *entry;
	mutex_unlock(&data.lock);

	len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}

/**
 * simple_data_write - Wrapper write function for global state debugfs entries
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to write value from
 * @cnt: The maximum number of bytes to write
 * @ppos: The current "file" position
 * @entry: The entry to write to
 *
 * This function provides a generic write implementation for the global state
 * "data" structure debugfs filesystem entries. It would be nice to use
 * simple_attr_write directly, but we need to make sure that the data.lock
 * mutex is held during the actual write (even though we likely won't ever
 * actually race here as the updater runs under a stop_machine context).
 */
static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry)
{
	char buf[U64STR_SIZE];
	int csize = min(cnt, sizeof(buf));
	u64 val = 0;
	int err = 0;

	memset(buf, '\0', sizeof(buf));
	if (copy_from_user(buf, ubuf, csize))
		return -EFAULT;

	buf[U64STR_SIZE-1] = '\0';			/* just in case */
	err = strict_strtoull(buf, 10, &val);
	if (err)
		return -EINVAL;

	mutex_lock(&data.lock);
	*entry = val;
	mutex_unlock(&data.lock);

	return csize;
}
/**
 * debug_count_fopen - Open function for "count" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "count" debugfs
 * interface to the hardware latency detector.
 */
static int debug_count_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}

/**
 * debug_count_fread - Read function for "count" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "count" debugfs
 * interface to the hardware latency detector. Can be used to read the
 * number of latency readings exceeding the configured threshold since
 * the detector was last reset (e.g. by writing a zero into "count").
 */
static ssize_t debug_count_fread(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	return simple_data_read(filp, ubuf, cnt, ppos, &data.count);
}

/**
 * debug_count_fwrite - Write function for "count" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "count" debugfs
 * interface to the hardware latency detector. Can be used to write a
 * desired value, especially to zero the total count.
 */
static ssize_t debug_count_fwrite(struct file *filp,
				  const char __user *ubuf,
				  size_t cnt,
				  loff_t *ppos)
{
	return simple_data_write(filp, ubuf, cnt, ppos, &data.count);
}

/**
 * debug_enable_fopen - Dummy open function for "enable" debugfs interface
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "enable" debugfs
 * interface to the hardware latency detector.
 */
static int debug_enable_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}

/**
 * debug_enable_fread - Read function for "enable" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "enable" debugfs
 * interface to the hardware latency detector. Can be used to determine
 * whether the detector is currently enabled ("0\n" or "1\n" returned).
 */
static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	char buf[4];

	if ((cnt < sizeof(buf)) || (*ppos))
		return 0;

	buf[0] = enabled ? '1' : '0';
	buf[1] = '\n';
	buf[2] = '\0';
	if (copy_to_user(ubuf, buf, strlen(buf)))
		return -EFAULT;
	return *ppos = strlen(buf);
}
/**
 * debug_enable_fwrite - Write function for "enable" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "enable" debugfs
 * interface to the hardware latency detector. Can be used to enable or
 * disable the detector, which will have the side-effect of possibly
 * also resetting the global stats and kicking off the measuring
 * kthread (on an enable) or the converse (upon a disable).
 */
static ssize_t debug_enable_fwrite(struct file *filp,
				   const char __user *ubuf,
				   size_t cnt,
				   loff_t *ppos)
{
	char buf[4];
	int csize = min(cnt, sizeof(buf));
	long val = 0;
	int err = 0;

	memset(buf, '\0', sizeof(buf));
	if (copy_from_user(buf, ubuf, csize))
		return -EFAULT;

	buf[sizeof(buf)-1] = '\0';			/* just in case */
	err = strict_strtoul(buf, 10, &val);
	if (0 != err)
		return -EINVAL;

	if (val) {
		if (enabled)
			goto unlock;
		enabled = 1;
		__reset_stats();
		if (start_kthread())
			return -EFAULT;
	} else {
		if (!enabled)
			goto unlock;
		enabled = 0;
		stop_kthread();
		wake_up(&data.wq);		/* reader(s) should return */
	}

unlock:
	return csize;
}
/**
 * debug_max_fopen - Open function for "max" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "max" debugfs
 * interface to the hardware latency detector.
 */
static int debug_max_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}

/**
 * debug_max_fread - Read function for "max" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "max" debugfs
 * interface to the hardware latency detector. Can be used to determine
 * the maximum latency value observed since it was last reset.
 */
static ssize_t debug_max_fread(struct file *filp, char __user *ubuf,
			       size_t cnt, loff_t *ppos)
{
	return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample);
}

/**
 * debug_max_fwrite - Write function for "max" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "max" debugfs
 * interface to the hardware latency detector. Can be used to reset the
 * maximum or set it to some other desired value - if, then, subsequent
 * measurements exceed this value, the maximum will be updated.
 */
static ssize_t debug_max_fwrite(struct file *filp,
				const char __user *ubuf,
				size_t cnt,
				loff_t *ppos)
{
	return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample);
}

/**
 * debug_sample_fopen - An open function for "sample" debugfs interface
 * @inode: The in-kernel inode representation of this debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function handles opening the "sample" file within the hardware
 * latency detector debugfs directory interface. This file is used to read
 * raw samples from the global ring_buffer and allows the user to see a
 * running latency history. Can be opened blocking or non-blocking,
 * affecting whether it behaves as a buffer read pipe, or does not.
 * Implements simple locking to prevent multiple simultaneous use.
 */
static int debug_sample_fopen(struct inode *inode, struct file *filp)
{
	if (!atomic_add_unless(&data.sample_open, 1, 1))
		return -EBUSY;
	else
		return 0;
}
/**
 * debug_sample_fread - A read function for "sample" debugfs interface
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that will contain the samples read
 * @cnt: The maximum bytes to read from the debugfs "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function handles reading from the "sample" file within the hardware
 * latency detector debugfs directory interface. This file is used to read
 * raw samples from the global ring_buffer and allows the user to see a
 * running latency history. By default this will block pending a new
 * value written into the sample buffer, unless there are already a
 * number of value(s) waiting in the buffer, or the sample file was
 * previously opened in a non-blocking mode of operation.
 */
static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	int len = 0;
	char buf[64];
	struct sample *sample = NULL;

	if (!enabled)
		return 0;

	sample = kzalloc(sizeof(struct sample), GFP_KERNEL);
	if (!sample)
		return -ENOMEM;

	while (!buffer_get_sample(sample)) {

		DEFINE_WAIT(wait);

		if (filp->f_flags & O_NONBLOCK) {
			len = -EAGAIN;
			goto out;
		}

		prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE);
		schedule();
		finish_wait(&data.wq, &wait);

		if (signal_pending(current)) {
			len = -EINTR;
			goto out;
		}

		if (!enabled) {			/* enable was toggled */
			len = 0;
			goto out;
		}
	}

	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
		       sample->timestamp.tv_sec,
		       sample->timestamp.tv_nsec,
		       sample->duration);

	/* handling partial reads is more trouble than it's worth */
	if (len > cnt)
		goto out;

	if (copy_to_user(ubuf, buf, len))
		len = -EFAULT;

out:
	kfree(sample);
	return len;
}
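
/*
 * Each line produced by debug_sample_fread above therefore has the form
 * "<seconds>.<nanoseconds>\t<duration>", where the timestamp is the wall
 * time at which the latency was detected and the duration is the measured
 * gap in microseconds, e.g. "0000001234.0000005678	25" (example values).
 */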
/**
 * debug_sample_release - Release function for "sample" debugfs interface
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function completes the close of the debugfs interface "sample" file.
 * Frees the sample_open "lock" so that other users may open the interface.
 */
static int debug_sample_release(struct inode *inode, struct file *filp)
{
	atomic_dec(&data.sample_open);

	return 0;
}
/**
 * debug_threshold_fopen - Open function for "threshold" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "threshold" debugfs
 * interface to the hardware latency detector.
 */
static int debug_threshold_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}

/**
 * debug_threshold_fread - Read function for "threshold" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "threshold" debugfs
 * interface to the hardware latency detector. It can be used to determine
 * the current threshold level at which a latency will be recorded in the
 * global ring buffer, typically on the order of 10us.
 */
static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf,
				     size_t cnt, loff_t *ppos)
{
	return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold);
}

/**
 * debug_threshold_fwrite - Write function for "threshold" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "threshold" debugfs
 * interface to the hardware latency detector. It can be used to configure
 * the threshold level at which any subsequently detected latencies will
 * be recorded into the global ring buffer.
 */
static ssize_t debug_threshold_fwrite(struct file *filp,
				      const char __user *ubuf,
				      size_t cnt,
				      loff_t *ppos)
{
	int ret;

	ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold);

	if (enabled)
		wake_up_process(kthread);

	return ret;
}

/**
 * debug_width_fopen - Open function for "width" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "width" debugfs
 * interface to the hardware latency detector.
 */
static int debug_width_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}
/**
 * debug_width_fread - Read function for "width" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "width" debugfs
 * interface to the hardware latency detector. It can be used to determine
 * for how many us of the total window we will actively sample for any
 * hardware-induced latency periods. Obviously, it is not possible to
 * sample constantly and have the system respond to a sample reader, or,
 * worse, without having the system appear to have gone out to lunch.
 */
static ssize_t debug_width_fread(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width);
}

/**
 * debug_width_fwrite - Write function for "width" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "width" debugfs
 * interface to the hardware latency detector. It can be used to configure
 * for how many us of the total window we will actively sample for any
 * hardware-induced latency periods. Obviously, it is not possible to
 * sample constantly and have the system respond to a sample reader, or,
 * worse, without having the system appear to have gone out to lunch. It
 * is enforced that width is less than the total window size.
 */
static ssize_t debug_width_fwrite(struct file *filp,
				  const char __user *ubuf,
				  size_t cnt,
				  loff_t *ppos)
{
	char buf[U64STR_SIZE];
	int csize = min(cnt, sizeof(buf));
	u64 val = 0;
	int err = 0;

	memset(buf, '\0', sizeof(buf));
	if (copy_from_user(buf, ubuf, csize))
		return -EFAULT;

	buf[U64STR_SIZE-1] = '\0';			/* just in case */
	err = strict_strtoull(buf, 10, &val);
	if (0 != err)
		return -EINVAL;

	mutex_lock(&data.lock);
	if (val < data.sample_window)
		data.sample_width = val;
	else {
		mutex_unlock(&data.lock);
		return -EINVAL;
	}
	mutex_unlock(&data.lock);

	if (enabled)
		wake_up_process(kthread);

	return csize;
}
/**
 * debug_window_fopen - Open function for "window" debugfs entry
 * @inode: The in-kernel inode representation of the debugfs "file"
 * @filp: The active open file structure for the debugfs "file"
 *
 * This function provides an open implementation for the "window" debugfs
 * interface to the hardware latency detector. The window is the total time
 * in us that will be considered one sample period. Conceptually, windows
 * occur back-to-back and contain a sample width period during which
 * actual sampling occurs.
 */
static int debug_window_fopen(struct inode *inode, struct file *filp)
{
	return 0;
}

/**
 * debug_window_fread - Read function for "window" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a read implementation for the "window" debugfs
 * interface to the hardware latency detector. The window is the total time
 * in us that will be considered one sample period. Conceptually, windows
 * occur back-to-back and contain a sample width period during which
 * actual sampling occurs. Can be used to read the total window size.
 */
static ssize_t debug_window_fread(struct file *filp, char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window);
}
/**
 * debug_window_fwrite - Write function for "window" debugfs entry
 * @filp: The active open file structure for the debugfs "file"
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in the debugfs "file"
 *
 * This function provides a write implementation for the "window" debugfs
 * interface to the hardware latency detector. The window is the total time
 * in us that will be considered one sample period. Conceptually, windows
 * occur back-to-back and contain a sample width period during which
 * actual sampling occurs. Can be used to write a new total window size. It
 * is enforced that any value written must be greater than the sample width
 * size, or an error results.
 */
static ssize_t debug_window_fwrite(struct file *filp,
				   const char __user *ubuf,
				   size_t cnt,
				   loff_t *ppos)
{
	char buf[U64STR_SIZE];
	int csize = min(cnt, sizeof(buf));
	u64 val = 0;
	int err = 0;

	memset(buf, '\0', sizeof(buf));
	if (copy_from_user(buf, ubuf, csize))
		return -EFAULT;

	buf[U64STR_SIZE-1] = '\0';			/* just in case */
	err = strict_strtoull(buf, 10, &val);
	if (0 != err)
		return -EINVAL;

	mutex_lock(&data.lock);
	if (data.sample_width < val)
		data.sample_window = val;
	else {
		mutex_unlock(&data.lock);
		return -EINVAL;
	}
	mutex_unlock(&data.lock);

	return csize;
}
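
/*
 * Taken together, debug_width_fwrite and debug_window_fwrite above enforce
 * the invariant width < window from both directions: a new width must be
 * strictly smaller than the current window, and a new window must be
 * strictly larger than the current width, otherwise -EINVAL is returned.
 */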
/*
 * Function pointers for the "count" debugfs file operations
 */
static const struct file_operations count_fops = {
	.open		= debug_count_fopen,
	.read		= debug_count_fread,
	.write		= debug_count_fwrite,
	.owner		= THIS_MODULE,
};

/*
 * Function pointers for the "enable" debugfs file operations
 */
static const struct file_operations enable_fops = {
	.open		= debug_enable_fopen,
	.read		= debug_enable_fread,
	.write		= debug_enable_fwrite,
	.owner		= THIS_MODULE,
};

/*
 * Function pointers for the "max" debugfs file operations
 */
static const struct file_operations max_fops = {
	.open		= debug_max_fopen,
	.read		= debug_max_fread,
	.write		= debug_max_fwrite,
	.owner		= THIS_MODULE,
};

/*
 * Function pointers for the "sample" debugfs file operations
 */
static const struct file_operations sample_fops = {
	.open		= debug_sample_fopen,
	.read		= debug_sample_fread,
	.release	= debug_sample_release,
	.owner		= THIS_MODULE,
};

/*
 * Function pointers for the "threshold" debugfs file operations
 */
static const struct file_operations threshold_fops = {
	.open		= debug_threshold_fopen,
	.read		= debug_threshold_fread,
	.write		= debug_threshold_fwrite,
	.owner		= THIS_MODULE,
};

/*
 * Function pointers for the "width" debugfs file operations
 */
static const struct file_operations width_fops = {
	.open		= debug_width_fopen,
	.read		= debug_width_fread,
	.write		= debug_width_fwrite,
	.owner		= THIS_MODULE,
};

/*
 * Function pointers for the "window" debugfs file operations
 */
static const struct file_operations window_fops = {
	.open		= debug_window_fopen,
	.read		= debug_window_fread,
	.write		= debug_window_fwrite,
	.owner		= THIS_MODULE,
};
/**
 * init_debugfs - A function to initialize the debugfs interface files
 *
 * This function creates entries in debugfs for "hwlat_detector", including
 * files to read values from the detector, current samples, and the
 * maximum sample that has been captured since the hardware latency
 * detector was started.
 */
static int init_debugfs(void)
{
	int ret = -ENOMEM;

	debug_dir = debugfs_create_dir(DRVNAME, NULL);
	if (!debug_dir)
		goto err_debug_dir;

	debug_sample = debugfs_create_file("sample", 0444,
					   debug_dir, NULL,
					   &sample_fops);
	if (!debug_sample)
		goto err_sample;

	debug_count = debugfs_create_file("count", 0444,
					  debug_dir, NULL,
					  &count_fops);
	if (!debug_count)
		goto err_count;

	debug_max = debugfs_create_file("max", 0444,
					debug_dir, NULL,
					&max_fops);
	if (!debug_max)
		goto err_max;

	debug_sample_window = debugfs_create_file("window", 0644,
						  debug_dir, NULL,
						  &window_fops);
	if (!debug_sample_window)
		goto err_window;

	debug_sample_width = debugfs_create_file("width", 0644,
						 debug_dir, NULL,
						 &width_fops);
	if (!debug_sample_width)
		goto err_width;

	debug_threshold = debugfs_create_file("threshold", 0644,
					      debug_dir, NULL,
					      &threshold_fops);
	if (!debug_threshold)
		goto err_threshold;

	debug_enable = debugfs_create_file("enable", 0644,
					   debug_dir, &enabled,
					   &enable_fops);
	if (!debug_enable)
		goto err_enable;
	else {
		ret = 0;
		goto out;
	}

err_enable:
	debugfs_remove(debug_threshold);
err_threshold:
	debugfs_remove(debug_sample_width);
err_width:
	debugfs_remove(debug_sample_window);
err_window:
	debugfs_remove(debug_max);
err_max:
	debugfs_remove(debug_count);
err_count:
	debugfs_remove(debug_sample);
err_sample:
	debugfs_remove(debug_dir);
err_debug_dir:
out:
	return ret;
}
/**
 * free_debugfs - A function to cleanup the debugfs file interface
 */
static void free_debugfs(void)
{
	/* could also use a debugfs_remove_recursive */
	debugfs_remove(debug_enable);
	debugfs_remove(debug_threshold);
	debugfs_remove(debug_sample_width);
	debugfs_remove(debug_sample_window);
	debugfs_remove(debug_max);
	debugfs_remove(debug_count);
	debugfs_remove(debug_sample);
	debugfs_remove(debug_dir);
}

/**
 * detector_init - Standard module initialization code
 */
static int detector_init(void)
{
	int ret = -ENOMEM;

	printk(KERN_INFO BANNER "version %s\n", VERSION);

	ret = init_stats();
	if (0 != ret)
		goto out;

	ret = init_debugfs();
	if (0 != ret)
		goto err_stats;

	if (enabled)
		ret = start_kthread();

	goto out;

err_stats:
	ring_buffer_free(ring_buffer);
out:
	return ret;
}

/**
 * detector_exit - Standard module cleanup code
 */
static void detector_exit(void)
{
	if (enabled) {
		enabled = 0;
		stop_kthread();
	}

	free_debugfs();
	ring_buffer_free(ring_buffer);	/* free up the ring buffer */
}

module_init(detector_init);
module_exit(detector_exit);