// SPDX-License-Identifier: GPL-2.0-only
#include <linux/errno.h>
#include <linux/numa.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/threads.h>
#include <linux/preempt.h>
#include <linux/irqflags.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/device-mapper.h>

#include "dm-core.h"
#include "dm-stats.h"

#define DM_MSG_PREFIX "stats"
static int dm_stat_need_rcu_barrier;
/*
 * Using 64-bit values to avoid overflow (which is a
 * problem that block/genhd.c's IO accounting has).
 */
struct dm_stat_percpu {
	unsigned long long sectors[2];
	unsigned long long ios[2];
	unsigned long long merges[2];
	unsigned long long ticks[2];
	unsigned long long io_ticks[2];
	unsigned long long io_ticks_total;
	unsigned long long time_in_queue;
	unsigned long long *histogram;
};
struct dm_stat_shared {
	atomic_t in_flight[2];
	unsigned long long stamp;
	struct dm_stat_percpu tmp;
};
struct dm_stat {
	struct list_head list_entry;
	int id;
	unsigned int stat_flags;
	size_t n_entries;
	sector_t start;
	sector_t end;
	sector_t step;
	unsigned int n_histogram_entries;
	unsigned long long *histogram_boundaries;
	const char *program_id;
	const char *aux_data;
	struct rcu_head rcu_head;
	size_t shared_alloc_size;
	size_t percpu_alloc_size;
	size_t histogram_alloc_size;
	struct dm_stat_percpu *stat_percpu[NR_CPUS];
	struct dm_stat_shared stat_shared[] __counted_by(n_entries);
};
#define STAT_PRECISE_TIMESTAMPS 1
struct dm_stats_last_position {
	sector_t last_sector;
	unsigned int last_rw;
};
#define DM_STAT_MAX_ENTRIES		8388608
#define DM_STAT_MAX_HISTOGRAM_ENTRIES	134217728
/*
 * A typo on the command line could possibly make the kernel run out of memory
 * and crash. To prevent the crash we account all used memory. We fail if we
 * exhaust 1/4 of all memory or 1/2 of vmalloc space.
 */
#define DM_STATS_MEMORY_FACTOR		4
#define DM_STATS_VMALLOC_FACTOR		2
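/*
 * Illustrative numbers (not from the original source): on a machine with
 * 4 GiB of RAM, __check_shared_memory() below refuses any allocation that
 * would push the accounted total above roughly 1 GiB (totalram_pages() / 4),
 * or above half of the vmalloc address range, whichever limit is hit first.
 */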
static DEFINE_SPINLOCK(shared_memory_lock);

static unsigned long shared_memory_amount;
static bool __check_shared_memory(size_t alloc_size)
{
	size_t a;

	a = shared_memory_amount + alloc_size;
	if (a < shared_memory_amount)
		return false;
	if (a >> PAGE_SHIFT > totalram_pages() / DM_STATS_MEMORY_FACTOR)
		return false;
	if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR)
		return false;

	return true;
}
static bool check_shared_memory(size_t alloc_size)
{
	bool ret;

	spin_lock_irq(&shared_memory_lock);

	ret = __check_shared_memory(alloc_size);

	spin_unlock_irq(&shared_memory_lock);

	return ret;
}
static bool claim_shared_memory(size_t alloc_size)
{
	spin_lock_irq(&shared_memory_lock);

	if (!__check_shared_memory(alloc_size)) {
		spin_unlock_irq(&shared_memory_lock);
		return false;
	}

	shared_memory_amount += alloc_size;

	spin_unlock_irq(&shared_memory_lock);

	return true;
}
static void free_shared_memory(size_t alloc_size)
{
	unsigned long flags;

	spin_lock_irqsave(&shared_memory_lock, flags);

	if (WARN_ON_ONCE(shared_memory_amount < alloc_size)) {
		spin_unlock_irqrestore(&shared_memory_lock, flags);
		DMCRIT("Memory usage accounting bug.");
		return;
	}

	shared_memory_amount -= alloc_size;

	spin_unlock_irqrestore(&shared_memory_lock, flags);
}
static void *dm_kvzalloc(size_t alloc_size, int node)
{
	void *p;

	if (!claim_shared_memory(alloc_size))
		return NULL;

	p = kvzalloc_node(alloc_size, GFP_KERNEL | __GFP_NOMEMALLOC, node);
	if (p)
		return p;

	free_shared_memory(alloc_size);

	return NULL;
}
static void dm_kvfree(void *ptr, size_t alloc_size)
{
	if (!ptr)
		return;

	kvfree(ptr);

	free_shared_memory(alloc_size);
}
static void dm_stat_free(struct rcu_head *head)
{
	int cpu;
	struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);

	kfree(s->histogram_boundaries);
	kfree(s->program_id);
	kfree(s->aux_data);
	for_each_possible_cpu(cpu) {
		dm_kvfree(s->stat_percpu[cpu][0].histogram, s->histogram_alloc_size);
		dm_kvfree(s->stat_percpu[cpu], s->percpu_alloc_size);
	}
	dm_kvfree(s->stat_shared[0].tmp.histogram, s->histogram_alloc_size);
	dm_kvfree(s, s->shared_alloc_size);
}
static int dm_stat_in_flight(struct dm_stat_shared *shared)
{
	return atomic_read(&shared->in_flight[READ]) +
	       atomic_read(&shared->in_flight[WRITE]);
}
int dm_stats_init(struct dm_stats *stats)
{
	int cpu;
	struct dm_stats_last_position *last;

	mutex_init(&stats->mutex);
	INIT_LIST_HEAD(&stats->list);
	stats->precise_timestamps = false;
	stats->last = alloc_percpu(struct dm_stats_last_position);
	if (!stats->last)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		last = per_cpu_ptr(stats->last, cpu);
		last->last_sector = (sector_t)ULLONG_MAX;
		last->last_rw = UINT_MAX;
	}

	return 0;
}
void dm_stats_cleanup(struct dm_stats *stats)
{
	size_t ni;
	struct dm_stat *s;
	struct dm_stat_shared *shared;

	while (!list_empty(&stats->list)) {
		s = container_of(stats->list.next, struct dm_stat, list_entry);
		list_del(&s->list_entry);
		for (ni = 0; ni < s->n_entries; ni++) {
			shared = &s->stat_shared[ni];
			if (WARN_ON(dm_stat_in_flight(shared))) {
				DMCRIT("leaked in-flight counter at index %lu "
				       "(start %llu, end %llu, step %llu): reads %d, writes %d",
				       (unsigned long)ni,
				       (unsigned long long)s->start,
				       (unsigned long long)s->end,
				       (unsigned long long)s->step,
				       atomic_read(&shared->in_flight[READ]),
				       atomic_read(&shared->in_flight[WRITE]));
			}
		}
		dm_stat_free(&s->rcu_head);
	}
	free_percpu(stats->last);
	mutex_destroy(&stats->mutex);
}
static void dm_stats_recalc_precise_timestamps(struct dm_stats *stats)
{
	struct list_head *l;
	struct dm_stat *tmp_s;
	bool precise_timestamps = false;

	list_for_each(l, &stats->list) {
		tmp_s = container_of(l, struct dm_stat, list_entry);
		if (tmp_s->stat_flags & STAT_PRECISE_TIMESTAMPS) {
			precise_timestamps = true;
			break;
		}
	}
	stats->precise_timestamps = precise_timestamps;
}
static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
			   sector_t step, unsigned int stat_flags,
			   unsigned int n_histogram_entries,
			   unsigned long long *histogram_boundaries,
			   const char *program_id, const char *aux_data,
			   void (*suspend_callback)(struct mapped_device *),
			   void (*resume_callback)(struct mapped_device *),
			   struct mapped_device *md)
{
	struct list_head *l;
	struct dm_stat *s, *tmp_s;
	sector_t n_entries;
	size_t ni;
	size_t shared_alloc_size;
	size_t percpu_alloc_size;
	size_t histogram_alloc_size;
	struct dm_stat_percpu *p;
	int cpu;
	int ret_id;
	int r;

	if (end < start || !step)
		return -EINVAL;

	n_entries = end - start;
	if (dm_sector_div64(n_entries, step))
		n_entries++;

	if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
		return -EOVERFLOW;

	if (n_entries > DM_STAT_MAX_ENTRIES)
		return -EOVERFLOW;

	shared_alloc_size = struct_size(s, stat_shared, n_entries);
	if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
		return -EOVERFLOW;

	percpu_alloc_size = (size_t)n_entries * sizeof(struct dm_stat_percpu);
	if (percpu_alloc_size / sizeof(struct dm_stat_percpu) != n_entries)
		return -EOVERFLOW;

	histogram_alloc_size = (n_histogram_entries + 1) * (size_t)n_entries * sizeof(unsigned long long);
	if (histogram_alloc_size / (n_histogram_entries + 1) != (size_t)n_entries * sizeof(unsigned long long))
		return -EOVERFLOW;

	if ((n_histogram_entries + 1) * (size_t)n_entries > DM_STAT_MAX_HISTOGRAM_ENTRIES)
		return -EOVERFLOW;

	if (!check_shared_memory(shared_alloc_size + histogram_alloc_size +
				 num_possible_cpus() * (percpu_alloc_size + histogram_alloc_size)))
		return -ENOMEM;

	s = dm_kvzalloc(shared_alloc_size, NUMA_NO_NODE);
	if (!s)
		return -ENOMEM;

	s->stat_flags = stat_flags;
	s->n_entries = n_entries;
	s->start = start;
	s->end = end;
	s->step = step;
	s->shared_alloc_size = shared_alloc_size;
	s->percpu_alloc_size = percpu_alloc_size;
	s->histogram_alloc_size = histogram_alloc_size;

	s->n_histogram_entries = n_histogram_entries;
	s->histogram_boundaries = kmemdup(histogram_boundaries,
					  s->n_histogram_entries * sizeof(unsigned long long), GFP_KERNEL);
	if (!s->histogram_boundaries) {
		r = -ENOMEM;
		goto out;
	}

	s->program_id = kstrdup(program_id, GFP_KERNEL);
	if (!s->program_id) {
		r = -ENOMEM;
		goto out;
	}
	s->aux_data = kstrdup(aux_data, GFP_KERNEL);
	if (!s->aux_data) {
		r = -ENOMEM;
		goto out;
	}

	for (ni = 0; ni < n_entries; ni++) {
		atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
		atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
	}

	if (s->n_histogram_entries) {
		unsigned long long *hi;

		hi = dm_kvzalloc(s->histogram_alloc_size, NUMA_NO_NODE);
		if (!hi) {
			r = -ENOMEM;
			goto out;
		}
		for (ni = 0; ni < n_entries; ni++) {
			s->stat_shared[ni].tmp.histogram = hi;
			hi += s->n_histogram_entries + 1;
		}
	}

	for_each_possible_cpu(cpu) {
		p = dm_kvzalloc(percpu_alloc_size, cpu_to_node(cpu));
		if (!p) {
			r = -ENOMEM;
			goto out;
		}
		s->stat_percpu[cpu] = p;
		if (s->n_histogram_entries) {
			unsigned long long *hi;

			hi = dm_kvzalloc(s->histogram_alloc_size, cpu_to_node(cpu));
			if (!hi) {
				r = -ENOMEM;
				goto out;
			}
			for (ni = 0; ni < n_entries; ni++) {
				p[ni].histogram = hi;
				hi += s->n_histogram_entries + 1;
			}
		}
	}

	/*
	 * Suspend/resume to make sure there is no i/o in flight,
	 * so that newly created statistics will be exact.
	 *
	 * (note: we couldn't suspend earlier because we must not
	 * allocate memory while suspended)
	 */
	suspend_callback(md);

	mutex_lock(&stats->mutex);
	s->id = 0;
	list_for_each(l, &stats->list) {
		tmp_s = container_of(l, struct dm_stat, list_entry);
		if (WARN_ON(tmp_s->id < s->id)) {
			r = -EINVAL;
			goto out_unlock_resume;
		}
		if (tmp_s->id > s->id)
			break;
		if (unlikely(s->id == INT_MAX)) {
			r = -ENFILE;
			goto out_unlock_resume;
		}
		s->id++;
	}
	ret_id = s->id;
	list_add_tail_rcu(&s->list_entry, l);

	dm_stats_recalc_precise_timestamps(stats);

	if (!static_key_enabled(&stats_enabled.key))
		static_branch_enable(&stats_enabled);

	mutex_unlock(&stats->mutex);

	resume_callback(md);

	return ret_id;

out_unlock_resume:
	mutex_unlock(&stats->mutex);
	resume_callback(md);
out:
	dm_stat_free(&s->rcu_head);
	return r;
}
static struct dm_stat *__dm_stats_find(struct dm_stats *stats, int id)
{
	struct dm_stat *s;

	list_for_each_entry(s, &stats->list, list_entry) {
		if (s->id > id)
			break;
		if (s->id == id)
			return s;
	}

	return NULL;
}
static int dm_stats_delete(struct dm_stats *stats, int id)
{
	struct dm_stat *s;
	int cpu;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	list_del_rcu(&s->list_entry);

	dm_stats_recalc_precise_timestamps(stats);

	mutex_unlock(&stats->mutex);

	/*
	 * vfree can't be called from RCU callback
	 */
	for_each_possible_cpu(cpu)
		if (is_vmalloc_addr(s->stat_percpu) ||
		    is_vmalloc_addr(s->stat_percpu[cpu][0].histogram))
			goto do_sync_free;
	if (is_vmalloc_addr(s) ||
	    is_vmalloc_addr(s->stat_shared[0].tmp.histogram)) {
do_sync_free:
		synchronize_rcu_expedited();
		dm_stat_free(&s->rcu_head);
	} else {
		WRITE_ONCE(dm_stat_need_rcu_barrier, 1);
		call_rcu(&s->rcu_head, dm_stat_free);
	}

	return 0;
}
static int dm_stats_list(struct dm_stats *stats, const char *program,
			 char *result, unsigned int maxlen)
{
	struct dm_stat *s;
	sector_t len;
	unsigned int sz = 0;

	/*
	 * Output format:
	 *   <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
	 */

	mutex_lock(&stats->mutex);
	list_for_each_entry(s, &stats->list, list_entry) {
		if (!program || !strcmp(program, s->program_id)) {
			len = s->end - s->start;
			DMEMIT("%d: %llu+%llu %llu %s %s", s->id,
			       (unsigned long long)s->start,
			       (unsigned long long)len,
			       (unsigned long long)s->step,
			       s->program_id,
			       s->aux_data);
			if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
				DMEMIT(" precise_timestamps");
			if (s->n_histogram_entries) {
				unsigned int i;

				DMEMIT(" histogram:");
				for (i = 0; i < s->n_histogram_entries; i++) {
					if (i)
						DMEMIT(",");
					DMEMIT("%llu", s->histogram_boundaries[i]);
				}
			}
			DMEMIT("\n");
		}
	}
	mutex_unlock(&stats->mutex);

	return 1;
}
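/*
 * Example of one @stats_list output line produced above (illustrative values
 * only): "0: 0+2097152 65536 my_program - precise_timestamps
 * histogram:100,1000,10000".
 */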
static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
			  struct dm_stat_percpu *p)
{
	/*
	 * This is racy, but so is part_round_stats_single.
	 */
	unsigned long long now, difference;
	unsigned int in_flight_read, in_flight_write;

	if (likely(!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)))
		now = jiffies;
	else
		now = ktime_to_ns(ktime_get());

	difference = now - shared->stamp;
	if (!difference)
		return;

	in_flight_read = (unsigned int)atomic_read(&shared->in_flight[READ]);
	in_flight_write = (unsigned int)atomic_read(&shared->in_flight[WRITE]);
	if (in_flight_read)
		p->io_ticks[READ] += difference;
	if (in_flight_write)
		p->io_ticks[WRITE] += difference;
	if (in_flight_read + in_flight_write) {
		p->io_ticks_total += difference;
		p->time_in_queue += (in_flight_read + in_flight_write) * difference;
	}
	shared->stamp = now;
}
static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
			      int idx, sector_t len,
			      struct dm_stats_aux *stats_aux, bool end,
			      unsigned long duration_jiffies)
{
	struct dm_stat_shared *shared = &s->stat_shared[entry];
	struct dm_stat_percpu *p;

	/*
	 * For strict correctness we should use local_irq_save/restore
	 * instead of preempt_disable/enable.
	 *
	 * preempt_disable/enable is racy if the driver finishes bios
	 * from non-interrupt context as well as from interrupt context
	 * or from more different interrupts.
	 *
	 * On 64-bit architectures the race only results in not counting some
	 * events, so it is acceptable. On 32-bit architectures the race could
	 * cause the counter going off by 2^32, so we need to do proper locking
	 * there.
	 *
	 * part_stat_lock()/part_stat_unlock() have this race too.
	 */
#if BITS_PER_LONG == 32
	unsigned long flags;

	local_irq_save(flags);
#else
	preempt_disable();
#endif
	p = &s->stat_percpu[smp_processor_id()][entry];

	if (!end) {
		dm_stat_round(s, shared, p);
		atomic_inc(&shared->in_flight[idx]);
	} else {
		unsigned long long duration;

		dm_stat_round(s, shared, p);
		atomic_dec(&shared->in_flight[idx]);
		p->sectors[idx] += len;
		p->ios[idx] += 1;
		p->merges[idx] += stats_aux->merged;
		if (!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)) {
			p->ticks[idx] += duration_jiffies;
			duration = jiffies_to_msecs(duration_jiffies);
		} else {
			p->ticks[idx] += stats_aux->duration_ns;
			duration = stats_aux->duration_ns;
		}
		if (s->n_histogram_entries) {
			unsigned int lo = 0, hi = s->n_histogram_entries + 1;

			while (lo + 1 < hi) {
				unsigned int mid = (lo + hi) / 2;

				if (s->histogram_boundaries[mid - 1] > duration)
					hi = mid;
				else
					lo = mid;
			}
			p->histogram[lo]++;
		}
	}

#if BITS_PER_LONG == 32
	local_irq_restore(flags);
#else
	preempt_enable();
#endif
}
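/*
 * The binary search above picks the histogram bin whose index equals the
 * number of boundaries that are <= the measured duration. For example
 * (illustrative boundaries, not from the original source), with boundaries
 * {10, 100, 1000} there are four bins; a duration of 50 increments bin 1
 * (covering [10, 100)) and a duration of 5000 increments bin 3 (>= 1000).
 */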
static void __dm_stat_bio(struct dm_stat *s, int bi_rw,
			  sector_t bi_sector, sector_t end_sector,
			  bool end, unsigned long duration_jiffies,
			  struct dm_stats_aux *stats_aux)
{
	sector_t rel_sector, offset, todo, fragment_len;
	size_t entry;

	if (end_sector <= s->start || bi_sector >= s->end)
		return;

	if (unlikely(bi_sector < s->start)) {
		rel_sector = 0;
		todo = end_sector - s->start;
	} else {
		rel_sector = bi_sector - s->start;
		todo = end_sector - bi_sector;
	}
	if (unlikely(end_sector > s->end))
		todo -= (end_sector - s->end);

	offset = dm_sector_div64(rel_sector, s->step);
	entry = rel_sector;
	do {
		if (WARN_ON_ONCE(entry >= s->n_entries)) {
			DMCRIT("Invalid area access in region id %d", s->id);
			return;
		}
		fragment_len = todo;
		if (fragment_len > s->step - offset)
			fragment_len = s->step - offset;
		dm_stat_for_entry(s, entry, bi_rw, fragment_len,
				  stats_aux, end, duration_jiffies);
		todo -= fragment_len;
		entry++;
		offset = 0;
	} while (unlikely(todo != 0));
}
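/*
 * Worked example for the splitting loop above (illustrative numbers): for a
 * region starting at sector 0 with step 100, a bio covering sectors 250-549
 * is accounted as four fragments: 50 sectors to area 2, 100 to area 3, 100
 * to area 4 and 50 to area 5. dm_sector_div64() leaves the area index in
 * rel_sector and returns the offset into that area.
 */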
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
			 sector_t bi_sector, unsigned int bi_sectors, bool end,
			 unsigned long start_time,
			 struct dm_stats_aux *stats_aux)
{
	struct dm_stat *s;
	sector_t end_sector;
	struct dm_stats_last_position *last;
	bool got_precise_time;
	unsigned long duration_jiffies = 0;

	if (unlikely(!bi_sectors))
		return;

	end_sector = bi_sector + bi_sectors;

	if (!end) {
		/*
		 * A race condition can at worst result in the merged flag being
		 * misrepresented, so we don't have to disable preemption here.
		 */
		last = raw_cpu_ptr(stats->last);
		stats_aux->merged =
			(bi_sector == (READ_ONCE(last->last_sector) &&
				       ((bi_rw == WRITE) ==
					(READ_ONCE(last->last_rw) == WRITE))
				       ));
		WRITE_ONCE(last->last_sector, end_sector);
		WRITE_ONCE(last->last_rw, bi_rw);
	} else
		duration_jiffies = jiffies - start_time;

	rcu_read_lock();

	got_precise_time = false;
	list_for_each_entry_rcu(s, &stats->list, list_entry) {
		if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
			/* start (!end) duration_ns is set by DM core's alloc_io() */
			if (end)
				stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
			got_precise_time = true;
		}
		__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
	}

	rcu_read_unlock();
}
static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared,
						   struct dm_stat *s, size_t x)
{
	int cpu;
	struct dm_stat_percpu *p;

	local_irq_disable();
	p = &s->stat_percpu[smp_processor_id()][x];
	dm_stat_round(s, shared, p);
	local_irq_enable();

	shared->tmp.sectors[READ] = 0;
	shared->tmp.sectors[WRITE] = 0;
	shared->tmp.ios[READ] = 0;
	shared->tmp.ios[WRITE] = 0;
	shared->tmp.merges[READ] = 0;
	shared->tmp.merges[WRITE] = 0;
	shared->tmp.ticks[READ] = 0;
	shared->tmp.ticks[WRITE] = 0;
	shared->tmp.io_ticks[READ] = 0;
	shared->tmp.io_ticks[WRITE] = 0;
	shared->tmp.io_ticks_total = 0;
	shared->tmp.time_in_queue = 0;

	if (s->n_histogram_entries)
		memset(shared->tmp.histogram, 0, (s->n_histogram_entries + 1) * sizeof(unsigned long long));

	for_each_possible_cpu(cpu) {
		p = &s->stat_percpu[cpu][x];
		shared->tmp.sectors[READ] += READ_ONCE(p->sectors[READ]);
		shared->tmp.sectors[WRITE] += READ_ONCE(p->sectors[WRITE]);
		shared->tmp.ios[READ] += READ_ONCE(p->ios[READ]);
		shared->tmp.ios[WRITE] += READ_ONCE(p->ios[WRITE]);
		shared->tmp.merges[READ] += READ_ONCE(p->merges[READ]);
		shared->tmp.merges[WRITE] += READ_ONCE(p->merges[WRITE]);
		shared->tmp.ticks[READ] += READ_ONCE(p->ticks[READ]);
		shared->tmp.ticks[WRITE] += READ_ONCE(p->ticks[WRITE]);
		shared->tmp.io_ticks[READ] += READ_ONCE(p->io_ticks[READ]);
		shared->tmp.io_ticks[WRITE] += READ_ONCE(p->io_ticks[WRITE]);
		shared->tmp.io_ticks_total += READ_ONCE(p->io_ticks_total);
		shared->tmp.time_in_queue += READ_ONCE(p->time_in_queue);
		if (s->n_histogram_entries) {
			unsigned int i;

			for (i = 0; i < s->n_histogram_entries + 1; i++)
				shared->tmp.histogram[i] += READ_ONCE(p->histogram[i]);
		}
	}
}
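/*
 * Note on the snapshot above: shared->tmp holds a lockless sum of the
 * per-CPU counters for one area. The counters keep running while the sum is
 * taken, so the totals are an approximate snapshot suitable for reporting,
 * not an exact point-in-time value.
 */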
static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
			    bool init_tmp_percpu_totals)
{
	size_t x;
	struct dm_stat_shared *shared;
	struct dm_stat_percpu *p;

	for (x = idx_start; x < idx_end; x++) {
		shared = &s->stat_shared[x];
		if (init_tmp_percpu_totals)
			__dm_stat_init_temporary_percpu_totals(shared, s, x);
		local_irq_disable();
		p = &s->stat_percpu[smp_processor_id()][x];
		p->sectors[READ] -= shared->tmp.sectors[READ];
		p->sectors[WRITE] -= shared->tmp.sectors[WRITE];
		p->ios[READ] -= shared->tmp.ios[READ];
		p->ios[WRITE] -= shared->tmp.ios[WRITE];
		p->merges[READ] -= shared->tmp.merges[READ];
		p->merges[WRITE] -= shared->tmp.merges[WRITE];
		p->ticks[READ] -= shared->tmp.ticks[READ];
		p->ticks[WRITE] -= shared->tmp.ticks[WRITE];
		p->io_ticks[READ] -= shared->tmp.io_ticks[READ];
		p->io_ticks[WRITE] -= shared->tmp.io_ticks[WRITE];
		p->io_ticks_total -= shared->tmp.io_ticks_total;
		p->time_in_queue -= shared->tmp.time_in_queue;
		local_irq_enable();
		if (s->n_histogram_entries) {
			unsigned int i;

			for (i = 0; i < s->n_histogram_entries + 1; i++) {
				local_irq_disable();
				p = &s->stat_percpu[smp_processor_id()][x];
				p->histogram[i] -= shared->tmp.histogram[i];
				local_irq_enable();
			}
		}
	}
}
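/*
 * Clearing trick used above: instead of writing to every CPU's counters, the
 * snapshot accumulated in shared->tmp is subtracted from the local CPU's
 * counters only. The per-area totals (always computed as the sum over all
 * CPUs) therefore drop back to zero without ever touching another CPU's
 * cache lines.
 */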
static int dm_stats_clear(struct dm_stats *stats, int id)
{
	struct dm_stat *s;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	__dm_stat_clear(s, 0, s->n_entries, true);

	mutex_unlock(&stats->mutex);

	return 1;
}
/*
 * This is like jiffies_to_msec, but works for 64-bit values.
 */
static unsigned long long dm_jiffies_to_msec64(struct dm_stat *s, unsigned long long j)
{
	unsigned long long result;
	unsigned int mult;

	if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
		return j;

	result = jiffies_to_msecs(j & 0x3fffff);
	if (j >= 1 << 22) {
		mult = jiffies_to_msecs(1 << 22);
		result += (unsigned long long)mult * (unsigned long long)jiffies_to_msecs((j >> 22) & 0x3fffff);
	}
	if (j >= 1ULL << 44)
		result += (unsigned long long)mult * (unsigned long long)mult * (unsigned long long)jiffies_to_msecs(j >> 44);

	return result;
}
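/*
 * The conversion above splits the 64-bit jiffies value into 22-bit chunks so
 * that each chunk stays within the 32-bit range jiffies_to_msecs() accepts:
 * result = msecs(j & 0x3fffff) + mult * msecs((j >> 22) & 0x3fffff)
 *        + mult^2 * msecs(j >> 44), where mult = jiffies_to_msecs(1 << 22).
 * Illustrative check assuming HZ=1000 (1 jiffy = 1 ms): j = 1 << 23 gives
 * 0 + 4194304 * 2 = 8388608 ms, which matches j itself.
 */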
static int dm_stats_print(struct dm_stats *stats, int id,
			  size_t idx_start, size_t idx_len,
			  bool clear, char *result, unsigned int maxlen)
{
	unsigned int sz = 0;
	struct dm_stat *s;
	size_t x;
	sector_t start, end, step;
	size_t idx_end;
	struct dm_stat_shared *shared;

	/*
	 * Output format:
	 *   <start_sector>+<length> counters
	 */

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	idx_end = idx_start + idx_len;
	if (idx_end < idx_start ||
	    idx_end > s->n_entries)
		idx_end = s->n_entries;

	if (idx_start > idx_end)
		idx_start = idx_end;

	step = s->step;
	start = s->start + (step * idx_start);

	for (x = idx_start; x < idx_end; x++, start = end) {
		shared = &s->stat_shared[x];
		end = start + step;
		if (unlikely(end > s->end))
			end = s->end;

		__dm_stat_init_temporary_percpu_totals(shared, s, x);

		DMEMIT("%llu+%llu %llu %llu %llu %llu %llu %llu %llu %llu %d %llu %llu %llu %llu",
		       (unsigned long long)start,
		       (unsigned long long)step,
		       shared->tmp.ios[READ],
		       shared->tmp.merges[READ],
		       shared->tmp.sectors[READ],
		       dm_jiffies_to_msec64(s, shared->tmp.ticks[READ]),
		       shared->tmp.ios[WRITE],
		       shared->tmp.merges[WRITE],
		       shared->tmp.sectors[WRITE],
		       dm_jiffies_to_msec64(s, shared->tmp.ticks[WRITE]),
		       dm_stat_in_flight(shared),
		       dm_jiffies_to_msec64(s, shared->tmp.io_ticks_total),
		       dm_jiffies_to_msec64(s, shared->tmp.time_in_queue),
		       dm_jiffies_to_msec64(s, shared->tmp.io_ticks[READ]),
		       dm_jiffies_to_msec64(s, shared->tmp.io_ticks[WRITE]));
		if (s->n_histogram_entries) {
			unsigned int i;

			for (i = 0; i < s->n_histogram_entries + 1; i++)
				DMEMIT("%s%llu", !i ? " " : ":", shared->tmp.histogram[i]);
		}
		DMEMIT("\n");

		if (unlikely(sz + 1 >= maxlen))
			goto buffer_overflow;
	}

	if (clear)
		__dm_stat_clear(s, idx_start, idx_end, false);

buffer_overflow:
	mutex_unlock(&stats->mutex);

	return 1;
}
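/*
 * For reference, the thirteen counters printed per area above are, in order:
 * reads completed, reads merged, sectors read, read time, writes completed,
 * writes merged, sectors written, write time, I/Os currently in flight, time
 * spent doing I/O, weighted time spent doing I/O, and the per-direction read
 * and write busy times; each line is preceded by the area's start+length.
 * Times are in milliseconds, or raw nanoseconds when precise_timestamps is
 * enabled, and histogram bin counts are appended when a histogram is
 * configured.
 */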
static int dm_stats_set_aux(struct dm_stats *stats, int id, const char *aux_data)
{
	struct dm_stat *s;
	const char *new_aux_data;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	new_aux_data = kstrdup(aux_data, GFP_KERNEL);
	if (!new_aux_data) {
		mutex_unlock(&stats->mutex);
		return -ENOMEM;
	}

	kfree(s->aux_data);
	s->aux_data = new_aux_data;

	mutex_unlock(&stats->mutex);

	return 0;
}
static int parse_histogram(const char *h, unsigned int *n_histogram_entries,
			   unsigned long long **histogram_boundaries)
{
	const char *q;
	unsigned int n;
	unsigned long long last;

	*n_histogram_entries = 1;
	for (q = h; *q; q++)
		if (*q == ',')
			(*n_histogram_entries)++;

	*histogram_boundaries = kmalloc_array(*n_histogram_entries,
					      sizeof(unsigned long long),
					      GFP_KERNEL);
	if (!*histogram_boundaries)
		return -ENOMEM;

	n = 0;
	last = 0;
	while (1) {
		unsigned long long hi;
		int s;
		char ch;

		s = sscanf(h, "%llu%c", &hi, &ch);
		if (!s || (s == 2 && ch != ','))
			return -EINVAL;
		if (hi <= last)
			return -EINVAL;
		last = hi;
		(*histogram_boundaries)[n] = hi;
		if (s == 1)
			return 0;
		h = strchr(h, ',') + 1;
		n++;
	}
}
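/*
 * Example (illustrative): passing "1,10,100" to parse_histogram() yields
 * n_histogram_entries = 3 with boundaries {1, 10, 100}; the region then
 * collects counts in four bins (< 1, [1, 10), [10, 100) and >= 100).
 * Boundaries must be strictly increasing or -EINVAL is returned.
 */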
static int message_stats_create(struct mapped_device *md,
				unsigned int argc, char **argv,
				char *result, unsigned int maxlen)
{
	int r;
	int id;
	char dummy;
	unsigned long long start, end, len, step;
	unsigned int divisor;
	const char *program_id, *aux_data;
	unsigned int stat_flags = 0;
	unsigned int n_histogram_entries = 0;
	unsigned long long *histogram_boundaries = NULL;
	struct dm_arg_set as, as_backup;
	const char *a;
	unsigned int feature_args;

	/*
	 * Input format:
	 *   <range> <step> [<extra_parameters> <parameters>] [<program_id> [<aux_data>]]
	 */

	if (argc < 3)
		goto ret_einval;

	as.argc = argc;
	as.argv = argv;
	dm_consume_args(&as, 1);

	a = dm_shift_arg(&as);
	if (!strcmp(a, "-")) {
		start = 0;
		len = dm_get_size(md);
		if (!len)
			len = 1;
	} else if (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
		   start != (sector_t)start || len != (sector_t)len)
		goto ret_einval;

	end = start + len;

	a = dm_shift_arg(&as);
	if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) {
		if (!divisor)
			return -EINVAL;
		step = end - start;
		if (do_div(step, divisor))
			step++;
		if (!step)
			step = 1;
	} else if (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
		   step != (sector_t)step || !step)
		goto ret_einval;

	as_backup = as;
	a = dm_shift_arg(&as);
	if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) {
		while (feature_args--) {
			a = dm_shift_arg(&as);
			if (!a)
				goto ret_einval;
			if (!strcasecmp(a, "precise_timestamps"))
				stat_flags |= STAT_PRECISE_TIMESTAMPS;
			else if (!strncasecmp(a, "histogram:", 10)) {
				if (n_histogram_entries)
					goto ret_einval;
				r = parse_histogram(a + 10, &n_histogram_entries, &histogram_boundaries);
				if (r)
					goto ret;
			} else
				goto ret_einval;
		}
	} else {
		/* no feature arguments supplied */
		as = as_backup;
	}

	program_id = "-";
	aux_data = "-";

	a = dm_shift_arg(&as);
	if (a)
		program_id = a;

	a = dm_shift_arg(&as);
	if (a)
		aux_data = a;

	if (as.argc)
		goto ret_einval;

	/*
	 * If a buffer overflow happens after we created the region,
	 * it's too late (the userspace would retry with a larger
	 * buffer, but the region id that caused the overflow is already
	 * leaked). So we must detect buffer overflow in advance.
	 */
	snprintf(result, maxlen, "%d", INT_MAX);
	if (dm_message_test_buffer_overflow(result, maxlen)) {
		r = 1;
		goto ret;
	}

	id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags,
			     n_histogram_entries, histogram_boundaries, program_id, aux_data,
			     dm_internal_suspend_fast, dm_internal_resume_fast, md);
	if (id < 0) {
		r = id;
		goto ret;
	}

	snprintf(result, maxlen, "%d", id);

	r = 1;
	goto ret;

ret_einval:
	r = -EINVAL;
ret:
	kfree(histogram_boundaries);
	return r;
}
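/*
 * Illustrative @stats_create messages accepted by the parser above:
 *   "@stats_create - /100 myprogram -" - split the whole device into 100
 *     areas, tagged with program id "myprogram" and no aux data;
 *   "@stats_create 0+1024 256 2 precise_timestamps histogram:50,100,200 p d"
 *     - four 256-sector areas starting at sector 0, with precise timestamps
 *     and a latency histogram ("2" is the number of feature arguments).
 * The message replies with the numeric id of the new region.
 */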
static int message_stats_delete(struct mapped_device *md,
				unsigned int argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 2)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_delete(dm_get_stats(md), id);
}
static int message_stats_clear(struct mapped_device *md,
			       unsigned int argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 2)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_clear(dm_get_stats(md), id);
}
static int message_stats_list(struct mapped_device *md,
			      unsigned int argc, char **argv,
			      char *result, unsigned int maxlen)
{
	int r;
	const char *program = NULL;

	if (argc < 1 || argc > 2)
		return -EINVAL;

	if (argc > 1) {
		program = kstrdup(argv[1], GFP_KERNEL);
		if (!program)
			return -ENOMEM;
	}

	r = dm_stats_list(dm_get_stats(md), program, result, maxlen);

	kfree(program);

	return r;
}
static int message_stats_print(struct mapped_device *md,
			       unsigned int argc, char **argv, bool clear,
			       char *result, unsigned int maxlen)
{
	int id;
	char dummy;
	unsigned long idx_start = 0, idx_len = ULONG_MAX;

	if (argc != 2 && argc != 4)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	if (argc > 3) {
		if (strcmp(argv[2], "-") &&
		    sscanf(argv[2], "%lu%c", &idx_start, &dummy) != 1)
			return -EINVAL;
		if (strcmp(argv[3], "-") &&
		    sscanf(argv[3], "%lu%c", &idx_len, &dummy) != 1)
			return -EINVAL;
	}

	return dm_stats_print(dm_get_stats(md), id, idx_start, idx_len, clear,
			      result, maxlen);
}
static int message_stats_set_aux(struct mapped_device *md,
				 unsigned int argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 3)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_set_aux(dm_get_stats(md), id, argv[2]);
}
int dm_stats_message(struct mapped_device *md, unsigned int argc, char **argv,
		     char *result, unsigned int maxlen)
{
	int r;

	/* All messages here must start with '@' */
	if (!strcasecmp(argv[0], "@stats_create"))
		r = message_stats_create(md, argc, argv, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_delete"))
		r = message_stats_delete(md, argc, argv);
	else if (!strcasecmp(argv[0], "@stats_clear"))
		r = message_stats_clear(md, argc, argv);
	else if (!strcasecmp(argv[0], "@stats_list"))
		r = message_stats_list(md, argc, argv, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_print"))
		r = message_stats_print(md, argc, argv, false, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_print_clear"))
		r = message_stats_print(md, argc, argv, true, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_set_aux"))
		r = message_stats_set_aux(md, argc, argv);
	else
		return 2; /* this wasn't a stats message */

	if (r == -EINVAL)
		DMCRIT("Invalid parameters for message %s", argv[0]);

	return r;
}
int __init dm_statistics_init(void)
{
	shared_memory_amount = 0;
	dm_stat_need_rcu_barrier = 0;
	return 0;
}
void dm_statistics_exit(void)
{
	if (dm_stat_need_rcu_barrier)
		rcu_barrier();
	if (WARN_ON(shared_memory_amount))
		DMCRIT("shared_memory_amount leaked: %lu", shared_memory_amount);
}
module_param_named(stats_current_allocated_bytes, shared_memory_amount, ulong, 0444);
MODULE_PARM_DESC(stats_current_allocated_bytes, "Memory currently used by statistics");