1 // SPDX-License-Identifier: GPL-2.0
3 * random utility code, for bcache but in theory not specific to bcache
5 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
6 * Copyright 2012 Google, Inc.
10 #include <linux/blkdev.h>
11 #include <linux/console.h>
12 #include <linux/ctype.h>
13 #include <linux/debugfs.h>
14 #include <linux/freezer.h>
15 #include <linux/kthread.h>
16 #include <linux/log2.h>
17 #include <linux/math64.h>
18 #include <linux/percpu.h>
19 #include <linux/preempt.h>
20 #include <linux/random.h>
21 #include <linux/seq_file.h>
22 #include <linux/string.h>
23 #include <linux/types.h>
24 #include <linux/sched/clock.h>
26 #include "eytzinger.h"
27 #include "mean_and_variance.h"
30 static const char si_units
[] = "?kMGTPEZY";
32 /* string_get_size units: */
33 static const char *const units_2
[] = {
34 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
36 static const char *const units_10
[] = {
37 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
40 static int parse_u64(const char *cp
, u64
*res
)
42 const char *start
= cp
;
52 if (v
> U64_MAX
- (*cp
- '0'))
56 } while (isdigit(*cp
));
62 static int bch2_pow(u64 n
, u64 p
, u64
*res
)
67 if (*res
> div64_u64(U64_MAX
, n
))
74 static int parse_unit_suffix(const char *cp
, u64
*res
)
76 const char *start
= cp
;
84 for (u
= 1; u
< strlen(si_units
); u
++)
85 if (*cp
== si_units
[u
]) {
90 for (u
= 0; u
< ARRAY_SIZE(units_2
); u
++)
91 if (!strncmp(cp
, units_2
[u
], strlen(units_2
[u
]))) {
92 cp
+= strlen(units_2
[u
]);
96 for (u
= 0; u
< ARRAY_SIZE(units_10
); u
++)
97 if (!strncmp(cp
, units_10
[u
], strlen(units_10
[u
]))) {
98 cp
+= strlen(units_10
[u
]);
106 ret
= bch2_pow(base
, u
, res
);
113 #define parse_or_ret(cp, _f) \
121 static int __bch2_strtou64_h(const char *cp
, u64
*res
)
123 const char *start
= cp
;
124 u64 v
= 0, b
, f_n
= 0, f_d
= 1;
127 parse_or_ret(cp
, parse_u64(cp
, &v
));
131 ret
= parse_u64(cp
, &f_n
);
136 ret
= bch2_pow(10, ret
, &f_d
);
141 parse_or_ret(cp
, parse_unit_suffix(cp
, &b
));
143 if (v
> div64_u64(U64_MAX
, b
))
147 if (f_n
> div64_u64(U64_MAX
, b
))
150 f_n
= div64_u64(f_n
* b
, f_d
);
159 static int __bch2_strtoh(const char *cp
, u64
*res
,
160 u64 t_max
, bool t_signed
)
162 bool positive
= *cp
!= '-';
165 if (*cp
== '+' || *cp
== '-')
168 parse_or_ret(cp
, __bch2_strtou64_h(cp
, &v
));
191 #define STRTO_H(name, type) \
192 int bch2_ ## name ## _h(const char *cp, type *res) \
195 int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \
196 ANYSINT_MAX(type) != ((type) ~0ULL)); \
201 STRTO_H(strtoint
, int)
202 STRTO_H(strtouint
, unsigned int)
203 STRTO_H(strtoll
, long long)
204 STRTO_H(strtoull
, unsigned long long)
205 STRTO_H(strtou64
, u64
)
207 u64
bch2_read_flag_list(const char *opt
, const char * const list
[])
210 char *p
, *s
, *d
= kstrdup(opt
, GFP_KERNEL
);
217 while ((p
= strsep(&s
, ",;"))) {
218 int flag
= match_string(list
, -1, p
);
225 ret
|= BIT_ULL(flag
);
233 bool bch2_is_zero(const void *_p
, size_t n
)
238 for (i
= 0; i
< n
; i
++)
244 void bch2_prt_u64_base2_nbits(struct printbuf
*out
, u64 v
, unsigned nr_bits
)
247 prt_char(out
, '0' + ((v
>> --nr_bits
) & 1));
250 void bch2_prt_u64_base2(struct printbuf
*out
, u64 v
)
252 bch2_prt_u64_base2_nbits(out
, v
, fls64(v
) ?: 1);
255 static void __bch2_print_string_as_lines(const char *prefix
, const char *lines
,
262 printk("%s (null)\n", prefix
);
270 locked
= console_trylock();
274 p
= strchrnul(lines
, '\n');
275 printk("%s%.*s\n", prefix
, (int) (p
- lines
), lines
);
284 void bch2_print_string_as_lines(const char *prefix
, const char *lines
)
286 return __bch2_print_string_as_lines(prefix
, lines
, false);
289 void bch2_print_string_as_lines_nonblocking(const char *prefix
, const char *lines
)
291 return __bch2_print_string_as_lines(prefix
, lines
, true);
294 int bch2_save_backtrace(bch_stacktrace
*stack
, struct task_struct
*task
, unsigned skipnr
,
297 #ifdef CONFIG_STACKTRACE
298 unsigned nr_entries
= 0;
301 int ret
= darray_make_room_gfp(stack
, 32, gfp
);
305 if (!down_read_trylock(&task
->signal
->exec_update_lock
))
309 nr_entries
= stack_trace_save_tsk(task
, stack
->data
, stack
->size
, skipnr
+ 1);
310 } while (nr_entries
== stack
->size
&&
311 !(ret
= darray_make_room_gfp(stack
, stack
->size
* 2, gfp
)));
313 stack
->nr
= nr_entries
;
314 up_read(&task
->signal
->exec_update_lock
);
322 void bch2_prt_backtrace(struct printbuf
*out
, bch_stacktrace
*stack
)
324 darray_for_each(*stack
, i
) {
325 prt_printf(out
, "[<0>] %pB", (void *) *i
);
330 int bch2_prt_task_backtrace(struct printbuf
*out
, struct task_struct
*task
, unsigned skipnr
, gfp_t gfp
)
332 bch_stacktrace stack
= { 0 };
333 int ret
= bch2_save_backtrace(&stack
, task
, skipnr
+ 1, gfp
);
335 bch2_prt_backtrace(out
, &stack
);
342 void bch2_prt_datetime(struct printbuf
*out
, time64_t sec
)
351 void bch2_prt_datetime(struct printbuf
*out
, time64_t sec
)
354 snprintf(buf
, sizeof(buf
), "%ptT", &sec
);
359 void bch2_pr_time_units(struct printbuf
*out
, u64 ns
)
361 const struct time_unit
*u
= bch2_pick_time_units(ns
);
363 prt_printf(out
, "%llu %s", div64_u64(ns
, u
->nsecs
), u
->name
);
366 static void bch2_pr_time_units_aligned(struct printbuf
*out
, u64 ns
)
368 const struct time_unit
*u
= bch2_pick_time_units(ns
);
370 prt_printf(out
, "%llu \r%s", div64_u64(ns
, u
->nsecs
), u
->name
);
373 static inline void pr_name_and_units(struct printbuf
*out
, const char *name
, u64 ns
)
375 prt_printf(out
, "%s\t", name
);
376 bch2_pr_time_units_aligned(out
, ns
);
380 #define TABSTOP_SIZE 12
382 void bch2_time_stats_to_text(struct printbuf
*out
, struct bch2_time_stats
*stats
)
384 struct quantiles
*quantiles
= time_stats_to_quantiles(stats
);
385 s64 f_mean
= 0, d_mean
= 0;
386 u64 f_stddev
= 0, d_stddev
= 0;
391 spin_lock_irq(&stats
->lock
);
392 for_each_possible_cpu(cpu
)
393 __bch2_time_stats_clear_buffer(stats
, per_cpu_ptr(stats
->buffer
, cpu
));
394 spin_unlock_irq(&stats
->lock
);
398 * avoid divide by zero
400 if (stats
->freq_stats
.n
) {
401 f_mean
= mean_and_variance_get_mean(stats
->freq_stats
);
402 f_stddev
= mean_and_variance_get_stddev(stats
->freq_stats
);
403 d_mean
= mean_and_variance_get_mean(stats
->duration_stats
);
404 d_stddev
= mean_and_variance_get_stddev(stats
->duration_stats
);
407 printbuf_tabstop_push(out
, out
->indent
+ TABSTOP_SIZE
);
408 prt_printf(out
, "count:\t%llu\n", stats
->duration_stats
.n
);
409 printbuf_tabstop_pop(out
);
411 printbuf_tabstops_reset(out
);
413 printbuf_tabstop_push(out
, out
->indent
+ 20);
414 printbuf_tabstop_push(out
, TABSTOP_SIZE
+ 2);
415 printbuf_tabstop_push(out
, 0);
416 printbuf_tabstop_push(out
, TABSTOP_SIZE
+ 2);
418 prt_printf(out
, "\tsince mount\r\trecent\r\n");
420 printbuf_tabstops_reset(out
);
421 printbuf_tabstop_push(out
, out
->indent
+ 20);
422 printbuf_tabstop_push(out
, TABSTOP_SIZE
);
423 printbuf_tabstop_push(out
, 2);
424 printbuf_tabstop_push(out
, TABSTOP_SIZE
);
426 prt_printf(out
, "duration of events\n");
427 printbuf_indent_add(out
, 2);
429 pr_name_and_units(out
, "min:", stats
->min_duration
);
430 pr_name_and_units(out
, "max:", stats
->max_duration
);
431 pr_name_and_units(out
, "total:", stats
->total_duration
);
433 prt_printf(out
, "mean:\t");
434 bch2_pr_time_units_aligned(out
, d_mean
);
436 bch2_pr_time_units_aligned(out
, mean_and_variance_weighted_get_mean(stats
->duration_stats_weighted
, TIME_STATS_MV_WEIGHT
));
439 prt_printf(out
, "stddev:\t");
440 bch2_pr_time_units_aligned(out
, d_stddev
);
442 bch2_pr_time_units_aligned(out
, mean_and_variance_weighted_get_stddev(stats
->duration_stats_weighted
, TIME_STATS_MV_WEIGHT
));
444 printbuf_indent_sub(out
, 2);
447 prt_printf(out
, "time between events\n");
448 printbuf_indent_add(out
, 2);
450 pr_name_and_units(out
, "min:", stats
->min_freq
);
451 pr_name_and_units(out
, "max:", stats
->max_freq
);
453 prt_printf(out
, "mean:\t");
454 bch2_pr_time_units_aligned(out
, f_mean
);
456 bch2_pr_time_units_aligned(out
, mean_and_variance_weighted_get_mean(stats
->freq_stats_weighted
, TIME_STATS_MV_WEIGHT
));
459 prt_printf(out
, "stddev:\t");
460 bch2_pr_time_units_aligned(out
, f_stddev
);
462 bch2_pr_time_units_aligned(out
, mean_and_variance_weighted_get_stddev(stats
->freq_stats_weighted
, TIME_STATS_MV_WEIGHT
));
464 printbuf_indent_sub(out
, 2);
467 printbuf_tabstops_reset(out
);
470 int i
= eytzinger0_first(NR_QUANTILES
);
471 const struct time_unit
*u
=
472 bch2_pick_time_units(quantiles
->entries
[i
].m
);
475 prt_printf(out
, "quantiles (%s):\t", u
->name
);
476 eytzinger0_for_each(i
, NR_QUANTILES
) {
477 bool is_last
= eytzinger0_next(i
, NR_QUANTILES
) == -1;
479 u64 q
= max(quantiles
->entries
[i
].m
, last_q
);
480 prt_printf(out
, "%llu ", div64_u64(q
, u
->nsecs
));
491 * bch2_ratelimit_delay() - return how long to delay until the next time to do
493 * @d: the struct bch_ratelimit to update
494 * Returns: the amount of time to delay by, in jiffies
496 u64
bch2_ratelimit_delay(struct bch_ratelimit
*d
)
498 u64 now
= local_clock();
500 return time_after64(d
->next
, now
)
501 ? nsecs_to_jiffies(d
->next
- now
)
506 * bch2_ratelimit_increment() - increment @d by the amount of work done
507 * @d: the struct bch_ratelimit to update
508 * @done: the amount of work done, in arbitrary units
510 void bch2_ratelimit_increment(struct bch_ratelimit
*d
, u64 done
)
512 u64 now
= local_clock();
514 d
->next
+= div_u64(done
* NSEC_PER_SEC
, d
->rate
);
516 if (time_before64(now
+ NSEC_PER_SEC
, d
->next
))
517 d
->next
= now
+ NSEC_PER_SEC
;
519 if (time_after64(now
- NSEC_PER_SEC
* 2, d
->next
))
520 d
->next
= now
- NSEC_PER_SEC
* 2;
526 * Updates pd_controller. Attempts to scale inputed values to units per second.
527 * @target: desired value
528 * @actual: current value
530 * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing
531 * it makes actual go down.
533 void bch2_pd_controller_update(struct bch_pd_controller
*pd
,
534 s64 target
, s64 actual
, int sign
)
536 s64 proportional
, derivative
, change
;
538 unsigned long seconds_since_update
= (jiffies
- pd
->last_update
) / HZ
;
540 if (seconds_since_update
== 0)
543 pd
->last_update
= jiffies
;
545 proportional
= actual
- target
;
546 proportional
*= seconds_since_update
;
547 proportional
= div_s64(proportional
, pd
->p_term_inverse
);
549 derivative
= actual
- pd
->last_actual
;
550 derivative
= div_s64(derivative
, seconds_since_update
);
551 derivative
= ewma_add(pd
->smoothed_derivative
, derivative
,
552 (pd
->d_term
/ seconds_since_update
) ?: 1);
553 derivative
= derivative
* pd
->d_term
;
554 derivative
= div_s64(derivative
, pd
->p_term_inverse
);
556 change
= proportional
+ derivative
;
558 /* Don't increase rate if not keeping up */
561 time_after64(local_clock(),
562 pd
->rate
.next
+ NSEC_PER_MSEC
))
565 change
*= (sign
* -1);
567 pd
->rate
.rate
= clamp_t(s64
, (s64
) pd
->rate
.rate
+ change
,
570 pd
->last_actual
= actual
;
571 pd
->last_derivative
= derivative
;
572 pd
->last_proportional
= proportional
;
573 pd
->last_change
= change
;
574 pd
->last_target
= target
;
577 void bch2_pd_controller_init(struct bch_pd_controller
*pd
)
579 pd
->rate
.rate
= 1024;
580 pd
->last_update
= jiffies
;
581 pd
->p_term_inverse
= 6000;
583 pd
->d_smooth
= pd
->d_term
;
584 pd
->backpressure
= 1;
587 void bch2_pd_controller_debug_to_text(struct printbuf
*out
, struct bch_pd_controller
*pd
)
589 if (!out
->nr_tabstops
)
590 printbuf_tabstop_push(out
, 20);
592 prt_printf(out
, "rate:\t");
593 prt_human_readable_s64(out
, pd
->rate
.rate
);
596 prt_printf(out
, "target:\t");
597 prt_human_readable_u64(out
, pd
->last_target
);
600 prt_printf(out
, "actual:\t");
601 prt_human_readable_u64(out
, pd
->last_actual
);
604 prt_printf(out
, "proportional:\t");
605 prt_human_readable_s64(out
, pd
->last_proportional
);
608 prt_printf(out
, "derivative:\t");
609 prt_human_readable_s64(out
, pd
->last_derivative
);
612 prt_printf(out
, "change:\t");
613 prt_human_readable_s64(out
, pd
->last_change
);
616 prt_printf(out
, "next io:\t%llims\n", div64_s64(pd
->rate
.next
- local_clock(), NSEC_PER_MSEC
));
621 void bch2_bio_map(struct bio
*bio
, void *base
, size_t size
)
624 struct page
*page
= is_vmalloc_addr(base
)
625 ? vmalloc_to_page(base
)
626 : virt_to_page(base
);
627 unsigned offset
= offset_in_page(base
);
628 unsigned len
= min_t(size_t, PAGE_SIZE
- offset
, size
);
630 BUG_ON(!bio_add_page(bio
, page
, len
, offset
));
636 int bch2_bio_alloc_pages(struct bio
*bio
, size_t size
, gfp_t gfp_mask
)
639 struct page
*page
= alloc_pages(gfp_mask
, 0);
640 unsigned len
= min_t(size_t, PAGE_SIZE
, size
);
645 if (unlikely(!bio_add_page(bio
, page
, len
, 0))) {
656 size_t bch2_rand_range(size_t max
)
664 rand
= get_random_long();
665 rand
&= roundup_pow_of_two(max
) - 1;
666 } while (rand
>= max
);
671 void memcpy_to_bio(struct bio
*dst
, struct bvec_iter dst_iter
, const void *src
)
674 struct bvec_iter iter
;
676 __bio_for_each_segment(bv
, dst
, iter
, dst_iter
) {
677 void *dstp
= kmap_local_page(bv
.bv_page
);
679 memcpy(dstp
+ bv
.bv_offset
, src
, bv
.bv_len
);
686 void memcpy_from_bio(void *dst
, struct bio
*src
, struct bvec_iter src_iter
)
689 struct bvec_iter iter
;
691 __bio_for_each_segment(bv
, src
, iter
, src_iter
) {
692 void *srcp
= kmap_local_page(bv
.bv_page
);
694 memcpy(dst
, srcp
+ bv
.bv_offset
, bv
.bv_len
);
702 void eytzinger1_test(void)
704 unsigned inorder
, eytz
, size
;
706 pr_info("1 based eytzinger test:");
711 unsigned extra
= eytzinger1_extra(size
);
714 pr_info("tree size %u", size
);
716 BUG_ON(eytzinger1_prev(0, size
) != eytzinger1_last(size
));
717 BUG_ON(eytzinger1_next(0, size
) != eytzinger1_first(size
));
719 BUG_ON(eytzinger1_prev(eytzinger1_first(size
), size
) != 0);
720 BUG_ON(eytzinger1_next(eytzinger1_last(size
), size
) != 0);
723 eytzinger1_for_each(eytz
, size
) {
724 BUG_ON(__inorder_to_eytzinger1(inorder
, size
, extra
) != eytz
);
725 BUG_ON(__eytzinger1_to_inorder(eytz
, size
, extra
) != inorder
);
726 BUG_ON(eytz
!= eytzinger1_last(size
) &&
727 eytzinger1_prev(eytzinger1_next(eytz
, size
), size
) != eytz
);
734 void eytzinger0_test(void)
737 unsigned inorder
, eytz
, size
;
739 pr_info("0 based eytzinger test:");
744 unsigned extra
= eytzinger0_extra(size
);
747 pr_info("tree size %u", size
);
749 BUG_ON(eytzinger0_prev(-1, size
) != eytzinger0_last(size
));
750 BUG_ON(eytzinger0_next(-1, size
) != eytzinger0_first(size
));
752 BUG_ON(eytzinger0_prev(eytzinger0_first(size
), size
) != -1);
753 BUG_ON(eytzinger0_next(eytzinger0_last(size
), size
) != -1);
756 eytzinger0_for_each(eytz
, size
) {
757 BUG_ON(__inorder_to_eytzinger0(inorder
, size
, extra
) != eytz
);
758 BUG_ON(__eytzinger0_to_inorder(eytz
, size
, extra
) != inorder
);
759 BUG_ON(eytz
!= eytzinger0_last(size
) &&
760 eytzinger0_prev(eytzinger0_next(eytz
, size
), size
) != eytz
);
767 static inline int cmp_u16(const void *_l
, const void *_r
, size_t size
)
769 const u16
*l
= _l
, *r
= _r
;
771 return (*l
> *r
) - (*r
- *l
);
774 static void eytzinger0_find_test_val(u16
*test_array
, unsigned nr
, u16 search
)
776 int i
, c1
= -1, c2
= -1;
779 r
= eytzinger0_find_le(test_array
, nr
,
780 sizeof(test_array
[0]),
785 for (i
= 0; i
< nr
; i
++)
786 if (test_array
[i
] <= search
&& test_array
[i
] > c2
)
790 eytzinger0_for_each(i
, nr
)
791 pr_info("[%3u] = %12u", i
, test_array
[i
]);
792 pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
797 void eytzinger0_find_test(void)
799 unsigned i
, nr
, allocated
= 1 << 12;
800 u16
*test_array
= kmalloc_array(allocated
, sizeof(test_array
[0]), GFP_KERNEL
);
802 for (nr
= 1; nr
< allocated
; nr
++) {
803 pr_info("testing %u elems", nr
);
805 get_random_bytes(test_array
, nr
* sizeof(test_array
[0]));
806 eytzinger0_sort(test_array
, nr
, sizeof(test_array
[0]), cmp_u16
, NULL
);
808 /* verify array is sorted correctly: */
809 eytzinger0_for_each(i
, nr
)
810 BUG_ON(i
!= eytzinger0_last(nr
) &&
811 test_array
[i
] > test_array
[eytzinger0_next(i
, nr
)]);
813 for (i
= 0; i
< U16_MAX
; i
+= 1 << 12)
814 eytzinger0_find_test_val(test_array
, nr
, i
);
816 for (i
= 0; i
< nr
; i
++) {
817 eytzinger0_find_test_val(test_array
, nr
, test_array
[i
] - 1);
818 eytzinger0_find_test_val(test_array
, nr
, test_array
[i
]);
819 eytzinger0_find_test_val(test_array
, nr
, test_array
[i
] + 1);
828 * Accumulate percpu counters onto one cpu's copy - only valid when access
829 * against any percpu counter is guarded against
831 u64
*bch2_acc_percpu_u64s(u64 __percpu
*p
, unsigned nr
)
836 /* access to pcpu vars has to be blocked by other locking */
838 ret
= this_cpu_ptr(p
);
841 for_each_possible_cpu(cpu
) {
842 u64
*i
= per_cpu_ptr(p
, cpu
);
845 acc_u64s(ret
, i
, nr
);
846 memset(i
, 0, nr
* sizeof(u64
));
853 void bch2_darray_str_exit(darray_str
*d
)
855 darray_for_each(*d
, i
)
860 int bch2_split_devs(const char *_dev_name
, darray_str
*ret
)
864 char *dev_name
, *s
, *orig
;
866 dev_name
= orig
= kstrdup(_dev_name
, GFP_KERNEL
);
870 while ((s
= strsep(&dev_name
, ":"))) {
871 char *p
= kstrdup(s
, GFP_KERNEL
);
875 if (darray_push(ret
, p
)) {
884 bch2_darray_str_exit(ret
);