// SPDX-License-Identifier: GPL-2.0
/*
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon: " fmt

#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/string.h>

#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>

#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif
static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;

static DEFINE_MUTEX(damon_ops_lock);
static struct damon_operations damon_registered_ops[NR_DAMON_OPS];

static struct kmem_cache *damon_region_cache __ro_after_init;
/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */
static bool __damon_is_registered_ops(enum damon_ops_id id)
	struct damon_operations empty_ops = {};

	if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))

/**
 * damon_is_registered_ops() - Check if a given damon_operations is registered.
 * @id:	Id of the damon_operations to check if registered.
 *
 * Return: true if the ops is set, false otherwise.
 */
bool damon_is_registered_ops(enum damon_ops_id id)
	if (id >= NR_DAMON_OPS)
	mutex_lock(&damon_ops_lock);
	registered = __damon_is_registered_ops(id);
	mutex_unlock(&damon_ops_lock);
/**
 * damon_register_ops() - Register a monitoring operations set to DAMON.
 * @ops:	monitoring operations set to register.
 *
 * This function registers a monitoring operations set of valid &struct
 * damon_operations->id so that others can find and use them later.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_register_ops(struct damon_operations *ops)
	if (ops->id >= NR_DAMON_OPS)
	mutex_lock(&damon_ops_lock);
	/* Fail for already registered ops */
	if (__damon_is_registered_ops(ops->id)) {
	damon_registered_ops[ops->id] = *ops;
	mutex_unlock(&damon_ops_lock);
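/*
 * Illustrative sketch (not part of the original file): how an operations set
 * would typically be registered from its own init code.  The callback names
 * (my_init, my_check_accesses) are hypothetical placeholders and the field
 * list is abbreviated; only the ->id assignment and the damon_register_ops()
 * call follow the interface above.
 *
 *	static struct damon_operations my_ops = {
 *		.id = DAMON_OPS_VADDR,
 *		.init = my_init,
 *		.check_accesses = my_check_accesses,
 *	};
 *
 *	static int __init my_ops_init(void)
 *	{
 *		return damon_register_ops(&my_ops);
 *	}
 */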
/**
 * damon_select_ops() - Select a monitoring operations to use with the context.
 * @ctx:	monitoring context to use the operations.
 * @id:		id of the registered monitoring operations to select.
 *
 * This function finds the registered monitoring operations set of @id and
 * makes @ctx use it.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
	if (id >= NR_DAMON_OPS)
	mutex_lock(&damon_ops_lock);
	if (!__damon_is_registered_ops(id))
	ctx->ops = damon_registered_ops[id];
	mutex_unlock(&damon_ops_lock);
/*
 * Construct a damon_region struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
	struct damon_region *region;

	region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL);

	region->ar.start = start;
	region->ar.end = end;
	region->nr_accesses = 0;
	region->nr_accesses_bp = 0;
	INIT_LIST_HEAD(&region->list);

	region->last_nr_accesses = 0;
void damon_add_region(struct damon_region *r, struct damon_target *t)
	list_add_tail(&r->list, &t->regions_list);

static void damon_del_region(struct damon_region *r, struct damon_target *t)

static void damon_free_region(struct damon_region *r)
	kmem_cache_free(damon_region_cache, r);

void damon_destroy_region(struct damon_region *r, struct damon_target *t)
	damon_del_region(r, t);
	damon_free_region(r);
/*
 * Check whether a region is intersecting an address range
 *
 * Returns true if it is.
 */
static bool damon_intersect(struct damon_region *r,
		struct damon_addr_range *re)
	return !(r->ar.end <= re->start || re->end <= r->ar.start);
/*
 * Fill holes in regions with new regions.
 */
static int damon_fill_regions_holes(struct damon_region *first,
		struct damon_region *last, struct damon_target *t)
	struct damon_region *r = first;

	damon_for_each_region_from(r, t) {
		struct damon_region *next, *newr;

		next = damon_next_region(r);
		if (r->ar.end != next->ar.start) {
			newr = damon_new_region(r->ar.end, next->ar.start);
			damon_insert_region(newr, r, next, t);
/**
 * damon_set_regions() - Set regions of a target for given address ranges.
 * @t:		the given target.
 * @ranges:	array of new monitoring target ranges.
 * @nr_ranges:	length of @ranges.
 *
 * This function adds new regions to, or modifies existing regions of a
 * monitoring target to fit in specific ranges.
 *
 * Return: 0 if success, or negative error code otherwise.
 */
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
		unsigned int nr_ranges)
	struct damon_region *r, *next;

	/* Remove regions which are not in the new ranges */
	damon_for_each_region_safe(r, next, t) {
		for (i = 0; i < nr_ranges; i++) {
			if (damon_intersect(r, &ranges[i]))
		damon_destroy_region(r, t);

	r = damon_first_region(t);
	/* Add new regions or resize existing regions to fit in the ranges */
	for (i = 0; i < nr_ranges; i++) {
		struct damon_region *first = NULL, *last, *newr;
		struct damon_addr_range *range;

		/* Get the first/last regions intersecting with the range */
		damon_for_each_region_from(r, t) {
			if (damon_intersect(r, range)) {
			if (r->ar.start >= range->end)
			/* no region intersects with this range */
			newr = damon_new_region(
					ALIGN_DOWN(range->start,
						DAMON_MIN_REGION),
					ALIGN(range->end, DAMON_MIN_REGION));
			damon_insert_region(newr, damon_prev_region(r), r, t);
			/* resize intersecting regions to fit in this range */
			first->ar.start = ALIGN_DOWN(range->start,
					DAMON_MIN_REGION);
			last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);

			/* fill possible holes in the range */
			err = damon_fill_regions_holes(first, last, t);
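/*
 * Example (illustrative only): make @t monitor exactly two address ranges.
 * The addresses below are arbitrary values chosen for the sketch.
 *
 *	struct damon_addr_range ranges[] = {
 *		{ .start = 0x100000000, .end = 0x140000000 },
 *		{ .start = 0x200000000, .end = 0x280000000 },
 *	};
 *
 *	err = damon_set_regions(t, ranges, ARRAY_SIZE(ranges));
 *
 * Existing regions outside the two ranges are destroyed, regions overlapping
 * a range boundary are resized, and holes inside each range are filled with
 * new regions, as implemented above.
 */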
struct damos_filter *damos_new_filter(enum damos_filter_type type,
		bool matching)
	struct damos_filter *filter;

	filter = kmalloc(sizeof(*filter), GFP_KERNEL);
	filter->matching = matching;
	INIT_LIST_HEAD(&filter->list);

void damos_add_filter(struct damos *s, struct damos_filter *f)
	list_add_tail(&f->list, &s->filters);

static void damos_del_filter(struct damos_filter *f)

static void damos_free_filter(struct damos_filter *f)

void damos_destroy_filter(struct damos_filter *f)
	damos_free_filter(f);
struct damos_quota_goal *damos_new_quota_goal(
		enum damos_quota_goal_metric metric,
		unsigned long target_value)
	struct damos_quota_goal *goal;

	goal = kmalloc(sizeof(*goal), GFP_KERNEL);
	goal->metric = metric;
	goal->target_value = target_value;
	INIT_LIST_HEAD(&goal->list);

void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g)
	list_add_tail(&g->list, &q->goals);

static void damos_del_quota_goal(struct damos_quota_goal *g)

static void damos_free_quota_goal(struct damos_quota_goal *g)

void damos_destroy_quota_goal(struct damos_quota_goal *g)
	damos_del_quota_goal(g);
	damos_free_quota_goal(g);
/* initialize fields of @quota that normally API users wouldn't set */
static struct damos_quota *damos_quota_init(struct damos_quota *quota)
	quota->total_charged_sz = 0;
	quota->total_charged_ns = 0;
	quota->charged_sz = 0;
	quota->charged_from = 0;
	quota->charge_target_from = NULL;
	quota->charge_addr_from = 0;
struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
			enum damos_action action,
			unsigned long apply_interval_us,
			struct damos_quota *quota,
			struct damos_watermarks *wmarks,
			int target_nid)
	struct damos *scheme;

	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
	scheme->pattern = *pattern;
	scheme->action = action;
	scheme->apply_interval_us = apply_interval_us;
	/*
	 * next_apply_sis will be set when kdamond starts.  While kdamond is
	 * running, it will also be updated when it is added to the DAMON
	 * context, or damon_attrs are updated.
	 */
	scheme->next_apply_sis = 0;
	INIT_LIST_HEAD(&scheme->filters);
	scheme->stat = (struct damos_stat){};
	INIT_LIST_HEAD(&scheme->list);

	scheme->quota = *(damos_quota_init(quota));
	/* quota.goals should be separately set by caller */
	INIT_LIST_HEAD(&scheme->quota.goals);

	scheme->wmarks = *wmarks;
	scheme->wmarks.activated = true;

	scheme->target_nid = target_nid;
static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx)
	unsigned long sample_interval = ctx->attrs.sample_interval ?
		ctx->attrs.sample_interval : 1;
	unsigned long apply_interval = s->apply_interval_us ?
		s->apply_interval_us : ctx->attrs.aggr_interval;

	s->next_apply_sis = ctx->passed_sample_intervals +
		apply_interval / sample_interval;
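/*
 * Worked example (assuming the defaults set by damon_new_ctx() below):
 * sample_interval is 5000 us and aggr_interval is 100000 us.  A scheme with
 * apply_interval_us == 0 falls back to the aggregation interval, so
 * next_apply_sis becomes passed_sample_intervals + 100000 / 5000, i.e. the
 * scheme is applied again after 20 more sampling intervals.
 */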
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
	list_add_tail(&s->list, &ctx->schemes);
	damos_set_next_apply_sis(s, ctx);

static void damon_del_scheme(struct damos *s)

static void damon_free_scheme(struct damos *s)

void damon_destroy_scheme(struct damos *s)
	struct damos_quota_goal *g, *g_next;
	struct damos_filter *f, *next;

	damos_for_each_quota_goal_safe(g, g_next, &s->quota)
		damos_destroy_quota_goal(g);

	damos_for_each_filter_safe(f, next, s)
		damos_destroy_filter(f);

	damon_free_scheme(s);
/*
 * Construct a damon_target struct
 *
 * Returns the pointer to the new struct if success, or NULL otherwise
 */
struct damon_target *damon_new_target(void)
	struct damon_target *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);

	INIT_LIST_HEAD(&t->regions_list);
	INIT_LIST_HEAD(&t->list);
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
	list_add_tail(&t->list, &ctx->adaptive_targets);

bool damon_targets_empty(struct damon_ctx *ctx)
	return list_empty(&ctx->adaptive_targets);

static void damon_del_target(struct damon_target *t)

void damon_free_target(struct damon_target *t)
	struct damon_region *r, *next;

	damon_for_each_region_safe(r, next, t)
		damon_free_region(r);

void damon_destroy_target(struct damon_target *t)
	damon_free_target(t);

unsigned int damon_nr_regions(struct damon_target *t)
	return t->nr_regions;
struct damon_ctx *damon_new_ctx(void)
	struct damon_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	init_completion(&ctx->kdamond_started);

	ctx->attrs.sample_interval = 5 * 1000;
	ctx->attrs.aggr_interval = 100 * 1000;
	ctx->attrs.ops_update_interval = 60 * 1000 * 1000;

	ctx->passed_sample_intervals = 0;
	/* These will be set from kdamond_init_intervals_sis() */
	ctx->next_aggregation_sis = 0;
	ctx->next_ops_update_sis = 0;

	mutex_init(&ctx->kdamond_lock);

	ctx->attrs.min_nr_regions = 10;
	ctx->attrs.max_nr_regions = 1000;

	INIT_LIST_HEAD(&ctx->adaptive_targets);
	INIT_LIST_HEAD(&ctx->schemes);
static void damon_destroy_targets(struct damon_ctx *ctx)
	struct damon_target *t, *next_t;

	if (ctx->ops.cleanup) {
		ctx->ops.cleanup(ctx);

	damon_for_each_target_safe(t, next_t, ctx)
		damon_destroy_target(t);

void damon_destroy_ctx(struct damon_ctx *ctx)
	struct damos *s, *next_s;

	damon_destroy_targets(ctx);

	damon_for_each_scheme_safe(s, next_s, ctx)
		damon_destroy_scheme(s);
static unsigned int damon_age_for_new_attrs(unsigned int age,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
	return age * old_attrs->aggr_interval / new_attrs->aggr_interval;

/* convert access ratio in bp (per 10,000) to nr_accesses */
static unsigned int damon_accesses_bp_to_nr_accesses(
		unsigned int accesses_bp, struct damon_attrs *attrs)
	return accesses_bp * damon_max_nr_accesses(attrs) / 10000;

/*
 * Convert nr_accesses to access ratio in bp (per 10,000).
 *
 * Callers should ensure attrs.aggr_interval is not zero, like
 * damon_update_monitoring_results() does.  Otherwise, divide-by-zero would
 * happen.
 */
static unsigned int damon_nr_accesses_to_accesses_bp(
		unsigned int nr_accesses, struct damon_attrs *attrs)
	return nr_accesses * 10000 / damon_max_nr_accesses(attrs);

static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
	return damon_accesses_bp_to_nr_accesses(
			damon_nr_accesses_to_accesses_bp(
				nr_accesses, old_attrs),
			new_attrs);
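/*
 * Worked example: with old attrs of 5000 us sampling and 100000 us
 * aggregation, damon_max_nr_accesses() is 20, so nr_accesses == 10 maps to
 * 10 * 10000 / 20 == 5000 bp (50%).  If the new attrs use 1000 us sampling
 * with the same aggregation interval, the maximum becomes 100 and the value
 * converts back to 5000 * 100 / 10000 == 50 accesses, preserving the ratio.
 */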
static void damon_update_monitoring_result(struct damon_region *r,
		struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
	r->nr_accesses = damon_nr_accesses_for_new_attrs(r->nr_accesses,
			old_attrs, new_attrs);
	r->nr_accesses_bp = r->nr_accesses * 10000;
	r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);

/*
 * region->nr_accesses is the number of sampling intervals in the last
 * aggregation interval that access to the region has been found, and
 * region->age is the number of aggregation intervals that its access pattern
 * has maintained.  For that reason, the real meaning of the two fields depends
 * on the current sampling interval and aggregation interval.  This function
 * updates ->nr_accesses and ->age of given damon_ctx's regions for new
 * damon_attrs.
 */
static void damon_update_monitoring_results(struct damon_ctx *ctx,
		struct damon_attrs *new_attrs)
	struct damon_attrs *old_attrs = &ctx->attrs;
	struct damon_target *t;
	struct damon_region *r;

	/* if any interval is zero, simply forgive conversion */
	if (!old_attrs->sample_interval || !old_attrs->aggr_interval ||
			!new_attrs->sample_interval ||
			!new_attrs->aggr_interval)

	damon_for_each_target(t, ctx)
		damon_for_each_region(r, t)
			damon_update_monitoring_result(
					r, old_attrs, new_attrs);
/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:		monitoring context
 * @attrs:		monitoring attributes
 *
 * This function should be called while the kdamond is not running, or an
 * access check results aggregation is not ongoing (e.g., from
 * &struct damon_callback->after_aggregation or
 * &struct damon_callback->after_wmarks_check callbacks).
 *
 * Every time interval is in micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
	unsigned long sample_interval = attrs->sample_interval ?
		attrs->sample_interval : 1;

	if (attrs->min_nr_regions < 3)
	if (attrs->min_nr_regions > attrs->max_nr_regions)
	if (attrs->sample_interval > attrs->aggr_interval)

	ctx->next_aggregation_sis = ctx->passed_sample_intervals +
		attrs->aggr_interval / sample_interval;
	ctx->next_ops_update_sis = ctx->passed_sample_intervals +
		attrs->ops_update_interval / sample_interval;

	damon_update_monitoring_results(ctx, attrs);

	damon_for_each_scheme(s, ctx)
		damos_set_next_apply_sis(s, ctx);
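/*
 * Example (illustrative only): tighten the sampling to 1 ms while keeping the
 * other defaults of damon_new_ctx().  The numbers are arbitrary; any
 * combination passing the sanity checks above is accepted.
 *
 *	struct damon_attrs attrs = {
 *		.sample_interval = 1000,
 *		.aggr_interval = 100000,
 *		.ops_update_interval = 60000000,
 *		.min_nr_regions = 10,
 *		.max_nr_regions = 1000,
 *	};
 *
 *	err = damon_set_attrs(ctx, &attrs);
 *
 * Existing monitoring results are rescaled by
 * damon_update_monitoring_results() so that nr_accesses and age keep their
 * meaning under the new intervals.
 */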
/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:	monitoring context
 * @schemes:	array of the schemes
 * @nr_schemes:	number of entries in @schemes
 *
 * This function should not be called while the kdamond of the context is
 * running.
 */
void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
			ssize_t nr_schemes)
	struct damos *s, *next;

	damon_for_each_scheme_safe(s, next, ctx)
		damon_destroy_scheme(s);
	for (i = 0; i < nr_schemes; i++)
		damon_add_scheme(ctx, schemes[i]);
static struct damos_quota_goal *damos_nth_quota_goal(
		int n, struct damos_quota *q)
	struct damos_quota_goal *goal;

	damos_for_each_quota_goal(goal, q) {

static void damos_commit_quota_goal(
		struct damos_quota_goal *dst, struct damos_quota_goal *src)
	dst->metric = src->metric;
	dst->target_value = src->target_value;
	if (dst->metric == DAMOS_QUOTA_USER_INPUT)
		dst->current_value = src->current_value;
	/* keep last_psi_total as is, since it will be updated in next cycle */
/**
 * damos_commit_quota_goals() - Commit DAMOS quota goals to another quota.
 * @dst:	The commit destination DAMOS quota.
 * @src:	The commit source DAMOS quota.
 *
 * Copies user-specified parameters for quota goals from @src to @dst.  Users
 * should use this function for quota goals-level parameters update of running
 * DAMON contexts, instead of manual in-place updates.
 *
 * This function should be called from parameters-update safe context, like
 * DAMON callbacks.
 */
int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src)
	struct damos_quota_goal *dst_goal, *next, *src_goal, *new_goal;

	damos_for_each_quota_goal_safe(dst_goal, next, dst) {
		src_goal = damos_nth_quota_goal(i++, src);
			damos_commit_quota_goal(dst_goal, src_goal);
		damos_destroy_quota_goal(dst_goal);

	damos_for_each_quota_goal_safe(src_goal, next, src) {
		new_goal = damos_new_quota_goal(
				src_goal->metric, src_goal->target_value);
		damos_add_quota_goal(dst, new_goal);
static int damos_commit_quota(struct damos_quota *dst, struct damos_quota *src)
	dst->reset_interval = src->reset_interval;
	err = damos_commit_quota_goals(dst, src);
	dst->weight_sz = src->weight_sz;
	dst->weight_nr_accesses = src->weight_nr_accesses;
	dst->weight_age = src->weight_age;
static struct damos_filter *damos_nth_filter(int n, struct damos *s)
	struct damos_filter *filter;

	damos_for_each_filter(filter, s) {

static void damos_commit_filter_arg(
		struct damos_filter *dst, struct damos_filter *src)
	case DAMOS_FILTER_TYPE_MEMCG:
		dst->memcg_id = src->memcg_id;
	case DAMOS_FILTER_TYPE_ADDR:
		dst->addr_range = src->addr_range;
	case DAMOS_FILTER_TYPE_TARGET:
		dst->target_idx = src->target_idx;

static void damos_commit_filter(
		struct damos_filter *dst, struct damos_filter *src)
	dst->type = src->type;
	dst->matching = src->matching;
	damos_commit_filter_arg(dst, src);
static int damos_commit_filters(struct damos *dst, struct damos *src)
	struct damos_filter *dst_filter, *next, *src_filter, *new_filter;

	damos_for_each_filter_safe(dst_filter, next, dst) {
		src_filter = damos_nth_filter(i++, src);
			damos_commit_filter(dst_filter, src_filter);
		damos_destroy_filter(dst_filter);

	damos_for_each_filter_safe(src_filter, next, src) {
		new_filter = damos_new_filter(
				src_filter->type, src_filter->matching);
		damos_commit_filter_arg(new_filter, src_filter);
		damos_add_filter(dst, new_filter);
static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx)
	damon_for_each_scheme(s, ctx) {

static int damos_commit(struct damos *dst, struct damos *src)
	dst->pattern = src->pattern;
	dst->action = src->action;
	dst->apply_interval_us = src->apply_interval_us;

	err = damos_commit_quota(&dst->quota, &src->quota);

	dst->wmarks = src->wmarks;

	err = damos_commit_filters(dst, src);

static int damon_commit_schemes(struct damon_ctx *dst, struct damon_ctx *src)
	struct damos *dst_scheme, *next, *src_scheme, *new_scheme;
	int i = 0, j = 0, err;

	damon_for_each_scheme_safe(dst_scheme, next, dst) {
		src_scheme = damon_nth_scheme(i++, src);
			err = damos_commit(dst_scheme, src_scheme);
		damon_destroy_scheme(dst_scheme);

	damon_for_each_scheme_safe(src_scheme, next, src) {
		new_scheme = damon_new_scheme(&src_scheme->pattern,
				src_scheme->apply_interval_us,
				&src_scheme->quota, &src_scheme->wmarks,
		damon_add_scheme(dst, new_scheme);
static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx)
	struct damon_target *t;

	damon_for_each_target(t, ctx) {
/*
 * The caller should ensure the regions of @src are
 * 1. valid (end >= start) and
 * 2. sorted by starting address.
 *
 * If @src has no region, @dst keeps current regions.
 */
static int damon_commit_target_regions(
		struct damon_target *dst, struct damon_target *src)
	struct damon_region *src_region;
	struct damon_addr_range *ranges;

	damon_for_each_region(src_region, src)

	ranges = kmalloc_array(i, sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN);
	damon_for_each_region(src_region, src)
		ranges[i++] = src_region->ar;
	err = damon_set_regions(dst, ranges, i);

static int damon_commit_target(
		struct damon_target *dst, bool dst_has_pid,
		struct damon_target *src, bool src_has_pid)
	err = damon_commit_target_regions(dst, src);
static int damon_commit_targets(
		struct damon_ctx *dst, struct damon_ctx *src)
	struct damon_target *dst_target, *next, *src_target, *new_target;
	int i = 0, j = 0, err;

	damon_for_each_target_safe(dst_target, next, dst) {
		src_target = damon_nth_target(i++, src);
			err = damon_commit_target(
					dst_target, damon_target_has_pid(dst),
					src_target, damon_target_has_pid(src));
			if (damon_target_has_pid(dst))
				put_pid(dst_target->pid);
			damon_destroy_target(dst_target);

	damon_for_each_target_safe(src_target, next, src) {
		new_target = damon_new_target();
		err = damon_commit_target(new_target, false,
				src_target, damon_target_has_pid(src));
/**
 * damon_commit_ctx() - Commit parameters of a DAMON context to another.
 * @dst:	The commit destination DAMON context.
 * @src:	The commit source DAMON context.
 *
 * This function copies user-specified parameters from @src to @dst and updates
 * the internal status and results accordingly.  Users should use this function
 * for context-level parameters update of running context, instead of manual
 * in-place updates.
 *
 * This function should be called from parameters-update safe context, like
 * DAMON callbacks.
 */
int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
	err = damon_commit_schemes(dst, src);
	err = damon_commit_targets(dst, src);
	/*
	 * schemes and targets should be updated first, since
	 * 1. damon_set_attrs() updates monitoring results of targets and
	 *    next_apply_sis of schemes, and
	 * 2. ops update should be done after pid handling is done (target
	 *    committing requires putting pids).
	 */
	err = damon_set_attrs(dst, &src->attrs);
	dst->ops = src->ops;
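/*
 * Typical (illustrative) usage: build a throw-away context carrying only the
 * new parameters and commit it to the running one from a parameters-update
 * safe context such as a DAMON callback.  'running_ctx' below stands for the
 * already running destination context.
 *
 *	struct damon_ctx *new_ctx = damon_new_ctx();
 *
 *	if (!new_ctx)
 *		return -ENOMEM;
 *	... fill new_ctx->attrs, schemes and targets as desired ...
 *	err = damon_commit_ctx(running_ctx, new_ctx);
 *	damon_destroy_ctx(new_ctx);
 */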
/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */
int damon_nr_running_ctxs(void)
	mutex_lock(&damon_lock);
	nr_ctxs = nr_running_ctxs;
	mutex_unlock(&damon_lock);
/* Returns the size upper limit for each monitoring region */
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
	struct damon_target *t;
	struct damon_region *r;
	unsigned long sz = 0;

	damon_for_each_target(t, ctx) {
		damon_for_each_region(r, t)
			sz += damon_sz_region(r);

	if (ctx->attrs.min_nr_regions)
		sz /= ctx->attrs.min_nr_regions;
	if (sz < DAMON_MIN_REGION)
		sz = DAMON_MIN_REGION;
static int kdamond_fn(void *data);

/*
 * __damon_start() - Starts monitoring with given context.
 * @ctx:	monitoring context
 *
 * This function should be called while damon_lock is held.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_start(struct damon_ctx *ctx)
	mutex_lock(&ctx->kdamond_lock);
	if (!ctx->kdamond) {
		reinit_completion(&ctx->kdamond_started);
		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
		if (IS_ERR(ctx->kdamond)) {
			err = PTR_ERR(ctx->kdamond);
			ctx->kdamond = NULL;
		wait_for_completion(&ctx->kdamond_started);
	mutex_unlock(&ctx->kdamond_lock);
/**
 * damon_start() - Starts the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring
 * @nr_ctxs:	size of @ctxs
 * @exclusive:	exclusiveness of this contexts group
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per each context is created and run in parallel.  The
 * caller should handle synchronization between the threads by itself.  If
 * @exclusive is true and a group of threads created by another 'damon_start()'
 * call is currently running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
	mutex_lock(&damon_lock);
	if ((exclusive && nr_running_ctxs) ||
			(!exclusive && running_exclusive_ctxs)) {
		mutex_unlock(&damon_lock);

	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
	if (exclusive && nr_running_ctxs)
		running_exclusive_ctxs = true;
	mutex_unlock(&damon_lock);
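/*
 * Minimal (illustrative) start-up sequence using only functions of this file.
 * Error handling is omitted, and the physical address space operations set
 * (DAMON_OPS_PADDR) is assumed to be built in; any other registered ops id
 * would work the same way.
 *
 *	struct damon_ctx *ctx = damon_new_ctx();
 *	struct damon_target *t = damon_new_target();
 *	unsigned long start = 0, end = 0;
 *
 *	damon_select_ops(ctx, DAMON_OPS_PADDR);
 *	damon_add_target(ctx, t);
 *	damon_set_region_biggest_system_ram_default(t, &start, &end);
 *	damon_start(&ctx, 1, true);
 *	...
 *	damon_stop(&ctx, 1);
 *	damon_destroy_ctx(ctx);
 */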
/*
 * __damon_stop() - Stops monitoring of a given context.
 * @ctx:	monitoring context
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int __damon_stop(struct damon_ctx *ctx)
	struct task_struct *tsk;

	mutex_lock(&ctx->kdamond_lock);
		get_task_struct(tsk);
		mutex_unlock(&ctx->kdamond_lock);
		kthread_stop_put(tsk);
	mutex_unlock(&ctx->kdamond_lock);

/**
 * damon_stop() - Stops the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring
 * @nr_ctxs:	size of @ctxs
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
	for (i = 0; i < nr_ctxs; i++) {
		/* nr_running_ctxs is decremented in kdamond_fn */
		err = __damon_stop(ctxs[i]);
/*
 * Reset the aggregated monitoring results ('nr_accesses' of each region).
 */
static void kdamond_reset_aggregated(struct damon_ctx *c)
	struct damon_target *t;
	unsigned int ti = 0;	/* target's index */

	damon_for_each_target(t, c) {
		struct damon_region *r;

		damon_for_each_region(r, t) {
			trace_damon_aggregated(ti, r, damon_nr_regions(t));
			r->last_nr_accesses = r->nr_accesses;

static void damon_split_region_at(struct damon_target *t,
				  struct damon_region *r, unsigned long sz_r);
static bool __damos_valid_target(struct damon_region *r, struct damos *s)
	unsigned int nr_accesses = r->nr_accesses_bp / 10000;

	sz = damon_sz_region(r);
	return s->pattern.min_sz_region <= sz &&
		sz <= s->pattern.max_sz_region &&
		s->pattern.min_nr_accesses <= nr_accesses &&
		nr_accesses <= s->pattern.max_nr_accesses &&
		s->pattern.min_age_region <= r->age &&
		r->age <= s->pattern.max_age_region;

static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
	bool ret = __damos_valid_target(r, s);

	if (!ret || !s->quota.esz || !c->ops.get_scheme_score)

	return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
/**
 * damos_skip_charged_region() - Check if the given region or starting part of
 * it is already charged for the DAMOS quota.
 * @t:	The target of the region.
 * @rp:	The pointer to the region.
 * @s:	The scheme to be applied.
 *
 * If a quota of a scheme has exceeded in a quota charge window, the scheme's
 * action would be applied to only a part of the target access pattern
 * fulfilling regions.  To avoid applying the scheme action to only already
 * applied regions, DAMON skips applying the scheme action to the regions that
 * were charged in the previous charge window.
 *
 * This function checks if a given region should be skipped or not for that
 * reason.  If only the starting part of the region has previously been
 * charged, this function splits the region into two so that the second one
 * covers the area that was not charged in the previous charge window, saves
 * the second region in *rp and returns false, so that the caller can apply
 * DAMON action to the second one.
 *
 * Return: true if the region should be entirely skipped, false otherwise.
 */
static bool damos_skip_charged_region(struct damon_target *t,
		struct damon_region **rp, struct damos *s)
	struct damon_region *r = *rp;
	struct damos_quota *quota = &s->quota;
	unsigned long sz_to_skip;

	/* Skip previously charged regions */
	if (quota->charge_target_from) {
		if (t != quota->charge_target_from)
		if (r == damon_last_region(t)) {
			quota->charge_target_from = NULL;
			quota->charge_addr_from = 0;
		if (quota->charge_addr_from &&
				r->ar.end <= quota->charge_addr_from)

		if (quota->charge_addr_from && r->ar.start <
				quota->charge_addr_from) {
			sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
					r->ar.start, DAMON_MIN_REGION);
				if (damon_sz_region(r) <= DAMON_MIN_REGION)
				sz_to_skip = DAMON_MIN_REGION;
			damon_split_region_at(t, r, sz_to_skip);
			r = damon_next_region(r);
		quota->charge_target_from = NULL;
		quota->charge_addr_from = 0;
static void damos_update_stat(struct damos *s,
		unsigned long sz_tried, unsigned long sz_applied)
	s->stat.sz_tried += sz_tried;
		s->stat.nr_applied++;
		s->stat.sz_applied += sz_applied;
static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
		struct damon_region *r, struct damos_filter *filter)
	bool matched = false;
	struct damon_target *ti;
	unsigned long start, end;

	switch (filter->type) {
	case DAMOS_FILTER_TYPE_TARGET:
		damon_for_each_target(ti, ctx) {
		matched = target_idx == filter->target_idx;
	case DAMOS_FILTER_TYPE_ADDR:
		start = ALIGN_DOWN(filter->addr_range.start, DAMON_MIN_REGION);
		end = ALIGN_DOWN(filter->addr_range.end, DAMON_MIN_REGION);

		/* inside the range */
		if (start <= r->ar.start && r->ar.end <= end) {
		/* outside of the range */
		if (r->ar.end <= start || end <= r->ar.start) {
		/* start before the range and overlap */
		if (r->ar.start < start) {
			damon_split_region_at(t, r, start - r->ar.start);
		/* start inside the range */
		damon_split_region_at(t, r, end - r->ar.start);

	return matched == filter->matching;
static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
		struct damon_region *r, struct damos *s)
	struct damos_filter *filter;

	damos_for_each_filter(filter, s) {
		if (__damos_filter_out(ctx, t, r, filter))
static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
		struct damon_region *r, struct damos *s)
	struct damos_quota *quota = &s->quota;
	unsigned long sz = damon_sz_region(r);
	struct timespec64 begin, end;
	unsigned long sz_applied = 0;
	/*
	 * We plan to support multiple contexts per kdamond, as DAMON sysfs
	 * implies with 'nr_contexts' file.  Nevertheless, only single context
	 * per kdamond is supported for now.  So, we can simply use '0' context
	 * index here.
	 */
	unsigned int cidx = 0;
	struct damos *siter;		/* schemes iterator */
	unsigned int sidx = 0;
	struct damon_target *titer;	/* targets iterator */
	unsigned int tidx = 0;
	bool do_trace = false;

	/* get indices for trace_damos_before_apply() */
	if (trace_damos_before_apply_enabled()) {
		damon_for_each_scheme(siter, c) {
		damon_for_each_target(titer, c) {

	if (c->ops.apply_scheme) {
		if (quota->esz && quota->charged_sz + sz > quota->esz) {
			sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
			damon_split_region_at(t, r, sz);
		if (damos_filter_out(c, t, r, s))
		ktime_get_coarse_ts64(&begin);
		if (c->callback.before_damos_apply)
			err = c->callback.before_damos_apply(c, t, r, s);
			trace_damos_before_apply(cidx, sidx, tidx, r,
					damon_nr_regions(t), do_trace);
			sz_applied = c->ops.apply_scheme(c, t, r, s);
		ktime_get_coarse_ts64(&end);
		quota->total_charged_ns += timespec64_to_ns(&end) -
			timespec64_to_ns(&begin);
		quota->charged_sz += sz;
		if (quota->esz && quota->charged_sz >= quota->esz) {
			quota->charge_target_from = t;
			quota->charge_addr_from = r->ar.end + 1;

	if (s->action != DAMOS_STAT)

	damos_update_stat(s, sz, sz_applied);
static void damon_do_apply_schemes(struct damon_ctx *c,
				   struct damon_target *t,
				   struct damon_region *r)
	damon_for_each_scheme(s, c) {
		struct damos_quota *quota = &s->quota;

		if (c->passed_sample_intervals < s->next_apply_sis)

		if (!s->wmarks.activated)

		/* Check the quota */
		if (quota->esz && quota->charged_sz >= quota->esz)

		if (damos_skip_charged_region(t, &r, s))

		if (!damos_valid_target(c, t, r, s))

		damos_apply_scheme(c, t, r, s);
/**
 * damon_feed_loop_next_input() - get next input to achieve a target score.
 * @last_input	The last input.
 * @score	Current score that was made with @last_input.
 *
 * Calculate next input to achieve the target score, based on the last input
 * and current score.  Assuming the input and the score are positively
 * proportional, calculate how much compensation should be added to or
 * subtracted from the last input as a proportion of the last input.  Avoid
 * next input always being zero by setting it non-zero always.  In short form
 * (assuming support of float and signed calculations), the algorithm is as
 * below.
 *
 * next_input = max(last_input * ((goal - current) / goal + 1), 1)
 *
 * For simple implementation, we assume the target score is always 10,000.  The
 * caller should adjust @score for this.
 *
 * Returns next input that is assumed to achieve the target score.
 */
static unsigned long damon_feed_loop_next_input(unsigned long last_input,
		unsigned long score)
	const unsigned long goal = 10000;
	/* Set minimum input as 10000 to avoid compensation being zero */
	const unsigned long min_input = 10000;
	unsigned long score_goal_diff, compensation;
	bool over_achieving = score > goal;

	if (score >= goal * 2)

		score_goal_diff = score - goal;
		score_goal_diff = goal - score;

	if (last_input < ULONG_MAX / score_goal_diff)
		compensation = last_input * score_goal_diff / goal;
		compensation = last_input / goal * score_goal_diff;

		return max(last_input - compensation, min_input);
	if (last_input < ULONG_MAX - compensation)
		return last_input + compensation;
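/*
 * Worked example: if the current score is 5000 (half of the 10000 goal) and
 * last_input is 1000000, score_goal_diff is 5000 and compensation is
 * 1000000 * 5000 / 10000 == 500000.  The goal is under-achieved, so the next
 * input is 1000000 + 500000 == 1500000.  Had the score been 15000 instead,
 * the same compensation would be subtracted, giving 500000 (but never less
 * than min_input).
 */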
static u64 damos_get_some_mem_psi_total(void)
	if (static_branch_likely(&psi_disabled))
	return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2],

#else	/* CONFIG_PSI */

static inline u64 damos_get_some_mem_psi_total(void)

#endif	/* CONFIG_PSI */
static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal)
	switch (goal->metric) {
	case DAMOS_QUOTA_USER_INPUT:
		/* User should already set goal->current_value */
	case DAMOS_QUOTA_SOME_MEM_PSI_US:
		now_psi_total = damos_get_some_mem_psi_total();
		goal->current_value = now_psi_total - goal->last_psi_total;
		goal->last_psi_total = now_psi_total;
/* Return the highest score since it makes schemes least aggressive */
static unsigned long damos_quota_score(struct damos_quota *quota)
	struct damos_quota_goal *goal;
	unsigned long highest_score = 0;

	damos_for_each_quota_goal(goal, quota) {
		damos_set_quota_goal_current_value(goal);
		highest_score = max(highest_score,
				goal->current_value * 10000 /
				goal->target_value);

	return highest_score;
/*
 * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty
 */
static void damos_set_effective_quota(struct damos_quota *quota)
	unsigned long throughput;

	if (!quota->ms && list_empty(&quota->goals)) {
		quota->esz = quota->sz;

	if (!list_empty(&quota->goals)) {
		unsigned long score = damos_quota_score(quota);

		quota->esz_bp = damon_feed_loop_next_input(
				max(quota->esz_bp, 10000UL),
		esz = quota->esz_bp / 10000;

		if (quota->total_charged_ns)
			throughput = quota->total_charged_sz * 1000000 /
				quota->total_charged_ns;
			throughput = PAGE_SIZE * 1024;
		if (!list_empty(&quota->goals))
			esz = min(throughput * quota->ms, esz);
			esz = throughput * quota->ms;

	if (quota->sz && quota->sz < esz)
static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
	struct damos_quota *quota = &s->quota;
	struct damon_target *t;
	struct damon_region *r;
	unsigned long cumulated_sz;
	unsigned int score, max_score = 0;

	if (!quota->ms && !quota->sz && list_empty(&quota->goals))

	/* New charge window starts */
	if (time_after_eq(jiffies, quota->charged_from +
				msecs_to_jiffies(quota->reset_interval))) {
		if (quota->esz && quota->charged_sz >= quota->esz)
			s->stat.qt_exceeds++;
		quota->total_charged_sz += quota->charged_sz;
		quota->charged_from = jiffies;
		quota->charged_sz = 0;
		damos_set_effective_quota(quota);

	if (!c->ops.get_scheme_score)

	/* Fill up the score histogram */
	memset(c->regions_score_histogram, 0,
			sizeof(*c->regions_score_histogram) *
			(DAMOS_MAX_SCORE + 1));
	damon_for_each_target(t, c) {
		damon_for_each_region(r, t) {
			if (!__damos_valid_target(r, s))
			score = c->ops.get_scheme_score(c, t, r, s);
			c->regions_score_histogram[score] +=
			if (score > max_score)

	/* Set the min score limit */
	for (cumulated_sz = 0, score = max_score; ; score--) {
		cumulated_sz += c->regions_score_histogram[score];
		if (cumulated_sz >= quota->esz || !score)
	quota->min_score = score;
static void kdamond_apply_schemes(struct damon_ctx *c)
	struct damon_target *t;
	struct damon_region *r, *next_r;
	unsigned long sample_interval = c->attrs.sample_interval ?
		c->attrs.sample_interval : 1;
	bool has_schemes_to_apply = false;

	damon_for_each_scheme(s, c) {
		if (c->passed_sample_intervals < s->next_apply_sis)

		if (!s->wmarks.activated)

		has_schemes_to_apply = true;

		damos_adjust_quota(c, s);

	if (!has_schemes_to_apply)

	damon_for_each_target(t, c) {
		damon_for_each_region_safe(r, next_r, t)
			damon_do_apply_schemes(c, t, r);

	damon_for_each_scheme(s, c) {
		if (c->passed_sample_intervals < s->next_apply_sis)
		s->next_apply_sis = c->passed_sample_intervals +
			(s->apply_interval_us ? s->apply_interval_us :
			 c->attrs.aggr_interval) / sample_interval;
/*
 * Merge two adjacent regions into one region
 */
static void damon_merge_two_regions(struct damon_target *t,
		struct damon_region *l, struct damon_region *r)
	unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r);

	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
	l->nr_accesses_bp = l->nr_accesses * 10000;
	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
	l->ar.end = r->ar.end;
	damon_destroy_region(r, t);
/*
 * Merge adjacent regions having similar access frequencies
 *
 * t		target affected by this merge operation
 * thres	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 */
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
				   unsigned long sz_limit)
	struct damon_region *r, *prev = NULL, *next;

	damon_for_each_region_safe(r, next, t) {
		if (abs(r->nr_accesses - r->last_nr_accesses) > thres)

		if (prev && prev->ar.end == r->ar.start &&
		    abs(prev->nr_accesses - r->nr_accesses) <= thres &&
		    damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
			damon_merge_two_regions(t, prev, r);
/*
 * Merge adjacent regions having similar access frequencies
 *
 * threshold	'->nr_accesses' diff threshold for the merge
 * sz_limit	size upper limit of each region
 *
 * This function merges monitoring target regions which are adjacent and their
 * access frequencies are similar.  This is for minimizing the monitoring
 * overhead under the dynamically changeable access pattern.  If a merge was
 * unnecessarily made, later 'kdamond_split_regions()' will revert it.
 *
 * The total number of regions could be higher than the user-defined limit,
 * max_nr_regions, for some cases.  For example, the user can update
 * max_nr_regions to a number that is lower than the current number of regions
 * while DAMON is running.  For such a case, repeat merging until the limit is
 * met while increasing @threshold up to possible maximum level.
 */
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
				  unsigned long sz_limit)
	struct damon_target *t;
	unsigned int nr_regions;
	unsigned int max_thres;

	max_thres = c->attrs.aggr_interval /
		(c->attrs.sample_interval ? c->attrs.sample_interval : 1);
		damon_for_each_target(t, c) {
			damon_merge_regions_of(t, threshold, sz_limit);
			nr_regions += damon_nr_regions(t);
		threshold = max(1, threshold * 2);
	} while (nr_regions > c->attrs.max_nr_regions &&
			threshold / 2 < max_thres);
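/*
 * Worked example: with 5000 us sampling and 100000 us aggregation, max_thres
 * is 20.  Suppose kdamond passes an initial threshold of 2 and the first pass
 * still leaves more than max_nr_regions regions.  The loop then retries with
 * thresholds 4, 8, 16 and 32 (doubling once more would fail the max_thres
 * check), merging ever more dissimilar neighbors until the region count drops
 * below the limit or the threshold is exhausted.
 */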
/*
 * Split a region in two
 *
 * r		the region to be split
 * sz_r		size of the first sub-region that will be made
 */
static void damon_split_region_at(struct damon_target *t,
				  struct damon_region *r, unsigned long sz_r)
	struct damon_region *new;

	new = damon_new_region(r->ar.start + sz_r, r->ar.end);

	r->ar.end = new->ar.start;

	new->last_nr_accesses = r->last_nr_accesses;
	new->nr_accesses_bp = r->nr_accesses_bp;
	new->nr_accesses = r->nr_accesses;

	damon_insert_region(new, r, damon_next_region(r), t);
/* Split every region in the given target into 'nr_subs' regions */
static void damon_split_regions_of(struct damon_target *t, int nr_subs)
	struct damon_region *r, *next;
	unsigned long sz_region, sz_sub = 0;

	damon_for_each_region_safe(r, next, t) {
		sz_region = damon_sz_region(r);

		for (i = 0; i < nr_subs - 1 &&
				sz_region > 2 * DAMON_MIN_REGION; i++) {
			/*
			 * Randomly select size of left sub-region to be at
			 * least 10 percent and at most 90% of original region
			 */
			sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
					sz_region / 10, DAMON_MIN_REGION);
			/* Do not allow blank region */
			if (sz_sub == 0 || sz_sub >= sz_region)

			damon_split_region_at(t, r, sz_sub);
/*
 * Split every target region into randomly-sized small regions
 *
 * This function splits every target region into random-sized small regions if
 * the current total number of the regions is equal or smaller than half of the
 * user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the dynamically changeable access patterns.  If a
 * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
 * it.
 */
static void kdamond_split_regions(struct damon_ctx *ctx)
	struct damon_target *t;
	unsigned int nr_regions = 0;
	static unsigned int last_nr_regions;
	int nr_subregions = 2;

	damon_for_each_target(t, ctx)
		nr_regions += damon_nr_regions(t);

	if (nr_regions > ctx->attrs.max_nr_regions / 2)

	/* Maybe the middle of the region has different access frequency */
	if (last_nr_regions == nr_regions &&
			nr_regions < ctx->attrs.max_nr_regions / 3)

	damon_for_each_target(t, ctx)
		damon_split_regions_of(t, nr_subregions);

	last_nr_regions = nr_regions;
/*
 * Check whether current monitoring should be stopped
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if need to stop current monitoring.
 */
static bool kdamond_need_stop(struct damon_ctx *ctx)
	struct damon_target *t;

	if (kthread_should_stop())

	if (!ctx->ops.target_valid)

	damon_for_each_target(t, ctx) {
		if (ctx->ops.target_valid(t))
static int damos_get_wmark_metric_value(enum damos_wmark_metric metric,
		unsigned long *metric_value)
	case DAMOS_WMARK_FREE_MEM_RATE:
		*metric_value = global_zone_page_state(NR_FREE_PAGES) * 1000 /
/*
 * Returns zero if the scheme is active.  Else, returns time to wait for next
 * watermark check in micro-seconds.
 */
static unsigned long damos_wmark_wait_us(struct damos *scheme)
	unsigned long metric;

	if (damos_get_wmark_metric_value(scheme->wmarks.metric, &metric))

	/* higher than high watermark or lower than low watermark */
	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
		if (scheme->wmarks.activated)
			pr_debug("deactivate a scheme (%d) for %s wmark\n",
					metric > scheme->wmarks.high ?
		scheme->wmarks.activated = false;
		return scheme->wmarks.interval;

	/* inactive and higher than middle watermark */
	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
			!scheme->wmarks.activated)
		return scheme->wmarks.interval;

	if (!scheme->wmarks.activated)
		pr_debug("activate a scheme (%d)\n", scheme->action);
	scheme->wmarks.activated = true;
static void kdamond_usleep(unsigned long usecs)
	if (usecs >= USLEEP_RANGE_UPPER_BOUND)
		schedule_timeout_idle(usecs_to_jiffies(usecs));
		usleep_range_idle(usecs, usecs + 1);
/* Returns negative error code if it's not activated but should return */
static int kdamond_wait_activation(struct damon_ctx *ctx)
	unsigned long wait_time;
	unsigned long min_wait_time = 0;
	bool init_wait_time = false;

	while (!kdamond_need_stop(ctx)) {
		damon_for_each_scheme(s, ctx) {
			wait_time = damos_wmark_wait_us(s);
			if (!init_wait_time || wait_time < min_wait_time) {
				init_wait_time = true;
				min_wait_time = wait_time;

		kdamond_usleep(min_wait_time);

		if (ctx->callback.after_wmarks_check &&
				ctx->callback.after_wmarks_check(ctx))
static void kdamond_init_intervals_sis(struct damon_ctx *ctx)
	unsigned long sample_interval = ctx->attrs.sample_interval ?
		ctx->attrs.sample_interval : 1;
	unsigned long apply_interval;
	struct damos *scheme;

	ctx->passed_sample_intervals = 0;
	ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
	ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
		sample_interval;

	damon_for_each_scheme(scheme, ctx) {
		apply_interval = scheme->apply_interval_us ?
			scheme->apply_interval_us : ctx->attrs.aggr_interval;
		scheme->next_apply_sis = apply_interval / sample_interval;
/*
 * The monitoring daemon that runs as a kernel thread
 */
static int kdamond_fn(void *data)
	struct damon_ctx *ctx = data;
	struct damon_target *t;
	struct damon_region *r, *next;
	unsigned int max_nr_accesses = 0;
	unsigned long sz_limit = 0;

	pr_debug("kdamond (%d) starts\n", current->pid);

	complete(&ctx->kdamond_started);
	kdamond_init_intervals_sis(ctx);

	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
	ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1,
			sizeof(*ctx->regions_score_histogram), GFP_KERNEL);
	if (!ctx->regions_score_histogram)

	sz_limit = damon_region_sz_limit(ctx);

	while (!kdamond_need_stop(ctx)) {
		/*
		 * ctx->attrs and ctx->next_{aggregation,ops_update}_sis could
		 * be changed from after_wmarks_check() or after_aggregation()
		 * callbacks.  Read the values here, and use those for this
		 * iteration.  That is, damon_set_attrs() updated new values
		 * are respected from next iteration.
		 */
		unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
		unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
		unsigned long sample_interval = ctx->attrs.sample_interval;

		if (kdamond_wait_activation(ctx))

		if (ctx->ops.prepare_access_checks)
			ctx->ops.prepare_access_checks(ctx);
		if (ctx->callback.after_sampling &&
				ctx->callback.after_sampling(ctx))

		kdamond_usleep(sample_interval);
		ctx->passed_sample_intervals++;
		if (ctx->ops.check_accesses)
			max_nr_accesses = ctx->ops.check_accesses(ctx);

		if (ctx->passed_sample_intervals >= next_aggregation_sis) {
			kdamond_merge_regions(ctx,
					max_nr_accesses / 10,
					sz_limit);
			if (ctx->callback.after_aggregation &&
					ctx->callback.after_aggregation(ctx))

		/*
		 * do kdamond_apply_schemes() after kdamond_merge_regions() if
		 * possible, to reduce overhead
		 */
		if (!list_empty(&ctx->schemes))
			kdamond_apply_schemes(ctx);

		sample_interval = ctx->attrs.sample_interval ?
			ctx->attrs.sample_interval : 1;
		if (ctx->passed_sample_intervals >= next_aggregation_sis) {
			ctx->next_aggregation_sis = next_aggregation_sis +
				ctx->attrs.aggr_interval / sample_interval;

			kdamond_reset_aggregated(ctx);
			kdamond_split_regions(ctx);
			if (ctx->ops.reset_aggregated)
				ctx->ops.reset_aggregated(ctx);

		if (ctx->passed_sample_intervals >= next_ops_update_sis) {
			ctx->next_ops_update_sis = next_ops_update_sis +
				ctx->attrs.ops_update_interval /
				sample_interval;
			if (ctx->ops.update)
				ctx->ops.update(ctx);
			sz_limit = damon_region_sz_limit(ctx);

	damon_for_each_target(t, ctx) {
		damon_for_each_region_safe(r, next, t)
			damon_destroy_region(r, t);

	if (ctx->callback.before_terminate)
		ctx->callback.before_terminate(ctx);
	if (ctx->ops.cleanup)
		ctx->ops.cleanup(ctx);
	kfree(ctx->regions_score_histogram);

	pr_debug("kdamond (%d) finishes\n", current->pid);
	mutex_lock(&ctx->kdamond_lock);
	ctx->kdamond = NULL;
	mutex_unlock(&ctx->kdamond_lock);

	mutex_lock(&damon_lock);
	if (!nr_running_ctxs && running_exclusive_ctxs)
		running_exclusive_ctxs = false;
	mutex_unlock(&damon_lock);
/**
 * struct damon_system_ram_region - System RAM resource address region of
 *				    [@start, @end).
 * @start:	Start address of the region (inclusive).
 * @end:	End address of the region (exclusive).
 */
struct damon_system_ram_region {
	unsigned long start;
static int walk_system_ram(struct resource *res, void *arg)
	struct damon_system_ram_region *a = arg;

	if (a->end - a->start < resource_size(res)) {
		a->start = res->start;

/*
 * Find biggest 'System RAM' resource and store its start and end address in
 * @start and @end, respectively.  If no System RAM is found, returns false.
 */
static bool damon_find_biggest_system_ram(unsigned long *start,
					  unsigned long *end)
	struct damon_system_ram_region arg = {};

	walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
	if (arg.end <= arg.start)
/**
 * damon_set_region_biggest_system_ram_default() - Set the region of the given
 *	monitoring target as requested, or biggest 'System RAM'.
 * @t:		The monitoring target to set the region.
 * @start:	The pointer to the start address of the region.
 * @end:	The pointer to the end address of the region.
 *
 * This function sets the region of @t as requested by @start and @end.  If the
 * values of @start and @end are zero, however, this function finds the biggest
 * 'System RAM' resource and sets the region to cover the resource.  In the
 * latter case, this function saves the start and end addresses of the resource
 * in @start and @end, respectively.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int damon_set_region_biggest_system_ram_default(struct damon_target *t,
			unsigned long *start, unsigned long *end)
	struct damon_addr_range addr_range;

	if (!*start && !*end &&
	    !damon_find_biggest_system_ram(start, end))

	addr_range.start = *start;
	addr_range.end = *end;
	return damon_set_regions(t, &addr_range, 1);
/**
 * damon_moving_sum() - Calculate an inferred moving sum value.
 * @mvsum:	Inferred sum of the last @len_window values.
 * @nomvsum:	Non-moving sum of the last discrete @len_window window values.
 * @len_window:	The number of last values to take care of.
 * @new_value:	New value that will be added to the pseudo moving sum.
 *
 * Moving sum (moving average * window size) is good for handling noise, but
 * the cost of keeping past values can be high for arbitrary window size.  This
 * function implements a lightweight pseudo moving sum function that doesn't
 * keep the past window values.
 *
 * It simply assumes there was no noise in the past, and gets the no-noise
 * assumed past value to drop from @nomvsum and @len_window.  @nomvsum is a
 * non-moving sum of the last window.  For example, if @len_window is 10 and we
 * have 25 values, @nomvsum is the sum of the 11th to 20th values of the 25
 * values.  Hence, this function simply drops @nomvsum / @len_window from
 * given @mvsum and adds @new_value.
 *
 * For example, if @len_window is 10 and @nomvsum is 50, the last 10 values for
 * the last window could vary, e.g., 0, 10, 0, 10, 0, 10, 0, 0, 0, 20.  For
 * calculating next moving sum with a new value, we should drop 0 from 50 and
 * add the new value.  However, this function assumes it got value 5 for each
 * of the last ten times.  Based on the assumption, when the next value is
 * measured, it drops the assumed past value, 5, from the current sum, and adds
 * the new value to get the updated pseudo-moving average.
 *
 * This means the value could have errors, but the errors will disappear for
 * every @len_window aligned calls.  For example, if @len_window is 10, the
 * pseudo moving sum with 11th value to 19th value would have an error.  But
 * the sum with 20th value will not have the error.
 *
 * Return: Pseudo-moving average after getting the @new_value.
 */
static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
		unsigned int len_window, unsigned int new_value)
	return mvsum - nomvsum / len_window + new_value;
/**
 * damon_update_region_access_rate() - Update the access rate of a region.
 * @r:		The DAMON region to update for its access check result.
 * @accessed:	Whether the region has been accessed during last sampling
 *		interval.
 * @attrs:	The damon_attrs of the DAMON context.
 *
 * Update the access rate of a region with the region's last sampling interval
 * access check result.
 *
 * Usually this will be called by &damon_operations->check_accesses callback.
 */
void damon_update_region_access_rate(struct damon_region *r, bool accessed,
		struct damon_attrs *attrs)
	unsigned int len_window = 1;

	/*
	 * sample_interval can be zero, but cannot be larger than
	 * aggr_interval, owing to validation of damon_set_attrs().
	 */
	if (attrs->sample_interval)
		len_window = damon_max_nr_accesses(attrs);
	r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp,
			r->last_nr_accesses * 10000, len_window,
			accessed ? 10000 : 0);
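/*
 * Worked example: with 5000 us sampling and 100000 us aggregation intervals,
 * len_window is damon_max_nr_accesses(), i.e. 20.  For a region whose
 * last_nr_accesses is 10, nomvsum is 100000, so each call drops
 * 100000 / 20 == 5000 from nr_accesses_bp and adds 10000 if the region was
 * found accessed (105000 in total if nr_accesses_bp was 100000), or adds
 * nothing if it was idle (95000).  Dividing by 10000 yields the region's
 * current access count estimate for the aggregation interval.
 */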
static int __init damon_init(void)
	damon_region_cache = KMEM_CACHE(damon_region, 0);
	if (unlikely(!damon_region_cache)) {
		pr_err("creating damon_region_cache fails\n");

subsys_initcall(damon_init);

#include "tests/core-kunit.h"