// lib/alloc_tag.c
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/alloc_tag.h>
#include <linux/execmem.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
#include <linux/proc_fs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>

#define ALLOCINFO_FILE_NAME		"allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE	(100000UL * sizeof(struct alloc_tag))
#define SECTION_START(NAME)	(CODETAG_SECTION_START_PREFIX NAME)
#define SECTION_STOP(NAME)	(CODETAG_SECTION_STOP_PREFIX NAME)

#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
static bool mem_profiling_support = true;
#else
static bool mem_profiling_support;
#endif

static struct codetag_type *alloc_tag_cttype;

DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
EXPORT_SYMBOL(_shared_alloc_tag);

DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
			mem_alloc_profiling_key);
DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);

struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
unsigned long alloc_tag_ref_mask;
int alloc_tag_ref_offs;

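/*
 * seq_file interface behind /proc/allocinfo: iterate over every
 * registered allocation tag and emit one line per tag with its live
 * byte count, call count and source location.
 */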
struct allocinfo_private {
	struct codetag_iterator iter;
	bool print_header;
};

static void *allocinfo_start(struct seq_file *m, loff_t *pos)
{
	struct allocinfo_private *priv;
	struct codetag *ct;
	loff_t node = *pos;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	m->private = priv;
	if (!priv)
		return NULL;

	priv->print_header = (node == 0);
	codetag_lock_module_list(alloc_tag_cttype, true);
	priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(&priv->iter)) != NULL && node)
		node--;

	return ct ? priv : NULL;
}

static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	struct codetag *ct = codetag_next_ct(&priv->iter);

	(*pos)++;
	if (!ct)
		return NULL;

	return priv;
}

static void allocinfo_stop(struct seq_file *m, void *arg)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)m->private;

	if (priv) {
		codetag_lock_module_list(alloc_tag_cttype, false);
		kfree(priv);
	}
}

static void print_allocinfo_header(struct seq_buf *buf)
{
	/* Output format version, so we can change it. */
	seq_buf_printf(buf, "allocinfo - version: 1.0\n");
	seq_buf_printf(buf, "#     <size>  <calls> <tag info>\n");
}

static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
{
	struct alloc_tag *tag = ct_to_alloc_tag(ct);
	struct alloc_tag_counters counter = alloc_tag_read(tag);
	s64 bytes = counter.bytes;

	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
	codetag_to_text(out, ct);
	seq_buf_putc(out, ' ');
	seq_buf_putc(out, '\n');
}

static int allocinfo_show(struct seq_file *m, void *arg)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	char *bufp;
	size_t n = seq_get_buf(m, &bufp);
	struct seq_buf buf;

	seq_buf_init(&buf, bufp, n);
	if (priv->print_header) {
		print_allocinfo_header(&buf);
		priv->print_header = false;
	}
	alloc_tag_to_text(&buf, priv->iter.ct);
	seq_commit(m, seq_buf_used(&buf));
	return 0;
}

static const struct seq_operations allocinfo_seq_op = {
	.start	= allocinfo_start,
	.next	= allocinfo_next,
	.stop	= allocinfo_stop,
	.show	= allocinfo_show,
};

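/*
 * Collect up to @count tags with the largest byte counts into @tags,
 * kept sorted in descending order by insertion sort. Returns the number
 * of entries filled in. When @can_sleep is false the module list lock is
 * only tried, and 0 is returned if it cannot be taken.
 */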
size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
{
	struct codetag_iterator iter;
	struct codetag *ct;
	struct codetag_bytes n;
	unsigned int i, nr = 0;

	if (can_sleep)
		codetag_lock_module_list(alloc_tag_cttype, true);
	else if (!codetag_trylock_module_list(alloc_tag_cttype))
		return 0;

	iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(&iter))) {
		struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));

		n.ct = ct;
		n.bytes = counter.bytes;

		for (i = 0; i < nr; i++)
			if (n.bytes > tags[i].bytes)
				break;

		if (i < count) {
			nr -= nr == count;
			memmove(&tags[i + 1],
				&tags[i],
				sizeof(tags[0]) * (nr - i));
			nr++;
			tags[i] = n;
		}
	}

	codetag_lock_module_list(alloc_tag_cttype, false);

	return nr;
}

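/*
 * When a high-order folio is split, make each new head page's tag
 * reference point at the original allocation tag, so the memory stays
 * accounted to the call site that allocated it.
 */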
void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
{
	int i;
	struct alloc_tag *tag;
	unsigned int nr_pages = 1 << new_order;

	if (!mem_alloc_profiling_enabled())
		return;

	tag = pgalloc_tag_get(&folio->page);
	if (!tag)
		return;

	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
		union pgtag_ref_handle handle;
		union codetag_ref ref;

		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
			/* Set new reference to point to the original tag */
			alloc_tag_ref_set(&ref, tag);
			update_page_tag_ref(handle, &ref);
			put_page_tag_ref(handle);
		}
	}
}

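/* Exchange the tags of two folios, e.g. when migration copies old to new. */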
void pgalloc_tag_swap(struct folio *new, struct folio *old)
{
	union pgtag_ref_handle handle_old, handle_new;
	union codetag_ref ref_old, ref_new;
	struct alloc_tag *tag_old, *tag_new;

	tag_old = pgalloc_tag_get(&old->page);
	if (!tag_old)
		return;
	tag_new = pgalloc_tag_get(&new->page);
	if (!tag_new)
		return;

	if (!get_page_tag_ref(&old->page, &ref_old, &handle_old))
		return;
	if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) {
		put_page_tag_ref(handle_old);
		return;
	}

	/* swap tags */
	__alloc_tag_ref_set(&ref_old, tag_new);
	update_page_tag_ref(handle_old, &ref_old);
	__alloc_tag_ref_set(&ref_new, tag_old);
	update_page_tag_ref(handle_new, &ref_new);

	put_page_tag_ref(handle_old);
	put_page_tag_ref(handle_new);
}

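/* Permanently disable profiling, optionally removing /proc/allocinfo. */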
static void shutdown_mem_profiling(bool remove_file)
{
	if (mem_alloc_profiling_enabled())
		static_branch_disable(&mem_alloc_profiling_key);

	if (!mem_profiling_support)
		return;

	if (remove_file)
		remove_proc_entry(ALLOCINFO_FILE_NAME, NULL);
	mem_profiling_support = false;
}

static void __init procfs_init(void)
{
	if (!mem_profiling_support)
		return;

	if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) {
		pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
		shutdown_mem_profiling(false);
	}
}

void __init alloc_tag_sec_init(void)
{
	struct alloc_tag *last_codetag;

	if (!mem_profiling_support)
		return;

	if (!static_key_enabled(&mem_profiling_compressed))
		return;

	kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_START(ALLOC_TAG_SECTION_NAME));
	last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_STOP(ALLOC_TAG_SECTION_NAME));
	kernel_tags.count = last_codetag - kernel_tags.first_tag;

	/* Check if kernel tags fit into page flags */
	if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
		shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
		pr_err("%lu allocation tags cannot be referenced using %d available page flag bits. Memory allocation profiling is disabled!\n",
			kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS);
		return;
	}

	alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS);
	alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1);
	pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
		 NR_UNUSED_PAGEFLAG_BITS);
}

#ifdef CONFIG_MODULES

static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
static struct vm_struct *vm_module_tags;
/* A dummy object used to indicate an unloaded module */
static struct module unloaded_mod;
/* A dummy object used to indicate a module prepended area */
static struct module prepend_mod;

struct alloc_tag_module_section module_tags;

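/*
 * Round @val up to a multiple of sizeof(struct alloc_tag). Compressed
 * tag references are indexes, so tag areas must be alloc_tag-aligned.
 * Illustrative example: if sizeof(struct alloc_tag) were 40 bytes, a
 * val of 100 would round up to 120.
 */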
static inline unsigned long alloc_tag_align(unsigned long val)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return val;
	}

	if (val % sizeof(struct alloc_tag) == 0)
		return val;
	return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag);
}

static bool ensure_alignment(unsigned long align, unsigned int *prepend)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return true;
	}

	/*
	 * If alloc_tag size is not a multiple of required alignment, tag
	 * indexing does not work.
	 */
	if (!IS_ALIGNED(sizeof(struct alloc_tag), align))
		return false;

	/* Ensure prepend consumes multiple of alloc_tag-sized blocks */
	if (*prepend)
		*prepend = alloc_tag_align(*prepend);

	return true;
}

static inline bool tags_addressable(void)
{
	unsigned long tag_idx_count;

	if (!static_key_enabled(&mem_profiling_compressed))
		return true; /* with page_ext tags are always addressable */

	tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count +
			module_tags.size / sizeof(struct alloc_tag);

	return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS);
}

static bool needs_section_mem(struct module *mod, unsigned long size)
{
	if (!mem_profiling_support)
		return false;

	return size >= sizeof(struct alloc_tag);
}

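/* Return the first tag in [from, to] with outstanding bytes, or NULL. */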
static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to)
{
	while (from <= to) {
		struct alloc_tag_counters counter;

		counter = alloc_tag_read(from);
		if (counter.bytes)
			return from;
		from++;
	}

	return NULL;
}

/* Called with mod_area_mt locked */
static void clean_unused_module_areas_locked(void)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_for_each(&mas, val, module_tags.size) {
		if (val != &unloaded_mod)
			continue;

		/* Release area if all tags are unused */
		if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
				   (struct alloc_tag *)(module_tags.start_addr + mas.last)))
			mas_erase(&mas);
	}
}

/* Called with mod_area_mt locked */
static bool find_aligned_area(struct ma_state *mas, unsigned long section_size,
			      unsigned long size, unsigned int prepend, unsigned long align)
{
	bool cleanup_done = false;

repeat:
	/* Try finding exact size and hope the start is aligned */
	if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) {
		if (IS_ALIGNED(mas->index + prepend, align))
			return true;

		/* Try finding larger area to align later */
		mas_reset(mas);
		if (!mas_empty_area(mas, 0, section_size - 1,
				    size + prepend + align - 1))
			return true;
	}

	/* No free area, try cleaning up stale data and repeat the search once */
	if (!cleanup_done) {
		clean_unused_module_areas_locked();
		cleanup_done = true;
		mas_reset(mas);
		goto repeat;
	}

	return false;
}

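/*
 * Virtual space for module tags is reserved up front; physical pages are
 * allocated and mapped lazily here, only once module_tags.size outgrows
 * the already-populated range.
 */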
static int vm_module_tags_populate(void)
{
	unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT;

	if (phys_size < module_tags.size) {
		struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
		unsigned long addr = module_tags.start_addr + phys_size;
		unsigned long more_pages;
		unsigned long nr;

		more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT;
		nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN,
						 NUMA_NO_NODE, more_pages, next_page);
		if (nr < more_pages ||
		    vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL,
				     next_page, PAGE_SHIFT) < 0) {
			/* Clean up and error out */
			for (int i = 0; i < nr; i++)
				__free_page(next_page[i]);
			return -ENOMEM;
		}
		vm_module_tags->nr_pages += nr;
	}

	return 0;
}

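/*
 * Reserve an aligned range for a module's tags inside the shared vmap
 * area, tracked in mod_area_mt. Padding inserted to satisfy alignment is
 * stored as a separate prepend_mod entry so it can be reclaimed together
 * with the module's area.
 */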
static void *reserve_module_tags(struct module *mod, unsigned long size,
				 unsigned int prepend, unsigned long align)
{
	unsigned long section_size = module_tags.end_addr - module_tags.start_addr;
	MA_STATE(mas, &mod_area_mt, 0, section_size - 1);
	unsigned long offset;
	void *ret = NULL;

	/* If no tags return error */
	if (size < sizeof(struct alloc_tag))
		return ERR_PTR(-EINVAL);

	/*
	 * align is always power of 2, so we can use IS_ALIGNED and ALIGN.
	 * align 0 or 1 means no alignment, to simplify set to 1.
	 */
	if (!align)
		align = 1;

	if (!ensure_alignment(align, &prepend)) {
		shutdown_mem_profiling(true);
		pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
			mod->name, align);
		return ERR_PTR(-EINVAL);
	}

	mas_lock(&mas);
	if (!find_aligned_area(&mas, section_size, size, prepend, align)) {
		ret = ERR_PTR(-ENOMEM);
		goto unlock;
	}

	/* Mark found area as reserved */
	offset = mas.index;
	offset += prepend;
	offset = ALIGN(offset, align);
	if (offset != mas.index) {
		unsigned long pad_start = mas.index;

		mas.last = offset - 1;
		mas_store(&mas, &prepend_mod);
		if (mas_is_err(&mas)) {
			ret = ERR_PTR(xa_err(mas.node));
			goto unlock;
		}
		mas.index = offset;
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas)) {
			mas.index = pad_start;
			mas_erase(&mas);
			ret = ERR_PTR(xa_err(mas.node));
		}
	} else {
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas))
			ret = ERR_PTR(xa_err(mas.node));
	}
unlock:
	mas_unlock(&mas);

	if (IS_ERR(ret))
		return ret;

	if (module_tags.size < offset + size) {
		int grow_res;

		module_tags.size = offset + size;
		if (mem_alloc_profiling_enabled() && !tags_addressable()) {
			shutdown_mem_profiling(true);
			pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
				mod->name, NR_UNUSED_PAGEFLAG_BITS);
		}

		grow_res = vm_module_tags_populate();
		if (grow_res) {
			shutdown_mem_profiling(true);
			pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
			       mod->name);
			return ERR_PTR(grow_res);
		}
	}

	return (struct alloc_tag *)(module_tags.start_addr + offset);
}

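/*
 * Free a module's tag range on unload. If tags still have outstanding
 * allocations, keep the range and mark it with unloaded_mod so counters
 * stay valid until the memory is really freed.
 */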
static void release_module_tags(struct module *mod, bool used)
{
	MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
	struct alloc_tag *tag;
	struct module *val;

	mas_lock(&mas);
	mas_for_each_rev(&mas, val, 0)
		if (val == mod)
			break;

	if (!val) /* module not found */
		goto out;

	if (!used)
		goto release_area;

	/* Find out if the area is used */
	tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
			    (struct alloc_tag *)(module_tags.start_addr + mas.last));
	if (tag) {
		struct alloc_tag_counters counter = alloc_tag_read(tag);

		pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
			tag->ct.filename, tag->ct.lineno, tag->ct.modname,
			tag->ct.function, counter.bytes);
	} else {
		used = false;
	}

release_area:
	mas_store(&mas, used ? &unloaded_mod : NULL);
	val = mas_prev_range(&mas, 0);
	if (val == &prepend_mod)
		mas_store(&mas, NULL);
out:
	mas_unlock(&mas);
}

static void replace_module(struct module *mod, struct module *new_mod)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_lock(&mas);
	mas_for_each(&mas, val, module_tags.size) {
		if (val != mod)
			continue;

		mas_store_gfp(&mas, new_mod, GFP_KERNEL);
		break;
	}
	mas_unlock(&mas);
}

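/*
 * Reserve MODULE_ALLOC_TAG_VMAP_SIZE bytes of virtual space for module
 * tags at boot; pages are populated later by vm_module_tags_populate().
 */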
static int __init alloc_mod_tags_mem(void)
{
	/* Map space to copy allocation tags */
	vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE);
	if (!vm_module_tags) {
		pr_err("Failed to map %lu bytes for module allocation tags\n",
			MODULE_ALLOC_TAG_VMAP_SIZE);
		module_tags.start_addr = 0;
		return -ENOMEM;
	}

	vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT,
					      sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
	if (!vm_module_tags->pages) {
		free_vm_area(vm_module_tags);
		return -ENOMEM;
	}

	module_tags.start_addr = (unsigned long)vm_module_tags->addr;
	module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
	/* Ensure the base is alloc_tag aligned when required for indexing */
	module_tags.start_addr = alloc_tag_align(module_tags.start_addr);

	return 0;
}

static void __init free_mod_tags_mem(void)
{
	int i;

	module_tags.start_addr = 0;
	for (i = 0; i < vm_module_tags->nr_pages; i++)
		__free_page(vm_module_tags->pages[i]);
	kfree(vm_module_tags->pages);
	free_vm_area(vm_module_tags);
}

#else /* CONFIG_MODULES */

static inline int alloc_mod_tags_mem(void) { return 0; }
static inline void free_mod_tags_mem(void) {}

#endif /* CONFIG_MODULES */

/* See: Documentation/mm/allocation-profiling.rst */
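/*
 * Accepted forms, as parsed below: "never", or a boolean token with an
 * optional ",compressed" suffix, e.g. sysctl.vm.mem_profiling=1,compressed.
 */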
static int __init setup_early_mem_profiling(char *str)
{
	bool compressed = false;
	bool enable;

	if (!str || !str[0])
		return -EINVAL;

	if (!strncmp(str, "never", 5)) {
		enable = false;
		mem_profiling_support = false;
		pr_info("Memory allocation profiling is disabled!\n");
	} else {
		char *token = strsep(&str, ",");

		if (kstrtobool(token, &enable))
			return -EINVAL;

		if (str) {
			if (strcmp(str, "compressed"))
				return -EINVAL;

			compressed = true;
		}
		mem_profiling_support = true;
		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
			compressed ? "with" : "without", enable ? "on" : "off");
	}

	if (enable != mem_alloc_profiling_enabled()) {
		if (enable)
			static_branch_enable(&mem_alloc_profiling_key);
		else
			static_branch_disable(&mem_alloc_profiling_key);
	}
	if (compressed != static_key_enabled(&mem_profiling_compressed)) {
		if (compressed)
			static_branch_enable(&mem_profiling_compressed);
		else
			static_branch_disable(&mem_profiling_compressed);
	}

	return 0;
}
early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling);

static __init bool need_page_alloc_tagging(void)
{
	if (static_key_enabled(&mem_profiling_compressed))
		return false;

	return mem_profiling_support;
}

static __init void init_page_alloc_tagging(void)
{
}

struct page_ext_operations page_alloc_tagging_ops = {
	.size = sizeof(union codetag_ref),
	.need = need_page_alloc_tagging,
	.init = init_page_alloc_tagging,
};
EXPORT_SYMBOL(page_alloc_tagging_ops);

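/*
 * /proc/sys/vm/mem_profiling flips the static key at runtime. The knob
 * is read-only (0444) under CONFIG_MEM_ALLOC_PROFILING_DEBUG, or when
 * profiling support is off.
 */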
#ifdef CONFIG_SYSCTL
static struct ctl_table memory_allocation_profiling_sysctls[] = {
	{
		.procname	= "mem_profiling",
		.data		= &mem_alloc_profiling_key,
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
		.mode		= 0444,
#else
		.mode		= 0644,
#endif
		.proc_handler	= proc_do_static_key,
	},
};

static void __init sysctl_init(void)
{
	if (!mem_profiling_support)
		memory_allocation_profiling_sysctls[0].mode = 0444;

	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
}
#else /* CONFIG_SYSCTL */
static inline void sysctl_init(void) {}
#endif /* CONFIG_SYSCTL */

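/*
 * Late init: reserve module tag memory first, then register the
 * alloc_tag codetag type and create the sysctl and /proc interfaces.
 */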
static int __init alloc_tag_init(void)
{
	const struct codetag_type_desc desc = {
		.section		= ALLOC_TAG_SECTION_NAME,
		.tag_size		= sizeof(struct alloc_tag),
#ifdef CONFIG_MODULES
		.needs_section_mem	= needs_section_mem,
		.alloc_section_mem	= reserve_module_tags,
		.free_section_mem	= release_module_tags,
		.module_replaced	= replace_module,
#endif
	};
	int res;

	res = alloc_mod_tags_mem();
	if (res)
		return res;

	alloc_tag_cttype = codetag_register_type(&desc);
	if (IS_ERR(alloc_tag_cttype)) {
		free_mod_tags_mem();
		return PTR_ERR(alloc_tag_cttype);
	}

	sysctl_init();
	procfs_init();

	return 0;
}
module_init(alloc_tag_init);