// SPDX-License-Identifier: GPL-2.0-only
#include <linux/alloc_tag.h>
#include <linux/execmem.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
#include <linux/proc_fs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
14 #define ALLOCINFO_FILE_NAME "allocinfo"
15 #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
16 #define SECTION_START(NAME) (CODETAG_SECTION_START_PREFIX NAME)
17 #define SECTION_STOP(NAME) (CODETAG_SECTION_STOP_PREFIX NAME)
19 #ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
20 static bool mem_profiling_support
= true;
22 static bool mem_profiling_support
;
25 static struct codetag_type
*alloc_tag_cttype
;
27 DEFINE_PER_CPU(struct alloc_tag_counters
, _shared_alloc_tag
);
28 EXPORT_SYMBOL(_shared_alloc_tag
);
30 DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
,
31 mem_alloc_profiling_key
);
32 DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed
);
34 struct alloc_tag_kernel_section kernel_tags
= { NULL
, 0 };
35 unsigned long alloc_tag_ref_mask
;
36 int alloc_tag_ref_offs
;
38 struct allocinfo_private
{
39 struct codetag_iterator iter
;
43 static void *allocinfo_start(struct seq_file
*m
, loff_t
*pos
)
45 struct allocinfo_private
*priv
;
49 priv
= kzalloc(sizeof(*priv
), GFP_KERNEL
);
54 priv
->print_header
= (node
== 0);
55 codetag_lock_module_list(alloc_tag_cttype
, true);
56 priv
->iter
= codetag_get_ct_iter(alloc_tag_cttype
);
57 while ((ct
= codetag_next_ct(&priv
->iter
)) != NULL
&& node
)
60 return ct
? priv
: NULL
;
63 static void *allocinfo_next(struct seq_file
*m
, void *arg
, loff_t
*pos
)
65 struct allocinfo_private
*priv
= (struct allocinfo_private
*)arg
;
66 struct codetag
*ct
= codetag_next_ct(&priv
->iter
);
75 static void allocinfo_stop(struct seq_file
*m
, void *arg
)
77 struct allocinfo_private
*priv
= (struct allocinfo_private
*)m
->private;
80 codetag_lock_module_list(alloc_tag_cttype
, false);
/* Write the two header lines of the allocinfo file into @buf. */
static void print_allocinfo_header(struct seq_buf *buf)
{
	/* Output format version, so we can change it. */
	seq_buf_printf(buf, "allocinfo - version: 1.0\n");
	seq_buf_printf(buf, "# <size> <calls> <tag info>\n");
}
92 static void alloc_tag_to_text(struct seq_buf
*out
, struct codetag
*ct
)
94 struct alloc_tag
*tag
= ct_to_alloc_tag(ct
);
95 struct alloc_tag_counters counter
= alloc_tag_read(tag
);
96 s64 bytes
= counter
.bytes
;
98 seq_buf_printf(out
, "%12lli %8llu ", bytes
, counter
.calls
);
99 codetag_to_text(out
, ct
);
100 seq_buf_putc(out
, ' ');
101 seq_buf_putc(out
, '\n');
104 static int allocinfo_show(struct seq_file
*m
, void *arg
)
106 struct allocinfo_private
*priv
= (struct allocinfo_private
*)arg
;
108 size_t n
= seq_get_buf(m
, &bufp
);
111 seq_buf_init(&buf
, bufp
, n
);
112 if (priv
->print_header
) {
113 print_allocinfo_header(&buf
);
114 priv
->print_header
= false;
116 alloc_tag_to_text(&buf
, priv
->iter
.ct
);
117 seq_commit(m
, seq_buf_used(&buf
));
121 static const struct seq_operations allocinfo_seq_op
= {
122 .start
= allocinfo_start
,
123 .next
= allocinfo_next
,
124 .stop
= allocinfo_stop
,
125 .show
= allocinfo_show
,
128 size_t alloc_tag_top_users(struct codetag_bytes
*tags
, size_t count
, bool can_sleep
)
130 struct codetag_iterator iter
;
132 struct codetag_bytes n
;
133 unsigned int i
, nr
= 0;
136 codetag_lock_module_list(alloc_tag_cttype
, true);
137 else if (!codetag_trylock_module_list(alloc_tag_cttype
))
140 iter
= codetag_get_ct_iter(alloc_tag_cttype
);
141 while ((ct
= codetag_next_ct(&iter
))) {
142 struct alloc_tag_counters counter
= alloc_tag_read(ct_to_alloc_tag(ct
));
145 n
.bytes
= counter
.bytes
;
147 for (i
= 0; i
< nr
; i
++)
148 if (n
.bytes
> tags
[i
].bytes
)
153 memmove(&tags
[i
+ 1],
155 sizeof(tags
[0]) * (nr
- i
));
161 codetag_lock_module_list(alloc_tag_cttype
, false);
166 void pgalloc_tag_split(struct folio
*folio
, int old_order
, int new_order
)
169 struct alloc_tag
*tag
;
170 unsigned int nr_pages
= 1 << new_order
;
172 if (!mem_alloc_profiling_enabled())
175 tag
= pgalloc_tag_get(&folio
->page
);
179 for (i
= nr_pages
; i
< (1 << old_order
); i
+= nr_pages
) {
180 union pgtag_ref_handle handle
;
181 union codetag_ref ref
;
183 if (get_page_tag_ref(folio_page(folio
, i
), &ref
, &handle
)) {
184 /* Set new reference to point to the original tag */
185 alloc_tag_ref_set(&ref
, tag
);
186 update_page_tag_ref(handle
, &ref
);
187 put_page_tag_ref(handle
);
192 void pgalloc_tag_copy(struct folio
*new, struct folio
*old
)
194 union pgtag_ref_handle handle
;
195 union codetag_ref ref
;
196 struct alloc_tag
*tag
;
198 tag
= pgalloc_tag_get(&old
->page
);
202 if (!get_page_tag_ref(&new->page
, &ref
, &handle
))
205 /* Clear the old ref to the original allocation tag. */
206 clear_page_tag_ref(&old
->page
);
207 /* Decrement the counters of the tag on get_new_folio. */
208 alloc_tag_sub(&ref
, folio_size(new));
209 __alloc_tag_ref_set(&ref
, tag
);
210 update_page_tag_ref(handle
, &ref
);
211 put_page_tag_ref(handle
);
214 static void shutdown_mem_profiling(bool remove_file
)
216 if (mem_alloc_profiling_enabled())
217 static_branch_disable(&mem_alloc_profiling_key
);
219 if (!mem_profiling_support
)
223 remove_proc_entry(ALLOCINFO_FILE_NAME
, NULL
);
224 mem_profiling_support
= false;
227 static void __init
procfs_init(void)
229 if (!mem_profiling_support
)
232 if (!proc_create_seq(ALLOCINFO_FILE_NAME
, 0400, NULL
, &allocinfo_seq_op
)) {
233 pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME
);
234 shutdown_mem_profiling(false);
238 void __init
alloc_tag_sec_init(void)
240 struct alloc_tag
*last_codetag
;
242 if (!mem_profiling_support
)
245 if (!static_key_enabled(&mem_profiling_compressed
))
248 kernel_tags
.first_tag
= (struct alloc_tag
*)kallsyms_lookup_name(
249 SECTION_START(ALLOC_TAG_SECTION_NAME
));
250 last_codetag
= (struct alloc_tag
*)kallsyms_lookup_name(
251 SECTION_STOP(ALLOC_TAG_SECTION_NAME
));
252 kernel_tags
.count
= last_codetag
- kernel_tags
.first_tag
;
254 /* Check if kernel tags fit into page flags */
255 if (kernel_tags
.count
> (1UL << NR_UNUSED_PAGEFLAG_BITS
)) {
256 shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
257 pr_err("%lu allocation tags cannot be references using %d available page flag bits. Memory allocation profiling is disabled!\n",
258 kernel_tags
.count
, NR_UNUSED_PAGEFLAG_BITS
);
262 alloc_tag_ref_offs
= (LRU_REFS_PGOFF
- NR_UNUSED_PAGEFLAG_BITS
);
263 alloc_tag_ref_mask
= ((1UL << NR_UNUSED_PAGEFLAG_BITS
) - 1);
264 pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
265 NR_UNUSED_PAGEFLAG_BITS
);
268 #ifdef CONFIG_MODULES
270 static struct maple_tree mod_area_mt
= MTREE_INIT(mod_area_mt
, MT_FLAGS_ALLOC_RANGE
);
271 static struct vm_struct
*vm_module_tags
;
272 /* A dummy object used to indicate an unloaded module */
273 static struct module unloaded_mod
;
274 /* A dummy object used to indicate a module prepended area */
275 static struct module prepend_mod
;
277 struct alloc_tag_module_section module_tags
;
279 static inline unsigned long alloc_tag_align(unsigned long val
)
281 if (!static_key_enabled(&mem_profiling_compressed
)) {
282 /* No alignment requirements when we are not indexing the tags */
286 if (val
% sizeof(struct alloc_tag
) == 0)
288 return ((val
/ sizeof(struct alloc_tag
)) + 1) * sizeof(struct alloc_tag
);
291 static bool ensure_alignment(unsigned long align
, unsigned int *prepend
)
293 if (!static_key_enabled(&mem_profiling_compressed
)) {
294 /* No alignment requirements when we are not indexing the tags */
299 * If alloc_tag size is not a multiple of required alignment, tag
300 * indexing does not work.
302 if (!IS_ALIGNED(sizeof(struct alloc_tag
), align
))
305 /* Ensure prepend consumes multiple of alloc_tag-sized blocks */
307 *prepend
= alloc_tag_align(*prepend
);
312 static inline bool tags_addressable(void)
314 unsigned long tag_idx_count
;
316 if (!static_key_enabled(&mem_profiling_compressed
))
317 return true; /* with page_ext tags are always addressable */
319 tag_idx_count
= CODETAG_ID_FIRST
+ kernel_tags
.count
+
320 module_tags
.size
/ sizeof(struct alloc_tag
);
322 return tag_idx_count
< (1UL << NR_UNUSED_PAGEFLAG_BITS
);
325 static bool needs_section_mem(struct module
*mod
, unsigned long size
)
327 if (!mem_profiling_support
)
330 return size
>= sizeof(struct alloc_tag
);
333 static struct alloc_tag
*find_used_tag(struct alloc_tag
*from
, struct alloc_tag
*to
)
336 struct alloc_tag_counters counter
;
338 counter
= alloc_tag_read(from
);
347 /* Called with mod_area_mt locked */
348 static void clean_unused_module_areas_locked(void)
350 MA_STATE(mas
, &mod_area_mt
, 0, module_tags
.size
);
353 mas_for_each(&mas
, val
, module_tags
.size
) {
354 if (val
!= &unloaded_mod
)
357 /* Release area if all tags are unused */
358 if (!find_used_tag((struct alloc_tag
*)(module_tags
.start_addr
+ mas
.index
),
359 (struct alloc_tag
*)(module_tags
.start_addr
+ mas
.last
)))
364 /* Called with mod_area_mt locked */
365 static bool find_aligned_area(struct ma_state
*mas
, unsigned long section_size
,
366 unsigned long size
, unsigned int prepend
, unsigned long align
)
368 bool cleanup_done
= false;
371 /* Try finding exact size and hope the start is aligned */
372 if (!mas_empty_area(mas
, 0, section_size
- 1, prepend
+ size
)) {
373 if (IS_ALIGNED(mas
->index
+ prepend
, align
))
376 /* Try finding larger area to align later */
378 if (!mas_empty_area(mas
, 0, section_size
- 1,
379 size
+ prepend
+ align
- 1))
383 /* No free area, try cleanup stale data and repeat the search once */
385 clean_unused_module_areas_locked();
394 static int vm_module_tags_populate(void)
396 unsigned long phys_size
= vm_module_tags
->nr_pages
<< PAGE_SHIFT
;
398 if (phys_size
< module_tags
.size
) {
399 struct page
**next_page
= vm_module_tags
->pages
+ vm_module_tags
->nr_pages
;
400 unsigned long addr
= module_tags
.start_addr
+ phys_size
;
401 unsigned long more_pages
;
404 more_pages
= ALIGN(module_tags
.size
- phys_size
, PAGE_SIZE
) >> PAGE_SHIFT
;
405 nr
= alloc_pages_bulk_array_node(GFP_KERNEL
| __GFP_NOWARN
,
406 NUMA_NO_NODE
, more_pages
, next_page
);
407 if (nr
< more_pages
||
408 vmap_pages_range(addr
, addr
+ (nr
<< PAGE_SHIFT
), PAGE_KERNEL
,
409 next_page
, PAGE_SHIFT
) < 0) {
410 /* Clean up and error out */
411 for (int i
= 0; i
< nr
; i
++)
412 __free_page(next_page
[i
]);
415 vm_module_tags
->nr_pages
+= nr
;
421 static void *reserve_module_tags(struct module
*mod
, unsigned long size
,
422 unsigned int prepend
, unsigned long align
)
424 unsigned long section_size
= module_tags
.end_addr
- module_tags
.start_addr
;
425 MA_STATE(mas
, &mod_area_mt
, 0, section_size
- 1);
426 unsigned long offset
;
429 /* If no tags return error */
430 if (size
< sizeof(struct alloc_tag
))
431 return ERR_PTR(-EINVAL
);
434 * align is always power of 2, so we can use IS_ALIGNED and ALIGN.
435 * align 0 or 1 means no alignment, to simplify set to 1.
440 if (!ensure_alignment(align
, &prepend
)) {
441 shutdown_mem_profiling(true);
442 pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
444 return ERR_PTR(-EINVAL
);
448 if (!find_aligned_area(&mas
, section_size
, size
, prepend
, align
)) {
449 ret
= ERR_PTR(-ENOMEM
);
453 /* Mark found area as reserved */
456 offset
= ALIGN(offset
, align
);
457 if (offset
!= mas
.index
) {
458 unsigned long pad_start
= mas
.index
;
460 mas
.last
= offset
- 1;
461 mas_store(&mas
, &prepend_mod
);
462 if (mas_is_err(&mas
)) {
463 ret
= ERR_PTR(xa_err(mas
.node
));
467 mas
.last
= offset
+ size
- 1;
468 mas_store(&mas
, mod
);
469 if (mas_is_err(&mas
)) {
470 mas
.index
= pad_start
;
472 ret
= ERR_PTR(xa_err(mas
.node
));
475 mas
.last
= offset
+ size
- 1;
476 mas_store(&mas
, mod
);
477 if (mas_is_err(&mas
))
478 ret
= ERR_PTR(xa_err(mas
.node
));
486 if (module_tags
.size
< offset
+ size
) {
489 module_tags
.size
= offset
+ size
;
490 if (mem_alloc_profiling_enabled() && !tags_addressable()) {
491 shutdown_mem_profiling(true);
492 pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
493 mod
->name
, NR_UNUSED_PAGEFLAG_BITS
);
496 grow_res
= vm_module_tags_populate();
498 shutdown_mem_profiling(true);
499 pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
501 return ERR_PTR(grow_res
);
505 return (struct alloc_tag
*)(module_tags
.start_addr
+ offset
);
508 static void release_module_tags(struct module
*mod
, bool used
)
510 MA_STATE(mas
, &mod_area_mt
, module_tags
.size
, module_tags
.size
);
511 struct alloc_tag
*tag
;
515 mas_for_each_rev(&mas
, val
, 0)
519 if (!val
) /* module not found */
525 /* Find out if the area is used */
526 tag
= find_used_tag((struct alloc_tag
*)(module_tags
.start_addr
+ mas
.index
),
527 (struct alloc_tag
*)(module_tags
.start_addr
+ mas
.last
));
529 struct alloc_tag_counters counter
= alloc_tag_read(tag
);
531 pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
532 tag
->ct
.filename
, tag
->ct
.lineno
, tag
->ct
.modname
,
533 tag
->ct
.function
, counter
.bytes
);
538 mas_store(&mas
, used
? &unloaded_mod
: NULL
);
539 val
= mas_prev_range(&mas
, 0);
540 if (val
== &prepend_mod
)
541 mas_store(&mas
, NULL
);
546 static void replace_module(struct module
*mod
, struct module
*new_mod
)
548 MA_STATE(mas
, &mod_area_mt
, 0, module_tags
.size
);
552 mas_for_each(&mas
, val
, module_tags
.size
) {
556 mas_store_gfp(&mas
, new_mod
, GFP_KERNEL
);
562 static int __init
alloc_mod_tags_mem(void)
564 /* Map space to copy allocation tags */
565 vm_module_tags
= execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE
);
566 if (!vm_module_tags
) {
567 pr_err("Failed to map %lu bytes for module allocation tags\n",
568 MODULE_ALLOC_TAG_VMAP_SIZE
);
569 module_tags
.start_addr
= 0;
573 vm_module_tags
->pages
= kmalloc_array(get_vm_area_size(vm_module_tags
) >> PAGE_SHIFT
,
574 sizeof(struct page
*), GFP_KERNEL
| __GFP_ZERO
);
575 if (!vm_module_tags
->pages
) {
576 free_vm_area(vm_module_tags
);
580 module_tags
.start_addr
= (unsigned long)vm_module_tags
->addr
;
581 module_tags
.end_addr
= module_tags
.start_addr
+ MODULE_ALLOC_TAG_VMAP_SIZE
;
582 /* Ensure the base is alloc_tag aligned when required for indexing */
583 module_tags
.start_addr
= alloc_tag_align(module_tags
.start_addr
);
588 static void __init
free_mod_tags_mem(void)
592 module_tags
.start_addr
= 0;
593 for (i
= 0; i
< vm_module_tags
->nr_pages
; i
++)
594 __free_page(vm_module_tags
->pages
[i
]);
595 kfree(vm_module_tags
->pages
);
596 free_vm_area(vm_module_tags
);
599 #else /* CONFIG_MODULES */
601 static inline int alloc_mod_tags_mem(void) { return 0; }
602 static inline void free_mod_tags_mem(void) {}
604 #endif /* CONFIG_MODULES */
606 /* See: Documentation/mm/allocation-profiling.rst */
607 static int __init
setup_early_mem_profiling(char *str
)
609 bool compressed
= false;
615 if (!strncmp(str
, "never", 5)) {
617 mem_profiling_support
= false;
618 pr_info("Memory allocation profiling is disabled!\n");
620 char *token
= strsep(&str
, ",");
622 if (kstrtobool(token
, &enable
))
627 if (strcmp(str
, "compressed"))
632 mem_profiling_support
= true;
633 pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
634 compressed
? "with" : "without", enable
? "on" : "off");
637 if (enable
!= mem_alloc_profiling_enabled()) {
639 static_branch_enable(&mem_alloc_profiling_key
);
641 static_branch_disable(&mem_alloc_profiling_key
);
643 if (compressed
!= static_key_enabled(&mem_profiling_compressed
)) {
645 static_branch_enable(&mem_profiling_compressed
);
647 static_branch_disable(&mem_profiling_compressed
);
652 early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling
);
654 static __init
bool need_page_alloc_tagging(void)
656 if (static_key_enabled(&mem_profiling_compressed
))
659 return mem_profiling_support
;
662 static __init
void init_page_alloc_tagging(void)
666 struct page_ext_operations page_alloc_tagging_ops
= {
667 .size
= sizeof(union codetag_ref
),
668 .need
= need_page_alloc_tagging
,
669 .init
= init_page_alloc_tagging
,
671 EXPORT_SYMBOL(page_alloc_tagging_ops
);
#ifdef CONFIG_SYSCTL
/* "vm.mem_profiling" sysctl backed by the profiling static key. */
static struct ctl_table memory_allocation_profiling_sysctls[] = {
	{
		.procname	= "mem_profiling",
		.data		= &mem_alloc_profiling_key,
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
		.mode		= 0444,
#else
		.mode		= 0644,
#endif
		.proc_handler	= proc_do_static_key,
	},
};

/*
 * Register the sysctl. It is made read-only when profiling is not supported,
 * so it cannot be switched on at runtime.
 */
static void __init sysctl_init(void)
{
	if (!mem_profiling_support)
		memory_allocation_profiling_sysctls[0].mode = 0444;

	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
}
#else /* CONFIG_SYSCTL */
static inline void sysctl_init(void) {}
#endif /* CONFIG_SYSCTL */
698 static int __init
alloc_tag_init(void)
700 const struct codetag_type_desc desc
= {
701 .section
= ALLOC_TAG_SECTION_NAME
,
702 .tag_size
= sizeof(struct alloc_tag
),
703 #ifdef CONFIG_MODULES
704 .needs_section_mem
= needs_section_mem
,
705 .alloc_section_mem
= reserve_module_tags
,
706 .free_section_mem
= release_module_tags
,
707 .module_replaced
= replace_module
,
712 res
= alloc_mod_tags_mem();
716 alloc_tag_cttype
= codetag_register_type(&desc
);
717 if (IS_ERR(alloc_tag_cttype
)) {
719 return PTR_ERR(alloc_tag_cttype
);
727 module_init(alloc_tag_init
);