// SPDX-License-Identifier: GPL-2.0-only
#include <linux/alloc_tag.h>
#include <linux/execmem.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
#include <linux/proc_fs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>

#define ALLOCINFO_FILE_NAME		"allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE	(100000UL * sizeof(struct alloc_tag))
#define SECTION_START(NAME)	(CODETAG_SECTION_START_PREFIX NAME)
#define SECTION_STOP(NAME)	(CODETAG_SECTION_STOP_PREFIX NAME)
#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
static bool mem_profiling_support = true;
#else
static bool mem_profiling_support;
#endif
static struct codetag_type *alloc_tag_cttype;

DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
EXPORT_SYMBOL(_shared_alloc_tag);

DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
			mem_alloc_profiling_key);
DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);

struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
unsigned long alloc_tag_ref_mask;
int alloc_tag_ref_offs;
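
/*
 * Per-open state for the /proc/allocinfo seq_file: the codetag iterator
 * position plus a flag telling the show() callback whether the header line
 * still needs to be emitted.
 */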
struct allocinfo_private {
	struct codetag_iterator iter;
	bool print_header;
};
static void *allocinfo_start(struct seq_file *m, loff_t *pos)
{
	struct allocinfo_private *priv;
	struct codetag *ct;
	loff_t node = *pos;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	m->private = priv;
	if (!priv)
		return NULL;

	priv->print_header = (node == 0);
	codetag_lock_module_list(alloc_tag_cttype, true);
	priv->iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(&priv->iter)) != NULL && node)
		node--;

	return ct ? priv : NULL;
}
static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	struct codetag *ct = codetag_next_ct(&priv->iter);

	(*pos)++;
	if (!ct)
		return NULL;

	return priv;
}
static void allocinfo_stop(struct seq_file *m, void *arg)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)m->private;

	if (priv) {
		codetag_lock_module_list(alloc_tag_cttype, false);
		kfree(priv);
	}
}
static void print_allocinfo_header(struct seq_buf *buf)
{
	/* Output format version, so we can change it. */
	seq_buf_printf(buf, "allocinfo - version: 1.0\n");
	seq_buf_printf(buf, "# <size> <calls> <tag info>\n");
}
static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
{
	struct alloc_tag *tag = ct_to_alloc_tag(ct);
	struct alloc_tag_counters counter = alloc_tag_read(tag);
	s64 bytes = counter.bytes;

	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
	codetag_to_text(out, ct);
	seq_buf_putc(out, ' ');
	seq_buf_putc(out, '\n');
}
static int allocinfo_show(struct seq_file *m, void *arg)
{
	struct allocinfo_private *priv = (struct allocinfo_private *)arg;
	char *bufp;
	size_t n = seq_get_buf(m, &bufp);
	struct seq_buf buf;

	seq_buf_init(&buf, bufp, n);
	if (priv->print_header) {
		print_allocinfo_header(&buf);
		priv->print_header = false;
	}
	alloc_tag_to_text(&buf, priv->iter.ct);
	seq_commit(m, seq_buf_used(&buf));

	return 0;
}
static const struct seq_operations allocinfo_seq_op = {
	.start	= allocinfo_start,
	.next	= allocinfo_next,
	.stop	= allocinfo_stop,
	.show	= allocinfo_show,
};
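
/*
 * Collect up to @count tags with the largest outstanding byte counts into
 * @tags, kept sorted in descending order, and return how many were stored.
 * When @can_sleep is false the module list is only trylocked and 0 is
 * returned if the lock cannot be taken.
 */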
size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
{
	struct codetag_iterator iter;
	struct codetag *ct;
	struct codetag_bytes n;
	unsigned int i, nr = 0;

	if (can_sleep)
		codetag_lock_module_list(alloc_tag_cttype, true);
	else if (!codetag_trylock_module_list(alloc_tag_cttype))
		return 0;

	iter = codetag_get_ct_iter(alloc_tag_cttype);
	while ((ct = codetag_next_ct(&iter))) {
		struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));

		n.ct	= ct;
		n.bytes = counter.bytes;

		for (i = 0; i < nr; i++)
			if (n.bytes > tags[i].bytes)
				break;

		if (i < count) {
			nr -= nr == count;
			memmove(&tags[i + 1],
				&tags[i],
				sizeof(tags[0]) * (nr - i));
			nr++;
			tags[i] = n;
		}
	}
	codetag_lock_module_list(alloc_tag_cttype, false);

	return nr;
}
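
/*
 * When a large folio is split into smaller ones, copy the head page's tag
 * reference into every new folio so the memory stays attributed to the
 * original allocation site.
 */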
void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
{
	int i;
	struct alloc_tag *tag;
	unsigned int nr_pages = 1 << new_order;

	if (!mem_alloc_profiling_enabled())
		return;

	tag = pgalloc_tag_get(&folio->page);
	if (!tag)
		return;

	for (i = nr_pages; i < (1 << old_order); i += nr_pages) {
		union pgtag_ref_handle handle;
		union codetag_ref ref;

		if (get_page_tag_ref(folio_page(folio, i), &ref, &handle)) {
			/* Set new reference to point to the original tag */
			alloc_tag_ref_set(&ref, tag);
			update_page_tag_ref(handle, &ref);
			put_page_tag_ref(handle);
		}
	}
}
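
/*
 * Swap the allocation tags of two folios, e.g. when folio contents are
 * migrated, so each tag keeps accounting for the folio that now holds its
 * data.
 */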
void pgalloc_tag_swap(struct folio *new, struct folio *old)
{
	union pgtag_ref_handle handle_old, handle_new;
	union codetag_ref ref_old, ref_new;
	struct alloc_tag *tag_old, *tag_new;

	tag_old = pgalloc_tag_get(&old->page);
	if (!tag_old)
		return;
	tag_new = pgalloc_tag_get(&new->page);
	if (!tag_new)
		return;

	if (!get_page_tag_ref(&old->page, &ref_old, &handle_old))
		return;
	if (!get_page_tag_ref(&new->page, &ref_new, &handle_new)) {
		put_page_tag_ref(handle_old);
		return;
	}

	/* swap tags */
	__alloc_tag_ref_set(&ref_old, tag_new);
	update_page_tag_ref(handle_old, &ref_old);
	__alloc_tag_ref_set(&ref_new, tag_old);
	update_page_tag_ref(handle_new, &ref_new);

	put_page_tag_ref(handle_old);
	put_page_tag_ref(handle_new);
}
static void shutdown_mem_profiling(bool remove_file)
{
	if (mem_alloc_profiling_enabled())
		static_branch_disable(&mem_alloc_profiling_key);

	if (!mem_profiling_support)
		return;

	if (remove_file)
		remove_proc_entry(ALLOCINFO_FILE_NAME, NULL);
	mem_profiling_support = false;
}
static void __init procfs_init(void)
{
	if (!mem_profiling_support)
		return;

	if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) {
		pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
		shutdown_mem_profiling(false);
	}
}
void __init alloc_tag_sec_init(void)
{
	struct alloc_tag *last_codetag;

	if (!mem_profiling_support)
		return;

	if (!static_key_enabled(&mem_profiling_compressed))
		return;

	kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_START(ALLOC_TAG_SECTION_NAME));
	last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
					SECTION_STOP(ALLOC_TAG_SECTION_NAME));
	kernel_tags.count = last_codetag - kernel_tags.first_tag;

	/* Check if kernel tags fit into page flags */
	if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
		shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
		pr_err("%lu allocation tags cannot be referenced using %d available page flag bits. Memory allocation profiling is disabled!\n",
			kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS);
		return;
	}

	alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS);
	alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1);
	pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
		 NR_UNUSED_PAGEFLAG_BITS);
}
#ifdef CONFIG_MODULES

static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
static struct vm_struct *vm_module_tags;
/* A dummy object used to indicate an unloaded module */
static struct module unloaded_mod;
/* A dummy object used to indicate a module prepended area */
static struct module prepend_mod;

struct alloc_tag_module_section module_tags;
static inline unsigned long alloc_tag_align(unsigned long val)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return val;
	}

	if (val % sizeof(struct alloc_tag) == 0)
		return val;

	return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag);
}
static bool ensure_alignment(unsigned long align, unsigned int *prepend)
{
	if (!static_key_enabled(&mem_profiling_compressed)) {
		/* No alignment requirements when we are not indexing the tags */
		return true;
	}

	/*
	 * If alloc_tag size is not a multiple of required alignment, tag
	 * indexing does not work.
	 */
	if (!IS_ALIGNED(sizeof(struct alloc_tag), align))
		return false;

	/* Ensure prepend consumes multiple of alloc_tag-sized blocks */
	if (*prepend)
		*prepend = alloc_tag_align(*prepend);

	return true;
}
static inline bool tags_addressable(void)
{
	unsigned long tag_idx_count;

	if (!static_key_enabled(&mem_profiling_compressed))
		return true; /* with page_ext tags are always addressable */

	tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count +
			module_tags.size / sizeof(struct alloc_tag);

	return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS);
}
static bool needs_section_mem(struct module *mod, unsigned long size)
{
	if (!mem_profiling_support)
		return false;

	return size >= sizeof(struct alloc_tag);
}
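
/* Return the first tag in [from, to] with outstanding allocations, or NULL. */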
static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to)
{
	while (from <= to) {
		struct alloc_tag_counters counter;

		counter = alloc_tag_read(from);
		if (counter.bytes)
			return from;
		from++;
	}

	return NULL;
}
/* Called with mod_area_mt locked */
static void clean_unused_module_areas_locked(void)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_for_each(&mas, val, module_tags.size) {
		if (val != &unloaded_mod)
			continue;

		/* Release area if all tags are unused */
		if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
				   (struct alloc_tag *)(module_tags.start_addr + mas.last)))
			mas_erase(&mas);
	}
}
/* Called with mod_area_mt locked */
static bool find_aligned_area(struct ma_state *mas, unsigned long section_size,
			      unsigned long size, unsigned int prepend, unsigned long align)
{
	bool cleanup_done = false;

repeat:
	/* Try finding exact size and hope the start is aligned */
	if (!mas_empty_area(mas, 0, section_size - 1, prepend + size)) {
		if (IS_ALIGNED(mas->index + prepend, align))
			return true;

		/* Try finding larger area to align later */
		mas_reset(mas);
		if (!mas_empty_area(mas, 0, section_size - 1,
				    size + prepend + align - 1))
			return true;
	}

	/* No free area, try cleanup stale data and repeat the search once */
	if (!cleanup_done) {
		clean_unused_module_areas_locked();
		cleanup_done = true;
		mas_reset(mas);
		goto repeat;
	}

	return false;
}
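
/*
 * Ensure the module tag area is backed by real pages up to module_tags.size,
 * allocating and vmapping whatever is still missing.
 */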
static int vm_module_tags_populate(void)
{
	unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT;

	if (phys_size < module_tags.size) {
		struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages;
		unsigned long addr = module_tags.start_addr + phys_size;
		unsigned long more_pages;
		unsigned long nr;

		more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT;
		nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN,
						 NUMA_NO_NODE, more_pages, next_page);
		if (nr < more_pages ||
		    vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL,
				     next_page, PAGE_SHIFT) < 0) {
			/* Clean up and error out */
			for (int i = 0; i < nr; i++)
				__free_page(next_page[i]);
			return -ENOMEM;
		}
		vm_module_tags->nr_pages += nr;
	}

	return 0;
}
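
/*
 * Reserve a range of the module tag area for @mod's tags. Any padding needed
 * in front of the range for alignment is stored as prepend_mod so it can be
 * dropped together with the module's own range later.
 */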
static void *reserve_module_tags(struct module *mod, unsigned long size,
				 unsigned int prepend, unsigned long align)
{
	unsigned long section_size = module_tags.end_addr - module_tags.start_addr;
	MA_STATE(mas, &mod_area_mt, 0, section_size - 1);
	unsigned long offset;
	void *ret = NULL;

	/* If no tags return error */
	if (size < sizeof(struct alloc_tag))
		return ERR_PTR(-EINVAL);

	/*
	 * align is always power of 2, so we can use IS_ALIGNED and ALIGN.
	 * align 0 or 1 means no alignment, to simplify set to 1.
	 */
	if (!align)
		align = 1;

	if (!ensure_alignment(align, &prepend)) {
		shutdown_mem_profiling(true);
		pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
			mod->name, align);
		return ERR_PTR(-EINVAL);
	}

	mas_lock(&mas);
	if (!find_aligned_area(&mas, section_size, size, prepend, align)) {
		ret = ERR_PTR(-ENOMEM);
		goto unlock;
	}

	/* Mark found area as reserved */
	offset = mas.index;
	offset += prepend;
	offset = ALIGN(offset, align);
	if (offset != mas.index) {
		unsigned long pad_start = mas.index;

		mas.last = offset - 1;
		mas_store(&mas, &prepend_mod);
		if (mas_is_err(&mas)) {
			ret = ERR_PTR(xa_err(mas.node));
			goto unlock;
		}
		mas.index = offset;
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas)) {
			mas.index = pad_start;
			mas_erase(&mas);
			ret = ERR_PTR(xa_err(mas.node));
		}
	} else {
		mas.last = offset + size - 1;
		mas_store(&mas, mod);
		if (mas_is_err(&mas))
			ret = ERR_PTR(xa_err(mas.node));
	}
unlock:
	mas_unlock(&mas);

	if (IS_ERR(ret))
		return ret;

	if (module_tags.size < offset + size) {
		int grow_res;

		module_tags.size = offset + size;
		if (mem_alloc_profiling_enabled() && !tags_addressable()) {
			shutdown_mem_profiling(true);
			pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
				mod->name, NR_UNUSED_PAGEFLAG_BITS);
		}

		grow_res = vm_module_tags_populate();
		if (grow_res) {
			shutdown_mem_profiling(true);
			pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
			       mod->name);
			return ERR_PTR(grow_res);
		}
	}

	return (struct alloc_tag *)(module_tags.start_addr + offset);
}
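
/*
 * Give back @mod's tag range at unload time. If any of its tags still have
 * live allocations, the range is reported and kept, marked with unloaded_mod,
 * instead of being freed.
 */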
static void release_module_tags(struct module *mod, bool used)
{
	MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
	struct alloc_tag *tag;
	struct module *val;

	mas_lock(&mas);
	mas_for_each_rev(&mas, val, 0)
		if (val == mod)
			break;

	if (!val) /* module not found */
		goto out;

	if (!used)
		goto release_area;

	/* Find out if the area is used */
	tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
			    (struct alloc_tag *)(module_tags.start_addr + mas.last));
	if (tag) {
		struct alloc_tag_counters counter = alloc_tag_read(tag);

		pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
			tag->ct.filename, tag->ct.lineno, tag->ct.modname,
			tag->ct.function, counter.bytes);
	} else {
		used = false;
	}

release_area:
	mas_store(&mas, used ? &unloaded_mod : NULL);
	val = mas_prev_range(&mas, 0);
	if (val == &prepend_mod)
		mas_store(&mas, NULL);
out:
	mas_unlock(&mas);
}
static void replace_module(struct module *mod, struct module *new_mod)
{
	MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
	struct module *val;

	mas_lock(&mas);
	mas_for_each(&mas, val, module_tags.size) {
		if (val != mod)
			continue;

		mas_store_gfp(&mas, new_mod, GFP_KERNEL);
		break;
	}
	mas_unlock(&mas);
}
static int __init alloc_mod_tags_mem(void)
{
	/* Map space to copy allocation tags */
	vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE);
	if (!vm_module_tags) {
		pr_err("Failed to map %lu bytes for module allocation tags\n",
			MODULE_ALLOC_TAG_VMAP_SIZE);
		module_tags.start_addr = 0;
		return -ENOMEM;
	}

	vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT,
					      sizeof(struct page *), GFP_KERNEL | __GFP_ZERO);
	if (!vm_module_tags->pages) {
		free_vm_area(vm_module_tags);
		return -ENOMEM;
	}

	module_tags.start_addr = (unsigned long)vm_module_tags->addr;
	module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
	/* Ensure the base is alloc_tag aligned when required for indexing */
	module_tags.start_addr = alloc_tag_align(module_tags.start_addr);

	return 0;
}
static void __init free_mod_tags_mem(void)
{
	int i;

	module_tags.start_addr = 0;
	for (i = 0; i < vm_module_tags->nr_pages; i++)
		__free_page(vm_module_tags->pages[i]);
	kfree(vm_module_tags->pages);
	free_vm_area(vm_module_tags);
}
#else /* CONFIG_MODULES */

static inline int alloc_mod_tags_mem(void) { return 0; }
static inline void free_mod_tags_mem(void) {}

#endif /* CONFIG_MODULES */
/* See: Documentation/mm/allocation-profiling.rst */
static int __init setup_early_mem_profiling(char *str)
{
	bool compressed = false;
	bool enable;

	if (!str || !str[0])
		return -EINVAL;

	if (!strncmp(str, "never", 5)) {
		enable = false;
		mem_profiling_support = false;
		pr_info("Memory allocation profiling is disabled!\n");
	} else {
		char *token = strsep(&str, ",");

		if (kstrtobool(token, &enable))
			return -EINVAL;

		if (str) {
			if (strcmp(str, "compressed"))
				return -EINVAL;

			compressed = true;
		}
		mem_profiling_support = true;
		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
			compressed ? "with" : "without", enable ? "on" : "off");
	}

	if (enable != mem_alloc_profiling_enabled()) {
		if (enable)
			static_branch_enable(&mem_alloc_profiling_key);
		else
			static_branch_disable(&mem_alloc_profiling_key);
	}
	if (compressed != static_key_enabled(&mem_profiling_compressed)) {
		if (compressed)
			static_branch_enable(&mem_profiling_compressed);
		else
			static_branch_disable(&mem_profiling_compressed);
	}

	return 0;
}
early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling);
static __init bool need_page_alloc_tagging(void)
{
	if (static_key_enabled(&mem_profiling_compressed))
		return false;

	return mem_profiling_support;
}
static __init void init_page_alloc_tagging(void)
{
}
struct page_ext_operations page_alloc_tagging_ops = {
	.size = sizeof(union codetag_ref),
	.need = need_page_alloc_tagging,
	.init = init_page_alloc_tagging,
};
EXPORT_SYMBOL(page_alloc_tagging_ops);
#ifdef CONFIG_SYSCTL
static struct ctl_table memory_allocation_profiling_sysctls[] = {
	{
		.procname	= "mem_profiling",
		.data		= &mem_alloc_profiling_key,
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
		.mode		= 0444,
#else
		.mode		= 0644,
#endif
		.proc_handler	= proc_do_static_key,
	},
};

static void __init sysctl_init(void)
{
	if (!mem_profiling_support)
		memory_allocation_profiling_sysctls[0].mode = 0444;

	register_sysctl_init("vm", memory_allocation_profiling_sysctls);
}
#else /* CONFIG_SYSCTL */
static inline void sysctl_init(void) {}
#endif /* CONFIG_SYSCTL */
static int __init alloc_tag_init(void)
{
	const struct codetag_type_desc desc = {
		.section		= ALLOC_TAG_SECTION_NAME,
		.tag_size		= sizeof(struct alloc_tag),
#ifdef CONFIG_MODULES
		.needs_section_mem	= needs_section_mem,
		.alloc_section_mem	= reserve_module_tags,
		.free_section_mem	= release_module_tags,
		.module_replaced	= replace_module,
#endif
	};
	int res;

	res = alloc_mod_tags_mem();
	if (res)
		return res;

	alloc_tag_cttype = codetag_register_type(&desc);
	if (IS_ERR(alloc_tag_cttype)) {
		free_mod_tags_mem();
		return PTR_ERR(alloc_tag_cttype);
	}

	sysctl_init();
	procfs_init();

	return 0;
}
module_init(alloc_tag_init);