/*
 * Copyright (C) IBM Corporation, 2014, 2017
 * Anton Blanchard, Rashmica Gupta.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#define pr_fmt(fmt) "memtrace: " fmt

#include <linux/bitops.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>

/* This enables us to keep track of the memory removed from each node. */
struct memtrace_entry {
	void *mem;
	u64 start;
	u64 size;
	u32 nid;
	struct dentry *dir;
	char name[16];
};

static u64 memtrace_size;

static struct memtrace_entry *memtrace_array;
static unsigned int memtrace_array_nr;
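
/*
 * debugfs read handler: copy out of the kernel mapping of the removed
 * region that memtrace_init_debugfs() sets up with ioremap().
 */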
static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	struct memtrace_entry *ent = filp->private_data;

	return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
}
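
/*
 * Sanity check for the mmap path: the window described by @start and
 * @size must lie entirely within the removed region.
 */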
static bool valid_memtrace_range(struct memtrace_entry *dev,
				 unsigned long start, unsigned long size)
{
	if ((start >= dev->start) &&
	    ((start + size) <= (dev->start + dev->size)))
		return true;

	return false;
}

static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	struct memtrace_entry *dev = filp->private_data;

	if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size))
		return -EINVAL;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	if (remap_pfn_range(vma, vma->vm_start,
			    vma->vm_pgoff + (dev->start >> PAGE_SHIFT),
			    size, vma->vm_page_prot))
		return -EAGAIN;

	return 0;
}

static const struct file_operations memtrace_fops = {
	.llseek	= default_llseek,
	.read	= memtrace_read,
	.mmap	= memtrace_mmap,
	.open	= simple_open,
};
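
/*
 * Push any dirty cache lines covering the range out to memory with dcbf,
 * one L1 D-cache line at a time, before the range is removed from the
 * kernel's view.
 */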
static void flush_memory_region(u64 base, u64 size)
{
	unsigned long line_size = ppc64_caches.l1d.size;
	u64 end = base + size;
	u64 addr;

	base = round_down(base, line_size);
	end = round_up(end, line_size);

	/* dcbf only reads the address, so addr is an input operand */
	for (addr = base; addr < end; addr += line_size)
		asm volatile("dcbf 0,%0" : : "r" (addr) : "memory");
}

static int check_memblock_online(struct memory_block *mem, void *arg)
{
	if (mem->state != MEM_ONLINE)
		return -1;

	return 0;
}

static int change_memblock_state(struct memory_block *mem, void *arg)
{
	unsigned long state = (unsigned long)arg;

	mem->state = state;

	return 0;
}
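
/*
 * Offline and remove the range [start_pfn, start_pfn + nr_pages): every
 * memory block in the range must currently be online.  The blocks are
 * marked MEM_GOING_OFFLINE before offline_pages(), rolled back to
 * MEM_ONLINE on failure, and on success marked MEM_OFFLINE, flushed from
 * the caches and removed with remove_memory().
 */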
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
	u64 end_pfn = start_pfn + nr_pages - 1;

	if (walk_memory_range(start_pfn, end_pfn, NULL,
			      check_memblock_online))
		return false;

	walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
			  change_memblock_state);

	if (offline_pages(start_pfn, nr_pages)) {
		walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
				  change_memblock_state);
		return false;
	}

	walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
			  change_memblock_state);

	/* RCU grace period? */
	flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
			    nr_pages << PAGE_SHIFT);

	lock_device_hotplug();
	remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
	unlock_device_hotplug();

	return true;
}
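
/*
 * Scan the node from its highest size-aligned chunk downwards and return
 * the physical address of the first chunk that can be offlined and
 * removed, or 0 if no chunk on the node could be taken.
 */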
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
	u64 start_pfn, end_pfn, nr_pages;
	u64 base_pfn;

	if (!NODE_DATA(nid) || !node_spanned_pages(nid))
		return 0;

	start_pfn = node_start_pfn(nid);
	end_pfn = node_end_pfn(nid);
	nr_pages = size >> PAGE_SHIFT;

	/* Trace memory needs to be aligned to the size */
	end_pfn = round_down(end_pfn - nr_pages, nr_pages);

	for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
		if (memtrace_offline_pages(nid, base_pfn, nr_pages))
			return base_pfn << PAGE_SHIFT;
	}

	return 0;
}
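
/*
 * Carve one region of the requested size out of every online node.  A
 * node without enough free local memory only produces a warning; the
 * remaining nodes are still tried.
 */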
static int memtrace_init_regions_runtime(u64 size)
{
	u32 nid;
	u64 m;

	memtrace_array = kcalloc(num_online_nodes(),
				 sizeof(struct memtrace_entry), GFP_KERNEL);
	if (!memtrace_array) {
		pr_err("Failed to allocate memtrace_array\n");
		return -EINVAL;
	}

	for_each_online_node(nid) {
		m = memtrace_alloc_node(nid, size);

		/*
		 * A node might not have any local memory, so warn but
		 * continue on.
		 */
		if (!m) {
			pr_err("Failed to allocate trace memory on node %d\n", nid);
			continue;
		}

		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);

		memtrace_array[memtrace_array_nr].start = m;
		memtrace_array[memtrace_array_nr].size = size;
		memtrace_array[memtrace_array_nr].nid = nid;
		memtrace_array_nr++;
	}

	return 0;
}

static struct dentry *memtrace_debugfs_dir;
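
/*
 * Expose each removed region as a directory (named after the node id)
 * under the debugfs "memtrace" directory, containing "trace" (read/mmap
 * of the region) plus read-only "start" and "size" files.
 */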
static int memtrace_init_debugfs(void)
{
	int ret = 0;
	int i;

	for (i = 0; i < memtrace_array_nr; i++) {
		struct dentry *dir;
		struct memtrace_entry *ent = &memtrace_array[i];

		ent->mem = ioremap(ent->start, ent->size);
		/* Warn but continue on */
		if (!ent->mem) {
			pr_err("Failed to map trace memory at 0x%llx\n",
			       ent->start);
			ret = -1;
			continue;
		}

		snprintf(ent->name, 16, "%08x", ent->nid);
		dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
		if (!dir)
			return -1;

		ent->dir = dir;
		debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
		debugfs_create_x64("start", 0400, dir, &ent->start);
		debugfs_create_x64("size", 0400, dir, &ent->size);
	}

	return ret;
}
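
/*
 * Writing a size in bytes (a multiple of the memory block size) to the
 * "enable" debugfs file triggers the allocation.  Enabling is one-shot:
 * once memtrace_size is set it cannot be changed from userspace.
 */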
static int memtrace_enable_set(void *data, u64 val)
{
	if (memtrace_size)
		return -EINVAL;

	if (!val)
		return -EINVAL;

	/* Make sure size is aligned to a memory block */
	if (val & (memory_block_size_bytes() - 1))
		return -EINVAL;

	if (memtrace_init_regions_runtime(val))
		return -EINVAL;

	if (memtrace_init_debugfs())
		return -EINVAL;

	memtrace_size = val;

	return 0;
}

static int memtrace_enable_get(void *data, u64 *val)
{
	*val = memtrace_size;
	return 0;
}
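
/*
 * The "enable" file reads back memtrace_size and accepts a new size via
 * memtrace_enable_set(); memtrace_init() creates it at boot under
 * powerpc_debugfs_root.
 */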
DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
			memtrace_enable_set, "0x%016llx\n");

static int memtrace_init(void)
{
	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
						  powerpc_debugfs_root);
	if (!memtrace_debugfs_dir)
		return -1;

	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
			    NULL, &memtrace_init_fops);

	return 0;
}
machine_device_initcall(powernv, memtrace_init);