/*
 * Copyright 2016, Rashmica Gupta, IBM Corp.
 *
 * This traverses the kernel pagetables and dumps the
 * information about the used sections of memory to
 * /sys/kernel/debug/kernel_pagetables.
 *
 * Derived from the arm64 implementation:
 * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <linux/const.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
/*
 * To visualise what is happening,
 *
 *  - PTRS_PER_P** = how many entries there are in the corresponding P**
 *  - P**_SHIFT = how many bits of the address we use to index into the
 * corresponding P**
 *  - P**_SIZE is how much memory we can access through the table - not the
 * size of the table itself.
 * P**={PGD, PUD, PMD, PTE}
 *
 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
 * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
 * a page.
 *
 * In the case where there are only 3 levels, the PUD is folded into the
 * PGD: every PUD has only one entry which points to the PMD.
 *
 * The page dumper groups page table entries of the same type into a single
 * description. It uses pg_state to track the range information while
 * iterating over the PTE entries. When the continuity is broken it then
 * dumps out a description of the range - i.e. PTEs that are virtually
 * contiguous with the same PTE flags are chunked together. This is to make
 * it clear how different areas of the kernel virtual memory are used.
 */
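
/*
 * As a concrete sketch of those relationships (these identities follow from
 * the generic Linux page table definitions; nothing powerpc-specific is
 * assumed):
 *
 *	P**_SIZE == 1UL << P**_SHIFT
 *	index into a P** == (addr >> P**_SHIFT) & (PTRS_PER_P** - 1)
 *
 * so a single PGD entry maps PGDIR_SIZE bytes of virtual address space,
 * which is exactly the range each walk_pud() call below covers.
 */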
struct pg_state {
	struct seq_file *seq;
	const struct addr_marker *marker;
	unsigned long start_address;
	unsigned long start_pa;
	unsigned long last_pa;
	unsigned int level;
	u64 current_flags;
};

struct addr_marker {
	unsigned long start_address;
	const char *name;
};
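
/*
 * The zeroed start_address fields below are filled in at init time by
 * populate_markers().
 */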
static struct addr_marker address_markers[] = {
	{ 0,	"Start of kernel VM" },
	{ 0,	"vmalloc() Area" },
	{ 0,	"vmalloc() End" },
	{ 0,	"isa I/O start" },
	{ 0,	"isa I/O end" },
	{ 0,	"phb I/O start" },
	{ 0,	"phb I/O end" },
	{ 0,	"I/O remap start" },
	{ 0,	"I/O remap end" },
	{ 0,	"vmemmap start" },
	{ -1,	NULL },
};

struct flag_info {
	u64		mask;
	u64		val;
	const char	*set;
	const char	*clear;
	bool		is_val;
	int		shift;
};
static const struct flag_info flag_array[] = {
	{
#ifdef CONFIG_PPC_STD_MMU_64
		.mask	= _PAGE_PRIVILEGED,
		.val	= 0,
#else
		.mask	= _PAGE_USER,
		.val	= _PAGE_USER,
#endif
		.set	= "user",
		.clear	= "    ",
	}, {
		.mask	= _PAGE_PRESENT,
		.val	= _PAGE_PRESENT,
		.set	= "present",
		.clear	= "       ",
	}, {
#ifdef CONFIG_PPC_STD_MMU_64
		.mask	= H_PAGE_HASHPTE,
		.val	= H_PAGE_HASHPTE,
#else
		.mask	= _PAGE_HASHPTE,
		.val	= _PAGE_HASHPTE,
#endif
		.set	= "hpte",
		.clear	= "    ",
	}, {
#ifndef CONFIG_PPC_STD_MMU_64
		.mask	= _PAGE_GUARDED,
		.val	= _PAGE_GUARDED,
		.set	= "guarded",
		.clear	= "       ",
	}, {
#endif
		.mask	= _PAGE_ACCESSED,
		.val	= _PAGE_ACCESSED,
		.set	= "accessed",
		.clear	= "        ",
	}, {
#ifndef CONFIG_PPC_STD_MMU_64
		.mask	= _PAGE_WRITETHRU,
		.val	= _PAGE_WRITETHRU,
		.set	= "write through",
		.clear	= "             ",
	}, {
#endif
		.mask	= _PAGE_NO_CACHE,
		.val	= _PAGE_NO_CACHE,
		.set	= "no cache",
		.clear	= "        ",
	}, {
#ifdef CONFIG_PPC_BOOK3S_64
#ifdef CONFIG_PPC_64K_PAGES
		.mask	= H_PAGE_COMBO,
		.val	= H_PAGE_COMBO,
		.set	= "combo",
	}, {
		.mask	= H_PAGE_4K_PFN,
		.val	= H_PAGE_4K_PFN,
		.set	= "4K_pfn",
	}, {
#endif
		.mask	= H_PAGE_F_GIX,
		.val	= H_PAGE_F_GIX,
		.set	= "f_gix",
		.is_val	= true,
		.shift	= H_PAGE_F_GIX_SHIFT,
	}, {
		.mask	= H_PAGE_F_SECOND,
		.val	= H_PAGE_F_SECOND,
		.set	= "f_second",
	}, {
#endif
		.mask	= _PAGE_SPECIAL,
		.val	= _PAGE_SPECIAL,
		.set	= "special",
	}
};
struct pgtable_level {
	const struct flag_info *flag;
	size_t num;
	u64 mask;
};

static struct pgtable_level pg_level[] = {
	{
	}, { /* pgd */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	}, { /* pud */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	}, { /* pmd */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	}, { /* pte */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	},
};
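
/*
 * pg_level is indexed by the "level" argument that the walkers below pass
 * to note_page() (1 = pgd ... 4 = pte), so entry 0 is left empty.
 */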
static void dump_flag_info(struct pg_state *st, const struct flag_info
		*flag, u64 pte, int num)
{
	unsigned int i;

	for (i = 0; i < num; i++, flag++) {
		const char *s = NULL;
		u64 val;

		/* flag not defined so don't check it */
		if (flag->mask == 0)
			continue;
		/* Some 'flags' are actually values */
		if (flag->is_val) {
			val = pte & flag->val;
			if (flag->shift)
				val = val >> flag->shift;
			seq_printf(st->seq, "  %s:%llx", flag->set, val);
		} else {
			if ((pte & flag->mask) == flag->val)
				s = flag->set;
			else
				s = flag->clear;
			if (s)
				seq_printf(st->seq, "  %s", s);
		}
		st->current_flags &= ~flag->mask;
	}
	if (st->current_flags != 0)
		seq_printf(st->seq, "  unknown flags:%llx", st->current_flags);
}
static void dump_addr(struct pg_state *st, unsigned long addr)
{
	static const char units[] = "KMGTPE";
	const char *unit = units;
	unsigned long delta;

	seq_printf(st->seq, "0x%016lx-0x%016lx   ", st->start_address, addr - 1);
	seq_printf(st->seq, "0x%016lx ", st->start_pa);

	delta = (addr - st->start_address) >> 10;
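	/*
	 * e.g. a 16M range gives delta = 16384 (KB); 16384 & 1023 == 0, so
	 * the loop below scales it once to 16 and prints "16M", while a
	 * 12K range is not a whole multiple of 1024 and stays in KB.
	 */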
	/* Work out what appropriate unit to use */
	while (!(delta & 1023) && unit[1]) {
		delta >>= 10;
		unit++;
	}
	seq_printf(st->seq, "%9lu%c", delta, *unit);
}
static void note_page(struct pg_state *st, unsigned long addr,
	       unsigned int level, u64 val)
{
	u64 flag = val & pg_level[level].mask;
	u64 pa = val & PTE_RPN_MASK;

	/* At first no level is set */
	if (!st->level) {
		st->level = level;
		st->current_flags = flag;
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
	/*
	 * Dump the section of virtual memory when:
	 *   - the PTE flags from one entry to the next differ.
	 *   - we change levels in the tree.
	 *   - the address is in a different section of memory and is thus
	 *     used for a different purpose, regardless of the flags.
	 *   - the pa of this page is not adjacent to the last inspected page.
	 */
	} else if (flag != st->current_flags || level != st->level ||
		   addr >= st->marker[1].start_address ||
		   pa != st->last_pa + PAGE_SIZE) {

		/* Check the PTE flags */
		if (st->current_flags) {
			dump_addr(st, addr);

			/* Dump all the flags */
			if (pg_level[st->level].flag)
				dump_flag_info(st, pg_level[st->level].flag,
					       st->current_flags,
					       pg_level[st->level].num);

			seq_puts(st->seq, "\n");
		}

		/*
		 * Address indicates we have passed the end of the
		 * current section of virtual memory
		 */
		while (addr >= st->marker[1].start_address) {
			st->marker++;
			seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
		}
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		st->current_flags = flag;
		st->level = level;
	} else {
		st->last_pa = pa;
	}
}
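
/*
 * Together, dump_addr() and dump_flag_info() emit one line per range, of
 * the form (illustrative only - the flags shown depend on the MMU config):
 *
 *   0xc000000000000000-0xc00000000fffffff 0x0000000000000000      256M  ...flags...
 */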
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
{
	pte_t *pte = pte_offset_kernel(pmd, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		addr = start + i * PAGE_SIZE;
		note_page(st, addr, 4, pte_val(*pte));
	}
}
static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
		addr = start + i * PMD_SIZE;
		if (!pmd_none(*pmd) && !pmd_huge(*pmd))
			/* pmd exists */
			walk_pte(st, pmd, addr);
		else
			note_page(st, addr, 3, pmd_val(*pmd));
	}
}
static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
{
	pud_t *pud = pud_offset(pgd, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		addr = start + i * PUD_SIZE;
		if (!pud_none(*pud) && !pud_huge(*pud))
			/* pud exists */
			walk_pmd(st, pud, addr);
		else
			note_page(st, addr, 2, pud_val(*pud));
	}
}
static void walk_pagetables(struct pg_state *st)
{
	pgd_t *pgd = pgd_offset_k(0UL);
	unsigned int i;
	unsigned long addr;

	/*
	 * Traverse the linux pagetable structure and dump pages that are in
	 * the hash pagetable.
	 */
	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
		addr = KERN_VIRT_START + i * PGDIR_SIZE;
		if (!pgd_none(*pgd) && !pgd_huge(*pgd))
			/* pgd exists */
			walk_pud(st, pgd, addr);
		else
			note_page(st, addr, 1, pgd_val(*pgd));
	}
}
static void populate_markers(void)
{
	address_markers[0].start_address = PAGE_OFFSET;
	address_markers[1].start_address = VMALLOC_START;
	address_markers[2].start_address = VMALLOC_END;
	address_markers[3].start_address = ISA_IO_BASE;
	address_markers[4].start_address = ISA_IO_END;
	address_markers[5].start_address = PHB_IO_BASE;
	address_markers[6].start_address = PHB_IO_END;
	address_markers[7].start_address = IOREMAP_BASE;
	address_markers[8].start_address = IOREMAP_END;
#ifdef CONFIG_PPC_STD_MMU_64
	address_markers[9].start_address = H_VMEMMAP_BASE;
#else
	address_markers[9].start_address = VMEMMAP_BASE;
#endif
}
static int ptdump_show(struct seq_file *m, void *v)
{
	struct pg_state st = {
		.seq = m,
		.start_address = KERN_VIRT_START,
		.marker = address_markers,
	};
	/* Traverse kernel page tables */
	walk_pagetables(&st);
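	/*
	 * One final call flushes the last pending range: level 0 never
	 * matches st->level, so note_page() prints the accumulated
	 * description before we return.
	 */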
	note_page(&st, 0, 0, 0);
	return 0;
}
static int ptdump_open(struct inode *inode, struct file *file)
{
	return single_open(file, ptdump_show, NULL);
}
static const struct file_operations ptdump_fops = {
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
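
/*
 * Pre-compute, for each level, the union of every flag bit the dumper
 * knows how to decode; note_page() masks raw PTE values with this so that
 * ranges are compared only on bits we can report.
 */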
static void build_pgtable_complete_mask(void)
{
	unsigned int i, j;

	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
		if (pg_level[i].flag)
			for (j = 0; j < pg_level[i].num; j++)
				pg_level[i].mask |= pg_level[i].flag[j].mask;
}
static int ptdump_init(void)
{
	struct dentry *debugfs_file;

	populate_markers();
	build_pgtable_complete_mask();
	debugfs_file = debugfs_create_file("kernel_pagetables", 0400, NULL,
			NULL, &ptdump_fops);
	return debugfs_file ? 0 : -ENOMEM;
}
device_initcall(ptdump_init);