/*
 * Copyright 2016, Rashmica Gupta, IBM Corp.
 *
 * This traverses the kernel pagetables and dumps the
 * information about the used sections of memory to
 * /sys/kernel/debug/kernel_page_tables.
 *
 * Derived from the arm64 implementation:
 * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <linux/const.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#ifdef CONFIG_PPC32
#define KERN_VIRT_START	0
#endif
/*
 * To visualise what is happening,
 *
 *  - PTRS_PER_P** = how many entries there are in the corresponding P**
 *  - P**_SHIFT = how many bits of the address we use to index into the
 *    corresponding P**
 *  - P**_SIZE is how much memory we can access through the table - not the
 *    size of the table itself.
 * P**={PGD, PUD, PMD, PTE}
 *
 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
 * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
 * a page.
 *
 * In the case where there are only 3 levels, the PUD is folded into the
 * PGD: every PUD has only one entry which points to the PMD.
 *
 * The page dumper groups page table entries of the same type into a single
 * description. It uses pg_state to track the range information while
 * iterating over the PTE entries. When the continuity is broken it then
 * dumps out a description of the range - ie PTEs that are virtually contiguous
 * with the same PTE flags are chunked together. This is to make it clear how
 * different areas of the kernel virtual memory are used.
 */
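/*
 * An illustrative sketch of those relationships, assuming only the generic
 * Linux macros; none of this is used by the dumper itself:
 *
 *	P**_SIZE == 1UL << P**_SHIFT, e.g. PMD_SIZE == 1UL << PMD_SHIFT
 *
 *	index into the PGD for an address:
 *		(addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)
 *	byte offset within the final page:
 *		addr & (PAGE_SIZE - 1)
 */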
struct pg_state {
	struct seq_file *seq;
	const struct addr_marker *marker;
	unsigned long start_address;
	unsigned long start_pa;
	unsigned long last_pa;
	unsigned int level;
	u64 current_flags;
};
struct addr_marker {
	unsigned long start_address;
	const char *name;
};
static struct addr_marker address_markers[] = {
	{ 0,	"Start of kernel VM" },
	{ 0,	"vmalloc() Area" },
	{ 0,	"vmalloc() End" },
#ifdef CONFIG_PPC64
	{ 0,	"isa I/O start" },
	{ 0,	"isa I/O end" },
	{ 0,	"phb I/O start" },
	{ 0,	"phb I/O end" },
	{ 0,	"I/O remap start" },
	{ 0,	"I/O remap end" },
	{ 0,	"vmemmap start" },
#else
	{ 0,	"Early I/O remap start" },
	{ 0,	"Early I/O remap end" },
#ifdef CONFIG_NOT_COHERENT_CACHE
	{ 0,	"Consistent mem start" },
	{ 0,	"Consistent mem end" },
#endif
#ifdef CONFIG_HIGHMEM
	{ 0,	"Highmem PTEs start" },
	{ 0,	"Highmem PTEs end" },
#endif
	{ 0,	"Fixmap start" },
	{ 0,	"Fixmap end" },
#endif
	{ -1,	NULL },
};
struct flag_info {
	u64		mask;
	u64		val;
	const char	*set;
	const char	*clear;
	bool		is_val;
	int		shift;
};

static const struct flag_info flag_array[] = {
	{
#ifdef CONFIG_PPC_STD_MMU_64
		.mask	= _PAGE_PRIVILEGED,
		.val	= 0,
#else
		.mask	= _PAGE_USER,
		.val	= _PAGE_USER,
#endif
		.set	= "user",
		.clear	= "    ",
	}, {
		.mask	= _PAGE_PRESENT,
		.val	= _PAGE_PRESENT,
		.set	= "present",
		.clear	= "       ",
	}, {
#ifdef CONFIG_PPC_STD_MMU_64
		.mask	= H_PAGE_HASHPTE,
		.val	= H_PAGE_HASHPTE,
#else
		.mask	= _PAGE_HASHPTE,
		.val	= _PAGE_HASHPTE,
#endif
		.set	= "hpte",
		.clear	= "    ",
	}, {
#ifndef CONFIG_PPC_STD_MMU_64
		.mask	= _PAGE_GUARDED,
		.val	= _PAGE_GUARDED,
		.set	= "guarded",
	}, {
#endif
		.mask	= _PAGE_ACCESSED,
		.val	= _PAGE_ACCESSED,
		.set	= "accessed",
		.clear	= "        ",
	}, {
#ifndef CONFIG_PPC_STD_MMU_64
		.mask	= _PAGE_WRITETHRU,
		.val	= _PAGE_WRITETHRU,
		.set	= "write through",
	}, {
#endif
#ifndef CONFIG_PPC_BOOK3S_64
		.mask	= _PAGE_NO_CACHE,
		.val	= _PAGE_NO_CACHE,
		.set	= "no cache",
	}, {
#else
		.mask	= _PAGE_NON_IDEMPOTENT,
		.val	= _PAGE_NON_IDEMPOTENT,
		.set	= "non-idempotent",
	}, {
		.mask	= _PAGE_TOLERANT,
		.val	= _PAGE_TOLERANT,
		.set	= "tolerant",
	}, {
#endif
#ifdef CONFIG_PPC_BOOK3S_64
		.mask	= H_PAGE_BUSY,
		.val	= H_PAGE_BUSY,
		.set	= "busy",
	}, {
#ifdef CONFIG_PPC_64K_PAGES
		.mask	= H_PAGE_COMBO,
		.val	= H_PAGE_COMBO,
		.set	= "combo",
	}, {
		.mask	= H_PAGE_4K_PFN,
		.val	= H_PAGE_4K_PFN,
		.set	= "4K_pfn",
	}, {
#endif
		.mask	= H_PAGE_F_GIX,
		.val	= H_PAGE_F_GIX,
		.set	= "f_gix",
		.is_val	= true,
		.shift	= H_PAGE_F_GIX_SHIFT,
	}, {
		.mask	= H_PAGE_F_SECOND,
		.val	= H_PAGE_F_SECOND,
		.set	= "f_second",
	}, {
#endif /* CONFIG_PPC_BOOK3S_64 */
		.mask	= _PAGE_SPECIAL,
		.val	= _PAGE_SPECIAL,
		.set	= "special",
	}, {
		.mask	= _PAGE_SHARED,
		.val	= _PAGE_SHARED,
		.set	= "shared",
	},
};
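/*
 * Illustration (a sketch of what dump_flag_info() below does with these
 * entries): for a plain flag such as "present",
 *
 *	(pte & flag->mask) == flag->val  ->  print flag->set
 *	otherwise                        ->  print flag->clear, if any
 *
 * while an .is_val entry such as "f_gix" prints a number instead:
 *
 *	val = (pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT
 */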
struct pgtable_level {
	const struct flag_info *flag;
	size_t num;
	u64 mask;
};

static struct pgtable_level pg_level[] = {
	{
	}, { /* pgd */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	}, { /* pud */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	}, { /* pmd */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	}, { /* pte */
		.flag	= flag_array,
		.num	= ARRAY_SIZE(flag_array),
	},
};
static void dump_flag_info(struct pg_state *st, const struct flag_info
		*flag, u64 pte, int num)
{
	unsigned int i;

	for (i = 0; i < num; i++, flag++) {
		const char *s = NULL;
		u64 val;

		/* flag not defined so don't check it */
		if (flag->mask == 0)
			continue;
		/* Some 'flags' are actually values */
		if (flag->is_val) {
			val = pte & flag->val;
			if (flag->shift)
				val = val >> flag->shift;
			seq_printf(st->seq, " %s:%llx", flag->set, val);
		} else {
			if ((pte & flag->mask) == flag->val)
				s = flag->set;
			else
				s = flag->clear;
			if (s)
				seq_printf(st->seq, " %s", s);
		}
		st->current_flags &= ~flag->mask;
	}
	if (st->current_flags != 0)
		seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
}
static void dump_addr(struct pg_state *st, unsigned long addr)
{
	static const char units[] = "KMGTPE";
	const char *unit = units;
	unsigned long delta;

#ifdef CONFIG_PPC64
	seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr - 1);
	seq_printf(st->seq, "0x%016lx ", st->start_pa);
#else
	seq_printf(st->seq, "0x%08lx-0x%08lx ", st->start_address, addr - 1);
	seq_printf(st->seq, "0x%08lx ", st->start_pa);
#endif

	delta = (addr - st->start_address) >> 10;
	/* Work out what appropriate unit to use */
	while (!(delta & 1023) && unit[1]) {
		delta >>= 10;
		unit++;
	}
	seq_printf(st->seq, "%9lu%c", delta, *unit);
}
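/*
 * Example of the unit loop above (hypothetical range): a 16MB region gives
 * delta = (16 << 20) >> 10 = 16384 (in K). 16384 & 1023 == 0, so delta
 * becomes 16 and unit advances to 'M'; 16 & 1023 != 0 stops the loop and
 * "       16M" is printed.
 */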
static void note_page(struct pg_state *st, unsigned long addr,
	       unsigned int level, u64 val)
{
	u64 flag = val & pg_level[level].mask;
	u64 pa = val & PTE_RPN_MASK;

	/* At first no level is set */
	if (!st->level) {
		st->level = level;
		st->current_flags = flag;
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
	/*
	 * Dump the section of virtual memory when:
	 *   - the PTE flags from one entry to the next differs.
	 *   - we change levels in the tree.
	 *   - the address is in a different section of memory and is thus
	 *     used for a different purpose, regardless of the flags.
	 *   - the pa of this page is not adjacent to the last inspected page
	 */
	} else if (flag != st->current_flags || level != st->level ||
		   addr >= st->marker[1].start_address ||
		   pa != st->last_pa + PAGE_SIZE) {

		/* Check the PTE flags */
		if (st->current_flags) {
			/* Dump all the details */
			dump_addr(st, addr);

			/* Dump all the flags */
			if (pg_level[st->level].flag)
				dump_flag_info(st, pg_level[st->level].flag,
					       st->current_flags,
					       pg_level[st->level].num);
		}
		seq_putc(st->seq, '\n');

		/*
		 * Address indicates we have passed the end of the
		 * current section of virtual memory
		 */
		while (addr >= st->marker[1].start_address) {
			st->marker++;
			seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
		}
		st->start_address = addr;
		st->start_pa = pa;
		st->last_pa = pa;
		st->current_flags = flag;
		st->level = level;
	} else {
		st->last_pa = pa;
	}
}
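/*
 * Worked example (hypothetical numbers): two PTEs at addr and
 * addr + PAGE_SIZE with identical flags, mapping pa and pa + PAGE_SIZE,
 * simply extend the current range. If the next PTE maps a non-adjacent
 * pa, or carries different flags, the range accumulated so far is printed
 * as one line and a new range starts at that PTE.
 */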
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
{
	pte_t *pte = pte_offset_kernel(pmd, 0);
	unsigned long addr;
	unsigned int i;

	/* Note every PTE in this page table page */
	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		addr = start + i * PAGE_SIZE;
		note_page(st, addr, 4, pte_val(*pte));
	}
}
static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
{
	pmd_t *pmd = pmd_offset(pud, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
		addr = start + i * PMD_SIZE;
		if (!pmd_none(*pmd) && !pmd_huge(*pmd))
			/* pmd exists */
			walk_pte(st, pmd, addr);
		else
			note_page(st, addr, 3, pmd_val(*pmd));
	}
}
static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
{
	pud_t *pud = pud_offset(pgd, 0);
	unsigned long addr;
	unsigned int i;

	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		addr = start + i * PUD_SIZE;
		if (!pud_none(*pud) && !pud_huge(*pud))
			/* pud exists */
			walk_pmd(st, pud, addr);
		else
			note_page(st, addr, 2, pud_val(*pud));
	}
}
static void walk_pagetables(struct pg_state *st)
{
	pgd_t *pgd = pgd_offset_k(0UL);
	unsigned long addr;
	unsigned int i;

	/*
	 * Traverse the linux pagetable structure and dump pages that are in
	 * the hash pagetable.
	 */
	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
		addr = KERN_VIRT_START + i * PGDIR_SIZE;
		if (!pgd_none(*pgd) && !pgd_huge(*pgd))
			/* pgd exists */
			walk_pud(st, pgd, addr);
		else
			note_page(st, addr, 1, pgd_val(*pgd));
	}
}
static void populate_markers(void)
{
	int i = 0;

	address_markers[i++].start_address = PAGE_OFFSET;
	address_markers[i++].start_address = VMALLOC_START;
	address_markers[i++].start_address = VMALLOC_END;
#ifdef CONFIG_PPC64
	address_markers[i++].start_address = ISA_IO_BASE;
	address_markers[i++].start_address = ISA_IO_END;
	address_markers[i++].start_address = PHB_IO_BASE;
	address_markers[i++].start_address = PHB_IO_END;
	address_markers[i++].start_address = IOREMAP_BASE;
	address_markers[i++].start_address = IOREMAP_END;
#ifdef CONFIG_PPC_STD_MMU_64
	address_markers[i++].start_address = H_VMEMMAP_BASE;
#else
	address_markers[i++].start_address = VMEMMAP_BASE;
#endif
#else /* !CONFIG_PPC64 */
	address_markers[i++].start_address = ioremap_bot;
	address_markers[i++].start_address = IOREMAP_TOP;
#ifdef CONFIG_NOT_COHERENT_CACHE
	address_markers[i++].start_address = IOREMAP_TOP;
	address_markers[i++].start_address = IOREMAP_TOP +
					     CONFIG_CONSISTENT_SIZE;
#endif
#ifdef CONFIG_HIGHMEM
	address_markers[i++].start_address = PKMAP_BASE;
	address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
#endif /* CONFIG_HIGHMEM */
	address_markers[i++].start_address = FIXADDR_START;
	address_markers[i++].start_address = FIXADDR_TOP;
#endif /* CONFIG_PPC64 */
}
static int ptdump_show(struct seq_file *m, void *v)
{
	struct pg_state st = {
		.seq = m,
		.start_address = KERN_VIRT_START,
		.marker = address_markers,
	};

	/* Traverse kernel page tables */
	walk_pagetables(&st);
	/* The zero entry flushes out the final accumulated range */
	note_page(&st, 0, 0, 0);
	return 0;
}
static int ptdump_open(struct inode *inode, struct file *file)
{
	return single_open(file, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
static void build_pgtable_complete_mask(void)
{
	unsigned int i, j;

	/* Pre-compute, per level, the union of all flag masks */
	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
		if (pg_level[i].flag)
			for (j = 0; j < pg_level[i].num; j++)
				pg_level[i].mask |= pg_level[i].flag[j].mask;
}
static int ptdump_init(void)
{
	struct dentry *debugfs_file;

	populate_markers();
	build_pgtable_complete_mask();
	debugfs_file = debugfs_create_file("kernel_page_tables", 0400, NULL,
			NULL, &ptdump_fops);
	return debugfs_file ? 0 : -ENOMEM;
}
device_initcall(ptdump_init);
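/*
 * Usage sketch (assumes debugfs is mounted at the conventional path, which
 * is not mandated by this file):
 *
 *	# mount -t debugfs none /sys/kernel/debug
 *	# cat /sys/kernel/debug/kernel_page_tables
 *
 * The output is one line per contiguous range, preceded by a
 * "---[ <marker> ]---" header for each region listed in address_markers[].
 */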