/*
 * Copyright 2016, Rashmica Gupta, IBM Corp.
 *
 * This traverses the kernel pagetables and dumps the
 * information about the used sections of memory to
 * /sys/kernel/debug/kernel_page_tables.
 *
 * Derived from the arm64 implementation:
 * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
 * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
17 #include <linux/debugfs.h>
19 #include <linux/hugetlb.h>
22 #include <linux/sched.h>
23 #include <linux/seq_file.h>
24 #include <asm/fixmap.h>
25 #include <asm/pgtable.h>
26 #include <linux/const.h>
28 #include <asm/pgalloc.h>
/*
 * Base address used for the start of the page-table walk.
 * NOTE(review): the preprocessor guard that surrounded this define is not
 * visible in the listing; #ifndef keeps any definition supplied by a header
 * and only falls back to 0 when there is none - confirm the intended guard.
 */
#ifndef KERN_VIRT_START
#define KERN_VIRT_START	0
#endif
/*
 * To visualise what is happening,
 *
 *  - PTRS_PER_P** = how many entries there are in the corresponding P**
 *  - P**_SHIFT = how many bits of the address we use to index into the
 *    corresponding P**
 *  - P**_SIZE is how much memory we can access through the table - not the
 *    size of the table itself.
 * P**={PGD, PUD, PMD, PTE}
 *
 * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
 * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
 * a page.
 *
 * In the case where there are only 3 levels, the PUD is folded into the
 * PGD: every PUD has only one entry which points to the PMD.
 *
 * The page dumper groups page table entries of the same type into a single
 * description. It uses pg_state to track the range information while
 * iterating over the PTE entries. When the continuity is broken it then
 * dumps out a description of the range - i.e. PTEs that are virtually
 * contiguous with the same PTE flags are chunked together. This is to make
 * it clear how different areas of the kernel virtual memory are used.
 */
62 const struct addr_marker
*marker
;
63 unsigned long start_address
;
64 unsigned long start_pa
;
65 unsigned long last_pa
;
/*
 * A named boundary in the kernel virtual address space. note_page() prints
 * the name as "---[ name ]---" once the walk reaches start_address.
 */
struct addr_marker {
	unsigned long start_address;	/* first address belonging to the region */
	const char *name;		/* label printed when the region is entered */
};
75 static struct addr_marker address_markers
[] = {
76 { 0, "Start of kernel VM" },
77 { 0, "vmalloc() Area" },
78 { 0, "vmalloc() End" },
80 { 0, "isa I/O start" },
82 { 0, "phb I/O start" },
84 { 0, "I/O remap start" },
85 { 0, "I/O remap end" },
86 { 0, "vmemmap start" },
88 { 0, "Early I/O remap start" },
89 { 0, "Early I/O remap end" },
90 #ifdef CONFIG_NOT_COHERENT_CACHE
91 { 0, "Consistent mem start" },
92 { 0, "Consistent mem end" },
95 { 0, "Highmem PTEs start" },
96 { 0, "Highmem PTEs end" },
98 { 0, "Fixmap start" },
/*
 * Table of page-table flag descriptors. pg_level[] refers to it through
 * ARRAY_SIZE(flag_array), and dump_flag_info() tests each descriptor as
 * (pte & mask) == val; build_pgtable_complete_mask() ORs every .mask together.
 * Which descriptors exist depends on CONFIG_PPC_BOOK3S_64 and
 * CONFIG_PPC_64K_PAGES.
 *
 * NOTE(review): this listing is incomplete - the braces separating entries
 * and most .val/.set/.clear initialisers are not visible here, so the text
 * below must be completed from the authoritative source before it is built.
 */
113 static const struct flag_info flag_array
[] = {
115 .mask
= _PAGE_USER
| _PAGE_PRIVILEGED
,
120 .mask
= _PAGE_RW
| _PAGE_RO
| _PAGE_NA
,
124 .mask
= _PAGE_RW
| _PAGE_RO
| _PAGE_NA
,
129 .mask
= _PAGE_RW
| _PAGE_RO
| _PAGE_NA
,
144 .mask
= _PAGE_PRESENT
,
145 .val
= _PAGE_PRESENT
,
149 #ifdef CONFIG_PPC_BOOK3S_64
150 .mask
= H_PAGE_HASHPTE
,
151 .val
= H_PAGE_HASHPTE
,
153 .mask
= _PAGE_HASHPTE
,
154 .val
= _PAGE_HASHPTE
,
159 #ifndef CONFIG_PPC_BOOK3S_64
160 .mask
= _PAGE_GUARDED
,
161 .val
= _PAGE_GUARDED
,
171 .mask
= _PAGE_ACCESSED
,
172 .val
= _PAGE_ACCESSED
,
176 #ifndef CONFIG_PPC_BOOK3S_64
177 .mask
= _PAGE_WRITETHRU
,
178 .val
= _PAGE_WRITETHRU
,
179 .set
= "write through",
183 #ifndef CONFIG_PPC_BOOK3S_64
184 .mask
= _PAGE_NO_CACHE
,
185 .val
= _PAGE_NO_CACHE
,
190 .mask
= _PAGE_NON_IDEMPOTENT
,
191 .val
= _PAGE_NON_IDEMPOTENT
,
192 .set
= "non-idempotent",
195 .mask
= _PAGE_TOLERANT
,
196 .val
= _PAGE_TOLERANT
,
201 #ifdef CONFIG_PPC_BOOK3S_64
206 #ifdef CONFIG_PPC_64K_PAGES
207 .mask
= H_PAGE_COMBO
,
211 .mask
= H_PAGE_4K_PFN
,
212 .val
= H_PAGE_4K_PFN
,
215 #else /* CONFIG_PPC_64K_PAGES */
216 .mask
= H_PAGE_F_GIX
,
220 .shift
= H_PAGE_F_GIX_SHIFT
,
222 .mask
= H_PAGE_F_SECOND
,
223 .val
= H_PAGE_F_SECOND
,
226 #endif /* CONFIG_PPC_64K_PAGES */
228 .mask
= _PAGE_SPECIAL
,
229 .val
= _PAGE_SPECIAL
,
234 struct pgtable_level
{
235 const struct flag_info
*flag
;
240 static struct pgtable_level pg_level
[] = {
244 .num
= ARRAY_SIZE(flag_array
),
247 .num
= ARRAY_SIZE(flag_array
),
250 .num
= ARRAY_SIZE(flag_array
),
253 .num
= ARRAY_SIZE(flag_array
),
257 static void dump_flag_info(struct pg_state
*st
, const struct flag_info
258 *flag
, u64 pte
, int num
)
262 for (i
= 0; i
< num
; i
++, flag
++) {
263 const char *s
= NULL
;
266 /* flag not defined so don't check it */
269 /* Some 'flags' are actually values */
271 val
= pte
& flag
->val
;
273 val
= val
>> flag
->shift
;
274 seq_printf(st
->seq
, " %s:%llx", flag
->set
, val
);
276 if ((pte
& flag
->mask
) == flag
->val
)
281 seq_printf(st
->seq
, " %s", s
);
283 st
->current_flags
&= ~flag
->mask
;
285 if (st
->current_flags
!= 0)
286 seq_printf(st
->seq
, " unknown flags:%llx", st
->current_flags
);
289 static void dump_addr(struct pg_state
*st
, unsigned long addr
)
291 static const char units
[] = "KMGTPE";
292 const char *unit
= units
;
296 seq_printf(st
->seq
, "0x%016lx-0x%016lx ", st
->start_address
, addr
-1);
297 seq_printf(st
->seq
, "0x%016lx ", st
->start_pa
);
299 seq_printf(st
->seq
, "0x%08lx-0x%08lx ", st
->start_address
, addr
- 1);
300 seq_printf(st
->seq
, "0x%08lx ", st
->start_pa
);
303 delta
= (addr
- st
->start_address
) >> 10;
304 /* Work out what appropriate unit to use */
305 while (!(delta
& 1023) && unit
[1]) {
309 seq_printf(st
->seq
, "%9lu%c", delta
, *unit
);
313 static void note_page(struct pg_state
*st
, unsigned long addr
,
314 unsigned int level
, u64 val
)
316 u64 flag
= val
& pg_level
[level
].mask
;
317 u64 pa
= val
& PTE_RPN_MASK
;
319 /* At first no level is set */
322 st
->current_flags
= flag
;
323 st
->start_address
= addr
;
326 seq_printf(st
->seq
, "---[ %s ]---\n", st
->marker
->name
);
328 * Dump the section of virtual memory when:
329 * - the PTE flags from one entry to the next differs.
330 * - we change levels in the tree.
331 * - the address is in a different section of memory and is thus
332 * used for a different purpose, regardless of the flags.
333 * - the pa of this page is not adjacent to the last inspected page
335 } else if (flag
!= st
->current_flags
|| level
!= st
->level
||
336 addr
>= st
->marker
[1].start_address
||
337 pa
!= st
->last_pa
+ PAGE_SIZE
) {
339 /* Check the PTE flags */
340 if (st
->current_flags
) {
343 /* Dump all the flags */
344 if (pg_level
[st
->level
].flag
)
345 dump_flag_info(st
, pg_level
[st
->level
].flag
,
347 pg_level
[st
->level
].num
);
349 seq_putc(st
->seq
, '\n');
353 * Address indicates we have passed the end of the
354 * current section of virtual memory
356 while (addr
>= st
->marker
[1].start_address
) {
358 seq_printf(st
->seq
, "---[ %s ]---\n", st
->marker
->name
);
360 st
->start_address
= addr
;
363 st
->current_flags
= flag
;
370 static void walk_pte(struct pg_state
*st
, pmd_t
*pmd
, unsigned long start
)
372 pte_t
*pte
= pte_offset_kernel(pmd
, 0);
376 for (i
= 0; i
< PTRS_PER_PTE
; i
++, pte
++) {
377 addr
= start
+ i
* PAGE_SIZE
;
378 note_page(st
, addr
, 4, pte_val(*pte
));
383 static void walk_pmd(struct pg_state
*st
, pud_t
*pud
, unsigned long start
)
385 pmd_t
*pmd
= pmd_offset(pud
, 0);
389 for (i
= 0; i
< PTRS_PER_PMD
; i
++, pmd
++) {
390 addr
= start
+ i
* PMD_SIZE
;
391 if (!pmd_none(*pmd
) && !pmd_huge(*pmd
))
393 walk_pte(st
, pmd
, addr
);
395 note_page(st
, addr
, 3, pmd_val(*pmd
));
399 static void walk_pud(struct pg_state
*st
, pgd_t
*pgd
, unsigned long start
)
401 pud_t
*pud
= pud_offset(pgd
, 0);
405 for (i
= 0; i
< PTRS_PER_PUD
; i
++, pud
++) {
406 addr
= start
+ i
* PUD_SIZE
;
407 if (!pud_none(*pud
) && !pud_huge(*pud
))
409 walk_pmd(st
, pud
, addr
);
411 note_page(st
, addr
, 2, pud_val(*pud
));
415 static void walk_pagetables(struct pg_state
*st
)
417 pgd_t
*pgd
= pgd_offset_k(0UL);
422 * Traverse the linux pagetable structure and dump pages that are in
423 * the hash pagetable.
425 for (i
= 0; i
< PTRS_PER_PGD
; i
++, pgd
++) {
426 addr
= KERN_VIRT_START
+ i
* PGDIR_SIZE
;
427 if (!pgd_none(*pgd
) && !pgd_huge(*pgd
))
429 walk_pud(st
, pgd
, addr
);
431 note_page(st
, addr
, 1, pgd_val(*pgd
));
435 static void populate_markers(void)
439 address_markers
[i
++].start_address
= PAGE_OFFSET
;
440 address_markers
[i
++].start_address
= VMALLOC_START
;
441 address_markers
[i
++].start_address
= VMALLOC_END
;
443 address_markers
[i
++].start_address
= ISA_IO_BASE
;
444 address_markers
[i
++].start_address
= ISA_IO_END
;
445 address_markers
[i
++].start_address
= PHB_IO_BASE
;
446 address_markers
[i
++].start_address
= PHB_IO_END
;
447 address_markers
[i
++].start_address
= IOREMAP_BASE
;
448 address_markers
[i
++].start_address
= IOREMAP_END
;
449 #ifdef CONFIG_PPC_BOOK3S_64
450 address_markers
[i
++].start_address
= H_VMEMMAP_BASE
;
452 address_markers
[i
++].start_address
= VMEMMAP_BASE
;
454 #else /* !CONFIG_PPC64 */
455 address_markers
[i
++].start_address
= ioremap_bot
;
456 address_markers
[i
++].start_address
= IOREMAP_TOP
;
457 #ifdef CONFIG_NOT_COHERENT_CACHE
458 address_markers
[i
++].start_address
= IOREMAP_TOP
;
459 address_markers
[i
++].start_address
= IOREMAP_TOP
+
460 CONFIG_CONSISTENT_SIZE
;
462 #ifdef CONFIG_HIGHMEM
463 address_markers
[i
++].start_address
= PKMAP_BASE
;
464 address_markers
[i
++].start_address
= PKMAP_ADDR(LAST_PKMAP
);
466 address_markers
[i
++].start_address
= FIXADDR_START
;
467 address_markers
[i
++].start_address
= FIXADDR_TOP
;
468 #endif /* CONFIG_PPC64 */
471 static int ptdump_show(struct seq_file
*m
, void *v
)
473 struct pg_state st
= {
475 .start_address
= KERN_VIRT_START
,
476 .marker
= address_markers
,
478 /* Traverse kernel page tables */
479 walk_pagetables(&st
);
480 note_page(&st
, 0, 0, 0);
485 static int ptdump_open(struct inode
*inode
, struct file
*file
)
487 return single_open(file
, ptdump_show
, NULL
);
490 static const struct file_operations ptdump_fops
= {
494 .release
= single_release
,
497 static void build_pgtable_complete_mask(void)
501 for (i
= 0; i
< ARRAY_SIZE(pg_level
); i
++)
502 if (pg_level
[i
].flag
)
503 for (j
= 0; j
< pg_level
[i
].num
; j
++)
504 pg_level
[i
].mask
|= pg_level
[i
].flag
[j
].mask
;
507 static int ptdump_init(void)
509 struct dentry
*debugfs_file
;
512 build_pgtable_complete_mask();
513 debugfs_file
= debugfs_create_file("kernel_page_tables", 0400, NULL
,
515 return debugfs_file
? 0 : -ENOMEM
;
517 device_initcall(ptdump_init
);