15 #define MADV_PAGEOUT 21
18 #define BASE_ADDR ((void *)(1UL << 30))
19 static unsigned long hpage_pmd_size
;
20 static unsigned long page_size
;
21 static int hpage_pmd_nr
;
23 #define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
24 #define PID_SMAPS "/proc/self/smaps"
32 static const char *thp_enabled_strings
[] = {
42 THP_DEFRAG_DEFER_MADVISE
,
47 static const char *thp_defrag_strings
[] = {
65 static const char *shmem_enabled_strings
[] = {
75 struct khugepaged_settings
{
77 unsigned int alloc_sleep_millisecs
;
78 unsigned int scan_sleep_millisecs
;
79 unsigned int max_ptes_none
;
80 unsigned int max_ptes_swap
;
81 unsigned int max_ptes_shared
;
82 unsigned long pages_to_scan
;
86 enum thp_enabled thp_enabled
;
87 enum thp_defrag thp_defrag
;
88 enum shmem_enabled shmem_enabled
;
91 struct khugepaged_settings khugepaged
;
94 static struct settings default_settings
= {
95 .thp_enabled
= THP_MADVISE
,
96 .thp_defrag
= THP_DEFRAG_ALWAYS
,
97 .shmem_enabled
= SHMEM_NEVER
,
102 .alloc_sleep_millisecs
= 10,
103 .scan_sleep_millisecs
= 10,
107 static struct settings saved_settings
;
108 static bool skip_settings_restore
;
110 static int exit_status
;
112 static void success(const char *msg
)
114 printf(" \e[32m%s\e[0m\n", msg
);
117 static void fail(const char *msg
)
119 printf(" \e[31m%s\e[0m\n", msg
);
123 static int read_file(const char *path
, char *buf
, size_t buflen
)
128 fd
= open(path
, O_RDONLY
);
132 numread
= read(fd
, buf
, buflen
- 1);
141 return (unsigned int) numread
;
144 static int write_file(const char *path
, const char *buf
, size_t buflen
)
149 fd
= open(path
, O_WRONLY
);
153 numwritten
= write(fd
, buf
, buflen
- 1);
158 return (unsigned int) numwritten
;
161 static int read_string(const char *name
, const char *strings
[])
168 ret
= snprintf(path
, PATH_MAX
, THP_SYSFS
"%s", name
);
169 if (ret
>= PATH_MAX
) {
170 printf("%s: Pathname is too long\n", __func__
);
174 if (!read_file(path
, buf
, sizeof(buf
))) {
179 c
= strchr(buf
, '[');
181 printf("%s: Parse failure\n", __func__
);
186 memmove(buf
, c
, sizeof(buf
) - (c
- buf
));
188 c
= strchr(buf
, ']');
190 printf("%s: Parse failure\n", __func__
);
196 while (strings
[ret
]) {
197 if (!strcmp(strings
[ret
], buf
))
202 printf("Failed to parse %s\n", name
);
206 static void write_string(const char *name
, const char *val
)
211 ret
= snprintf(path
, PATH_MAX
, THP_SYSFS
"%s", name
);
212 if (ret
>= PATH_MAX
) {
213 printf("%s: Pathname is too long\n", __func__
);
217 if (!write_file(path
, val
, strlen(val
) + 1)) {
223 static const unsigned long read_num(const char *name
)
229 ret
= snprintf(path
, PATH_MAX
, THP_SYSFS
"%s", name
);
230 if (ret
>= PATH_MAX
) {
231 printf("%s: Pathname is too long\n", __func__
);
235 ret
= read_file(path
, buf
, sizeof(buf
));
237 perror("read_file(read_num)");
241 return strtoul(buf
, NULL
, 10);
244 static void write_num(const char *name
, unsigned long num
)
250 ret
= snprintf(path
, PATH_MAX
, THP_SYSFS
"%s", name
);
251 if (ret
>= PATH_MAX
) {
252 printf("%s: Pathname is too long\n", __func__
);
256 sprintf(buf
, "%ld", num
);
257 if (!write_file(path
, buf
, strlen(buf
) + 1)) {
263 static void write_settings(struct settings
*settings
)
265 struct khugepaged_settings
*khugepaged
= &settings
->khugepaged
;
267 write_string("enabled", thp_enabled_strings
[settings
->thp_enabled
]);
268 write_string("defrag", thp_defrag_strings
[settings
->thp_defrag
]);
269 write_string("shmem_enabled",
270 shmem_enabled_strings
[settings
->shmem_enabled
]);
271 write_num("debug_cow", settings
->debug_cow
);
272 write_num("use_zero_page", settings
->use_zero_page
);
274 write_num("khugepaged/defrag", khugepaged
->defrag
);
275 write_num("khugepaged/alloc_sleep_millisecs",
276 khugepaged
->alloc_sleep_millisecs
);
277 write_num("khugepaged/scan_sleep_millisecs",
278 khugepaged
->scan_sleep_millisecs
);
279 write_num("khugepaged/max_ptes_none", khugepaged
->max_ptes_none
);
280 write_num("khugepaged/max_ptes_swap", khugepaged
->max_ptes_swap
);
281 write_num("khugepaged/max_ptes_shared", khugepaged
->max_ptes_shared
);
282 write_num("khugepaged/pages_to_scan", khugepaged
->pages_to_scan
);
285 static void restore_settings(int sig
)
287 if (skip_settings_restore
)
290 printf("Restore THP and khugepaged settings...");
291 write_settings(&saved_settings
);
299 static void save_settings(void)
301 printf("Save THP and khugepaged settings...");
302 saved_settings
= (struct settings
) {
303 .thp_enabled
= read_string("enabled", thp_enabled_strings
),
304 .thp_defrag
= read_string("defrag", thp_defrag_strings
),
306 read_string("shmem_enabled", shmem_enabled_strings
),
307 .debug_cow
= read_num("debug_cow"),
308 .use_zero_page
= read_num("use_zero_page"),
310 saved_settings
.khugepaged
= (struct khugepaged_settings
) {
311 .defrag
= read_num("khugepaged/defrag"),
312 .alloc_sleep_millisecs
=
313 read_num("khugepaged/alloc_sleep_millisecs"),
314 .scan_sleep_millisecs
=
315 read_num("khugepaged/scan_sleep_millisecs"),
316 .max_ptes_none
= read_num("khugepaged/max_ptes_none"),
317 .max_ptes_swap
= read_num("khugepaged/max_ptes_swap"),
318 .max_ptes_shared
= read_num("khugepaged/max_ptes_shared"),
319 .pages_to_scan
= read_num("khugepaged/pages_to_scan"),
323 signal(SIGTERM
, restore_settings
);
324 signal(SIGINT
, restore_settings
);
325 signal(SIGHUP
, restore_settings
);
326 signal(SIGQUIT
, restore_settings
);
329 static void adjust_settings(void)
332 printf("Adjust settings...");
333 write_settings(&default_settings
);
337 #define MAX_LINE_LENGTH 500
339 static bool check_for_pattern(FILE *fp
, char *pattern
, char *buf
)
341 while (fgets(buf
, MAX_LINE_LENGTH
, fp
) != NULL
) {
342 if (!strncmp(buf
, pattern
, strlen(pattern
)))
348 static bool check_huge(void *addr
)
353 char buffer
[MAX_LINE_LENGTH
];
354 char addr_pattern
[MAX_LINE_LENGTH
];
356 ret
= snprintf(addr_pattern
, MAX_LINE_LENGTH
, "%08lx-",
357 (unsigned long) addr
);
358 if (ret
>= MAX_LINE_LENGTH
) {
359 printf("%s: Pattern is too long\n", __func__
);
364 fp
= fopen(PID_SMAPS
, "r");
366 printf("%s: Failed to open file %s\n", __func__
, PID_SMAPS
);
369 if (!check_for_pattern(fp
, addr_pattern
, buffer
))
372 ret
= snprintf(addr_pattern
, MAX_LINE_LENGTH
, "AnonHugePages:%10ld kB",
373 hpage_pmd_size
>> 10);
374 if (ret
>= MAX_LINE_LENGTH
) {
375 printf("%s: Pattern is too long\n", __func__
);
379 * Fetch the AnonHugePages: in the same block and check whether it got
380 * the expected number of hugeepages next.
382 if (!check_for_pattern(fp
, "AnonHugePages:", buffer
))
385 if (strncmp(buffer
, addr_pattern
, strlen(addr_pattern
)))
395 static bool check_swap(void *addr
, unsigned long size
)
400 char buffer
[MAX_LINE_LENGTH
];
401 char addr_pattern
[MAX_LINE_LENGTH
];
403 ret
= snprintf(addr_pattern
, MAX_LINE_LENGTH
, "%08lx-",
404 (unsigned long) addr
);
405 if (ret
>= MAX_LINE_LENGTH
) {
406 printf("%s: Pattern is too long\n", __func__
);
411 fp
= fopen(PID_SMAPS
, "r");
413 printf("%s: Failed to open file %s\n", __func__
, PID_SMAPS
);
416 if (!check_for_pattern(fp
, addr_pattern
, buffer
))
419 ret
= snprintf(addr_pattern
, MAX_LINE_LENGTH
, "Swap:%19ld kB",
421 if (ret
>= MAX_LINE_LENGTH
) {
422 printf("%s: Pattern is too long\n", __func__
);
426 * Fetch the Swap: in the same block and check whether it got
427 * the expected number of hugeepages next.
429 if (!check_for_pattern(fp
, "Swap:", buffer
))
432 if (strncmp(buffer
, addr_pattern
, strlen(addr_pattern
)))
441 static void *alloc_mapping(void)
445 p
= mmap(BASE_ADDR
, hpage_pmd_size
, PROT_READ
| PROT_WRITE
,
446 MAP_ANONYMOUS
| MAP_PRIVATE
, -1, 0);
447 if (p
!= BASE_ADDR
) {
448 printf("Failed to allocate VMA at %p\n", BASE_ADDR
);
455 static void fill_memory(int *p
, unsigned long start
, unsigned long end
)
459 for (i
= start
/ page_size
; i
< end
/ page_size
; i
++)
460 p
[i
* page_size
/ sizeof(*p
)] = i
+ 0xdead0000;
463 static void validate_memory(int *p
, unsigned long start
, unsigned long end
)
467 for (i
= start
/ page_size
; i
< end
/ page_size
; i
++) {
468 if (p
[i
* page_size
/ sizeof(*p
)] != i
+ 0xdead0000) {
469 printf("Page %d is corrupted: %#x\n",
470 i
, p
[i
* page_size
/ sizeof(*p
)]);
477 static bool wait_for_scan(const char *msg
, char *p
)
480 int timeout
= 6; /* 3 seconds */
484 printf("Unexpected huge page\n");
488 madvise(p
, hpage_pmd_size
, MADV_HUGEPAGE
);
490 /* Wait until the second full_scan completed */
491 full_scans
= read_num("khugepaged/full_scans") + 2;
493 printf("%s...", msg
);
497 if (read_num("khugepaged/full_scans") >= full_scans
)
503 madvise(p
, hpage_pmd_size
, MADV_NOHUGEPAGE
);
505 return timeout
== -1;
508 static void alloc_at_fault(void)
510 struct settings settings
= default_settings
;
513 settings
.thp_enabled
= THP_ALWAYS
;
514 write_settings(&settings
);
518 printf("Allocate huge page on fault...");
524 write_settings(&default_settings
);
526 madvise(p
, page_size
, MADV_DONTNEED
);
527 printf("Split huge PMD on MADV_DONTNEED...");
532 munmap(p
, hpage_pmd_size
);
535 static void collapse_full(void)
540 fill_memory(p
, 0, hpage_pmd_size
);
541 if (wait_for_scan("Collapse fully populated PTE table", p
))
543 else if (check_huge(p
))
547 validate_memory(p
, 0, hpage_pmd_size
);
548 munmap(p
, hpage_pmd_size
);
551 static void collapse_empty(void)
556 if (wait_for_scan("Do not collapse empty PTE table", p
))
558 else if (check_huge(p
))
562 munmap(p
, hpage_pmd_size
);
565 static void collapse_single_pte_entry(void)
570 fill_memory(p
, 0, page_size
);
571 if (wait_for_scan("Collapse PTE table with single PTE entry present", p
))
573 else if (check_huge(p
))
577 validate_memory(p
, 0, page_size
);
578 munmap(p
, hpage_pmd_size
);
581 static void collapse_max_ptes_none(void)
583 int max_ptes_none
= hpage_pmd_nr
/ 2;
584 struct settings settings
= default_settings
;
587 settings
.khugepaged
.max_ptes_none
= max_ptes_none
;
588 write_settings(&settings
);
592 fill_memory(p
, 0, (hpage_pmd_nr
- max_ptes_none
- 1) * page_size
);
593 if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p
))
595 else if (check_huge(p
))
599 validate_memory(p
, 0, (hpage_pmd_nr
- max_ptes_none
- 1) * page_size
);
601 fill_memory(p
, 0, (hpage_pmd_nr
- max_ptes_none
) * page_size
);
602 if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p
))
604 else if (check_huge(p
))
608 validate_memory(p
, 0, (hpage_pmd_nr
- max_ptes_none
) * page_size
);
610 munmap(p
, hpage_pmd_size
);
611 write_settings(&default_settings
);
614 static void collapse_swapin_single_pte(void)
618 fill_memory(p
, 0, hpage_pmd_size
);
620 printf("Swapout one page...");
621 if (madvise(p
, page_size
, MADV_PAGEOUT
)) {
622 perror("madvise(MADV_PAGEOUT)");
625 if (check_swap(p
, page_size
)) {
632 if (wait_for_scan("Collapse with swapping in single PTE entry", p
))
634 else if (check_huge(p
))
638 validate_memory(p
, 0, hpage_pmd_size
);
640 munmap(p
, hpage_pmd_size
);
643 static void collapse_max_ptes_swap(void)
645 int max_ptes_swap
= read_num("khugepaged/max_ptes_swap");
650 fill_memory(p
, 0, hpage_pmd_size
);
651 printf("Swapout %d of %d pages...", max_ptes_swap
+ 1, hpage_pmd_nr
);
652 if (madvise(p
, (max_ptes_swap
+ 1) * page_size
, MADV_PAGEOUT
)) {
653 perror("madvise(MADV_PAGEOUT)");
656 if (check_swap(p
, (max_ptes_swap
+ 1) * page_size
)) {
663 if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p
))
665 else if (check_huge(p
))
669 validate_memory(p
, 0, hpage_pmd_size
);
671 fill_memory(p
, 0, hpage_pmd_size
);
672 printf("Swapout %d of %d pages...", max_ptes_swap
, hpage_pmd_nr
);
673 if (madvise(p
, max_ptes_swap
* page_size
, MADV_PAGEOUT
)) {
674 perror("madvise(MADV_PAGEOUT)");
677 if (check_swap(p
, max_ptes_swap
* page_size
)) {
684 if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p
))
686 else if (check_huge(p
))
690 validate_memory(p
, 0, hpage_pmd_size
);
692 munmap(p
, hpage_pmd_size
);
695 static void collapse_single_pte_entry_compound(void)
701 printf("Allocate huge page...");
702 madvise(p
, hpage_pmd_size
, MADV_HUGEPAGE
);
703 fill_memory(p
, 0, hpage_pmd_size
);
708 madvise(p
, hpage_pmd_size
, MADV_NOHUGEPAGE
);
710 printf("Split huge page leaving single PTE mapping compound page...");
711 madvise(p
+ page_size
, hpage_pmd_size
- page_size
, MADV_DONTNEED
);
717 if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p
))
719 else if (check_huge(p
))
723 validate_memory(p
, 0, page_size
);
724 munmap(p
, hpage_pmd_size
);
727 static void collapse_full_of_compound(void)
733 printf("Allocate huge page...");
734 madvise(p
, hpage_pmd_size
, MADV_HUGEPAGE
);
735 fill_memory(p
, 0, hpage_pmd_size
);
741 printf("Split huge page leaving single PTE page table full of compound pages...");
742 madvise(p
, page_size
, MADV_NOHUGEPAGE
);
743 madvise(p
, hpage_pmd_size
, MADV_NOHUGEPAGE
);
749 if (wait_for_scan("Collapse PTE table full of compound pages", p
))
751 else if (check_huge(p
))
755 validate_memory(p
, 0, hpage_pmd_size
);
756 munmap(p
, hpage_pmd_size
);
759 static void collapse_compound_extreme(void)
765 for (i
= 0; i
< hpage_pmd_nr
; i
++) {
766 printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
767 i
+ 1, hpage_pmd_nr
);
769 madvise(BASE_ADDR
, hpage_pmd_size
, MADV_HUGEPAGE
);
770 fill_memory(BASE_ADDR
, 0, hpage_pmd_size
);
771 if (!check_huge(BASE_ADDR
)) {
772 printf("Failed to allocate huge page\n");
775 madvise(BASE_ADDR
, hpage_pmd_size
, MADV_NOHUGEPAGE
);
777 p
= mremap(BASE_ADDR
- i
* page_size
,
778 i
* page_size
+ hpage_pmd_size
,
780 MREMAP_MAYMOVE
| MREMAP_FIXED
,
781 BASE_ADDR
+ 2 * hpage_pmd_size
);
782 if (p
== MAP_FAILED
) {
783 perror("mremap+unmap");
787 p
= mremap(BASE_ADDR
+ 2 * hpage_pmd_size
,
789 (i
+ 1) * page_size
+ hpage_pmd_size
,
790 MREMAP_MAYMOVE
| MREMAP_FIXED
,
791 BASE_ADDR
- (i
+ 1) * page_size
);
792 if (p
== MAP_FAILED
) {
793 perror("mremap+alloc");
798 munmap(BASE_ADDR
, hpage_pmd_size
);
799 fill_memory(p
, 0, hpage_pmd_size
);
805 if (wait_for_scan("Collapse PTE table full of different compound pages", p
))
807 else if (check_huge(p
))
812 validate_memory(p
, 0, hpage_pmd_size
);
813 munmap(p
, hpage_pmd_size
);
816 static void collapse_fork(void)
823 printf("Allocate small page...");
824 fill_memory(p
, 0, page_size
);
830 printf("Share small page over fork()...");
832 /* Do not touch settings on child exit */
833 skip_settings_restore
= true;
841 fill_memory(p
, page_size
, 2 * page_size
);
843 if (wait_for_scan("Collapse PTE table with single page shared with parent process", p
))
845 else if (check_huge(p
))
850 validate_memory(p
, 0, page_size
);
851 munmap(p
, hpage_pmd_size
);
856 exit_status
+= WEXITSTATUS(wstatus
);
858 printf("Check if parent still has small page...");
863 validate_memory(p
, 0, page_size
);
864 munmap(p
, hpage_pmd_size
);
867 static void collapse_fork_compound(void)
874 printf("Allocate huge page...");
875 madvise(p
, hpage_pmd_size
, MADV_HUGEPAGE
);
876 fill_memory(p
, 0, hpage_pmd_size
);
882 printf("Share huge page over fork()...");
884 /* Do not touch settings on child exit */
885 skip_settings_restore
= true;
893 printf("Split huge page PMD in child process...");
894 madvise(p
, page_size
, MADV_NOHUGEPAGE
);
895 madvise(p
, hpage_pmd_size
, MADV_NOHUGEPAGE
);
900 fill_memory(p
, 0, page_size
);
902 write_num("khugepaged/max_ptes_shared", hpage_pmd_nr
- 1);
903 if (wait_for_scan("Collapse PTE table full of compound pages in child", p
))
905 else if (check_huge(p
))
909 write_num("khugepaged/max_ptes_shared",
910 default_settings
.khugepaged
.max_ptes_shared
);
912 validate_memory(p
, 0, hpage_pmd_size
);
913 munmap(p
, hpage_pmd_size
);
918 exit_status
+= WEXITSTATUS(wstatus
);
920 printf("Check if parent still has huge page...");
925 validate_memory(p
, 0, hpage_pmd_size
);
926 munmap(p
, hpage_pmd_size
);
929 static void collapse_max_ptes_shared()
931 int max_ptes_shared
= read_num("khugepaged/max_ptes_shared");
937 printf("Allocate huge page...");
938 madvise(p
, hpage_pmd_size
, MADV_HUGEPAGE
);
939 fill_memory(p
, 0, hpage_pmd_size
);
945 printf("Share huge page over fork()...");
947 /* Do not touch settings on child exit */
948 skip_settings_restore
= true;
956 printf("Trigger CoW on page %d of %d...",
957 hpage_pmd_nr
- max_ptes_shared
- 1, hpage_pmd_nr
);
958 fill_memory(p
, 0, (hpage_pmd_nr
- max_ptes_shared
- 1) * page_size
);
964 if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p
))
966 else if (!check_huge(p
))
971 printf("Trigger CoW on page %d of %d...",
972 hpage_pmd_nr
- max_ptes_shared
, hpage_pmd_nr
);
973 fill_memory(p
, 0, (hpage_pmd_nr
- max_ptes_shared
) * page_size
);
980 if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p
))
982 else if (check_huge(p
))
987 validate_memory(p
, 0, hpage_pmd_size
);
988 munmap(p
, hpage_pmd_size
);
993 exit_status
+= WEXITSTATUS(wstatus
);
995 printf("Check if parent still has huge page...");
1000 validate_memory(p
, 0, hpage_pmd_size
);
1001 munmap(p
, hpage_pmd_size
);
1006 setbuf(stdout
, NULL
);
1008 page_size
= getpagesize();
1009 hpage_pmd_size
= read_num("hpage_pmd_size");
1010 hpage_pmd_nr
= hpage_pmd_size
/ page_size
;
1012 default_settings
.khugepaged
.max_ptes_none
= hpage_pmd_nr
- 1;
1013 default_settings
.khugepaged
.max_ptes_swap
= hpage_pmd_nr
/ 8;
1014 default_settings
.khugepaged
.max_ptes_shared
= hpage_pmd_nr
/ 2;
1015 default_settings
.khugepaged
.pages_to_scan
= hpage_pmd_nr
* 8;
1023 collapse_single_pte_entry();
1024 collapse_max_ptes_none();
1025 collapse_swapin_single_pte();
1026 collapse_max_ptes_swap();
1027 collapse_single_pte_entry_compound();
1028 collapse_full_of_compound();
1029 collapse_compound_extreme();
1031 collapse_fork_compound();
1032 collapse_max_ptes_shared();
1034 restore_settings(0);