2 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 * Fork and exec a tiny one-page executable that precisely controls its VM.
 * Test /proc/$PID/maps
 * Test /proc/$PID/smaps
 * Test /proc/$PID/smaps_rollup
 * Test /proc/$PID/statm
 * FIXME: requires CONFIG_TMPFS, which can be disabled
 * FIXME: test other values from "smaps"
 * FIXME: support other archs
37 #include <sys/mount.h>
38 #include <sys/types.h>
43 #include <sys/syscall.h>
45 #include <linux/kdev_t.h>
47 #include <sys/resource.h>
50 #include "../kselftest.h"
/*
 * Raw execveat(2): hands its arguments straight to syscall(SYS_execveat)
 * instead of going through a libc wrapper.
 *
 * @dirfd:    descriptor the lookup is relative to; this file passes the
 *            executable's own fd together with AT_EMPTY_PATH
 * @pathname: path relative to @dirfd ("" when AT_EMPTY_PATH is used)
 * @argv:     argument vector for the new image (callers here pass NULL)
 * @envp:     environment for the new image (callers here pass NULL)
 * @flags:    AT_* flags, forwarded unchanged
 *
 * Returns whatever syscall() returns. A successful exec does not return,
 * so any value seen by the caller indicates failure (errno is set).
 */
static inline long sys_execveat(int dirfd, const char *pathname,
				char **argv, char **envp, int flags)
{
	return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
}
57 static void make_private_tmp(void)
59 if (unshare(CLONE_NEWNS
) == -1) {
60 if (errno
== ENOSYS
|| errno
== EPERM
) {
65 if (mount(NULL
, "/", NULL
, MS_PRIVATE
|MS_REC
, NULL
) == -1) {
68 if (mount(NULL
, "/tmp", "tmpfs", 0, NULL
) == -1) {
73 static pid_t pid
= -1;
/* Size of the single page the test executable is expected to occupy. */
#define PAGE_SIZE 4096
/* Virtual address the expected maps/smaps lines and the ELF entry point are based on. */
#define VADDR (1UL << 32)
/* Byte offset within an expected "maps" line at which the pathname is written. */
#define MAPS_OFFSET 73

/*
 * Byte sequence spliced into machine-code initializer lists.
 * NOTE: from this point on the identifier `syscall` no longer names the
 * libc function, so any call to syscall() must appear above this line.
 */
#define syscall 0x0f, 0x05
117 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \
118 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff
122 (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff, \
123 ((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff
126 0xb8, (x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
128 static const uint8_t payload
[] = {
129 /* Casually unmap stack, vDSO and everything else. */
131 mov_rdi(VADDR
+ 4096),
132 mov_rsi((1ULL << 47) - 4096 - VADDR
- 4096),
137 /* write(0, &c, 1); */
138 0x31, 0xff, /* xor edi, edi */
139 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */
140 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */
148 0xeb, 0xf7, /* jmp 1b */
151 static int make_exe(const uint8_t *payload
, size_t len
)
154 struct elf64_phdr ph
;
156 struct iovec iov
[3] = {
157 {&h
, sizeof(struct elf64_hdr
)},
158 {&ph
, sizeof(struct elf64_phdr
)},
159 {(void *)payload
, len
},
164 memset(&h
, 0, sizeof(h
));
176 h
.e_entry
= VADDR
+ sizeof(struct elf64_hdr
) + sizeof(struct elf64_phdr
);
177 h
.e_phoff
= sizeof(struct elf64_hdr
);
180 h
.e_ehsize
= sizeof(struct elf64_hdr
);
181 h
.e_phentsize
= sizeof(struct elf64_phdr
);
187 memset(&ph
, 0, sizeof(ph
));
189 ph
.p_flags
= (1<<2)|1;
193 ph
.p_filesz
= sizeof(struct elf64_hdr
) + sizeof(struct elf64_phdr
) + len
;
194 ph
.p_memsz
= sizeof(struct elf64_hdr
) + sizeof(struct elf64_phdr
) + len
;
197 fd
= openat(AT_FDCWD
, "/tmp", O_WRONLY
|O_EXCL
|O_TMPFILE
, 0700);
202 if (writev(fd
, iov
, 3) != sizeof(struct elf64_hdr
) + sizeof(struct elf64_phdr
) + len
) {
206 /* Avoid ETXTBSY on exec. */
207 snprintf(buf
, sizeof(buf
), "/proc/self/fd/%u", fd
);
208 fd1
= open(buf
, O_RDONLY
|O_CLOEXEC
);
216 * 0: vsyscall VMA doesn't exist vsyscall=none
217 * 1: vsyscall VMA is --xp vsyscall=xonly
218 * 2: vsyscall VMA is r-xp vsyscall=emulate
220 static volatile int g_vsyscall
;
221 static const char *str_vsyscall
;
223 static const char str_vsyscall_0
[] = "";
224 static const char str_vsyscall_1
[] =
225 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
226 static const char str_vsyscall_2
[] =
227 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";
230 static void sigaction_SIGSEGV(int _
, siginfo_t
*__
, void *___
)
236 * vsyscall page can't be unmapped, probe it directly.
238 static void vsyscall(void)
245 fprintf(stderr
, "fork, errno %d\n", errno
);
249 struct rlimit rlim
= {0, 0};
250 (void)setrlimit(RLIMIT_CORE
, &rlim
);
252 /* Hide "segfault at ffffffffff600000" messages. */
253 struct sigaction act
;
254 memset(&act
, 0, sizeof(struct sigaction
));
255 act
.sa_flags
= SA_SIGINFO
;
256 act
.sa_sigaction
= sigaction_SIGSEGV
;
257 (void)sigaction(SIGSEGV
, &act
, NULL
);
260 /* gettimeofday(NULL, NULL); */
261 uint64_t rax
= 0xffffffffff600000;
265 : "D" (NULL
), "S" (NULL
)
270 *(volatile int *)0xffffffffff600000UL
;
275 waitpid(pid
, &wstatus
, 0);
276 if (WIFEXITED(wstatus
)) {
277 g_vsyscall
= WEXITSTATUS(wstatus
);
279 fprintf(stderr
, "error: wstatus %08x\n", wstatus
);
290 switch (g_vsyscall
) {
292 str_vsyscall
= str_vsyscall_0
;
295 str_vsyscall
= str_vsyscall_1
;
298 str_vsyscall
= str_vsyscall_2
;
308 /* Reserve fd 0 for 1-byte pipe ping from child. */
310 if (open("/", O_RDONLY
|O_DIRECTORY
|O_PATH
) != 0) {
314 exec_fd
= make_exe(payload
, sizeof(payload
));
316 if (pipe(pipefd
) == -1) {
319 if (dup2(pipefd
[1], 0) != 0) {
328 sys_execveat(exec_fd
, "", NULL
, NULL
, AT_EMPTY_PATH
);
333 if (read(pipefd
[0], &_
, 1) != 1) {
338 if (fstat(exec_fd
, &st
) == -1) {
342 /* Generate "head -n1 /proc/$PID/maps" */
344 memset(buf0
, ' ', sizeof(buf0
));
345 int len
= snprintf(buf0
, sizeof(buf0
),
346 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
347 VADDR
, VADDR
+ PAGE_SIZE
,
348 MAJOR(st
.st_dev
), MINOR(st
.st_dev
),
349 (unsigned long long)st
.st_ino
);
351 snprintf(buf0
+ MAPS_OFFSET
, sizeof(buf0
) - MAPS_OFFSET
,
352 "/tmp/#%llu (deleted)\n", (unsigned long long)st
.st_ino
);
354 /* Test /proc/$PID/maps */
356 const size_t len
= strlen(buf0
) + strlen(str_vsyscall
);
361 snprintf(buf
, sizeof(buf
), "/proc/%u/maps", pid
);
362 fd
= open(buf
, O_RDONLY
);
366 rv
= read(fd
, buf
, sizeof(buf
));
368 assert(memcmp(buf
, buf0
, strlen(buf0
)) == 0);
369 if (g_vsyscall
> 0) {
370 assert(memcmp(buf
+ strlen(buf0
), str_vsyscall
, strlen(str_vsyscall
)) == 0);
374 /* Test /proc/$PID/smaps */
380 snprintf(buf
, sizeof(buf
), "/proc/%u/smaps", pid
);
381 fd
= open(buf
, O_RDONLY
);
385 rv
= read(fd
, buf
, sizeof(buf
));
386 assert(0 <= rv
&& rv
<= sizeof(buf
));
388 assert(rv
>= strlen(buf0
));
389 assert(memcmp(buf
, buf0
, strlen(buf0
)) == 0);
391 #define RSS1 "Rss: 4 kB\n"
392 #define RSS2 "Rss: 0 kB\n"
393 #define PSS1 "Pss: 4 kB\n"
394 #define PSS2 "Pss: 0 kB\n"
395 assert(memmem(buf
, rv
, RSS1
, strlen(RSS1
)) ||
396 memmem(buf
, rv
, RSS2
, strlen(RSS2
)));
397 assert(memmem(buf
, rv
, PSS1
, strlen(PSS1
)) ||
398 memmem(buf
, rv
, PSS2
, strlen(PSS2
)));
400 static const char *S
[] = {
402 "KernelPageSize: 4 kB\n",
403 "MMUPageSize: 4 kB\n",
405 "AnonHugePages: 0 kB\n",
406 "Shared_Hugetlb: 0 kB\n",
407 "Private_Hugetlb: 0 kB\n",
412 for (i
= 0; i
< ARRAY_SIZE(S
); i
++) {
413 assert(memmem(buf
, rv
, S
[i
], strlen(S
[i
])));
416 if (g_vsyscall
> 0) {
417 assert(memmem(buf
, rv
, str_vsyscall
, strlen(str_vsyscall
)));
421 /* Test /proc/$PID/smaps_rollup */
424 memset(bufr
, ' ', sizeof(bufr
));
425 len
= snprintf(bufr
, sizeof(bufr
),
426 "%08lx-%08lx ---p 00000000 00:00 0",
427 VADDR
, VADDR
+ PAGE_SIZE
);
429 snprintf(bufr
+ MAPS_OFFSET
, sizeof(bufr
) - MAPS_OFFSET
,
436 snprintf(buf
, sizeof(buf
), "/proc/%u/smaps_rollup", pid
);
437 fd
= open(buf
, O_RDONLY
);
441 rv
= read(fd
, buf
, sizeof(buf
));
442 assert(0 <= rv
&& rv
<= sizeof(buf
));
444 assert(rv
>= strlen(bufr
));
445 assert(memcmp(buf
, bufr
, strlen(bufr
)) == 0);
447 assert(memmem(buf
, rv
, RSS1
, strlen(RSS1
)) ||
448 memmem(buf
, rv
, RSS2
, strlen(RSS2
)));
449 assert(memmem(buf
, rv
, PSS1
, strlen(PSS1
)) ||
450 memmem(buf
, rv
, PSS2
, strlen(PSS2
)));
452 static const char *S
[] = {
454 "AnonHugePages: 0 kB\n",
455 "Shared_Hugetlb: 0 kB\n",
456 "Private_Hugetlb: 0 kB\n",
461 for (i
= 0; i
< ARRAY_SIZE(S
); i
++) {
462 assert(memmem(buf
, rv
, S
[i
], strlen(S
[i
])));
466 /* Test /proc/$PID/statm */
472 snprintf(buf
, sizeof(buf
), "/proc/%u/statm", pid
);
473 fd
= open(buf
, O_RDONLY
);
477 rv
= read(fd
, buf
, sizeof(buf
));
480 assert(buf
[0] == '1'); /* ->total_vm */
481 assert(buf
[1] == ' ');
482 assert(buf
[2] == '0' || buf
[2] == '1'); /* rss */
483 assert(buf
[3] == ' ');
484 assert(buf
[4] == '0' || buf
[2] == '1'); /* file rss */
485 assert(buf
[5] == ' ');
486 assert(buf
[6] == '1'); /* ELF executable segments */
487 assert(buf
[7] == ' ');
488 assert(buf
[8] == '0');
489 assert(buf
[9] == ' ');
490 assert(buf
[10] == '0'); /* ->data_vm + ->stack_vm */
491 assert(buf
[11] == ' ');
492 assert(buf
[12] == '0');
493 assert(buf
[13] == '\n');
496 /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */
498 char path_buf
[256], exp_path_buf
[256];
499 struct procmap_query q
;
502 snprintf(path_buf
, sizeof(path_buf
), "/proc/%u/maps", pid
);
503 fd
= open(path_buf
, O_RDONLY
);
507 /* CASE 1: exact MATCH at VADDR */
508 memset(&q
, 0, sizeof(q
));
510 q
.query_addr
= VADDR
;
512 q
.vma_name_addr
= (__u64
)(unsigned long)path_buf
;
513 q
.vma_name_size
= sizeof(path_buf
);
515 err
= ioctl(fd
, PROCMAP_QUERY
, &q
);
518 assert(q
.query_addr
== VADDR
);
519 assert(q
.query_flags
== 0);
521 assert(q
.vma_flags
== (PROCMAP_QUERY_VMA_READABLE
| PROCMAP_QUERY_VMA_EXECUTABLE
));
522 assert(q
.vma_start
== VADDR
);
523 assert(q
.vma_end
== VADDR
+ PAGE_SIZE
);
524 assert(q
.vma_page_size
== PAGE_SIZE
);
526 assert(q
.vma_offset
== 0);
527 assert(q
.inode
== st
.st_ino
);
528 assert(q
.dev_major
== MAJOR(st
.st_dev
));
529 assert(q
.dev_minor
== MINOR(st
.st_dev
));
531 snprintf(exp_path_buf
, sizeof(exp_path_buf
),
532 "/tmp/#%llu (deleted)", (unsigned long long)st
.st_ino
);
533 assert(q
.vma_name_size
== strlen(exp_path_buf
) + 1);
534 assert(strcmp(path_buf
, exp_path_buf
) == 0);
536 /* CASE 2: NO MATCH at VADDR-1 */
537 memset(&q
, 0, sizeof(q
));
539 q
.query_addr
= VADDR
- 1;
540 q
.query_flags
= 0; /* exact match */
542 err
= ioctl(fd
, PROCMAP_QUERY
, &q
);
543 err
= err
< 0 ? -errno
: 0;
544 assert(err
== -ENOENT
);
546 /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */
547 memset(&q
, 0, sizeof(q
));
549 q
.query_addr
= VADDR
- 1;
550 q
.query_flags
= PROCMAP_QUERY_COVERING_OR_NEXT_VMA
;
552 err
= ioctl(fd
, PROCMAP_QUERY
, &q
);
555 assert(q
.query_addr
== VADDR
- 1);
556 assert(q
.query_flags
== PROCMAP_QUERY_COVERING_OR_NEXT_VMA
);
557 assert(q
.vma_start
== VADDR
);
558 assert(q
.vma_end
== VADDR
+ PAGE_SIZE
);
560 /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */
561 memset(&q
, 0, sizeof(q
));
563 q
.query_addr
= VADDR
+ PAGE_SIZE
; /* point right after the VMA */
564 q
.query_flags
= PROCMAP_QUERY_COVERING_OR_NEXT_VMA
;
566 err
= ioctl(fd
, PROCMAP_QUERY
, &q
);
567 err
= err
< 0 ? -errno
: 0;
568 assert(err
== -ENOENT
);
570 /* CASE 5: NO MATCH WRITABLE at VADDR */
571 memset(&q
, 0, sizeof(q
));
573 q
.query_addr
= VADDR
;
574 q
.query_flags
= PROCMAP_QUERY_VMA_WRITABLE
;
576 err
= ioctl(fd
, PROCMAP_QUERY
, &q
);
577 err
= err
< 0 ? -errno
: 0;
578 assert(err
== -ENOENT
);