Linux: Fix detection of register_sysctl_sz
[zfs.git] / tests / zfs-tests / cmd / idmap_util.c
blob49483cbaa421a44e288a898dda8db35d39253c4b
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 #ifndef _GNU_SOURCE
23 #define _GNU_SOURCE
24 #endif
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <stdbool.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <linux/types.h>
32 #include <sys/wait.h>
33 #include <sys/stat.h>
34 #include <sys/mount.h>
35 #include <fcntl.h>
36 #include <errno.h>
37 #include <sched.h>
38 #include <syscall.h>
39 #include <sys/socket.h>
41 #include <sys/list.h>
/* Fallback used only when no included header has defined UINT_MAX. */
#if !defined(UINT_MAX)
#define UINT_MAX 4294967295U
#endif
/*
 * Base value added to the syscall numbers defined below.  It is only
 * defined here when the included headers did not provide __NR_Linux.
 */
#if !defined(__NR_Linux)
#if defined(__alpha__)
#define __NR_Linux 110
#elif defined(_MIPS_SIM)
#if _MIPS_SIM == _MIPS_SIM_ABI32
#define __NR_Linux 4000
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32
#define __NR_Linux 6000
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64
#define __NR_Linux 5000
#endif
#elif defined(__ia64__)
#define __NR_Linux 1024
#else
#define __NR_Linux 0
#endif
#endif

/*
 * Fallback syscall numbers, used when the included headers do not
 * define them.
 */
#if !defined(__NR_mount_setattr)
#define __NR_mount_setattr (442 + __NR_Linux)
#endif

#if !defined(__NR_open_tree)
#define __NR_open_tree (428 + __NR_Linux)
#endif

#if !defined(__NR_move_mount)
#define __NR_move_mount (429 + __NR_Linux)
#endif
/*
 * Fallback values for the flag constants used with umount2(), open_tree(),
 * move_mount() and mount_setattr() in this file.  Each one is defined only
 * when the included headers did not already supply it.
 */
#if !defined(MNT_DETACH)
#define MNT_DETACH 2
#endif

#if !defined(MOVE_MOUNT_F_EMPTY_PATH)
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
#endif

#if !defined(MOUNT_ATTR_IDMAP)
#define MOUNT_ATTR_IDMAP 0x00100000
#endif

#if !defined(OPEN_TREE_CLONE)
#define OPEN_TREE_CLONE 1
#endif

#if !defined(OPEN_TREE_CLOEXEC)
#define OPEN_TREE_CLOEXEC O_CLOEXEC
#endif

#if !defined(AT_RECURSIVE)
#define AT_RECURSIVE 0x8000
#endif
103 typedef struct {
104 __u64 attr_set;
105 __u64 attr_clr;
106 __u64 propagation;
107 __u64 userns_fd;
108 } mount_attr_t;
110 static inline int
111 sys_mount_setattr(int dfd, const char *path, unsigned int flags,
112 mount_attr_t *attr, size_t size)
114 return (syscall(__NR_mount_setattr, dfd, path, flags, attr, size));
/*
 * Invoke the open_tree system call by number.
 *
 * Return:
 * the value returned by syscall(), converted to int
 */
static inline int
sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_open_tree, dfd, filename, flags);
	return ((int)rc);
}
/*
 * Invoke the move_mount system call by number.
 *
 * Return:
 * the value returned by syscall(), converted to int
 */
static inline int
sys_move_mount(int from_dfd, const char *from_pathname,
    int to_dfd, const char *to_pathname, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
	    to_pathname, flags);
	return ((int)rc);
}
/* Which id map(s) a parsed idmap entry applies to. */
typedef enum idmap_type_t {
	TYPE_UID = 0,	/* uid map only */
	TYPE_GID = 1,	/* gid map only */
	TYPE_BOTH = 2	/* both the uid and the gid map */
} idmap_type_t;
136 struct idmap_entry {
137 __u32 first;
138 __u32 lower_first;
139 __u32 count;
140 idmap_type_t type;
141 list_node_t node;
/*
 * printf-style logger: formats the message to stderr and terminates it
 * with a newline.
 */
static void
log_msg(const char *msg, ...)
{
	va_list args;

	va_start(args, msg);
	(void) vfprintf(stderr, msg, args);
	va_end(args);
	(void) fputc('\n', stderr);
}
/*
 * Log a message prefixed with file, line, function and the text of the
 * current errno (the "%m" conversion).
 *
 * errno is captured before logging and restored afterwards: the stdio
 * calls made by log_msg() are allowed to modify errno, and callers in
 * this file read errno after logging.
 */
#define log_errno(msg, args...) \
	do { \
		int log_errno_saved_ = errno; \
		log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__,\
		    __FUNCTION__, ##args); \
		errno = log_errno_saved_; \
	} while (0)
162 * Parse the idmapping in the following format
163 * and add to the list:
165 * u:nsid_first:hostid_first:count
166 * g:nsid_first:hostid_first:count
167 * b:nsid_first:hostid_first:count
169 * The delimiter can be : or space character.
171 * Return:
172 * 0 if success
173 * ENOMEM if out of memory
174 * EINVAL if wrong arg or input
176 static int
177 parse_idmap_entry(list_t *head, char *input)
179 char *token, *savedptr = NULL;
180 struct idmap_entry *entry;
181 unsigned long ul;
182 char *delimiter = (char *)": ";
183 char c;
185 if (!input || !head)
186 return (EINVAL);
187 entry = malloc(sizeof (*entry));
188 if (!entry)
189 return (ENOMEM);
191 token = strtok_r(input, delimiter, &savedptr);
192 if (token)
193 c = token[0];
194 if (!token || (c != 'b' && c != 'u' && c != 'g'))
195 goto errout;
196 entry->type = (c == 'b') ? TYPE_BOTH :
197 ((c == 'u') ? TYPE_UID : TYPE_GID);
199 token = strtok_r(NULL, delimiter, &savedptr);
200 if (!token)
201 goto errout;
202 ul = strtoul(token, NULL, 10);
203 if (ul > UINT_MAX || errno != 0)
204 goto errout;
205 entry->first = (__u32)ul;
207 token = strtok_r(NULL, delimiter, &savedptr);
208 if (!token)
209 goto errout;
210 ul = strtoul(token, NULL, 10);
211 if (ul > UINT_MAX || errno != 0)
212 goto errout;
213 entry->lower_first = (__u32)ul;
215 token = strtok_r(NULL, delimiter, &savedptr);
216 if (!token)
217 goto errout;
218 ul = strtoul(token, NULL, 10);
219 if (ul > UINT_MAX || errno != 0)
220 goto errout;
221 entry->count = (__u32)ul;
223 list_insert_tail(head, entry);
225 return (0);
227 errout:
228 free(entry);
229 return (EINVAL);
233 * Release all the entries in the list
235 static void
236 free_idmap(list_t *head)
238 struct idmap_entry *entry;
240 while ((entry = list_remove_head(head)) != NULL)
241 free(entry);
242 /* list_destroy() to be done by the caller */
/*
 * Write all bytes in the buffer to fd, retrying after EINTR and after
 * short writes.
 *
 * Return:
 * number of bytes written (buf_size) if success
 * the negative value returned by write() if it fails; the failure is logged
 */
static ssize_t
write_buf(int fd, const char *buf, size_t buf_size)
{
	const char *cursor = buf;
	size_t left = buf_size;
	ssize_t done = 0;

	for (;;) {
		ssize_t n = write(fd, cursor, left);

		if (n < 0) {
			if (errno == EINTR)
				continue;
			log_errno("write");
			return (n);
		}
		done += n;
		cursor += n;
		left -= n;
		if (left == 0)
			break;
	}

	return (done);
}
/*
 * Read data from file into buffer with a single read(), retried only
 * when it is interrupted (EINTR).
 *
 * Return:
 * the value returned by read(); a negative result is logged
 */
static ssize_t
read_buf(int fd, char *buf, size_t buf_size)
{
	int nread;

	do {
		nread = read(fd, buf, buf_size);
	} while (nread < 0 && errno == EINTR);

	if (nread < 0)
		log_errno("read");
	return (nread);
}
292 * Write idmap of the given type in the buffer to the
293 * process' uid_map or gid_map proc file.
295 * Return:
296 * 0 if success
297 * errno if there's any error
299 static int
300 write_idmap(pid_t pid, char *buf, size_t buf_size, idmap_type_t type)
302 char path[PATH_MAX];
303 int fd = -EBADF;
304 int ret;
306 (void) snprintf(path, sizeof (path), "/proc/%d/%cid_map",
307 pid, type == TYPE_UID ? 'u' : 'g');
308 fd = open(path, O_WRONLY | O_CLOEXEC);
309 if (fd < 0) {
310 ret = errno;
311 log_errno("open(%s)", path);
312 goto out;
314 ret = write_buf(fd, buf, buf_size);
315 if (ret < 0)
316 ret = errno;
317 else
318 ret = 0;
319 out:
320 if (fd >= 0)
321 close(fd);
322 return (ret);
326 * Write idmap info in the list to the process
327 * user namespace, i.e. its /proc/<pid>/uid_map
328 * and /proc/<pid>/gid_map file.
330 * Return:
331 * 0 if success
332 * errno if it fails
334 static int
335 write_pid_idmaps(pid_t pid, list_t *head)
337 char *buf_uids, *buf_gids;
338 char *curr_bufu, *curr_bufg;
339 /* max 4k to be allowed for each map */
340 int size_buf_uids = 4096, size_buf_gids = 4096;
341 struct idmap_entry *entry;
342 int uid_filled, gid_filled;
343 int ret = 0;
344 int has_uids = 0, has_gids = 0;
345 size_t buf_size;
347 buf_uids = malloc(size_buf_uids);
348 if (!buf_uids)
349 return (ENOMEM);
350 buf_gids = malloc(size_buf_gids);
351 if (!buf_gids) {
352 free(buf_uids);
353 return (ENOMEM);
355 curr_bufu = buf_uids;
356 curr_bufg = buf_gids;
358 for (entry = list_head(head); entry; entry = list_next(head, entry)) {
359 if (entry->type == TYPE_UID || entry->type == TYPE_BOTH) {
360 uid_filled = snprintf(curr_bufu, size_buf_uids,
361 "%u %u %u\n", entry->first, entry->lower_first,
362 entry->count);
363 if (uid_filled <= 0 || uid_filled >= size_buf_uids) {
364 ret = E2BIG;
365 goto out;
367 curr_bufu += uid_filled;
368 size_buf_uids -= uid_filled;
369 has_uids = 1;
371 if (entry->type == TYPE_GID || entry->type == TYPE_BOTH) {
372 gid_filled = snprintf(curr_bufg, size_buf_gids,
373 "%u %u %u\n", entry->first, entry->lower_first,
374 entry->count);
375 if (gid_filled <= 0 || gid_filled >= size_buf_gids) {
376 ret = E2BIG;
377 goto out;
379 curr_bufg += gid_filled;
380 size_buf_gids -= gid_filled;
381 has_gids = 1;
384 if (has_uids) {
385 buf_size = curr_bufu - buf_uids;
386 ret = write_idmap(pid, buf_uids, buf_size, TYPE_UID);
387 if (ret)
388 goto out;
390 if (has_gids) {
391 buf_size = curr_bufg - buf_gids;
392 ret = write_idmap(pid, buf_gids, buf_size, TYPE_GID);
395 out:
396 free(buf_uids);
397 free(buf_gids);
398 return (ret);
/*
 * Wait for the child process to exit
 * and reap it.
 *
 * Return:
 * process exit code if available
 * EXIT_FAILURE if waitpid() fails or the child did not exit normally
 */
static int
wait_for_pid(pid_t pid)
{
	int status;

	/* keep waiting across signal interruptions */
	while (waitpid(pid, &status, 0) < 0) {
		if (errno != EINTR)
			return (EXIT_FAILURE);
	}

	return (WIFEXITED(status) ? WEXITSTATUS(status) : EXIT_FAILURE);
}
429 * Get the file descriptor of the process user namespace
430 * given its pid.
432 * Return:
433 * fd if success
434 * -1 if it fails
436 static int
437 userns_fd_from_pid(pid_t pid)
439 int fd;
440 char path[PATH_MAX];
442 (void) snprintf(path, sizeof (path), "/proc/%d/ns/user", pid);
443 fd = open(path, O_RDONLY | O_CLOEXEC);
444 if (fd < 0)
445 log_errno("open(%s)", path);
446 return (fd);
450 * Get the user namespace file descriptor given a list
451 * of idmap info.
453 * Return:
454 * fd if success
455 * -errno if it fails
457 static int
458 userns_fd_from_idmap(list_t *head)
460 pid_t pid;
461 int ret, fd;
462 int fds[2];
463 char c;
464 int saved_errno = 0;
466 /* socketpair for bidirectional communication */
467 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fds);
468 if (ret) {
469 log_errno("socketpair");
470 return (-errno);
473 pid = fork();
474 if (pid < 0) {
475 log_errno("fork");
476 fd = -errno;
477 goto out;
480 if (pid == 0) {
481 /* child process */
482 ret = unshare(CLONE_NEWUSER);
483 if (ret == 0) {
484 /* notify the parent of success */
485 ret = write_buf(fds[1], "1", 1);
486 if (ret < 0)
487 saved_errno = errno;
488 else {
490 * Until the parent has written to idmap,
491 * we cannot exit, otherwise the defunct
492 * process is owned by the real root, writing
493 * to its idmap ends up with EPERM in the
494 * context of a user ns
496 ret = read_buf(fds[1], &c, 1);
497 if (ret < 0)
498 saved_errno = errno;
500 } else {
501 saved_errno = errno;
502 log_errno("unshare");
503 ret = write_buf(fds[1], "0", 1);
504 if (ret < 0)
505 saved_errno = errno;
507 exit(saved_errno);
510 /* parent process */
511 ret = read_buf(fds[0], &c, 1);
512 if (ret == 1 && c == '1') {
513 ret = write_pid_idmaps(pid, head);
514 if (!ret) {
515 fd = userns_fd_from_pid(pid);
516 if (fd < 0)
517 fd = -errno;
518 } else {
519 fd = -ret;
521 /* Let child know it can exit */
522 (void) write_buf(fds[0], "1", 1);
523 } else {
524 fd = -EBADF;
526 (void) wait_for_pid(pid);
527 out:
528 close(fds[0]);
529 close(fds[1]);
530 return (fd);
534 * Check if the operating system supports idmapped mount on the
535 * given path or not.
537 * Return:
538 * true if supported
539 * false if not supported
541 static bool
542 is_idmap_supported(char *path)
544 list_t head;
545 int ret;
546 int tree_fd = -EBADF, path_fd = -EBADF;
547 mount_attr_t attr = {
548 .attr_set = MOUNT_ATTR_IDMAP,
549 .userns_fd = -EBADF,
552 /* strtok_r() won't be happy with a const string */
553 /* To check if idmapped mount can be done in a user ns, map 0 to 0 */
554 char *input = strdup("b:0:0:1");
556 if (!input) {
557 errno = ENOMEM;
558 log_errno("strdup");
559 return (false);
562 list_create(&head, sizeof (struct idmap_entry),
563 offsetof(struct idmap_entry, node));
564 ret = parse_idmap_entry(&head, input);
565 if (ret) {
566 errno = ret;
567 log_errno("parse_idmap_entry(%s)", input);
568 goto out1;
570 ret = userns_fd_from_idmap(&head);
571 if (ret < 0)
572 goto out1;
573 attr.userns_fd = ret;
574 ret = openat(-EBADF, path, O_DIRECTORY | O_CLOEXEC);
575 if (ret < 0) {
576 log_errno("openat(%s)", path);
577 goto out;
579 path_fd = ret;
580 ret = sys_open_tree(path_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
581 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
582 if (ret < 0) {
583 log_errno("sys_open_tree");
584 goto out;
586 tree_fd = ret;
587 ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr,
588 sizeof (attr));
589 if (ret < 0) {
590 log_errno("sys_mount_setattr");
592 out:
593 close(attr.userns_fd);
594 out1:
595 free_idmap(&head);
596 list_destroy(&head);
597 if (tree_fd >= 0)
598 close(tree_fd);
599 if (path_fd >= 0)
600 close(path_fd);
601 free(input);
602 return (ret == 0);
/*
 * Check if the given path is a mount point or not.
 *
 * The path is compared with "<path>/..": it counts as a mount point when
 * the two are on different devices, or when both have the same inode
 * number.
 *
 * Return:
 * true if it is
 * false otherwise (including when allocation or either lstat() fails)
 */
static bool
is_mountpoint(char *path)
{
	static const char suffix[] = "/..";
	struct stat self, up;
	bool mounted = false;
	size_t up_len = strlen(path) + sizeof (suffix);
	char *up_path = malloc(up_len);

	if (up_path == NULL) {
		errno = ENOMEM;
		log_errno("malloc");
		return (false);
	}
	(void) snprintf(up_path, up_len, "%s%s", path, suffix);

	if (lstat(path, &self) == 0 && lstat(up_path, &up) == 0) {
		mounted = (self.st_dev != up.st_dev ||
		    self.st_ino == up.st_ino);
	}

	free(up_path);
	return (mounted);
}
640 * Remount the source on the new target folder with the given
641 * list of idmap info. If target is NULL, the source will be
642 * unmounted and then remounted if it is a mountpoint, otherwise
643 * no unmount is done, the source is simply idmap remounted.
645 * Return:
646 * 0 if success
647 * -errno otherwise
649 static int
650 do_idmap_mount(list_t *idmap, char *source, char *target, int flags)
652 int ret;
653 int tree_fd = -EBADF, source_fd = -EBADF;
654 mount_attr_t attr = {
655 .attr_set = MOUNT_ATTR_IDMAP,
656 .userns_fd = -EBADF,
659 ret = userns_fd_from_idmap(idmap);
660 if (ret < 0)
661 goto out1;
662 attr.userns_fd = ret;
663 ret = openat(-EBADF, source, O_DIRECTORY | O_CLOEXEC);
664 if (ret < 0) {
665 ret = -errno;
666 log_errno("openat(%s)", source);
667 goto out;
669 source_fd = ret;
670 ret = sys_open_tree(source_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
671 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE | flags);
672 if (ret < 0) {
673 ret = -errno;
674 log_errno("sys_open_tree");
675 goto out;
677 tree_fd = ret;
678 ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH | flags, &attr,
679 sizeof (attr));
680 if (ret < 0) {
681 ret = -errno;
682 log_errno("sys_mount_setattr");
683 goto out;
685 if (target == NULL && is_mountpoint(source)) {
686 ret = umount2(source, MNT_DETACH);
687 if (ret < 0) {
688 ret = -errno;
689 log_errno("umount2(%s)", source);
690 goto out;
693 ret = sys_move_mount(tree_fd, "", -EBADF, target == NULL ?
694 source : target, MOVE_MOUNT_F_EMPTY_PATH);
695 if (ret < 0) {
696 ret = -errno;
697 log_errno("sys_move_mount(%s)", target == NULL ?
698 source : target);
700 out:
701 close(attr.userns_fd);
702 out1:
703 if (tree_fd >= 0)
704 close(tree_fd);
705 if (source_fd >= 0)
706 close(source_fd);
707 return (ret);
/*
 * Print the command line help to stderr; argv[0] is used as the
 * program name.
 */
static void
print_usage(char *argv[])
{
	fprintf(stderr, "Usage: %s [-r] [-c] [-m <idmap1>] [-m <idmap2>]"
	    " ... [<source>] [<target>]\n", argv[0]);
	fputs("\n", stderr);
	fputs(" -r Recursively do idmapped mount.\n", stderr);
	fputs("\n", stderr);
	fputs(" -c Checks if idmapped mount is supported "
	    "on the <source> by the operating system or not.\n", stderr);
	fputs("\n", stderr);
	fputs(" -m <idmap> to specify the idmap info, "
	    "in the following format:\n", stderr);
	fputs(" <id_type>:<nsid_first>:<hostid_first>:<count>\n", stderr);
	fputs("\n", stderr);
	fputs(" <id_type> can be either of 'b', 'u', and 'g'.\n", stderr);
	fputs("\n", stderr);
	fputs("The <source> folder will be mounted at <target> "
	    "with the provided idmap information.\nIf no <target> is "
	    "specified, and <source> is a mount point, "
	    "then <source> will be unmounted and then remounted.\n", stderr);
}
734 main(int argc, char *argv[])
736 int opt;
737 list_t idmap_head;
738 int check_supported = 0;
739 int ret = EXIT_SUCCESS;
740 char *source = NULL, *target = NULL;
741 int flags = 0;
743 list_create(&idmap_head, sizeof (struct idmap_entry),
744 offsetof(struct idmap_entry, node));
746 while ((opt = getopt(argc, argv, "rcm:")) != -1) {
747 switch (opt) {
748 case 'r':
749 flags |= AT_RECURSIVE;
750 break;
751 case 'c':
752 check_supported = 1;
753 break;
754 case 'm':
755 ret = parse_idmap_entry(&idmap_head, optarg);
756 if (ret) {
757 errno = ret;
758 log_errno("parse_idmap_entry(%s)", optarg);
759 ret = EXIT_FAILURE;
760 goto out;
762 break;
763 default:
764 print_usage(argv);
765 exit(EXIT_FAILURE);
769 if (check_supported == 0 && list_is_empty(&idmap_head)) {
770 print_usage(argv);
771 ret = EXIT_FAILURE;
772 goto out;
775 if (optind >= argc) {
776 fprintf(stderr, "Expected to have <source>, <target>.\n");
777 print_usage(argv);
778 ret = EXIT_FAILURE;
779 goto out;
782 source = argv[optind];
783 if (optind < (argc - 1)) {
784 target = argv[optind + 1];
787 if (check_supported) {
788 free_idmap(&idmap_head);
789 list_destroy(&idmap_head);
790 if (is_idmap_supported(source)) {
791 printf("idmapped mount is supported on [%s].\n",
792 source);
793 return (EXIT_SUCCESS);
794 } else {
795 printf("idmapped mount is NOT supported.\n");
796 return (EXIT_FAILURE);
800 ret = do_idmap_mount(&idmap_head, source, target, flags);
801 if (ret)
802 ret = EXIT_FAILURE;
803 out:
804 free_idmap(&idmap_head);
805 list_destroy(&idmap_head);
807 exit(ret);