4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
31 #include <linux/types.h>
34 #include <sys/mount.h>
39 #include <sys/socket.h>
/* Fallback value for UINT_MAX; parsed ids are range-checked against it. */
44 #define UINT_MAX 4294967295U
/*
 * Architecture-specific base that is added to the generic syscall numbers
 * defined further down.
 * NOTE(review): the enclosing #if/#else/#endif lines are not visible in
 * this view, so the exact conditions for each value should be confirmed.
 */
49 #define __NR_Linux 110
50 #elif defined _MIPS_SIM
51 #if _MIPS_SIM == _MIPS_SIM_ABI32
52 #define __NR_Linux 4000
54 #if _MIPS_SIM == _MIPS_SIM_NABI32
55 #define __NR_Linux 6000
57 #if _MIPS_SIM == _MIPS_SIM_ABI64
58 #define __NR_Linux 5000
60 #elif defined __ia64__
61 #define __NR_Linux 1024
/*
 * Syscall numbers used by sys_mount_setattr(), sys_open_tree() and
 * sys_move_mount(); each is only defined here when the system headers
 * did not already provide it.
 */
67 #ifndef __NR_mount_setattr
68 #define __NR_mount_setattr (442 + __NR_Linux)
71 #ifndef __NR_open_tree
72 #define __NR_open_tree (428 + __NR_Linux)
75 #ifndef __NR_move_mount
76 #define __NR_move_mount (429 + __NR_Linux)
/* Flag values for the mount syscalls, defined only when missing. */
83 #ifndef MOVE_MOUNT_F_EMPTY_PATH
84 #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
87 #ifndef MOUNT_ATTR_IDMAP
88 #define MOUNT_ATTR_IDMAP 0x00100000
91 #ifndef OPEN_TREE_CLONE
92 #define OPEN_TREE_CLONE 1
95 #ifndef OPEN_TREE_CLOEXEC
96 #define OPEN_TREE_CLOEXEC O_CLOEXEC
/* OR-ed into the mount flags by main() to request a recursive operation. */
100 #define AT_RECURSIVE 0x8000
111 sys_mount_setattr(int dfd
, const char *path
, unsigned int flags
,
112 mount_attr_t
*attr
, size_t size
)
114 return (syscall(__NR_mount_setattr
, dfd
, path
, flags
, attr
, size
));
/*
 * Invoke the open_tree system call directly through syscall(), using the
 * __NR_open_tree number.
 *
 * dfd, filename and flags are handed to the kernel unchanged.
 *
 * Return:
 *   the syscall() result converted to int
 */
static inline int
sys_open_tree(int dfd, const char *filename, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_open_tree, dfd, filename, flags);
	return ((int)rc);
}
/*
 * Invoke the move_mount system call directly through syscall(), using the
 * __NR_move_mount number.
 *
 * The source (from_dfd/from_pathname), the destination
 * (to_dfd/to_pathname) and flags are handed to the kernel unchanged.
 *
 * Return:
 *   the syscall() result converted to int
 */
static inline int
sys_move_mount(int from_dfd, const char *from_pathname,
    int to_dfd, const char *to_pathname, unsigned int flags)
{
	long rc;

	rc = syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
	    to_pathname, flags);
	return ((int)rc);
}
/*
 * Kind of id mapping an idmap entry describes.  The rest of this file uses
 * the values TYPE_UID, TYPE_GID and TYPE_BOTH.
 * NOTE(review): the enumerator list itself is not visible in this view.
 */
130 typedef enum idmap_type_t
{
/*
 * Print a printf-style message on stderr.
 *
 * msg is the format string; the variadic arguments reach vfprintf()
 * through the va_list "ap".
 * NOTE(review): the setup/teardown of "ap" and anything printed after the
 * message are not visible in this view.
 */
145 log_msg(const char *msg
, ...)
150 vfprintf(stderr
, msg
, ap
);
/*
 * Log a message through log_msg(), prefixed with the source file, line
 * number and function name, followed by "[%m]" and the caller's own
 * format and arguments.
 * NOTE(review): "%m" is presumably the glibc printf extension that prints
 * the text for the current errno -- confirm; callers appear to rely on
 * errno still holding the failure code when the macro runs.
 */
155 #define log_errno(msg, args...) \
157 log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__,\
158 __FUNCTION__, ##args); \
162 * Parse the idmapping in the following format
163 * and add to the list:
165 * u:nsid_first:hostid_first:count
166 * g:nsid_first:hostid_first:count
167 * b:nsid_first:hostid_first:count
169 * The delimiter can be : or space character.
173 * ENOMEM if out of memory
174 * EINVAL if wrong arg or input
177 parse_idmap_entry(list_t
*head
, char *input
)
179 char *token
, *savedptr
= NULL
;
180 struct idmap_entry
*entry
;
182 char *delimiter
= (char *)": ";
187 entry
= malloc(sizeof (*entry
));
191 token
= strtok_r(input
, delimiter
, &savedptr
);
194 if (!token
|| (c
!= 'b' && c
!= 'u' && c
!= 'g'))
196 entry
->type
= (c
== 'b') ? TYPE_BOTH
:
197 ((c
== 'u') ? TYPE_UID
: TYPE_GID
);
199 token
= strtok_r(NULL
, delimiter
, &savedptr
);
202 ul
= strtoul(token
, NULL
, 10);
203 if (ul
> UINT_MAX
|| errno
!= 0)
205 entry
->first
= (__u32
)ul
;
207 token
= strtok_r(NULL
, delimiter
, &savedptr
);
210 ul
= strtoul(token
, NULL
, 10);
211 if (ul
> UINT_MAX
|| errno
!= 0)
213 entry
->lower_first
= (__u32
)ul
;
215 token
= strtok_r(NULL
, delimiter
, &savedptr
);
218 ul
= strtoul(token
, NULL
, 10);
219 if (ul
> UINT_MAX
|| errno
!= 0)
221 entry
->count
= (__u32
)ul
;
223 list_insert_tail(head
, entry
);
/*
 * Drain the idmap list: entries are taken off the head one at a time with
 * list_remove_head() until the list is empty.
 * NOTE(review): the loop body is not visible in this view; presumably each
 * removed entry is freed there -- confirm.
 */
233 * Release all the entries in the list
236 free_idmap(list_t
*head
)
238 struct idmap_entry
*entry
;
240 while ((entry
= list_remove_head(head
)) != NULL
)
242 /* list_destroy() to be done by the caller */
/*
 * Write all bytes in the buffer to fd
 *
 * write() is retried when it is interrupted (EINTR) and called again for
 * the unwritten tail after a short write.  A write() that reports zero
 * bytes while data is still pending is treated as an I/O error instead of
 * being retried forever.
 *
 * Return:
 *   buf_size if everything was written
 *   negative value if write() failed, with errno describing the failure
 */
static ssize_t
write_buf(int fd, const char *buf, size_t buf_size)
{
	const char *position = buf;
	size_t remaining = buf_size;
	ssize_t written;

	for (;;) {
		written = write(fd, position, remaining);
		if (written < 0 && errno == EINTR)
			continue;
		if (written < 0) {
			log_errno("write");
			return (written);
		}
		/* written is non-negative here, so the cast is safe */
		if ((size_t)written >= remaining)
			break;
		if (written == 0) {
			/* no progress: bail out rather than spin */
			errno = EIO;
			log_errno("write");
			return (-1);
		}
		remaining -= (size_t)written;
		position += written;
	}

	return ((ssize_t)buf_size);
}
/*
 * Read up to buf_size bytes from fd into buf with a single read() call;
 * the EINTR test below handles a read() that was interrupted.
 * NOTE(review): the declaration of "ret", the loop around the call and the
 * return statement are not visible in this view.
 */
274 * Read data from file into buffer
277 read_buf(int fd
, char *buf
, size_t buf_size
)
281 ret
= read(fd
, buf
, buf_size
);
/* interrupted before any data was transferred */
282 if (ret
< 0 && errno
== EINTR
)
/*
 * Write a prepared id-mapping buffer to /proc/<pid>/uid_map (type equal
 * to TYPE_UID) or /proc/<pid>/gid_map (any other type).
 *
 *   pid       process whose map file is written
 *   buf       mapping text to write
 *   buf_size  number of bytes in buf
 *   type      selects the uid or the gid map file
 */
292 * Write idmap of the given type in the buffer to the
293 * process' uid_map or gid_map proc file.
297 * errno if there's any error
300 write_idmap(pid_t pid
, char *buf
, size_t buf_size
, idmap_type_t type
)
306 (void) snprintf(path
, sizeof (path
), "/proc/%d/%cid_map",
307 pid
, type
== TYPE_UID
? 'u' : 'g');
308 fd
= open(path
, O_WRONLY
| O_CLOEXEC
);
311 log_errno("open(%s)", path
);
/* the whole buffer is handed to write_buf() in one go */
314 ret
= write_buf(fd
, buf
, buf_size
);
/*
 * Format every entry of the idmap list as a "first lower_first count" text
 * line and write the result to the uid and gid map files of the given
 * process through write_idmap().
 *
 * Two separately allocated 4096-byte buffers are filled, one for uid lines
 * and one for gid lines.  An entry of TYPE_BOTH contributes a line to both
 * buffers.
 * NOTE(review): the error branches, the use of has_uids/has_gids and the
 * release of the buffers are not visible in this view.
 */
326 * Write idmap info in the list to the process
327 * user namespace, i.e. its /proc/<pid>/uid_map
328 * and /proc/<pid>/gid_map file.
335 write_pid_idmaps(pid_t pid
, list_t
*head
)
337 char *buf_uids
, *buf_gids
;
338 char *curr_bufu
, *curr_bufg
;
339 /* max 4k to be allowed for each map */
340 int size_buf_uids
= 4096, size_buf_gids
= 4096;
341 struct idmap_entry
*entry
;
342 int uid_filled
, gid_filled
;
344 int has_uids
= 0, has_gids
= 0;
347 buf_uids
= malloc(size_buf_uids
);
350 buf_gids
= malloc(size_buf_gids
);
/* curr_buf* track the next free position in each buffer */
355 curr_bufu
= buf_uids
;
356 curr_bufg
= buf_gids
;
358 for (entry
= list_head(head
); entry
; entry
= list_next(head
, entry
)) {
359 if (entry
->type
== TYPE_UID
|| entry
->type
== TYPE_BOTH
) {
360 uid_filled
= snprintf(curr_bufu
, size_buf_uids
,
361 "%u %u %u\n", entry
->first
, entry
->lower_first
,
/* snprintf() failed, wrote nothing, or the line did not fit */
363 if (uid_filled
<= 0 || uid_filled
>= size_buf_uids
) {
/* advance past the line just written and shrink the space left */
367 curr_bufu
+= uid_filled
;
368 size_buf_uids
-= uid_filled
;
371 if (entry
->type
== TYPE_GID
|| entry
->type
== TYPE_BOTH
) {
372 gid_filled
= snprintf(curr_bufg
, size_buf_gids
,
373 "%u %u %u\n", entry
->first
, entry
->lower_first
,
375 if (gid_filled
<= 0 || gid_filled
>= size_buf_gids
) {
379 curr_bufg
+= gid_filled
;
380 size_buf_gids
-= gid_filled
;
/* bytes accumulated in the uid buffer */
385 buf_size
= curr_bufu
- buf_uids
;
386 ret
= write_idmap(pid
, buf_uids
, buf_size
, TYPE_UID
);
/* bytes accumulated in the gid buffer */
391 buf_size
= curr_bufg
- buf_gids
;
392 ret
= write_idmap(pid
, buf_gids
, buf_size
, TYPE_GID
);
/*
 * Wait for the child process to exit
 *
 * A waitpid() call that is interrupted by a signal is simply repeated.
 *
 * Return:
 *   process exit code if available
 *   EXIT_FAILURE if waitpid() fails or the child did not exit normally
 */
static int
wait_for_pid(pid_t pid)
{
	int wstatus = 0;

	while (waitpid(pid, &wstatus, 0) < 0) {
		if (errno != EINTR)
			return (EXIT_FAILURE);
	}

	return (WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : EXIT_FAILURE);
}
/*
 * Open /proc/<pid>/ns/user read-only with O_CLOEXEC and log through
 * log_errno() when the open fails.
 * NOTE(review): the return statement is not visible in this view;
 * presumably the descriptor from open() is returned as is -- confirm.
 */
429 * Get the file descriptor of the process user namespace
437 userns_fd_from_pid(pid_t pid
)
442 (void) snprintf(path
, sizeof (path
), "/proc/%d/ns/user", pid
);
443 fd
= open(path
, O_RDONLY
| O_CLOEXEC
);
445 log_errno("open(%s)", path
);
/*
 * Create a user namespace that carries the id mappings in the list and
 * obtain a file descriptor for it.
 *
 * Two sides talk over a socketpair: the side using fds[1] calls
 * unshare(CLONE_NEWUSER) and reports "1" on success or "0" on failure,
 * then waits for one byte before going on.  The side using fds[0] reads
 * that report and, on "1", writes the id maps with write_pid_idmaps(),
 * opens the namespace with userns_fd_from_pid() and sends "1" back.
 * NOTE(review): the process creation, the branch structure and the return
 * value are not visible in this view; presumably fds[1] belongs to a
 * forked child and fds[0] to the parent -- confirm.
 */
450 * Get the user namespace file descriptor given a list
458 userns_fd_from_idmap(list_t
*head
)
466 /* socketpair for bidirectional communication */
467 ret
= socketpair(AF_LOCAL
, SOCK_STREAM
| SOCK_CLOEXEC
, 0, fds
);
469 log_errno("socketpair");
482 ret
= unshare(CLONE_NEWUSER
);
484 /* notify the parent of success */
485 ret
= write_buf(fds
[1], "1", 1);
490 * Until the parent has written to idmap,
491 * we cannot exit, otherwise the defunct
492 * process is owned by the real root, writing
493 * to its idmap ends up with EPERM in the
494 * context of a user ns
496 ret
= read_buf(fds
[1], &c
, 1);
502 log_errno("unshare");
/* report the unshare() failure to the other side */
503 ret
= write_buf(fds
[1], "0", 1);
/* wait for the one-byte status report */
511 ret
= read_buf(fds
[0], &c
, 1);
512 if (ret
== 1 && c
== '1') {
513 ret
= write_pid_idmaps(pid
, head
);
515 fd
= userns_fd_from_pid(pid
);
521 /* Let child know it can exit */
522 (void) write_buf(fds
[0], "1", 1);
/* the exit status of the other process is deliberately ignored */
526 (void) wait_for_pid(pid
);
/*
 * Probe for idmapped mount support on the given path.
 *
 * The visible steps are:
 *   1. build a one-entry idmap list from the string "b:0:0:1";
 *   2. obtain a user namespace fd for it via userns_fd_from_idmap();
 *   3. open path as a directory;
 *   4. clone its mount tree with sys_open_tree();
 *   5. try to apply MOUNT_ATTR_IDMAP to the clone with
 *      sys_mount_setattr(), using the namespace fd.
 * NOTE(review): the error branches, the remaining cleanup and the return
 * statement are not visible in this view.
 */
534 * Check if the operating system supports idmapped mount on the
539 * false if not supported
542 is_idmap_supported(char *path
)
546 int tree_fd
= -EBADF
, path_fd
= -EBADF
;
547 mount_attr_t attr
= {
548 .attr_set
= MOUNT_ATTR_IDMAP
,
552 /* strtok_r() won't be happy with a const string */
553 /* To check if idmapped mount can be done in a user ns, map 0 to 0 */
554 char *input
= strdup("b:0:0:1");
562 list_create(&head
, sizeof (struct idmap_entry
),
563 offsetof(struct idmap_entry
, node
));
564 ret
= parse_idmap_entry(&head
, input
);
567 log_errno("parse_idmap_entry(%s)", input
);
570 ret
= userns_fd_from_idmap(&head
);
/* the namespace fd is what MOUNT_ATTR_IDMAP will be applied with */
573 attr
.userns_fd
= ret
;
574 ret
= openat(-EBADF
, path
, O_DIRECTORY
| O_CLOEXEC
);
576 log_errno("openat(%s)", path
);
/* clone the mount tree so the probe works on a copy */
580 ret
= sys_open_tree(path_fd
, "", AT_EMPTY_PATH
| AT_NO_AUTOMOUNT
|
581 AT_SYMLINK_NOFOLLOW
| OPEN_TREE_CLOEXEC
| OPEN_TREE_CLONE
);
583 log_errno("sys_open_tree");
587 ret
= sys_mount_setattr(tree_fd
, "", AT_EMPTY_PATH
, &attr
,
590 log_errno("sys_mount_setattr");
593 close(attr
.userns_fd
);
/*
 * Decide whether path is a mount point by comparing lstat() results for
 * path and for "<path>/..".
 *
 * The second condition below holds when the two are on different devices
 * or share the same inode number.
 * NOTE(review): what is returned on each branch and the release of
 * "parent" are not visible in this view.
 */
606 * Check if the given path is a mount point or not.
613 is_mountpoint(char *path
)
616 struct stat st_me
, st_parent
;
/* room for path, the three characters of "/.." and the terminating NUL */
619 parent
= malloc(strlen(path
)+4);
625 strcat(strcpy(parent
, path
), "/..");
626 if (lstat(path
, &st_me
) != 0 ||
627 lstat(parent
, &st_parent
) != 0)
630 if (st_me
.st_dev
!= st_parent
.st_dev
||
631 st_me
.st_ino
== st_parent
.st_ino
)
/*
 * Perform the idmapped mount.
 *
 *   idmap   list of id mappings, turned into a user namespace fd
 *   source  directory whose mount tree is cloned
 *   target  where the clone is attached; NULL means source itself
 *   flags   extra flags OR-ed into both the sys_open_tree() and the
 *           sys_mount_setattr() flag arguments
 *
 * The visible steps are: get the namespace fd, open source, clone its
 * mount tree, apply MOUNT_ATTR_IDMAP to the clone, detach the old mount
 * when target is NULL and source is a mount point, then move the clone to
 * its destination.
 * NOTE(review): the error branches, the remaining cleanup and the return
 * value are not visible in this view.
 */
640 * Remount the source on the new target folder with the given
641 * list of idmap info. If target is NULL, the source will be
642 * unmounted and then remounted if it is a mountpoint, otherwise
643 * no unmount is done, the source is simply idmap remounted.
650 do_idmap_mount(list_t
*idmap
, char *source
, char *target
, int flags
)
653 int tree_fd
= -EBADF
, source_fd
= -EBADF
;
654 mount_attr_t attr
= {
655 .attr_set
= MOUNT_ATTR_IDMAP
,
659 ret
= userns_fd_from_idmap(idmap
);
662 attr
.userns_fd
= ret
;
663 ret
= openat(-EBADF
, source
, O_DIRECTORY
| O_CLOEXEC
);
666 log_errno("openat(%s)", source
);
670 ret
= sys_open_tree(source_fd
, "", AT_EMPTY_PATH
| AT_NO_AUTOMOUNT
|
671 AT_SYMLINK_NOFOLLOW
| OPEN_TREE_CLOEXEC
| OPEN_TREE_CLONE
| flags
);
674 log_errno("sys_open_tree");
678 ret
= sys_mount_setattr(tree_fd
, "", AT_EMPTY_PATH
| flags
, &attr
,
682 log_errno("sys_mount_setattr");
/* remounting in place: lazily detach the existing mount first */
685 if (target
== NULL
&& is_mountpoint(source
)) {
686 ret
= umount2(source
, MNT_DETACH
);
689 log_errno("umount2(%s)", source
);
/* attach the idmapped clone at target, or back at source */
693 ret
= sys_move_mount(tree_fd
, "", -EBADF
, target
== NULL
?
694 source
: target
, MOVE_MOUNT_F_EMPTY_PATH
);
697 log_errno("sys_move_mount(%s)", target
== NULL
?
701 close(attr
.userns_fd
);
/*
 * Print the command line help on stderr.  argv[0] is used as the program
 * name in the usage line.
 */
static void
print_usage(char *argv[])
{
	/* everything after the usage line is fixed text */
	static const char help_text[] =
	    "\n"
	    " -r Recursively do idmapped mount.\n"
	    "\n"
	    " -c Checks if idmapped mount is supported "
	    "on the <source> by the operating system or not.\n"
	    "\n"
	    " -m <idmap> to specify the idmap info, "
	    "in the following format:\n"
	    " <id_type>:<nsid_first>:<hostid_first>:<count>\n"
	    "\n"
	    " <id_type> can be either of 'b', 'u', and 'g'.\n"
	    "\n"
	    "The <source> folder will be mounted at <target> "
	    "with the provided idmap information.\nIf no <target> is "
	    "specified, and <source> is a mount point, "
	    "then <source> will be unmounted and then remounted.\n";

	fprintf(stderr, "Usage: %s [-r] [-c] [-m <idmap1>] [-m <idmap2>]"
	    " ... [<source>] [<target>]\n", argv[0]);
	fputs(help_text, stderr);
}
/*
 * Program entry point.
 *
 * Options are parsed with getopt() using the string "rcm:".  Each idmap
 * argument is parsed into idmap_head with parse_idmap_entry().  The first
 * non-option argument is the source and the optional second one is the
 * target.  In check mode the program reports whether idmapped mount is
 * supported on source; otherwise it calls do_idmap_mount().
 * NOTE(review): the option switch, the error exits and the final return
 * are not visible in this view; which option sets AT_RECURSIVE and which
 * sets check_supported should be confirmed against the usage text.
 */
734 main(int argc
, char *argv
[])
738 int check_supported
= 0;
739 int ret
= EXIT_SUCCESS
;
740 char *source
= NULL
, *target
= NULL
;
743 list_create(&idmap_head
, sizeof (struct idmap_entry
),
744 offsetof(struct idmap_entry
, node
));
746 while ((opt
= getopt(argc
, argv
, "rcm:")) != -1) {
749 flags
|= AT_RECURSIVE
;
755 ret
= parse_idmap_entry(&idmap_head
, optarg
);
758 log_errno("parse_idmap_entry(%s)", optarg
);
/* not in check mode and no idmap was supplied */
769 if (check_supported
== 0 && list_is_empty(&idmap_head
)) {
/* no positional argument left for <source> */
775 if (optind
>= argc
) {
776 fprintf(stderr
, "Expected to have <source>, <target>.\n");
782 source
= argv
[optind
];
/* a second positional argument, when present, is the target */
783 if (optind
< (argc
- 1)) {
784 target
= argv
[optind
+ 1];
787 if (check_supported
) {
/* the list is released before the support check is run */
788 free_idmap(&idmap_head
);
789 list_destroy(&idmap_head
);
790 if (is_idmap_supported(source
)) {
791 printf("idmapped mount is supported on [%s].\n",
793 return (EXIT_SUCCESS
);
795 printf("idmapped mount is NOT supported.\n");
796 return (EXIT_FAILURE
);
800 ret
= do_idmap_mount(&idmap_head
, source
, target
, flags
);
804 free_idmap(&idmap_head
);
805 list_destroy(&idmap_head
);