2 * Copyright (c) 2021 Klara Systems, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
30 #include <linux/file.h>
31 #include <linux/magic.h>
33 #include <sys/string.h>
35 #if defined(CONFIG_USER_NS)
36 #include <linux/statfs.h>
37 #include <linux/proc_ns.h>
40 #include <sys/mutex.h>
42 static kmutex_t zone_datasets_lock
;
43 static struct list_head zone_datasets
;
45 typedef struct zone_datasets
{
46 struct list_head zds_list
; /* zone_datasets linkage */
47 struct user_namespace
*zds_userns
; /* namespace reference */
48 struct list_head zds_datasets
; /* datasets for the namespace */
51 typedef struct zone_dataset
{
52 struct list_head zd_list
; /* zone_dataset linkage */
53 size_t zd_dsnamelen
; /* length of name */
54 char zd_dsname
[]; /* name of the member dataset */
61 * - EBADF if it cannot open the provided file descriptor
62 * - ENOTTY if the file itself is not a user namespace file. We want to
63 * intercept this error in the ZFS layer. We cannot just return one of the
64 * ZFS_ERR_* errors here as we want to preserve the separation of the ZFS
68 user_ns_get(int fd
, struct user_namespace
**userns
)
75 if ((nsfile
= fget(fd
)) == NULL
)
77 if (vfs_statfs(&nsfile
->f_path
, &st
) != 0) {
81 if (st
.f_type
!= NSFS_MAGIC
) {
85 ns
= get_proc_ns(file_inode(nsfile
));
86 if (ns
->ops
->type
!= CLONE_NEWUSER
) {
90 *userns
= container_of(ns
, struct user_namespace
, ns
);
98 #endif /* CONFIG_USER_NS */
101 user_ns_zoneid(struct user_namespace
*user_ns
)
105 r
= user_ns
->ns
.inum
;
110 static struct zone_datasets
*
111 zone_datasets_lookup(unsigned int nsinum
)
113 zone_datasets_t
*zds
;
115 list_for_each_entry(zds
, &zone_datasets
, zds_list
) {
116 if (user_ns_zoneid(zds
->zds_userns
) == nsinum
)
122 #ifdef CONFIG_USER_NS
123 static struct zone_dataset
*
124 zone_dataset_lookup(zone_datasets_t
*zds
, const char *dataset
, size_t dsnamelen
)
128 list_for_each_entry(zd
, &zds
->zds_datasets
, zd_list
) {
129 if (zd
->zd_dsnamelen
!= dsnamelen
)
131 if (strncmp(zd
->zd_dsname
, dataset
, dsnamelen
) == 0)
139 zone_dataset_cred_check(cred_t
*cred
)
142 if (!uid_eq(cred
->uid
, GLOBAL_ROOT_UID
))
147 #endif /* CONFIG_USER_NS */
150 zone_dataset_name_check(const char *dataset
, size_t *dsnamelen
)
153 if (dataset
[0] == '\0' || dataset
[0] == '/')
156 *dsnamelen
= strlen(dataset
);
157 /* Ignore trailing slash, if supplied. */
158 if (dataset
[*dsnamelen
- 1] == '/')
165 zone_dataset_attach(cred_t
*cred
, const char *dataset
, int userns_fd
)
167 #ifdef CONFIG_USER_NS
168 struct user_namespace
*userns
;
169 zone_datasets_t
*zds
;
174 if ((error
= zone_dataset_cred_check(cred
)) != 0)
176 if ((error
= zone_dataset_name_check(dataset
, &dsnamelen
)) != 0)
178 if ((error
= user_ns_get(userns_fd
, &userns
)) != 0)
181 mutex_enter(&zone_datasets_lock
);
182 zds
= zone_datasets_lookup(user_ns_zoneid(userns
));
184 zds
= kmem_alloc(sizeof (zone_datasets_t
), KM_SLEEP
);
185 INIT_LIST_HEAD(&zds
->zds_list
);
186 INIT_LIST_HEAD(&zds
->zds_datasets
);
187 zds
->zds_userns
= userns
;
189 * Lock the namespace by increasing its refcount to prevent
190 * the namespace ID from being reused.
193 list_add_tail(&zds
->zds_list
, &zone_datasets
);
195 zd
= zone_dataset_lookup(zds
, dataset
, dsnamelen
);
197 mutex_exit(&zone_datasets_lock
);
202 zd
= kmem_alloc(sizeof (zone_dataset_t
) + dsnamelen
+ 1, KM_SLEEP
);
203 zd
->zd_dsnamelen
= dsnamelen
;
204 strlcpy(zd
->zd_dsname
, dataset
, dsnamelen
+ 1);
205 INIT_LIST_HEAD(&zd
->zd_list
);
206 list_add_tail(&zd
->zd_list
, &zds
->zds_datasets
);
208 mutex_exit(&zone_datasets_lock
);
212 #endif /* CONFIG_USER_NS */
214 EXPORT_SYMBOL(zone_dataset_attach
);
217 zone_dataset_detach(cred_t
*cred
, const char *dataset
, int userns_fd
)
219 #ifdef CONFIG_USER_NS
220 struct user_namespace
*userns
;
221 zone_datasets_t
*zds
;
226 if ((error
= zone_dataset_cred_check(cred
)) != 0)
228 if ((error
= zone_dataset_name_check(dataset
, &dsnamelen
)) != 0)
230 if ((error
= user_ns_get(userns_fd
, &userns
)) != 0)
233 mutex_enter(&zone_datasets_lock
);
234 zds
= zone_datasets_lookup(user_ns_zoneid(userns
));
236 zd
= zone_dataset_lookup(zds
, dataset
, dsnamelen
);
237 if (zds
== NULL
|| zd
== NULL
) {
238 mutex_exit(&zone_datasets_lock
);
242 list_del(&zd
->zd_list
);
243 kmem_free(zd
, sizeof (*zd
) + zd
->zd_dsnamelen
+ 1);
245 /* Prune the namespace entry if it has no more delegations. */
246 if (list_empty(&zds
->zds_datasets
)) {
248 * Decrease the refcount now that the namespace is no longer
249 * used. It is no longer necessary to prevent the namespace ID
253 list_del(&zds
->zds_list
);
254 kmem_free(zds
, sizeof (*zds
));
257 mutex_exit(&zone_datasets_lock
);
261 #endif /* CONFIG_USER_NS */
263 EXPORT_SYMBOL(zone_dataset_detach
);
266 * A dataset is visible if:
267 * - It is a parent of a namespace entry.
268 * - It is one of the namespace entries.
269 * - It is a child of a namespace entry.
271 * A dataset is writable if:
272 * - It is one of the namespace entries.
273 * - It is a child of a namespace entry.
275 * The parent datasets of namespace entries are visible and
276 * read-only to provide a path back to the root of the pool.
279 zone_dataset_visible(const char *dataset
, int *write
)
281 zone_datasets_t
*zds
;
283 size_t dsnamelen
, zd_len
;
286 /* Default to read-only, in case visible is returned. */
289 if (zone_dataset_name_check(dataset
, &dsnamelen
) != 0)
291 if (INGLOBALZONE(curproc
)) {
297 mutex_enter(&zone_datasets_lock
);
298 zds
= zone_datasets_lookup(crgetzoneid(curproc
->cred
));
300 mutex_exit(&zone_datasets_lock
);
305 list_for_each_entry(zd
, &zds
->zds_datasets
, zd_list
) {
306 zd_len
= strlen(zd
->zd_dsname
);
307 if (zd_len
> dsnamelen
) {
309 * The name of the namespace entry is longer than that
310 * of the dataset, so it could be that the dataset is a
311 * parent of the namespace entry.
313 visible
= memcmp(zd
->zd_dsname
, dataset
,
315 zd
->zd_dsname
[dsnamelen
] == '/';
318 } else if (zd_len
== dsnamelen
) {
320 * The name of the namespace entry is as long as that
321 * of the dataset, so perhaps the dataset itself is the
324 visible
= memcmp(zd
->zd_dsname
, dataset
, zd_len
) == 0;
332 * The name of the namespace entry is shorter than that
333 * of the dataset, so perhaps the dataset is a child of
334 * the namespace entry.
336 visible
= memcmp(zd
->zd_dsname
, dataset
,
337 zd_len
) == 0 && dataset
[zd_len
] == '/';
346 mutex_exit(&zone_datasets_lock
);
349 EXPORT_SYMBOL(zone_dataset_visible
);
356 #if defined(CONFIG_USER_NS)
357 z
= user_ns_zoneid(&init_user_ns
);
362 EXPORT_SYMBOL(global_zoneid
);
365 crgetzoneid(const cred_t
*cr
)
369 #if defined(CONFIG_USER_NS)
370 r
= user_ns_zoneid(cr
->user_ns
);
375 EXPORT_SYMBOL(crgetzoneid
);
378 inglobalzone(proc_t
*proc
)
380 #if defined(CONFIG_USER_NS)
381 return (proc
->cred
->user_ns
== &init_user_ns
);
386 EXPORT_SYMBOL(inglobalzone
);
391 mutex_init(&zone_datasets_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
392 INIT_LIST_HEAD(&zone_datasets
);
399 zone_datasets_t
*zds
;
403 * It would be better to assert an empty zone_datasets, but since
404 * there's no automatic mechanism for cleaning them up if the user
405 * namespace is destroyed, just do it here, since spl is about to go
408 while (!list_empty(&zone_datasets
)) {
409 zds
= list_entry(zone_datasets
.next
, zone_datasets_t
, zds_list
);
410 while (!list_empty(&zds
->zds_datasets
)) {
411 zd
= list_entry(zds
->zds_datasets
.next
,
412 zone_dataset_t
, zd_list
);
413 list_del(&zd
->zd_list
);
414 kmem_free(zd
, sizeof (*zd
) + zd
->zd_dsnamelen
+ 1);
416 put_user_ns(zds
->zds_userns
);
417 list_del(&zds
->zds_list
);
418 kmem_free(zds
, sizeof (*zds
));
420 mutex_destroy(&zone_datasets_lock
);