Linux 6.12 compat: META
[zfs.git] / module / os / linux / spl / spl-zone.c
blob58b5e0dc44b77c79b3a7048bf1289421e3b2151c
1 /*
2 * Copyright (c) 2021 Klara Systems, Inc.
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
29 #include <sys/kmem.h>
30 #include <linux/file.h>
31 #include <linux/magic.h>
32 #include <sys/zone.h>
33 #include <sys/string.h>
35 #if defined(CONFIG_USER_NS)
36 #include <linux/statfs.h>
37 #include <linux/proc_ns.h>
38 #endif
40 #include <sys/mutex.h>
42 static kmutex_t zone_datasets_lock;
43 static struct list_head zone_datasets;
45 typedef struct zone_datasets {
46 struct list_head zds_list; /* zone_datasets linkage */
47 struct user_namespace *zds_userns; /* namespace reference */
48 struct list_head zds_datasets; /* datasets for the namespace */
49 } zone_datasets_t;
51 typedef struct zone_dataset {
52 struct list_head zd_list; /* zone_dataset linkage */
53 size_t zd_dsnamelen; /* length of name */
54 char zd_dsname[]; /* name of the member dataset */
55 } zone_dataset_t;
57 #ifdef CONFIG_USER_NS
59 * Returns:
60 * - 0 on success
61 * - EBADF if it cannot open the provided file descriptor
62 * - ENOTTY if the file itself is a not a user namespace file. We want to
63 * intercept this error in the ZFS layer. We cannot just return one of the
64 * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
65 * and the SPL layers.
67 static int
68 user_ns_get(int fd, struct user_namespace **userns)
70 struct kstatfs st;
71 struct file *nsfile;
72 struct ns_common *ns;
73 int error;
75 if ((nsfile = fget(fd)) == NULL)
76 return (EBADF);
77 if (vfs_statfs(&nsfile->f_path, &st) != 0) {
78 error = ENOTTY;
79 goto done;
81 if (st.f_type != NSFS_MAGIC) {
82 error = ENOTTY;
83 goto done;
85 ns = get_proc_ns(file_inode(nsfile));
86 if (ns->ops->type != CLONE_NEWUSER) {
87 error = ENOTTY;
88 goto done;
90 *userns = container_of(ns, struct user_namespace, ns);
92 error = 0;
93 done:
94 fput(nsfile);
96 return (error);
98 #endif /* CONFIG_USER_NS */
100 static unsigned int
101 user_ns_zoneid(struct user_namespace *user_ns)
103 unsigned int r;
105 r = user_ns->ns.inum;
107 return (r);
110 static struct zone_datasets *
111 zone_datasets_lookup(unsigned int nsinum)
113 zone_datasets_t *zds;
115 list_for_each_entry(zds, &zone_datasets, zds_list) {
116 if (user_ns_zoneid(zds->zds_userns) == nsinum)
117 return (zds);
119 return (NULL);
122 #ifdef CONFIG_USER_NS
123 static struct zone_dataset *
124 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
126 zone_dataset_t *zd;
128 list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
129 if (zd->zd_dsnamelen != dsnamelen)
130 continue;
131 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
132 return (zd);
135 return (NULL);
138 static int
139 zone_dataset_cred_check(cred_t *cred)
142 if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
143 return (EPERM);
145 return (0);
147 #endif /* CONFIG_USER_NS */
/*
 * Validate a dataset name and compute the length to use for comparisons.
 *
 * A name that is empty or starts with '/' is rejected with ENOENT and
 * *dsnamelen is left untouched.  Otherwise 0 is returned and *dsnamelen is
 * set to the length of the name, with one trailing '/' (if any) excluded.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	/* len is at least 1 here, so indexing len - 1 is safe. */
	len = strlen(dataset);
	/* Ignore trailing slash, if supplied. */
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
165 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
167 #ifdef CONFIG_USER_NS
168 struct user_namespace *userns;
169 zone_datasets_t *zds;
170 zone_dataset_t *zd;
171 int error;
172 size_t dsnamelen;
174 if ((error = zone_dataset_cred_check(cred)) != 0)
175 return (error);
176 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
177 return (error);
178 if ((error = user_ns_get(userns_fd, &userns)) != 0)
179 return (error);
181 mutex_enter(&zone_datasets_lock);
182 zds = zone_datasets_lookup(user_ns_zoneid(userns));
183 if (zds == NULL) {
184 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
185 INIT_LIST_HEAD(&zds->zds_list);
186 INIT_LIST_HEAD(&zds->zds_datasets);
187 zds->zds_userns = userns;
189 * Lock the namespace by incresing its refcount to prevent
190 * the namespace ID from being reused.
192 get_user_ns(userns);
193 list_add_tail(&zds->zds_list, &zone_datasets);
194 } else {
195 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
196 if (zd != NULL) {
197 mutex_exit(&zone_datasets_lock);
198 return (EEXIST);
202 zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
203 zd->zd_dsnamelen = dsnamelen;
204 strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
205 INIT_LIST_HEAD(&zd->zd_list);
206 list_add_tail(&zd->zd_list, &zds->zds_datasets);
208 mutex_exit(&zone_datasets_lock);
209 return (0);
210 #else
211 return (ENXIO);
212 #endif /* CONFIG_USER_NS */
214 EXPORT_SYMBOL(zone_dataset_attach);
217 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
219 #ifdef CONFIG_USER_NS
220 struct user_namespace *userns;
221 zone_datasets_t *zds;
222 zone_dataset_t *zd;
223 int error;
224 size_t dsnamelen;
226 if ((error = zone_dataset_cred_check(cred)) != 0)
227 return (error);
228 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
229 return (error);
230 if ((error = user_ns_get(userns_fd, &userns)) != 0)
231 return (error);
233 mutex_enter(&zone_datasets_lock);
234 zds = zone_datasets_lookup(user_ns_zoneid(userns));
235 if (zds != NULL)
236 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
237 if (zds == NULL || zd == NULL) {
238 mutex_exit(&zone_datasets_lock);
239 return (ENOENT);
242 list_del(&zd->zd_list);
243 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
245 /* Prune the namespace entry if it has no more delegations. */
246 if (list_empty(&zds->zds_datasets)) {
248 * Decrease the refcount now that the namespace is no longer
249 * used. It is no longer necessary to prevent the namespace ID
250 * from being reused.
252 put_user_ns(userns);
253 list_del(&zds->zds_list);
254 kmem_free(zds, sizeof (*zds));
257 mutex_exit(&zone_datasets_lock);
258 return (0);
259 #else
260 return (ENXIO);
261 #endif /* CONFIG_USER_NS */
263 EXPORT_SYMBOL(zone_dataset_detach);
266 * A dataset is visible if:
267 * - It is a parent of a namespace entry.
268 * - It is one of the namespace entries.
269 * - It is a child of a namespace entry.
271 * A dataset is writable if:
272 * - It is one of the namespace entries.
273 * - It is a child of a namespace entry.
275 * The parent datasets of namespace entries are visible and
276 * read-only to provide a path back to the root of the pool.
279 zone_dataset_visible(const char *dataset, int *write)
281 zone_datasets_t *zds;
282 zone_dataset_t *zd;
283 size_t dsnamelen, zd_len;
284 int visible;
286 /* Default to read-only, in case visible is returned. */
287 if (write != NULL)
288 *write = 0;
289 if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
290 return (0);
291 if (INGLOBALZONE(curproc)) {
292 if (write != NULL)
293 *write = 1;
294 return (1);
297 mutex_enter(&zone_datasets_lock);
298 zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
299 if (zds == NULL) {
300 mutex_exit(&zone_datasets_lock);
301 return (0);
304 visible = 0;
305 list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
306 zd_len = strlen(zd->zd_dsname);
307 if (zd_len > dsnamelen) {
309 * The name of the namespace entry is longer than that
310 * of the dataset, so it could be that the dataset is a
311 * parent of the namespace entry.
313 visible = memcmp(zd->zd_dsname, dataset,
314 dsnamelen) == 0 &&
315 zd->zd_dsname[dsnamelen] == '/';
316 if (visible)
317 break;
318 } else if (zd_len == dsnamelen) {
320 * The name of the namespace entry is as long as that
321 * of the dataset, so perhaps the dataset itself is the
322 * namespace entry.
324 visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
325 if (visible) {
326 if (write != NULL)
327 *write = 1;
328 break;
330 } else {
332 * The name of the namespace entry is shorter than that
333 * of the dataset, so perhaps the dataset is a child of
334 * the namespace entry.
336 visible = memcmp(zd->zd_dsname, dataset,
337 zd_len) == 0 && dataset[zd_len] == '/';
338 if (visible) {
339 if (write != NULL)
340 *write = 1;
341 break;
346 mutex_exit(&zone_datasets_lock);
347 return (visible);
349 EXPORT_SYMBOL(zone_dataset_visible);
/*
 * Return the zone ID of the initial user namespace (init_user_ns), or 0 when
 * the kernel was built without CONFIG_USER_NS.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
EXPORT_SYMBOL(global_zoneid);
364 unsigned int
365 crgetzoneid(const cred_t *cr)
367 unsigned int r = 0;
369 #if defined(CONFIG_USER_NS)
370 r = user_ns_zoneid(cr->user_ns);
371 #endif
373 return (r);
375 EXPORT_SYMBOL(crgetzoneid);
377 boolean_t
378 inglobalzone(proc_t *proc)
380 #if defined(CONFIG_USER_NS)
381 return (proc->cred->user_ns == &init_user_ns);
382 #else
383 return (B_TRUE);
384 #endif
386 EXPORT_SYMBOL(inglobalzone);
389 spl_zone_init(void)
391 mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
392 INIT_LIST_HEAD(&zone_datasets);
393 return (0);
396 void
397 spl_zone_fini(void)
399 zone_datasets_t *zds;
400 zone_dataset_t *zd;
403 * It would be better to assert an empty zone_datasets, but since
404 * there's no automatic mechanism for cleaning them up if the user
405 * namespace is destroyed, just do it here, since spl is about to go
406 * out of context.
408 while (!list_empty(&zone_datasets)) {
409 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
410 while (!list_empty(&zds->zds_datasets)) {
411 zd = list_entry(zds->zds_datasets.next,
412 zone_dataset_t, zd_list);
413 list_del(&zd->zd_list);
414 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
416 put_user_ns(zds->zds_userns);
417 list_del(&zds->zds_list);
418 kmem_free(zds, sizeof (*zds));
420 mutex_destroy(&zone_datasets_lock);