1 // SPDX-License-Identifier: GPL-2.0
3 * linux/ipc/namespace.c
4 * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc.
9 #include <linux/ipc_namespace.h>
10 #include <linux/rcupdate.h>
11 #include <linux/nsproxy.h>
12 #include <linux/slab.h>
13 #include <linux/cred.h>
15 #include <linux/mount.h>
16 #include <linux/user_namespace.h>
17 #include <linux/proc_ns.h>
18 #include <linux/sched/task.h>
/*
 * The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
 */
/* Handler for free_ipc_work; its definition appears later in this file. */
static void free_ipc(struct work_struct *unused);

/* Work item whose callback is free_ipc(). */
static DECLARE_WORK(free_ipc_work, free_ipc);
28 static struct ucounts
*inc_ipc_namespaces(struct user_namespace
*ns
)
30 return inc_ucount(ns
, current_euid(), UCOUNT_IPC_NAMESPACES
);
33 static void dec_ipc_namespaces(struct ucounts
*ucounts
)
35 dec_ucount(ucounts
, UCOUNT_IPC_NAMESPACES
);
/*
 * create_ipc_ns - allocate and initialise a new ipc_namespace for @user_ns.
 *
 * Visible steps: take an IPC-namespace ucount, kzalloc() the structure,
 * allocate its inum, set ops/refcount/user_ns/ucounts, register the mq and
 * ipc sysctls and call msg_init_ns().
 *
 * NOTE(review): many lines of this function are not visible in this view
 * (the declaration of 'err', the bodies of the if statements, labels and
 * the return statements), and @old_ns is not referenced in any visible
 * line.  The comments below describe only what can be seen.
 */
38 static struct ipc_namespace
*create_ipc_ns(struct user_namespace
*user_ns
,
39 struct ipc_namespace
*old_ns
)
41 struct ipc_namespace
*ns
;
42 struct ucounts
*ucounts
;
/* Take a UCOUNT_IPC_NAMESPACES count for user_ns (see inc_ipc_namespaces()). */
47 ucounts
= inc_ipc_namespaces(user_ns
);
50 * IPC namespaces are freed asynchronously, by free_ipc_work.
51 * If frees were pending, flush_work will wait, and
52 * return true. Fail the allocation if no frees are pending.
54 if (flush_work(&free_ipc_work
))
/* Zeroed allocation of the namespace object, using GFP_KERNEL_ACCOUNT. */
60 ns
= kzalloc(sizeof(struct ipc_namespace
), GFP_KERNEL_ACCOUNT
);
64 err
= ns_alloc_inum(&ns
->ns
);
67 ns
->ns
.ops
= &ipcns_operations
;
/* The new namespace starts with a single reference. */
69 refcount_set(&ns
->ns
.count
, 1);
/* The namespace holds its own reference on user_ns and keeps the ucount. */
70 ns
->user_ns
= get_user_ns(user_ns
);
71 ns
->ucounts
= ucounts
;
78 if (!setup_mq_sysctls(ns
))
81 if (!setup_ipc_sysctls(ns
))
84 err
= msg_init_ns(ns
);
/*
 * The calls from here on release what was set up above (mq sysctls,
 * user_ns reference, inum, ucount) -- presumably the error-unwind path;
 * its labels are not visible here, TODO confirm.
 */
94 retire_mq_sysctls(ns
);
97 put_user_ns(ns
->user_ns
);
98 ns_free_inum(&ns
->ns
);
102 dec_ipc_namespaces(ucounts
);
107 struct ipc_namespace
*copy_ipcs(unsigned long flags
,
108 struct user_namespace
*user_ns
, struct ipc_namespace
*ns
)
110 if (!(flags
& CLONE_NEWIPC
))
111 return get_ipc_ns(ns
);
112 return create_ipc_ns(user_ns
, ns
);
/**
 * free_ipcs - free all ipcs of one type
 * @ns:   the namespace to remove the ipcs from
 * @ids:  the table of ipcs to free
 * @free: the function called to free each individual ipc
 *
 * Called for each kind of ipc when an ipc_namespace exits.
 */
/*
 * Walk the ids in @ids->ipcs_idr with @ids->rwsem held for writing.
 *
 * NOTE(review): the declarations of in_use/total/next_id, any handling of a
 * NULL idr_find() result, the call through @free and the update of 'total'
 * are not visible in this view; only the visible lines are described.
 */
123 void free_ipcs(struct ipc_namespace
*ns
, struct ipc_ids
*ids
,
124 void (*free
)(struct ipc_namespace
*, struct kern_ipc_perm
*))
126 struct kern_ipc_perm
*perm
;
/* The whole walk runs with ids->rwsem held for writing. */
130 down_write(&ids
->rwsem
);
/* Snapshot of the in-use count; it bounds the loop through 'total'. */
132 in_use
= ids
->in_use
;
/* next_id advances every iteration; the loop stops once total reaches in_use. */
134 for (total
= 0, next_id
= 0; total
< in_use
; next_id
++) {
135 perm
= idr_find(&ids
->ipcs_idr
, next_id
);
139 ipc_lock_object(perm
);
143 up_write(&ids
->rwsem
);
/*
 * Tear down @ns: retire its mq and ipc sysctls, give back its ucount, drop
 * its user_ns reference and free its inum.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (between the two comment-text lines below and the first call, and after
 * the last call), so further teardown steps may exist.
 */
146 static void free_ipc_ns(struct ipc_namespace
*ns
)
149 * Caller needs to wait for an RCU grace period to have passed
150 * after making the mount point inaccessible to new accesses.
157 retire_mq_sysctls(ns
);
158 retire_ipc_sysctls(ns
);
/* Give back the ucount stored in ns->ucounts. */
160 dec_ipc_namespaces(ns
->ucounts
);
161 put_user_ns(ns
->user_ns
);
162 ns_free_inum(&ns
->ns
);
/* Lock-less list onto which put_ipc_ns() adds namespaces via their mnt_llist node. */
166 static LLIST_HEAD(free_ipc_list
);
/*
 * Callback of free_ipc_work.  It detaches everything currently on
 * free_ipc_list and walks the detached entries twice.
 *
 * NOTE(review): the statement between the two walks and the body of the
 * second walk are not visible in this view.
 */
167 static void free_ipc(struct work_struct
*unused
)
/* Take the whole pending list in one step. */
169 struct llist_node
*node
= llist_del_all(&free_ipc_list
);
170 struct ipc_namespace
*n
, *t
;
/* First walk: call mnt_make_shortterm() on each namespace's mq_mnt. */
172 llist_for_each_entry_safe(n
, t
, node
, mnt_llist
)
173 mnt_make_shortterm(n
->mq_mnt
);
175 /* Wait for any last users to have gone away. */
178 llist_for_each_entry_safe(n
, t
, node
, mnt_llist
)
/**
 * put_ipc_ns - drop a reference to an ipc namespace.
 * @ns: the namespace to put
 *
 * If this is the last task in the namespace exiting, and
 * it is dropping the refcount to 0, then it can race with
 * a task in another ipc namespace but in a mounts namespace
 * which has this ipcns's mqueuefs mounted, doing some action
 * with one of the mqueuefs files.  That can raise the refcount.
 * So dropping the refcount, and raising the refcount when
 * accessing it through the VFS, are protected with mq_lock.
 *
 * (Clearly, a task raising the refcount on its own ipc_ns
 * needn't take mq_lock since it can't race with the last task
 * in the ipcns exiting).
 */
/*
 * Drop one reference on @ns.  The if-body runs when
 * refcount_dec_and_lock() returns true; in that case mq_lock is released
 * and the namespace is added to free_ipc_list for free_ipc_work.
 *
 * NOTE(review): a line between taking mq_lock and spin_unlock() is not
 * visible in this view.
 */
198 void put_ipc_ns(struct ipc_namespace
*ns
)
200 if (refcount_dec_and_lock(&ns
->ns
.count
, &mq_lock
)) {
202 spin_unlock(&mq_lock
);
/*
 * The work is scheduled only when llist_add() returns true --
 * presumably meaning the list was empty before; TODO confirm.
 */
204 if (llist_add(&ns
->mnt_llist
, &free_ipc_list
))
205 schedule_work(&free_ipc_work
);
209 static inline struct ipc_namespace
*to_ipc_ns(struct ns_common
*ns
)
211 return container_of(ns
, struct ipc_namespace
, ns
);
/*
 * Obtain @task's ipc namespace through get_ipc_ns() and return its embedded
 * ns_common, or NULL if 'ns' is still NULL at the end.
 *
 * NOTE(review): the lines around the read of task->nsproxy (any locking and
 * any NULL check on nsproxy) are not visible in this view.
 */
214 static struct ns_common
*ipcns_get(struct task_struct
*task
)
216 struct ipc_namespace
*ns
= NULL
;
217 struct nsproxy
*nsproxy
;
220 nsproxy
= task
->nsproxy
;
222 ns
= get_ipc_ns(nsproxy
->ipc_ns
);
/* Hand out the generic ns_common member, not the ipc_namespace itself. */
225 return ns
? &ns
->ns
: NULL
;
/*
 * Drop the reference on the ipc namespace that embeds @ns.
 *
 * put_ipc_ns() is called as a plain statement: ISO C does not allow
 * "return <expression>;" in a function returning void, even when the
 * expression itself has type void.
 */
static void ipcns_put(struct ns_common *ns)
{
	put_ipc_ns(to_ipc_ns(ns));
}
/*
 * Switch @nsset's nsproxy over to the ipc namespace that embeds @new.
 *
 * The visible check requires CAP_SYS_ADMIN both in the target namespace's
 * user_ns and in the user_ns of nsset->cred.
 *
 * NOTE(review): the statement guarded by that check and the function's
 * return statements are not visible in this view.
 */
233 static int ipcns_install(struct nsset
*nsset
, struct ns_common
*new)
235 struct nsproxy
*nsproxy
= nsset
->nsproxy
;
236 struct ipc_namespace
*ns
= to_ipc_ns(new);
237 if (!ns_capable(ns
->user_ns
, CAP_SYS_ADMIN
) ||
238 !ns_capable(nsset
->cred
->user_ns
, CAP_SYS_ADMIN
))
/*
 * (The statement guarded by the check above is not visible here.)
 * Below: put the old ipc_ns, then store a new reference on the target.
 */
241 put_ipc_ns(nsproxy
->ipc_ns
);
242 nsproxy
->ipc_ns
= get_ipc_ns(ns
);
246 static struct user_namespace
*ipcns_owner(struct ns_common
*ns
)
248 return to_ipc_ns(ns
)->user_ns
;
251 const struct proc_ns_operations ipcns_operations
= {
253 .type
= CLONE_NEWIPC
,
256 .install
= ipcns_install
,
257 .owner
= ipcns_owner
,