// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/namespace.c
 * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc.
 */
9 #include <linux/ipc_namespace.h>
10 #include <linux/rcupdate.h>
11 #include <linux/nsproxy.h>
12 #include <linux/slab.h>
13 #include <linux/cred.h>
15 #include <linux/mount.h>
16 #include <linux/user_namespace.h>
17 #include <linux/proc_ns.h>
18 #include <linux/sched/task.h>
/*
 * The work queue is used to avoid the cost of synchronize_rcu in kern_unmount.
 */
/* Work handler that drains the list of namespaces queued for freeing. */
static void free_ipc(struct work_struct *unused);

/* The one work item used by this file; its handler is free_ipc(). */
static DECLARE_WORK(free_ipc_work, free_ipc);
28 static struct ucounts
*inc_ipc_namespaces(struct user_namespace
*ns
)
30 return inc_ucount(ns
, current_euid(), UCOUNT_IPC_NAMESPACES
);
33 static void dec_ipc_namespaces(struct ucounts
*ucounts
)
35 dec_ucount(ucounts
, UCOUNT_IPC_NAMESPACES
);
/*
 * create_ipc_ns - allocate and set up a new IPC namespace.
 * @user_ns: user namespace the new IPC namespace is charged to and which is
 *           recorded (with a reference taken) as its owner.
 * @old_ns:  not referenced by any line visible in this excerpt.
 *
 * Steps visible here, in order: charge UCOUNT_IPC_NAMESPACES through
 * inc_ipc_namespaces(); zero-allocate the structure with
 * GFP_KERNEL_ACCOUNT; allocate an inode number; point ns.ops at
 * ipcns_operations; set the refcount to 1; pin @user_ns; remember the
 * ucounts handle; register the mqueue and IPC sysctls; call msg_init_ns().
 *
 * NOTE(review): this copy of the function is incomplete. The declaration of
 * err, the opening and closing braces, every branch target, the labels and
 * the return statements are missing. Restore them from the upstream file
 * before reasoning about control flow or error codes.
 */
38 static struct ipc_namespace
*create_ipc_ns(struct user_namespace
*user_ns
,
39 struct ipc_namespace
*old_ns
)
41 struct ipc_namespace
*ns
;
42 struct ucounts
*ucounts
;
47 ucounts
= inc_ipc_namespaces(user_ns
);
50 * IPC namespaces are freed asynchronously, by free_ipc_work.
51 * If frees were pending, flush_work will wait, and
52 * return true. Fail the allocation if no frees are pending.
54 if (flush_work(&free_ipc_work
))
60 ns
= kzalloc(sizeof(struct ipc_namespace
), GFP_KERNEL_ACCOUNT
);
64 err
= ns_alloc_inum(&ns
->ns
);
67 ns
->ns
.ops
= &ipcns_operations
;
/* The new namespace starts with a single reference. */
69 refcount_set(&ns
->ns
.count
, 1);
70 ns
->user_ns
= get_user_ns(user_ns
);
/* Keep the charge handle so it can be dropped when the namespace goes away. */
71 ns
->ucounts
= ucounts
;
78 if (!setup_mq_sysctls(ns
))
81 if (!setup_ipc_sysctls(ns
))
84 err
= msg_init_ns(ns
);
/*
 * The calls below release what was set up above, in reverse order.
 * Presumably this is the error-unwind path; its labels are not visible
 * in this excerpt.
 */
94 retire_ipc_sysctls(ns
);
96 retire_mq_sysctls(ns
);
99 put_user_ns(ns
->user_ns
);
100 ns_free_inum(&ns
->ns
);
104 dec_ipc_namespaces(ucounts
);
109 struct ipc_namespace
*copy_ipcs(unsigned long flags
,
110 struct user_namespace
*user_ns
, struct ipc_namespace
*ns
)
112 if (!(flags
& CLONE_NEWIPC
))
113 return get_ipc_ns(ns
);
114 return create_ipc_ns(user_ns
, ns
);
/**
 * free_ipcs - free all ipcs of one type
 * @ns: the namespace to remove the ipcs from
 * @ids: the table of ipcs to free
 * @free: the function called to free each individual ipc
 *
 * Called for each kind of ipc when an ipc_namespace exits.
 */
/*
 * Visible behaviour: take ids->rwsem for writing, snapshot ids->in_use,
 * then probe ids->ipcs_idr with idr_find() for next_id = 0, 1, 2, ... while
 * total < in_use. Each entry is locked with ipc_lock_object(). The rwsem is
 * released after the loop.
 *
 * NOTE(review): the declarations of next_id, total and in_use, the handling
 * of a failed idr_find(), the call to the free callback, the increment of
 * total, and the braces are all missing from this excerpt. Restore them
 * from the upstream file before changing the loop.
 */
125 void free_ipcs(struct ipc_namespace
*ns
, struct ipc_ids
*ids
,
126 void (*free
)(struct ipc_namespace
*, struct kern_ipc_perm
*))
128 struct kern_ipc_perm
*perm
;
132 down_write(&ids
->rwsem
);
/* Snapshot the entry count; it bounds the scan below. */
134 in_use
= ids
->in_use
;
136 for (total
= 0, next_id
= 0; total
< in_use
; next_id
++) {
137 perm
= idr_find(&ids
->ipcs_idr
, next_id
);
141 ipc_lock_object(perm
);
145 up_write(&ids
->rwsem
);
/*
 * Tear down one IPC namespace. Visible steps: unregister the mqueue and IPC
 * sysctls, drop the UCOUNT_IPC_NAMESPACES charge stored in ns->ucounts,
 * drop the reference on the owning user namespace, and free the inode
 * number.
 *
 * NOTE(review): lines are missing between the leading comment and the first
 * retire_*() call, and after ns_free_inum(). Any further teardown,
 * including the release of the structure itself, is not visible here.
 */
148 static void free_ipc_ns(struct ipc_namespace
*ns
)
151 * Caller needs to wait for an RCU grace period to have passed
152 * after making the mount point inaccessible to new accesses.
159 retire_mq_sysctls(ns
);
160 retire_ipc_sysctls(ns
);
162 dec_ipc_namespaces(ns
->ucounts
);
163 put_user_ns(ns
->user_ns
);
164 ns_free_inum(&ns
->ns
);
/*
 * Lock-less list of namespaces waiting to be freed: put_ipc_ns() adds to
 * it and free_ipc() empties it.
 */
static LLIST_HEAD(free_ipc_list);
/*
 * Work handler for free_ipc_work. It detaches the entire free_ipc_list in
 * one step with llist_del_all(), then walks the detached entries twice.
 * The first pass calls mnt_make_shortterm() on each namespace's mq_mnt.
 *
 * @unused: the work item; no visible line reads it.
 *
 * NOTE(review): the statement that performs the wait announced by the
 * comment below, and the body of the second loop, are missing from this
 * excerpt. Presumably the second pass frees each namespace; confirm
 * against the upstream file.
 */
169 static void free_ipc(struct work_struct
*unused
)
171 struct llist_node
*node
= llist_del_all(&free_ipc_list
);
172 struct ipc_namespace
*n
, *t
;
174 llist_for_each_entry_safe(n
, t
, node
, mnt_llist
)
175 mnt_make_shortterm(n
->mq_mnt
);
177 /* Wait for any last users to have gone away. */
180 llist_for_each_entry_safe(n
, t
, node
, mnt_llist
)
/*
 * put_ipc_ns - drop a reference to an ipc namespace.
 * @ns: the namespace to put
 *
 * If this is the last task in the namespace exiting, and
 * it is dropping the refcount to 0, then it can race with
 * a task in another ipc namespace but in a mounts namespace
 * which has this ipcns's mqueuefs mounted, doing some action
 * with one of the mqueuefs files. That can raise the refcount.
 * So dropping the refcount, and raising the refcount when
 * accessing it through the VFS, are protected with mq_lock.
 *
 * (Clearly, a task raising the refcount on its own ipc_ns
 * needn't take mq_lock since it can't race with the last task
 * in the ipcns exiting).
 */
/*
 * Drop one reference on @ns. refcount_dec_and_lock() returns true only when
 * the count reached zero, and in that case it returns with mq_lock held.
 * The namespace is then queued on free_ipc_list, and the actual teardown is
 * deferred to free_ipc_work.
 *
 * NOTE(review): a statement between taking and releasing mq_lock is missing
 * from this excerpt, as are the closing braces.
 */
200 void put_ipc_ns(struct ipc_namespace
*ns
)
202 if (refcount_dec_and_lock(&ns
->ns
.count
, &mq_lock
)) {
204 spin_unlock(&mq_lock
);
/*
 * llist_add() reports whether the list was empty before the insertion,
 * so the work is scheduled only by the first producer of a batch.
 */
206 if (llist_add(&ns
->mnt_llist
, &free_ipc_list
))
207 schedule_work(&free_ipc_work
);
211 static inline struct ipc_namespace
*to_ipc_ns(struct ns_common
*ns
)
213 return container_of(ns
, struct ipc_namespace
, ns
);
/*
 * Look up the IPC namespace of @task through task->nsproxy and take a
 * reference on it with get_ipc_ns().
 *
 * Returns a pointer to the namespace's embedded ns_common, or NULL when no
 * namespace was obtained (ns keeps its initial NULL value).
 *
 * NOTE(review): the lines around the nsproxy access are missing from this
 * excerpt. Presumably they lock the task and guard against a NULL nsproxy;
 * confirm against the upstream file.
 */
216 static struct ns_common
*ipcns_get(struct task_struct
*task
)
218 struct ipc_namespace
*ns
= NULL
;
219 struct nsproxy
*nsproxy
;
222 nsproxy
= task
->nsproxy
;
224 ns
= get_ipc_ns(nsproxy
->ipc_ns
);
227 return ns
? &ns
->ns
: NULL
;
/*
 * Release the reference held on the IPC namespace that embeds @ns.
 *
 * put_ipc_ns() returns void, so it is called as a plain statement.
 * Writing "return <expression>;" in a void function is an ISO C constraint
 * violation that compilers accept only as an extension.
 */
static void ipcns_put(struct ns_common *ns)
{
	put_ipc_ns(to_ipc_ns(ns));
}
/*
 * Switch the nsproxy carried by @nsset to the IPC namespace that embeds
 * @new.
 *
 * The capability test involves CAP_SYS_ADMIN in two user namespaces: the
 * one owning the target IPC namespace and the one of nsset->cred. After
 * the test, the reference on the old nsproxy->ipc_ns is dropped and a new
 * reference on the target is stored in its place.
 *
 * NOTE(review): the statement taken when the capability test fails and the
 * final return are missing from this excerpt, so the return values cannot
 * be stated from here.
 */
235 static int ipcns_install(struct nsset
*nsset
, struct ns_common
*new)
237 struct nsproxy
*nsproxy
= nsset
->nsproxy
;
238 struct ipc_namespace
*ns
= to_ipc_ns(new);
239 if (!ns_capable(ns
->user_ns
, CAP_SYS_ADMIN
) ||
240 !ns_capable(nsset
->cred
->user_ns
, CAP_SYS_ADMIN
))
/* Drop the old namespace first, then install a referenced pointer to the new one. */
243 put_ipc_ns(nsproxy
->ipc_ns
);
244 nsproxy
->ipc_ns
= get_ipc_ns(ns
);
248 static struct user_namespace
*ipcns_owner(struct ns_common
*ns
)
250 return to_ipc_ns(ns
)->user_ns
;
/*
 * proc_ns operations table for IPC namespaces; create_ipc_ns() stores its
 * address in ns->ns.ops.
 *
 * NOTE(review): only the .type, .install and .owner initialisers are
 * visible in this excerpt. The remaining members and the closing brace are
 * missing and must be restored from the upstream file.
 */
253 const struct proc_ns_operations ipcns_operations
= {
255 .type
= CLONE_NEWIPC
,
258 .install
= ipcns_install
,
259 .owner
= ipcns_owner
,