/* [pf-kernel.git] io_uring/eventfd.c */
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/mm.h>
5 #include <linux/slab.h>
6 #include <linux/eventfd.h>
7 #include <linux/eventpoll.h>
8 #include <linux/io_uring.h>
9 #include <linux/io_uring_types.h>
11 #include "io-wq.h"
12 #include "eventfd.h"
14 struct io_ev_fd {
15 struct eventfd_ctx *cq_ev_fd;
16 unsigned int eventfd_async;
17 /* protected by ->completion_lock */
18 unsigned last_cq_tail;
19 refcount_t refs;
20 atomic_t ops;
21 struct rcu_head rcu;
/* Bit indices for io_ev_fd->ops, set with atomic_fetch_or(BIT(...)) */
enum {
	/* a deferred eventfd signal is already queued via RCU */
	IO_EVENTFD_OP_SIGNAL_BIT,
};
28 static void io_eventfd_free(struct rcu_head *rcu)
30 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
32 eventfd_ctx_put(ev_fd->cq_ev_fd);
33 kfree(ev_fd);
36 static void io_eventfd_put(struct io_ev_fd *ev_fd)
38 if (refcount_dec_and_test(&ev_fd->refs))
39 call_rcu(&ev_fd->rcu, io_eventfd_free);
42 static void io_eventfd_do_signal(struct rcu_head *rcu)
44 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
46 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
47 io_eventfd_put(ev_fd);
50 static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
52 if (put_ref)
53 io_eventfd_put(ev_fd);
54 rcu_read_unlock();
58 * Returns true if the caller should put the ev_fd reference, false if not.
60 static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
62 if (eventfd_signal_allowed()) {
63 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
64 return true;
66 if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
67 call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
68 return false;
70 return true;
74 * Trigger if eventfd_async isn't set, or if it's set and the caller is
75 * an async worker. If ev_fd isn't valid, obviously return false.
77 static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
79 if (ev_fd)
80 return !ev_fd->eventfd_async || io_wq_current_is_worker();
81 return false;
85 * On success, returns with an ev_fd reference grabbed and the RCU read
86 * lock held.
88 static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
90 struct io_ev_fd *ev_fd;
92 if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
93 return NULL;
95 rcu_read_lock();
98 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
99 * and eventfd_signal
101 ev_fd = rcu_dereference(ctx->io_ev_fd);
104 * Check again if ev_fd exists in case an io_eventfd_unregister call
105 * completed between the NULL check of ctx->io_ev_fd at the start of
106 * the function and rcu_read_lock.
108 if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
109 return ev_fd;
111 rcu_read_unlock();
112 return NULL;
/*
 * Signal the ring's registered eventfd, if any. Unconditional variant:
 * signals whenever the eventfd is registered, enabled and triggerable.
 */
void io_eventfd_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = io_eventfd_grab(ctx);
	if (ev_fd)
		io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
}
124 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
126 struct io_ev_fd *ev_fd;
128 ev_fd = io_eventfd_grab(ctx);
129 if (ev_fd) {
130 bool skip, put_ref = true;
133 * Eventfd should only get triggered when at least one event
134 * has been posted. Some applications rely on the eventfd
135 * notification count only changing IFF a new CQE has been
136 * added to the CQ ring. There's no dependency on 1:1
137 * relationship between how many times this function is called
138 * (and hence the eventfd count) and number of CQEs posted to
139 * the CQ ring.
141 spin_lock(&ctx->completion_lock);
142 skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
143 ev_fd->last_cq_tail = ctx->cached_cq_tail;
144 spin_unlock(&ctx->completion_lock);
146 if (!skip)
147 put_ref = __io_eventfd_signal(ev_fd);
149 io_eventfd_release(ev_fd, put_ref);
153 int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
154 unsigned int eventfd_async)
156 struct io_ev_fd *ev_fd;
157 __s32 __user *fds = arg;
158 int fd;
160 ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
161 lockdep_is_held(&ctx->uring_lock));
162 if (ev_fd)
163 return -EBUSY;
165 if (copy_from_user(&fd, fds, sizeof(*fds)))
166 return -EFAULT;
168 ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
169 if (!ev_fd)
170 return -ENOMEM;
172 ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
173 if (IS_ERR(ev_fd->cq_ev_fd)) {
174 int ret = PTR_ERR(ev_fd->cq_ev_fd);
176 kfree(ev_fd);
177 return ret;
180 spin_lock(&ctx->completion_lock);
181 ev_fd->last_cq_tail = ctx->cached_cq_tail;
182 spin_unlock(&ctx->completion_lock);
184 ev_fd->eventfd_async = eventfd_async;
185 ctx->has_evfd = true;
186 refcount_set(&ev_fd->refs, 1);
187 atomic_set(&ev_fd->ops, 0);
188 rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
189 return 0;
192 int io_eventfd_unregister(struct io_ring_ctx *ctx)
194 struct io_ev_fd *ev_fd;
196 ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
197 lockdep_is_held(&ctx->uring_lock));
198 if (ev_fd) {
199 ctx->has_evfd = false;
200 rcu_assign_pointer(ctx->io_ev_fd, NULL);
201 io_eventfd_put(ev_fd);
202 return 0;
205 return -ENXIO;