4 * Copyright IBM, Corp. 2011
7 * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
8 * Kevin Wolf <kwolf@redhat.com>
10 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
11 * See the COPYING.LIB file in the top-level directory.
15 #include "qemu/osdep.h"
17 #include "qemu/thread.h"
18 #include "qemu/atomic.h"
19 #include "qemu/coroutine_int.h"
20 #include "qemu/coroutine-tls.h"
21 #include "qemu/cutils.h"
22 #include "block/aio.h"
25 COROUTINE_POOL_BATCH_MAX_SIZE
= 128,
29 * Coroutine creation and deletion is expensive so a pool of unused coroutines
30 * is kept as a cache. When the pool has coroutines available, they are
31 * recycled instead of creating new ones from scratch. Coroutines are added to
32 * the pool upon termination.
34 * The pool is global but each thread maintains a small local pool to avoid
35 * global pool contention. Threads fetch and return batches of coroutines from
36 * the global pool to maintain their local pool. The local pool holds up to two
37 * batches whereas the maximum size of the global pool is controlled by the
38 * qemu_coroutine_inc_pool_size() API.
40 * .-----------------------------------.
41 * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool
42 * `-----------------------------------'
44 * .-------------------.
45 * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches)
46 * `-------------------'
48 typedef struct CoroutinePoolBatch
{
49 /* Batches are kept in a list */
50 QSLIST_ENTRY(CoroutinePoolBatch
) next
;
52 /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */
53 QSLIST_HEAD(, Coroutine
) list
;
57 typedef QSLIST_HEAD(, CoroutinePoolBatch
) CoroutinePool
;
59 /* Host operating system limit on number of pooled coroutines */
60 static unsigned int global_pool_hard_max_size
;
62 static QemuMutex global_pool_lock
; /* protects the following variables */
63 static CoroutinePool global_pool
= QSLIST_HEAD_INITIALIZER(global_pool
);
64 static unsigned int global_pool_size
;
65 static unsigned int global_pool_max_size
= COROUTINE_POOL_BATCH_MAX_SIZE
;
67 QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool
, local_pool
);
68 QEMU_DEFINE_STATIC_CO_TLS(Notifier
, local_pool_cleanup_notifier
);
70 static CoroutinePoolBatch
*coroutine_pool_batch_new(void)
72 CoroutinePoolBatch
*batch
= g_new(CoroutinePoolBatch
, 1);
74 QSLIST_INIT(&batch
->list
);
79 static void coroutine_pool_batch_delete(CoroutinePoolBatch
*batch
)
84 QSLIST_FOREACH_SAFE(co
, &batch
->list
, pool_next
, tmp
) {
85 QSLIST_REMOVE_HEAD(&batch
->list
, pool_next
);
86 qemu_coroutine_delete(co
);
91 static void local_pool_cleanup(Notifier
*n
, void *value
)
93 CoroutinePool
*local_pool
= get_ptr_local_pool();
94 CoroutinePoolBatch
*batch
;
95 CoroutinePoolBatch
*tmp
;
97 QSLIST_FOREACH_SAFE(batch
, local_pool
, next
, tmp
) {
98 QSLIST_REMOVE_HEAD(local_pool
, next
);
99 coroutine_pool_batch_delete(batch
);
103 /* Ensure the atexit notifier is registered */
104 static void local_pool_cleanup_init_once(void)
106 Notifier
*notifier
= get_ptr_local_pool_cleanup_notifier();
107 if (!notifier
->notify
) {
108 notifier
->notify
= local_pool_cleanup
;
109 qemu_thread_atexit_add(notifier
);
113 /* Helper to get the next unused coroutine from the local pool */
114 static Coroutine
*coroutine_pool_get_local(void)
116 CoroutinePool
*local_pool
= get_ptr_local_pool();
117 CoroutinePoolBatch
*batch
= QSLIST_FIRST(local_pool
);
120 if (unlikely(!batch
)) {
124 co
= QSLIST_FIRST(&batch
->list
);
125 QSLIST_REMOVE_HEAD(&batch
->list
, pool_next
);
128 if (batch
->size
== 0) {
129 QSLIST_REMOVE_HEAD(local_pool
, next
);
130 coroutine_pool_batch_delete(batch
);
135 /* Get the next batch from the global pool */
136 static void coroutine_pool_refill_local(void)
138 CoroutinePool
*local_pool
= get_ptr_local_pool();
139 CoroutinePoolBatch
*batch
= NULL
;
141 WITH_QEMU_LOCK_GUARD(&global_pool_lock
) {
142 batch
= QSLIST_FIRST(&global_pool
);
145 QSLIST_REMOVE_HEAD(&global_pool
, next
);
146 global_pool_size
-= batch
->size
;
151 QSLIST_INSERT_HEAD(local_pool
, batch
, next
);
152 local_pool_cleanup_init_once();
156 /* Add a batch of coroutines to the global pool */
157 static void coroutine_pool_put_global(CoroutinePoolBatch
*batch
)
159 WITH_QEMU_LOCK_GUARD(&global_pool_lock
) {
160 unsigned int max
= MIN(global_pool_max_size
,
161 global_pool_hard_max_size
);
163 if (global_pool_size
< max
) {
164 QSLIST_INSERT_HEAD(&global_pool
, batch
, next
);
166 /* Overshooting the max pool size is allowed */
167 global_pool_size
+= batch
->size
;
172 /* The global pool was full, so throw away this batch */
173 coroutine_pool_batch_delete(batch
);
176 /* Get the next unused coroutine from the pool or return NULL */
177 static Coroutine
*coroutine_pool_get(void)
181 co
= coroutine_pool_get_local();
183 coroutine_pool_refill_local();
184 co
= coroutine_pool_get_local();
189 static void coroutine_pool_put(Coroutine
*co
)
191 CoroutinePool
*local_pool
= get_ptr_local_pool();
192 CoroutinePoolBatch
*batch
= QSLIST_FIRST(local_pool
);
194 if (unlikely(!batch
)) {
195 batch
= coroutine_pool_batch_new();
196 QSLIST_INSERT_HEAD(local_pool
, batch
, next
);
197 local_pool_cleanup_init_once();
200 if (unlikely(batch
->size
>= COROUTINE_POOL_BATCH_MAX_SIZE
)) {
201 CoroutinePoolBatch
*next
= QSLIST_NEXT(batch
, next
);
203 /* Is the local pool full? */
205 QSLIST_REMOVE_HEAD(local_pool
, next
);
206 coroutine_pool_put_global(batch
);
209 batch
= coroutine_pool_batch_new();
210 QSLIST_INSERT_HEAD(local_pool
, batch
, next
);
213 QSLIST_INSERT_HEAD(&batch
->list
, co
, pool_next
);
217 Coroutine
*qemu_coroutine_create(CoroutineEntry
*entry
, void *opaque
)
219 Coroutine
*co
= NULL
;
221 if (IS_ENABLED(CONFIG_COROUTINE_POOL
)) {
222 co
= coroutine_pool_get();
226 co
= qemu_coroutine_new();
230 co
->entry_arg
= opaque
;
231 QSIMPLEQ_INIT(&co
->co_queue_wakeup
);
235 static void coroutine_delete(Coroutine
*co
)
239 if (IS_ENABLED(CONFIG_COROUTINE_POOL
)) {
240 coroutine_pool_put(co
);
242 qemu_coroutine_delete(co
);
246 void qemu_aio_coroutine_enter(AioContext
*ctx
, Coroutine
*co
)
248 QSIMPLEQ_HEAD(, Coroutine
) pending
= QSIMPLEQ_HEAD_INITIALIZER(pending
);
249 Coroutine
*from
= qemu_coroutine_self();
251 QSIMPLEQ_INSERT_TAIL(&pending
, co
, co_queue_next
);
253 /* Run co and any queued coroutines */
254 while (!QSIMPLEQ_EMPTY(&pending
)) {
255 Coroutine
*to
= QSIMPLEQ_FIRST(&pending
);
259 * Read to before to->scheduled; pairs with qatomic_cmpxchg in
260 * qemu_co_sleep(), aio_co_schedule() etc.
262 smp_read_barrier_depends();
264 const char *scheduled
= qatomic_read(&to
->scheduled
);
266 QSIMPLEQ_REMOVE_HEAD(&pending
, co_queue_next
);
268 trace_qemu_aio_coroutine_enter(ctx
, from
, to
, to
->entry_arg
);
270 /* if the Coroutine has already been scheduled, entering it again will
271 * cause us to enter it twice, potentially even after the coroutine has
275 "%s: Co-routine was already scheduled in '%s'\n",
276 __func__
, scheduled
);
281 fprintf(stderr
, "Co-routine re-entered recursively\n");
288 /* Store to->ctx before anything that stores to. Matches
289 * barrier in aio_co_wake and qemu_co_mutex_wake.
293 ret
= qemu_coroutine_switch(from
, to
, COROUTINE_ENTER
);
295 /* Queued coroutines are run depth-first; previously pending coroutines
296 * run after those queued more recently.
298 QSIMPLEQ_PREPEND(&pending
, &to
->co_queue_wakeup
);
301 case COROUTINE_YIELD
:
303 case COROUTINE_TERMINATE
:
304 assert(!to
->locks_held
);
305 trace_qemu_coroutine_terminate(to
);
306 coroutine_delete(to
);
314 void qemu_coroutine_enter(Coroutine
*co
)
316 qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co
);
319 void qemu_coroutine_enter_if_inactive(Coroutine
*co
)
321 if (!qemu_coroutine_entered(co
)) {
322 qemu_coroutine_enter(co
);
326 void coroutine_fn
qemu_coroutine_yield(void)
328 Coroutine
*self
= qemu_coroutine_self();
329 Coroutine
*to
= self
->caller
;
331 trace_qemu_coroutine_yield(self
, to
);
334 fprintf(stderr
, "Co-routine is yielding to no one\n");
339 qemu_coroutine_switch(self
, to
, COROUTINE_YIELD
);
342 bool qemu_coroutine_entered(Coroutine
*co
)
347 AioContext
*qemu_coroutine_get_aio_context(Coroutine
*co
)
352 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size
)
354 QEMU_LOCK_GUARD(&global_pool_lock
);
355 global_pool_max_size
+= additional_pool_size
;
358 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size
)
360 QEMU_LOCK_GUARD(&global_pool_lock
);
361 global_pool_max_size
-= removing_pool_size
;
364 static unsigned int get_global_pool_hard_max_size(void)
367 g_autofree
char *contents
= NULL
;
371 * Linux processes can have up to max_map_count virtual memory areas
372 * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We
373 * must limit the coroutine pool to a safe size to avoid running out of
376 if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents
, NULL
,
378 qemu_strtoi(contents
, NULL
, 10, &max_map_count
) == 0) {
380 * This is an upper bound that avoids exceeding max_map_count. Leave a
381 * fixed amount for non-coroutine users like library dependencies,
382 * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the
385 if (max_map_count
> 5000) {
386 return (max_map_count
- 5000) / 2;
388 /* Disable the global pool but threads still have local pools */
397 static void __attribute__((constructor
)) qemu_coroutine_init(void)
399 qemu_mutex_init(&global_pool_lock
);
400 global_pool_hard_max_size
= get_global_pool_hard_max_size();