/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/version.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include "async-thread.h"
#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
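/*
 * The three bits above index into btrfs_work->flags: WORK_QUEUED_BIT is
 * set while an item sits on a pending list and guards against double
 * queueing, WORK_DONE_BIT records that the work function has finished,
 * and WORK_ORDER_DONE_BIT records that the ordered completion hook for
 * the item has already been called.
 */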
/*
 * container for the kthread task pointer and the list of pending work
 * One of these is allocated per thread.
 */
struct btrfs_worker_thread {
	/* pool we belong to */
	struct btrfs_workers *workers;

	/* list of struct btrfs_work that are waiting for service */
	struct list_head pending;

	/* list of worker threads from struct btrfs_workers */
	struct list_head worker_list;

	/* kthread */
	struct task_struct *task;

	/* number of things on the pending list */
	atomic_t num_pending;

	unsigned long sequence;

	/* protects the pending list. */
	spinlock_t lock;

	/* set to non-zero when this thread is already awake and kicking */
	int working;

	/* are we currently idle */
	int idle;
};
/*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
 */
static void check_idle_worker(struct btrfs_worker_thread *worker)
{
	if (!worker->idle && atomic_read(&worker->num_pending) <
	    worker->workers->idle_thresh / 2) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 1;
		list_move(&worker->worker_list, &worker->workers->idle_list);
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}
/*
 * helper function to move a thread off the idle list after new
 * pending work is added.
 */
static void check_busy_worker(struct btrfs_worker_thread *worker)
{
	if (worker->idle && atomic_read(&worker->num_pending) >=
	    worker->workers->idle_thresh) {
		unsigned long flags;
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 0;
		list_move_tail(&worker->worker_list,
			       &worker->workers->worker_list);
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}
}
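/*
 * Note the hysteresis between the two helpers above: a worker only moves
 * back to the idle list once its pending count drops below half of
 * idle_thresh, but it must reach a full idle_thresh of pending items
 * before it is considered busy again.
 */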
static noinline int run_ordered_completions(struct btrfs_workers *workers,
					    struct btrfs_work *work)
{
	unsigned long flags;

	if (!workers->ordered)
		return 0;

	set_bit(WORK_DONE_BIT, &work->flags);

	spin_lock_irqsave(&workers->lock, flags);

	while (!list_empty(&workers->order_list)) {
		work = list_entry(workers->order_list.next,
				  struct btrfs_work, order_list);

		if (!test_bit(WORK_DONE_BIT, &work->flags))
			break;

		/* we are going to call the ordered done function, but
		 * we leave the work item on the list as a barrier so
		 * that later work items that are done don't have their
		 * functions called before this one returns
		 */
		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
			break;

		spin_unlock_irqrestore(&workers->lock, flags);

		work->ordered_func(work);

		/* now take the lock again and call the freeing code */
		spin_lock_irqsave(&workers->lock, flags);
		list_del(&work->order_list);
		work->ordered_free(work);
	}

	spin_unlock_irqrestore(&workers->lock, flags);
	return 0;
}
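/*
 * For ordered pools the work functions themselves may still run in
 * parallel on different workers; only the ordered_func/ordered_free
 * calls above are serialized in submission order, driven by whichever
 * worker happens to finish a later item.
 */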
/*
 * main loop for servicing work items
 */
static int worker_loop(void *arg)
{
	struct btrfs_worker_thread *worker = arg;
	struct list_head *cur;
	struct btrfs_work *work;
	do {
		spin_lock_irq(&worker->lock);
		while (!list_empty(&worker->pending)) {
			cur = worker->pending.next;
			work = list_entry(cur, struct btrfs_work, list);
			list_del(&work->list);
			clear_bit(WORK_QUEUED_BIT, &work->flags);

			work->worker = worker;
			spin_unlock_irq(&worker->lock);

			work->func(work);

			atomic_dec(&worker->num_pending);
			/*
			 * unless this is an ordered work queue,
			 * 'work' was probably freed by func above.
			 */
			run_ordered_completions(worker->workers, work);

			spin_lock_irq(&worker->lock);
			check_idle_worker(worker);
		}
		worker->working = 0;
		if (freezing(current)) {
			refrigerator();
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&worker->lock);
			if (!kthread_should_stop())
				schedule();
			__set_current_state(TASK_RUNNING);
		}
	} while (!kthread_should_stop());
	return 0;
}
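/*
 * The loop above parks the thread in TASK_INTERRUPTIBLE and relies on
 * btrfs_queue_worker() calling wake_up_process() when new work arrives;
 * kthread_should_stop() is checked both before sleeping and at the loop
 * bottom so btrfs_stop_workers() can tear the thread down promptly.
 */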
/*
 * this will wait for all the worker threads to shutdown
 */
int btrfs_stop_workers(struct btrfs_workers *workers)
{
	struct list_head *cur;
	struct btrfs_worker_thread *worker;

	list_splice_init(&workers->idle_list, &workers->worker_list);
	while (!list_empty(&workers->worker_list)) {
		cur = workers->worker_list.next;
		worker = list_entry(cur, struct btrfs_worker_thread,
				    worker_list);
		kthread_stop(worker->task);
		list_del(&worker->worker_list);
		kfree(worker);
	}
	return 0;
}
/*
 * simple init on struct btrfs_workers
 */
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
{
	workers->num_workers = 0;
	INIT_LIST_HEAD(&workers->worker_list);
	INIT_LIST_HEAD(&workers->idle_list);
	INIT_LIST_HEAD(&workers->order_list);
	spin_lock_init(&workers->lock);
	workers->max_workers = max;
	workers->idle_thresh = 32;
	workers->name = name;
	workers->ordered = 0;
}
/*
 * starts new worker threads.  This does not enforce the max worker
 * count in case you need to temporarily go past it.
 */
int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
{
	struct btrfs_worker_thread *worker;
	int ret = 0;
	int i;

	for (i = 0; i < num_workers; i++) {
		worker = kzalloc(sizeof(*worker), GFP_NOFS);
		if (!worker) {
			ret = -ENOMEM;
			goto fail;
		}

		INIT_LIST_HEAD(&worker->pending);
		INIT_LIST_HEAD(&worker->worker_list);
		spin_lock_init(&worker->lock);
		atomic_set(&worker->num_pending, 0);
		worker->task = kthread_run(worker_loop, worker,
					   "btrfs-%s-%d", workers->name,
					   workers->num_workers + i);
		worker->workers = workers;
		if (IS_ERR(worker->task)) {
			kfree(worker);
			ret = PTR_ERR(worker->task);
			goto fail;
		}

		spin_lock_irq(&workers->lock);
		list_add_tail(&worker->worker_list, &workers->idle_list);
		worker->idle = 1;
		workers->num_workers++;
		spin_unlock_irq(&workers->lock);
	}
	return 0;
fail:
	btrfs_stop_workers(workers);
	return ret;
}
/*
 * run through the list and find a worker thread that doesn't have a lot
 * to do right now.  This can return null if we aren't yet at the thread
 * count limit and all of the threads are busy.
 */
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	struct list_head *next;
	int enforce_min = workers->num_workers < workers->max_workers;

	/*
	 * if we find an idle thread, don't move it to the end of the
	 * idle list.  This improves the chance that the next submission
	 * will reuse the same thread, and maybe catch it while it is still
	 * idle
	 */
	if (!list_empty(&workers->idle_list)) {
		next = workers->idle_list.next;
		worker = list_entry(next, struct btrfs_worker_thread,
				    worker_list);
		return worker;
	}
	if (enforce_min || list_empty(&workers->worker_list))
		return NULL;

	/*
	 * if we pick a busy task, move the task to the end of the list.
	 * hopefully this will keep things somewhat evenly balanced.
	 * Do the move in batches based on the sequence number.  This groups
	 * requests submitted at roughly the same time onto the same worker.
	 */
	next = workers->worker_list.next;
	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
	atomic_inc(&worker->num_pending);
	worker->sequence++;

	if (worker->sequence % workers->idle_thresh == 0)
		list_move_tail(next, &workers->worker_list);
	return worker;
}
/*
 * selects a worker thread to take the next job.  This will either find
 * an idle worker, start a new worker up to the max count, or just return
 * one of the existing busy workers.
 */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;

again:
	spin_lock_irqsave(&workers->lock, flags);
	worker = next_worker(workers);
	spin_unlock_irqrestore(&workers->lock, flags);

	if (!worker) {
		spin_lock_irqsave(&workers->lock, flags);
		if (workers->num_workers >= workers->max_workers) {
			struct list_head *fallback = NULL;
			/*
			 * we have failed to find any workers, just
			 * return the first one
			 */
			if (!list_empty(&workers->worker_list))
				fallback = workers->worker_list.next;
			if (!list_empty(&workers->idle_list))
				fallback = workers->idle_list.next;
			BUG_ON(!fallback);
			worker = list_entry(fallback,
				  struct btrfs_worker_thread, worker_list);
			spin_unlock_irqrestore(&workers->lock, flags);
		} else {
			spin_unlock_irqrestore(&workers->lock, flags);
			/* we're below the limit, start another worker */
			btrfs_start_workers(workers, 1);
			goto again;
		}
	}
	return worker;
}
/*
 * btrfs_requeue_work just puts the work item back on the tail of the list
 * it was taken from.  It is intended for use with long running work functions
 * that make some progress and want to give the cpu up for others.
 */
int btrfs_requeue_work(struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker = work->worker;
	unsigned long flags;

	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		goto out;

	spin_lock_irqsave(&worker->lock, flags);
	atomic_inc(&worker->num_pending);
	list_add_tail(&work->list, &worker->pending);

	/* by definition we're busy, take ourselves off the idle
	 * list
	 */
	if (worker->idle) {
		spin_lock_irqsave(&worker->workers->lock, flags);
		worker->idle = 0;
		list_move_tail(&worker->worker_list,
			       &worker->workers->worker_list);
		spin_unlock_irqrestore(&worker->workers->lock, flags);
	}

	spin_unlock_irqrestore(&worker->lock, flags);

out:
	return 0;
}
/*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
	struct btrfs_worker_thread *worker;
	unsigned long flags;
	int wake = 0;

	/* don't requeue something already on a list */
	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
		goto out;

	worker = find_worker(workers);
	if (workers->ordered) {
		spin_lock_irqsave(&workers->lock, flags);
		list_add_tail(&work->order_list, &workers->order_list);
		spin_unlock_irqrestore(&workers->lock, flags);
	} else {
		INIT_LIST_HEAD(&work->order_list);
	}

	spin_lock_irqsave(&worker->lock, flags);
	atomic_inc(&worker->num_pending);
	check_busy_worker(worker);
	list_add_tail(&work->list, &worker->pending);

	/*
	 * avoid calling into wake_up_process if this thread has already
	 * been kicked
	 */
	if (!worker->working)
		wake = 1;
	worker->working = 1;

	spin_unlock_irqrestore(&worker->lock, flags);

	if (wake)
		wake_up_process(worker->task);
out:
	return 0;
}