// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2005-2007 Red Hat GmbH
 *
 * A target that delays reads and/or writes and can send
 * them to different devices.
 *
 * This file is released under the GPL.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/kthread.h>

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "delay"

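/*
 * A delay class describes one class of bios (read, write or flush): its
 * backing device, start sector and delay in milliseconds. "ops" counts
 * the bios of that class currently held on the delayed list.
 */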
struct delay_class {
	struct dm_dev *dev;
	sector_t start;
	unsigned int delay;
	unsigned int ops;
};

struct delay_c {
	struct timer_list delay_timer;
	struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
	spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
	struct workqueue_struct *kdelayd_wq;
	struct work_struct flush_expired_bios;
	struct list_head delayed_bios;
	struct task_struct *worker;
	bool may_delay;

	struct delay_class read;
	struct delay_class write;
	struct delay_class flush;

	int argc;
};

struct dm_delay_info {
	struct delay_c *context;
	struct delay_class *class;
	struct list_head list;
	unsigned long expires;
};

static void handle_delayed_timer(struct timer_list *t)
{
	struct delay_c *dc = from_timer(dc, t, delay_timer);

	queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
}

static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
	timer_reduce(&dc->delay_timer, expires);
}

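/*
 * "Fast" mode uses a dedicated kthread instead of a timer plus workqueue;
 * it is selected in delay_ctr() when the largest requested delay is small.
 */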
static inline bool delay_is_fast(struct delay_c *dc)
{
	return !!dc->worker;
}

static void flush_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		dm_submit_bio_remap(bio, NULL);
		bio = n;
	}
}

static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
	struct dm_delay_info *delayed, *next;
	struct bio_list flush_bio_list;
	LIST_HEAD(local_list);
	unsigned long next_expires = 0;
	bool start_timer = false;

	bio_list_init(&flush_bio_list);

	mutex_lock(&dc->process_bios_lock);
	spin_lock(&dc->delayed_bios_lock);
	list_replace_init(&dc->delayed_bios, &local_list);
	spin_unlock(&dc->delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &local_list, list) {
		cond_resched();
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
						sizeof(struct dm_delay_info));
			list_del(&delayed->list);
			bio_list_add(&flush_bio_list, bio);
			delayed->class->ops--;
			continue;
		}

		if (!delay_is_fast(dc)) {
			if (!start_timer) {
				start_timer = true;
				next_expires = delayed->expires;
			} else {
				next_expires = min(next_expires, delayed->expires);
			}
		}
	}
	spin_lock(&dc->delayed_bios_lock);
	list_splice(&local_list, &dc->delayed_bios);
	spin_unlock(&dc->delayed_bios_lock);
	mutex_unlock(&dc->process_bios_lock);

	if (start_timer)
		queue_timeout(dc, next_expires);

	flush_bios(bio_list_get(&flush_bio_list));
}

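/*
 * Worker for fast mode: flush expired bios in a loop, sleeping only
 * while the delayed list is empty.
 */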
static int flush_worker_fn(void *data)
{
	struct delay_c *dc = data;

	while (!kthread_should_stop()) {
		flush_delayed_bios(dc, false);
		spin_lock(&dc->delayed_bios_lock);
		if (unlikely(list_empty(&dc->delayed_bios))) {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock(&dc->delayed_bios_lock);
			schedule();
		} else {
			spin_unlock(&dc->delayed_bios_lock);
			cond_resched();
		}
	}

	return 0;
}

static void flush_expired_bios(struct work_struct *work)
{
	struct delay_c *dc;

	dc = container_of(work, struct delay_c, flush_expired_bios);
	flush_delayed_bios(dc, false);
}

static void delay_dtr(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	if (dc->kdelayd_wq) {
		timer_shutdown_sync(&dc->delay_timer);
		destroy_workqueue(dc->kdelayd_wq);
	}

	if (dc->read.dev)
		dm_put_device(ti, dc->read.dev);
	if (dc->write.dev)
		dm_put_device(ti, dc->write.dev);
	if (dc->flush.dev)
		dm_put_device(ti, dc->flush.dev);
	if (dc->worker)
		kthread_stop(dc->worker);

	mutex_destroy(&dc->process_bios_lock);

	kfree(dc);
}

static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
{
	int ret;
	unsigned long long tmpll;
	char dummy;

	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
		ti->error = "Invalid device sector";
		return -EINVAL;
	}
	c->start = tmpll;

	if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
		ti->error = "Invalid delay";
		return -EINVAL;
	}

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		return ret;
	}

	return 0;
}

/*
 * Mapping parameters:
 *    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
 *
 * With separate write parameters, the first set is only used for reads.
 * Offsets are specified in sectors.
 * Delays are specified in milliseconds.
 */
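/*
 * Illustrative usage (device name hypothetical): delay every read and
 * write on /dev/sdX by 200 ms:
 *
 *   echo "0 $(blockdev --getsz /dev/sdX) delay /dev/sdX 0 200" | \
 *       dmsetup create delayed
 *
 * Optional second and third argument triples configure separate write
 * and flush classes, matching the 3/6/9 argument check in delay_ctr().
 */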
static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct delay_c *dc;
	int ret;
	unsigned int max_delay;

	if (argc != 3 && argc != 6 && argc != 9) {
		ti->error = "Requires exactly 3, 6 or 9 arguments";
		return -EINVAL;
	}

	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
	if (!dc) {
		ti->error = "Cannot allocate context";
		return -ENOMEM;
	}

	ti->private = dc;
	INIT_LIST_HEAD(&dc->delayed_bios);
	mutex_init(&dc->process_bios_lock);
	spin_lock_init(&dc->delayed_bios_lock);
	dc->may_delay = true;
	dc->argc = argc;

	ret = delay_class_ctr(ti, &dc->read, argv);
	if (ret)
		goto bad;
	max_delay = dc->read.delay;

	if (argc == 3) {
		ret = delay_class_ctr(ti, &dc->write, argv);
		if (ret)
			goto bad;
		ret = delay_class_ctr(ti, &dc->flush, argv);
		if (ret)
			goto bad;
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->write, argv + 3);
	if (ret)
		goto bad;
	max_delay = max(max_delay, dc->write.delay);

	if (argc == 6) {
		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
		if (ret)
			goto bad;
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
	if (ret)
		goto bad;
	max_delay = max(max_delay, dc->flush.delay);

out:
	if (max_delay < 50) {
		/*
		 * In case of small requested delays, use kthread instead of
		 * timers and workqueue to achieve better latency.
		 */
		dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
		if (IS_ERR(dc->worker)) {
			ret = PTR_ERR(dc->worker);
			dc->worker = NULL;
			goto bad;
		}
	} else {
		timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
		INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
		dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
		if (!dc->kdelayd_wq) {
			ret = -EINVAL;
			DMERR("Couldn't start kdelayd");
			goto bad;
		}
	}

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->accounts_remapped_io = true;
	ti->per_io_data_size = sizeof(struct dm_delay_info);
	return 0;

bad:
	delay_dtr(ti);
	return ret;
}

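/*
 * Queue a bio on the delayed list with its expiry time. Returns
 * DM_MAPIO_SUBMITTED when the bio was delayed, or DM_MAPIO_REMAPPED when
 * it should be passed straight through (no delay configured for its
 * class, or the target is suspending).
 */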
static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
{
	struct dm_delay_info *delayed;
	unsigned long expires = 0;

	if (!c->delay)
		return DM_MAPIO_REMAPPED;

	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	delayed->context = dc;
	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);

	spin_lock(&dc->delayed_bios_lock);
	if (unlikely(!dc->may_delay)) {
		spin_unlock(&dc->delayed_bios_lock);
		return DM_MAPIO_REMAPPED;
	}
	c->ops++;
	list_add_tail(&delayed->list, &dc->delayed_bios);
	spin_unlock(&dc->delayed_bios_lock);

	if (delay_is_fast(dc))
		wake_up_process(dc->worker);
	else
		queue_timeout(dc, expires);

	return DM_MAPIO_SUBMITTED;
}

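/*
 * On presuspend, stop delaying new bios and immediately submit everything
 * still sitting on the delayed list.
 */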
static void delay_presuspend(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	spin_lock(&dc->delayed_bios_lock);
	dc->may_delay = false;
	spin_unlock(&dc->delayed_bios_lock);

	if (!delay_is_fast(dc))
		timer_delete(&dc->delay_timer);
	flush_delayed_bios(dc, true);
}

static void delay_resume(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	dc->may_delay = true;
}

static int delay_map(struct dm_target *ti, struct bio *bio)
{
	struct delay_c *dc = ti->private;
	struct delay_class *c;
	struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	/* Route flushes, other writes and reads to their respective classes. */
	if (bio_data_dir(bio) == WRITE) {
		if (unlikely(bio->bi_opf & REQ_PREFLUSH))
			c = &dc->flush;
		else
			c = &dc->write;
	} else {
		c = &dc->read;
	}
	delayed->class = c;
	bio_set_dev(bio, c->dev->bdev);
	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	return delay_bio(dc, c, bio);
}

#define DMEMIT_DELAY_CLASS(c) \
	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)

static void delay_status(struct dm_target *ti, status_type_t type,
			 unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct delay_c *dc = ti->private;
	int sz = 0;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
		break;

	case STATUSTYPE_TABLE:
		DMEMIT_DELAY_CLASS(&dc->read);
		if (dc->argc >= 6) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->write);
		}
		if (dc->argc >= 9) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->flush);
		}
		break;

	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}
}

static int delay_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	struct delay_c *dc = ti->private;
	int ret = 0;

	ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
	if (ret)
		goto out;

out:
	return ret;
}

static struct target_type delay_target = {
	.name	     = "delay",
	.version     = {1, 4, 0},
	.features    = DM_TARGET_PASSES_INTEGRITY,
	.module      = THIS_MODULE,
	.ctr	     = delay_ctr,
	.dtr	     = delay_dtr,
	.map	     = delay_map,
	.presuspend  = delay_presuspend,
	.resume      = delay_resume,
	.status	     = delay_status,
	.iterate_devices = delay_iterate_devices,
};
module_dm(delay);

MODULE_DESCRIPTION(DM_NAME " delay target");
MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
MODULE_LICENSE("GPL");