1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (C) 2014 Facebook. All rights reserved.
5 * This file is released under the GPL.
8 #include <linux/device-mapper.h>
10 #include <linux/module.h>
11 #include <linux/init.h>
12 #include <linux/blkdev.h>
13 #include <linux/bio.h>
14 #include <linux/dax.h>
15 #include <linux/slab.h>
16 #include <linux/kthread.h>
17 #include <linux/freezer.h>
18 #include <linux/uio.h>
20 #define DM_MSG_PREFIX "log-writes"
23 * This target will sequentially log all writes to the target device onto the
24 * log device. This is helpful for replaying writes to check for fs consistency
25 * at all times. This target provides a mechanism to mark specific events to
26 * check data at a later time. So for example you would:
30 * dmsetup message /dev/whatever mark mymark
33 * Then replay the log up to mymark and check the contents of the replay to
34 * verify it matches what was written.
36 * We log writes only after they have been flushed; this makes the log closely
37 * describe the order in which the data hits the actual disk, not its cache. So
38 * for example the following sequence (W means write, C means complete)
40 * Wa,Wb,Wc,Cc,Ca,FLUSH,FUAd,Cb,CFLUSH,CFUAd
42 * Would result in the log looking like this:
44 * c,a,b,flush,fuad,<other writes>,<next flush>
46 * This is meant to help expose problems where file systems do not properly wait
47 * on data being written before invoking a FLUSH. FUA bypasses cache so once it
48 * completes it is added to the log as it should be on disk.
50 * We treat DISCARDs as if they don't bypass cache so that they are logged in
51 * order of completion along with the normal writes. If we didn't do it this
52 * way we would process all the discards first and then write all the data, when
53 * in fact we want to do the data and the discard in the order that they
56 #define LOG_FLUSH_FLAG (1 << 0)
57 #define LOG_FUA_FLAG (1 << 1)
58 #define LOG_DISCARD_FLAG (1 << 2)
59 #define LOG_MARK_FLAG (1 << 3)
60 #define LOG_METADATA_FLAG (1 << 4)
62 #define WRITE_LOG_VERSION 1ULL
63 #define WRITE_LOG_MAGIC 0x6a736677736872ULL
64 #define WRITE_LOG_SUPER_SECTOR 0
67 * The disk format for this is braindead simple.
69 * At byte 0 we have our super, followed by the following sequence for
72 * [ 1 sector ][ entry->nr_sectors ]
73 * [log_write_entry][ data written ]
75 * The log_write_entry takes up a full sector so we can have arbitrary length
76 * marks and it leaves us room for extra content in the future.
80 * Basic info about the log for userspace.
82 struct log_write_super
{
90 * sector - the sector we wrote.
91 * nr_sectors - the number of sectors we wrote.
92 * flags - flags for this log entry.
93 * data_len - the size of the data in this log entry; this is for private log
94 * entry stuff, e.g. the MARK data provided by userspace.
96 struct log_write_entry
{
103 struct log_writes_c
{
105 struct dm_dev
*logdev
;
110 atomic_t pending_blocks
;
111 sector_t next_sector
;
113 bool logging_enabled
;
114 bool device_supports_discard
;
115 spinlock_t blocks_lock
;
116 struct list_head unflushed_blocks
;
117 struct list_head logging_blocks
;
118 wait_queue_head_t wait
;
119 struct task_struct
*log_kthread
;
120 struct completion super_done
;
123 struct pending_block
{
130 struct list_head list
;
131 struct bio_vec vecs
[];
134 struct per_bio_data
{
135 struct pending_block
*block
;
138 static inline sector_t
bio_to_dev_sectors(struct log_writes_c
*lc
,
141 return sectors
>> (lc
->sectorshift
- SECTOR_SHIFT
);
144 static inline sector_t
dev_to_bio_sectors(struct log_writes_c
*lc
,
147 return sectors
<< (lc
->sectorshift
- SECTOR_SHIFT
);
150 static void put_pending_block(struct log_writes_c
*lc
)
152 if (atomic_dec_and_test(&lc
->pending_blocks
)) {
153 smp_mb__after_atomic();
154 if (waitqueue_active(&lc
->wait
))
159 static void put_io_block(struct log_writes_c
*lc
)
161 if (atomic_dec_and_test(&lc
->io_blocks
)) {
162 smp_mb__after_atomic();
163 if (waitqueue_active(&lc
->wait
))
168 static void log_end_io(struct bio
*bio
)
170 struct log_writes_c
*lc
= bio
->bi_private
;
172 if (bio
->bi_status
) {
175 DMERR("Error writing log block, error=%d", bio
->bi_status
);
176 spin_lock_irqsave(&lc
->blocks_lock
, flags
);
177 lc
->logging_enabled
= false;
178 spin_unlock_irqrestore(&lc
->blocks_lock
, flags
);
186 static void log_end_super(struct bio
*bio
)
188 struct log_writes_c
*lc
= bio
->bi_private
;
190 complete(&lc
->super_done
);
195 * Meant to be called if there is an error, it will free all the pages
196 * associated with the block.
198 static void free_pending_block(struct log_writes_c
*lc
,
199 struct pending_block
*block
)
203 for (i
= 0; i
< block
->vec_cnt
; i
++) {
204 if (block
->vecs
[i
].bv_page
)
205 __free_page(block
->vecs
[i
].bv_page
);
209 put_pending_block(lc
);
212 static int write_metadata(struct log_writes_c
*lc
, void *entry
,
213 size_t entrylen
, void *data
, size_t datalen
,
221 bio
= bio_alloc(lc
->logdev
->bdev
, 1, REQ_OP_WRITE
, GFP_KERNEL
);
222 bio
->bi_iter
.bi_size
= 0;
223 bio
->bi_iter
.bi_sector
= sector
;
224 bio
->bi_end_io
= (sector
== WRITE_LOG_SUPER_SECTOR
) ?
225 log_end_super
: log_end_io
;
226 bio
->bi_private
= lc
;
228 page
= alloc_page(GFP_KERNEL
);
230 DMERR("Couldn't alloc log page");
235 ptr
= kmap_local_page(page
);
236 memcpy(ptr
, entry
, entrylen
);
238 memcpy(ptr
+ entrylen
, data
, datalen
);
239 memset(ptr
+ entrylen
+ datalen
, 0,
240 lc
->sectorsize
- entrylen
- datalen
);
243 ret
= bio_add_page(bio
, page
, lc
->sectorsize
, 0);
244 if (ret
!= lc
->sectorsize
) {
245 DMERR("Couldn't add page to the log block");
258 static int write_inline_data(struct log_writes_c
*lc
, void *entry
,
259 size_t entrylen
, void *data
, size_t datalen
,
262 int bio_pages
, pg_datalen
, pg_sectorlen
, i
;
269 bio_pages
= bio_max_segs(DIV_ROUND_UP(datalen
, PAGE_SIZE
));
271 atomic_inc(&lc
->io_blocks
);
273 bio
= bio_alloc(lc
->logdev
->bdev
, bio_pages
, REQ_OP_WRITE
,
275 bio
->bi_iter
.bi_size
= 0;
276 bio
->bi_iter
.bi_sector
= sector
;
277 bio
->bi_end_io
= log_end_io
;
278 bio
->bi_private
= lc
;
280 for (i
= 0; i
< bio_pages
; i
++) {
281 pg_datalen
= min_t(int, datalen
, PAGE_SIZE
);
282 pg_sectorlen
= ALIGN(pg_datalen
, lc
->sectorsize
);
284 page
= alloc_page(GFP_KERNEL
);
286 DMERR("Couldn't alloc inline data page");
290 ptr
= kmap_local_page(page
);
291 memcpy(ptr
, data
, pg_datalen
);
292 if (pg_sectorlen
> pg_datalen
)
293 memset(ptr
+ pg_datalen
, 0, pg_sectorlen
- pg_datalen
);
296 ret
= bio_add_page(bio
, page
, pg_sectorlen
, 0);
297 if (ret
!= pg_sectorlen
) {
298 DMERR("Couldn't add page of inline data");
303 datalen
-= pg_datalen
;
308 sector
+= bio_pages
* PAGE_SECTORS
;
318 static int log_one_block(struct log_writes_c
*lc
,
319 struct pending_block
*block
, sector_t sector
)
322 struct log_write_entry entry
;
323 size_t metadatalen
, ret
;
326 entry
.sector
= cpu_to_le64(block
->sector
);
327 entry
.nr_sectors
= cpu_to_le64(block
->nr_sectors
);
328 entry
.flags
= cpu_to_le64(block
->flags
);
329 entry
.data_len
= cpu_to_le64(block
->datalen
);
331 metadatalen
= (block
->flags
& LOG_MARK_FLAG
) ? block
->datalen
: 0;
332 if (write_metadata(lc
, &entry
, sizeof(entry
), block
->data
,
333 metadatalen
, sector
)) {
334 free_pending_block(lc
, block
);
338 sector
+= dev_to_bio_sectors(lc
, 1);
340 if (block
->datalen
&& metadatalen
== 0) {
341 if (write_inline_data(lc
, &entry
, sizeof(entry
), block
->data
,
342 block
->datalen
, sector
)) {
343 free_pending_block(lc
, block
);
346 /* we don't support both inline data & bio data */
353 atomic_inc(&lc
->io_blocks
);
354 bio
= bio_alloc(lc
->logdev
->bdev
, bio_max_segs(block
->vec_cnt
),
355 REQ_OP_WRITE
, GFP_KERNEL
);
356 bio
->bi_iter
.bi_size
= 0;
357 bio
->bi_iter
.bi_sector
= sector
;
358 bio
->bi_end_io
= log_end_io
;
359 bio
->bi_private
= lc
;
361 for (i
= 0; i
< block
->vec_cnt
; i
++) {
363 * The page offset is always 0 because we allocate a new page
364 * for every bvec in the original bio for simplicity's sake.
366 ret
= bio_add_page(bio
, block
->vecs
[i
].bv_page
,
367 block
->vecs
[i
].bv_len
, 0);
368 if (ret
!= block
->vecs
[i
].bv_len
) {
369 atomic_inc(&lc
->io_blocks
);
371 bio
= bio_alloc(lc
->logdev
->bdev
,
372 bio_max_segs(block
->vec_cnt
- i
),
373 REQ_OP_WRITE
, GFP_KERNEL
);
374 bio
->bi_iter
.bi_size
= 0;
375 bio
->bi_iter
.bi_sector
= sector
;
376 bio
->bi_end_io
= log_end_io
;
377 bio
->bi_private
= lc
;
379 ret
= bio_add_page(bio
, block
->vecs
[i
].bv_page
,
380 block
->vecs
[i
].bv_len
, 0);
381 if (ret
!= block
->vecs
[i
].bv_len
) {
382 DMERR("Couldn't add page on new bio?");
387 sector
+= block
->vecs
[i
].bv_len
>> SECTOR_SHIFT
;
393 put_pending_block(lc
);
396 free_pending_block(lc
, block
);
401 static int log_super(struct log_writes_c
*lc
)
403 struct log_write_super super
;
405 super
.magic
= cpu_to_le64(WRITE_LOG_MAGIC
);
406 super
.version
= cpu_to_le64(WRITE_LOG_VERSION
);
407 super
.nr_entries
= cpu_to_le64(lc
->logged_entries
);
408 super
.sectorsize
= cpu_to_le32(lc
->sectorsize
);
410 if (write_metadata(lc
, &super
, sizeof(super
), NULL
, 0,
411 WRITE_LOG_SUPER_SECTOR
)) {
412 DMERR("Couldn't write super");
417 * Super sector should be written in order, otherwise the
418 * nr_entries could be rewritten incorrectly by an old bio.
420 wait_for_completion_io(&lc
->super_done
);
425 static inline sector_t
logdev_last_sector(struct log_writes_c
*lc
)
427 return bdev_nr_sectors(lc
->logdev
->bdev
);
430 static int log_writes_kthread(void *arg
)
432 struct log_writes_c
*lc
= arg
;
435 while (!kthread_should_stop()) {
437 bool logging_enabled
;
438 struct pending_block
*block
= NULL
;
441 spin_lock_irq(&lc
->blocks_lock
);
442 if (!list_empty(&lc
->logging_blocks
)) {
443 block
= list_first_entry(&lc
->logging_blocks
,
444 struct pending_block
, list
);
445 list_del_init(&block
->list
);
446 if (!lc
->logging_enabled
)
449 sector
= lc
->next_sector
;
450 if (!(block
->flags
& LOG_DISCARD_FLAG
))
451 lc
->next_sector
+= dev_to_bio_sectors(lc
, block
->nr_sectors
);
452 lc
->next_sector
+= dev_to_bio_sectors(lc
, 1);
455 * Apparently the size of the device may not be known
456 * right away, so handle this properly.
459 lc
->end_sector
= logdev_last_sector(lc
);
460 if (lc
->end_sector
&&
461 lc
->next_sector
>= lc
->end_sector
) {
462 DMERR("Ran out of space on the logdev");
463 lc
->logging_enabled
= false;
466 lc
->logged_entries
++;
467 atomic_inc(&lc
->io_blocks
);
469 super
= (block
->flags
& (LOG_FUA_FLAG
| LOG_MARK_FLAG
));
471 atomic_inc(&lc
->io_blocks
);
474 logging_enabled
= lc
->logging_enabled
;
475 spin_unlock_irq(&lc
->blocks_lock
);
477 if (logging_enabled
) {
478 ret
= log_one_block(lc
, block
, sector
);
482 spin_lock_irq(&lc
->blocks_lock
);
483 lc
->logging_enabled
= false;
484 spin_unlock_irq(&lc
->blocks_lock
);
487 free_pending_block(lc
, block
);
491 if (!try_to_freeze()) {
492 set_current_state(TASK_INTERRUPTIBLE
);
493 if (!kthread_should_stop() &&
494 list_empty(&lc
->logging_blocks
))
496 __set_current_state(TASK_RUNNING
);
503 * Construct a log-writes mapping:
504 * log-writes <dev_path> <log_dev_path>
506 static int log_writes_ctr(struct dm_target
*ti
, unsigned int argc
, char **argv
)
508 struct log_writes_c
*lc
;
509 struct dm_arg_set as
;
510 const char *devname
, *logdevname
;
517 ti
->error
= "Invalid argument count";
521 lc
= kzalloc(sizeof(struct log_writes_c
), GFP_KERNEL
);
523 ti
->error
= "Cannot allocate context";
526 spin_lock_init(&lc
->blocks_lock
);
527 INIT_LIST_HEAD(&lc
->unflushed_blocks
);
528 INIT_LIST_HEAD(&lc
->logging_blocks
);
529 init_waitqueue_head(&lc
->wait
);
530 init_completion(&lc
->super_done
);
531 atomic_set(&lc
->io_blocks
, 0);
532 atomic_set(&lc
->pending_blocks
, 0);
534 devname
= dm_shift_arg(&as
);
535 ret
= dm_get_device(ti
, devname
, dm_table_get_mode(ti
->table
), &lc
->dev
);
537 ti
->error
= "Device lookup failed";
541 logdevname
= dm_shift_arg(&as
);
542 ret
= dm_get_device(ti
, logdevname
, dm_table_get_mode(ti
->table
),
545 ti
->error
= "Log device lookup failed";
546 dm_put_device(ti
, lc
->dev
);
550 lc
->sectorsize
= bdev_logical_block_size(lc
->dev
->bdev
);
551 lc
->sectorshift
= ilog2(lc
->sectorsize
);
552 lc
->log_kthread
= kthread_run(log_writes_kthread
, lc
, "log-write");
553 if (IS_ERR(lc
->log_kthread
)) {
554 ret
= PTR_ERR(lc
->log_kthread
);
555 ti
->error
= "Couldn't alloc kthread";
556 dm_put_device(ti
, lc
->dev
);
557 dm_put_device(ti
, lc
->logdev
);
562 * next_sector is in 512b sectors to correspond to what bi_sector expects.
563 * The super starts at sector 0, and the next_sector is the next logical
564 * one based on the sectorsize of the device.
566 lc
->next_sector
= lc
->sectorsize
>> SECTOR_SHIFT
;
567 lc
->logging_enabled
= true;
568 lc
->end_sector
= logdev_last_sector(lc
);
569 lc
->device_supports_discard
= true;
571 ti
->num_flush_bios
= 1;
572 ti
->flush_supported
= true;
573 ti
->num_discard_bios
= 1;
574 ti
->discards_supported
= true;
575 ti
->per_io_data_size
= sizeof(struct per_bio_data
);
584 static int log_mark(struct log_writes_c
*lc
, char *data
)
586 struct pending_block
*block
;
587 size_t maxsize
= lc
->sectorsize
- sizeof(struct log_write_entry
);
589 block
= kzalloc(sizeof(struct pending_block
), GFP_KERNEL
);
591 DMERR("Error allocating pending block");
595 block
->data
= kstrndup(data
, maxsize
- 1, GFP_KERNEL
);
597 DMERR("Error copying mark data");
601 atomic_inc(&lc
->pending_blocks
);
602 block
->datalen
= strlen(block
->data
);
603 block
->flags
|= LOG_MARK_FLAG
;
604 spin_lock_irq(&lc
->blocks_lock
);
605 list_add_tail(&block
->list
, &lc
->logging_blocks
);
606 spin_unlock_irq(&lc
->blocks_lock
);
607 wake_up_process(lc
->log_kthread
);
611 static void log_writes_dtr(struct dm_target
*ti
)
613 struct log_writes_c
*lc
= ti
->private;
615 spin_lock_irq(&lc
->blocks_lock
);
616 list_splice_init(&lc
->unflushed_blocks
, &lc
->logging_blocks
);
617 spin_unlock_irq(&lc
->blocks_lock
);
620 * This is just nice to have since it'll update the super to include the
621 * unflushed blocks; if it fails we don't really care.
623 log_mark(lc
, "dm-log-writes-end");
624 wake_up_process(lc
->log_kthread
);
625 wait_event(lc
->wait
, !atomic_read(&lc
->io_blocks
) &&
626 !atomic_read(&lc
->pending_blocks
));
627 kthread_stop(lc
->log_kthread
);
629 WARN_ON(!list_empty(&lc
->logging_blocks
));
630 WARN_ON(!list_empty(&lc
->unflushed_blocks
));
631 dm_put_device(ti
, lc
->dev
);
632 dm_put_device(ti
, lc
->logdev
);
636 static void normal_map_bio(struct dm_target
*ti
, struct bio
*bio
)
638 struct log_writes_c
*lc
= ti
->private;
640 bio_set_dev(bio
, lc
->dev
->bdev
);
643 static int log_writes_map(struct dm_target
*ti
, struct bio
*bio
)
645 struct log_writes_c
*lc
= ti
->private;
646 struct per_bio_data
*pb
= dm_per_bio_data(bio
, sizeof(struct per_bio_data
));
647 struct pending_block
*block
;
648 struct bvec_iter iter
;
652 bool flush_bio
= (bio
->bi_opf
& REQ_PREFLUSH
);
653 bool fua_bio
= (bio
->bi_opf
& REQ_FUA
);
654 bool discard_bio
= (bio_op(bio
) == REQ_OP_DISCARD
);
655 bool meta_bio
= (bio
->bi_opf
& REQ_META
);
659 /* Don't bother doing anything if logging has been disabled */
660 if (!lc
->logging_enabled
)
664 * Map reads as normal.
666 if (bio_data_dir(bio
) == READ
)
669 /* No sectors and not a flush? Don't care */
670 if (!bio_sectors(bio
) && !flush_bio
)
674 * Discards will have bi_size set but there's no actual data, so just
675 * allocate the size of the pending block.
678 alloc_size
= sizeof(struct pending_block
);
680 alloc_size
= struct_size(block
, vecs
, bio_segments(bio
));
682 block
= kzalloc(alloc_size
, GFP_NOIO
);
684 DMERR("Error allocating pending block");
685 spin_lock_irq(&lc
->blocks_lock
);
686 lc
->logging_enabled
= false;
687 spin_unlock_irq(&lc
->blocks_lock
);
688 return DM_MAPIO_KILL
;
690 INIT_LIST_HEAD(&block
->list
);
692 atomic_inc(&lc
->pending_blocks
);
695 block
->flags
|= LOG_FLUSH_FLAG
;
697 block
->flags
|= LOG_FUA_FLAG
;
699 block
->flags
|= LOG_DISCARD_FLAG
;
701 block
->flags
|= LOG_METADATA_FLAG
;
703 block
->sector
= bio_to_dev_sectors(lc
, bio
->bi_iter
.bi_sector
);
704 block
->nr_sectors
= bio_to_dev_sectors(lc
, bio_sectors(bio
));
706 /* We don't need the data, just submit */
708 WARN_ON(flush_bio
|| fua_bio
);
709 if (lc
->device_supports_discard
)
712 return DM_MAPIO_SUBMITTED
;
715 /* Flush bio, splice the unflushed blocks onto this list and submit */
716 if (flush_bio
&& !bio_sectors(bio
)) {
717 spin_lock_irq(&lc
->blocks_lock
);
718 list_splice_init(&lc
->unflushed_blocks
, &block
->list
);
719 spin_unlock_irq(&lc
->blocks_lock
);
724 * We will write this bio somewhere else way later so we need to copy
725 * the actual contents into new pages so we know the data will always be
728 * We do this because this could be a bio from O_DIRECT in which case we
729 * can't just hold onto the page until some later point, we have to
730 * manually copy the contents.
732 bio_for_each_segment(bv
, bio
, iter
) {
736 page
= alloc_page(GFP_NOIO
);
738 DMERR("Error allocing page");
739 free_pending_block(lc
, block
);
740 spin_lock_irq(&lc
->blocks_lock
);
741 lc
->logging_enabled
= false;
742 spin_unlock_irq(&lc
->blocks_lock
);
743 return DM_MAPIO_KILL
;
746 dst
= kmap_local_page(page
);
747 memcpy_from_bvec(dst
, &bv
);
749 block
->vecs
[i
].bv_page
= page
;
750 block
->vecs
[i
].bv_len
= bv
.bv_len
;
755 /* Had a flush with data in it, weird */
757 spin_lock_irq(&lc
->blocks_lock
);
758 list_splice_init(&lc
->unflushed_blocks
, &block
->list
);
759 spin_unlock_irq(&lc
->blocks_lock
);
762 normal_map_bio(ti
, bio
);
763 return DM_MAPIO_REMAPPED
;
766 static int normal_end_io(struct dm_target
*ti
, struct bio
*bio
,
769 struct log_writes_c
*lc
= ti
->private;
770 struct per_bio_data
*pb
= dm_per_bio_data(bio
, sizeof(struct per_bio_data
));
772 if (bio_data_dir(bio
) == WRITE
&& pb
->block
) {
773 struct pending_block
*block
= pb
->block
;
776 spin_lock_irqsave(&lc
->blocks_lock
, flags
);
777 if (block
->flags
& LOG_FLUSH_FLAG
) {
778 list_splice_tail_init(&block
->list
, &lc
->logging_blocks
);
779 list_add_tail(&block
->list
, &lc
->logging_blocks
);
780 wake_up_process(lc
->log_kthread
);
781 } else if (block
->flags
& LOG_FUA_FLAG
) {
782 list_add_tail(&block
->list
, &lc
->logging_blocks
);
783 wake_up_process(lc
->log_kthread
);
785 list_add_tail(&block
->list
, &lc
->unflushed_blocks
);
786 spin_unlock_irqrestore(&lc
->blocks_lock
, flags
);
789 return DM_ENDIO_DONE
;
793 * INFO format: <logged entries> <highest allocated sector>
795 static void log_writes_status(struct dm_target
*ti
, status_type_t type
,
796 unsigned int status_flags
, char *result
,
800 struct log_writes_c
*lc
= ti
->private;
803 case STATUSTYPE_INFO
:
804 DMEMIT("%llu %llu", lc
->logged_entries
,
805 (unsigned long long)lc
->next_sector
- 1);
806 if (!lc
->logging_enabled
)
807 DMEMIT(" logging_disabled");
810 case STATUSTYPE_TABLE
:
811 DMEMIT("%s %s", lc
->dev
->name
, lc
->logdev
->name
);
820 static int log_writes_prepare_ioctl(struct dm_target
*ti
,
821 struct block_device
**bdev
)
823 struct log_writes_c
*lc
= ti
->private;
824 struct dm_dev
*dev
= lc
->dev
;
828 * Only pass ioctls through if the device sizes match exactly.
830 if (ti
->len
!= bdev_nr_sectors(dev
->bdev
))
835 static int log_writes_iterate_devices(struct dm_target
*ti
,
836 iterate_devices_callout_fn fn
,
839 struct log_writes_c
*lc
= ti
->private;
841 return fn(ti
, lc
->dev
, 0, ti
->len
, data
);
845 * Messages supported:
846 * mark <mark data> - specify the marked data.
848 static int log_writes_message(struct dm_target
*ti
, unsigned int argc
, char **argv
,
849 char *result
, unsigned int maxlen
)
852 struct log_writes_c
*lc
= ti
->private;
855 DMWARN("Invalid log-writes message arguments, expect 2 arguments, got %d", argc
);
859 if (!strcasecmp(argv
[0], "mark"))
860 r
= log_mark(lc
, argv
[1]);
862 DMWARN("Unrecognised log writes target message received: %s", argv
[0]);
867 static void log_writes_io_hints(struct dm_target
*ti
, struct queue_limits
*limits
)
869 struct log_writes_c
*lc
= ti
->private;
871 if (!bdev_max_discard_sectors(lc
->dev
->bdev
)) {
872 lc
->device_supports_discard
= false;
873 limits
->discard_granularity
= lc
->sectorsize
;
874 limits
->max_hw_discard_sectors
= (UINT_MAX
>> SECTOR_SHIFT
);
876 limits
->logical_block_size
= bdev_logical_block_size(lc
->dev
->bdev
);
877 limits
->physical_block_size
= bdev_physical_block_size(lc
->dev
->bdev
);
878 limits
->io_min
= limits
->physical_block_size
;
879 limits
->dma_alignment
= limits
->logical_block_size
- 1;
882 #if IS_ENABLED(CONFIG_FS_DAX)
883 static struct dax_device
*log_writes_dax_pgoff(struct dm_target
*ti
,
886 struct log_writes_c
*lc
= ti
->private;
888 *pgoff
+= (get_start_sect(lc
->dev
->bdev
) >> PAGE_SECTORS_SHIFT
);
889 return lc
->dev
->dax_dev
;
892 static long log_writes_dax_direct_access(struct dm_target
*ti
, pgoff_t pgoff
,
893 long nr_pages
, enum dax_access_mode mode
, void **kaddr
,
896 struct dax_device
*dax_dev
= log_writes_dax_pgoff(ti
, &pgoff
);
898 return dax_direct_access(dax_dev
, pgoff
, nr_pages
, mode
, kaddr
, pfn
);
901 static int log_writes_dax_zero_page_range(struct dm_target
*ti
, pgoff_t pgoff
,
904 struct dax_device
*dax_dev
= log_writes_dax_pgoff(ti
, &pgoff
);
906 return dax_zero_page_range(dax_dev
, pgoff
, nr_pages
<< PAGE_SHIFT
);
909 static size_t log_writes_dax_recovery_write(struct dm_target
*ti
,
910 pgoff_t pgoff
, void *addr
, size_t bytes
, struct iov_iter
*i
)
912 struct dax_device
*dax_dev
= log_writes_dax_pgoff(ti
, &pgoff
);
914 return dax_recovery_write(dax_dev
, pgoff
, addr
, bytes
, i
);
918 #define log_writes_dax_direct_access NULL
919 #define log_writes_dax_zero_page_range NULL
920 #define log_writes_dax_recovery_write NULL
923 static struct target_type log_writes_target
= {
924 .name
= "log-writes",
925 .version
= {1, 1, 0},
926 .module
= THIS_MODULE
,
927 .ctr
= log_writes_ctr
,
928 .dtr
= log_writes_dtr
,
929 .map
= log_writes_map
,
930 .end_io
= normal_end_io
,
931 .status
= log_writes_status
,
932 .prepare_ioctl
= log_writes_prepare_ioctl
,
933 .message
= log_writes_message
,
934 .iterate_devices
= log_writes_iterate_devices
,
935 .io_hints
= log_writes_io_hints
,
936 .direct_access
= log_writes_dax_direct_access
,
937 .dax_zero_page_range
= log_writes_dax_zero_page_range
,
938 .dax_recovery_write
= log_writes_dax_recovery_write
,
940 module_dm(log_writes
);
942 MODULE_DESCRIPTION(DM_NAME
" log writes target");
943 MODULE_AUTHOR("Josef Bacik <jbacik@fb.com>");
944 MODULE_LICENSE("GPL");