/*
 * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include <linux/device-mapper.h>

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/log2.h>

#define DM_MSG_PREFIX "striped"
#define DM_IO_ERROR_THRESHOLD 15

struct stripe {
	struct dm_dev *dev;
	sector_t physical_start;

	atomic_t error_count;
};

struct stripe_c {
	uint32_t stripes;
	int stripes_shift;

	/* The size of this target / num. stripes */
	sector_t stripe_width;

	uint32_t chunk_size;
	int chunk_size_shift;

	/* Needed for handling events */
	struct dm_target *ti;

	/* Work struct used for triggering events */
	struct work_struct trigger_event;

	struct stripe stripe[];
};

/*
 * An event is triggered whenever a drive
 * drops out of a stripe volume.
 */
static void trigger_event(struct work_struct *work)
{
	struct stripe_c *sc = container_of(work, struct stripe_c,
					   trigger_event);

	dm_table_event(sc->ti->table);
}

/*
 * Parse a single <dev> <sector> pair
 */
static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
		      unsigned int stripe, char **argv)
{
	unsigned long long start;
	char dummy;
	int ret;

	if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
		return -EINVAL;

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
			    &sc->stripe[stripe].dev);
	if (ret)
		return ret;

	sc->stripe[stripe].physical_start = start;

	return 0;
}

/*
 * Construct a striped mapping.
 * <number of stripes> <chunk size> [<dev_path> <offset>]+
 */
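/*
 * Example (illustrative device paths and sizes, not from this file):
 * a 1 GiB two-way stripe with 128 KiB (256-sector) chunks could be
 * loaded with
 *
 *   dmsetup create mystripe --table \
 *     "0 2097152 striped 2 256 /dev/sda1 0 /dev/sdb1 0"
 */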
static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct stripe_c *sc;
	sector_t width, tmp_len;
	uint32_t stripes;
	uint32_t chunk_size;
	int r;
	unsigned int i;

	if (argc < 2) {
		ti->error = "Not enough arguments";
		return -EINVAL;
	}

	if (kstrtouint(argv[0], 10, &stripes) || !stripes) {
		ti->error = "Invalid stripe count";
		return -EINVAL;
	}

	if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) {
		ti->error = "Invalid chunk_size";
		return -EINVAL;
	}

	width = ti->len;
	if (sector_div(width, stripes)) {
		ti->error = "Target length not divisible by "
		    "number of stripes";
		return -EINVAL;
	}

	tmp_len = width;
	if (sector_div(tmp_len, chunk_size)) {
		ti->error = "Target length not divisible by "
		    "chunk size";
		return -EINVAL;
	}

	/*
	 * Do we have enough arguments for that many stripes ?
	 */
	if (argc != (2 + 2 * stripes)) {
		ti->error = "Not enough destinations "
			"specified";
		return -EINVAL;
	}

	sc = kmalloc(struct_size(sc, stripe, stripes), GFP_KERNEL);
	if (!sc) {
		ti->error = "Memory allocation for striped context "
		    "failed";
		return -ENOMEM;
	}

	INIT_WORK(&sc->trigger_event, trigger_event);

	/* Set pointer to dm target; used in trigger_event */
	sc->ti = ti;

	sc->stripes = stripes;
	sc->stripe_width = width;

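	/*
	 * (stripes & (stripes - 1)) is zero only for powers of two, so a
	 * shift of -1 marks "not a power of two": stripe_map_sector() then
	 * falls back to sector_div() instead of cheap mask/shift
	 * arithmetic. Otherwise __ffs() yields log2(stripes).
	 */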
	if (stripes & (stripes - 1))
		sc->stripes_shift = -1;
	else
		sc->stripes_shift = __ffs(stripes);

	r = dm_set_target_max_io_len(ti, chunk_size);
	if (r) {
		kfree(sc);
		return r;
	}

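	/*
	 * dm core clones flush/discard/secure-erase/write-same/write-zeroes
	 * bios once per stripe; stripe_map() routes clone N to stripe N.
	 */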
	ti->num_flush_bios = stripes;
	ti->num_discard_bios = stripes;
	ti->num_secure_erase_bios = stripes;
	ti->num_write_same_bios = stripes;
	ti->num_write_zeroes_bios = stripes;

	sc->chunk_size = chunk_size;
	if (chunk_size & (chunk_size - 1))
		sc->chunk_size_shift = -1;
	else
		sc->chunk_size_shift = __ffs(chunk_size);

	/*
	 * Get the stripe destinations.
	 */
	for (i = 0; i < stripes; i++) {
		argv += 2;

		r = get_stripe(ti, sc, i, argv);
		if (r < 0) {
			ti->error = "Couldn't parse stripe destination";
			while (i--)
				dm_put_device(ti, sc->stripe[i].dev);
			kfree(sc);
			return r;
		}
		atomic_set(&(sc->stripe[i].error_count), 0);
	}

	ti->private = sc;

	return 0;
}

static void stripe_dtr(struct dm_target *ti)
{
	unsigned int i;
	struct stripe_c *sc = (struct stripe_c *) ti->private;

	for (i = 0; i < sc->stripes; i++)
		dm_put_device(ti, sc->stripe[i].dev);

	flush_work(&sc->trigger_event);
	kfree(sc);
}

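/*
 * Map a target-relative sector to a stripe index and the sector offset
 * within that stripe. Worked example with chunk_size = 256 and
 * stripes = 4: sector 2000 is chunk 7 at offset 208; chunk 7 lands on
 * stripe 7 % 4 = 3 as that device's chunk 7 / 4 = 1, giving
 * *result = 1 * 256 + 208 = 464.
 */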
static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
			      uint32_t *stripe, sector_t *result)
{
	sector_t chunk = dm_target_offset(sc->ti, sector);
	sector_t chunk_offset;

	if (sc->chunk_size_shift < 0)
		chunk_offset = sector_div(chunk, sc->chunk_size);
	else {
		chunk_offset = chunk & (sc->chunk_size - 1);
		chunk >>= sc->chunk_size_shift;
	}

	if (sc->stripes_shift < 0)
		*stripe = sector_div(chunk, sc->stripes);
	else {
		*stripe = chunk & (sc->stripes - 1);
		chunk >>= sc->stripes_shift;
	}

	if (sc->chunk_size_shift < 0)
		chunk *= sc->chunk_size;
	else
		chunk <<= sc->chunk_size_shift;

	*result = chunk + chunk_offset;
}

static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
				    uint32_t target_stripe, sector_t *result)
{
	uint32_t stripe;

	stripe_map_sector(sc, sector, &stripe, result);
	if (stripe == target_stripe)
		return;

	/* round down */
	sector = *result;
	if (sc->chunk_size_shift < 0)
		*result -= sector_div(sector, sc->chunk_size);
	else
		*result = sector & ~(sector_t)(sc->chunk_size - 1);

	if (target_stripe < stripe)
		*result += sc->chunk_size;		/* next chunk */
}

static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
			    uint32_t target_stripe)
{
	sector_t begin, end;

	stripe_map_range_sector(sc, bio->bi_iter.bi_sector,
				target_stripe, &begin);
	stripe_map_range_sector(sc, bio_end_sector(bio),
				target_stripe, &end);
	if (begin < end) {
		bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev);
		bio->bi_iter.bi_sector = begin +
			sc->stripe[target_stripe].physical_start;
		bio->bi_iter.bi_size = to_bytes(end - begin);
		return DM_MAPIO_REMAPPED;
	} else {
		/* The range doesn't map to the target stripe */
		bio_endio(bio);
		return DM_MAPIO_SUBMITTED;
	}
}

static int stripe_map(struct dm_target *ti, struct bio *bio)
{
	struct stripe_c *sc = ti->private;
	uint32_t stripe;
	unsigned target_bio_nr;

	if (bio->bi_opf & REQ_PREFLUSH) {
		target_bio_nr = dm_bio_get_target_bio_nr(bio);
		BUG_ON(target_bio_nr >= sc->stripes);
		bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev);
		return DM_MAPIO_REMAPPED;
	}
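	/*
	 * Discard, secure erase, write-same and write-zeroes bios are
	 * also cloned once per stripe, but each clone covers a sector
	 * range and must be trimmed to the portion that lands on its
	 * stripe; stripe_map_range() does the trimming.
	 */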
	if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
	    unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) ||
	    unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
	    unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
		target_bio_nr = dm_bio_get_target_bio_nr(bio);
		BUG_ON(target_bio_nr >= sc->stripes);
		return stripe_map_range(sc, bio, target_bio_nr);
	}

	stripe_map_sector(sc, bio->bi_iter.bi_sector,
			  &stripe, &bio->bi_iter.bi_sector);

	bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
	bio_set_dev(bio, sc->stripe[stripe].dev->bdev);

	return DM_MAPIO_REMAPPED;
}

#if IS_ENABLED(CONFIG_DAX_DRIVER)
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
		long nr_pages, void **kaddr, pfn_t *pfn)
{
	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
	struct stripe_c *sc = ti->private;
	struct dax_device *dax_dev;
	struct block_device *bdev;
	uint32_t stripe;
	long ret;

	stripe_map_sector(sc, sector, &stripe, &dev_sector);
	dev_sector += sc->stripe[stripe].physical_start;
	dax_dev = sc->stripe[stripe].dev->dax_dev;
	bdev = sc->stripe[stripe].dev->bdev;

	ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
	if (ret)
		return ret;
	return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}

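/*
 * The remaining DAX ops repeat the same remapping pattern: convert the
 * page offset to a target-relative sector, pick the stripe with
 * stripe_map_sector(), then convert back to a page offset on the member
 * device via bdev_dax_pgoff() before calling into its dax_device.
 */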
static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
	struct stripe_c *sc = ti->private;
	struct dax_device *dax_dev;
	struct block_device *bdev;
	uint32_t stripe;

	stripe_map_sector(sc, sector, &stripe, &dev_sector);
	dev_sector += sc->stripe[stripe].physical_start;
	dax_dev = sc->stripe[stripe].dev->dax_dev;
	bdev = sc->stripe[stripe].dev->bdev;

	if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
		return 0;
	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}

static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
	struct stripe_c *sc = ti->private;
	struct dax_device *dax_dev;
	struct block_device *bdev;
	uint32_t stripe;

	stripe_map_sector(sc, sector, &stripe, &dev_sector);
	dev_sector += sc->stripe[stripe].physical_start;
	dax_dev = sc->stripe[stripe].dev->dax_dev;
	bdev = sc->stripe[stripe].dev->bdev;

	if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
		return 0;
	return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}

static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
				      size_t nr_pages)
{
	int ret;
	sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
	struct stripe_c *sc = ti->private;
	struct dax_device *dax_dev;
	struct block_device *bdev;
	uint32_t stripe;

	stripe_map_sector(sc, sector, &stripe, &dev_sector);
	dev_sector += sc->stripe[stripe].physical_start;
	dax_dev = sc->stripe[stripe].dev->dax_dev;
	bdev = sc->stripe[stripe].dev->bdev;

	ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
	if (ret)
		return ret;
	return dax_zero_page_range(dax_dev, pgoff, nr_pages);
}

#else
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL
#define stripe_dax_zero_page_range NULL
#endif

/*
 * Stripe status:
 *
 * INFO
 * #stripes [stripe_name <stripe_name>] [group word count]
 * [error count 'A|D' <error count 'A|D'>]
 *
 * TABLE
 * #stripes [stripe chunk size]
 * [stripe_name physical_start <stripe_name physical_start>]
 *
 */
static void stripe_status(struct dm_target *ti, status_type_t type,
			  unsigned status_flags, char *result, unsigned maxlen)
{
	struct stripe_c *sc = (struct stripe_c *) ti->private;
	unsigned int sz = 0;
	unsigned int i;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%d ", sc->stripes);
		for (i = 0; i < sc->stripes; i++) {
			DMEMIT("%s ", sc->stripe[i].dev->name);
		}
		DMEMIT("1 ");
		for (i = 0; i < sc->stripes; i++) {
			DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ?
			       'D' : 'A');
		}
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%d %llu", sc->stripes,
			(unsigned long long)sc->chunk_size);
		for (i = 0; i < sc->stripes; i++)
			DMEMIT(" %s %llu", sc->stripe[i].dev->name,
			    (unsigned long long)sc->stripe[i].physical_start);
		break;
	}
}

static int stripe_end_io(struct dm_target *ti, struct bio *bio,
		blk_status_t *error)
{
	unsigned i;
	char major_minor[16];
	struct stripe_c *sc = ti->private;

	if (!*error)
		return DM_ENDIO_DONE; /* I/O complete */

	if (bio->bi_opf & REQ_RAHEAD)
		return DM_ENDIO_DONE;

	if (*error == BLK_STS_NOTSUPP)
		return DM_ENDIO_DONE;

	memset(major_minor, 0, sizeof(major_minor));
	sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)));

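	/*
	 * Each dm_dev's name is its underlying device's "major:minor"
	 * string, so the strcmp() below matches the failing bio's device
	 * number against the stripe legs.
	 */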
	/*
	 * Test to see which stripe drive triggered the event
	 * and increment error count for all stripes on that device.
	 * If the error count for a given device exceeds the threshold
	 * value we will no longer trigger any further events.
	 */
	for (i = 0; i < sc->stripes; i++)
		if (!strcmp(sc->stripe[i].dev->name, major_minor)) {
			atomic_inc(&(sc->stripe[i].error_count));
			if (atomic_read(&(sc->stripe[i].error_count)) <
			    DM_IO_ERROR_THRESHOLD)
				schedule_work(&sc->trigger_event);
		}

	return DM_ENDIO_DONE;
}

static int stripe_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct stripe_c *sc = ti->private;
	int ret = 0;
	unsigned i = 0;

	do {
		ret = fn(ti, sc->stripe[i].dev,
			 sc->stripe[i].physical_start,
			 sc->stripe_width, data);
	} while (!ret && ++i < sc->stripes);

	return ret;
}

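/*
 * Report one chunk as the preferred minimum I/O granularity and a full
 * stripe (chunk size * number of stripes) as the optimal I/O size, so
 * that well-aligned large requests keep every member device busy.
 */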
static void stripe_io_hints(struct dm_target *ti,
			    struct queue_limits *limits)
{
	struct stripe_c *sc = ti->private;
	unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT;

	blk_limits_io_min(limits, chunk_size);
	blk_limits_io_opt(limits, chunk_size * sc->stripes);
}

static struct target_type stripe_target = {
	.name   = "striped",
	.version = {1, 6, 0},
	.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
	.module = THIS_MODULE,
	.ctr    = stripe_ctr,
	.dtr    = stripe_dtr,
	.map    = stripe_map,
	.end_io = stripe_end_io,
	.status = stripe_status,
	.iterate_devices = stripe_iterate_devices,
	.io_hints = stripe_io_hints,
	.direct_access = stripe_dax_direct_access,
	.dax_copy_from_iter = stripe_dax_copy_from_iter,
	.dax_copy_to_iter = stripe_dax_copy_to_iter,
	.dax_zero_page_range = stripe_dax_zero_page_range,
};

int __init dm_stripe_init(void)
{
	int r;

	r = dm_register_target(&stripe_target);
	if (r < 0)
		DMWARN("target registration failed");

	return r;
}

void dm_stripe_exit(void)
{
	dm_unregister_target(&stripe_target);
}