// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */
#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"

#include <linux/dm-bufio.h>
#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>
#include <linux/sched/task.h>

#define DM_MSG_PREFIX "block manager"

/*----------------------------------------------------------------*/

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING

/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
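
/*
 * A minimal usage sketch of the primitives defined below (illustrative
 * only, not a code path in this file):
 *
 *	struct block_lock bl;
 *
 *	bl_init(&bl);
 *	if (!bl_down_read(&bl)) {
 *		... read-side critical section ...
 *		bl_up_read(&bl);
 *	}
 *
 * A second bl_down_read() or bl_down_write() from the same task before
 * the matching unlock is reported and fails with -EINVAL.
 */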

#define MAX_HOLDERS 4
#define MAX_STACK 10

struct stack_store {
	unsigned int nr_entries;
	unsigned long entries[MAX_STACK];
};

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store traces[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};

static unsigned int __find_holder(struct block_lock *lock,
				  struct task_struct *task)
{
	unsigned int i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}

/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned int h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_store *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = stack_trace_save(t->entries, MAX_STACK, 2);
#endif
}

/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned int h = __find_holder(lock, task);

	lock->holders[h] = NULL;
	put_task_struct(task);
}
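
/*
 * The intended pairing, as used by the bl_* functions below (a sketch of
 * the protocol, not a code path):
 *
 *	lock->count++;			(or lock->count = -1 for a writer)
 *	__add_holder(lock, current);
 *	...
 *	__del_holder(lock, current);
 *	--lock->count;			(or lock->count = 0 for a writer)
 */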

static int __check_holder(struct block_lock *lock)
{
	unsigned int i;

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			stack_trace_print(lock->traces[i].entries,
					  lock->traces[i].nr_entries, 4);

			DMERR("subsequent acquisition attempted here:");
			dump_stack();
#endif
			return -EINVAL;
		}
	}

	return 0;
}

static void __wait(struct waiter *w)
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_current_state(TASK_RUNNING);
}

static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();
	w->task = NULL;
	wake_up_process(task);
}

/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}
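
/*
 * Worked example of the policy above (illustrative): with lock->count == 0
 * and a wait queue of reader, reader, writer, reader, the two leading
 * readers are woken (count becomes 2) and the scan stops at the writer,
 * which stays queued until the readers drop the lock.
 */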

static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}

static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}

static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);

	return r;
}

static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers are given priority.  We know there's only one mutator
	 * in the system, so the ordering reversal is safe to ignore.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}
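
/*
 * Note on the priority comment above: the writer is queued with list_add()
 * (at the head) while readers use list_add_tail(), so the single waiting
 * writer sits at the front of the queue and __wake_many() considers it
 * before any queued readers; this is what gives write locks priority.
 */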

static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}

#else  /* !CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

#define bl_init(x) do { } while (0)
#define bl_down_read(x) 0
#define bl_down_read_nonblock(x) 0
#define bl_up_read(x) do { } while (0)
#define bl_down_write(x) 0
#define bl_up_write(x) do { } while (0)
#define report_recursive_bug(x, y) do { } while (0)

#endif /* CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING */

/*----------------------------------------------------------------*/

/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}
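
/*
 * Design note on the cast above: struct dm_block is opaque to users of the
 * public interface (it is only ever handed back into the dm_bm_/dm_block_
 * helpers in this file), so a dm_block pointer is never dereferenced as
 * anything other than the dm_buffer it really is.
 */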

dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);

struct buffer_aux {
	const struct dm_block_validator *validator;
	int write_locked;

#ifdef CONFIG_DM_DEBUG_BLOCK_MANAGER_LOCKING
	struct block_lock lock;
#endif
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
						  dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}

/*
 * -------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------
 */
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};

struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned int block_size,
						 unsigned int max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback,
					   0);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
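
/*
 * Lifecycle sketch (illustrative; the 4096 byte block size and a limit of
 * one held block per thread are assumptions for the example, not
 * requirements of this interface):
 *
 *	struct dm_block_manager *bm;
 *
 *	bm = dm_block_manager_create(bdev, 4096, 1);
 *	if (IS_ERR(bm))
 *		return PTR_ERR(bm);
 *	...
 *	dm_block_manager_destroy(bm);
 */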

void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);

void dm_block_manager_reset(struct dm_block_manager *bm)
{
	dm_bufio_client_reset(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_block_manager_reset);

unsigned int dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}

static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 const struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;

		if (!v)
			return 0;

		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}

int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    const struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);
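
/*
 * Typical read path (a sketch; 'v' is the caller's block validator):
 *
 *	struct dm_block *blk;
 *	int r = dm_bm_read_lock(bm, b, v, &blk);
 *
 *	if (r)
 *		return r;
 *	... inspect dm_block_data(blk) ...
 *	dm_bm_unlock(blk);
 */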

int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, const struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);

int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, const struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}

int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, const struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (dm_bm_is_read_only(bm))
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (IS_ERR(p))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);
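
/*
 * Sketch of initialising a brand new block (illustrative):
 *
 *	struct dm_block *blk;
 *	int r = dm_bm_write_lock_zero(bm, b, v, &blk);
 *
 *	if (r)
 *		return r;
 *	... fill in dm_block_data(blk); the data is already zeroed ...
 *	dm_bm_unlock(blk);		marks the buffer dirty
 *	r = dm_bm_flush(bm);		then persists it to the device
 */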

void dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);

int dm_bm_flush(struct dm_block_manager *bm)
{
	if (dm_bm_is_read_only(bm))
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);

void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}

bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm ? bm->read_only : true;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);

void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);

void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	if (bm)
		bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);

u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);
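
/*
 * Usage sketch: callers typically checksum everything in a block after the
 * csum field itself, salted with a per-structure xor value (the field and
 * constant names below are illustrative, not from this file):
 *
 *	disk_super->csum =
 *		cpu_to_le32(dm_bm_checksum(&disk_super->flags,
 *					   block_size - sizeof(__le32),
 *					   SUPERBLOCK_CSUM_XOR));
 */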

/*----------------------------------------------------------------*/

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
MODULE_DESCRIPTION("Immutable metadata library for dm");

/*----------------------------------------------------------------*/