// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>

#include "admin-state.h"
#include "block-map.h"
#include "completion.h"
#include "constants.h"
#include "data-vio.h"
#include "dedupe.h"
#include "dump.h"
#include "encodings.h"
#include "flush.h"
#include "io-submitter.h"
#include "logger.h"
#include "memory-alloc.h"
#include "message-stats.h"
#include "recovery-journal.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "string-utils.h"
#include "thread-device.h"
#include "thread-registry.h"
#include "thread-utils.h"
#include "types.h"
#include "vdo.h"
#include "vio.h"
enum admin_phases {
	GROW_LOGICAL_PHASE_START,
	GROW_LOGICAL_PHASE_GROW_BLOCK_MAP,
	GROW_LOGICAL_PHASE_END,
	GROW_LOGICAL_PHASE_ERROR,
	GROW_PHYSICAL_PHASE_START,
	GROW_PHYSICAL_PHASE_COPY_SUMMARY,
	GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS,
	GROW_PHYSICAL_PHASE_USE_NEW_SLABS,
	GROW_PHYSICAL_PHASE_END,
	GROW_PHYSICAL_PHASE_ERROR,
	LOAD_PHASE_START,
	LOAD_PHASE_LOAD_DEPOT,
	LOAD_PHASE_MAKE_DIRTY,
	LOAD_PHASE_PREPARE_TO_ALLOCATE,
	LOAD_PHASE_SCRUB_SLABS,
	LOAD_PHASE_DATA_REDUCTION,
	LOAD_PHASE_FINISHED,
	LOAD_PHASE_DRAIN_JOURNAL,
	LOAD_PHASE_WAIT_FOR_READ_ONLY,
	PRE_LOAD_PHASE_START,
	PRE_LOAD_PHASE_LOAD_COMPONENTS,
	PRE_LOAD_PHASE_END,
	PREPARE_GROW_PHYSICAL_PHASE_START,
	RESUME_PHASE_START,
	RESUME_PHASE_ALLOW_READ_ONLY_MODE,
	RESUME_PHASE_DEDUPE,
	RESUME_PHASE_DEPOT,
	RESUME_PHASE_JOURNAL,
	RESUME_PHASE_BLOCK_MAP,
	RESUME_PHASE_LOGICAL_ZONES,
	RESUME_PHASE_PACKER,
	RESUME_PHASE_FLUSHER,
	RESUME_PHASE_DATA_VIOS,
	RESUME_PHASE_END,
	SUSPEND_PHASE_START,
	SUSPEND_PHASE_PACKER,
	SUSPEND_PHASE_DATA_VIOS,
	SUSPEND_PHASE_DEDUPE,
	SUSPEND_PHASE_FLUSHES,
	SUSPEND_PHASE_LOGICAL_ZONES,
	SUSPEND_PHASE_BLOCK_MAP,
	SUSPEND_PHASE_JOURNAL,
	SUSPEND_PHASE_DEPOT,
	SUSPEND_PHASE_READ_ONLY_WAIT,
	SUSPEND_PHASE_WRITE_SUPER_BLOCK,
	SUSPEND_PHASE_END,
};
static const char * const ADMIN_PHASE_NAMES[] = {
	"GROW_LOGICAL_PHASE_START",
	"GROW_LOGICAL_PHASE_GROW_BLOCK_MAP",
	"GROW_LOGICAL_PHASE_END",
	"GROW_LOGICAL_PHASE_ERROR",
	"GROW_PHYSICAL_PHASE_START",
	"GROW_PHYSICAL_PHASE_COPY_SUMMARY",
	"GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS",
	"GROW_PHYSICAL_PHASE_USE_NEW_SLABS",
	"GROW_PHYSICAL_PHASE_END",
	"GROW_PHYSICAL_PHASE_ERROR",
	"LOAD_PHASE_START",
	"LOAD_PHASE_LOAD_DEPOT",
	"LOAD_PHASE_MAKE_DIRTY",
	"LOAD_PHASE_PREPARE_TO_ALLOCATE",
	"LOAD_PHASE_SCRUB_SLABS",
	"LOAD_PHASE_DATA_REDUCTION",
	"LOAD_PHASE_FINISHED",
	"LOAD_PHASE_DRAIN_JOURNAL",
	"LOAD_PHASE_WAIT_FOR_READ_ONLY",
	"PRE_LOAD_PHASE_START",
	"PRE_LOAD_PHASE_LOAD_COMPONENTS",
	"PRE_LOAD_PHASE_END",
	"PREPARE_GROW_PHYSICAL_PHASE_START",
	"RESUME_PHASE_START",
	"RESUME_PHASE_ALLOW_READ_ONLY_MODE",
	"RESUME_PHASE_DEDUPE",
	"RESUME_PHASE_DEPOT",
	"RESUME_PHASE_JOURNAL",
	"RESUME_PHASE_BLOCK_MAP",
	"RESUME_PHASE_LOGICAL_ZONES",
	"RESUME_PHASE_PACKER",
	"RESUME_PHASE_FLUSHER",
	"RESUME_PHASE_DATA_VIOS",
	"RESUME_PHASE_END",
	"SUSPEND_PHASE_START",
	"SUSPEND_PHASE_PACKER",
	"SUSPEND_PHASE_DATA_VIOS",
	"SUSPEND_PHASE_DEDUPE",
	"SUSPEND_PHASE_FLUSHES",
	"SUSPEND_PHASE_LOGICAL_ZONES",
	"SUSPEND_PHASE_BLOCK_MAP",
	"SUSPEND_PHASE_JOURNAL",
	"SUSPEND_PHASE_DEPOT",
	"SUSPEND_PHASE_READ_ONLY_WAIT",
	"SUSPEND_PHASE_WRITE_SUPER_BLOCK",
	"SUSPEND_PHASE_END",
};
/* If we bump this, update the arrays below */
#define TABLE_VERSION 4

/* Arrays for handling different table versions */
static const u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 };
/* Pool name is no longer used; only here for verification of older versions */
static const u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 };
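/*
 * Illustrative example (not part of the original source): a version 4 table line
 * carries the six required arguments counted in REQUIRED_ARGC[4], optionally
 * followed by key/value pairs, e.g.:
 *
 *	dmsetup create vdo0 --table \
 *		"0 409600 vdo V4 /dev/sdb 1048576 4096 32768 16380 maxDiscard 1 ack 1"
 *
 * Here 1048576 is the physical block count, 4096 the logical block size, 32768 the
 * block map cache size, and 16380 the block map era length; all values are made up.
 */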
/*
 * Track in-use instance numbers using a flat bit array.
 *
 * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only
 * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage.
 */

/*
 * This minimum size for the bit array creates a numbering space of 0-999, which allows
 * successive starts of the same volume to have different instance numbers in any
 * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that
 * the ephemeral stats have reset to zero.
 */
#define BIT_COUNT_MINIMUM 1000
/* Grow the bit array by this many bits when needed */
#define BIT_COUNT_INCREMENT 100

struct instance_tracker {
	unsigned int bit_count;
	unsigned long *words;
	unsigned int count;
	unsigned int next;
};

static DEFINE_MUTEX(instances_lock);
static struct instance_tracker instances;
/**
 * free_device_config() - Free a device config created by parse_device_config().
 * @config: The config to free.
 */
static void free_device_config(struct device_config *config)
{
	if (config == NULL)
		return;

	if (config->owned_device != NULL)
		dm_put_device(config->owning_target, config->owned_device);

	vdo_free(config->parent_device_name);
	vdo_free(config->original_string);

	/* Reduce the chance a use-after-free (as in BZ 1669960) happens to work. */
	memset(config, 0, sizeof(*config));
	vdo_free(config);
}
/**
 * get_version_number() - Decide the version number from argv.
 *
 * @argc: The number of table values.
 * @argv: The array of table values.
 * @error_ptr: A pointer to return an error string in.
 * @version_ptr: A pointer to return the version.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int get_version_number(int argc, char **argv, char **error_ptr,
			      unsigned int *version_ptr)
{
	/* The version, if it exists, is in the form V<n> */
	if (sscanf(argv[0], "V%u", version_ptr) == 1) {
		if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) {
			*error_ptr = "Unknown version number detected";
			return VDO_BAD_CONFIGURATION;
		}
	} else {
		/* V0 actually has no version number in the table string */
		*version_ptr = 0;
	}

	/*
	 * V0 and V1 have no optional parameters. There will always be a parameter for thread
	 * config, even if it's a "." to show it's an empty list.
	 */
	if (*version_ptr <= 1) {
		if (argc != REQUIRED_ARGC[*version_ptr]) {
			*error_ptr = "Incorrect number of arguments for version";
			return VDO_BAD_CONFIGURATION;
		}
	} else if (argc < REQUIRED_ARGC[*version_ptr]) {
		*error_ptr = "Incorrect number of arguments for version";
		return VDO_BAD_CONFIGURATION;
	}

	if (*version_ptr != TABLE_VERSION) {
		vdo_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d",
				TABLE_VERSION, *version_ptr);
		vdo_log_warning("Please consider upgrading management tools to match kernel.");
	}
	return VDO_SUCCESS;
}
/* Free a list of non-NULL string pointers, and then the list itself. */
static void free_string_array(char **string_array)
{
	unsigned int offset;

	for (offset = 0; string_array[offset] != NULL; offset++)
		vdo_free(string_array[offset]);
	vdo_free(string_array);
}
/*
 * Split the input string into substrings, separated at occurrences of the indicated character,
 * returning a null-terminated list of string pointers.
 *
 * The string pointers and the pointer array itself should both be freed with vdo_free() when no
 * longer needed. This can be done with free_string_array() (above) if the pointers in the array
 * are not changed. Since the array and copied strings are allocated by this function, it may only
 * be used in contexts where allocation is permitted.
 *
 * Empty substrings are not ignored; that is, returned substrings may be empty strings if the
 * separator occurs twice in a row.
 */
static int split_string(const char *string, char separator, char ***substring_array_ptr)
{
	unsigned int current_substring = 0, substring_count = 1;
	const char *s;
	char **substrings;
	int result;
	ptrdiff_t length;

	for (s = string; *s != 0; s++) {
		if (*s == separator)
			substring_count++;
	}

	result = vdo_allocate(substring_count + 1, char *, "string-splitting array",
			      &substrings);
	if (result != VDO_SUCCESS)
		return result;

	for (s = string; *s != 0; s++) {
		if (*s == separator) {
			ptrdiff_t length = s - string;

			result = vdo_allocate(length + 1, char, "split string",
					      &substrings[current_substring]);
			if (result != VDO_SUCCESS) {
				free_string_array(substrings);
				return result;
			}
			/*
			 * Trailing NUL is already in place after allocation; deal with the zero or
			 * more non-NUL bytes in the string.
			 */
			memcpy(substrings[current_substring], string, length);
			string = s + 1;
			current_substring++;
			BUG_ON(current_substring >= substring_count);
		}
	}

	/* Process final string, with no trailing separator. */
	BUG_ON(current_substring != (substring_count - 1));
	length = strlen(string);

	result = vdo_allocate(length + 1, char, "split string",
			      &substrings[current_substring]);
	if (result != VDO_SUCCESS) {
		free_string_array(substrings);
		return result;
	}
	memcpy(substrings[current_substring], string, length);
	current_substring++;
	/* substrings[current_substring] is NULL already */
	*substring_array_ptr = substrings;
	return VDO_SUCCESS;
}
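/*
 * Usage sketch (illustrative): split_string("cpu=2", '=', &fields) yields
 * { "cpu", "2", NULL }, and split_string("a,,b", ',', &fields) yields
 * { "a", "", "b", NULL }, since empty substrings are preserved. Results are
 * released with free_string_array().
 */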
/*
 * Join the input substrings into one string, joined with the indicated character, returning a
 * string. array_length is a bound on the number of valid elements in substring_array, in case it
 * is not NULL-terminated.
 */
static int join_strings(char **substring_array, size_t array_length, char separator,
			char **string_ptr)
{
	size_t string_length = 0;
	size_t i;
	int result;
	char *output, *current_position;

	for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++)
		string_length += strlen(substring_array[i]) + 1;

	result = vdo_allocate(string_length, char, __func__, &output);
	if (result != VDO_SUCCESS)
		return result;

	current_position = &output[0];

	for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) {
		current_position = vdo_append_to_buffer(current_position,
							output + string_length, "%s",
							substring_array[i]);
		*current_position = separator;
		current_position++;
	}

	/* We output one too many separators; replace the last with a zero byte. */
	if (current_position != output)
		*(current_position - 1) = '\0';

	*string_ptr = output;
	return VDO_SUCCESS;
}
/**
 * parse_bool() - Parse a two-valued option into a bool.
 * @bool_str: The string value to convert to a bool.
 * @true_str: The string value which should be converted to true.
 * @false_str: The string value which should be converted to false.
 * @bool_ptr: A pointer to return the bool value in.
 *
 * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str.
 */
static inline int __must_check parse_bool(const char *bool_str, const char *true_str,
					  const char *false_str, bool *bool_ptr)
{
	bool value = false;

	if (strcmp(bool_str, true_str) == 0)
		value = true;
	else if (strcmp(bool_str, false_str) == 0)
		value = false;
	else
		return VDO_BAD_CONFIGURATION;

	*bool_ptr = value;
	return VDO_SUCCESS;
}
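/*
 * Usage sketch (illustrative): parse_bool("512", "512", "4096", &enable_512e)
 * sets enable_512e to true; any string other than "512" or "4096" yields
 * VDO_BAD_CONFIGURATION and leaves *bool_ptr untouched.
 */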
/**
 * process_one_thread_config_spec() - Process one component of a thread parameter configuration
 *				      string and update the configuration data structure.
 * @thread_param_type: The type of thread specified.
 * @count: The thread count requested.
 * @config: The configuration data structure to update.
 *
 * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the
 * thread name is unknown, a message is logged but no error is returned.
 *
 * Return: VDO_SUCCESS or -EINVAL
 */
static int process_one_thread_config_spec(const char *thread_param_type,
					  unsigned int count,
					  struct thread_count_config *config)
{
	/* Handle limited thread parameters */
	if (strcmp(thread_param_type, "bioRotationInterval") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required");
			return -EINVAL;
		} else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) {
			vdo_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d",
				      VDO_BIO_ROTATION_INTERVAL_LIMIT);
			return -EINVAL;
		}

		config->bio_rotation_interval = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "logical") == 0) {
		if (count > MAX_VDO_LOGICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'logical' threads are allowed",
				      MAX_VDO_LOGICAL_ZONES);
			return -EINVAL;
		}

		config->logical_zones = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "physical") == 0) {
		if (count > MAX_VDO_PHYSICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'physical' threads are allowed",
				      MAX_VDO_PHYSICAL_ZONES);
			return -EINVAL;
		}

		config->physical_zones = count;
		return VDO_SUCCESS;
	}

	/* Handle other thread count parameters */
	if (count > MAXIMUM_VDO_THREADS) {
		vdo_log_error("thread config string error: at most %d '%s' threads are allowed",
			      MAXIMUM_VDO_THREADS, thread_param_type);
		return -EINVAL;
	}

	if (strcmp(thread_param_type, "hash") == 0) {
		config->hash_zones = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "cpu") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'cpu' thread required");
			return -EINVAL;
		}

		config->cpu_threads = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "ack") == 0) {
		config->bio_ack_threads = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "bio") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'bio' thread required");
			return -EINVAL;
		}

		config->bio_threads = count;
		return VDO_SUCCESS;
	}

	/*
	 * Don't fail, just log. This will handle version mismatches between user mode tools and
	 * kernel.
	 */
	vdo_log_info("unknown thread parameter type \"%s\"", thread_param_type);
	return VDO_SUCCESS;
}
/**
 * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string
 *				    and update the configuration data structure.
 * @spec: The thread parameter specification string.
 * @config: The configuration data to be updated.
 */
static int parse_one_thread_config_spec(const char *spec,
					struct thread_count_config *config)
{
	unsigned int count;
	char **fields;
	int result;

	result = split_string(spec, '=', &fields);
	if (result != VDO_SUCCESS)
		return result;

	if ((fields[0] == NULL) || (fields[1] == NULL) || (fields[2] != NULL)) {
		vdo_log_error("thread config string error: expected thread parameter assignment, saw \"%s\"",
			      spec);
		free_string_array(fields);
		return -EINVAL;
	}

	result = kstrtouint(fields[1], 10, &count);
	if (result) {
		vdo_log_error("thread config string error: integer value needed, found \"%s\"",
			      fields[1]);
		free_string_array(fields);
		return result;
	}

	result = process_one_thread_config_spec(fields[0], count, config);
	free_string_array(fields);
	return result;
}
/**
 * parse_thread_config_string() - Parse the configuration string passed and update the specified
 *				  counts and other parameters of various types of threads to be
 *				  created.
 * @string: Thread parameter configuration string.
 * @config: The thread configuration data to update.
 *
 * The configuration string should contain one or more comma-separated specs of the form
 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval",
 * "logical", "physical", and "hash".
 *
 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop
 * further parsing.
 *
 * This function can't set the "reason" value the caller wants to pass back, because we'd want to
 * format it to say which field was invalid, and we can't allocate the "reason" strings
 * dynamically. So if an error occurs, we'll log the details and pass back an error.
 *
 * Return: VDO_SUCCESS or -EINVAL or -ENOMEM
 */
static int parse_thread_config_string(const char *string,
				      struct thread_count_config *config)
{
	int result = VDO_SUCCESS;
	char **specs;

	if (strcmp(".", string) != 0) {
		unsigned int i;

		result = split_string(string, ',', &specs);
		if (result != VDO_SUCCESS)
			return result;

		for (i = 0; specs[i] != NULL; i++) {
			result = parse_one_thread_config_spec(specs[i], config);
			if (result != VDO_SUCCESS)
				break;
		}

		free_string_array(specs);
	}

	return result;
}
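/*
 * Illustrative thread-config strings (not from the original source): "." for no
 * overrides, or "cpu=2,ack=1,bio=8,bioRotationInterval=64,logical=1,physical=1,hash=1".
 */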
/**
 * process_one_key_value_pair() - Process one component of an optional parameter string and update
 *				  the configuration data structure.
 * @key: The optional parameter key name.
 * @value: The optional parameter value.
 * @config: The configuration data structure to update.
 *
 * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is
 * unknown, a message is logged but no error is returned.
 *
 * Return: VDO_SUCCESS or -EINVAL
 */
static int process_one_key_value_pair(const char *key, unsigned int value,
				      struct device_config *config)
{
	/* Non thread optional parameters */
	if (strcmp(key, "maxDiscard") == 0) {
		if (value == 0) {
			vdo_log_error("optional parameter error: at least one max discard block required");
			return -EINVAL;
		}
		/* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */
		if (value > (UINT_MAX / VDO_BLOCK_SIZE)) {
			vdo_log_error("optional parameter error: at most %d max discard blocks are allowed",
				      UINT_MAX / VDO_BLOCK_SIZE);
			return -EINVAL;
		}

		config->max_discard_blocks = value;
		return VDO_SUCCESS;
	}

	/* Handles unknown key names */
	return process_one_thread_config_spec(key, value, &config->thread_counts);
}
/**
 * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data
 *				structure.
 * @key: The optional key name.
 * @value: The optional value.
 * @config: The configuration data to be updated.
 *
 * Return: VDO_SUCCESS or error.
 */
static int parse_one_key_value_pair(const char *key, const char *value,
				    struct device_config *config)
{
	unsigned int count;
	int result;

	if (strcmp(key, "deduplication") == 0)
		return parse_bool(value, "on", "off", &config->deduplication);

	if (strcmp(key, "compression") == 0)
		return parse_bool(value, "on", "off", &config->compression);

	/* The remaining arguments must have integral values. */
	result = kstrtouint(value, 10, &count);
	if (result) {
		vdo_log_error("optional config string error: integer value needed, found \"%s\"",
			      value);
		return result;
	}

	return process_one_key_value_pair(key, count, config);
}
/**
 * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments.
 * @argc: The total number of arguments in the list.
 * @argv: The list of key/value pairs.
 * @config: The device configuration data to update.
 *
 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop
 * further parsing.
 *
 * This function can't set the "reason" value the caller wants to pass back, because we'd want to
 * format it to say which field was invalid, and we can't allocate the "reason" strings
 * dynamically. So if an error occurs, we'll log the details and return the error.
 *
 * Return: VDO_SUCCESS or error
 */
static int parse_key_value_pairs(int argc, char **argv, struct device_config *config)
{
	int result = VDO_SUCCESS;

	while (argc) {
		result = parse_one_key_value_pair(argv[0], argv[1], config);
		if (result != VDO_SUCCESS)
			break;

		argc -= 2;
		argv += 2;
	}

	return result;
}
/**
 * parse_optional_arguments() - Parse the configuration string passed in for optional arguments.
 * @arg_set: The structure holding the arguments to parse.
 * @error_ptr: Pointer to a buffer to hold the error string.
 * @config: Pointer to device configuration data to update.
 *
 * For V0/V1 configurations, there will only be one optional parameter: the thread configuration.
 * The configuration string should contain one or more comma-separated specs of the form
 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval",
 * "logical", "physical", and "hash".
 *
 * For V2 configurations and beyond, there could be any number of arguments. They should contain
 * one or more key/value pairs separated by a space.
 *
 * Return: VDO_SUCCESS or error
 */
static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr,
				    struct device_config *config)
{
	int result = VDO_SUCCESS;

	if (config->version == 0 || config->version == 1) {
		result = parse_thread_config_string(arg_set->argv[0],
						    &config->thread_counts);
		if (result != VDO_SUCCESS) {
			*error_ptr = "Invalid thread-count configuration";
			return VDO_BAD_CONFIGURATION;
		}
	} else {
		if ((arg_set->argc % 2) != 0) {
			*error_ptr = "Odd number of optional arguments given but they should be <key> <value> pairs";
			return VDO_BAD_CONFIGURATION;
		}

		result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config);
		if (result != VDO_SUCCESS) {
			*error_ptr = "Invalid optional argument configuration";
			return VDO_BAD_CONFIGURATION;
		}
	}

	return result;
}
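/*
 * Illustrative V2+ optional arguments (not from the original source), passed as
 * space-separated <key> <value> pairs at the end of the table line:
 *
 *	maxDiscard 1500 deduplication on compression off cpu 2 hash 1 logical 1 physical 1
 */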
/**
 * handle_parse_error() - Handle a parsing error.
 * @config: The config to free.
 * @error_ptr: A place to store a constant string about the error.
 * @error_str: A constant string to store in error_ptr.
 */
static void handle_parse_error(struct device_config *config, char **error_ptr,
			       char *error_str)
{
	free_device_config(config);
	*error_ptr = error_str;
}
/**
 * parse_device_config() - Convert the dmsetup table into a struct device_config.
 * @argc: The number of table values.
 * @argv: The array of table values.
 * @ti: The target structure for this table.
 * @config_ptr: A pointer to return the allocated config.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int parse_device_config(int argc, char **argv, struct dm_target *ti,
			       struct device_config **config_ptr)
{
	bool enable_512e;
	size_t logical_bytes = to_bytes(ti->len);
	struct dm_arg_set arg_set;
	char **error_ptr = &ti->error;
	struct device_config *config = NULL;
	int result;

	if ((logical_bytes % VDO_BLOCK_SIZE) != 0) {
		handle_parse_error(config, error_ptr,
				   "Logical size must be a multiple of 4096");
		return VDO_BAD_CONFIGURATION;
	}

	if (argc == 0) {
		handle_parse_error(config, error_ptr, "Incorrect number of arguments");
		return VDO_BAD_CONFIGURATION;
	}

	result = vdo_allocate(1, struct device_config, "device_config", &config);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Could not allocate config structure");
		return VDO_BAD_CONFIGURATION;
	}

	config->owning_target = ti;
	config->logical_blocks = logical_bytes / VDO_BLOCK_SIZE;
	INIT_LIST_HEAD(&config->config_list);

	/* Save the original string. */
	result = join_strings(argv, argc, ' ', &config->original_string);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Could not populate string");
		return VDO_BAD_CONFIGURATION;
	}

	vdo_log_info("table line: %s", config->original_string);

	config->thread_counts = (struct thread_count_config) {
		.bio_ack_threads = 1,
		.bio_threads = DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT,
		.bio_rotation_interval = DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL,
		.cpu_threads = 1,
		.logical_zones = 0,
		.physical_zones = 0,
		.hash_zones = 0,
	};
	config->max_discard_blocks = 1;
	config->deduplication = true;
	config->compression = false;

	arg_set.argc = argc;
	arg_set.argv = argv;

	result = get_version_number(argc, argv, error_ptr, &config->version);
	if (result != VDO_SUCCESS) {
		/* get_version_number sets error_ptr itself. */
		handle_parse_error(config, error_ptr, *error_ptr);
		return result;
	}
	/* Move the arg pointer forward only if the argument was there. */
	if (config->version >= 1)
		dm_shift_arg(&arg_set);

	result = vdo_duplicate_string(dm_shift_arg(&arg_set), "parent device name",
				      &config->parent_device_name);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Could not copy parent device name");
		return VDO_BAD_CONFIGURATION;
	}

	/* Get the physical blocks, if known. */
	if (config->version >= 1) {
		result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks);
		if (result != VDO_SUCCESS) {
			handle_parse_error(config, error_ptr,
					   "Invalid physical block count");
			return VDO_BAD_CONFIGURATION;
		}
	}

	/* Get the logical block size and validate */
	result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Invalid logical block size");
		return VDO_BAD_CONFIGURATION;
	}
	config->logical_block_size = (enable_512e ? 512 : 4096);

	/* Skip past the two no longer used read cache options. */
	if (config->version <= 1)
		dm_consume_args(&arg_set, 2);

	/* Get the page cache size. */
	result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Invalid block map page cache size");
		return VDO_BAD_CONFIGURATION;
	}

	/* Get the block map era length. */
	result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Invalid block map maximum age");
		return VDO_BAD_CONFIGURATION;
	}

	/* Skip past the no longer used MD RAID5 optimization mode */
	if (config->version <= 2)
		dm_consume_args(&arg_set, 1);

	/* Skip past the no longer used write policy setting */
	if (config->version <= 3)
		dm_consume_args(&arg_set, 1);

	/* Skip past the no longer used pool name for older table lines */
	if (config->version <= 2) {
		/*
		 * Make sure the enum to get the pool name from argv directly is still in sync with
		 * the parsing of the table line.
		 */
		if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) {
			handle_parse_error(config, error_ptr,
					   "Pool name not in expected location");
			return VDO_BAD_CONFIGURATION;
		}
		dm_shift_arg(&arg_set);
	}

	/* Get the optional arguments and validate. */
	result = parse_optional_arguments(&arg_set, error_ptr, config);
	if (result != VDO_SUCCESS) {
		/* parse_optional_arguments sets error_ptr itself. */
		handle_parse_error(config, error_ptr, *error_ptr);
		return result;
	}

	/*
	 * Logical, physical, and hash zone counts can all be zero; then we get one thread doing
	 * everything, our older configuration. If any zone count is non-zero, the others must be
	 * as well.
	 */
	if (((config->thread_counts.logical_zones == 0) !=
	     (config->thread_counts.physical_zones == 0)) ||
	    ((config->thread_counts.physical_zones == 0) !=
	     (config->thread_counts.hash_zones == 0))) {
		handle_parse_error(config, error_ptr,
				   "Logical, physical, and hash zones counts must all be zero or all non-zero");
		return VDO_BAD_CONFIGURATION;
	}

	if (config->cache_size <
	    (2 * MAXIMUM_VDO_USER_VIOS * config->thread_counts.logical_zones)) {
		handle_parse_error(config, error_ptr,
				   "Insufficient block map cache for logical zones");
		return VDO_BAD_CONFIGURATION;
	}

	result = dm_get_device(ti, config->parent_device_name,
			       dm_table_get_mode(ti->table), &config->owned_device);
	if (result != 0) {
		vdo_log_error("couldn't open device \"%s\": error %d",
			      config->parent_device_name, result);
		handle_parse_error(config, error_ptr, "Unable to open storage device");
		return VDO_BAD_CONFIGURATION;
	}

	if (config->version == 0) {
		u64 device_size = bdev_nr_bytes(config->owned_device->bdev);

		config->physical_blocks = device_size / VDO_BLOCK_SIZE;
	}

	*config_ptr = config;
	return VDO_SUCCESS;
}
static struct vdo *get_vdo_for_target(struct dm_target *ti)
{
	return ((struct device_config *) ti->private)->vdo;
}
static int vdo_map_bio(struct dm_target *ti, struct bio *bio)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct vdo_work_queue *current_work_queue;
	const struct admin_state_code *code = vdo_get_admin_state_code(&vdo->admin.state);

	VDO_ASSERT_LOG_ONLY(code->normal, "vdo should not receive bios while in state %s",
			    code->name);

	/* Count all incoming bios. */
	vdo_count_bios(&vdo->stats.bios_in, bio);

	/* Handle empty bios. Empty flush bios are not associated with a vio. */
	if ((bio_op(bio) == REQ_OP_FLUSH) || ((bio->bi_opf & REQ_PREFLUSH) != 0)) {
		vdo_launch_flush(vdo, bio);
		return DM_MAPIO_SUBMITTED;
	}

	/* Submitting a bio from one of this vdo's own threads could deadlock. */
	current_work_queue = vdo_get_current_work_queue();
	BUG_ON((current_work_queue != NULL) &&
	       (vdo == vdo_get_work_queue_owner(current_work_queue)->vdo));
	vdo_launch_bio(vdo->data_vio_pool, bio);
	return DM_MAPIO_SUBMITTED;
}
static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct vdo *vdo = get_vdo_for_target(ti);

	limits->logical_block_size = vdo->device_config->logical_block_size;
	limits->physical_block_size = VDO_BLOCK_SIZE;

	/* The minimum io size for random io */
	limits->io_min = VDO_BLOCK_SIZE;
	/* The optimal io size for streamed/sequential io */
	limits->io_opt = VDO_BLOCK_SIZE;

	/*
	 * Sets the maximum discard size that will be passed into VDO. This value comes from a
	 * table line value passed in during dmsetup create.
	 *
	 * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a
	 * busy HD system takes 31 seconds. We should use a value no higher than 1024, which takes
	 * 15 to 16 seconds on a busy HD system. However, using large values results in 120 second
	 * blocked task warnings in kernel logs. In order to avoid these warnings, we choose to
	 * use the smallest reasonable value.
	 *
	 * The value is used by dm-thin to determine whether to pass down discards. The block layer
	 * splits large discards on this boundary when this is set.
	 */
	limits->max_hw_discard_sectors =
		(vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK);

	/*
	 * Force discards to not begin or end with a partial block by stating the granularity is
	 * 4k.
	 */
	limits->discard_granularity = VDO_BLOCK_SIZE;
}
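/*
 * Worked example (illustrative): with VDO_BLOCK_SIZE of 4096, VDO_SECTORS_PER_BLOCK
 * is 8, so the default maxDiscard of 1 block yields max_hw_discard_sectors = 8,
 * i.e. discards are split to single 4K blocks before reaching VDO.
 */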
static int vdo_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn,
			       void *data)
{
	struct device_config *config = get_vdo_for_target(ti)->device_config;

	return fn(ti, config->owned_device, 0,
		  config->physical_blocks * VDO_SECTORS_PER_BLOCK, data);
}
/*
 * Status line is:
 *	<device> <operating mode> <in recovery> <index state> <compression state>
 *	<used physical blocks> <total physical blocks>
 */
static void vdo_status(struct dm_target *ti, status_type_t status_type,
		       unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct vdo_statistics *stats;
	struct device_config *device_config;
	/* N.B.: The DMEMIT macro uses the variables named "sz", "result", "maxlen". */
	int sz = 0;

	switch (status_type) {
	case STATUSTYPE_INFO:
		/* Report info for dmsetup status */
		mutex_lock(&vdo->stats_mutex);
		vdo_fetch_statistics(vdo, &vdo->stats_buffer);
		stats = &vdo->stats_buffer;

		DMEMIT("/dev/%pg %s %s %s %s %llu %llu",
		       vdo_get_backing_device(vdo), stats->mode,
		       stats->in_recovery_mode ? "recovering" : "-",
		       vdo_get_dedupe_index_state_name(vdo->hash_zones),
		       vdo_get_compressing(vdo) ? "online" : "offline",
		       stats->data_blocks_used + stats->overhead_blocks_used,
		       stats->physical_blocks);
		mutex_unlock(&vdo->stats_mutex);
		break;

	case STATUSTYPE_TABLE:
		/* Report the string actually specified in the beginning. */
		device_config = (struct device_config *) ti->private;
		DMEMIT("%s", device_config->original_string);
		break;

	case STATUSTYPE_IMA:
		/* FIXME: We ought to be more detailed here, but this is what thin does. */
		*result = '\0';
		break;
	}
}
static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo)
{
	return bdev_nr_bytes(vdo_get_backing_device(vdo)) / VDO_BLOCK_SIZE;
}
static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc,
						   char **argv)
{
	if ((argc == 2) && (strcasecmp(argv[0], "compression") == 0)) {
		if (strcasecmp(argv[1], "on") == 0) {
			vdo_set_compressing(vdo, true);
			return 0;
		}

		if (strcasecmp(argv[1], "off") == 0) {
			vdo_set_compressing(vdo, false);
			return 0;
		}

		vdo_log_warning("invalid argument '%s' to dmsetup compression message",
				argv[1]);
		return -EINVAL;
	}

	vdo_log_warning("unrecognized dmsetup message '%s' received", argv[0]);
	return -EINVAL;
}
/*
 * If the message is a dump, just do it. Otherwise, check that no other message is being processed,
 * and only proceed if so.
 * Returns -EBUSY if another message is being processed.
 */
static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc,
					    char **argv)
{
	int result;

	/*
	 * All messages which may be processed in parallel with other messages should be handled
	 * here before the atomic check below. Messages which should be exclusive should be
	 * processed in process_vdo_message_locked().
	 */

	/* Dump messages should always be processed */
	if (strcasecmp(argv[0], "dump") == 0)
		return vdo_dump(vdo, argc, argv, "dmsetup message");

	if (argc == 1) {
		if (strcasecmp(argv[0], "dump-on-shutdown") == 0) {
			vdo->dump_on_shutdown = true;
			return 0;
		}

		/* Index messages should always be processed */
		if ((strcasecmp(argv[0], "index-close") == 0) ||
		    (strcasecmp(argv[0], "index-create") == 0) ||
		    (strcasecmp(argv[0], "index-disable") == 0) ||
		    (strcasecmp(argv[0], "index-enable") == 0))
			return vdo_message_dedupe_index(vdo->hash_zones, argv[0]);
	}

	if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0)
		return -EBUSY;

	result = process_vdo_message_locked(vdo, argc, argv);

	/* Pairs with the implicit barrier in cmpxchg just above */
	smp_wmb();
	atomic_set(&vdo->processing_message, 0);
	return result;
}
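/*
 * Illustrative message invocations (the "0" sector argument is required by dmsetup):
 *
 *	dmsetup message vdo0 0 dump
 *	dmsetup message vdo0 0 compression on
 *	dmsetup message vdo0 0 index-enable
 */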
static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv,
		       char *result_buffer, unsigned int maxlen)
{
	struct registered_thread allocating_thread, instance_thread;
	struct vdo *vdo;
	int result;

	if (argc == 0) {
		vdo_log_warning("unspecified dmsetup message");
		return -EINVAL;
	}

	vdo = get_vdo_for_target(ti);
	vdo_register_allocating_thread(&allocating_thread, NULL);
	vdo_register_thread_device_id(&instance_thread, &vdo->instance);

	/*
	 * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a
	 * return code to look at the buffer and see if it is full or not.
	 */
	if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) {
		vdo_write_stats(vdo, result_buffer, maxlen);
		result = 1;
	} else if ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) {
		vdo_write_config(vdo, &result_buffer, &maxlen);
		result = 1;
	} else {
		result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv));
	}

	vdo_unregister_thread_device_id();
	vdo_unregister_allocating_thread();
	return result;
}
static void configure_target_capabilities(struct dm_target *ti)
{
	ti->discards_supported = 1;
	ti->flush_supported = true;
	ti->num_discard_bios = 1;
	ti->num_flush_bios = 1;

	/*
	 * If this value changes, please make sure to update the value for max_discard_sectors
	 * accordingly.
	 */
	BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0);
}
/*
 * Implements vdo_filter_fn.
 */
static bool vdo_uses_device(struct vdo *vdo, const void *context)
{
	const struct device_config *config = context;

	return vdo_get_backing_device(vdo)->bd_dev == config->owned_device->bdev->bd_dev;
}
/**
 * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in
 *			       progress.
 */
static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo)
{
	switch (vdo->admin.phase) {
	case RESUME_PHASE_PACKER:
	case RESUME_PHASE_FLUSHER:
	case SUSPEND_PHASE_PACKER:
	case SUSPEND_PHASE_FLUSHES:
		return vdo->thread_config.packer_thread;

	case RESUME_PHASE_DATA_VIOS:
	case SUSPEND_PHASE_DATA_VIOS:
		return vdo->thread_config.cpu_thread;

	case LOAD_PHASE_DRAIN_JOURNAL:
	case RESUME_PHASE_JOURNAL:
	case SUSPEND_PHASE_JOURNAL:
		return vdo->thread_config.journal_thread;

	default:
		return vdo->thread_config.admin_thread;
	}
}
static struct vdo_completion *prepare_admin_completion(struct vdo *vdo,
						       vdo_action_fn callback,
						       vdo_action_fn error_handler)
{
	struct vdo_completion *completion = &vdo->admin.completion;

	/*
	 * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset
	 * any error in the completion.
	 */
	completion->callback = callback;
	completion->error_handler = error_handler;
	completion->callback_thread_id = get_thread_id_for_phase(vdo);
	completion->requeue = true;
	return completion;
}
/**
 * advance_phase() - Increment the phase of the current admin operation and prepare the admin
 *		     completion to run on the thread for the next phase.
 * @vdo: The vdo on which an admin operation is being performed.
 *
 * Return: The current phase.
 */
static u32 advance_phase(struct vdo *vdo)
{
	u32 phase = vdo->admin.phase++;

	vdo->admin.completion.callback_thread_id = get_thread_id_for_phase(vdo);
	vdo->admin.completion.requeue = true;
	return phase;
}
/*
 * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method
 * should not be called from vdo threads.
 */
static int perform_admin_operation(struct vdo *vdo, u32 starting_phase,
				   vdo_action_fn callback, vdo_action_fn error_handler,
				   const char *type)
{
	int result;
	struct vdo_administrator *admin = &vdo->admin;

	if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) {
		return vdo_log_error_strerror(VDO_COMPONENT_BUSY,
					      "Can't start %s operation, another operation is already in progress",
					      type);
	}

	admin->phase = starting_phase;
	reinit_completion(&admin->callback_sync);
	vdo_reset_completion(&admin->completion);
	vdo_launch_completion(prepare_admin_completion(vdo, callback, error_handler));

	/*
	 * Using the "interruptible" interface means that Linux will not log a message when we wait
	 * for more than 120 seconds.
	 */
	while (wait_for_completion_interruptible(&admin->callback_sync)) {
		/* However, if we get a signal in a user-mode process, we could spin... */
		fsleep(1000);
	}

	result = admin->completion.result;
	/* Pairs with the implicit barrier in cmpxchg above */
	smp_wmb();
	atomic_set(&admin->busy, 0);
	return result;
}
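/*
 * Sketch of the control flow (reconstructed from the code above, for illustration):
 * perform_admin_operation() launches the admin completion with the caller's callback;
 * each callback invocation calls advance_phase(), handles one phase, and requeues
 * itself on the thread for the next phase; finish_operation_callback() (below)
 * finally signals admin->callback_sync, releasing the waiting caller.
 */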
/* Assert that we are operating on the correct thread for the current phase. */
static void assert_admin_phase_thread(struct vdo *vdo, const char *what)
{
	VDO_ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo),
			    "%s on correct thread for %s", what,
			    ADMIN_PHASE_NAMES[vdo->admin.phase]);
}
/**
 * finish_operation_callback() - Callback to finish an admin operation.
 * @completion: The admin_completion.
 */
static void finish_operation_callback(struct vdo_completion *completion)
{
	struct vdo_administrator *admin = &completion->vdo->admin;

	vdo_finish_operation(&admin->state, completion->result);
	complete(&admin->callback_sync);
}
/**
 * decode_from_super_block() - Decode the VDO state from the super block and validate that it is
 *			       correct.
 * @vdo: The vdo being loaded.
 *
 * On error from this method, the component states must be destroyed explicitly. If this method
 * returns successfully, the component states must not be destroyed.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check decode_from_super_block(struct vdo *vdo)
{
	const struct device_config *config = vdo->device_config;
	int result;

	result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry,
					     &vdo->states);
	if (result != VDO_SUCCESS)
		return result;

	vdo_set_state(vdo, vdo->states.vdo.state);
	vdo->load_state = vdo->states.vdo.state;

	/*
	 * If the device config specifies a larger logical size than was recorded in the super
	 * block, just accept it.
	 */
	if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) {
		vdo_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				(unsigned long long) config->logical_blocks,
				(unsigned long long) vdo->states.vdo.config.logical_blocks);
		vdo->states.vdo.config.logical_blocks = config->logical_blocks;
	}

	result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce,
					       config->physical_blocks,
					       config->logical_blocks);
	if (result != VDO_SUCCESS)
		return result;

	vdo->layout = vdo->states.layout;
	return VDO_SUCCESS;
}
/**
 * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding
 *		  portions of the vdo being loaded.
 * @vdo: The vdo being loaded.
 *
 * This will also allocate the recovery journal and slab depot. If this method is called with an
 * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block
 * map and packer will be constructed as well.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check decode_vdo(struct vdo *vdo)
{
	block_count_t maximum_age, journal_length;
	struct partition *partition;
	int result;

	result = decode_from_super_block(vdo);
	if (result != VDO_SUCCESS) {
		vdo_destroy_component_states(&vdo->states);
		return result;
	}

	maximum_age = vdo_convert_maximum_age(vdo->device_config->block_map_maximum_age);
	journal_length =
		vdo_get_recovery_journal_length(vdo->states.vdo.config.recovery_journal_size);
	if (maximum_age > (journal_length / 2)) {
		return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
					      "maximum age: %llu exceeds limit %llu",
					      (unsigned long long) maximum_age,
					      (unsigned long long) (journal_length / 2));
	}

	if (maximum_age == 0) {
		return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
					      "maximum age must be greater than 0");
	}

	result = vdo_enable_read_only_entry(vdo);
	if (result != VDO_SUCCESS)
		return result;

	partition = vdo_get_known_partition(&vdo->layout,
					    VDO_RECOVERY_JOURNAL_PARTITION);
	result = vdo_decode_recovery_journal(vdo->states.recovery_journal,
					     vdo->states.vdo.nonce, vdo, partition,
					     vdo->states.vdo.complete_recoveries,
					     vdo->states.vdo.config.recovery_journal_size,
					     &vdo->recovery_journal);
	if (result != VDO_SUCCESS)
		return result;

	partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION);
	result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition,
				       &vdo->depot);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_decode_block_map(vdo->states.block_map,
				      vdo->states.vdo.config.logical_blocks, vdo,
				      vdo->recovery_journal, vdo->states.vdo.nonce,
				      vdo->device_config->cache_size, maximum_age,
				      &vdo->block_map);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_make_physical_zones(vdo, &vdo->physical_zones);
	if (result != VDO_SUCCESS)
		return result;

	/* The logical zones depend on the physical zones already existing. */
	result = vdo_make_logical_zones(vdo, &vdo->logical_zones);
	if (result != VDO_SUCCESS)
		return result;

	return vdo_make_hash_zones(vdo, &vdo->hash_zones);
}
/**
 * pre_load_callback() - Callback to initiate a pre-load, registered in vdo_initialize().
 * @completion: The admin completion.
 */
static void pre_load_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case PRE_LOAD_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_PRE_LOADING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		vdo_load_super_block(vdo, completion);
		return;

	case PRE_LOAD_PHASE_LOAD_COMPONENTS:
		vdo_continue_completion(completion, decode_vdo(vdo));
		return;

	case PRE_LOAD_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}
static void release_instance(unsigned int instance)
{
	mutex_lock(&instances_lock);
	if (instance >= instances.bit_count) {
		VDO_ASSERT_LOG_ONLY(false,
				    "instance number %u must be less than bit count %u",
				    instance, instances.bit_count);
	} else if (test_bit(instance, instances.words) == 0) {
		VDO_ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance);
	} else {
		__clear_bit(instance, instances.words);
		instances.count -= 1;
	}

	mutex_unlock(&instances_lock);
}
static void set_device_config(struct dm_target *ti, struct vdo *vdo,
			      struct device_config *config)
{
	list_del_init(&config->config_list);
	list_add_tail(&config->config_list, &vdo->device_config_list);
	config->vdo = vdo;
	ti->private = config;
	configure_target_capabilities(ti);
}
static int vdo_initialize(struct dm_target *ti, unsigned int instance,
			  struct device_config *config)
{
	struct vdo *vdo;
	int result;
	u64 block_size = VDO_BLOCK_SIZE;
	u64 logical_size = to_bytes(ti->len);
	block_count_t logical_blocks = logical_size / block_size;

	vdo_log_info("loading device '%s'", vdo_get_device_name(ti));
	vdo_log_debug("Logical block size     = %llu", (u64) config->logical_block_size);
	vdo_log_debug("Logical blocks         = %llu", logical_blocks);
	vdo_log_debug("Physical block size    = %llu", (u64) block_size);
	vdo_log_debug("Physical blocks        = %llu", config->physical_blocks);
	vdo_log_debug("Block map cache blocks = %u", config->cache_size);
	vdo_log_debug("Block map maximum age  = %u", config->block_map_maximum_age);
	vdo_log_debug("Deduplication          = %s", (config->deduplication ? "on" : "off"));
	vdo_log_debug("Compression            = %s", (config->compression ? "on" : "off"));

	vdo = vdo_find_matching(vdo_uses_device, config);
	if (vdo != NULL) {
		vdo_log_error("Existing vdo already uses device %s",
			      vdo->device_config->parent_device_name);
		ti->error = "Cannot share storage device with already-running VDO";
		return VDO_BAD_CONFIGURATION;
	}

	result = vdo_make(instance, config, &ti->error, &vdo);
	if (result != VDO_SUCCESS) {
		vdo_log_error("Could not create VDO device. (VDO error %d, message %s)",
			      result, ti->error);
		vdo_destroy(vdo);
		return result;
	}

	result = perform_admin_operation(vdo, PRE_LOAD_PHASE_START, pre_load_callback,
					 finish_operation_callback, "pre-load");
	if (result != VDO_SUCCESS) {
		ti->error = ((result == VDO_INVALID_ADMIN_STATE) ?
			     "Pre-load is only valid immediately after initialization" :
			     "Cannot load metadata from device");
		vdo_log_error("Could not start VDO device. (VDO error %d, message %s)",
			      result, ti->error);
		vdo_destroy(vdo);
		return result;
	}

	set_device_config(ti, vdo, config);
	vdo->device_config = config;
	return VDO_SUCCESS;
}
/* Implements vdo_filter_fn. */
static bool __must_check vdo_is_named(struct vdo *vdo, const void *context)
{
	struct dm_target *ti = vdo->device_config->owning_target;
	const char *device_name = vdo_get_device_name(ti);

	return strcmp(device_name, context) == 0;
}
/**
 * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified
 *			  capacity in an array of unsigned longs.
 * @bit_count: The number of bits the array must hold.
 *
 * Return: the number of bytes needed for the array representation.
 */
static size_t get_bit_array_size(unsigned int bit_count)
{
	/* Round up to a multiple of the word size and convert to a byte count. */
	return (BITS_TO_LONGS(bit_count) * sizeof(unsigned long));
}
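/*
 * Worked example (illustrative): on a 64-bit kernel, get_bit_array_size(1000) is
 * BITS_TO_LONGS(1000) * 8 = 16 * 8 = 128 bytes - the "16 words" mentioned in the
 * instance_tracker comment above.
 */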
/**
 * grow_bit_array() - Re-allocate the bitmap word array so there will be more instance numbers
 *		      that can be allocated.
 *
 * Since the array is initially NULL, this also initializes the array the first time we allocate an
 * instance number.
 *
 * Return: VDO_SUCCESS or an error code from the allocation.
 */
static int grow_bit_array(void)
{
	unsigned int new_count = max(instances.bit_count + BIT_COUNT_INCREMENT,
				     (unsigned int) BIT_COUNT_MINIMUM);
	unsigned long *new_words;
	int result;

	result = vdo_reallocate_memory(instances.words,
				       get_bit_array_size(instances.bit_count),
				       get_bit_array_size(new_count),
				       "instance number bit array", &new_words);
	if (result != VDO_SUCCESS)
		return result;

	instances.bit_count = new_count;
	instances.words = new_words;
	return VDO_SUCCESS;
}
/**
 * allocate_instance() - Allocate an instance number.
 * @instance_ptr: A pointer to hold the instance number.
 *
 * Return: VDO_SUCCESS or an error code.
 *
 * This function must be called while holding the instances lock.
 */
static int allocate_instance(unsigned int *instance_ptr)
{
	unsigned int instance;
	int result;

	/* If there are no unallocated instances, grow the bit array. */
	if (instances.count >= instances.bit_count) {
		result = grow_bit_array();
		if (result != VDO_SUCCESS)
			return result;
	}

	/*
	 * There must be a zero bit somewhere now. Find it, starting just after the last instance
	 * allocated.
	 */
	instance = find_next_zero_bit(instances.words, instances.bit_count,
				      instances.next);
	if (instance >= instances.bit_count) {
		/* Nothing free after next, so wrap around to instance zero. */
		instance = find_first_zero_bit(instances.words, instances.bit_count);
		result = VDO_ASSERT(instance < instances.bit_count,
				    "impossibly, no zero bit found");
		if (result != VDO_SUCCESS)
			return result;
	}

	__set_bit(instance, instances.words);
	instances.count++;
	instances.next = instance + 1;
	*instance_ptr = instance;
	return VDO_SUCCESS;
}
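/*
 * Usage sketch (mirroring construct_new_vdo() below): callers serialize on
 * instances_lock:
 *
 *	mutex_lock(&instances_lock);
 *	result = allocate_instance(&instance);
 *	mutex_unlock(&instances_lock);
 */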
static int construct_new_vdo_registered(struct dm_target *ti, unsigned int argc,
					char **argv, unsigned int instance)
{
	int result;
	struct device_config *config;

	result = parse_device_config(argc, argv, ti, &config);
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result, "parsing failed: %s", ti->error);
		release_instance(instance);
		return -EINVAL;
	}

	/* Beyond this point, the instance number will be cleaned up for us if needed */
	result = vdo_initialize(ti, instance, config);
	if (result != VDO_SUCCESS) {
		release_instance(instance);
		free_device_config(config);
		return vdo_status_to_errno(result);
	}

	return VDO_SUCCESS;
}
static int construct_new_vdo(struct dm_target *ti, unsigned int argc, char **argv)
{
	int result;
	unsigned int instance;
	struct registered_thread instance_thread;

	mutex_lock(&instances_lock);
	result = allocate_instance(&instance);
	mutex_unlock(&instances_lock);
	if (result != VDO_SUCCESS)
		return -ENOMEM;

	vdo_register_thread_device_id(&instance_thread, &instance);
	result = construct_new_vdo_registered(ti, argc, argv, instance);
	vdo_unregister_thread_device_id();
	return result;
}
/**
 * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in
 *			       vdo_prepare_to_grow_physical().
 * @completion: The admin completion.
 */
static void check_may_grow_physical(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	assert_admin_phase_thread(vdo, __func__);

	/* These checks can only be done from a vdo thread. */
	if (vdo_is_read_only(vdo))
		vdo_set_completion_result(completion, VDO_READ_ONLY);

	if (vdo_in_recovery_mode(vdo))
		vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD);

	finish_operation_callback(completion);
}
static block_count_t get_partition_size(struct layout *layout, enum partition_id id)
{
	return vdo_get_known_partition(layout, id)->count;
}
/**
 * grow_layout() - Make the layout for growing a vdo.
 * @vdo: The vdo preparing to grow.
 * @old_size: The current size of the vdo.
 * @new_size: The size to which the vdo will be grown.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size)
{
	int result;
	block_count_t min_new_size;

	if (vdo->next_layout.size == new_size) {
		/* We are already prepared to grow to the new size, so we're done. */
		return VDO_SUCCESS;
	}

	/* Make a copy completion if there isn't one */
	if (vdo->partition_copier == NULL) {
		vdo->partition_copier = dm_kcopyd_client_create(NULL);
		if (IS_ERR(vdo->partition_copier)) {
			result = PTR_ERR(vdo->partition_copier);
			vdo->partition_copier = NULL;
			return result;
		}
	}

	/* Free any unused preparation. */
	vdo_uninitialize_layout(&vdo->next_layout);

	/*
	 * Make a new layout with the existing partition sizes for everything but the slab depot
	 * partition.
	 */
	result = vdo_initialize_layout(new_size, vdo->layout.start,
				       get_partition_size(&vdo->layout,
							  VDO_BLOCK_MAP_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_RECOVERY_JOURNAL_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_SLAB_SUMMARY_PARTITION),
				       &vdo->next_layout);
	if (result != VDO_SUCCESS) {
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return result;
	}

	/* Ensure the new journal and summary are entirely within the added blocks. */
	min_new_size = (old_size +
			get_partition_size(&vdo->next_layout,
					   VDO_SLAB_SUMMARY_PARTITION) +
			get_partition_size(&vdo->next_layout,
					   VDO_RECOVERY_JOURNAL_PARTITION));
	if (min_new_size > new_size) {
		/* Copying the journal and summary would destroy some old metadata. */
		vdo_uninitialize_layout(&vdo->next_layout);
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return VDO_INCREMENT_TOO_SMALL;
	}

	return VDO_SUCCESS;
}
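/*
 * Worked example (illustrative numbers): growing from an old_size of 1000 blocks to
 * a new_size of 1060 with a 64-block recovery journal and a 64-block slab summary
 * gives min_new_size = 1000 + 64 + 64 = 1128 > 1060, so the grow is rejected with
 * VDO_INCREMENT_TOO_SMALL.
 */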
static int prepare_to_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t current_physical_blocks = vdo->states.vdo.config.physical_blocks;

	vdo_log_info("Preparing to resize physical to %llu",
		     (unsigned long long) new_physical_blocks);
	VDO_ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks),
			    "New physical size is larger than current physical size");
	result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START,
					 check_may_grow_physical,
					 finish_operation_callback,
					 "prepare grow-physical");
	if (result != VDO_SUCCESS)
		return result;

	result = grow_layout(vdo, current_physical_blocks, new_physical_blocks);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_prepare_to_grow_slab_depot(vdo->depot,
						vdo_get_known_partition(&vdo->next_layout,
									VDO_SLAB_DEPOT_PARTITION));
	if (result != VDO_SUCCESS) {
		vdo_uninitialize_layout(&vdo->next_layout);
		return result;
	}

	vdo_log_info("Done preparing to resize physical");
	return VDO_SUCCESS;
}
/**
 * validate_new_device_config() - Check whether a new device config represents a valid modification
 *				  to an existing config.
 * @to_validate: The new config to validate.
 * @config: The existing config.
 * @may_grow: Set to true if growing the logical and physical size of the vdo is currently
 *	      permitted.
 * @error_ptr: A pointer to hold the reason for any error.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int validate_new_device_config(struct device_config *to_validate,
				      struct device_config *config, bool may_grow,
				      char **error_ptr)
{
	if (to_validate->owning_target->begin != config->owning_target->begin) {
		*error_ptr = "Starting sector cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_block_size != config->logical_block_size) {
		*error_ptr = "Logical block size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_blocks < config->logical_blocks) {
		*error_ptr = "Can't shrink VDO logical size";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->cache_size != config->cache_size) {
		*error_ptr = "Block map cache size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->block_map_maximum_age != config->block_map_maximum_age) {
		*error_ptr = "Block map maximum age cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (memcmp(&to_validate->thread_counts, &config->thread_counts,
		   sizeof(struct thread_count_config)) != 0) {
		*error_ptr = "Thread configuration cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->physical_blocks < config->physical_blocks) {
		*error_ptr = "Removing physical storage from a VDO is not supported";
		return VDO_NOT_IMPLEMENTED;
	}

	if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) {
		*error_ptr = "VDO physical size may not grow in current state";
		return VDO_NOT_IMPLEMENTED;
	}

	return VDO_SUCCESS;
}
static int prepare_to_modify(struct dm_target *ti, struct device_config *config,
			     struct vdo *vdo)
{
	int result;
	bool may_grow = (vdo_get_admin_state(vdo) != VDO_ADMIN_STATE_PRE_LOADED);

	result = validate_new_device_config(config, vdo->device_config, may_grow,
					    &ti->error);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	if (config->logical_blocks > vdo->device_config->logical_blocks) {
		block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks;

		vdo_log_info("Preparing to resize logical to %llu",
			     (unsigned long long) config->logical_blocks);
		VDO_ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks),
				    "New logical size is larger than current size");

		result = vdo_prepare_to_grow_block_map(vdo->block_map,
						       config->logical_blocks);
		if (result != VDO_SUCCESS) {
			ti->error = "Device vdo_prepare_to_grow_logical failed";
			return -EINVAL;
		}

		vdo_log_info("Done preparing to resize logical");
	}

	if (config->physical_blocks > vdo->device_config->physical_blocks) {
		result = prepare_to_grow_physical(vdo, config->physical_blocks);
		if (result != VDO_SUCCESS) {
			if (result == VDO_PARAMETER_MISMATCH) {
				/*
				 * If we don't trap this case, vdo_status_to_errno() will remap
				 * it to -EIO, which is misleading and ahistorical.
				 */
				result = -EINVAL;
			}

			if (result == VDO_TOO_MANY_SLABS) {
				ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)";
			} else {
				ti->error = "Device vdo_prepare_to_grow_physical failed";
			}

			return result;
		}
	}

	if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) {
		const char *device_name = vdo_get_device_name(config->owning_target);

		vdo_log_info("Updating backing device of %s from %s to %s", device_name,
			     vdo->device_config->parent_device_name,
			     config->parent_device_name);
	}

	return VDO_SUCCESS;
}
static int update_existing_vdo(const char *device_name, struct dm_target *ti,
			       unsigned int argc, char **argv, struct vdo *vdo)
{
	int result;
	struct device_config *config;

	result = parse_device_config(argc, argv, ti, &config);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	vdo_log_info("preparing to modify device '%s'", device_name);
	result = prepare_to_modify(ti, config, vdo);
	if (result != VDO_SUCCESS) {
		free_device_config(config);
		return vdo_status_to_errno(result);
	}

	set_device_config(ti, vdo, config);
	return VDO_SUCCESS;
}

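/**
 * vdo_ctr() - The device-mapper constructor for a vdo target.
 * @ti: The device-mapper target being constructed.
 * @argc: The number of table arguments.
 * @argv: The table arguments.
 *
 * Constructs a new vdo if no vdo with the target's name exists; otherwise treats the table load
 * as a reconfiguration of the existing vdo.
 *
 * Return: 0 on success or an error code.
 */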
static int vdo_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int result;
	struct registered_thread allocating_thread, instance_thread;
	const char *device_name;
	struct vdo *vdo;

	vdo_register_allocating_thread(&allocating_thread, NULL);
	device_name = vdo_get_device_name(ti);
	vdo = vdo_find_matching(vdo_is_named, device_name);
	if (vdo == NULL) {
		result = construct_new_vdo(ti, argc, argv);
	} else {
		vdo_register_thread_device_id(&instance_thread, &vdo->instance);
		result = update_existing_vdo(device_name, ti, argc, argv, vdo);
		vdo_unregister_thread_device_id();
	}

	vdo_unregister_allocating_thread();
	return result;
}

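/**
 * vdo_dtr() - The device-mapper destructor for a vdo target.
 * @ti: The device-mapper target being destroyed.
 *
 * Frees the target's device config, and tears down the vdo itself when the last config
 * referencing it goes away.
 */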
static void vdo_dtr(struct dm_target *ti)
{
	struct device_config *config = ti->private;
	struct vdo *vdo = vdo_forget(config->vdo);

	list_del_init(&config->config_list);
	if (list_empty(&vdo->device_config_list)) {
		const char *device_name;

		/* This was the last config referencing the VDO. Free it. */
		unsigned int instance = vdo->instance;
		struct registered_thread allocating_thread, instance_thread;

		vdo_register_thread_device_id(&instance_thread, &instance);
		vdo_register_allocating_thread(&allocating_thread, NULL);

		device_name = vdo_get_device_name(ti);
		vdo_log_info("stopping device '%s'", device_name);
		if (vdo->dump_on_shutdown)
			vdo_dump_all(vdo, "device shutdown");

		vdo_destroy(vdo_forget(vdo));
		vdo_log_info("device '%s' stopped", device_name);
		vdo_unregister_thread_device_id();
		vdo_unregister_allocating_thread();
		release_instance(instance);
	} else if (config == vdo->device_config) {
		/*
		 * The VDO still references this config. Give it a reference to a config that isn't
		 * being destroyed.
		 */
		vdo->device_config = list_first_entry(&vdo->device_config_list,
						      struct device_config, config_list);
	}

	free_device_config(config);
	ti->private = NULL;
}

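/**
 * vdo_presuspend() - Record the type of suspend (with or without flush) being requested.
 * @ti: The device-mapper target being suspended.
 */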
static void vdo_presuspend(struct dm_target *ti)
{
	get_vdo_for_target(ti)->suspend_type =
		(dm_noflush_suspending(ti) ? VDO_ADMIN_STATE_SUSPENDING : VDO_ADMIN_STATE_SAVING);
}

/**
 * write_super_block_for_suspend() - Update the VDO state and save the super block.
 * @completion: The admin completion
 */
static void write_super_block_for_suspend(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_NEW:
		vdo_set_state(vdo, VDO_CLEAN);
		break;

	case VDO_CLEAN:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		break;

	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
		return;
	}

	vdo_save_components(vdo, completion);
}

/**
 * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend().
 * @completion: The sub-task completion.
 */
static void suspend_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	struct admin_state *state = &vdo->admin.state;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case SUSPEND_PHASE_START:
		if (vdo_get_admin_state_code(state)->quiescent) {
			/* Already suspended */
			break;
		}

		vdo_continue_completion(completion,
					vdo_start_operation(state, vdo->suspend_type));
		return;

	case SUSPEND_PHASE_PACKER:
		/*
		 * If the VDO was already resumed from a prior suspend while read-only, some of the
		 * components may not have been resumed. By setting a read-only error here, we
		 * guarantee that the result of this suspend will be VDO_READ_ONLY and not
		 * VDO_INVALID_ADMIN_STATE in that case.
		 */
		if (vdo_in_read_only_mode(vdo))
			vdo_set_completion_result(completion, VDO_READ_ONLY);

		vdo_drain_packer(vdo->packer, completion);
		return;

	case SUSPEND_PHASE_DATA_VIOS:
		drain_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case SUSPEND_PHASE_DEDUPE:
		vdo_drain_hash_zones(vdo->hash_zones, completion);
		return;

	case SUSPEND_PHASE_FLUSHES:
		vdo_drain_flusher(vdo->flusher, completion);
		return;

	case SUSPEND_PHASE_LOGICAL_ZONES:
		/*
		 * Attempt to flush all I/O before completing post suspend work. We believe a
		 * suspended device is expected to have persisted all data written before the
		 * suspend, even if it hasn't been flushed yet.
		 */
		result = vdo_synchronous_flush(vdo);
		if (result != VDO_SUCCESS)
			vdo_enter_read_only_mode(vdo, result);

		vdo_drain_logical_zones(vdo->logical_zones,
					vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_BLOCK_MAP:
		vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state),
				    completion);
		return;

	case SUSPEND_PHASE_JOURNAL:
		vdo_drain_recovery_journal(vdo->recovery_journal,
					   vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_DEPOT:
		vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state),
				     completion);
		return;

	case SUSPEND_PHASE_READ_ONLY_WAIT:
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	case SUSPEND_PHASE_WRITE_SUPER_BLOCK:
		if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) {
			/* If we didn't save the VDO or there was an error, we're done. */
			break;
		}

		write_super_block_for_suspend(completion);
		return;

	case SUSPEND_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

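/**
 * vdo_postsuspend() - Drain the vdo and persist its state after device-mapper has suspended it.
 * @ti: The device-mapper target being suspended.
 */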
static void vdo_postsuspend(struct dm_target *ti)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct registered_thread instance_thread;
	const char *device_name;
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	device_name = vdo_get_device_name(vdo->device_config->owning_target);
	vdo_log_info("suspending device '%s'", device_name);

	/*
	 * It's important to note any error here does not actually stop device-mapper from
	 * suspending the device. All this work is done post suspend.
	 */
	result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback,
					 suspend_callback, "suspend");

	if ((result == VDO_SUCCESS) || (result == VDO_READ_ONLY)) {
		/*
		 * Treat VDO_READ_ONLY as a success since a read-only suspension still leaves the
		 * VDO suspended.
		 */
		vdo_log_info("device '%s' suspended", device_name);
	} else if (result == VDO_INVALID_ADMIN_STATE) {
		vdo_log_error("Suspend invoked while in unexpected state: %s",
			      vdo_get_admin_state(vdo)->name);
	} else {
		vdo_log_error_strerror(result, "Suspend of device '%s' failed",
				       device_name);
	}

	vdo_unregister_thread_device_id();
}

/**
 * was_new() - Check whether the vdo was new when it was loaded.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo was new.
 */
static bool was_new(const struct vdo *vdo)
{
	return (vdo->load_state == VDO_NEW);
}

/**
 * requires_repair() - Check whether a vdo requires recovery or rebuild.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo must be repaired.
 */
static bool __must_check requires_repair(const struct vdo *vdo)
{
	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_FORCE_REBUILD:
	case VDO_REPLAYING:
	case VDO_REBUILD_FOR_UPGRADE:
		return true;

	default:
		return false;
	}
}

/**
 * get_load_type() - Determine how the slab depot was loaded.
 * @vdo: The vdo to query.
 *
 * Return: How the depot was loaded.
 */
static enum slab_depot_load_type get_load_type(struct vdo *vdo)
{
	if (vdo_state_requires_read_only_rebuild(vdo->load_state))
		return VDO_SLAB_DEPOT_REBUILD_LOAD;

	if (vdo_state_requires_recovery(vdo->load_state))
		return VDO_SLAB_DEPOT_RECOVERY_LOAD;

	return VDO_SLAB_DEPOT_NORMAL_LOAD;
}

/**
 * load_callback() - Callback to do the destructive parts of loading a VDO.
 * @completion: The sub-task completion.
 */
static void load_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case LOAD_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state, VDO_ADMIN_STATE_LOADING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Prepare the recovery journal for new entries. */
		vdo_open_recovery_journal(vdo->recovery_journal, vdo->depot,
					  vdo->block_map);
		vdo_allow_read_only_mode_entry(completion);
		return;

	case LOAD_PHASE_LOAD_DEPOT:
		vdo_set_dedupe_state_normal(vdo->hash_zones);
		if (vdo_is_read_only(vdo)) {
			/*
			 * In read-only mode we don't use the allocator and it may not even be
			 * readable, so don't bother trying to load it.
			 */
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		if (requires_repair(vdo)) {
			vdo_repair(completion);
			return;
		}

		vdo_load_slab_depot(vdo->depot,
				    (was_new(vdo) ? VDO_ADMIN_STATE_FORMATTING :
				     VDO_ADMIN_STATE_LOADING),
				    completion, NULL);
		return;

	case LOAD_PHASE_MAKE_DIRTY:
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case LOAD_PHASE_PREPARE_TO_ALLOCATE:
		vdo_initialize_block_map_from_journal(vdo->block_map,
						      vdo->recovery_journal);
		vdo_prepare_slab_depot_to_allocate(vdo->depot, get_load_type(vdo),
						   completion);
		return;

	case LOAD_PHASE_SCRUB_SLABS:
		if (vdo_state_requires_recovery(vdo->load_state))
			vdo_enter_recovery_mode(vdo);

		vdo_scrub_all_unrecovered_slabs(vdo->depot, completion);
		return;

	case LOAD_PHASE_DATA_REDUCTION:
		WRITE_ONCE(vdo->compressing, vdo->device_config->compression);
		if (vdo->device_config->deduplication) {
			/*
			 * Don't try to load or rebuild the index first (and log scary error
			 * messages) if this is known to be a newly-formatted volume.
			 */
			vdo_start_dedupe_index(vdo->hash_zones, was_new(vdo));
		}

		vdo->allocations_allowed = false;
		fallthrough;

	case LOAD_PHASE_FINISHED:
		break;

	case LOAD_PHASE_DRAIN_JOURNAL:
		vdo_drain_recovery_journal(vdo->recovery_journal, VDO_ADMIN_STATE_SAVING,
					   completion);
		return;

	case LOAD_PHASE_WAIT_FOR_READ_ONLY:
		/* Avoid an infinite loop */
		completion->error_handler = NULL;
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * handle_load_error() - Handle an error during the load operation.
 * @completion: The admin completion.
 *
 * If at all possible, brings the vdo online in read-only mode. This handler is registered in
 * vdo_preresume_registered().
 */
static void handle_load_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo_requeue_completion_if_needed(completion,
					     vdo->thread_config.admin_thread))
		return;

	if (vdo_state_requires_read_only_rebuild(vdo->load_state) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		vdo_log_error_strerror(completion->result, "aborting load");
		vdo->admin.phase = LOAD_PHASE_DRAIN_JOURNAL;
		load_callback(vdo_forget(completion));
		return;
	}

	if ((completion->result == VDO_UNSUPPORTED_VERSION) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		vdo_log_error("Aborting load due to unsupported version");
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		load_callback(completion);
		return;
	}

	vdo_log_error_strerror(completion->result,
			       "Entering read-only mode due to load error");
	vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY;
	vdo_enter_read_only_mode(vdo, completion->result);
	completion->result = VDO_READ_ONLY;
	load_callback(completion);
}

/**
 * write_super_block_for_resume() - Update the VDO state and save the super block.
 * @completion: The admin completion
 */
static void write_super_block_for_resume(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_CLEAN:
	case VDO_NEW:
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case VDO_DIRTY:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		/* No need to write the super block in these cases */
		vdo_launch_completion(completion);
		return;

	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
	}
}

/**
 * resume_callback() - Callback to resume a VDO.
 * @completion: The admin completion.
 */
static void resume_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case RESUME_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_RESUMING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		write_super_block_for_resume(completion);
		return;

	case RESUME_PHASE_ALLOW_READ_ONLY_MODE:
		vdo_allow_read_only_mode_entry(completion);
		return;

	case RESUME_PHASE_DEDUPE:
		vdo_resume_hash_zones(vdo->hash_zones, completion);
		return;

	case RESUME_PHASE_DEPOT:
		vdo_resume_slab_depot(vdo->depot, completion);
		return;

	case RESUME_PHASE_JOURNAL:
		vdo_resume_recovery_journal(vdo->recovery_journal, completion);
		return;

	case RESUME_PHASE_BLOCK_MAP:
		vdo_resume_block_map(vdo->block_map, completion);
		return;

	case RESUME_PHASE_LOGICAL_ZONES:
		vdo_resume_logical_zones(vdo->logical_zones, completion);
		return;

	case RESUME_PHASE_PACKER: {
		bool was_enabled = vdo_get_compressing(vdo);
		bool enable = vdo->device_config->compression;

		if (enable != was_enabled)
			WRITE_ONCE(vdo->compressing, enable);
		vdo_log_info("compression is %s", (enable ? "enabled" : "disabled"));

		vdo_resume_packer(vdo->packer, completion);
		return;
	}

	case RESUME_PHASE_FLUSHER:
		vdo_resume_flusher(vdo->flusher, completion);
		return;

	case RESUME_PHASE_DATA_VIOS:
		resume_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case RESUME_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * grow_logical_callback() - Callback to initiate a grow logical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_logical().
 */
static void grow_logical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_LOGICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow logical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		vdo->states.vdo.config.logical_blocks = vdo->block_map->next_entry_count;
		vdo_save_components(vdo, completion);
		return;

	case GROW_LOGICAL_PHASE_GROW_BLOCK_MAP:
		vdo_grow_block_map(vdo->block_map, completion);
		return;

	case GROW_LOGICAL_PHASE_END:
		break;

	case GROW_LOGICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * handle_logical_growth_error() - Handle an error during the grow logical process.
 * @completion: The admin completion.
 */
static void handle_logical_growth_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo->admin.phase == GROW_LOGICAL_PHASE_GROW_BLOCK_MAP) {
		/*
		 * We've failed to write the new size in the super block, so set our in memory
		 * config back to the old size.
		 */
		vdo->states.vdo.config.logical_blocks = vdo->block_map->entry_count;
		vdo_abandon_block_map_growth(vdo->block_map);
	}

	vdo->admin.phase = GROW_LOGICAL_PHASE_ERROR;
	grow_logical_callback(completion);
}

/**
 * perform_grow_logical() - Grow the logical size of the vdo.
 * @vdo: The vdo to grow.
 * @new_logical_blocks: The size to which the vdo should be grown.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 * from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_logical(struct vdo *vdo, block_count_t new_logical_blocks)
{
	int result;

	if (vdo->device_config->logical_blocks == new_logical_blocks) {
		/*
		 * A table was loaded for which we prepared to grow, but a table without that
		 * growth was what we are resuming with.
		 */
		vdo_abandon_block_map_growth(vdo->block_map);
		return VDO_SUCCESS;
	}

	vdo_log_info("Resizing logical to %llu",
		     (unsigned long long) new_logical_blocks);
	if (vdo->block_map->next_entry_count != new_logical_blocks)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_LOGICAL_PHASE_START,
					 grow_logical_callback,
					 handle_logical_growth_error, "grow logical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Logical blocks now %llu", (unsigned long long) new_logical_blocks);
	return VDO_SUCCESS;
}

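/**
 * copy_callback() - Convert the result of a dm-kcopyd partition copy and continue the waiting
 *                   completion; registered as the notification callback in copy_partition().
 * @read_err: The read error from the copy.
 * @write_err: The write error from the copy.
 * @context: The completion waiting on the copy.
 */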
static void copy_callback(int read_err, unsigned long write_err, void *context)
{
	struct vdo_completion *completion = context;
	int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO);

	vdo_continue_completion(completion, result);
}

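/**
 * partition_to_region() - Express a partition as a dm_io_region on the backing device,
 *                         accounting for the geometry's bio offset.
 * @partition: The partition to convert.
 * @vdo: The vdo owning the partition.
 * @region: A pointer to hold the resulting region.
 */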
static void partition_to_region(struct partition *partition, struct vdo *vdo,
				struct dm_io_region *region)
{
	physical_block_number_t pbn = partition->offset - vdo->geometry.bio_offset;

	*region = (struct dm_io_region) {
		.bdev = vdo_get_backing_device(vdo),
		.sector = pbn * VDO_SECTORS_PER_BLOCK,
		.count = partition->count * VDO_SECTORS_PER_BLOCK,
	};
}

/**
 * copy_partition() - Copy a partition from the location specified in the current layout to that in
 *                    the next layout.
 * @vdo: The vdo preparing to grow.
 * @id: The ID of the partition to copy.
 * @parent: The completion to notify when the copy is complete.
 */
static void copy_partition(struct vdo *vdo, enum partition_id id,
			   struct vdo_completion *parent)
{
	struct dm_io_region read_region, write_regions[1];
	struct partition *from = vdo_get_known_partition(&vdo->layout, id);
	struct partition *to = vdo_get_known_partition(&vdo->next_layout, id);

	partition_to_region(from, vdo, &read_region);
	partition_to_region(to, vdo, &write_regions[0]);
	dm_kcopyd_copy(vdo->partition_copier, &read_region, 1, write_regions, 0,
		       copy_callback, parent);
}

/**
 * grow_physical_callback() - Callback to initiate a grow physical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_physical().
 */
static void grow_physical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_PHYSICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow physical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Copy the journal into the new layout. */
		copy_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_COPY_SUMMARY:
		copy_partition(vdo, VDO_SLAB_SUMMARY_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS:
		vdo_uninitialize_layout(&vdo->layout);
		vdo->layout = vdo->next_layout;
		vdo_forget(vdo->next_layout.head);
		vdo->states.vdo.config.physical_blocks = vdo->layout.size;
		vdo_update_slab_depot_size(vdo->depot);
		vdo_save_components(vdo, completion);
		return;

	case GROW_PHYSICAL_PHASE_USE_NEW_SLABS:
		vdo_use_new_slabs(vdo->depot, completion);
		return;

	case GROW_PHYSICAL_PHASE_END:
		vdo->depot->summary_origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_SLAB_SUMMARY_PARTITION)->offset;
		vdo->recovery_journal->origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_RECOVERY_JOURNAL_PARTITION)->offset;
		break;

	case GROW_PHYSICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	vdo_uninitialize_layout(&vdo->next_layout);
	finish_operation_callback(completion);
}

/**
 * handle_physical_growth_error() - Handle an error during the grow physical process.
 * @completion: The sub-task completion.
 */
static void handle_physical_growth_error(struct vdo_completion *completion)
{
	completion->vdo->admin.phase = GROW_PHYSICAL_PHASE_ERROR;
	grow_physical_callback(completion);
}

/**
 * perform_grow_physical() - Grow the physical size of the vdo.
 * @vdo: The vdo to resize.
 * @new_physical_blocks: The new physical size in blocks.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 * from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t new_depot_size, prepared_depot_size;
	block_count_t old_physical_blocks = vdo->states.vdo.config.physical_blocks;

	/* Skip any noop grows. */
	if (old_physical_blocks == new_physical_blocks)
		return VDO_SUCCESS;

	if (new_physical_blocks != vdo->next_layout.size) {
		/*
		 * Either the VDO isn't prepared to grow, or it was prepared to grow to a different
		 * size. Doing this check here relies on the fact that the call to this method is
		 * done under the dmsetup message lock.
		 */
		vdo_uninitialize_layout(&vdo->next_layout);
		vdo_abandon_new_slabs(vdo->depot);
		return VDO_PARAMETER_MISMATCH;
	}

	/* Validate that we are prepared to grow appropriately. */
	new_depot_size =
		vdo_get_known_partition(&vdo->next_layout, VDO_SLAB_DEPOT_PARTITION)->count;
	prepared_depot_size = (vdo->depot->new_slabs == NULL) ? 0 : vdo->depot->new_size;
	if (prepared_depot_size != new_depot_size)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_PHYSICAL_PHASE_START,
					 grow_physical_callback,
					 handle_physical_growth_error, "grow physical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Physical block count was %llu, now %llu",
		     (unsigned long long) old_physical_blocks,
		     (unsigned long long) new_physical_blocks);
	return VDO_SUCCESS;
}

/**
 * apply_new_vdo_configuration() - Attempt to make any configuration changes from the table being
 *                                 resumed.
 * @vdo: The vdo being resumed.
 * @config: The new device configuration derived from the table with which the vdo is being
 *          resumed.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check apply_new_vdo_configuration(struct vdo *vdo,
						    struct device_config *config)
{
	int result;

	result = perform_grow_logical(vdo, config->logical_blocks);
	if (result != VDO_SUCCESS) {
		vdo_log_error("grow logical operation failed, result = %d", result);
		return result;
	}

	result = perform_grow_physical(vdo, config->physical_blocks);
	if (result != VDO_SUCCESS)
		vdo_log_error("resize operation failed, result = %d", result);

	return result;
}

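/**
 * vdo_preresume_registered() - Load or resume a vdo, applying any new configuration.
 * @ti: The device-mapper target being resumed.
 * @vdo: The vdo to resume.
 *
 * Does the actual work of vdo_preresume() once the instance's thread registration is in place.
 *
 * Return: VDO_SUCCESS or an error.
 */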
static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo)
{
	struct device_config *config = ti->private;
	const char *device_name = vdo_get_device_name(ti);
	block_count_t backing_blocks;
	int result;

	backing_blocks = get_underlying_device_block_count(vdo);
	if (backing_blocks < config->physical_blocks) {
		/* FIXME: can this still happen? */
		vdo_log_error("resume of device '%s' failed: backing device has %llu blocks but VDO physical size is %llu blocks",
			      device_name, (unsigned long long) backing_blocks,
			      (unsigned long long) config->physical_blocks);
		return -EINVAL;
	}

	if (vdo_get_admin_state(vdo) == VDO_ADMIN_STATE_PRE_LOADED) {
		vdo_log_info("starting device '%s'", device_name);
		result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback,
						 handle_load_error, "load");
		if (result == VDO_UNSUPPORTED_VERSION) {
			/*
			 * A component version is not supported. This can happen when the
			 * recovery journal metadata is in an old version format. Abort the
			 * load without saving the state.
			 */
			vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
			/*
			 * Something has gone very wrong. Make sure everything has drained and
			 * leave the device in an unresumable state.
			 */
			vdo_log_error_strerror(result,
					       "Start failed, could not load VDO metadata");
			vdo->suspend_type = VDO_ADMIN_STATE_STOPPING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		/* Even if the VDO is read-only, it is now able to handle read requests. */
		vdo_log_info("device '%s' started", device_name);
	}

	vdo_log_info("resuming device '%s'", device_name);

	/* If this fails, the VDO was not in a state to be resumed. This should never happen. */
	result = apply_new_vdo_configuration(vdo, config);
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);

	/*
	 * Now that we've tried to modify the vdo, the new config *is* the config, whether the
	 * modifications worked or not.
	 */
	vdo->device_config = config;

	/*
	 * Any error here is highly unexpected and the state of the vdo is questionable, so we mark
	 * it read-only in memory. Because we are suspended, the read-only state will not be
	 * written to disk.
	 */
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result,
				       "Commit of modifications to device '%s' failed",
				       device_name);
		vdo_enter_read_only_mode(vdo, result);
		return result;
	}

	if (vdo_get_admin_state(vdo)->normal) {
		/* The VDO was just started, so we don't need to resume it. */
		return VDO_SUCCESS;
	}

	result = perform_admin_operation(vdo, RESUME_PHASE_START, resume_callback,
					 resume_callback, "resume");
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);
	if (result == VDO_READ_ONLY) {
		/* Even if the vdo is read-only, it has still resumed. */
		result = VDO_SUCCESS;
	}

	if (result != VDO_SUCCESS)
		vdo_log_error("resume of device '%s' failed with error: %d", device_name,
			      result);

	return result;
}

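/**
 * vdo_preresume() - The device-mapper preresume method for a vdo target.
 * @ti: The device-mapper target being resumed.
 *
 * Return: 0 on success or an error code.
 */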
static int vdo_preresume(struct dm_target *ti)
{
	struct registered_thread instance_thread;
	struct vdo *vdo = get_vdo_for_target(ti);
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	result = vdo_preresume_registered(ti, vdo);
	if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) ||
	    (result == VDO_UNSUPPORTED_VERSION))
		result = -EINVAL;

	vdo_unregister_thread_device_id();
	return vdo_status_to_errno(result);
}

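/**
 * vdo_resume() - The device-mapper resume method for a vdo target; the actual resume work was
 *                already done in vdo_preresume(), so this merely logs.
 * @ti: The device-mapper target being resumed.
 */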
static void vdo_resume(struct dm_target *ti)
{
	struct registered_thread instance_thread;

	vdo_register_thread_device_id(&instance_thread,
				      &get_vdo_for_target(ti)->instance);
	vdo_log_info("device '%s' resumed", vdo_get_device_name(ti));
	vdo_unregister_thread_device_id();
}

/*
 * If anything changes that affects how user tools will interact with vdo, update the version
 * number and make sure documentation about the change is complete so tools can properly update
 * their management code.
 */
static struct target_type vdo_target_bio = {
	.features = DM_TARGET_SINGLETON,
	.name = "vdo",
	.version = { 9, 1, 0 },
	.module = THIS_MODULE,
	.ctr = vdo_ctr,
	.dtr = vdo_dtr,
	.io_hints = vdo_io_hints,
	.iterate_devices = vdo_iterate_devices,
	.map = vdo_map_bio,
	.message = vdo_message,
	.status = vdo_status,
	.presuspend = vdo_presuspend,
	.postsuspend = vdo_postsuspend,
	.preresume = vdo_preresume,
	.resume = vdo_resume,
};

static bool dm_registered;

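/**
 * vdo_module_destroy() - Unregister the target and clean up module-global state.
 */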
static void vdo_module_destroy(void)
{
	vdo_log_debug("unloading");

	if (dm_registered)
		dm_unregister_target(&vdo_target_bio);

	VDO_ASSERT_LOG_ONLY(instances.count == 0,
			    "should have no instance numbers still in use, but have %u",
			    instances.count);
	vdo_free(instances.words);
	memset(&instances, 0, sizeof(struct instance_tracker));
}

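/**
 * vdo_init() - Initialize module-global state and register the vdo target.
 *
 * Return: 0 on success or an error code.
 */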
static int __init vdo_init(void)
{
	int result = 0;

	/* Memory tracking must be initialized first for accurate accounting. */
	vdo_memory_init();
	vdo_initialize_threads_mutex();
	vdo_initialize_thread_device_registry();
	vdo_initialize_device_registry_once();

	/* Add VDO errors to the set of errors registered by the indexer. */
	result = vdo_register_status_codes();
	if (result != VDO_SUCCESS) {
		vdo_log_error("vdo_register_status_codes failed %d", result);
		vdo_module_destroy();
		return result;
	}

	result = dm_register_target(&vdo_target_bio);
	if (result < 0) {
		vdo_log_error("dm_register_target failed %d", result);
		vdo_module_destroy();
		return result;
	}
	dm_registered = true;

	return result;
}

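/**
 * vdo_exit() - Module exit function; tears down the module in the reverse order of vdo_init().
 */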
static void __exit vdo_exit(void)
{
	vdo_module_destroy();
	/* Memory tracking cleanup must be done last. */
	vdo_memory_exit();
}

module_init(vdo_init);
module_exit(vdo_exit);

module_param_named(log_level, vdo_log_level, uint, 0644);
MODULE_PARM_DESC(log_level, "Log level for log messages");

MODULE_DESCRIPTION(DM_NAME " target for transparent deduplication");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");