Expand PMF_FN_* macros.
[netbsd-mini2440.git] / external / gpl2 / lvm2 / dist / lib / metadata / metadata.c
blobec3761c1d9fad11beef54a46590ae9823c6953b8
1 /* $NetBSD$ */
3 /*
4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5 * Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
7 * This file is part of LVM2.
9 * This copyrighted material is made available to anyone wishing to use,
10 * modify, copy, or redistribute it subject to the terms and conditions
11 * of the GNU Lesser General Public License v.2.1.
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 #include "lib.h"
19 #include "device.h"
20 #include "metadata.h"
21 #include "toolcontext.h"
22 #include "lvm-string.h"
23 #include "lvm-file.h"
24 #include "lvmcache.h"
25 #include "memlock.h"
26 #include "str_list.h"
27 #include "pv_alloc.h"
28 #include "segtype.h"
29 #include "activate.h"
30 #include "display.h"
31 #include "locking.h"
32 #include "archiver.h"
33 #include "defaults.h"
34 #include "filter-persistent.h"
36 #include <sys/param.h>
39 * FIXME: Check for valid handle before dereferencing field or log error?
41 #define pv_field(handle, field) \
42 (((const struct physical_volume *)(handle))->field)
44 static struct physical_volume *_pv_read(struct cmd_context *cmd,
45 struct dm_pool *pvmem,
46 const char *pv_name,
47 struct dm_list *mdas,
48 uint64_t *label_sector,
49 int warnings, int scan_label_only);
51 static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
52 const char *pv_name);
54 static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
55 const char *pv_name);
57 static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
58 const struct id *id);
60 static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
61 uint32_t status);
63 const char _really_init[] =
64 "Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
66 unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
68 if (pv->pe_align)
69 goto out;
71 if (data_alignment)
72 pv->pe_align = data_alignment;
73 else
74 pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
76 if (!pv->dev)
77 goto out;
80 * Align to stripe-width of underlying md device if present
82 if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
83 DEFAULT_MD_CHUNK_ALIGNMENT))
84 pv->pe_align = MAX(pv->pe_align,
85 dev_md_stripe_width(pv->fmt->cmd->sysfs_dir,
86 pv->dev));
89 * Align to topology's minimum_io_size or optimal_io_size if present
90 * - minimum_io_size - the smallest request the device can perform
91 * w/o incurring a read-modify-write penalty (e.g. MD's chunk size)
92 * - optimal_io_size - the device's preferred unit of receiving I/O
93 * (e.g. MD's stripe width)
95 if (find_config_tree_bool(pv->fmt->cmd,
96 "devices/data_alignment_detection",
97 DEFAULT_DATA_ALIGNMENT_DETECTION)) {
98 pv->pe_align = MAX(pv->pe_align,
99 dev_minimum_io_size(pv->fmt->cmd->sysfs_dir,
100 pv->dev));
102 pv->pe_align = MAX(pv->pe_align,
103 dev_optimal_io_size(pv->fmt->cmd->sysfs_dir,
104 pv->dev));
107 log_very_verbose("%s: Setting PE alignment to %lu sectors.",
108 dev_name(pv->dev), pv->pe_align);
110 out:
111 return pv->pe_align;
114 unsigned long set_pe_align_offset(struct physical_volume *pv,
115 unsigned long data_alignment_offset)
117 if (pv->pe_align_offset)
118 goto out;
120 if (data_alignment_offset)
121 pv->pe_align_offset = data_alignment_offset;
123 if (!pv->dev)
124 goto out;
126 if (find_config_tree_bool(pv->fmt->cmd,
127 "devices/data_alignment_offset_detection",
128 DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION))
129 pv->pe_align_offset =
130 MAX(pv->pe_align_offset,
131 dev_alignment_offset(pv->fmt->cmd->sysfs_dir,
132 pv->dev));
134 log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
135 dev_name(pv->dev), pv->pe_align_offset);
137 out:
138 return pv->pe_align_offset;
142 * add_pv_to_vg - Add a physical volume to a volume group
143 * @vg - volume group to add to
144 * @pv_name - name of the pv (to be removed)
145 * @pv - physical volume to add to volume group
147 * Returns:
148 * 0 - failure
149 * 1 - success
150 * FIXME: remove pv_name - obtain safely from pv
152 int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
153 struct physical_volume *pv)
155 struct pv_list *pvl;
156 struct format_instance *fid = vg->fid;
157 struct dm_pool *mem = vg->vgmem;
159 log_verbose("Adding physical volume '%s' to volume group '%s'",
160 pv_name, vg->name);
162 if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
163 log_error("pv_list allocation for '%s' failed", pv_name);
164 return 0;
167 if (!is_orphan_vg(pv->vg_name)) {
168 log_error("Physical volume '%s' is already in volume group "
169 "'%s'", pv_name, pv->vg_name);
170 return 0;
173 if (pv->fmt != fid->fmt) {
174 log_error("Physical volume %s is of different format type (%s)",
175 pv_name, pv->fmt->name);
176 return 0;
179 /* Ensure PV doesn't depend on another PV already in the VG */
180 if (pv_uses_vg(pv, vg)) {
181 log_error("Physical volume %s might be constructed from same "
182 "volume group %s", pv_name, vg->name);
183 return 0;
186 if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) {
187 log_error("vg->name allocation failed for '%s'", pv_name);
188 return 0;
191 memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
193 /* Units of 512-byte sectors */
194 pv->pe_size = vg->extent_size;
197 * pe_count must always be calculated by pv_setup
199 pv->pe_alloc_count = 0;
201 if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
202 vg->extent_size, 0, 0, 0UL, UINT64_C(0),
203 &fid->metadata_areas, pv, vg)) {
204 log_error("Format-specific setup of physical volume '%s' "
205 "failed.", pv_name);
206 return 0;
209 if (_find_pv_in_vg(vg, pv_name)) {
210 log_error("Physical volume '%s' listed more than once.",
211 pv_name);
212 return 0;
215 if (vg->pv_count && (vg->pv_count == vg->max_pv)) {
216 log_error("No space for '%s' - volume group '%s' "
217 "holds max %d physical volume(s).", pv_name,
218 vg->name, vg->max_pv);
219 return 0;
222 if (!alloc_pv_segment_whole_pv(mem, pv))
223 return_0;
225 pvl->pv = pv;
226 dm_list_add(&vg->pvs, &pvl->list);
228 if ((uint64_t) vg->extent_count + pv->pe_count > UINT32_MAX) {
229 log_error("Unable to add %s to %s: new extent count (%"
230 PRIu64 ") exceeds limit (%" PRIu32 ").",
231 pv_name, vg->name,
232 (uint64_t) vg->extent_count + pv->pe_count,
233 UINT32_MAX);
234 return 0;
237 vg->pv_count++;
238 vg->extent_count += pv->pe_count;
239 vg->free_count += pv->pe_count;
241 return 1;
244 static int _copy_pv(struct dm_pool *pvmem,
245 struct physical_volume *pv_to,
246 struct physical_volume *pv_from)
248 memcpy(pv_to, pv_from, sizeof(*pv_to));
250 if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name)))
251 return_0;
253 if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags))
254 return_0;
256 if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments))
257 return_0;
259 return 1;
262 static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from)
264 struct pv_list *pvl_to = NULL;
266 if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to))))
267 return_NULL;
269 if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv))))
270 goto_bad;
272 if(!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv))
273 goto_bad;
275 return pvl_to;
276 bad:
277 dm_pool_free(pvmem, pvl_to);
278 return NULL;
281 int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name,
282 const char *vgid, const char *pvid,
283 struct physical_volume *pv)
285 struct volume_group *vg;
286 struct pv_list *pvl;
287 int r = 0, consistent = 0;
289 if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, &consistent))) {
290 log_error("get_pv_from_vg_by_id: vg_read_internal failed to read VG %s",
291 vg_name);
292 return 0;
295 if (!consistent)
296 log_warn("WARNING: Volume group %s is not consistent",
297 vg_name);
299 dm_list_iterate_items(pvl, &vg->pvs) {
300 if (id_equal(&pvl->pv->id, (const struct id *) pvid)) {
301 if (!_copy_pv(fmt->cmd->mem, pv, pvl->pv)) {
302 log_error("internal PV duplication failed");
303 r = 0;
304 goto out;
306 r = 1;
307 goto out;
310 out:
311 vg_release(vg);
312 return r;
315 int move_pv(struct volume_group *vg_from, struct volume_group *vg_to,
316 const char *pv_name)
318 struct physical_volume *pv;
319 struct pv_list *pvl;
321 /* FIXME: handle tags */
322 if (!(pvl = find_pv_in_vg(vg_from, pv_name))) {
323 log_error("Physical volume %s not in volume group %s",
324 pv_name, vg_from->name);
325 return 0;
328 if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
329 _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
330 return 0;
332 dm_list_move(&vg_to->pvs, &pvl->list);
334 vg_from->pv_count--;
335 vg_to->pv_count++;
337 pv = pvl->pv;
339 vg_from->extent_count -= pv_pe_count(pv);
340 vg_to->extent_count += pv_pe_count(pv);
342 vg_from->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
343 vg_to->free_count += pv_pe_count(pv) - pv_pe_alloc_count(pv);
345 return 1;
348 int move_pvs_used_by_lv(struct volume_group *vg_from,
349 struct volume_group *vg_to,
350 const char *lv_name)
352 struct lv_segment *lvseg;
353 unsigned s;
354 struct lv_list *lvl;
355 struct logical_volume *lv;
357 /* FIXME: handle tags */
358 if (!(lvl = find_lv_in_vg(vg_from, lv_name))) {
359 log_error("Logical volume %s not in volume group %s",
360 lv_name, vg_from->name);
361 return 0;
364 if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
365 _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
366 return 0;
368 dm_list_iterate_items(lvseg, &lvl->lv->segments) {
369 if (lvseg->log_lv)
370 if (!move_pvs_used_by_lv(vg_from, vg_to,
371 lvseg->log_lv->name))
372 return_0;
373 for (s = 0; s < lvseg->area_count; s++) {
374 if (seg_type(lvseg, s) == AREA_PV) {
375 if (!move_pv(vg_from, vg_to,
376 pv_dev_name(seg_pv(lvseg, s))))
377 return_0;
378 } else if (seg_type(lvseg, s) == AREA_LV) {
379 lv = seg_lv(lvseg, s);
380 if (!move_pvs_used_by_lv(vg_from, vg_to,
381 lv->name))
382 return_0;
386 return 1;
389 static int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name)
391 char vg_path[PATH_MAX];
393 if (!validate_name(vg_name))
394 return_0;
396 snprintf(vg_path, PATH_MAX, "%s%s", cmd->dev_dir, vg_name);
397 if (path_exists(vg_path)) {
398 log_error("%s: already exists in filesystem", vg_path);
399 return 0;
402 return 1;
405 int validate_vg_rename_params(struct cmd_context *cmd,
406 const char *vg_name_old,
407 const char *vg_name_new)
409 unsigned length;
410 char *dev_dir;
412 dev_dir = cmd->dev_dir;
413 length = strlen(dev_dir);
415 /* Check sanity of new name */
416 if (strlen(vg_name_new) > NAME_LEN - length - 2) {
417 log_error("New volume group path exceeds maximum length "
418 "of %d!", NAME_LEN - length - 2);
419 return 0;
422 if (!validate_new_vg_name(cmd, vg_name_new)) {
423 log_error("New volume group name \"%s\" is invalid",
424 vg_name_new);
425 return 0;
428 if (!strcmp(vg_name_old, vg_name_new)) {
429 log_error("Old and new volume group names must differ");
430 return 0;
433 return 1;
436 int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
437 const char *new_name)
439 struct dm_pool *mem = vg->vgmem;
440 struct pv_list *pvl;
442 if (!(vg->name = dm_pool_strdup(mem, new_name))) {
443 log_error("vg->name allocation failed for '%s'", new_name);
444 return 0;
447 dm_list_iterate_items(pvl, &vg->pvs) {
448 if (!(pvl->pv->vg_name = dm_pool_strdup(mem, new_name))) {
449 log_error("pv->vg_name allocation failed for '%s'",
450 pv_dev_name(pvl->pv));
451 return 0;
455 return 1;
458 int remove_lvs_in_vg(struct cmd_context *cmd,
459 struct volume_group *vg,
460 force_t force)
462 struct dm_list *lst;
463 struct lv_list *lvl;
465 while ((lst = dm_list_first(&vg->lvs))) {
466 lvl = dm_list_item(lst, struct lv_list);
467 if (!lv_remove_with_dependencies(cmd, lvl->lv, force))
468 return 0;
471 return 1;
474 int vg_remove_check(struct volume_group *vg)
476 unsigned lv_count;
477 struct pv_list *pvl, *tpvl;
479 if (vg_read_error(vg) || vg_missing_pv_count(vg)) {
480 log_error("Volume group \"%s\" not found, is inconsistent "
481 "or has PVs missing.", vg ? vg->name : "");
482 log_error("Consider vgreduce --removemissing if metadata "
483 "is inconsistent.");
484 return 0;
487 if (!vg_check_status(vg, EXPORTED_VG))
488 return 0;
490 lv_count = vg_visible_lvs(vg);
492 if (lv_count) {
493 log_error("Volume group \"%s\" still contains %u "
494 "logical volume(s)", vg->name, lv_count);
495 return 0;
498 if (!archive(vg))
499 return 0;
501 dm_list_iterate_items_safe(pvl, tpvl, &vg->pvs) {
502 dm_list_del(&pvl->list);
503 dm_list_add(&vg->removed_pvs, &pvl->list);
505 return 1;
508 int vg_remove(struct volume_group *vg)
510 struct physical_volume *pv;
511 struct pv_list *pvl;
512 int ret = 1;
514 if (!lock_vol(vg->cmd, VG_ORPHANS, LCK_VG_WRITE)) {
515 log_error("Can't get lock for orphan PVs");
516 return 0;
519 if (!vg_remove_mdas(vg)) {
520 log_error("vg_remove_mdas %s failed", vg->name);
521 unlock_vg(vg->cmd, VG_ORPHANS);
522 return 0;
525 /* init physical volumes */
526 dm_list_iterate_items(pvl, &vg->removed_pvs) {
527 pv = pvl->pv;
528 log_verbose("Removing physical volume \"%s\" from "
529 "volume group \"%s\"", pv_dev_name(pv), vg->name);
530 pv->vg_name = vg->fid->fmt->orphan_vg_name;
531 pv->status = ALLOCATABLE_PV;
533 if (!dev_get_size(pv_dev(pv), &pv->size)) {
534 log_error("%s: Couldn't get size.", pv_dev_name(pv));
535 ret = 0;
536 continue;
539 /* FIXME Write to same sector label was read from */
540 if (!pv_write(vg->cmd, pv, NULL, INT64_C(-1))) {
541 log_error("Failed to remove physical volume \"%s\""
542 " from volume group \"%s\"",
543 pv_dev_name(pv), vg->name);
544 ret = 0;
548 backup_remove(vg->cmd, vg->name);
550 if (ret)
551 log_print("Volume group \"%s\" successfully removed", vg->name);
552 else
553 log_error("Volume group \"%s\" not properly removed", vg->name);
555 unlock_vg(vg->cmd, VG_ORPHANS);
556 return ret;
560 * Extend a VG by a single PV / device path
562 * Parameters:
563 * - vg: handle of volume group to extend by 'pv_name'
564 * - pv_name: device path of PV to add to VG
565 * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
568 static int vg_extend_single_pv(struct volume_group *vg, char *pv_name,
569 struct pvcreate_params *pp)
571 struct physical_volume *pv;
573 pv = pv_by_path(vg->fid->fmt->cmd, pv_name);
574 if (!pv && !pp) {
575 log_error("%s not identified as an existing "
576 "physical volume", pv_name);
577 return 0;
578 } else if (!pv && pp) {
579 pv = pvcreate_single(vg->cmd, pv_name, pp);
580 if (!pv)
581 return 0;
583 if (!add_pv_to_vg(vg, pv_name, pv))
584 return 0;
585 return 1;
589 * Extend a VG by a single PV / device path
591 * Parameters:
592 * - vg: handle of volume group to extend by 'pv_name'
593 * - pv_count: count of device paths of PVs
594 * - pv_names: device paths of PVs to add to VG
595 * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
598 int vg_extend(struct volume_group *vg, int pv_count, char **pv_names,
599 struct pvcreate_params *pp)
601 int i;
603 if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
604 return 0;
606 /* attach each pv */
607 for (i = 0; i < pv_count; i++) {
608 if (!vg_extend_single_pv(vg, pv_names[i], pp))
609 goto bad;
612 /* FIXME Decide whether to initialise and add new mdahs to format instance */
614 return 1;
616 bad:
617 log_error("Unable to add physical volume '%s' to "
618 "volume group '%s'.", pv_names[i], vg->name);
619 return 0;
622 /* FIXME: use this inside vgreduce_single? */
623 int vg_reduce(struct volume_group *vg, char *pv_name)
625 struct physical_volume *pv;
626 struct pv_list *pvl;
628 if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
629 return 0;
631 if (!archive(vg))
632 goto bad;
634 /* remove each pv */
635 if (!(pvl = find_pv_in_vg(vg, pv_name))) {
636 log_error("Physical volume %s not in volume group %s.",
637 pv_name, vg->name);
638 goto bad;
641 pv = pvl->pv;
643 if (pv_pe_alloc_count(pv)) {
644 log_error("Physical volume %s still in use.",
645 pv_name);
646 goto bad;
649 if (!dev_get_size(pv_dev(pv), &pv->size)) {
650 log_error("%s: Couldn't get size.", pv_name);
651 goto bad;
654 vg->pv_count--;
655 vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
656 vg->extent_count -= pv_pe_count(pv);
658 /* add pv to the remove_pvs list */
659 dm_list_del(&pvl->list);
660 dm_list_add(&vg->removed_pvs, &pvl->list);
662 return 1;
664 bad:
665 log_error("Unable to remove physical volume '%s' from "
666 "volume group '%s'.", pv_name, vg->name);
667 return 0;
/*
 * If 'vg_name' starts with the 'dev_dir' prefix, return a pointer past
 * that prefix; otherwise return 'vg_name' unchanged.  No copy is made.
 */
const char *strip_dir(const char *vg_name, const char *dev_dir)
{
	size_t prefix_len = strlen(dev_dir);

	return strncmp(vg_name, dev_dir, prefix_len) ? vg_name
						     : vg_name + prefix_len;
}
680 * Validate parameters to vg_create() before calling.
681 * FIXME: Move inside vg_create library function.
682 * FIXME: Change vgcreate_params struct to individual gets/sets
684 int vgcreate_params_validate(struct cmd_context *cmd,
685 struct vgcreate_params *vp)
687 if (!validate_new_vg_name(cmd, vp->vg_name)) {
688 log_error("New volume group name \"%s\" is invalid",
689 vp->vg_name);
690 return 1;
693 if (vp->alloc == ALLOC_INHERIT) {
694 log_error("Volume Group allocation policy cannot inherit "
695 "from anything");
696 return 1;
699 if (!vp->extent_size) {
700 log_error("Physical extent size may not be zero");
701 return 1;
704 if (!(cmd->fmt->features & FMT_UNLIMITED_VOLS)) {
705 if (!vp->max_lv)
706 vp->max_lv = 255;
707 if (!vp->max_pv)
708 vp->max_pv = 255;
709 if (vp->max_lv > 255 || vp->max_pv > 255) {
710 log_error("Number of volumes may not exceed 255");
711 return 1;
715 return 0;
719 * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a
720 * possible failure code or zero for success.
722 static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
723 struct volume_group *vg,
724 uint32_t failure)
726 struct dm_pool *vgmem;
728 if (!vg) {
729 if (!(vgmem = dm_pool_create("lvm2 vg_handle", VG_MEMPOOL_CHUNK)) ||
730 !(vg = dm_pool_zalloc(vgmem, sizeof(*vg)))) {
731 log_error("Error allocating vg handle.");
732 if (vgmem)
733 dm_pool_destroy(vgmem);
734 return_NULL;
736 vg->vgmem = vgmem;
739 vg->read_status = failure;
741 return (struct volume_group *)vg;
744 int lv_has_unknown_segments(const struct logical_volume *lv)
746 struct lv_segment *seg;
747 /* foreach segment */
748 dm_list_iterate_items(seg, &lv->segments)
749 if (seg_unknown(seg))
750 return 1;
751 return 0;
754 int vg_has_unknown_segments(const struct volume_group *vg)
756 struct lv_list *lvl;
758 /* foreach LV */
759 dm_list_iterate_items(lvl, &vg->lvs)
760 if (lv_has_unknown_segments(lvl->lv))
761 return 1;
762 return 0;
766 * Create a VG with default parameters.
767 * Returns:
768 * - struct volume_group* with SUCCESS code: VG structure created
769 * - NULL or struct volume_group* with FAILED_* code: error creating VG structure
770 * Use vg_read_error() to determine success or failure.
771 * FIXME: cleanup usage of _vg_make_handle()
773 struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
775 struct volume_group *vg;
776 int consistent = 0;
777 struct dm_pool *mem;
778 uint32_t rc;
780 if (!validate_name(vg_name)) {
781 log_error("Invalid vg name %s", vg_name);
782 /* FIXME: use _vg_make_handle() w/proper error code */
783 return NULL;
786 rc = vg_lock_newname(cmd, vg_name);
787 if (rc != SUCCESS)
788 /* NOTE: let caller decide - this may be check for existence */
789 return _vg_make_handle(cmd, NULL, rc);
791 /* FIXME: Is this vg_read_internal necessary? Move it inside
792 vg_lock_newname? */
793 /* is this vg name already in use ? */
794 if ((vg = vg_read_internal(cmd, vg_name, NULL, &consistent))) {
795 log_error("A volume group called '%s' already exists.", vg_name);
796 unlock_and_release_vg(cmd, vg, vg_name);
797 return _vg_make_handle(cmd, NULL, FAILED_EXIST);
800 if (!(mem = dm_pool_create("lvm2 vg_create", VG_MEMPOOL_CHUNK)))
801 goto_bad;
803 if (!(vg = dm_pool_zalloc(mem, sizeof(*vg))))
804 goto_bad;
806 if (!id_create(&vg->id)) {
807 log_error("Couldn't create uuid for volume group '%s'.",
808 vg_name);
809 goto bad;
812 /* Strip dev_dir if present */
813 vg_name = strip_dir(vg_name, cmd->dev_dir);
815 vg->vgmem = mem;
816 vg->cmd = cmd;
818 if (!(vg->name = dm_pool_strdup(mem, vg_name)))
819 goto_bad;
821 vg->seqno = 0;
823 vg->status = (RESIZEABLE_VG | LVM_READ | LVM_WRITE);
824 if (!(vg->system_id = dm_pool_alloc(mem, NAME_LEN)))
825 goto_bad;
827 *vg->system_id = '\0';
829 vg->extent_size = DEFAULT_EXTENT_SIZE * 2;
830 vg->extent_count = 0;
831 vg->free_count = 0;
833 vg->max_lv = DEFAULT_MAX_LV;
834 vg->max_pv = DEFAULT_MAX_PV;
836 vg->alloc = DEFAULT_ALLOC_POLICY;
838 vg->pv_count = 0;
839 dm_list_init(&vg->pvs);
841 dm_list_init(&vg->lvs);
843 dm_list_init(&vg->tags);
845 /* initialize removed_pvs list */
846 dm_list_init(&vg->removed_pvs);
848 if (!(vg->fid = cmd->fmt->ops->create_instance(cmd->fmt, vg_name,
849 NULL, NULL))) {
850 log_error("Failed to create format instance");
851 goto bad;
854 if (vg->fid->fmt->ops->vg_setup &&
855 !vg->fid->fmt->ops->vg_setup(vg->fid, vg)) {
856 log_error("Format specific setup of volume group '%s' failed.",
857 vg_name);
858 goto bad;
860 return _vg_make_handle(cmd, vg, SUCCESS);
862 bad:
863 unlock_and_release_vg(cmd, vg, vg_name);
864 /* FIXME: use _vg_make_handle() w/proper error code */
865 return NULL;
/*
 * Convert a size (in sectors) into a count of extents of 'extent_size'
 * sectors, rounding UP to a whole extent (with a notice to the user).
 * Returns 0 if the resulting count would exceed UINT32_MAX extents.
 */
uint64_t extents_from_size(struct cmd_context *cmd, uint64_t size,
			   uint32_t extent_size)
{
	uint64_t remainder = size % extent_size;

	if (remainder) {
		size += extent_size - remainder;
		log_print("Rounding up size to full physical extent %s",
			  display_size(cmd, size));
	}

	if (size > (uint64_t) UINT32_MAX * extent_size) {
		log_error("Volume too large (%s) for extent size %s. "
			  "Upper limit is %s.",
			  display_size(cmd, size),
			  display_size(cmd, (uint64_t) extent_size),
			  display_size(cmd, (uint64_t) UINT32_MAX *
				       extent_size));
		return 0;
	}

	return size / extent_size;
}
/*
 * Rescale *extents from units of old_size to units of new_size
 * (both in sectors).  Fails if the total size is not an exact multiple
 * of new_size or if the new count overflows 32 bits.  desc1/desc2 are
 * only used to label error messages.  Returns 1 on success, 0 on error.
 */
static int _recalc_extents(uint32_t *extents, const char *desc1,
			   const char *desc2, uint32_t old_size,
			   uint32_t new_size)
{
	uint64_t total = (uint64_t) old_size * (*extents);

	if (total % new_size) {
		log_error("New size %" PRIu64 " for %s%s not an exact number "
			  "of new extents.", total, desc1, desc2);
		return 0;
	}

	total /= new_size;

	if (total > UINT32_MAX) {
		log_error("New extent count %" PRIu64 " for %s%s exceeds "
			  "32 bits.", total, desc1, desc2);
		return 0;
	}

	*extents = (uint32_t) total;

	return 1;
}
915 int vg_set_extent_size(struct volume_group *vg, uint32_t new_size)
917 uint32_t old_size = vg->extent_size;
918 struct pv_list *pvl;
919 struct lv_list *lvl;
920 struct physical_volume *pv;
921 struct logical_volume *lv;
922 struct lv_segment *seg;
923 struct pv_segment *pvseg;
924 uint32_t s;
926 if (!vg_is_resizeable(vg)) {
927 log_error("Volume group \"%s\" must be resizeable "
928 "to change PE size", vg->name);
929 return 0;
932 if (!new_size) {
933 log_error("Physical extent size may not be zero");
934 return 0;
937 if (new_size == vg->extent_size)
938 return 1;
940 if (new_size & (new_size - 1)) {
941 log_error("Physical extent size must be a power of 2.");
942 return 0;
945 if (new_size > vg->extent_size) {
946 if ((uint64_t) vg_size(vg) % new_size) {
947 /* FIXME Adjust used PV sizes instead */
948 log_error("New extent size is not a perfect fit");
949 return 0;
953 vg->extent_size = new_size;
955 if (vg->fid->fmt->ops->vg_setup &&
956 !vg->fid->fmt->ops->vg_setup(vg->fid, vg))
957 return_0;
959 if (!_recalc_extents(&vg->extent_count, vg->name, "", old_size,
960 new_size))
961 return_0;
963 if (!_recalc_extents(&vg->free_count, vg->name, " free space",
964 old_size, new_size))
965 return_0;
967 /* foreach PV */
968 dm_list_iterate_items(pvl, &vg->pvs) {
969 pv = pvl->pv;
971 pv->pe_size = new_size;
972 if (!_recalc_extents(&pv->pe_count, pv_dev_name(pv), "",
973 old_size, new_size))
974 return_0;
976 if (!_recalc_extents(&pv->pe_alloc_count, pv_dev_name(pv),
977 " allocated space", old_size, new_size))
978 return_0;
980 /* foreach free PV Segment */
981 dm_list_iterate_items(pvseg, &pv->segments) {
982 if (pvseg_is_allocated(pvseg))
983 continue;
985 if (!_recalc_extents(&pvseg->pe, pv_dev_name(pv),
986 " PV segment start", old_size,
987 new_size))
988 return_0;
989 if (!_recalc_extents(&pvseg->len, pv_dev_name(pv),
990 " PV segment length", old_size,
991 new_size))
992 return_0;
996 /* foreach LV */
997 dm_list_iterate_items(lvl, &vg->lvs) {
998 lv = lvl->lv;
1000 if (!_recalc_extents(&lv->le_count, lv->name, "", old_size,
1001 new_size))
1002 return_0;
1004 dm_list_iterate_items(seg, &lv->segments) {
1005 if (!_recalc_extents(&seg->le, lv->name,
1006 " segment start", old_size,
1007 new_size))
1008 return_0;
1010 if (!_recalc_extents(&seg->len, lv->name,
1011 " segment length", old_size,
1012 new_size))
1013 return_0;
1015 if (!_recalc_extents(&seg->area_len, lv->name,
1016 " area length", old_size,
1017 new_size))
1018 return_0;
1020 if (!_recalc_extents(&seg->extents_copied, lv->name,
1021 " extents moved", old_size,
1022 new_size))
1023 return_0;
1025 /* foreach area */
1026 for (s = 0; s < seg->area_count; s++) {
1027 switch (seg_type(seg, s)) {
1028 case AREA_PV:
1029 if (!_recalc_extents
1030 (&seg_pe(seg, s),
1031 lv->name,
1032 " pvseg start", old_size,
1033 new_size))
1034 return_0;
1035 if (!_recalc_extents
1036 (&seg_pvseg(seg, s)->len,
1037 lv->name,
1038 " pvseg length", old_size,
1039 new_size))
1040 return_0;
1041 break;
1042 case AREA_LV:
1043 if (!_recalc_extents
1044 (&seg_le(seg, s), lv->name,
1045 " area start", old_size,
1046 new_size))
1047 return_0;
1048 break;
1049 case AREA_UNASSIGNED:
1050 log_error("Unassigned area %u found in "
1051 "segment", s);
1052 return 0;
1059 return 1;
1062 int vg_set_max_lv(struct volume_group *vg, uint32_t max_lv)
1064 if (!vg_is_resizeable(vg)) {
1065 log_error("Volume group \"%s\" must be resizeable "
1066 "to change MaxLogicalVolume", vg->name);
1067 return 0;
1070 if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
1071 if (!max_lv)
1072 max_lv = 255;
1073 else if (max_lv > 255) {
1074 log_error("MaxLogicalVolume limit is 255");
1075 return 0;
1079 if (max_lv && max_lv < vg_visible_lvs(vg)) {
1080 log_error("MaxLogicalVolume is less than the current number "
1081 "%d of LVs for %s", vg_visible_lvs(vg),
1082 vg->name);
1083 return 0;
1085 vg->max_lv = max_lv;
1087 return 1;
1090 int vg_set_max_pv(struct volume_group *vg, uint32_t max_pv)
1092 if (!vg_is_resizeable(vg)) {
1093 log_error("Volume group \"%s\" must be resizeable "
1094 "to change MaxPhysicalVolumes", vg->name);
1095 return 0;
1098 if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS)) {
1099 if (!max_pv)
1100 max_pv = 255;
1101 else if (max_pv > 255) {
1102 log_error("MaxPhysicalVolume limit is 255");
1103 return 0;
1107 if (max_pv && max_pv < vg->pv_count) {
1108 log_error("MaxPhysicalVolumes is less than the current number "
1109 "%d of PVs for \"%s\"", vg->pv_count,
1110 vg->name);
1111 return 0;
1113 vg->max_pv = max_pv;
1114 return 1;
1117 int vg_set_alloc_policy(struct volume_group *vg, alloc_policy_t alloc)
1119 if (alloc == ALLOC_INHERIT) {
1120 log_error("Volume Group allocation policy cannot inherit "
1121 "from anything");
1122 return 0;
1125 if (alloc == vg->alloc)
1126 return 1;
1128 vg->alloc = alloc;
1129 return 1;
1132 int vg_set_clustered(struct volume_group *vg, int clustered)
1134 struct lv_list *lvl;
1135 if (clustered) {
1136 dm_list_iterate_items(lvl, &vg->lvs) {
1137 if (lv_is_origin(lvl->lv) || lv_is_cow(lvl->lv)) {
1138 log_error("Volume group %s contains snapshots "
1139 "that are not yet supported.",
1140 vg->name);
1141 return 0;
1146 if (clustered)
1147 vg->status |= CLUSTERED;
1148 else
1149 vg->status &= ~CLUSTERED;
1150 return 1;
1154 * Separate metadata areas after splitting a VG.
1155 * Also accepts orphan VG as destination (for vgreduce).
1157 int vg_split_mdas(struct cmd_context *cmd __attribute((unused)),
1158 struct volume_group *vg_from, struct volume_group *vg_to)
1160 struct metadata_area *mda, *mda2;
1161 struct dm_list *mdas_from, *mdas_to;
1162 int common_mda = 0;
1164 mdas_from = &vg_from->fid->metadata_areas;
1165 mdas_to = &vg_to->fid->metadata_areas;
1167 dm_list_iterate_items_safe(mda, mda2, mdas_from) {
1168 if (!mda->ops->mda_in_vg) {
1169 common_mda = 1;
1170 continue;
1173 if (!mda->ops->mda_in_vg(vg_from->fid, vg_from, mda)) {
1174 if (is_orphan_vg(vg_to->name))
1175 dm_list_del(&mda->list);
1176 else
1177 dm_list_move(mdas_to, &mda->list);
1181 if (dm_list_empty(mdas_from) ||
1182 (!is_orphan_vg(vg_to->name) && dm_list_empty(mdas_to)))
1183 return common_mda;
1185 return 1;
1189 * See if we may pvcreate on this device.
1190 * 0 indicates we may not.
1192 static int pvcreate_check(struct cmd_context *cmd, const char *name,
1193 struct pvcreate_params *pp)
1195 struct physical_volume *pv;
1196 struct device *dev;
1197 uint64_t md_superblock, swap_signature;
1198 int wipe_md, wipe_swap;
1200 /* FIXME Check partition type is LVM unless --force is given */
1202 /* Is there a pv here already? */
1203 pv = pv_read(cmd, name, NULL, NULL, 0, 0);
1206 * If a PV has no MDAs it may appear to be an orphan until the
1207 * metadata is read off another PV in the same VG. Detecting
1208 * this means checking every VG by scanning every PV on the
1209 * system.
1211 if (pv && is_orphan(pv)) {
1212 if (!scan_vgs_for_pvs(cmd))
1213 return_0;
1214 pv = pv_read(cmd, name, NULL, NULL, 0, 0);
1217 /* Allow partial & exported VGs to be destroyed. */
1218 /* We must have -ff to overwrite a non orphan */
1219 if (pv && !is_orphan(pv) && pp->force != DONT_PROMPT_OVERRIDE) {
1220 log_error("Can't initialize physical volume \"%s\" of "
1221 "volume group \"%s\" without -ff", name, pv_vg_name(pv));
1222 return 0;
1225 /* prompt */
1226 if (pv && !is_orphan(pv) && !pp->yes &&
1227 yes_no_prompt(_really_init, name, pv_vg_name(pv)) == 'n') {
1228 log_print("%s: physical volume not initialized", name);
1229 return 0;
1232 if (sigint_caught())
1233 return 0;
1235 dev = dev_cache_get(name, cmd->filter);
1237 /* Is there an md superblock here? */
1238 if (!dev && md_filtering()) {
1239 unlock_vg(cmd, VG_ORPHANS);
1241 persistent_filter_wipe(cmd->filter);
1242 lvmcache_destroy(cmd, 1);
1244 init_md_filtering(0);
1245 if (!lock_vol(cmd, VG_ORPHANS, LCK_VG_WRITE)) {
1246 log_error("Can't get lock for orphan PVs");
1247 init_md_filtering(1);
1248 return 0;
1250 dev = dev_cache_get(name, cmd->filter);
1251 init_md_filtering(1);
1254 if (!dev) {
1255 log_error("Device %s not found (or ignored by filtering).", name);
1256 return 0;
1260 * This test will fail if the device belongs to an MD array.
1262 if (!dev_test_excl(dev)) {
1263 /* FIXME Detect whether device-mapper itself is still using it */
1264 log_error("Can't open %s exclusively. Mounted filesystem?",
1265 name);
1266 return 0;
1269 /* Wipe superblock? */
1270 if ((wipe_md = dev_is_md(dev, &md_superblock)) == 1 &&
1271 ((!pp->idp && !pp->restorefile) || pp->yes ||
1272 (yes_no_prompt("Software RAID md superblock "
1273 "detected on %s. Wipe it? [y/n] ", name) == 'y'))) {
1274 log_print("Wiping software RAID md superblock on %s", name);
1275 if (!dev_set(dev, md_superblock, 4, 0)) {
1276 log_error("Failed to wipe RAID md superblock on %s",
1277 name);
1278 return 0;
1282 if (wipe_md == -1) {
1283 log_error("Fatal error while trying to detect software "
1284 "RAID md superblock on %s", name);
1285 return 0;
1288 if ((wipe_swap = dev_is_swap(dev, &swap_signature)) == 1 &&
1289 ((!pp->idp && !pp->restorefile) || pp->yes ||
1290 (yes_no_prompt("Swap signature detected on %s. Wipe it? [y/n] ",
1291 name) == 'y'))) {
1292 log_print("Wiping swap signature on %s", name);
1293 if (!dev_set(dev, swap_signature, 10, 0)) {
1294 log_error("Failed to wipe swap signature on %s", name);
1295 return 0;
1299 if (wipe_swap == -1) {
1300 log_error("Fatal error while trying to detect swap "
1301 "signature on %s", name);
1302 return 0;
1305 if (sigint_caught())
1306 return 0;
1308 if (pv && !is_orphan(pv) && pp->force) {
1309 log_warn("WARNING: Forcing physical volume creation on "
1310 "%s%s%s%s", name,
1311 !is_orphan(pv) ? " of volume group \"" : "",
1312 !is_orphan(pv) ? pv_vg_name(pv) : "",
1313 !is_orphan(pv) ? "\"" : "");
1316 return 1;
1319 void pvcreate_params_set_defaults(struct pvcreate_params *pp)
1321 memset(pp, 0, sizeof(*pp));
1322 pp->zero = 1;
1323 pp->size = 0;
1324 pp->data_alignment = UINT64_C(0);
1325 pp->data_alignment_offset = UINT64_C(0);
1326 pp->pvmetadatacopies = DEFAULT_PVMETADATACOPIES;
1327 pp->pvmetadatasize = DEFAULT_PVMETADATASIZE;
1328 pp->labelsector = DEFAULT_LABELSECTOR;
1329 pp->idp = 0;
1330 pp->pe_start = 0;
1331 pp->extent_count = 0;
1332 pp->extent_size = 0;
1333 pp->restorefile = 0;
1334 pp->force = PROMPT;
1335 pp->yes = 0;
1339 * pvcreate_single() - initialize a device with PV label and metadata area
1341 * Parameters:
1342 * - pv_name: device path to initialize
1343 * - pp: parameters to pass to pv_create; if NULL, use default values
1345 * Returns:
1346 * NULL: error
1347 * struct physical_volume * (non-NULL): handle to physical volume created
1349 struct physical_volume * pvcreate_single(struct cmd_context *cmd,
1350 const char *pv_name,
1351 struct pvcreate_params *pp)
1353 void *pv;
1354 struct device *dev;
1355 struct dm_list mdas;
1356 struct pvcreate_params default_pp;
1357 char buffer[64] __attribute((aligned(8)));
1359 pvcreate_params_set_defaults(&default_pp);
1360 if (!pp)
1361 pp = &default_pp;
1363 if (pp->idp) {
1364 if ((dev = device_from_pvid(cmd, pp->idp)) &&
1365 (dev != dev_cache_get(pv_name, cmd->filter))) {
1366 if (!id_write_format((const struct id*)&pp->idp->uuid,
1367 buffer, sizeof(buffer)))
1368 return_NULL;
1369 log_error("uuid %s already in use on \"%s\"", buffer,
1370 dev_name(dev));
1371 return NULL;
1375 if (!pvcreate_check(cmd, pv_name, pp))
1376 goto error;
1378 if (sigint_caught())
1379 goto error;
1381 if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
1382 log_error("%s: Couldn't find device. Check your filters?",
1383 pv_name);
1384 goto error;
1387 dm_list_init(&mdas);
1388 if (!(pv = pv_create(cmd, dev, pp->idp, pp->size,
1389 pp->data_alignment, pp->data_alignment_offset,
1390 pp->pe_start, pp->extent_count, pp->extent_size,
1391 pp->pvmetadatacopies,
1392 pp->pvmetadatasize,&mdas))) {
1393 log_error("Failed to setup physical volume \"%s\"", pv_name);
1394 goto error;
1397 log_verbose("Set up physical volume for \"%s\" with %" PRIu64
1398 " available sectors", pv_name, pv_size(pv));
1400 /* Wipe existing label first */
1401 if (!label_remove(pv_dev(pv))) {
1402 log_error("Failed to wipe existing label on %s", pv_name);
1403 goto error;
1406 if (pp->zero) {
1407 log_verbose("Zeroing start of device %s", pv_name);
1408 if (!dev_open_quiet(dev)) {
1409 log_error("%s not opened: device not zeroed", pv_name);
1410 goto error;
1413 if (!dev_set(dev, UINT64_C(0), (size_t) 2048, 0)) {
1414 log_error("%s not wiped: aborting", pv_name);
1415 dev_close(dev);
1416 goto error;
1418 dev_close(dev);
1421 log_very_verbose("Writing physical volume data to disk \"%s\"",
1422 pv_name);
1423 if (!(pv_write(cmd, (struct physical_volume *)pv, &mdas,
1424 pp->labelsector))) {
1425 log_error("Failed to write physical volume \"%s\"", pv_name);
1426 goto error;
1429 log_print("Physical volume \"%s\" successfully created", pv_name);
1431 return pv;
1433 error:
1434 return NULL;
/* Release a PV previously obtained from _alloc_pv() back to its pool. */
static void _free_pv(struct dm_pool *mem, struct physical_volume *pv)
{
	dm_pool_free(mem, pv);
}
1442 static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev)
1444 struct physical_volume *pv = dm_pool_zalloc(mem, sizeof(*pv));
1446 if (!pv)
1447 return_NULL;
1449 if (!(pv->vg_name = dm_pool_zalloc(mem, NAME_LEN))) {
1450 dm_pool_free(mem, pv);
1451 return NULL;
1454 pv->pe_size = 0;
1455 pv->pe_start = 0;
1456 pv->pe_count = 0;
1457 pv->pe_alloc_count = 0;
1458 pv->pe_align = 0;
1459 pv->pe_align_offset = 0;
1460 pv->fmt = NULL;
1461 pv->dev = dev;
1463 pv->status = ALLOCATABLE_PV;
1465 dm_list_init(&pv->tags);
1466 dm_list_init(&pv->segments);
1468 return pv;
1472 * pv_create - initialize a physical volume for use with a volume group
1474 * @fmt: format type
1475 * @dev: PV device to initialize
1476 * @size: size of the PV in sectors
1477 * @data_alignment: requested alignment of data
1478 * @data_alignment_offset: requested offset to aligned data
1479 * @pe_start: physical extent start
1480 * @existing_extent_count
1481 * @existing_extent_size
1482 * @pvmetadatacopies
1483 * @pvmetadatasize
1484 * @mdas
1486 * Returns:
1487 * PV handle - physical volume initialized successfully
1488 * NULL - invalid parameter or problem initializing the physical volume
1490 * Note:
1491 * FIXME: shorten argument list and replace with explict 'set' functions
1493 struct physical_volume *pv_create(const struct cmd_context *cmd,
1494 struct device *dev,
1495 struct id *id, uint64_t size,
1496 unsigned long data_alignment,
1497 unsigned long data_alignment_offset,
1498 uint64_t pe_start,
1499 uint32_t existing_extent_count,
1500 uint32_t existing_extent_size,
1501 int pvmetadatacopies,
1502 uint64_t pvmetadatasize, struct dm_list *mdas)
1504 const struct format_type *fmt = cmd->fmt;
1505 struct dm_pool *mem = fmt->cmd->mem;
1506 struct physical_volume *pv = _alloc_pv(mem, dev);
1508 if (!pv)
1509 return NULL;
1511 if (id)
1512 memcpy(&pv->id, id, sizeof(*id));
1513 else if (!id_create(&pv->id)) {
1514 log_error("Failed to create random uuid for %s.",
1515 dev_name(dev));
1516 goto bad;
1519 if (!dev_get_size(pv->dev, &pv->size)) {
1520 log_error("%s: Couldn't get size.", pv_dev_name(pv));
1521 goto bad;
1524 if (size) {
1525 if (size > pv->size)
1526 log_warn("WARNING: %s: Overriding real size. "
1527 "You could lose data.", pv_dev_name(pv));
1528 log_verbose("%s: Pretending size is %" PRIu64 " sectors.",
1529 pv_dev_name(pv), size);
1530 pv->size = size;
1533 if (pv->size < PV_MIN_SIZE) {
1534 log_error("%s: Size must exceed minimum of %ld sectors.",
1535 pv_dev_name(pv), PV_MIN_SIZE);
1536 goto bad;
1539 if (pv->size < data_alignment) {
1540 log_error("%s: Data alignment must not exceed device size.",
1541 pv_dev_name(pv));
1542 goto bad;
1545 pv->fmt = fmt;
1546 pv->vg_name = fmt->orphan_vg_name;
1548 if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
1549 existing_extent_size, data_alignment,
1550 data_alignment_offset,
1551 pvmetadatacopies, pvmetadatasize, mdas,
1552 pv, NULL)) {
1553 log_error("%s: Format-specific setup of physical volume "
1554 "failed.", pv_dev_name(pv));
1555 goto bad;
1558 return pv;
1560 bad:
1561 _free_pv(mem, pv);
1562 return NULL;
/* Public wrapper around _find_pv_in_vg().
 * FIXME: liblvm todo - make into function that returns handle */
struct pv_list *find_pv_in_vg(const struct volume_group *vg,
			      const char *pv_name)
{
	return _find_pv_in_vg(vg, pv_name);
}
1572 static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
1573 const char *pv_name)
1575 struct pv_list *pvl;
1577 dm_list_iterate_items(pvl, &vg->pvs)
1578 if (pvl->pv->dev == dev_cache_get(pv_name, vg->cmd->filter))
1579 return pvl;
1581 return NULL;
1584 struct pv_list *find_pv_in_pv_list(const struct dm_list *pl,
1585 const struct physical_volume *pv)
1587 struct pv_list *pvl;
1589 dm_list_iterate_items(pvl, pl)
1590 if (pvl->pv == pv)
1591 return pvl;
1593 return NULL;
1596 int pv_is_in_vg(struct volume_group *vg, struct physical_volume *pv)
1598 struct pv_list *pvl;
1600 dm_list_iterate_items(pvl, &vg->pvs)
1601 if (pv == pvl->pv)
1602 return 1;
1604 return 0;
/*
 * find_pv_in_vg_by_uuid - Find PV in VG by PV UUID
 * @vg: volume group to search
 * @id: UUID of the PV to match
 *
 * Returns the PV handle if the UUID is found in @vg, NULL otherwise.
 * FIXME - liblvm todo - make into function that takes VG handle
 */
struct physical_volume *find_pv_in_vg_by_uuid(const struct volume_group *vg,
					      const struct id *id)
{
	return _find_pv_in_vg_by_uuid(vg, id);
}
1626 static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
1627 const struct id *id)
1629 struct pv_list *pvl;
1631 dm_list_iterate_items(pvl, &vg->pvs)
1632 if (id_equal(&pvl->pv->id, id))
1633 return pvl->pv;
1635 return NULL;
1638 struct lv_list *find_lv_in_vg(const struct volume_group *vg,
1639 const char *lv_name)
1641 struct lv_list *lvl;
1642 const char *ptr;
1644 /* Use last component */
1645 if ((ptr = strrchr(lv_name, '/')))
1646 ptr++;
1647 else
1648 ptr = lv_name;
1650 dm_list_iterate_items(lvl, &vg->lvs)
1651 if (!strcmp(lvl->lv->name, ptr))
1652 return lvl;
1654 return NULL;
1657 struct lv_list *find_lv_in_lv_list(const struct dm_list *ll,
1658 const struct logical_volume *lv)
1660 struct lv_list *lvl;
1662 dm_list_iterate_items(lvl, ll)
1663 if (lvl->lv == lv)
1664 return lvl;
1666 return NULL;
1669 struct lv_list *find_lv_in_vg_by_lvid(struct volume_group *vg,
1670 const union lvid *lvid)
1672 struct lv_list *lvl;
1674 dm_list_iterate_items(lvl, &vg->lvs)
1675 if (!strncmp(lvl->lv->lvid.s, lvid->s, sizeof(*lvid)))
1676 return lvl;
1678 return NULL;
1681 struct logical_volume *find_lv(const struct volume_group *vg,
1682 const char *lv_name)
1684 struct lv_list *lvl = find_lv_in_vg(vg, lv_name);
1685 return lvl ? lvl->lv : NULL;
1688 struct physical_volume *find_pv(struct volume_group *vg, struct device *dev)
1690 struct pv_list *pvl;
1692 dm_list_iterate_items(pvl, &vg->pvs)
1693 if (dev == pvl->pv->dev)
1694 return pvl->pv;
1696 return NULL;
/* Public wrapper around _find_pv_by_name().
 * FIXME: liblvm todo - make into function that returns handle */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
					const char *pv_name)
{
	return _find_pv_by_name(cmd, pv_name);
}
1707 static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
1708 const char *pv_name)
1710 struct physical_volume *pv;
1712 if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
1713 log_error("Physical volume %s not found", pv_name);
1714 return NULL;
1717 if (is_orphan_vg(pv->vg_name)) {
1718 /* If a PV has no MDAs - need to search all VGs for it */
1719 if (!scan_vgs_for_pvs(cmd))
1720 return_NULL;
1721 if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
1722 log_error("Physical volume %s not found", pv_name);
1723 return NULL;
1727 if (is_orphan_vg(pv->vg_name)) {
1728 log_error("Physical volume %s not in a volume group", pv_name);
1729 return NULL;
1732 return pv;
1735 /* Find segment at a given logical extent in an LV */
1736 struct lv_segment *find_seg_by_le(const struct logical_volume *lv, uint32_t le)
1738 struct lv_segment *seg;
1740 dm_list_iterate_items(seg, &lv->segments)
1741 if (le >= seg->le && le < seg->le + seg->len)
1742 return seg;
1744 return NULL;
1747 struct lv_segment *first_seg(const struct logical_volume *lv)
1749 struct lv_segment *seg;
1751 dm_list_iterate_items(seg, &lv->segments)
1752 return seg;
1754 return NULL;
1757 /* Find segment at a given physical extent in a PV */
1758 struct pv_segment *find_peg_by_pe(const struct physical_volume *pv, uint32_t pe)
1760 struct pv_segment *peg;
1762 dm_list_iterate_items(peg, &pv->segments)
1763 if (pe >= peg->pe && pe < peg->pe + peg->len)
1764 return peg;
1766 return NULL;
1769 int vg_remove_mdas(struct volume_group *vg)
1771 struct metadata_area *mda;
1773 /* FIXME Improve recovery situation? */
1774 /* Remove each copy of the metadata */
1775 dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1776 if (mda->ops->vg_remove &&
1777 !mda->ops->vg_remove(vg->fid, vg, mda))
1778 return_0;
1781 return 1;
1784 unsigned snapshot_count(const struct volume_group *vg)
1786 struct lv_list *lvl;
1787 unsigned num_snapshots = 0;
1789 dm_list_iterate_items(lvl, &vg->lvs)
1790 if (lv_is_cow(lvl->lv))
1791 num_snapshots++;
1793 return num_snapshots;
1796 unsigned vg_visible_lvs(const struct volume_group *vg)
1798 struct lv_list *lvl;
1799 unsigned lv_count = 0;
1801 dm_list_iterate_items(lvl, &vg->lvs) {
1802 if (lv_is_visible(lvl->lv))
1803 lv_count++;
1806 return lv_count;
1810 * Determine whether two vgs are compatible for merging.
1812 int vgs_are_compatible(struct cmd_context *cmd __attribute((unused)),
1813 struct volume_group *vg_from,
1814 struct volume_group *vg_to)
1816 struct lv_list *lvl1, *lvl2;
1817 struct pv_list *pvl;
1818 char *name1, *name2;
1820 if (lvs_in_vg_activated(vg_from)) {
1821 log_error("Logical volumes in \"%s\" must be inactive",
1822 vg_from->name);
1823 return 0;
1826 /* Check compatibility */
1827 if (vg_to->extent_size != vg_from->extent_size) {
1828 log_error("Extent sizes differ: %d (%s) and %d (%s)",
1829 vg_to->extent_size, vg_to->name,
1830 vg_from->extent_size, vg_from->name);
1831 return 0;
1834 if (vg_to->max_pv &&
1835 (vg_to->max_pv < vg_to->pv_count + vg_from->pv_count)) {
1836 log_error("Maximum number of physical volumes (%d) exceeded "
1837 " for \"%s\" and \"%s\"", vg_to->max_pv, vg_to->name,
1838 vg_from->name);
1839 return 0;
1842 if (vg_to->max_lv &&
1843 (vg_to->max_lv < vg_visible_lvs(vg_to) + vg_visible_lvs(vg_from))) {
1844 log_error("Maximum number of logical volumes (%d) exceeded "
1845 " for \"%s\" and \"%s\"", vg_to->max_lv, vg_to->name,
1846 vg_from->name);
1847 return 0;
1850 /* Metadata types must be the same */
1851 if (vg_to->fid->fmt != vg_from->fid->fmt) {
1852 log_error("Metadata types differ for \"%s\" and \"%s\"",
1853 vg_to->name, vg_from->name);
1854 return 0;
1857 /* Clustering attribute must be the same */
1858 if (vg_is_clustered(vg_to) != vg_is_clustered(vg_from)) {
1859 log_error("Clustered attribute differs for \"%s\" and \"%s\"",
1860 vg_to->name, vg_from->name);
1861 return 0;
1864 /* Check no conflicts with LV names */
1865 dm_list_iterate_items(lvl1, &vg_to->lvs) {
1866 name1 = lvl1->lv->name;
1868 dm_list_iterate_items(lvl2, &vg_from->lvs) {
1869 name2 = lvl2->lv->name;
1871 if (!strcmp(name1, name2)) {
1872 log_error("Duplicate logical volume "
1873 "name \"%s\" "
1874 "in \"%s\" and \"%s\"",
1875 name1, vg_to->name, vg_from->name);
1876 return 0;
1881 /* Check no PVs are constructed from either VG */
1882 dm_list_iterate_items(pvl, &vg_to->pvs) {
1883 if (pv_uses_vg(pvl->pv, vg_from)) {
1884 log_error("Physical volume %s might be constructed "
1885 "from same volume group %s.",
1886 pv_dev_name(pvl->pv), vg_from->name);
1887 return 0;
1891 dm_list_iterate_items(pvl, &vg_from->pvs) {
1892 if (pv_uses_vg(pvl->pv, vg_to)) {
1893 log_error("Physical volume %s might be constructed "
1894 "from same volume group %s.",
1895 pv_dev_name(pvl->pv), vg_to->name);
1896 return 0;
1900 return 1;
1903 struct _lv_postorder_baton {
1904 int (*fn)(struct logical_volume *lv, void *data);
1905 void *data;
1908 static int _lv_postorder_visit(struct logical_volume *,
1909 int (*fn)(struct logical_volume *lv, void *data),
1910 void *data);
1912 static int _lv_postorder_level(struct logical_volume *lv, void *data)
1914 struct _lv_postorder_baton *baton = data;
1915 if (lv->status & POSTORDER_OPEN_FLAG)
1916 return 1; // a data structure loop has closed...
1917 lv->status |= POSTORDER_OPEN_FLAG;
1918 int r =_lv_postorder_visit(lv, baton->fn, baton->data);
1919 lv->status &= ~POSTORDER_OPEN_FLAG;
1920 lv->status |= POSTORDER_FLAG;
1921 return r;
1924 static int _lv_each_dependency(struct logical_volume *lv,
1925 int (*fn)(struct logical_volume *lv, void *data),
1926 void *data)
1928 int i, s;
1929 struct lv_segment *lvseg;
1931 struct logical_volume *deps[] = {
1932 lv->snapshot ? lv->snapshot->origin : 0,
1933 lv->snapshot ? lv->snapshot->cow : 0 };
1934 for (i = 0; i < sizeof(deps) / sizeof(*deps); ++i) {
1935 if (deps[i] && !fn(deps[i], data))
1936 return_0;
1939 dm_list_iterate_items(lvseg, &lv->segments) {
1940 if (lvseg->log_lv && !fn(lvseg->log_lv, data))
1941 return_0;
1942 for (s = 0; s < lvseg->area_count; ++s) {
1943 if (seg_type(lvseg, s) == AREA_LV && !fn(seg_lv(lvseg,s), data))
1944 return_0;
1947 return 1;
1950 static int _lv_postorder_cleanup(struct logical_volume *lv, void *data)
1952 if (!(lv->status & POSTORDER_FLAG))
1953 return 1;
1954 lv->status &= ~POSTORDER_FLAG;
1956 if (!_lv_each_dependency(lv, _lv_postorder_cleanup, data))
1957 return_0;
1958 return 1;
1961 static int _lv_postorder_visit(struct logical_volume *lv,
1962 int (*fn)(struct logical_volume *lv, void *data),
1963 void *data)
1965 struct _lv_postorder_baton baton;
1966 int r;
1968 if (lv->status & POSTORDER_FLAG)
1969 return 1;
1971 baton.fn = fn;
1972 baton.data = data;
1973 r = _lv_each_dependency(lv, _lv_postorder_level, &baton);
1974 if (r)
1975 r = fn(lv, data);
1977 return r;
/*
 * This will walk the LV dependency graph in depth-first order and in the
 * postorder, call a callback function "fn". The void *data is passed along
 * all the calls. The callback may return zero to indicate an error and
 * terminate the depth-first walk. The error is propagated to return value
 * of _lv_postorder.
 */
static int _lv_postorder(struct logical_volume *lv,
			 int (*fn)(struct logical_volume *lv, void *data),
			 void *data)
{
	int r = _lv_postorder_visit(lv, fn, data);

	/* Always clear the visit marks, even after a failed walk. */
	_lv_postorder_cleanup(lv, 0);

	return r;
}
1997 struct _lv_mark_if_partial_baton {
1998 int partial;
2001 static int _lv_mark_if_partial_collect(struct logical_volume *lv, void *data)
2003 struct _lv_mark_if_partial_baton *baton = data;
2004 if (lv->status & PARTIAL_LV)
2005 baton->partial = 1;
2007 return 1;
2010 static int _lv_mark_if_partial_single(struct logical_volume *lv, void *data)
2012 int s;
2013 struct _lv_mark_if_partial_baton baton;
2014 struct lv_segment *lvseg;
2016 dm_list_iterate_items(lvseg, &lv->segments) {
2017 for (s = 0; s < lvseg->area_count; ++s) {
2018 if (seg_type(lvseg, s) == AREA_PV) {
2019 if (seg_pv(lvseg, s)->status & MISSING_PV)
2020 lv->status |= PARTIAL_LV;
2025 baton.partial = 0;
2026 _lv_each_dependency(lv, _lv_mark_if_partial_collect, &baton);
2028 if (baton.partial)
2029 lv->status |= PARTIAL_LV;
2031 return 1;
2034 static int _lv_mark_if_partial(struct logical_volume *lv)
2036 return _lv_postorder(lv, _lv_mark_if_partial_single, NULL);
2040 * Mark LVs with missing PVs using PARTIAL_LV status flag. The flag is
2041 * propagated transitively, so LVs referencing other LVs are marked
2042 * partial as well, if any of their referenced LVs are marked partial.
2044 static int _vg_mark_partial_lvs(struct volume_group *vg)
2046 struct logical_volume *lv;
2047 struct lv_list *lvl;
2049 dm_list_iterate_items(lvl, &vg->lvs) {
2050 lv = lvl->lv;
2051 if (!_lv_mark_if_partial(lv))
2052 return_0;
2054 return 1;
2058 * Be sure that all PV devices have cached read ahead in dev-cache
2059 * Currently it takes read_ahead from first PV segment only
2061 static int _lv_read_ahead_single(struct logical_volume *lv, void *data)
2063 struct lv_segment *seg = first_seg(lv);
2064 uint32_t seg_read_ahead = 0, *read_ahead = data;
2066 if (seg && seg->area_count && seg_type(seg, 0) == AREA_PV)
2067 dev_get_read_ahead(seg_pv(seg, 0)->dev, &seg_read_ahead);
2069 if (seg_read_ahead > *read_ahead)
2070 *read_ahead = seg_read_ahead;
2072 return 1;
2076 * Calculate readahead for logical volume from underlying PV devices.
2077 * If read_ahead is NULL, only ensure that readahead of PVs are preloaded
2078 * into PV struct device in dev cache.
2080 void lv_calculate_readahead(const struct logical_volume *lv, uint32_t *read_ahead)
2082 uint32_t _read_ahead = 0;
2084 if (lv->read_ahead == DM_READ_AHEAD_AUTO)
2085 _lv_postorder((struct logical_volume *)lv, _lv_read_ahead_single, &_read_ahead);
2087 if (read_ahead) {
2088 log_debug("Calculated readahead of LV %s is %u", lv->name, _read_ahead);
2089 *read_ahead = _read_ahead;
2093 int vg_validate(struct volume_group *vg)
2095 struct pv_list *pvl, *pvl2;
2096 struct lv_list *lvl, *lvl2;
2097 char uuid[64] __attribute((aligned(8)));
2098 int r = 1;
2099 uint32_t hidden_lv_count = 0;
2101 /* FIXME Also check there's no data/metadata overlap */
2103 dm_list_iterate_items(pvl, &vg->pvs) {
2104 dm_list_iterate_items(pvl2, &vg->pvs) {
2105 if (pvl == pvl2)
2106 break;
2107 if (id_equal(&pvl->pv->id,
2108 &pvl2->pv->id)) {
2109 if (!id_write_format(&pvl->pv->id, uuid,
2110 sizeof(uuid)))
2111 stack;
2112 log_error("Internal error: Duplicate PV id "
2113 "%s detected for %s in %s.",
2114 uuid, pv_dev_name(pvl->pv),
2115 vg->name);
2116 r = 0;
2120 if (strcmp(pvl->pv->vg_name, vg->name)) {
2121 log_error("Internal error: VG name for PV %s is corrupted",
2122 pv_dev_name(pvl->pv));
2123 r = 0;
2127 if (!check_pv_segments(vg)) {
2128 log_error("Internal error: PV segments corrupted in %s.",
2129 vg->name);
2130 r = 0;
2134 * Count all non-snapshot invisible LVs
2136 dm_list_iterate_items(lvl, &vg->lvs) {
2137 if (lvl->lv->status & VISIBLE_LV)
2138 continue;
2140 /* snapshots */
2141 if (lv_is_cow(lvl->lv))
2142 continue;
2144 /* virtual origins are always hidden */
2145 if (lv_is_origin(lvl->lv) && !lv_is_virtual_origin(lvl->lv))
2146 continue;
2148 /* count other non-snapshot invisible volumes */
2149 hidden_lv_count++;
2152 * FIXME: add check for unreferenced invisible LVs
2153 * - snapshot cow & origin
2154 * - mirror log & images
2155 * - mirror conversion volumes (_mimagetmp*)
2160 * all volumes = visible LVs + snapshot_cows + invisible LVs
2162 if (((uint32_t) dm_list_size(&vg->lvs)) !=
2163 vg_visible_lvs(vg) + snapshot_count(vg) + hidden_lv_count) {
2164 log_error("Internal error: #internal LVs (%u) != #LVs (%"
2165 PRIu32 ") + #snapshots (%" PRIu32 ") + #internal LVs %u in VG %s",
2166 dm_list_size(&vg->lvs), vg_visible_lvs(vg),
2167 snapshot_count(vg), hidden_lv_count, vg->name);
2168 r = 0;
2171 dm_list_iterate_items(lvl, &vg->lvs) {
2172 dm_list_iterate_items(lvl2, &vg->lvs) {
2173 if (lvl == lvl2)
2174 break;
2175 if (!strcmp(lvl->lv->name, lvl2->lv->name)) {
2176 log_error("Internal error: Duplicate LV name "
2177 "%s detected in %s.", lvl->lv->name,
2178 vg->name);
2179 r = 0;
2181 if (id_equal(&lvl->lv->lvid.id[1],
2182 &lvl2->lv->lvid.id[1])) {
2183 if (!id_write_format(&lvl->lv->lvid.id[1], uuid,
2184 sizeof(uuid)))
2185 stack;
2186 log_error("Internal error: Duplicate LV id "
2187 "%s detected for %s and %s in %s.",
2188 uuid, lvl->lv->name, lvl2->lv->name,
2189 vg->name);
2190 r = 0;
2195 dm_list_iterate_items(lvl, &vg->lvs) {
2196 if (!check_lv_segments(lvl->lv, 1)) {
2197 log_error("Internal error: LV segments corrupted in %s.",
2198 lvl->lv->name);
2199 r = 0;
2203 if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS) &&
2204 (!vg->max_lv || !vg->max_pv)) {
2205 log_error("Internal error: Volume group %s has limited PV/LV count"
2206 " but limit is not set.", vg->name);
2207 r = 0;
2210 if (vg_max_lv_reached(vg))
2211 stack;
2213 return r;
2217 * After vg_write() returns success,
2218 * caller MUST call either vg_commit() or vg_revert()
2220 int vg_write(struct volume_group *vg)
2222 struct dm_list *mdah;
2223 struct metadata_area *mda;
2225 if (!vg_validate(vg))
2226 return_0;
2228 if (vg->status & PARTIAL_VG) {
2229 log_error("Cannot update partial volume group %s.", vg->name);
2230 return 0;
2233 if (vg_missing_pv_count(vg) && !vg->cmd->handles_missing_pvs) {
2234 log_error("Cannot update volume group %s while physical "
2235 "volumes are missing.", vg->name);
2236 return 0;
2239 if (vg_has_unknown_segments(vg) && !vg->cmd->handles_unknown_segments) {
2240 log_error("Cannot update volume group %s with unknown segments in it!",
2241 vg->name);
2242 return 0;
2246 if (dm_list_empty(&vg->fid->metadata_areas)) {
2247 log_error("Aborting vg_write: No metadata areas to write to!");
2248 return 0;
2251 if (!drop_cached_metadata(vg)) {
2252 log_error("Unable to drop cached metadata for VG %s.", vg->name);
2253 return 0;
2256 vg->seqno++;
2258 /* Write to each copy of the metadata area */
2259 dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2260 if (!mda->ops->vg_write) {
2261 log_error("Format does not support writing volume"
2262 "group metadata areas");
2263 /* Revert */
2264 dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
2265 mda = dm_list_item(mdah, struct metadata_area);
2267 if (mda->ops->vg_revert &&
2268 !mda->ops->vg_revert(vg->fid, vg, mda)) {
2269 stack;
2272 return 0;
2274 if (!mda->ops->vg_write(vg->fid, vg, mda)) {
2275 stack;
2276 /* Revert */
2277 dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
2278 mda = dm_list_item(mdah, struct metadata_area);
2280 if (mda->ops->vg_revert &&
2281 !mda->ops->vg_revert(vg->fid, vg, mda)) {
2282 stack;
2285 return 0;
2289 /* Now pre-commit each copy of the new metadata */
2290 dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2291 if (mda->ops->vg_precommit &&
2292 !mda->ops->vg_precommit(vg->fid, vg, mda)) {
2293 stack;
2294 /* Revert */
2295 dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2296 if (mda->ops->vg_revert &&
2297 !mda->ops->vg_revert(vg->fid, vg, mda)) {
2298 stack;
2301 return 0;
2305 return 1;
2308 /* Commit pending changes */
2309 int vg_commit(struct volume_group *vg)
2311 struct metadata_area *mda;
2312 int cache_updated = 0;
2313 int failed = 0;
2315 if (!vgname_is_locked(vg->name)) {
2316 log_error("Internal error: Attempt to write new VG metadata "
2317 "without locking %s", vg->name);
2318 return cache_updated;
2321 /* Commit to each copy of the metadata area */
2322 dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2323 failed = 0;
2324 if (mda->ops->vg_commit &&
2325 !mda->ops->vg_commit(vg->fid, vg, mda)) {
2326 stack;
2327 failed = 1;
2329 /* Update cache first time we succeed */
2330 if (!failed && !cache_updated) {
2331 lvmcache_update_vg(vg, 0);
2332 cache_updated = 1;
2336 /* If update failed, remove any cached precommitted metadata. */
2337 if (!cache_updated && !drop_cached_metadata(vg))
2338 log_error("Attempt to drop cached metadata failed "
2339 "after commit for VG %s.", vg->name);
2341 /* If at least one mda commit succeeded, it was committed */
2342 return cache_updated;
2345 /* Don't commit any pending changes */
2346 int vg_revert(struct volume_group *vg)
2348 struct metadata_area *mda;
2350 dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
2351 if (mda->ops->vg_revert &&
2352 !mda->ops->vg_revert(vg->fid, vg, mda)) {
2353 stack;
2357 if (!drop_cached_metadata(vg))
2358 log_error("Attempt to drop cached metadata failed "
2359 "after reverted update for VG %s.", vg->name);
2361 return 1;
2364 /* Make orphan PVs look like a VG */
2365 static struct volume_group *_vg_read_orphans(struct cmd_context *cmd,
2366 const char *orphan_vgname)
2368 struct lvmcache_vginfo *vginfo;
2369 struct lvmcache_info *info;
2370 struct pv_list *pvl;
2371 struct volume_group *vg;
2372 struct physical_volume *pv;
2373 struct dm_pool *mem;
2375 lvmcache_label_scan(cmd, 0);
2377 if (!(vginfo = vginfo_from_vgname(orphan_vgname, NULL)))
2378 return_NULL;
2380 if (!(mem = dm_pool_create("vg_read orphan", VG_MEMPOOL_CHUNK)))
2381 return_NULL;
2383 if (!(vg = dm_pool_zalloc(mem, sizeof(*vg)))) {
2384 log_error("vg allocation failed");
2385 return NULL;
2387 dm_list_init(&vg->pvs);
2388 dm_list_init(&vg->lvs);
2389 dm_list_init(&vg->tags);
2390 dm_list_init(&vg->removed_pvs);
2391 vg->vgmem = mem;
2392 vg->cmd = cmd;
2393 if (!(vg->name = dm_pool_strdup(mem, orphan_vgname))) {
2394 log_error("vg name allocation failed");
2395 goto bad;
2398 /* create format instance with appropriate metadata area */
2399 if (!(vg->fid = vginfo->fmt->ops->create_instance(vginfo->fmt,
2400 orphan_vgname, NULL,
2401 NULL))) {
2402 log_error("Failed to create format instance");
2403 goto bad;
2406 dm_list_iterate_items(info, &vginfo->infos) {
2407 if (!(pv = _pv_read(cmd, mem, dev_name(info->dev), NULL, NULL, 1, 0))) {
2408 continue;
2410 if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
2411 log_error("pv_list allocation failed");
2412 goto bad;
2414 pvl->pv = pv;
2415 dm_list_add(&vg->pvs, &pvl->list);
2416 vg->pv_count++;
2419 return vg;
2420 bad:
2421 dm_pool_destroy(mem);
2422 return NULL;
2425 static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg)
2427 struct pv_list *pvl, *pvl2;
2429 dm_list_iterate_items(pvl, &vg->pvs) {
2430 dm_list_iterate_items(pvl2, all_pvs) {
2431 if (pvl->pv->dev == pvl2->pv->dev)
2432 goto next_pv;
2436 * PV is not on list so add it.
2438 if (!(pvl2 = _copy_pvl(pvmem, pvl))) {
2439 log_error("pv_list allocation for '%s' failed",
2440 pv_dev_name(pvl->pv));
2441 return 0;
2443 dm_list_add(all_pvs, &pvl2->list);
2444 next_pv:
2448 return 1;
2451 int vg_missing_pv_count(const struct volume_group *vg)
2453 int ret = 0;
2454 struct pv_list *pvl;
2455 dm_list_iterate_items(pvl, &vg->pvs) {
2456 if (pvl->pv->status & MISSING_PV)
2457 ++ ret;
2459 return ret;
2462 /* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
2463 * inconsistent metadata on disk (i.e. the VG write lock is held).
2464 * This guarantees only consistent metadata is returned.
2465 * If consistent is 0, caller must check whether consistent == 1 on return
2466 * and take appropriate action if it isn't (e.g. abort; get write lock
2467 * and call vg_read_internal again).
2469 * If precommitted is set, use precommitted metadata if present.
2471 * Either of vgname or vgid may be NULL.
2473 static struct volume_group *_vg_read(struct cmd_context *cmd,
2474 const char *vgname,
2475 const char *vgid,
2476 int *consistent, unsigned precommitted)
2478 struct format_instance *fid;
2479 const struct format_type *fmt;
2480 struct volume_group *vg, *correct_vg = NULL;
2481 struct metadata_area *mda;
2482 struct lvmcache_info *info;
2483 int inconsistent = 0;
2484 int inconsistent_vgid = 0;
2485 int inconsistent_pvs = 0;
2486 unsigned use_precommitted = precommitted;
2487 unsigned saved_handles_missing_pvs = cmd->handles_missing_pvs;
2488 struct dm_list *pvids;
2489 struct pv_list *pvl, *pvl2;
2490 struct dm_list all_pvs;
2491 char uuid[64] __attribute((aligned(8)));
2493 if (is_orphan_vg(vgname)) {
2494 if (use_precommitted) {
2495 log_error("Internal error: vg_read_internal requires vgname "
2496 "with pre-commit.");
2497 return NULL;
2499 *consistent = 1;
2500 return _vg_read_orphans(cmd, vgname);
2503 if ((correct_vg = lvmcache_get_vg(vgid, precommitted))) {
2504 if (vg_missing_pv_count(correct_vg)) {
2505 log_verbose("There are %d physical volumes missing.",
2506 vg_missing_pv_count(correct_vg));
2507 _vg_mark_partial_lvs(correct_vg);
2509 *consistent = 1;
2510 return correct_vg;
2513 /* Find the vgname in the cache */
2514 /* If it's not there we must do full scan to be completely sure */
2515 if (!(fmt = fmt_from_vgname(vgname, vgid))) {
2516 lvmcache_label_scan(cmd, 0);
2517 if (!(fmt = fmt_from_vgname(vgname, vgid))) {
2518 if (memlock())
2519 return_NULL;
2520 lvmcache_label_scan(cmd, 2);
2521 if (!(fmt = fmt_from_vgname(vgname, vgid)))
2522 return_NULL;
2526 /* Now determine the correct vgname if none was supplied */
2527 if (!vgname && !(vgname = vgname_from_vgid(cmd->mem, vgid)))
2528 return_NULL;
2530 if (use_precommitted && !(fmt->features & FMT_PRECOMMIT))
2531 use_precommitted = 0;
2533 /* create format instance with appropriate metadata area */
2534 if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
2535 log_error("Failed to create format instance");
2536 return NULL;
2539 /* Store pvids for later so we can check if any are missing */
2540 if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
2541 return_NULL;
2543 /* Ensure contents of all metadata areas match - else do recovery */
2544 dm_list_iterate_items(mda, &fid->metadata_areas) {
2545 if ((use_precommitted &&
2546 !(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
2547 (!use_precommitted &&
2548 !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
2549 inconsistent = 1;
2550 vg_release(vg);
2551 continue;
2553 if (!correct_vg) {
2554 correct_vg = vg;
2555 continue;
2558 /* FIXME Also ensure contents same - checksum compare? */
2559 if (correct_vg->seqno != vg->seqno) {
2560 inconsistent = 1;
2561 if (vg->seqno > correct_vg->seqno) {
2562 vg_release(correct_vg);
2563 correct_vg = vg;
2567 if (vg != correct_vg)
2568 vg_release(vg);
2571 /* Ensure every PV in the VG was in the cache */
2572 if (correct_vg) {
2574 * If the VG has PVs without mdas, they may still be
2575 * orphans in the cache: update the cache state here.
2577 if (!inconsistent &&
2578 dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
2579 dm_list_iterate_items(pvl, &correct_vg->pvs) {
2580 if (!pvl->pv->dev) {
2581 inconsistent_pvs = 1;
2582 break;
2585 if (str_list_match_item(pvids, pvl->pv->dev->pvid))
2586 continue;
2589 * PV not marked as belonging to this VG in cache.
2590 * Check it's an orphan without metadata area.
2592 if (!(info = info_from_pvid(pvl->pv->dev->pvid, 1)) ||
2593 !info->vginfo || !is_orphan_vg(info->vginfo->vgname) ||
2594 dm_list_size(&info->mdas)) {
2595 inconsistent_pvs = 1;
2596 break;
2600 /* If the check passed, let's update VG and recalculate pvids */
2601 if (!inconsistent_pvs) {
2602 log_debug("Updating cache for PVs without mdas "
2603 "in VG %s.", vgname);
2604 lvmcache_update_vg(correct_vg, use_precommitted);
2606 if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
2607 return_NULL;
2611 if (dm_list_size(&correct_vg->pvs) != dm_list_size(pvids)
2612 + vg_missing_pv_count(correct_vg)) {
2613 log_debug("Cached VG %s had incorrect PV list",
2614 vgname);
2616 if (memlock())
2617 inconsistent = 1;
2618 else {
2619 vg_release(correct_vg);
2620 correct_vg = NULL;
2622 } else dm_list_iterate_items(pvl, &correct_vg->pvs) {
2623 if (pvl->pv->status & MISSING_PV)
2624 continue;
2625 if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) {
2626 log_debug("Cached VG %s had incorrect PV list",
2627 vgname);
2628 vg_release(correct_vg);
2629 correct_vg = NULL;
2630 break;
2635 dm_list_init(&all_pvs);
2637 /* Failed to find VG where we expected it - full scan and retry */
2638 if (!correct_vg) {
2639 inconsistent = 0;
2641 if (memlock())
2642 return_NULL;
2643 lvmcache_label_scan(cmd, 2);
2644 if (!(fmt = fmt_from_vgname(vgname, vgid)))
2645 return_NULL;
2647 if (precommitted && !(fmt->features & FMT_PRECOMMIT))
2648 use_precommitted = 0;
2650 /* create format instance with appropriate metadata area */
2651 if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
2652 log_error("Failed to create format instance");
2653 return NULL;
2656 /* Ensure contents of all metadata areas match - else recover */
2657 dm_list_iterate_items(mda, &fid->metadata_areas) {
2658 if ((use_precommitted &&
2659 !(vg = mda->ops->vg_read_precommit(fid, vgname,
2660 mda))) ||
2661 (!use_precommitted &&
2662 !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
2663 inconsistent = 1;
2664 continue;
2666 if (!correct_vg) {
2667 correct_vg = vg;
2668 if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) {
2669 vg_release(vg);
2670 return_NULL;
2672 continue;
2675 if (strncmp((char *)vg->id.uuid,
2676 (char *)correct_vg->id.uuid, ID_LEN)) {
2677 inconsistent = 1;
2678 inconsistent_vgid = 1;
2681 /* FIXME Also ensure contents same - checksums same? */
2682 if (correct_vg->seqno != vg->seqno) {
2683 inconsistent = 1;
2684 if (!_update_pv_list(cmd->mem, &all_pvs, vg)) {
2685 vg_release(vg);
2686 vg_release(correct_vg);
2687 return_NULL;
2689 if (vg->seqno > correct_vg->seqno) {
2690 vg_release(correct_vg);
2691 correct_vg = vg;
2695 if (vg != correct_vg)
2696 vg_release(vg);
2699 /* Give up looking */
2700 if (!correct_vg)
2701 return_NULL;
2704 lvmcache_update_vg(correct_vg, use_precommitted);
2706 if (inconsistent) {
2707 /* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
2708 if (use_precommitted) {
2709 log_error("Inconsistent pre-commit metadata copies "
2710 "for volume group %s", vgname);
2711 vg_release(correct_vg);
2712 return NULL;
2715 if (!*consistent)
2716 return correct_vg;
2718 /* Don't touch if vgids didn't match */
2719 if (inconsistent_vgid) {
2720 log_error("Inconsistent metadata UUIDs found for "
2721 "volume group %s", vgname);
2722 *consistent = 0;
2723 return correct_vg;
2726 log_warn("WARNING: Inconsistent metadata found for VG %s - updating "
2727 "to use version %u", vgname, correct_vg->seqno);
2729 cmd->handles_missing_pvs = 1;
2730 if (!vg_write(correct_vg)) {
2731 log_error("Automatic metadata correction failed");
2732 vg_release(correct_vg);
2733 cmd->handles_missing_pvs = saved_handles_missing_pvs;
2734 return NULL;
2736 cmd->handles_missing_pvs = saved_handles_missing_pvs;
2738 if (!vg_commit(correct_vg)) {
2739 log_error("Automatic metadata correction commit "
2740 "failed");
2741 vg_release(correct_vg);
2742 return NULL;
2745 dm_list_iterate_items(pvl, &all_pvs) {
2746 dm_list_iterate_items(pvl2, &correct_vg->pvs) {
2747 if (pvl->pv->dev == pvl2->pv->dev)
2748 goto next_pv;
2750 if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) {
2751 vg_release(correct_vg);
2752 return_NULL;
2754 log_error("Removing PV %s (%s) that no longer belongs to VG %s",
2755 pv_dev_name(pvl->pv), uuid, correct_vg->name);
2756 if (!pv_write_orphan(cmd, pvl->pv)) {
2757 vg_release(correct_vg);
2758 return_NULL;
2760 next_pv:
2765 if (vg_missing_pv_count(correct_vg)) {
2766 log_verbose("There are %d physical volumes missing.",
2767 vg_missing_pv_count(correct_vg));
2768 _vg_mark_partial_lvs(correct_vg);
2771 if ((correct_vg->status & PVMOVE) && !pvmove_mode()) {
2772 log_error("WARNING: Interrupted pvmove detected in "
2773 "volume group %s", correct_vg->name);
2774 log_error("Please restore the metadata by running "
2775 "vgcfgrestore.");
2776 vg_release(correct_vg);
2777 return NULL;
2780 *consistent = 1;
2781 return correct_vg;
2784 struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname,
2785 const char *vgid, int *consistent)
2787 struct volume_group *vg;
2788 struct lv_list *lvl;
2790 if (!(vg = _vg_read(cmd, vgname, vgid, consistent, 0)))
2791 return NULL;
2793 if (!check_pv_segments(vg)) {
2794 log_error("Internal error: PV segments corrupted in %s.",
2795 vg->name);
2796 vg_release(vg);
2797 return NULL;
2800 dm_list_iterate_items(lvl, &vg->lvs) {
2801 if (!check_lv_segments(lvl->lv, 1)) {
2802 log_error("Internal error: LV segments corrupted in %s.",
2803 lvl->lv->name);
2804 vg_release(vg);
2805 return NULL;
2809 return vg;
2812 void vg_release(struct volume_group *vg)
2814 if (!vg || !vg->vgmem)
2815 return;
2817 if (vg->cmd && vg->vgmem == vg->cmd->mem)
2818 log_error("Internal error: global memory pool used for VG %s",
2819 vg->name);
2821 dm_pool_destroy(vg->vgmem);
2824 /* This is only called by lv_from_lvid, which is only called from
2825 * activate.c so we know the appropriate VG lock is already held and
2826 * the vg_read_internal is therefore safe.
2828 static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd,
2829 const char *vgid,
2830 unsigned precommitted)
2832 const char *vgname;
2833 struct dm_list *vgnames;
2834 struct volume_group *vg = NULL;
2835 struct lvmcache_vginfo *vginfo;
2836 struct str_list *strl;
2837 int consistent = 0;
2839 /* Is corresponding vgname already cached? */
2840 if ((vginfo = vginfo_from_vgid(vgid)) &&
2841 vginfo->vgname && !is_orphan_vg(vginfo->vgname)) {
2842 if ((vg = _vg_read(cmd, NULL, vgid,
2843 &consistent, precommitted)) &&
2844 !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2846 if (!consistent) {
2847 log_error("Volume group %s metadata is "
2848 "inconsistent", vg->name);
2850 return vg;
2852 vg_release(vg);
2855 /* Mustn't scan if memory locked: ensure cache gets pre-populated! */
2856 if (memlock())
2857 goto out;
2859 /* FIXME Need a genuine read by ID here - don't vg_read_internal by name! */
2860 /* FIXME Disabled vgrenames while active for now because we aren't
2861 * allowed to do a full scan here any more. */
2863 // The slow way - full scan required to cope with vgrename
2864 if (!(vgnames = get_vgnames(cmd, 2))) {
2865 log_error("vg_read_by_vgid: get_vgnames failed");
2866 goto out;
2869 dm_list_iterate_items(strl, vgnames) {
2870 vgname = strl->str;
2871 if (!vgname || is_orphan_vg(vgname))
2872 continue; // FIXME Unnecessary?
2873 consistent = 0;
2874 if ((vg = _vg_read(cmd, vgname, vgid, &consistent,
2875 precommitted)) &&
2876 !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2878 if (!consistent) {
2879 log_error("Volume group %s metadata is "
2880 "inconsistent", vgname);
2881 goto out;
2883 return vg;
2887 out:
2888 vg_release(vg);
2889 return NULL;
2892 /* Only called by activate.c */
2893 struct logical_volume *lv_from_lvid(struct cmd_context *cmd, const char *lvid_s,
2894 unsigned precommitted)
2896 struct lv_list *lvl;
2897 struct volume_group *vg;
2898 const union lvid *lvid;
2900 lvid = (const union lvid *) lvid_s;
2902 log_very_verbose("Finding volume group for uuid %s", lvid_s);
2903 if (!(vg = _vg_read_by_vgid(cmd, (char *)lvid->id[0].uuid, precommitted))) {
2904 log_error("Volume group for uuid not found: %s", lvid_s);
2905 return NULL;
2908 log_verbose("Found volume group \"%s\"", vg->name);
2909 if (vg->status & EXPORTED_VG) {
2910 log_error("Volume group \"%s\" is exported", vg->name);
2911 goto out;
2913 if (!(lvl = find_lv_in_vg_by_lvid(vg, lvid))) {
2914 log_very_verbose("Can't find logical volume id %s", lvid_s);
2915 goto out;
2918 return lvl->lv;
2919 out:
2920 vg_release(vg);
2921 return NULL;
2925 * pv_read - read and return a handle to a physical volume
2926 * @cmd: LVM command initiating the pv_read
2927 * @pv_name: full device name of the PV, including the path
2928 * @mdas: list of metadata areas of the PV
2929 * @label_sector: sector number where the PV label is stored on @pv_name
2930 * @warnings:
2932 * Returns:
2933 * PV handle - valid pv_name and successful read of the PV, or
2934 * NULL - invalid parameter or error in reading the PV
2936 * Note:
2937 * FIXME - liblvm todo - make into function that returns handle
2939 struct physical_volume *pv_read(struct cmd_context *cmd, const char *pv_name,
2940 struct dm_list *mdas, uint64_t *label_sector,
2941 int warnings, int scan_label_only)
2943 return _pv_read(cmd, cmd->mem, pv_name, mdas, label_sector, warnings, scan_label_only);
2946 /* FIXME Use label functions instead of PV functions */
2947 static struct physical_volume *_pv_read(struct cmd_context *cmd,
2948 struct dm_pool *pvmem,
2949 const char *pv_name,
2950 struct dm_list *mdas,
2951 uint64_t *label_sector,
2952 int warnings, int scan_label_only)
2954 struct physical_volume *pv;
2955 struct label *label;
2956 struct lvmcache_info *info;
2957 struct device *dev;
2959 if (!(dev = dev_cache_get(pv_name, cmd->filter)))
2960 return_NULL;
2962 if (!(label_read(dev, &label, UINT64_C(0)))) {
2963 if (warnings)
2964 log_error("No physical volume label read from %s",
2965 pv_name);
2966 return NULL;
2969 info = (struct lvmcache_info *) label->info;
2970 if (label_sector && *label_sector)
2971 *label_sector = label->sector;
2973 if (!(pv = dm_pool_zalloc(pvmem, sizeof(*pv)))) {
2974 log_error("pv allocation for '%s' failed", pv_name);
2975 return NULL;
2978 dm_list_init(&pv->tags);
2979 dm_list_init(&pv->segments);
2981 /* FIXME Move more common code up here */
2982 if (!(info->fmt->ops->pv_read(info->fmt, pv_name, pv, mdas,
2983 scan_label_only))) {
2984 log_error("Failed to read existing physical volume '%s'",
2985 pv_name);
2986 return NULL;
2989 if (!pv->size)
2990 return NULL;
2992 if (!alloc_pv_segment_whole_pv(pvmem, pv))
2993 return_NULL;
2995 return pv;
/* May return empty list */
/* List of all VG names known to the cache; optionally forces a scan. */
struct dm_list *get_vgnames(struct cmd_context *cmd, int full_scan)
{
	return lvmcache_get_vgnames(cmd, full_scan);
}
/* List of all VG ids known to the cache; optionally forces a scan. */
struct dm_list *get_vgids(struct cmd_context *cmd, int full_scan)
{
	return lvmcache_get_vgids(cmd, full_scan);
}
3009 static int _get_pvs(struct cmd_context *cmd, struct dm_list **pvslist)
3011 struct str_list *strl;
3012 struct dm_list * uninitialized_var(results);
3013 const char *vgname, *vgid;
3014 struct pv_list *pvl, *pvl_copy;
3015 struct dm_list *vgids;
3016 struct volume_group *vg;
3017 int consistent = 0;
3018 int old_pvmove;
3020 lvmcache_label_scan(cmd, 0);
3022 if (pvslist) {
3023 if (!(results = dm_pool_alloc(cmd->mem, sizeof(*results)))) {
3024 log_error("PV list allocation failed");
3025 return 0;
3028 dm_list_init(results);
3031 /* Get list of VGs */
3032 if (!(vgids = get_vgids(cmd, 0))) {
3033 log_error("get_pvs: get_vgids failed");
3034 return 0;
3037 /* Read every VG to ensure cache consistency */
3038 /* Orphan VG is last on list */
3039 old_pvmove = pvmove_mode();
3040 init_pvmove(1);
3041 dm_list_iterate_items(strl, vgids) {
3042 vgid = strl->str;
3043 if (!vgid)
3044 continue; /* FIXME Unnecessary? */
3045 consistent = 0;
3046 if (!(vgname = vgname_from_vgid(NULL, vgid))) {
3047 stack;
3048 continue;
3050 if (!(vg = vg_read_internal(cmd, vgname, vgid, &consistent))) {
3051 stack;
3052 continue;
3054 if (!consistent)
3055 log_warn("WARNING: Volume Group %s is not consistent",
3056 vgname);
3058 /* Move PVs onto results list */
3059 if (pvslist)
3060 dm_list_iterate_items(pvl, &vg->pvs) {
3061 if (!(pvl_copy = _copy_pvl(cmd->mem, pvl))) {
3062 log_error("PV list allocation failed");
3063 vg_release(vg);
3064 return 0;
3066 dm_list_add(results, &pvl_copy->list);
3068 vg_release(vg);
3070 init_pvmove(old_pvmove);
3072 if (pvslist)
3073 *pvslist = results;
3074 else
3075 dm_pool_free(cmd->mem, vgids);
3077 return 1;
3080 struct dm_list *get_pvs(struct cmd_context *cmd)
3082 struct dm_list *results;
3084 if (!_get_pvs(cmd, &results))
3085 return NULL;
3087 return results;
3090 int scan_vgs_for_pvs(struct cmd_context *cmd)
3092 return _get_pvs(cmd, NULL);
3095 int pv_write(struct cmd_context *cmd __attribute((unused)),
3096 struct physical_volume *pv,
3097 struct dm_list *mdas, int64_t label_sector)
3099 if (!pv->fmt->ops->pv_write) {
3100 log_error("Format does not support writing physical volumes");
3101 return 0;
3104 if (!is_orphan_vg(pv->vg_name) || pv->pe_alloc_count) {
3105 log_error("Assertion failed: can't _pv_write non-orphan PV "
3106 "(in VG %s)", pv->vg_name);
3107 return 0;
3110 if (!pv->fmt->ops->pv_write(pv->fmt, pv, mdas, label_sector))
3111 return_0;
3113 return 1;
3116 int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv)
3118 const char *old_vg_name = pv->vg_name;
3120 pv->vg_name = cmd->fmt->orphan_vg_name;
3121 pv->status = ALLOCATABLE_PV;
3122 pv->pe_alloc_count = 0;
3124 if (!dev_get_size(pv->dev, &pv->size)) {
3125 log_error("%s: Couldn't get size.", pv_dev_name(pv));
3126 return 0;
3129 if (!pv_write(cmd, pv, NULL, INT64_C(-1))) {
3130 log_error("Failed to clear metadata from physical "
3131 "volume \"%s\" after removal from \"%s\"",
3132 pv_dev_name(pv), old_vg_name);
3133 return 0;
3136 return 1;
3140 * is_orphan_vg - Determine whether a vg_name is an orphan
3141 * @vg_name: pointer to the vg_name
3143 int is_orphan_vg(const char *vg_name)
3145 return (vg_name && vg_name[0] == ORPHAN_PREFIX[0]) ? 1 : 0;
3149 * is_orphan - Determine whether a pv is an orphan based on its vg_name
3150 * @pv: handle to the physical volume
3152 int is_orphan(const struct physical_volume *pv)
3154 return is_orphan_vg(pv_field(pv, vg_name));
3158 * is_pv - Determine whether a pv is a real pv or dummy one
3159 * @pv: handle to device
3161 int is_pv(struct physical_volume *pv)
3163 return (pv_field(pv, vg_name) ? 1 : 0);
3167 * Returns:
3168 * 0 - fail
3169 * 1 - success
3171 int pv_analyze(struct cmd_context *cmd, const char *pv_name,
3172 uint64_t label_sector)
3174 struct label *label;
3175 struct device *dev;
3176 struct metadata_area *mda;
3177 struct lvmcache_info *info;
3179 dev = dev_cache_get(pv_name, cmd->filter);
3180 if (!dev) {
3181 log_error("Device %s not found (or ignored by filtering).",
3182 pv_name);
3183 return 0;
3187 * First, scan for LVM labels.
3189 if (!label_read(dev, &label, label_sector)) {
3190 log_error("Could not find LVM label on %s",
3191 pv_name);
3192 return 0;
3195 log_print("Found label on %s, sector %"PRIu64", type=%s",
3196 pv_name, label->sector, label->type);
3199 * Next, loop through metadata areas
3201 info = label->info;
3202 dm_list_iterate_items(mda, &info->mdas)
3203 mda->ops->pv_analyze_mda(info->fmt, mda);
3205 return 1;
3208 /* FIXME: remove / combine this with locking? */
3209 int vg_check_write_mode(struct volume_group *vg)
3211 if (vg->open_mode != 'w') {
3212 log_errno(EPERM, "Attempt to modify a read-only VG");
3213 return 0;
3215 return 1;
3219 * Performs a set of checks against a VG according to bits set in status
3220 * and returns FAILED_* bits for those that aren't acceptable.
3222 * FIXME Remove the unnecessary duplicate definitions and return bits directly.
3224 static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
3225 uint32_t status)
3227 uint32_t failure = 0;
3229 if ((status & CLUSTERED) &&
3230 (vg_is_clustered(vg)) && !locking_is_clustered()) {
3231 log_error("Skipping clustered volume group %s", vg->name);
3232 /* Return because other flags are considered undefined. */
3233 return FAILED_CLUSTERED;
3236 if ((status & EXPORTED_VG) &&
3237 vg_is_exported(vg)) {
3238 log_error("Volume group %s is exported", vg->name);
3239 failure |= FAILED_EXPORTED;
3242 if ((status & LVM_WRITE) &&
3243 !(vg->status & LVM_WRITE)) {
3244 log_error("Volume group %s is read-only", vg->name);
3245 failure |= FAILED_READ_ONLY;
3248 if ((status & RESIZEABLE_VG) &&
3249 !vg_is_resizeable(vg)) {
3250 log_error("Volume group %s is not resizeable.", vg->name);
3251 failure |= FAILED_RESIZEABLE;
3254 return failure;
/*
 * vg_check_status - check volume group status flags and log error
 * @vg - volume group to check status flags
 * @status - specific status flags to check (e.g. EXPORTED_VG)
 *
 * Returns 1 when none of the requested checks fail.
 */
int vg_check_status(const struct volume_group *vg, uint32_t status)
{
	return !_vg_bad_status_bits(vg, status);
}
3267 static struct volume_group *_recover_vg(struct cmd_context *cmd, const char *lock_name,
3268 const char *vg_name, const char *vgid,
3269 uint32_t lock_flags)
3271 int consistent = 1;
3272 struct volume_group *vg;
3274 lock_flags &= ~LCK_TYPE_MASK;
3275 lock_flags |= LCK_WRITE;
3277 unlock_vg(cmd, lock_name);
3279 dev_close_all();
3281 if (!lock_vol(cmd, lock_name, lock_flags))
3282 return_NULL;
3284 if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent)))
3285 return_NULL;
3287 if (!consistent) {
3288 vg_release(vg);
3289 return_NULL;
3292 return (struct volume_group *)vg;
3296 * Consolidated locking, reading, and status flag checking.
3298 * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in
3299 * misc_flags will return it with FAILED_INCONSISTENT set instead of
3300 * giving you nothing.
3302 * Use vg_read_error(vg) to determine the result. Nonzero means there were
3303 * problems reading the volume group.
3304 * Zero value means that the VG is open and appropriate locks are held.
3306 static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
3307 const char *vgid, uint32_t lock_flags,
3308 uint32_t status_flags, uint32_t misc_flags)
3310 struct volume_group *vg = NULL;
3311 const char *lock_name;
3312 int consistent = 1;
3313 int consistent_in;
3314 uint32_t failure = 0;
3315 int already_locked;
3317 if (misc_flags & READ_ALLOW_INCONSISTENT || !(lock_flags & LCK_WRITE))
3318 consistent = 0;
3320 if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) {
3321 log_error("Volume group name %s has invalid characters",
3322 vg_name);
3323 return NULL;
3326 lock_name = is_orphan_vg(vg_name) ? VG_ORPHANS : vg_name;
3327 already_locked = vgname_is_locked(lock_name);
3329 if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK) &&
3330 !lock_vol(cmd, lock_name, lock_flags)) {
3331 log_error("Can't get lock for %s", vg_name);
3332 return _vg_make_handle(cmd, vg, FAILED_LOCKING);
3335 if (is_orphan_vg(vg_name))
3336 status_flags &= ~LVM_WRITE;
3338 consistent_in = consistent;
3340 /* If consistent == 1, we get NULL here if correction fails. */
3341 if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent))) {
3342 if (consistent_in && !consistent) {
3343 log_error("Volume group \"%s\" inconsistent.", vg_name);
3344 failure |= FAILED_INCONSISTENT;
3345 goto_bad;
3348 log_error("Volume group \"%s\" not found", vg_name);
3350 failure |= FAILED_NOTFOUND;
3351 goto_bad;
3354 if (vg_is_clustered(vg) && !locking_is_clustered()) {
3355 log_error("Skipping clustered volume group %s", vg->name);
3356 failure |= FAILED_CLUSTERED;
3357 goto_bad;
3360 /* consistent == 0 when VG is not found, but failed == FAILED_NOTFOUND */
3361 if (!consistent && !failure) {
3362 vg_release(vg);
3363 if (!(vg = _recover_vg(cmd, lock_name, vg_name, vgid, lock_flags))) {
3364 log_error("Recovery of volume group \"%s\" failed.",
3365 vg_name);
3366 failure |= FAILED_INCONSISTENT;
3367 goto_bad;
3372 * Check that the tool can handle tricky cases -- missing PVs and
3373 * unknown segment types.
3376 if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) &&
3377 (lock_flags & LCK_WRITE)) {
3378 log_error("Cannot change VG %s while PVs are missing.", vg->name);
3379 log_error("Consider vgreduce --removemissing.");
3380 failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
3381 goto_bad;
3384 if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) &&
3385 (lock_flags & LCK_WRITE)) {
3386 log_error("Cannot change VG %s with unknown segments in it!",
3387 vg->name);
3388 failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
3389 goto_bad;
3392 failure |= _vg_bad_status_bits(vg, status_flags);
3393 if (failure)
3394 goto_bad;
3396 return _vg_make_handle(cmd, vg, failure);
3398 bad:
3399 if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK))
3400 unlock_vg(cmd, lock_name);
3402 return _vg_make_handle(cmd, vg, failure);
3406 * vg_read: High-level volume group metadata read function.
3408 * vg_read_error() must be used on any handle returned to check for errors.
3410 * - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT
3411 * - VG is read-only: FAILED_READ_ONLY
3412 * - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED
3413 * - VG is not RESIZEABLE: FAILED_RESIZEABLE
3414 * - locking failed: FAILED_LOCKING
3416 * On failures, all locks are released, unless one of the following applies:
3417 * - vgname_is_locked(lock_name) is true
3418 * FIXME: remove the above 2 conditions if possible and make an error always
3419 * release the lock.
3421 * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE.
3423 * Checking for VG existence:
3425 * FIXME: We want vg_read to attempt automatic recovery after acquiring a
3426 * temporary write lock: if that fails, we bail out as usual, with failed &
3427 * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in
3428 * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with
3429 * *consistent = 1.
3431 struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
3432 const char *vgid, uint32_t flags)
3434 uint32_t status = 0;
3435 uint32_t lock_flags = LCK_VG_READ;
3437 if (flags & READ_FOR_UPDATE) {
3438 status |= EXPORTED_VG | LVM_WRITE;
3439 lock_flags = LCK_VG_WRITE;
3442 if (flags & READ_ALLOW_EXPORTED)
3443 status &= ~EXPORTED_VG;
3445 return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status, flags);
3449 * A high-level volume group metadata reading function. Open a volume group for
3450 * later update (this means the user code can change the metadata and later
3451 * request the new metadata to be written and committed).
3453 struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
3454 const char *vgid, uint32_t flags)
3456 return vg_read(cmd, vg_name, vgid, flags | READ_FOR_UPDATE);
3460 * Test the validity of a VG handle returned by vg_read() or vg_read_for_update().
3462 uint32_t vg_read_error(struct volume_group *vg_handle)
3464 if (!vg_handle)
3465 return FAILED_ALLOCATION;
3467 return vg_handle->read_status;
3471 * Lock a vgname and/or check for existence.
3472 * Takes a WRITE lock on the vgname before scanning.
3473 * If scanning fails or vgname found, release the lock.
3474 * NOTE: If you find the return codes confusing, you might think of this
3475 * function as similar to an open() call with O_CREAT and O_EXCL flags
3476 * (open returns fail with -EEXIST if file already exists).
3478 * Returns:
3479 * FAILED_LOCKING - Cannot lock name
3480 * FAILED_EXIST - VG name already exists - cannot reserve
3481 * SUCCESS - VG name does not exist in system and WRITE lock held
3483 uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname)
3485 if (!lock_vol(cmd, vgname, LCK_VG_WRITE)) {
3486 return FAILED_LOCKING;
3489 /* Find the vgname in the cache */
3490 /* If it's not there we must do full scan to be completely sure */
3491 if (!fmt_from_vgname(vgname, NULL)) {
3492 lvmcache_label_scan(cmd, 0);
3493 if (!fmt_from_vgname(vgname, NULL)) {
3494 if (memlock()) {
3496 * FIXME: Disallow calling this function if
3497 * memlock() is true.
3499 unlock_vg(cmd, vgname);
3500 return FAILED_LOCKING;
3502 lvmcache_label_scan(cmd, 2);
3503 if (!fmt_from_vgname(vgname, NULL)) {
3504 /* vgname not found after scanning */
3505 return SUCCESS;
3510 /* Found vgname so cannot reserve. */
3511 unlock_vg(cmd, vgname);
3512 return FAILED_EXIST;
3516 * Gets/Sets for external LVM library
3518 struct id pv_id(const struct physical_volume *pv)
3520 return pv_field(pv, id);
3523 const struct format_type *pv_format_type(const struct physical_volume *pv)
3525 return pv_field(pv, fmt);
3528 struct id pv_vgid(const struct physical_volume *pv)
3530 return pv_field(pv, vgid);
3533 struct device *pv_dev(const struct physical_volume *pv)
3535 return pv_field(pv, dev);
3538 const char *pv_vg_name(const struct physical_volume *pv)
3540 return pv_field(pv, vg_name);
/* Device path of the PV's underlying device. */
const char *pv_dev_name(const struct physical_volume *pv)
{
	return dev_name(pv_dev(pv));
}
3548 uint64_t pv_size(const struct physical_volume *pv)
3550 return pv_field(pv, size);
3553 uint32_t pv_status(const struct physical_volume *pv)
3555 return pv_field(pv, status);
3558 uint32_t pv_pe_size(const struct physical_volume *pv)
3560 return pv_field(pv, pe_size);
3563 uint64_t pv_pe_start(const struct physical_volume *pv)
3565 return pv_field(pv, pe_start);
3568 uint32_t pv_pe_count(const struct physical_volume *pv)
3570 return pv_field(pv, pe_count);
3573 uint32_t pv_pe_alloc_count(const struct physical_volume *pv)
3575 return pv_field(pv, pe_alloc_count);
3578 uint32_t pv_mda_count(const struct physical_volume *pv)
3580 struct lvmcache_info *info;
3582 info = info_from_pvid((const char *)&pv->id.uuid, 0);
3583 return info ? dm_list_size(&info->mdas) : UINT64_C(0);
3586 uint32_t vg_seqno(const struct volume_group *vg)
3588 return vg->seqno;
3591 uint32_t vg_status(const struct volume_group *vg)
3593 return vg->status;
3596 uint64_t vg_size(const struct volume_group *vg)
3598 return (uint64_t) vg->extent_count * vg->extent_size;
3601 uint64_t vg_free(const struct volume_group *vg)
3603 return (uint64_t) vg->free_count * vg->extent_size;
3606 uint64_t vg_extent_size(const struct volume_group *vg)
3608 return (uint64_t) vg->extent_size;
3611 uint64_t vg_extent_count(const struct volume_group *vg)
3613 return (uint64_t) vg->extent_count;
3616 uint64_t vg_free_count(const struct volume_group *vg)
3618 return (uint64_t) vg->free_count;
3621 uint64_t vg_pv_count(const struct volume_group *vg)
3623 return (uint64_t) vg->pv_count;
3626 uint64_t vg_max_pv(const struct volume_group *vg)
3628 return (uint64_t) vg->max_pv;
3631 uint64_t vg_max_lv(const struct volume_group *vg)
3633 return (uint64_t) vg->max_lv;
3636 uint32_t vg_mda_count(const struct volume_group *vg)
3638 return dm_list_size(&vg->fid->metadata_areas);
3641 uint64_t lv_size(const struct logical_volume *lv)
3643 return lv->size;
3647 * pv_by_path - Given a device path return a PV handle if it is a PV
3648 * @cmd - handle to the LVM command instance
3649 * @pv_name - device path to read for the PV
3651 * Returns:
3652 * NULL - device path does not contain a valid PV
3653 * non-NULL - PV handle corresponding to device path
3655 * FIXME: merge with find_pv_by_name ?
3657 struct physical_volume *pv_by_path(struct cmd_context *cmd, const char *pv_name)
3659 struct dm_list mdas;
3661 dm_list_init(&mdas);
3662 return _pv_read(cmd, cmd->mem, pv_name, &mdas, NULL, 1, 0);