// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2014-2016 Christoph Hellwig.
 */
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include <linux/pr.h>

#include "blocklayout.h"
/* Debug facility for this file; presumably consumed by dprintk() - confirm. */
#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
17 bl_free_device(struct pnfs_block_dev
*dev
)
19 if (dev
->nr_children
) {
22 for (i
= 0; i
< dev
->nr_children
; i
++)
23 bl_free_device(&dev
->children
[i
]);
26 if (dev
->pr_registered
) {
27 const struct pr_ops
*ops
=
28 dev
->bdev
->bd_disk
->fops
->pr_ops
;
31 error
= ops
->pr_register(dev
->bdev
, dev
->pr_key
, 0,
34 pr_err("failed to unregister PR key.\n");
38 blkdev_put(dev
->bdev
, FMODE_READ
| FMODE_WRITE
);
43 bl_free_deviceid_node(struct nfs4_deviceid_node
*d
)
45 struct pnfs_block_dev
*dev
=
46 container_of(d
, struct pnfs_block_dev
, node
);
49 kfree_rcu(dev
, node
.rcu
);
53 nfs4_block_decode_volume(struct xdr_stream
*xdr
, struct pnfs_block_volume
*b
)
58 p
= xdr_inline_decode(xdr
, 4);
61 b
->type
= be32_to_cpup(p
++);
64 case PNFS_BLOCK_VOLUME_SIMPLE
:
65 p
= xdr_inline_decode(xdr
, 4);
68 b
->simple
.nr_sigs
= be32_to_cpup(p
++);
69 if (!b
->simple
.nr_sigs
|| b
->simple
.nr_sigs
> PNFS_BLOCK_MAX_UUIDS
) {
70 dprintk("Bad signature count: %d\n", b
->simple
.nr_sigs
);
74 b
->simple
.len
= 4 + 4;
75 for (i
= 0; i
< b
->simple
.nr_sigs
; i
++) {
76 p
= xdr_inline_decode(xdr
, 8 + 4);
79 p
= xdr_decode_hyper(p
, &b
->simple
.sigs
[i
].offset
);
80 b
->simple
.sigs
[i
].sig_len
= be32_to_cpup(p
++);
81 if (b
->simple
.sigs
[i
].sig_len
> PNFS_BLOCK_UUID_LEN
) {
82 pr_info("signature too long: %d\n",
83 b
->simple
.sigs
[i
].sig_len
);
87 p
= xdr_inline_decode(xdr
, b
->simple
.sigs
[i
].sig_len
);
90 memcpy(&b
->simple
.sigs
[i
].sig
, p
,
91 b
->simple
.sigs
[i
].sig_len
);
93 b
->simple
.len
+= 8 + 4 + \
94 (XDR_QUADLEN(b
->simple
.sigs
[i
].sig_len
) << 2);
97 case PNFS_BLOCK_VOLUME_SLICE
:
98 p
= xdr_inline_decode(xdr
, 8 + 8 + 4);
101 p
= xdr_decode_hyper(p
, &b
->slice
.start
);
102 p
= xdr_decode_hyper(p
, &b
->slice
.len
);
103 b
->slice
.volume
= be32_to_cpup(p
++);
105 case PNFS_BLOCK_VOLUME_CONCAT
:
106 p
= xdr_inline_decode(xdr
, 4);
110 b
->concat
.volumes_count
= be32_to_cpup(p
++);
111 if (b
->concat
.volumes_count
> PNFS_BLOCK_MAX_DEVICES
) {
112 dprintk("Too many volumes: %d\n", b
->concat
.volumes_count
);
116 p
= xdr_inline_decode(xdr
, b
->concat
.volumes_count
* 4);
119 for (i
= 0; i
< b
->concat
.volumes_count
; i
++)
120 b
->concat
.volumes
[i
] = be32_to_cpup(p
++);
122 case PNFS_BLOCK_VOLUME_STRIPE
:
123 p
= xdr_inline_decode(xdr
, 8 + 4);
127 p
= xdr_decode_hyper(p
, &b
->stripe
.chunk_size
);
128 b
->stripe
.volumes_count
= be32_to_cpup(p
++);
129 if (b
->stripe
.volumes_count
> PNFS_BLOCK_MAX_DEVICES
) {
130 dprintk("Too many volumes: %d\n", b
->stripe
.volumes_count
);
134 p
= xdr_inline_decode(xdr
, b
->stripe
.volumes_count
* 4);
137 for (i
= 0; i
< b
->stripe
.volumes_count
; i
++)
138 b
->stripe
.volumes
[i
] = be32_to_cpup(p
++);
140 case PNFS_BLOCK_VOLUME_SCSI
:
141 p
= xdr_inline_decode(xdr
, 4 + 4 + 4);
144 b
->scsi
.code_set
= be32_to_cpup(p
++);
145 b
->scsi
.designator_type
= be32_to_cpup(p
++);
146 b
->scsi
.designator_len
= be32_to_cpup(p
++);
147 p
= xdr_inline_decode(xdr
, b
->scsi
.designator_len
);
150 if (b
->scsi
.designator_len
> 256)
152 memcpy(&b
->scsi
.designator
, p
, b
->scsi
.designator_len
);
153 p
= xdr_inline_decode(xdr
, 8);
156 p
= xdr_decode_hyper(p
, &b
->scsi
.pr_key
);
159 dprintk("unknown volume type!\n");
166 static bool bl_map_simple(struct pnfs_block_dev
*dev
, u64 offset
,
167 struct pnfs_block_dev_map
*map
)
169 map
->start
= dev
->start
;
171 map
->disk_offset
= dev
->disk_offset
;
172 map
->bdev
= dev
->bdev
;
176 static bool bl_map_concat(struct pnfs_block_dev
*dev
, u64 offset
,
177 struct pnfs_block_dev_map
*map
)
181 for (i
= 0; i
< dev
->nr_children
; i
++) {
182 struct pnfs_block_dev
*child
= &dev
->children
[i
];
184 if (child
->start
> offset
||
185 child
->start
+ child
->len
<= offset
)
188 child
->map(child
, offset
- child
->start
, map
);
192 dprintk("%s: ran off loop!\n", __func__
);
196 static bool bl_map_stripe(struct pnfs_block_dev
*dev
, u64 offset
,
197 struct pnfs_block_dev_map
*map
)
199 struct pnfs_block_dev
*child
;
204 chunk
= div_u64(offset
, dev
->chunk_size
);
205 div_u64_rem(chunk
, dev
->nr_children
, &chunk_idx
);
207 if (chunk_idx
>= dev
->nr_children
) {
208 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
209 __func__
, chunk_idx
, offset
, dev
->chunk_size
);
210 /* error, should not happen */
214 /* truncate offset to the beginning of the stripe */
215 offset
= chunk
* dev
->chunk_size
;
217 /* disk offset of the stripe */
218 disk_offset
= div_u64(offset
, dev
->nr_children
);
220 child
= &dev
->children
[chunk_idx
];
221 child
->map(child
, disk_offset
, map
);
223 map
->start
+= offset
;
224 map
->disk_offset
+= disk_offset
;
225 map
->len
= dev
->chunk_size
;
230 bl_parse_deviceid(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
231 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
);
235 bl_parse_simple(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
236 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
238 struct pnfs_block_volume
*v
= &volumes
[idx
];
239 struct block_device
*bdev
;
242 dev
= bl_resolve_deviceid(server
, v
, gfp_mask
);
246 bdev
= blkdev_get_by_dev(dev
, FMODE_READ
| FMODE_WRITE
, NULL
);
248 printk(KERN_WARNING
"pNFS: failed to open device %d:%d (%ld)\n",
249 MAJOR(dev
), MINOR(dev
), PTR_ERR(bdev
));
250 return PTR_ERR(bdev
);
255 d
->len
= i_size_read(d
->bdev
->bd_inode
);
256 d
->map
= bl_map_simple
;
258 printk(KERN_INFO
"pNFS: using block device %s\n",
259 d
->bdev
->bd_disk
->disk_name
);
264 bl_validate_designator(struct pnfs_block_volume
*v
)
266 switch (v
->scsi
.designator_type
) {
267 case PS_DESIGNATOR_EUI64
:
268 if (v
->scsi
.code_set
!= PS_CODE_SET_BINARY
)
271 if (v
->scsi
.designator_len
!= 8 &&
272 v
->scsi
.designator_len
!= 10 &&
273 v
->scsi
.designator_len
!= 16)
277 case PS_DESIGNATOR_NAA
:
278 if (v
->scsi
.code_set
!= PS_CODE_SET_BINARY
)
281 if (v
->scsi
.designator_len
!= 8 &&
282 v
->scsi
.designator_len
!= 16)
286 case PS_DESIGNATOR_T10
:
287 case PS_DESIGNATOR_NAME
:
288 pr_err("pNFS: unsupported designator "
289 "(code set %d, type %d, len %d.\n",
291 v
->scsi
.designator_type
,
292 v
->scsi
.designator_len
);
295 pr_err("pNFS: invalid designator "
296 "(code set %d, type %d, len %d.\n",
298 v
->scsi
.designator_type
,
299 v
->scsi
.designator_len
);
305 * Try to open the udev path for the WWN. At least on Debian the udev
306 * by-id path will always point to the dm-multipath device if one exists.
308 static struct block_device
*
309 bl_open_udev_path(struct pnfs_block_volume
*v
)
311 struct block_device
*bdev
;
314 devname
= kasprintf(GFP_KERNEL
, "/dev/disk/by-id/wwn-0x%*phN",
315 v
->scsi
.designator_len
, v
->scsi
.designator
);
317 return ERR_PTR(-ENOMEM
);
319 bdev
= blkdev_get_by_path(devname
, FMODE_READ
| FMODE_WRITE
, NULL
);
321 pr_warn("pNFS: failed to open device %s (%ld)\n",
322 devname
, PTR_ERR(bdev
));
330 * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
331 * wwn- links will only point to the first discovered SCSI device there.
333 static struct block_device
*
334 bl_open_dm_mpath_udev_path(struct pnfs_block_volume
*v
)
336 struct block_device
*bdev
;
339 devname
= kasprintf(GFP_KERNEL
,
340 "/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
341 v
->scsi
.designator_type
,
342 v
->scsi
.designator_len
, v
->scsi
.designator
);
344 return ERR_PTR(-ENOMEM
);
346 bdev
= blkdev_get_by_path(devname
, FMODE_READ
| FMODE_WRITE
, NULL
);
352 bl_parse_scsi(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
353 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
355 struct pnfs_block_volume
*v
= &volumes
[idx
];
356 struct block_device
*bdev
;
357 const struct pr_ops
*ops
;
360 if (!bl_validate_designator(v
))
363 bdev
= bl_open_dm_mpath_udev_path(v
);
365 bdev
= bl_open_udev_path(v
);
367 return PTR_ERR(bdev
);
370 d
->len
= i_size_read(d
->bdev
->bd_inode
);
371 d
->map
= bl_map_simple
;
372 d
->pr_key
= v
->scsi
.pr_key
;
374 pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
375 d
->bdev
->bd_disk
->disk_name
, d
->pr_key
);
377 ops
= d
->bdev
->bd_disk
->fops
->pr_ops
;
379 pr_err("pNFS: block device %s does not support reservations.",
380 d
->bdev
->bd_disk
->disk_name
);
385 error
= ops
->pr_register(d
->bdev
, 0, d
->pr_key
, true);
387 pr_err("pNFS: failed to register key for block device %s.",
388 d
->bdev
->bd_disk
->disk_name
);
392 d
->pr_registered
= true;
396 blkdev_put(d
->bdev
, FMODE_READ
| FMODE_WRITE
);
401 bl_parse_slice(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
402 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
404 struct pnfs_block_volume
*v
= &volumes
[idx
];
407 ret
= bl_parse_deviceid(server
, d
, volumes
, v
->slice
.volume
, gfp_mask
);
411 d
->disk_offset
= v
->slice
.start
;
412 d
->len
= v
->slice
.len
;
417 bl_parse_concat(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
418 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
420 struct pnfs_block_volume
*v
= &volumes
[idx
];
424 d
->children
= kcalloc(v
->concat
.volumes_count
,
425 sizeof(struct pnfs_block_dev
), GFP_KERNEL
);
429 for (i
= 0; i
< v
->concat
.volumes_count
; i
++) {
430 ret
= bl_parse_deviceid(server
, &d
->children
[i
],
431 volumes
, v
->concat
.volumes
[i
], gfp_mask
);
436 d
->children
[i
].start
+= len
;
437 len
+= d
->children
[i
].len
;
441 d
->map
= bl_map_concat
;
446 bl_parse_stripe(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
447 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
449 struct pnfs_block_volume
*v
= &volumes
[idx
];
453 d
->children
= kcalloc(v
->stripe
.volumes_count
,
454 sizeof(struct pnfs_block_dev
), GFP_KERNEL
);
458 for (i
= 0; i
< v
->stripe
.volumes_count
; i
++) {
459 ret
= bl_parse_deviceid(server
, &d
->children
[i
],
460 volumes
, v
->stripe
.volumes
[i
], gfp_mask
);
465 len
+= d
->children
[i
].len
;
469 d
->chunk_size
= v
->stripe
.chunk_size
;
470 d
->map
= bl_map_stripe
;
475 bl_parse_deviceid(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
476 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
478 switch (volumes
[idx
].type
) {
479 case PNFS_BLOCK_VOLUME_SIMPLE
:
480 return bl_parse_simple(server
, d
, volumes
, idx
, gfp_mask
);
481 case PNFS_BLOCK_VOLUME_SLICE
:
482 return bl_parse_slice(server
, d
, volumes
, idx
, gfp_mask
);
483 case PNFS_BLOCK_VOLUME_CONCAT
:
484 return bl_parse_concat(server
, d
, volumes
, idx
, gfp_mask
);
485 case PNFS_BLOCK_VOLUME_STRIPE
:
486 return bl_parse_stripe(server
, d
, volumes
, idx
, gfp_mask
);
487 case PNFS_BLOCK_VOLUME_SCSI
:
488 return bl_parse_scsi(server
, d
, volumes
, idx
, gfp_mask
);
490 dprintk("unsupported volume type: %d\n", volumes
[idx
].type
);
495 struct nfs4_deviceid_node
*
496 bl_alloc_deviceid_node(struct nfs_server
*server
, struct pnfs_device
*pdev
,
499 struct nfs4_deviceid_node
*node
= NULL
;
500 struct pnfs_block_volume
*volumes
;
501 struct pnfs_block_dev
*top
;
502 struct xdr_stream xdr
;
504 struct page
*scratch
;
505 int nr_volumes
, ret
, i
;
508 scratch
= alloc_page(gfp_mask
);
512 xdr_init_decode_pages(&xdr
, &buf
, pdev
->pages
, pdev
->pglen
);
513 xdr_set_scratch_page(&xdr
, scratch
);
515 p
= xdr_inline_decode(&xdr
, sizeof(__be32
));
517 goto out_free_scratch
;
518 nr_volumes
= be32_to_cpup(p
++);
520 volumes
= kcalloc(nr_volumes
, sizeof(struct pnfs_block_volume
),
523 goto out_free_scratch
;
525 for (i
= 0; i
< nr_volumes
; i
++) {
526 ret
= nfs4_block_decode_volume(&xdr
, &volumes
[i
]);
528 goto out_free_volumes
;
531 top
= kzalloc(sizeof(*top
), gfp_mask
);
533 goto out_free_volumes
;
535 ret
= bl_parse_deviceid(server
, top
, volumes
, nr_volumes
- 1, gfp_mask
);
538 nfs4_init_deviceid_node(node
, server
, &pdev
->dev_id
);
540 nfs4_mark_deviceid_unavailable(node
);
545 __free_page(scratch
);