1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2014-2016 Christoph Hellwig.
5 #include <linux/sunrpc/svc.h>
6 #include <linux/blkdev.h>
7 #include <linux/nfs4.h>
8 #include <linux/nfs_fs.h>
9 #include <linux/nfs_xdr.h>
12 #include "blocklayout.h"
13 #include "../nfs4trace.h"
15 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
17 static void bl_unregister_scsi(struct pnfs_block_dev
*dev
)
19 struct block_device
*bdev
= file_bdev(dev
->bdev_file
);
20 const struct pr_ops
*ops
= bdev
->bd_disk
->fops
->pr_ops
;
23 status
= ops
->pr_register(bdev
, dev
->pr_key
, 0, false);
25 trace_bl_pr_key_unreg_err(bdev
, dev
->pr_key
, status
);
27 trace_bl_pr_key_unreg(bdev
, dev
->pr_key
);
30 static bool bl_register_scsi(struct pnfs_block_dev
*dev
)
32 struct block_device
*bdev
= file_bdev(dev
->bdev_file
);
33 const struct pr_ops
*ops
= bdev
->bd_disk
->fops
->pr_ops
;
36 if (test_and_set_bit(PNFS_BDEV_REGISTERED
, &dev
->flags
))
39 status
= ops
->pr_register(bdev
, 0, dev
->pr_key
, true);
41 trace_bl_pr_key_reg_err(bdev
, dev
->pr_key
, status
);
44 trace_bl_pr_key_reg(bdev
, dev
->pr_key
);
48 static void bl_unregister_dev(struct pnfs_block_dev
*dev
)
52 if (dev
->nr_children
) {
53 for (i
= 0; i
< dev
->nr_children
; i
++)
54 bl_unregister_dev(&dev
->children
[i
]);
58 if (dev
->type
== PNFS_BLOCK_VOLUME_SCSI
&&
59 test_and_clear_bit(PNFS_BDEV_REGISTERED
, &dev
->flags
))
60 bl_unregister_scsi(dev
);
63 bool bl_register_dev(struct pnfs_block_dev
*dev
)
67 if (dev
->nr_children
) {
68 for (i
= 0; i
< dev
->nr_children
; i
++) {
69 if (!bl_register_dev(&dev
->children
[i
])) {
71 bl_unregister_dev(&dev
->children
[--i
]);
78 if (dev
->type
== PNFS_BLOCK_VOLUME_SCSI
)
79 return bl_register_scsi(dev
);
84 bl_free_device(struct pnfs_block_dev
*dev
)
86 bl_unregister_dev(dev
);
88 if (dev
->nr_children
) {
91 for (i
= 0; i
< dev
->nr_children
; i
++)
92 bl_free_device(&dev
->children
[i
]);
101 bl_free_deviceid_node(struct nfs4_deviceid_node
*d
)
103 struct pnfs_block_dev
*dev
=
104 container_of(d
, struct pnfs_block_dev
, node
);
107 kfree_rcu(dev
, node
.rcu
);
111 nfs4_block_decode_volume(struct xdr_stream
*xdr
, struct pnfs_block_volume
*b
)
116 p
= xdr_inline_decode(xdr
, 4);
119 b
->type
= be32_to_cpup(p
++);
122 case PNFS_BLOCK_VOLUME_SIMPLE
:
123 p
= xdr_inline_decode(xdr
, 4);
126 b
->simple
.nr_sigs
= be32_to_cpup(p
++);
127 if (!b
->simple
.nr_sigs
|| b
->simple
.nr_sigs
> PNFS_BLOCK_MAX_UUIDS
) {
128 dprintk("Bad signature count: %d\n", b
->simple
.nr_sigs
);
132 b
->simple
.len
= 4 + 4;
133 for (i
= 0; i
< b
->simple
.nr_sigs
; i
++) {
134 p
= xdr_inline_decode(xdr
, 8 + 4);
137 p
= xdr_decode_hyper(p
, &b
->simple
.sigs
[i
].offset
);
138 b
->simple
.sigs
[i
].sig_len
= be32_to_cpup(p
++);
139 if (b
->simple
.sigs
[i
].sig_len
> PNFS_BLOCK_UUID_LEN
) {
140 pr_info("signature too long: %d\n",
141 b
->simple
.sigs
[i
].sig_len
);
145 p
= xdr_inline_decode(xdr
, b
->simple
.sigs
[i
].sig_len
);
148 memcpy(&b
->simple
.sigs
[i
].sig
, p
,
149 b
->simple
.sigs
[i
].sig_len
);
151 b
->simple
.len
+= 8 + 4 + \
152 (XDR_QUADLEN(b
->simple
.sigs
[i
].sig_len
) << 2);
155 case PNFS_BLOCK_VOLUME_SLICE
:
156 p
= xdr_inline_decode(xdr
, 8 + 8 + 4);
159 p
= xdr_decode_hyper(p
, &b
->slice
.start
);
160 p
= xdr_decode_hyper(p
, &b
->slice
.len
);
161 b
->slice
.volume
= be32_to_cpup(p
++);
163 case PNFS_BLOCK_VOLUME_CONCAT
:
164 p
= xdr_inline_decode(xdr
, 4);
168 b
->concat
.volumes_count
= be32_to_cpup(p
++);
169 if (b
->concat
.volumes_count
> PNFS_BLOCK_MAX_DEVICES
) {
170 dprintk("Too many volumes: %d\n", b
->concat
.volumes_count
);
174 p
= xdr_inline_decode(xdr
, b
->concat
.volumes_count
* 4);
177 for (i
= 0; i
< b
->concat
.volumes_count
; i
++)
178 b
->concat
.volumes
[i
] = be32_to_cpup(p
++);
180 case PNFS_BLOCK_VOLUME_STRIPE
:
181 p
= xdr_inline_decode(xdr
, 8 + 4);
185 p
= xdr_decode_hyper(p
, &b
->stripe
.chunk_size
);
186 b
->stripe
.volumes_count
= be32_to_cpup(p
++);
187 if (b
->stripe
.volumes_count
> PNFS_BLOCK_MAX_DEVICES
) {
188 dprintk("Too many volumes: %d\n", b
->stripe
.volumes_count
);
192 p
= xdr_inline_decode(xdr
, b
->stripe
.volumes_count
* 4);
195 for (i
= 0; i
< b
->stripe
.volumes_count
; i
++)
196 b
->stripe
.volumes
[i
] = be32_to_cpup(p
++);
198 case PNFS_BLOCK_VOLUME_SCSI
:
199 p
= xdr_inline_decode(xdr
, 4 + 4 + 4);
202 b
->scsi
.code_set
= be32_to_cpup(p
++);
203 b
->scsi
.designator_type
= be32_to_cpup(p
++);
204 b
->scsi
.designator_len
= be32_to_cpup(p
++);
205 p
= xdr_inline_decode(xdr
, b
->scsi
.designator_len
);
208 if (b
->scsi
.designator_len
> 256)
210 memcpy(&b
->scsi
.designator
, p
, b
->scsi
.designator_len
);
211 p
= xdr_inline_decode(xdr
, 8);
214 p
= xdr_decode_hyper(p
, &b
->scsi
.pr_key
);
217 dprintk("unknown volume type!\n");
224 static bool bl_map_simple(struct pnfs_block_dev
*dev
, u64 offset
,
225 struct pnfs_block_dev_map
*map
)
227 map
->start
= dev
->start
;
229 map
->disk_offset
= dev
->disk_offset
;
230 map
->bdev
= file_bdev(dev
->bdev_file
);
234 static bool bl_map_concat(struct pnfs_block_dev
*dev
, u64 offset
,
235 struct pnfs_block_dev_map
*map
)
239 for (i
= 0; i
< dev
->nr_children
; i
++) {
240 struct pnfs_block_dev
*child
= &dev
->children
[i
];
242 if (child
->start
> offset
||
243 child
->start
+ child
->len
<= offset
)
246 child
->map(child
, offset
- child
->start
, map
);
250 dprintk("%s: ran off loop!\n", __func__
);
254 static bool bl_map_stripe(struct pnfs_block_dev
*dev
, u64 offset
,
255 struct pnfs_block_dev_map
*map
)
257 struct pnfs_block_dev
*child
;
262 chunk
= div_u64(offset
, dev
->chunk_size
);
263 div_u64_rem(chunk
, dev
->nr_children
, &chunk_idx
);
265 if (chunk_idx
>= dev
->nr_children
) {
266 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
267 __func__
, chunk_idx
, offset
, dev
->chunk_size
);
268 /* error, should not happen */
272 /* truncate offset to the beginning of the stripe */
273 offset
= chunk
* dev
->chunk_size
;
275 /* disk offset of the stripe */
276 disk_offset
= div_u64(offset
, dev
->nr_children
);
278 child
= &dev
->children
[chunk_idx
];
279 child
->map(child
, disk_offset
, map
);
281 map
->start
+= offset
;
282 map
->disk_offset
+= disk_offset
;
283 map
->len
= dev
->chunk_size
;
288 bl_parse_deviceid(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
289 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
);
293 bl_parse_simple(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
294 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
296 struct pnfs_block_volume
*v
= &volumes
[idx
];
297 struct file
*bdev_file
;
300 dev
= bl_resolve_deviceid(server
, v
, gfp_mask
);
304 bdev_file
= bdev_file_open_by_dev(dev
, BLK_OPEN_READ
| BLK_OPEN_WRITE
,
306 if (IS_ERR(bdev_file
)) {
307 printk(KERN_WARNING
"pNFS: failed to open device %d:%d (%ld)\n",
308 MAJOR(dev
), MINOR(dev
), PTR_ERR(bdev_file
));
309 return PTR_ERR(bdev_file
);
311 d
->bdev_file
= bdev_file
;
312 d
->len
= bdev_nr_bytes(file_bdev(bdev_file
));
313 d
->map
= bl_map_simple
;
315 printk(KERN_INFO
"pNFS: using block device %s\n",
316 file_bdev(bdev_file
)->bd_disk
->disk_name
);
321 bl_validate_designator(struct pnfs_block_volume
*v
)
323 switch (v
->scsi
.designator_type
) {
324 case PS_DESIGNATOR_EUI64
:
325 if (v
->scsi
.code_set
!= PS_CODE_SET_BINARY
)
328 if (v
->scsi
.designator_len
!= 8 &&
329 v
->scsi
.designator_len
!= 10 &&
330 v
->scsi
.designator_len
!= 16)
334 case PS_DESIGNATOR_NAA
:
335 if (v
->scsi
.code_set
!= PS_CODE_SET_BINARY
)
338 if (v
->scsi
.designator_len
!= 8 &&
339 v
->scsi
.designator_len
!= 16)
343 case PS_DESIGNATOR_T10
:
344 case PS_DESIGNATOR_NAME
:
345 pr_err("pNFS: unsupported designator "
346 "(code set %d, type %d, len %d.\n",
348 v
->scsi
.designator_type
,
349 v
->scsi
.designator_len
);
352 pr_err("pNFS: invalid designator "
353 "(code set %d, type %d, len %d.\n",
355 v
->scsi
.designator_type
,
356 v
->scsi
.designator_len
);
362 bl_open_path(struct pnfs_block_volume
*v
, const char *prefix
)
364 struct file
*bdev_file
;
367 devname
= kasprintf(GFP_KERNEL
, "/dev/disk/by-id/%s%*phN",
368 prefix
, v
->scsi
.designator_len
, v
->scsi
.designator
);
370 return ERR_PTR(-ENOMEM
);
372 bdev_file
= bdev_file_open_by_path(devname
, BLK_OPEN_READ
| BLK_OPEN_WRITE
,
374 if (IS_ERR(bdev_file
)) {
375 dprintk("failed to open device %s (%ld)\n",
376 devname
, PTR_ERR(bdev_file
));
384 bl_parse_scsi(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
385 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
387 struct pnfs_block_volume
*v
= &volumes
[idx
];
388 struct block_device
*bdev
;
389 const struct pr_ops
*ops
;
390 struct file
*bdev_file
;
393 if (!bl_validate_designator(v
))
397 * Try to open the RH/Fedora specific dm-mpath udev path first, as the
398 * wwn- links will only point to the first discovered SCSI device there.
399 * On other distributions like Debian, the default SCSI by-id path will
400 * point to the dm-multipath device if one exists.
402 bdev_file
= bl_open_path(v
, "dm-uuid-mpath-0x");
403 if (IS_ERR(bdev_file
))
404 bdev_file
= bl_open_path(v
, "wwn-0x");
405 if (IS_ERR(bdev_file
))
406 bdev_file
= bl_open_path(v
, "nvme-eui.");
407 if (IS_ERR(bdev_file
)) {
408 pr_warn("pNFS: no device found for volume %*phN\n",
409 v
->scsi
.designator_len
, v
->scsi
.designator
);
410 return PTR_ERR(bdev_file
);
412 d
->bdev_file
= bdev_file
;
413 bdev
= file_bdev(bdev_file
);
415 d
->len
= bdev_nr_bytes(bdev
);
416 d
->map
= bl_map_simple
;
417 d
->pr_key
= v
->scsi
.pr_key
;
422 ops
= bdev
->bd_disk
->fops
->pr_ops
;
424 pr_err("pNFS: block device %s does not support reservations.",
425 bdev
->bd_disk
->disk_name
);
438 bl_parse_slice(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
439 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
441 struct pnfs_block_volume
*v
= &volumes
[idx
];
444 ret
= bl_parse_deviceid(server
, d
, volumes
, v
->slice
.volume
, gfp_mask
);
448 d
->disk_offset
= v
->slice
.start
;
449 d
->len
= v
->slice
.len
;
454 bl_parse_concat(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
455 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
457 struct pnfs_block_volume
*v
= &volumes
[idx
];
461 d
->children
= kcalloc(v
->concat
.volumes_count
,
462 sizeof(struct pnfs_block_dev
), gfp_mask
);
466 for (i
= 0; i
< v
->concat
.volumes_count
; i
++) {
467 ret
= bl_parse_deviceid(server
, &d
->children
[i
],
468 volumes
, v
->concat
.volumes
[i
], gfp_mask
);
473 d
->children
[i
].start
+= len
;
474 len
+= d
->children
[i
].len
;
478 d
->map
= bl_map_concat
;
483 bl_parse_stripe(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
484 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
486 struct pnfs_block_volume
*v
= &volumes
[idx
];
490 d
->children
= kcalloc(v
->stripe
.volumes_count
,
491 sizeof(struct pnfs_block_dev
), gfp_mask
);
495 for (i
= 0; i
< v
->stripe
.volumes_count
; i
++) {
496 ret
= bl_parse_deviceid(server
, &d
->children
[i
],
497 volumes
, v
->stripe
.volumes
[i
], gfp_mask
);
502 len
+= d
->children
[i
].len
;
506 d
->chunk_size
= v
->stripe
.chunk_size
;
507 d
->map
= bl_map_stripe
;
512 bl_parse_deviceid(struct nfs_server
*server
, struct pnfs_block_dev
*d
,
513 struct pnfs_block_volume
*volumes
, int idx
, gfp_t gfp_mask
)
515 d
->type
= volumes
[idx
].type
;
518 case PNFS_BLOCK_VOLUME_SIMPLE
:
519 return bl_parse_simple(server
, d
, volumes
, idx
, gfp_mask
);
520 case PNFS_BLOCK_VOLUME_SLICE
:
521 return bl_parse_slice(server
, d
, volumes
, idx
, gfp_mask
);
522 case PNFS_BLOCK_VOLUME_CONCAT
:
523 return bl_parse_concat(server
, d
, volumes
, idx
, gfp_mask
);
524 case PNFS_BLOCK_VOLUME_STRIPE
:
525 return bl_parse_stripe(server
, d
, volumes
, idx
, gfp_mask
);
526 case PNFS_BLOCK_VOLUME_SCSI
:
527 return bl_parse_scsi(server
, d
, volumes
, idx
, gfp_mask
);
529 dprintk("unsupported volume type: %d\n", d
->type
);
534 struct nfs4_deviceid_node
*
535 bl_alloc_deviceid_node(struct nfs_server
*server
, struct pnfs_device
*pdev
,
538 struct nfs4_deviceid_node
*node
= NULL
;
539 struct pnfs_block_volume
*volumes
;
540 struct pnfs_block_dev
*top
;
541 struct xdr_stream xdr
;
543 struct page
*scratch
;
544 int nr_volumes
, ret
, i
;
547 scratch
= alloc_page(gfp_mask
);
551 xdr_init_decode_pages(&xdr
, &buf
, pdev
->pages
, pdev
->pglen
);
552 xdr_set_scratch_page(&xdr
, scratch
);
554 p
= xdr_inline_decode(&xdr
, sizeof(__be32
));
556 goto out_free_scratch
;
557 nr_volumes
= be32_to_cpup(p
++);
559 volumes
= kcalloc(nr_volumes
, sizeof(struct pnfs_block_volume
),
562 goto out_free_scratch
;
564 for (i
= 0; i
< nr_volumes
; i
++) {
565 ret
= nfs4_block_decode_volume(&xdr
, &volumes
[i
]);
567 goto out_free_volumes
;
570 top
= kzalloc(sizeof(*top
), gfp_mask
);
572 goto out_free_volumes
;
574 ret
= bl_parse_deviceid(server
, top
, volumes
, nr_volumes
- 1, gfp_mask
);
577 nfs4_init_deviceid_node(node
, server
, &pdev
->dev_id
);
579 nfs4_mark_deviceid_unavailable(node
);
584 __free_page(scratch
);