/*
 * Copyright (c) 2014-2016 Christoph Hellwig.
 */
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
#include <linux/pr.h>

#include "blocklayout.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

static void
bl_free_device(struct pnfs_block_dev *dev)
{
	if (dev->nr_children) {
		int i;

		for (i = 0; i < dev->nr_children; i++)
			bl_free_device(&dev->children[i]);
		kfree(dev->children);
	} else {
		if (dev->pr_registered) {
			const struct pr_ops *ops =
				dev->bdev->bd_disk->fops->pr_ops;
			int error;

			error = ops->pr_register(dev->bdev, dev->pr_key, 0,
				false);
			if (error)
				pr_err("failed to unregister PR key.\n");
		}

		if (dev->bdev)
			blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
	}
}

void
bl_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	struct pnfs_block_dev *dev =
		container_of(d, struct pnfs_block_dev, node);

	bl_free_device(dev);
	kfree_rcu(dev, node.rcu);
}

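/*
 * Decode one on-the-wire volume descriptor into struct pnfs_block_volume.
 * As implemented below, each descriptor starts with a 32-bit type followed
 * by type-specific fields: SIMPLE carries an array of (offset, signature)
 * pairs, SLICE carries a start/length plus the index of the sliced volume,
 * CONCAT and STRIPE carry arrays of child volume indices, and SCSI carries
 * a designator plus a persistent reservation key.
 */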
static int
nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
{
	__be32 *p;
	int i;

	p = xdr_inline_decode(xdr, 4);
	if (!p)
		return -EIO;
	b->type = be32_to_cpup(p++);

	switch (b->type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->simple.nr_sigs = be32_to_cpup(p++);
		if (!b->simple.nr_sigs) {
			dprintk("no signature\n");
			return -EIO;
		}

		b->simple.len = 4 + 4;
		for (i = 0; i < b->simple.nr_sigs; i++) {
			p = xdr_inline_decode(xdr, 8 + 4);
			if (!p)
				return -EIO;
			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
			if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
				pr_info("signature too long: %d\n",
					b->simple.sigs[i].sig_len);
				return -EIO;
			}

			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
			if (!p)
				return -EIO;
			memcpy(&b->simple.sigs[i].sig, p,
				b->simple.sigs[i].sig_len);

			b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
		}
		break;
	case PNFS_BLOCK_VOLUME_SLICE:
		p = xdr_inline_decode(xdr, 8 + 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->slice.start);
		p = xdr_decode_hyper(p, &b->slice.len);
		b->slice.volume = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_CONCAT:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->concat.volumes_count = be32_to_cpup(p++);

		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->concat.volumes_count; i++)
			b->concat.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_STRIPE:
		p = xdr_inline_decode(xdr, 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->stripe.chunk_size);
		b->stripe.volumes_count = be32_to_cpup(p++);

		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->stripe.volumes_count; i++)
			b->stripe.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_SCSI:
		p = xdr_inline_decode(xdr, 4 + 4 + 4);
		if (!p)
			return -EIO;
		b->scsi.code_set = be32_to_cpup(p++);
		b->scsi.designator_type = be32_to_cpup(p++);
		b->scsi.designator_len = be32_to_cpup(p++);
		p = xdr_inline_decode(xdr, b->scsi.designator_len);
		if (!p)
			return -EIO;
		if (b->scsi.designator_len > 256)
			return -EIO;
		memcpy(&b->scsi.designator, p, b->scsi.designator_len);
		p = xdr_inline_decode(xdr, 8);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->scsi.pr_key);
		break;
	default:
		dprintk("unknown volume type!\n");
		return -EIO;
	}

	return 0;
}

static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	map->start = dev->start;
	map->len = dev->len;
	map->disk_offset = dev->disk_offset;
	map->bdev = dev->bdev;
	return true;
}

static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	int i;

	for (i = 0; i < dev->nr_children; i++) {
		struct pnfs_block_dev *child = &dev->children[i];

		if (child->start > offset ||
		    child->start + child->len <= offset)
			continue;

		child->map(child, offset - child->start, map);
		return true;
	}

	dprintk("%s: ran off loop!\n", __func__);
	return false;
}

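/*
 * Worked example of the stripe mapping below (values are illustrative):
 * with chunk_size = 4096 and nr_children = 2, an offset of 13000 falls in
 * chunk 3 (13000 / 4096), which lives on child 1 (3 % 2).  The offset is
 * truncated to the chunk boundary, 3 * 4096 = 12288, and the per-child
 * disk offset becomes 12288 / 2 = 6144.
 */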
static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	struct pnfs_block_dev *child;
	u64 chunk;
	u32 chunk_idx;
	u64 disk_offset;

	chunk = div_u64(offset, dev->chunk_size);
	div_u64_rem(chunk, dev->nr_children, &chunk_idx);

	if (chunk_idx >= dev->nr_children) {
		dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
			__func__, chunk_idx, offset, dev->chunk_size);
		/* error, should not happen */
		return false;
	}

	/* truncate offset to the beginning of the stripe */
	offset = chunk * dev->chunk_size;

	/* disk offset of the stripe */
	disk_offset = div_u64(offset, dev->nr_children);

	child = &dev->children[chunk_idx];
	child->map(child, disk_offset, map);

	map->start += offset;
	map->disk_offset += disk_offset;
	map->len = dev->chunk_size;
	return true;
}

static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);

static int
bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	dev_t dev;

	dev = bl_resolve_deviceid(server, v, gfp_mask);
	if (!dev)
		return -EIO;

	d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(d->bdev)) {
		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
			MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
		return PTR_ERR(d->bdev);
	}

	d->len = i_size_read(d->bdev->bd_inode);
	d->map = bl_map_simple;

	printk(KERN_INFO "pNFS: using block device %s\n",
		d->bdev->bd_disk->disk_name);
	return 0;
}

static bool
bl_validate_designator(struct pnfs_block_volume *v)
{
	switch (v->scsi.designator_type) {
	case PS_DESIGNATOR_EUI64:
		if (v->scsi.code_set != PS_CODE_SET_BINARY)
			return false;

		if (v->scsi.designator_len != 8 &&
		    v->scsi.designator_len != 10 &&
		    v->scsi.designator_len != 16)
			return false;

		return true;
	case PS_DESIGNATOR_NAA:
		if (v->scsi.code_set != PS_CODE_SET_BINARY)
			return false;

		if (v->scsi.designator_len != 8 &&
		    v->scsi.designator_len != 16)
			return false;

		return true;
	case PS_DESIGNATOR_T10:
	case PS_DESIGNATOR_NAME:
		pr_err("pNFS: unsupported designator "
			"(code set %d, type %d, len %d).\n",
			v->scsi.code_set,
			v->scsi.designator_type,
			v->scsi.designator_len);
		return false;
	default:
		pr_err("pNFS: invalid designator "
			"(code set %d, type %d, len %d).\n",
			v->scsi.code_set,
			v->scsi.designator_type,
			v->scsi.designator_len);
		return false;
	}
}

static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	const struct pr_ops *ops;
	char *devname;
	int error;

	if (!bl_validate_designator(v))
		return -EINVAL;

	switch (v->scsi.designator_len) {
	case 8:
		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
				v->scsi.designator);
		break;
	case 12:
		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
				v->scsi.designator);
		break;
	case 16:
		devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
				v->scsi.designator);
		break;
	default:
		return -EINVAL;
	}

	d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
	if (IS_ERR(d->bdev)) {
		pr_warn("pNFS: failed to open device %s (%ld)\n",
			devname, PTR_ERR(d->bdev));
		kfree(devname);
		return PTR_ERR(d->bdev);
	}

	kfree(devname);

	d->len = i_size_read(d->bdev->bd_inode);
	d->map = bl_map_simple;
	d->pr_key = v->scsi.pr_key;

	pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
		d->bdev->bd_disk->disk_name, d->pr_key);

	ops = d->bdev->bd_disk->fops->pr_ops;
	if (!ops) {
		pr_err("pNFS: block device %s does not support reservations.",
			d->bdev->bd_disk->disk_name);
		error = -EINVAL;
		goto out_blkdev_put;
	}

	error = ops->pr_register(d->bdev, 0, d->pr_key, true);
	if (error) {
		pr_err("pNFS: failed to register key for block device %s.",
			d->bdev->bd_disk->disk_name);
		goto out_blkdev_put;
	}

	d->pr_registered = true;
	return 0;

out_blkdev_put:
	blkdev_put(d->bdev, FMODE_READ);
	return error;
}

static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	int ret;

	ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
	if (ret)
		return ret;

	d->disk_offset = v->slice.start;
	d->len = v->slice.len;
	return 0;
}

static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->concat.volumes_count,
			sizeof(struct pnfs_block_dev), GFP_KERNEL);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->concat.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->concat.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		d->children[i].start += len;
		len += d->children[i].len;
	}

	d->len = len;
	d->map = bl_map_concat;
	return 0;
}

static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->stripe.volumes_count,
			sizeof(struct pnfs_block_dev), GFP_KERNEL);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->stripe.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->stripe.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		len += d->children[i].len;
	}

	d->len = len;
	d->chunk_size = v->stripe.chunk_size;
	d->map = bl_map_stripe;
	return 0;
}

static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	switch (volumes[idx].type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SLICE:
		return bl_parse_slice(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_CONCAT:
		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_STRIPE:
		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SCSI:
		return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
	default:
		dprintk("unsupported volume type: %d\n", volumes[idx].type);
		return -EIO;
	}
}

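/*
 * Note on the structure parsed below: the device info carries a flat array
 * of volume descriptors in which composite volumes (slice, concat, stripe)
 * refer to other entries by array index, and the final entry is the root
 * of the resulting device tree.  That is why bl_alloc_deviceid_node() hands
 * index nr_volumes - 1 to bl_parse_deviceid().
 */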
struct nfs4_deviceid_node *
bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node = NULL;
	struct pnfs_block_volume *volumes;
	struct pnfs_block_dev *top;
	struct xdr_stream xdr;
	struct xdr_buf buf;
	struct page *scratch;
	int nr_volumes, ret, i;
	__be32 *p;

	scratch = alloc_page(gfp_mask);
	if (!scratch)
		goto out;

	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&xdr, sizeof(__be32));
	if (!p)
		goto out_free_scratch;
	nr_volumes = be32_to_cpup(p++);

	volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
			  gfp_mask);
	if (!volumes)
		goto out_free_scratch;

	for (i = 0; i < nr_volumes; i++) {
		ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
		if (ret < 0)
			goto out_free_volumes;
	}

	top = kzalloc(sizeof(*top), gfp_mask);
	if (!top)
		goto out_free_volumes;

	ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
	if (ret) {
		bl_free_device(top);
		kfree(top);
		goto out_free_volumes;
	}

	node = &top->node;
	nfs4_init_deviceid_node(node, server, &pdev->dev_id);

out_free_volumes:
	kfree(volumes);
out_free_scratch:
	__free_page(scratch);
out:
	return node;
}