/*
 * Copyright (c) 2014 Christoph Hellwig.
 */
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>

#include "blocklayout.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD
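
/*
 * GETDEVICEINFO XDR decoding and device tree construction for the pNFS
 * block layout driver (RFC 5663).  A device is described as a tree of
 * volumes (simple, slice, concatenation, stripe); the leaves map onto
 * local block devices.
 */

/*
 * Release a device node: recurse into the children of a composite
 * volume, or drop the block device reference of a leaf.
 */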
static void
bl_free_device(struct pnfs_block_dev *dev)
{
	if (dev->nr_children) {
		int i;

		for (i = 0; i < dev->nr_children; i++)
			bl_free_device(&dev->children[i]);
		kfree(dev->children);
	} else {
		if (dev->bdev)
			blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
	}
}
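
/*
 * Tear down the whole device tree and free the containing pnfs_block_dev
 * once an RCU grace period has elapsed.
 */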
void
bl_free_deviceid_node(struct nfs4_deviceid_node *d)
{
	struct pnfs_block_dev *dev =
		container_of(d, struct pnfs_block_dev, node);

	bl_free_device(dev);
	kfree_rcu(dev, node.rcu);
}
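
/*
 * Decode one volume entry from the GETDEVICEINFO reply.  The wire format
 * follows the volume union from RFC 5663: a SIMPLE volume carries disk
 * signatures, a SLICE a (start, length, source volume) triple, CONCAT and
 * STRIPE arrays of component volume indices.
 */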
static int
nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
{
	__be32 *p;
	int i;

	p = xdr_inline_decode(xdr, 4);
	if (!p)
		return -EIO;
	b->type = be32_to_cpup(p++);

	switch (b->type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->simple.nr_sigs = be32_to_cpup(p++);
		if (!b->simple.nr_sigs) {
			dprintk("no signature\n");
			return -EIO;
		}

		b->simple.len = 4 + 4;
		for (i = 0; i < b->simple.nr_sigs; i++) {
			p = xdr_inline_decode(xdr, 8 + 4);
			if (!p)
				return -EIO;
			p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
			b->simple.sigs[i].sig_len = be32_to_cpup(p++);
			if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
				pr_info("signature too long: %d\n",
					b->simple.sigs[i].sig_len);
				return -EIO;
			}

			p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
			if (!p)
				return -EIO;
			memcpy(&b->simple.sigs[i].sig, p,
				b->simple.sigs[i].sig_len);

			b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
		}
		break;
	case PNFS_BLOCK_VOLUME_SLICE:
		p = xdr_inline_decode(xdr, 8 + 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->slice.start);
		p = xdr_decode_hyper(p, &b->slice.len);
		b->slice.volume = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_CONCAT:
		p = xdr_inline_decode(xdr, 4);
		if (!p)
			return -EIO;
		b->concat.volumes_count = be32_to_cpup(p++);

		p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->concat.volumes_count; i++)
			b->concat.volumes[i] = be32_to_cpup(p++);
		break;
	case PNFS_BLOCK_VOLUME_STRIPE:
		p = xdr_inline_decode(xdr, 8 + 4);
		if (!p)
			return -EIO;
		p = xdr_decode_hyper(p, &b->stripe.chunk_size);
		b->stripe.volumes_count = be32_to_cpup(p++);

		p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
		if (!p)
			return -EIO;
		for (i = 0; i < b->stripe.volumes_count; i++)
			b->stripe.volumes[i] = be32_to_cpup(p++);
		break;
	default:
		dprintk("unknown volume type!\n");
		return -EIO;
	}

	return 0;
}
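
/*
 * Map helpers: translate a layout offset on a (possibly composite) device
 * into a contiguous extent on one underlying block device.  A simple
 * volume maps 1:1 onto its backing bdev.
 */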
static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	map->start = dev->start;
	map->len = dev->len;
	map->disk_offset = dev->disk_offset;
	map->bdev = dev->bdev;
	return true;
}
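
/*
 * For a concatenation, find the child whose [start, start + len) range
 * contains the offset and recurse with the offset rebased to that child.
 */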
static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	int i;

	for (i = 0; i < dev->nr_children; i++) {
		struct pnfs_block_dev *child = &dev->children[i];

		if (child->start > offset ||
		    child->start + child->len <= offset)
			continue;

		child->map(child, offset - child->start, map);
		return true;
	}

	dprintk("%s: ran off loop!\n", __func__);
	return false;
}
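
/*
 * For a stripe, chunks of chunk_size bytes are distributed round-robin
 * across the children.  Worked example (values chosen for illustration):
 * with chunk_size = 64k and 4 children, offset 300k falls into chunk 4,
 * i.e. child 4 % 4 = 0; that chunk starts at 256k in the layout and at
 * disk offset 256k / 4 = 64k on the child device.
 */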
static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
		struct pnfs_block_dev_map *map)
{
	struct pnfs_block_dev *child;
	u64 chunk;
	u32 chunk_idx;
	u64 disk_offset;

	chunk = div_u64(offset, dev->chunk_size);
	div_u64_rem(chunk, dev->nr_children, &chunk_idx);

	if (chunk_idx > dev->nr_children) {
		dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
			__func__, chunk_idx, offset, dev->chunk_size);
		/* error, should not happen */
		return false;
	}

	/* truncate offset to the beginning of the stripe */
	offset = chunk * dev->chunk_size;

	/* disk offset of the stripe */
	disk_offset = div_u64(offset, dev->nr_children);

	child = &dev->children[chunk_idx];
	child->map(child, disk_offset, map);

	map->start += offset;
	map->disk_offset += disk_offset;
	map->len = dev->chunk_size;
	return true;
}
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
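
/*
 * A SIMPLE volume is identified by a disk signature.  bl_resolve_deviceid()
 * asks userspace (the blkmapd daemon, via the rpc_pipefs upcall) to match
 * the signature to a local dev_t, which is then opened for read/write.
 */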
static int
bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	dev_t dev;

	dev = bl_resolve_deviceid(server, v, gfp_mask);
	if (!dev)
		return -EIO;

	d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(d->bdev)) {
		printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
			MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
		return PTR_ERR(d->bdev);
	}

	d->len = i_size_read(d->bdev->bd_inode);
	d->map = bl_map_simple;

	printk(KERN_INFO "pNFS: using block device %s\n",
		d->bdev->bd_disk->disk_name);
	return 0;
}
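
/* A SLICE volume is a (start, length) window into another volume. */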
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	int ret;

	ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
	if (ret)
		return ret;

	d->disk_offset = v->slice.start;
	d->len = v->slice.len;
	return 0;
}
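
/*
 * A CONCAT volume appends its component volumes back to back; each child's
 * start offset is the running total of the lengths parsed so far.
 */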
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->concat.volumes_count,
			sizeof(struct pnfs_block_dev), GFP_KERNEL);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->concat.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->concat.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		d->children[i].start += len;
		len += d->children[i].len;
	}

	d->len = len;
	d->map = bl_map_concat;
	return 0;
}
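
/*
 * A STRIPE volume interleaves chunk_size units round-robin across its
 * component volumes (see bl_map_stripe above).
 */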
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	struct pnfs_block_volume *v = &volumes[idx];
	u64 len = 0;
	int ret, i;

	d->children = kcalloc(v->stripe.volumes_count,
			sizeof(struct pnfs_block_dev), GFP_KERNEL);
	if (!d->children)
		return -ENOMEM;

	for (i = 0; i < v->stripe.volumes_count; i++) {
		ret = bl_parse_deviceid(server, &d->children[i],
				volumes, v->stripe.volumes[i], gfp_mask);
		if (ret)
			return ret;

		d->nr_children++;
		len += d->children[i].len;
	}

	d->len = len;
	d->chunk_size = v->stripe.chunk_size;
	d->map = bl_map_stripe;
	return 0;
}
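
/*
 * Recursively build the in-memory device tree by dispatching on the
 * volume type; the index refers into the flat volume array decoded from
 * the GETDEVICEINFO reply.
 */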
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
{
	switch (volumes[idx].type) {
	case PNFS_BLOCK_VOLUME_SIMPLE:
		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_SLICE:
		return bl_parse_slice(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_CONCAT:
		return bl_parse_concat(server, d, volumes, idx, gfp_mask);
	case PNFS_BLOCK_VOLUME_STRIPE:
		return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
	default:
		dprintk("unsupported volume type: %d\n", volumes[idx].type);
		return -EIO;
	}
}
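
/*
 * Layout driver entry point for GETDEVICEINFO: decode the volume array,
 * build the device tree rooted at the last volume, and register it as a
 * deviceid node with the generic pNFS code.
 */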
struct nfs4_deviceid_node *
bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node = NULL;
	struct pnfs_block_volume *volumes;
	struct pnfs_block_dev *top;
	struct xdr_stream xdr;
	struct xdr_buf buf;
	struct page *scratch;
	int nr_volumes, ret, i;
	__be32 *p;

	scratch = alloc_page(gfp_mask);
	if (!scratch)
		goto out;

	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&xdr, sizeof(__be32));
	if (!p)
		goto out_free_scratch;
	nr_volumes = be32_to_cpup(p++);

	volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
			  gfp_mask);
	if (!volumes)
		goto out_free_scratch;

	for (i = 0; i < nr_volumes; i++) {
		ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
		if (ret < 0)
			goto out_free_volumes;
	}

	top = kzalloc(sizeof(*top), gfp_mask);
	if (!top)
		goto out_free_volumes;

	ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
	if (ret) {
		bl_free_device(top);
		kfree(top);
		goto out_free_volumes;
	}

	node = &top->node;
	nfs4_init_deviceid_node(node, server, &pdev->dev_id);

out_free_volumes:
	kfree(volumes);
out_free_scratch:
	__free_page(scratch);
out:
	return node;
}