2 * pNFS Objects layout implementation over open-osd initiator library
4 * Copyright (C) 2009 Panasas Inc. [year of first publication]
7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <bharrosh@panasas.com>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * See the file COPYING included with this distribution for more details.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the Panasas company nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #include <linux/module.h>
41 #include <scsi/osd_initiator.h>
43 #include "objlayout.h"
45 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
 * Cast an integer expression to unsigned long long so it can be passed to
 * a "%llx"/"%llu" format regardless of its native width.
 * The argument is parenthesized so the cast covers the whole expression
 * and not just its first operand.
 */
#define _LLU(x) ((unsigned long long)(x))
49 enum { BIO_MAX_PAGES_KMALLOC
=
50 (PAGE_SIZE
- sizeof(struct bio
)) / sizeof(struct bio_vec
),
53 struct objio_dev_ent
{
54 struct nfs4_deviceid_node id_node
;
59 objio_free_deviceid_node(struct nfs4_deviceid_node
*d
)
61 struct objio_dev_ent
*de
= container_of(d
, struct objio_dev_ent
, id_node
);
63 dprintk("%s: free od=%p\n", __func__
, de
->od
);
64 osduld_put_device(de
->od
);
68 static struct objio_dev_ent
*_dev_list_find(const struct nfs_server
*nfss
,
69 const struct nfs4_deviceid
*d_id
)
71 struct nfs4_deviceid_node
*d
;
72 struct objio_dev_ent
*de
;
74 d
= nfs4_find_get_deviceid(nfss
->pnfs_curr_ld
, nfss
->nfs_client
, d_id
);
78 de
= container_of(d
, struct objio_dev_ent
, id_node
);
82 static struct objio_dev_ent
*
83 _dev_list_add(const struct nfs_server
*nfss
,
84 const struct nfs4_deviceid
*d_id
, struct osd_dev
*od
,
87 struct nfs4_deviceid_node
*d
;
88 struct objio_dev_ent
*de
= kzalloc(sizeof(*de
), gfp_flags
);
89 struct objio_dev_ent
*n
;
92 dprintk("%s: -ENOMEM od=%p\n", __func__
, od
);
96 dprintk("%s: Adding od=%p\n", __func__
, od
);
97 nfs4_init_deviceid_node(&de
->id_node
,
103 d
= nfs4_insert_deviceid_node(&de
->id_node
);
104 n
= container_of(d
, struct objio_dev_ent
, id_node
);
106 dprintk("%s: Race with other n->od=%p\n", __func__
, n
->od
);
107 objio_free_deviceid_node(&de
->id_node
);
114 struct caps_buffers
{
115 u8 caps_key
[OSD_CRYPTO_KEYID_SIZE
];
116 u8 creds
[OSD_CAP_LEN
];
119 struct objio_segment
{
120 struct pnfs_layout_segment lseg
;
122 struct pnfs_osd_object_cred
*comps
;
125 unsigned stripe_unit
;
126 unsigned group_width
; /* Data stripe_units without integrity comps */
128 unsigned group_count
;
130 unsigned max_io_size
;
132 unsigned comps_index
;
134 /* variable length */
135 struct objio_dev_ent
*ods
[];
138 static inline struct objio_segment
*
139 OBJIO_LSEG(struct pnfs_layout_segment
*lseg
)
141 return container_of(lseg
, struct objio_segment
, lseg
);
145 typedef ssize_t (*objio_done_fn
)(struct objio_state
*ios
);
149 struct objlayout_io_state ol_state
;
151 struct objio_segment
*layout
;
157 unsigned long length
;
158 unsigned numdevs
; /* Actually used devs in this IO */
159 /* A per-device variable array of size numdevs */
160 struct _objio_per_comp
{
162 struct osd_request
*or;
163 unsigned long length
;
169 /* Send and wait for a get_device_info of devices in the layout,
170 then look them up with the osd_initiator library */
171 static struct objio_dev_ent
*_device_lookup(struct pnfs_layout_hdr
*pnfslay
,
172 struct objio_segment
*objio_seg
, unsigned comp
,
175 struct pnfs_osd_deviceaddr
*deviceaddr
;
176 struct nfs4_deviceid
*d_id
;
177 struct objio_dev_ent
*ode
;
179 struct osd_dev_info odi
;
182 d_id
= &objio_seg
->comps
[comp
].oc_object_id
.oid_device_id
;
184 ode
= _dev_list_find(NFS_SERVER(pnfslay
->plh_inode
), d_id
);
188 err
= objlayout_get_deviceinfo(pnfslay
, d_id
, &deviceaddr
, gfp_flags
);
190 dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
191 __func__
, _DEVID_LO(d_id
), _DEVID_HI(d_id
), err
);
195 odi
.systemid_len
= deviceaddr
->oda_systemid
.len
;
196 if (odi
.systemid_len
> sizeof(odi
.systemid
)) {
199 } else if (odi
.systemid_len
)
200 memcpy(odi
.systemid
, deviceaddr
->oda_systemid
.data
,
202 odi
.osdname_len
= deviceaddr
->oda_osdname
.len
;
203 odi
.osdname
= (u8
*)deviceaddr
->oda_osdname
.data
;
205 if (!odi
.osdname_len
&& !odi
.systemid_len
) {
206 dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
212 od
= osduld_info_lookup(&odi
);
213 if (unlikely(IS_ERR(od
))) {
215 dprintk("%s: osduld_info_lookup => %d\n", __func__
, err
);
219 ode
= _dev_list_add(NFS_SERVER(pnfslay
->plh_inode
), d_id
, od
,
223 dprintk("%s: return=%d\n", __func__
, err
);
224 objlayout_put_deviceinfo(deviceaddr
);
225 return err
? ERR_PTR(err
) : ode
;
228 static int objio_devices_lookup(struct pnfs_layout_hdr
*pnfslay
,
229 struct objio_segment
*objio_seg
,
235 /* lookup all devices */
236 for (i
= 0; i
< objio_seg
->num_comps
; i
++) {
237 struct objio_dev_ent
*ode
;
239 ode
= _device_lookup(pnfslay
, objio_seg
, i
, gfp_flags
);
240 if (unlikely(IS_ERR(ode
))) {
244 objio_seg
->ods
[i
] = ode
;
249 dprintk("%s: return=%d\n", __func__
, err
);
253 static int _verify_data_map(struct pnfs_osd_layout
*layout
)
255 struct pnfs_osd_data_map
*data_map
= &layout
->olo_map
;
259 /* FIXME: Only raid0 for now. if not go through MDS */
260 if (data_map
->odm_raid_algorithm
!= PNFS_OSD_RAID_0
) {
261 printk(KERN_ERR
"Only RAID_0 for now\n");
264 if (0 != (data_map
->odm_num_comps
% (data_map
->odm_mirror_cnt
+ 1))) {
265 printk(KERN_ERR
"Data Map wrong, num_comps=%u mirrors=%u\n",
266 data_map
->odm_num_comps
, data_map
->odm_mirror_cnt
);
270 if (data_map
->odm_group_width
)
271 group_width
= data_map
->odm_group_width
;
273 group_width
= data_map
->odm_num_comps
/
274 (data_map
->odm_mirror_cnt
+ 1);
276 stripe_length
= (u64
)data_map
->odm_stripe_unit
* group_width
;
277 if (stripe_length
>= (1ULL << 32)) {
278 printk(KERN_ERR
"Total Stripe length(0x%llx)"
279 " >= 32bit is not supported\n", _LLU(stripe_length
));
283 if (0 != (data_map
->odm_stripe_unit
& ~PAGE_MASK
)) {
284 printk(KERN_ERR
"Stripe Unit(0x%llx)"
285 " must be Multples of PAGE_SIZE(0x%lx)\n",
286 _LLU(data_map
->odm_stripe_unit
), PAGE_SIZE
);
293 static void copy_single_comp(struct pnfs_osd_object_cred
*cur_comp
,
294 struct pnfs_osd_object_cred
*src_comp
,
295 struct caps_buffers
*caps_p
)
297 WARN_ON(src_comp
->oc_cap_key
.cred_len
> sizeof(caps_p
->caps_key
));
298 WARN_ON(src_comp
->oc_cap
.cred_len
> sizeof(caps_p
->creds
));
300 *cur_comp
= *src_comp
;
302 memcpy(caps_p
->caps_key
, src_comp
->oc_cap_key
.cred
,
303 sizeof(caps_p
->caps_key
));
304 cur_comp
->oc_cap_key
.cred
= caps_p
->caps_key
;
306 memcpy(caps_p
->creds
, src_comp
->oc_cap
.cred
,
307 sizeof(caps_p
->creds
));
308 cur_comp
->oc_cap
.cred
= caps_p
->creds
;
311 int objio_alloc_lseg(struct pnfs_layout_segment
**outp
,
312 struct pnfs_layout_hdr
*pnfslay
,
313 struct pnfs_layout_range
*range
,
314 struct xdr_stream
*xdr
,
317 struct objio_segment
*objio_seg
;
318 struct pnfs_osd_xdr_decode_layout_iter iter
;
319 struct pnfs_osd_layout layout
;
320 struct pnfs_osd_object_cred
*cur_comp
, src_comp
;
321 struct caps_buffers
*caps_p
;
324 err
= pnfs_osd_xdr_decode_layout_map(&layout
, &iter
, xdr
);
328 err
= _verify_data_map(&layout
);
332 objio_seg
= kzalloc(sizeof(*objio_seg
) +
333 sizeof(objio_seg
->ods
[0]) * layout
.olo_num_comps
+
334 sizeof(*objio_seg
->comps
) * layout
.olo_num_comps
+
335 sizeof(struct caps_buffers
) * layout
.olo_num_comps
,
340 objio_seg
->comps
= (void *)(objio_seg
->ods
+ layout
.olo_num_comps
);
341 cur_comp
= objio_seg
->comps
;
342 caps_p
= (void *)(cur_comp
+ layout
.olo_num_comps
);
343 while (pnfs_osd_xdr_decode_layout_comp(&src_comp
, &iter
, xdr
, &err
))
344 copy_single_comp(cur_comp
++, &src_comp
, caps_p
++);
348 objio_seg
->num_comps
= layout
.olo_num_comps
;
349 objio_seg
->comps_index
= layout
.olo_comps_index
;
350 err
= objio_devices_lookup(pnfslay
, objio_seg
, gfp_flags
);
354 objio_seg
->mirrors_p1
= layout
.olo_map
.odm_mirror_cnt
+ 1;
355 objio_seg
->stripe_unit
= layout
.olo_map
.odm_stripe_unit
;
356 if (layout
.olo_map
.odm_group_width
) {
357 objio_seg
->group_width
= layout
.olo_map
.odm_group_width
;
358 objio_seg
->group_depth
= layout
.olo_map
.odm_group_depth
;
359 objio_seg
->group_count
= layout
.olo_map
.odm_num_comps
/
360 objio_seg
->mirrors_p1
/
361 objio_seg
->group_width
;
363 objio_seg
->group_width
= layout
.olo_map
.odm_num_comps
/
364 objio_seg
->mirrors_p1
;
365 objio_seg
->group_depth
= -1;
366 objio_seg
->group_count
= 1;
369 /* Cache this calculation it will hit for every page */
370 objio_seg
->max_io_size
= (BIO_MAX_PAGES_KMALLOC
* PAGE_SIZE
-
371 objio_seg
->stripe_unit
) *
372 objio_seg
->group_width
;
374 *outp
= &objio_seg
->lseg
;
379 dprintk("%s: Error: return %d\n", __func__
, err
);
384 void objio_free_lseg(struct pnfs_layout_segment
*lseg
)
387 struct objio_segment
*objio_seg
= OBJIO_LSEG(lseg
);
389 for (i
= 0; i
< objio_seg
->num_comps
; i
++) {
390 if (!objio_seg
->ods
[i
])
392 nfs4_put_deviceid_node(&objio_seg
->ods
[i
]->id_node
);
397 int objio_alloc_io_state(struct pnfs_layout_segment
*lseg
,
398 struct objlayout_io_state
**outp
,
401 struct objio_segment
*objio_seg
= OBJIO_LSEG(lseg
);
402 struct objio_state
*ios
;
403 const unsigned first_size
= sizeof(*ios
) +
404 objio_seg
->num_comps
* sizeof(ios
->per_dev
[0]);
405 const unsigned sec_size
= objio_seg
->num_comps
*
406 sizeof(ios
->ol_state
.ioerrs
[0]);
408 ios
= kzalloc(first_size
+ sec_size
, gfp_flags
);
412 ios
->layout
= objio_seg
;
413 ios
->ol_state
.ioerrs
= ((void *)ios
) + first_size
;
414 ios
->ol_state
.num_comps
= objio_seg
->num_comps
;
416 *outp
= &ios
->ol_state
;
420 void objio_free_io_state(struct objlayout_io_state
*ol_state
)
422 struct objio_state
*ios
= container_of(ol_state
, struct objio_state
,
428 enum pnfs_osd_errno
osd_pri_2_pnfs_err(enum osd_err_priority oep
)
431 case OSD_ERR_PRI_NO_ERROR
:
432 return (enum pnfs_osd_errno
)0;
434 case OSD_ERR_PRI_CLEAR_PAGES
:
438 case OSD_ERR_PRI_RESOURCE
:
439 return PNFS_OSD_ERR_RESOURCE
;
440 case OSD_ERR_PRI_BAD_CRED
:
441 return PNFS_OSD_ERR_BAD_CRED
;
442 case OSD_ERR_PRI_NO_ACCESS
:
443 return PNFS_OSD_ERR_NO_ACCESS
;
444 case OSD_ERR_PRI_UNREACHABLE
:
445 return PNFS_OSD_ERR_UNREACHABLE
;
446 case OSD_ERR_PRI_NOT_FOUND
:
447 return PNFS_OSD_ERR_NOT_FOUND
;
448 case OSD_ERR_PRI_NO_SPACE
:
449 return PNFS_OSD_ERR_NO_SPACE
;
453 case OSD_ERR_PRI_EIO
:
454 return PNFS_OSD_ERR_EIO
;
458 static void _clear_bio(struct bio
*bio
)
463 __bio_for_each_segment(bv
, bio
, i
, 0) {
464 unsigned this_count
= bv
->bv_len
;
466 if (likely(PAGE_SIZE
== this_count
))
467 clear_highpage(bv
->bv_page
);
469 zero_user(bv
->bv_page
, bv
->bv_offset
, this_count
);
473 static int _io_check(struct objio_state
*ios
, bool is_write
)
475 enum osd_err_priority oep
= OSD_ERR_PRI_NO_ERROR
;
479 for (i
= 0; i
< ios
->numdevs
; i
++) {
480 struct osd_sense_info osi
;
481 struct osd_request
*or = ios
->per_dev
[i
].or;
487 ret
= osd_req_decode_sense(or, &osi
);
491 if (OSD_ERR_PRI_CLEAR_PAGES
== osi
.osd_err_pri
) {
492 /* start read offset passed endof file */
494 _clear_bio(ios
->per_dev
[i
].bio
);
495 dprintk("%s: start read offset passed end of file "
496 "offset=0x%llx, length=0x%lx\n", __func__
,
497 _LLU(ios
->per_dev
[i
].offset
),
498 ios
->per_dev
[i
].length
);
500 continue; /* we recovered */
502 objlayout_io_set_result(&ios
->ol_state
, i
,
503 &ios
->layout
->comps
[i
].oc_object_id
,
504 osd_pri_2_pnfs_err(osi
.osd_err_pri
),
505 ios
->per_dev
[i
].offset
,
506 ios
->per_dev
[i
].length
,
509 if (osi
.osd_err_pri
>= oep
) {
510 oep
= osi
.osd_err_pri
;
/* Common IO state helpers. */
521 static void _io_free(struct objio_state
*ios
)
525 for (i
= 0; i
< ios
->numdevs
; i
++) {
526 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[i
];
529 osd_end_request(per_dev
->or);
534 bio_put(per_dev
->bio
);
540 struct osd_dev
*_io_od(struct objio_state
*ios
, unsigned dev
)
542 unsigned min_dev
= ios
->layout
->comps_index
;
543 unsigned max_dev
= min_dev
+ ios
->layout
->num_comps
;
545 BUG_ON(dev
< min_dev
|| max_dev
<= dev
);
546 return ios
->layout
->ods
[dev
- min_dev
]->od
;
549 struct _striping_info
{
556 static void _calc_stripe_info(struct objio_state
*ios
, u64 file_offset
,
557 struct _striping_info
*si
)
559 u32 stripe_unit
= ios
->layout
->stripe_unit
;
560 u32 group_width
= ios
->layout
->group_width
;
561 u64 group_depth
= ios
->layout
->group_depth
;
562 u32 U
= stripe_unit
* group_width
;
564 u64 T
= U
* group_depth
;
565 u64 S
= T
* ios
->layout
->group_count
;
566 u64 M
= div64_u64(file_offset
, S
);
569 G = (L - (M * S)) / T
570 H = (L - (M * S)) % T
572 u64 LmodU
= file_offset
- M
* S
;
573 u32 G
= div64_u64(LmodU
, T
);
574 u64 H
= LmodU
- G
* T
;
576 u32 N
= div_u64(H
, U
);
578 div_u64_rem(file_offset
, stripe_unit
, &si
->unit_off
);
579 si
->obj_offset
= si
->unit_off
+ (N
* stripe_unit
) +
580 (M
* group_depth
* stripe_unit
);
582 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
583 si
->dev
= (u32
)(H
- (N
* U
)) / stripe_unit
+ G
* group_width
;
584 si
->dev
*= ios
->layout
->mirrors_p1
;
586 si
->group_length
= T
- H
;
589 static int _add_stripe_unit(struct objio_state
*ios
, unsigned *cur_pg
,
590 unsigned pgbase
, struct _objio_per_comp
*per_dev
, int len
,
593 unsigned pg
= *cur_pg
;
595 struct request_queue
*q
=
596 osd_request_queue(_io_od(ios
, per_dev
->dev
));
598 if (per_dev
->bio
== NULL
) {
599 unsigned pages_in_stripe
= ios
->layout
->group_width
*
600 (ios
->layout
->stripe_unit
/ PAGE_SIZE
);
601 unsigned bio_size
= (ios
->ol_state
.nr_pages
+ pages_in_stripe
) /
602 ios
->layout
->group_width
;
604 if (BIO_MAX_PAGES_KMALLOC
< bio_size
)
605 bio_size
= BIO_MAX_PAGES_KMALLOC
;
607 per_dev
->bio
= bio_kmalloc(gfp_flags
, bio_size
);
608 if (unlikely(!per_dev
->bio
)) {
609 dprintk("Faild to allocate BIO size=%u\n", bio_size
);
614 while (cur_len
> 0) {
615 unsigned pglen
= min_t(unsigned, PAGE_SIZE
- pgbase
, cur_len
);
618 BUG_ON(ios
->ol_state
.nr_pages
<= pg
);
621 added_len
= bio_add_pc_page(q
, per_dev
->bio
,
622 ios
->ol_state
.pages
[pg
], pglen
, pgbase
);
623 if (unlikely(pglen
!= added_len
))
630 per_dev
->length
+= len
;
635 static int _prepare_one_group(struct objio_state
*ios
, u64 length
,
636 struct _striping_info
*si
, unsigned *last_pg
,
639 unsigned stripe_unit
= ios
->layout
->stripe_unit
;
640 unsigned mirrors_p1
= ios
->layout
->mirrors_p1
;
641 unsigned devs_in_group
= ios
->layout
->group_width
* mirrors_p1
;
642 unsigned dev
= si
->dev
;
643 unsigned first_dev
= dev
- (dev
% devs_in_group
);
644 unsigned max_comp
= ios
->numdevs
? ios
->numdevs
- mirrors_p1
: 0;
645 unsigned cur_pg
= *last_pg
;
649 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[dev
- first_dev
];
650 unsigned cur_len
, page_off
= 0;
652 if (!per_dev
->length
) {
655 per_dev
->offset
= si
->obj_offset
+ stripe_unit
-
657 cur_len
= stripe_unit
;
658 } else if (dev
== si
->dev
) {
659 per_dev
->offset
= si
->obj_offset
;
660 cur_len
= stripe_unit
- si
->unit_off
;
661 page_off
= si
->unit_off
& ~PAGE_MASK
;
663 (page_off
!= ios
->ol_state
.pgbase
));
664 } else { /* dev > si->dev */
665 per_dev
->offset
= si
->obj_offset
- si
->unit_off
;
666 cur_len
= stripe_unit
;
669 if (max_comp
< dev
- first_dev
)
670 max_comp
= dev
- first_dev
;
672 cur_len
= stripe_unit
;
674 if (cur_len
>= length
)
677 ret
= _add_stripe_unit(ios
, &cur_pg
, page_off
, per_dev
,
683 dev
= (dev
% devs_in_group
) + first_dev
;
686 ios
->length
+= cur_len
;
689 ios
->numdevs
= max_comp
+ mirrors_p1
;
694 static int _io_rw_pagelist(struct objio_state
*ios
, gfp_t gfp_flags
)
696 u64 length
= ios
->ol_state
.count
;
697 u64 offset
= ios
->ol_state
.offset
;
698 struct _striping_info si
;
699 unsigned last_pg
= 0;
703 _calc_stripe_info(ios
, offset
, &si
);
705 if (length
< si
.group_length
)
706 si
.group_length
= length
;
708 ret
= _prepare_one_group(ios
, si
.group_length
, &si
, &last_pg
, gfp_flags
);
712 offset
+= si
.group_length
;
713 length
-= si
.group_length
;
723 static ssize_t
_sync_done(struct objio_state
*ios
)
725 struct completion
*waiting
= ios
->private;
731 static void _last_io(struct kref
*kref
)
733 struct objio_state
*ios
= container_of(kref
, struct objio_state
, kref
);
738 static void _done_io(struct osd_request
*or, void *p
)
740 struct objio_state
*ios
= p
;
742 kref_put(&ios
->kref
, _last_io
);
745 static ssize_t
_io_exec(struct objio_state
*ios
)
747 DECLARE_COMPLETION_ONSTACK(wait
);
748 ssize_t status
= 0; /* sync status */
750 objio_done_fn saved_done_fn
= ios
->done
;
751 bool sync
= ios
->ol_state
.sync
;
754 ios
->done
= _sync_done
;
755 ios
->private = &wait
;
758 kref_init(&ios
->kref
);
760 for (i
= 0; i
< ios
->numdevs
; i
++) {
761 struct osd_request
*or = ios
->per_dev
[i
].or;
766 kref_get(&ios
->kref
);
767 osd_execute_request_async(or, _done_io
, ios
);
770 kref_put(&ios
->kref
, _last_io
);
773 wait_for_completion(&wait
);
774 status
= saved_done_fn(ios
);
783 static ssize_t
_read_done(struct objio_state
*ios
)
786 int ret
= _io_check(ios
, false);
791 status
= ios
->length
;
795 objlayout_read_done(&ios
->ol_state
, status
, ios
->ol_state
.sync
);
799 static int _read_mirrors(struct objio_state
*ios
, unsigned cur_comp
)
801 struct osd_request
*or = NULL
;
802 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[cur_comp
];
803 unsigned dev
= per_dev
->dev
;
804 struct pnfs_osd_object_cred
*cred
=
805 &ios
->layout
->comps
[cur_comp
];
806 struct osd_obj_id obj
= {
807 .partition
= cred
->oc_object_id
.oid_partition_id
,
808 .id
= cred
->oc_object_id
.oid_object_id
,
812 or = osd_start_request(_io_od(ios
, dev
), GFP_KERNEL
);
819 osd_req_read(or, &obj
, per_dev
->offset
, per_dev
->bio
, per_dev
->length
);
821 ret
= osd_finalize_request(or, 0, cred
->oc_cap
.cred
, NULL
);
823 dprintk("%s: Faild to osd_finalize_request() => %d\n",
828 dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
829 __func__
, cur_comp
, dev
, obj
.id
, _LLU(per_dev
->offset
),
836 static ssize_t
_read_exec(struct objio_state
*ios
)
841 for (i
= 0; i
< ios
->numdevs
; i
+= ios
->layout
->mirrors_p1
) {
842 if (!ios
->per_dev
[i
].length
)
844 ret
= _read_mirrors(ios
, i
);
849 ios
->done
= _read_done
;
850 return _io_exec(ios
); /* In sync mode exec returns the io status */
857 ssize_t
objio_read_pagelist(struct objlayout_io_state
*ol_state
)
859 struct objio_state
*ios
= container_of(ol_state
, struct objio_state
,
863 ret
= _io_rw_pagelist(ios
, GFP_KERNEL
);
867 return _read_exec(ios
);
873 static ssize_t
_write_done(struct objio_state
*ios
)
876 int ret
= _io_check(ios
, true);
881 /* FIXME: should be based on the OSD's persistence model
882 * See OSD2r05 Section 4.13 Data persistence model */
883 ios
->ol_state
.committed
= NFS_FILE_SYNC
;
884 status
= ios
->length
;
889 objlayout_write_done(&ios
->ol_state
, status
, ios
->ol_state
.sync
);
893 static int _write_mirrors(struct objio_state
*ios
, unsigned cur_comp
)
895 struct _objio_per_comp
*master_dev
= &ios
->per_dev
[cur_comp
];
896 unsigned dev
= ios
->per_dev
[cur_comp
].dev
;
897 unsigned last_comp
= cur_comp
+ ios
->layout
->mirrors_p1
;
900 for (; cur_comp
< last_comp
; ++cur_comp
, ++dev
) {
901 struct osd_request
*or = NULL
;
902 struct pnfs_osd_object_cred
*cred
=
903 &ios
->layout
->comps
[cur_comp
];
904 struct osd_obj_id obj
= {
905 .partition
= cred
->oc_object_id
.oid_partition_id
,
906 .id
= cred
->oc_object_id
.oid_object_id
,
908 struct _objio_per_comp
*per_dev
= &ios
->per_dev
[cur_comp
];
911 or = osd_start_request(_io_od(ios
, dev
), GFP_NOFS
);
918 if (per_dev
!= master_dev
) {
919 bio
= bio_kmalloc(GFP_NOFS
,
920 master_dev
->bio
->bi_max_vecs
);
921 if (unlikely(!bio
)) {
922 dprintk("Faild to allocate BIO size=%u\n",
923 master_dev
->bio
->bi_max_vecs
);
928 __bio_clone(bio
, master_dev
->bio
);
933 per_dev
->length
= master_dev
->length
;
934 per_dev
->offset
= master_dev
->offset
;
936 bio
= master_dev
->bio
;
937 bio
->bi_rw
|= REQ_WRITE
;
940 osd_req_write(or, &obj
, per_dev
->offset
, bio
, per_dev
->length
);
942 ret
= osd_finalize_request(or, 0, cred
->oc_cap
.cred
, NULL
);
944 dprintk("%s: Faild to osd_finalize_request() => %d\n",
949 dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
950 __func__
, cur_comp
, dev
, obj
.id
, _LLU(per_dev
->offset
),
958 static ssize_t
_write_exec(struct objio_state
*ios
)
963 for (i
= 0; i
< ios
->numdevs
; i
+= ios
->layout
->mirrors_p1
) {
964 if (!ios
->per_dev
[i
].length
)
966 ret
= _write_mirrors(ios
, i
);
971 ios
->done
= _write_done
;
972 return _io_exec(ios
); /* In sync mode exec returns the io->status */
979 ssize_t
objio_write_pagelist(struct objlayout_io_state
*ol_state
, bool stable
)
981 struct objio_state
*ios
= container_of(ol_state
, struct objio_state
,
985 /* TODO: ios->stable = stable; */
986 ret
= _io_rw_pagelist(ios
, GFP_NOFS
);
990 return _write_exec(ios
);
993 static bool objio_pg_test(struct nfs_pageio_descriptor
*pgio
,
994 struct nfs_page
*prev
, struct nfs_page
*req
)
996 if (!pnfs_generic_pg_test(pgio
, prev
, req
))
999 return pgio
->pg_count
+ req
->wb_bytes
<=
1000 OBJIO_LSEG(pgio
->pg_lseg
)->max_io_size
;
1003 static const struct nfs_pageio_ops objio_pg_read_ops
= {
1004 .pg_init
= pnfs_generic_pg_init_read
,
1005 .pg_test
= objio_pg_test
,
1006 .pg_doio
= pnfs_generic_pg_readpages
,
1009 static const struct nfs_pageio_ops objio_pg_write_ops
= {
1010 .pg_init
= pnfs_generic_pg_init_write
,
1011 .pg_test
= objio_pg_test
,
1012 .pg_doio
= pnfs_generic_pg_writepages
,
1015 static struct pnfs_layoutdriver_type objlayout_type
= {
1016 .id
= LAYOUT_OSD2_OBJECTS
,
1017 .name
= "LAYOUT_OSD2_OBJECTS",
1018 .flags
= PNFS_LAYOUTRET_ON_SETATTR
,
1020 .alloc_layout_hdr
= objlayout_alloc_layout_hdr
,
1021 .free_layout_hdr
= objlayout_free_layout_hdr
,
1023 .alloc_lseg
= objlayout_alloc_lseg
,
1024 .free_lseg
= objlayout_free_lseg
,
1026 .read_pagelist
= objlayout_read_pagelist
,
1027 .write_pagelist
= objlayout_write_pagelist
,
1028 .pg_read_ops
= &objio_pg_read_ops
,
1029 .pg_write_ops
= &objio_pg_write_ops
,
1031 .free_deviceid_node
= objio_free_deviceid_node
,
1033 .encode_layoutcommit
= objlayout_encode_layoutcommit
,
1034 .encode_layoutreturn
= objlayout_encode_layoutreturn
,
1037 MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
1038 MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
1039 MODULE_LICENSE("GPL");
1042 objlayout_init(void)
1044 int ret
= pnfs_register_layoutdriver(&objlayout_type
);
1048 "%s: Registering OSD pNFS Layout Driver failed: error=%d\n",
1051 printk(KERN_INFO
"%s: Registered OSD pNFS Layout Driver\n",
1057 objlayout_exit(void)
1059 pnfs_unregister_layoutdriver(&objlayout_type
);
1060 printk(KERN_INFO
"%s: Unregistered OSD pNFS Layout Driver\n",
1064 MODULE_ALIAS("nfs-layouttype4-2");
1066 module_init(objlayout_init
);
1067 module_exit(objlayout_exit
);