// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target File I/O commands implementation.
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/file.h>
#include "nvmet.h"

#define NVMET_MAX_MPOOL_BVEC		16
#define NVMET_MIN_MPOOL_OBJ		16
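
/*
 * Re-read the size of the backing file so the namespace capacity reported
 * to the host stays in sync with the file on disk.
 */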
int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
{
	struct kstat stat;
	int ret;

	ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
			  AT_STATX_FORCE_SYNC);
	if (!ret)
		ns->size = stat.size;
	return ret;
}
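
/*
 * Tear down the file-backed namespace: drain any buffered-io work, release
 * the bvec mempool and slab cache, and drop the file reference.
 */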
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
	if (ns->file) {
		if (ns->buffered_io)
			flush_workqueue(buffered_io_wq);
		mempool_destroy(ns->bvec_pool);
		ns->bvec_pool = NULL;
		kmem_cache_destroy(ns->bvec_cache);
		ns->bvec_cache = NULL;
		fput(ns->file);
		ns->file = NULL;
	}
}
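
/*
 * Open the backing file (O_DIRECT unless buffered_io is requested), pick a
 * sane logical block shift, and set up the bvec slab cache plus a mempool
 * used as a last-resort allocation under memory pressure.
 */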
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
	int flags = O_RDWR | O_LARGEFILE;
	int ret;

	if (!ns->buffered_io)
		flags |= O_DIRECT;

	ns->file = filp_open(ns->device_path, flags, 0);
	if (IS_ERR(ns->file)) {
		pr_err("failed to open file %s: (%ld)\n",
			ns->device_path, PTR_ERR(ns->file));
		return PTR_ERR(ns->file);
	}

	ret = nvmet_file_ns_revalidate(ns);
	if (ret)
		goto err;

	/*
	 * i_blkbits can be greater than the universally accepted upper bound,
	 * so make sure we export a sane namespace lba_shift.
	 */
	ns->blksize_shift = min_t(u8,
			file_inode(ns->file)->i_blkbits, 12);

	ns->bvec_cache = kmem_cache_create("nvmet-bvec",
			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
			0, SLAB_HWCACHE_ALIGN, NULL);
	if (!ns->bvec_cache) {
		ret = -ENOMEM;
		goto err;
	}

	ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
			mempool_free_slab, ns->bvec_cache);

	if (!ns->bvec_pool) {
		ret = -ENOMEM;
		goto err;
	}

	return ret;
err:
	ns->size = 0;
	ns->blksize_shift = 0;
	nvmet_file_ns_disable(ns);
	return ret;
}
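
/* Map a single scatterlist element onto a bio_vec for the iov_iter. */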
static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
{
	bv->bv_page = sg_page(sg);
	bv->bv_offset = sg->offset;
	bv->bv_len = sg->length;
}
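
/*
 * Build an ITER_BVEC iov_iter over req->f.bvec and call the backing file's
 * ->read_iter or ->write_iter. FUA writes are mapped to IOCB_DSYNC.
 */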
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
		unsigned long nr_segs, size_t count, int ki_flags)
{
	struct kiocb *iocb = &req->f.iocb;
	ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
	struct iov_iter iter;
	int rw;

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			ki_flags |= IOCB_DSYNC;
		call_iter = req->ns->file->f_op->write_iter;
		rw = WRITE;
	} else {
		call_iter = req->ns->file->f_op->read_iter;
		rw = READ;
	}

	iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);

	iocb->ki_pos = pos;
	iocb->ki_filp = req->ns->file;
	iocb->ki_flags = ki_flags | iocb_flags(req->ns->file);

	return call_iter(iocb, &iter);
}
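
/*
 * AIO completion: free the bvec array (kfree or mempool, matching how it
 * was allocated) and complete the request, turning a short transfer into
 * an NVMe status code.
 */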
static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
{
	struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
	u16 status = NVME_SC_SUCCESS;

	if (req->f.bvec != req->inline_bvec) {
		if (likely(req->f.mpool_alloc == false))
			kfree(req->f.bvec);
		else
			mempool_free(req->f.bvec, req->ns->bvec_pool);
	}

	if (unlikely(ret != req->transfer_len))
		status = errno_to_nvme_status(req, ret);
	nvmet_req_complete(req, status);
}
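
/*
 * Issue the data transfer described by the request's scatterlist. Mempool
 * backed requests larger than NVMET_MAX_MPOOL_BVEC are split and submitted
 * synchronously in NVMET_MAX_MPOOL_BVEC sized chunks; everything else goes
 * out as a single (possibly IOCB_NOWAIT) call. Returns false only when the
 * caller should retry without IOCB_NOWAIT.
 */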
static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
{
	ssize_t nr_bvec = req->sg_cnt;
	unsigned long bv_cnt = 0;
	bool is_sync = false;
	size_t len = 0, total_len = 0;
	ssize_t ret = 0;
	loff_t pos;
	int i;
	struct scatterlist *sg;

	if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
		is_sync = true;

	pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
	if (unlikely(pos + req->transfer_len > req->ns->size)) {
		nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
		return true;
	}

	memset(&req->f.iocb, 0, sizeof(struct kiocb));
	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
		len += req->f.bvec[bv_cnt].bv_len;
		total_len += req->f.bvec[bv_cnt].bv_len;
		bv_cnt++;

		WARN_ON_ONCE((nr_bvec - 1) < 0);

		if (unlikely(is_sync) &&
		    (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
			ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len, 0);
			if (ret < 0)
				goto complete;

			pos += len;
			bv_cnt = 0;
			len = 0;
		}
		nr_bvec--;
	}

	if (WARN_ON_ONCE(total_len != req->transfer_len)) {
		ret = -EIO;
		goto complete;
	}

	if (unlikely(is_sync)) {
		ret = total_len;
		goto complete;
	}

	/*
	 * A NULL ki_complete asks for synchronous execution, which we want
	 * for the IOCB_NOWAIT case.
	 */
	if (!(ki_flags & IOCB_NOWAIT))
		req->f.iocb.ki_complete = nvmet_file_io_done;

	ret = nvmet_file_submit_bvec(req, pos, bv_cnt, total_len, ki_flags);

	switch (ret) {
	case -EIOCBQUEUED:
		return true;
	case -EAGAIN:
		if (WARN_ON_ONCE(!(ki_flags & IOCB_NOWAIT)))
			goto complete;
		return false;
	case -EOPNOTSUPP:
		/*
		 * For file systems returning error -EOPNOTSUPP, handle
		 * IOCB_NOWAIT error case separately and retry without
		 * IOCB_NOWAIT.
		 */
		if ((ki_flags & IOCB_NOWAIT))
			return false;
		break;
	}

complete:
	nvmet_file_io_done(&req->f.iocb, ret, 0);
	return true;
}
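
/*
 * Buffered I/O is executed from a work item on buffered_io_wq rather than
 * directly in the submission context.
 */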
static void nvmet_file_buffered_io_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

	nvmet_file_execute_io(req, 0);
}

static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
{
	INIT_WORK(&req->f.work, nvmet_file_buffered_io_work);
	queue_work(buffered_io_wq, &req->f.work);
}
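
/*
 * Read/Write entry point: validate the transfer length, pick a bvec array
 * (inline, kmalloc'd, or mempool-backed as a fallback), then execute the
 * I/O directly or via the buffered-io workqueue.
 */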
static void nvmet_file_execute_rw(struct nvmet_req *req)
{
	ssize_t nr_bvec = req->sg_cnt;

	if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
		return;

	if (!req->sg_cnt || !nr_bvec) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
		req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
				GFP_KERNEL);
	else
		req->f.bvec = req->inline_bvec;

	if (unlikely(!req->f.bvec)) {
		/* fallback under memory pressure */
		req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
		req->f.mpool_alloc = true;
	} else
		req->f.mpool_alloc = false;

	if (req->ns->buffered_io) {
		if (likely(!req->f.mpool_alloc) &&
		    nvmet_file_execute_io(req, IOCB_NOWAIT))
			return;
		nvmet_file_submit_buffered_io(req);
	} else
		nvmet_file_execute_io(req, 0);
}
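
/* Flush is implemented as an fsync of the backing file, run from a work item. */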
u16 nvmet_file_flush(struct nvmet_req *req)
{
	return errno_to_nvme_status(req, vfs_fsync(req->ns->file, 1));
}

static void nvmet_file_flush_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

	nvmet_req_complete(req, nvmet_file_flush(req));
}

static void nvmet_file_execute_flush(struct nvmet_req *req)
{
	if (!nvmet_check_transfer_len(req, 0))
		return;
	INIT_WORK(&req->f.work, nvmet_file_flush_work);
	schedule_work(&req->f.work);
}
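
/*
 * Discard (Dataset Management / Deallocate) punches holes in the backing
 * file, one range at a time, and records the failing SLBA on error.
 */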
static void nvmet_file_execute_discard(struct nvmet_req *req)
{
	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
	struct nvme_dsm_range range;
	loff_t offset, len;
	u16 status = 0;
	int ret;
	int i;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
					sizeof(range));
		if (status)
			break;

		offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
		len = le32_to_cpu(range.nlb);
		len <<= req->ns->blksize_shift;
		if (offset + len > req->ns->size) {
			req->error_slba = le64_to_cpu(range.slba);
			status = errno_to_nvme_status(req, -ENOSPC);
			break;
		}

		ret = vfs_fallocate(req->ns->file, mode, offset, len);
		if (ret && ret != -EOPNOTSUPP) {
			req->error_slba = le64_to_cpu(range.slba);
			status = errno_to_nvme_status(req, ret);
			break;
		}
	}

	nvmet_req_complete(req, status);
}
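
/* DSM dispatch: only the Deallocate attribute is handled. */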
static void nvmet_file_dsm_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_file_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
		return;
	INIT_WORK(&req->f.work, nvmet_file_dsm_work);
	schedule_work(&req->f.work);
}
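
/*
 * Write Zeroes is mapped to fallocate(FALLOC_FL_ZERO_RANGE), keeping the
 * file size unchanged.
 */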
static void nvmet_file_write_zeroes_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
	loff_t offset;
	loff_t len;
	int ret;

	offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
	len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
			req->ns->blksize_shift);

	if (unlikely(offset + len > req->ns->size)) {
		nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
		return;
	}

	ret = vfs_fallocate(req->ns->file, mode, offset, len);
	nvmet_req_complete(req, ret < 0 ? errno_to_nvme_status(req, ret) : 0);
}

static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
	if (!nvmet_check_transfer_len(req, 0))
		return;
	INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
	schedule_work(&req->f.work);
}
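
/* Set up req->execute for the I/O opcodes supported on file-backed namespaces. */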
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_file_execute_rw;
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_file_execute_flush;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_file_execute_dsm;
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_file_execute_write_zeroes;
		return 0;
	default:
		pr_err("unhandled cmd for file ns %d on qid %d\n",
				cmd->common.opcode, req->sq->qid);
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}