1 // SPDX-License-Identifier: GPL-2.0
3 * NVMe admin command implementation.
4 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 #include <linux/module.h>
8 #include <linux/rculist.h>
9 #include <linux/part_stat.h>
11 #include <generated/utsrelease.h>
12 #include <linux/unaligned.h>
15 u32
nvmet_get_log_page_len(struct nvme_command
*cmd
)
17 u32 len
= le16_to_cpu(cmd
->get_log_page
.numdu
);
20 len
+= le16_to_cpu(cmd
->get_log_page
.numdl
);
21 /* NUMD is a 0's based value */
28 static u32
nvmet_feat_data_len(struct nvmet_req
*req
, u32 cdw10
)
30 switch (cdw10
& 0xff) {
31 case NVME_FEAT_HOST_ID
:
32 return sizeof(req
->sq
->ctrl
->hostid
);
38 u64
nvmet_get_log_page_offset(struct nvme_command
*cmd
)
40 return le64_to_cpu(cmd
->get_log_page
.lpo
);
43 static void nvmet_execute_get_log_page_noop(struct nvmet_req
*req
)
45 nvmet_req_complete(req
, nvmet_zero_sgl(req
, 0, req
->transfer_len
));
48 static void nvmet_execute_get_log_page_error(struct nvmet_req
*req
)
50 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
56 spin_lock_irqsave(&ctrl
->error_lock
, flags
);
57 slot
= ctrl
->err_counter
% NVMET_ERROR_LOG_SLOTS
;
59 for (i
= 0; i
< NVMET_ERROR_LOG_SLOTS
; i
++) {
60 if (nvmet_copy_to_sgl(req
, offset
, &ctrl
->slots
[slot
],
61 sizeof(struct nvme_error_slot
)))
65 slot
= NVMET_ERROR_LOG_SLOTS
- 1;
68 offset
+= sizeof(struct nvme_error_slot
);
70 spin_unlock_irqrestore(&ctrl
->error_lock
, flags
);
71 nvmet_req_complete(req
, 0);
74 static void nvmet_execute_get_supported_log_pages(struct nvmet_req
*req
)
76 struct nvme_supported_log
*logs
;
79 logs
= kzalloc(sizeof(*logs
), GFP_KERNEL
);
81 status
= NVME_SC_INTERNAL
;
85 logs
->lids
[NVME_LOG_SUPPORTED
] = cpu_to_le32(NVME_LIDS_LSUPP
);
86 logs
->lids
[NVME_LOG_ERROR
] = cpu_to_le32(NVME_LIDS_LSUPP
);
87 logs
->lids
[NVME_LOG_SMART
] = cpu_to_le32(NVME_LIDS_LSUPP
);
88 logs
->lids
[NVME_LOG_FW_SLOT
] = cpu_to_le32(NVME_LIDS_LSUPP
);
89 logs
->lids
[NVME_LOG_CHANGED_NS
] = cpu_to_le32(NVME_LIDS_LSUPP
);
90 logs
->lids
[NVME_LOG_CMD_EFFECTS
] = cpu_to_le32(NVME_LIDS_LSUPP
);
91 logs
->lids
[NVME_LOG_ENDURANCE_GROUP
] = cpu_to_le32(NVME_LIDS_LSUPP
);
92 logs
->lids
[NVME_LOG_ANA
] = cpu_to_le32(NVME_LIDS_LSUPP
);
93 logs
->lids
[NVME_LOG_FEATURES
] = cpu_to_le32(NVME_LIDS_LSUPP
);
94 logs
->lids
[NVME_LOG_RMI
] = cpu_to_le32(NVME_LIDS_LSUPP
);
95 logs
->lids
[NVME_LOG_RESERVATION
] = cpu_to_le32(NVME_LIDS_LSUPP
);
97 status
= nvmet_copy_to_sgl(req
, 0, logs
, sizeof(*logs
));
100 nvmet_req_complete(req
, status
);
103 static u16
nvmet_get_smart_log_nsid(struct nvmet_req
*req
,
104 struct nvme_smart_log
*slog
)
106 u64 host_reads
, host_writes
, data_units_read
, data_units_written
;
109 status
= nvmet_req_find_ns(req
);
113 /* we don't have the right data for file backed ns */
115 return NVME_SC_SUCCESS
;
117 host_reads
= part_stat_read(req
->ns
->bdev
, ios
[READ
]);
119 DIV_ROUND_UP(part_stat_read(req
->ns
->bdev
, sectors
[READ
]), 1000);
120 host_writes
= part_stat_read(req
->ns
->bdev
, ios
[WRITE
]);
122 DIV_ROUND_UP(part_stat_read(req
->ns
->bdev
, sectors
[WRITE
]), 1000);
124 put_unaligned_le64(host_reads
, &slog
->host_reads
[0]);
125 put_unaligned_le64(data_units_read
, &slog
->data_units_read
[0]);
126 put_unaligned_le64(host_writes
, &slog
->host_writes
[0]);
127 put_unaligned_le64(data_units_written
, &slog
->data_units_written
[0]);
129 return NVME_SC_SUCCESS
;
132 static u16
nvmet_get_smart_log_all(struct nvmet_req
*req
,
133 struct nvme_smart_log
*slog
)
135 u64 host_reads
= 0, host_writes
= 0;
136 u64 data_units_read
= 0, data_units_written
= 0;
138 struct nvmet_ctrl
*ctrl
;
141 ctrl
= req
->sq
->ctrl
;
142 xa_for_each(&ctrl
->subsys
->namespaces
, idx
, ns
) {
143 /* we don't have the right data for file backed ns */
146 host_reads
+= part_stat_read(ns
->bdev
, ios
[READ
]);
147 data_units_read
+= DIV_ROUND_UP(
148 part_stat_read(ns
->bdev
, sectors
[READ
]), 1000);
149 host_writes
+= part_stat_read(ns
->bdev
, ios
[WRITE
]);
150 data_units_written
+= DIV_ROUND_UP(
151 part_stat_read(ns
->bdev
, sectors
[WRITE
]), 1000);
154 put_unaligned_le64(host_reads
, &slog
->host_reads
[0]);
155 put_unaligned_le64(data_units_read
, &slog
->data_units_read
[0]);
156 put_unaligned_le64(host_writes
, &slog
->host_writes
[0]);
157 put_unaligned_le64(data_units_written
, &slog
->data_units_written
[0]);
159 return NVME_SC_SUCCESS
;
162 static void nvmet_execute_get_log_page_rmi(struct nvmet_req
*req
)
164 struct nvme_rotational_media_log
*log
;
165 struct gendisk
*disk
;
168 req
->cmd
->common
.nsid
= cpu_to_le32(le16_to_cpu(
169 req
->cmd
->get_log_page
.lsi
));
170 status
= nvmet_req_find_ns(req
);
174 if (!req
->ns
->bdev
|| bdev_nonrot(req
->ns
->bdev
)) {
175 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
179 if (req
->transfer_len
!= sizeof(*log
)) {
180 status
= NVME_SC_SGL_INVALID_DATA
| NVME_STATUS_DNR
;
184 log
= kzalloc(sizeof(*log
), GFP_KERNEL
);
188 log
->endgid
= req
->cmd
->get_log_page
.lsi
;
189 disk
= req
->ns
->bdev
->bd_disk
;
190 if (disk
&& disk
->ia_ranges
)
191 log
->numa
= cpu_to_le16(disk
->ia_ranges
->nr_ia_ranges
);
193 log
->numa
= cpu_to_le16(1);
195 status
= nvmet_copy_to_sgl(req
, 0, log
, sizeof(*log
));
198 nvmet_req_complete(req
, status
);
201 static void nvmet_execute_get_log_page_smart(struct nvmet_req
*req
)
203 struct nvme_smart_log
*log
;
204 u16 status
= NVME_SC_INTERNAL
;
207 if (req
->transfer_len
!= sizeof(*log
))
210 log
= kzalloc(sizeof(*log
), GFP_KERNEL
);
214 if (req
->cmd
->get_log_page
.nsid
== cpu_to_le32(NVME_NSID_ALL
))
215 status
= nvmet_get_smart_log_all(req
, log
);
217 status
= nvmet_get_smart_log_nsid(req
, log
);
221 spin_lock_irqsave(&req
->sq
->ctrl
->error_lock
, flags
);
222 put_unaligned_le64(req
->sq
->ctrl
->err_counter
,
223 &log
->num_err_log_entries
);
224 spin_unlock_irqrestore(&req
->sq
->ctrl
->error_lock
, flags
);
226 status
= nvmet_copy_to_sgl(req
, 0, log
, sizeof(*log
));
230 nvmet_req_complete(req
, status
);
233 static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log
*log
)
235 log
->acs
[nvme_admin_get_log_page
] =
236 log
->acs
[nvme_admin_identify
] =
237 log
->acs
[nvme_admin_abort_cmd
] =
238 log
->acs
[nvme_admin_set_features
] =
239 log
->acs
[nvme_admin_get_features
] =
240 log
->acs
[nvme_admin_async_event
] =
241 log
->acs
[nvme_admin_keep_alive
] =
242 cpu_to_le32(NVME_CMD_EFFECTS_CSUPP
);
244 log
->iocs
[nvme_cmd_read
] =
245 log
->iocs
[nvme_cmd_flush
] =
246 log
->iocs
[nvme_cmd_dsm
] =
247 log
->iocs
[nvme_cmd_resv_acquire
] =
248 log
->iocs
[nvme_cmd_resv_register
] =
249 log
->iocs
[nvme_cmd_resv_release
] =
250 log
->iocs
[nvme_cmd_resv_report
] =
251 cpu_to_le32(NVME_CMD_EFFECTS_CSUPP
);
252 log
->iocs
[nvme_cmd_write
] =
253 log
->iocs
[nvme_cmd_write_zeroes
] =
254 cpu_to_le32(NVME_CMD_EFFECTS_CSUPP
| NVME_CMD_EFFECTS_LBCC
);
257 static void nvmet_get_cmd_effects_zns(struct nvme_effects_log
*log
)
259 log
->iocs
[nvme_cmd_zone_append
] =
260 log
->iocs
[nvme_cmd_zone_mgmt_send
] =
261 cpu_to_le32(NVME_CMD_EFFECTS_CSUPP
| NVME_CMD_EFFECTS_LBCC
);
262 log
->iocs
[nvme_cmd_zone_mgmt_recv
] =
263 cpu_to_le32(NVME_CMD_EFFECTS_CSUPP
);
266 static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req
*req
)
268 struct nvme_effects_log
*log
;
269 u16 status
= NVME_SC_SUCCESS
;
271 log
= kzalloc(sizeof(*log
), GFP_KERNEL
);
273 status
= NVME_SC_INTERNAL
;
277 switch (req
->cmd
->get_log_page
.csi
) {
279 nvmet_get_cmd_effects_nvm(log
);
282 if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED
)) {
283 status
= NVME_SC_INVALID_IO_CMD_SET
;
286 nvmet_get_cmd_effects_nvm(log
);
287 nvmet_get_cmd_effects_zns(log
);
290 status
= NVME_SC_INVALID_LOG_PAGE
;
294 status
= nvmet_copy_to_sgl(req
, 0, log
, sizeof(*log
));
298 nvmet_req_complete(req
, status
);
301 static void nvmet_execute_get_log_changed_ns(struct nvmet_req
*req
)
303 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
304 u16 status
= NVME_SC_INTERNAL
;
307 if (req
->transfer_len
!= NVME_MAX_CHANGED_NAMESPACES
* sizeof(__le32
))
310 mutex_lock(&ctrl
->lock
);
311 if (ctrl
->nr_changed_ns
== U32_MAX
)
312 len
= sizeof(__le32
);
314 len
= ctrl
->nr_changed_ns
* sizeof(__le32
);
315 status
= nvmet_copy_to_sgl(req
, 0, ctrl
->changed_ns_list
, len
);
317 status
= nvmet_zero_sgl(req
, len
, req
->transfer_len
- len
);
318 ctrl
->nr_changed_ns
= 0;
319 nvmet_clear_aen_bit(req
, NVME_AEN_BIT_NS_ATTR
);
320 mutex_unlock(&ctrl
->lock
);
322 nvmet_req_complete(req
, status
);
325 static u32
nvmet_format_ana_group(struct nvmet_req
*req
, u32 grpid
,
326 struct nvme_ana_group_desc
*desc
)
328 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
333 if (!(req
->cmd
->get_log_page
.lsp
& NVME_ANA_LOG_RGO
)) {
334 xa_for_each(&ctrl
->subsys
->namespaces
, idx
, ns
)
335 if (ns
->anagrpid
== grpid
)
336 desc
->nsids
[count
++] = cpu_to_le32(ns
->nsid
);
339 desc
->grpid
= cpu_to_le32(grpid
);
340 desc
->nnsids
= cpu_to_le32(count
);
341 desc
->chgcnt
= cpu_to_le64(nvmet_ana_chgcnt
);
342 desc
->state
= req
->port
->ana_state
[grpid
];
343 memset(desc
->rsvd17
, 0, sizeof(desc
->rsvd17
));
344 return struct_size(desc
, nsids
, count
);
347 static void nvmet_execute_get_log_page_endgrp(struct nvmet_req
*req
)
349 u64 host_reads
, host_writes
, data_units_read
, data_units_written
;
350 struct nvme_endurance_group_log
*log
;
354 * The target driver emulates each endurance group as its own
355 * namespace, reusing the nsid as the endurance group identifier.
357 req
->cmd
->common
.nsid
= cpu_to_le32(le16_to_cpu(
358 req
->cmd
->get_log_page
.lsi
));
359 status
= nvmet_req_find_ns(req
);
363 log
= kzalloc(sizeof(*log
), GFP_KERNEL
);
365 status
= NVME_SC_INTERNAL
;
372 host_reads
= part_stat_read(req
->ns
->bdev
, ios
[READ
]);
374 DIV_ROUND_UP(part_stat_read(req
->ns
->bdev
, sectors
[READ
]), 1000);
375 host_writes
= part_stat_read(req
->ns
->bdev
, ios
[WRITE
]);
377 DIV_ROUND_UP(part_stat_read(req
->ns
->bdev
, sectors
[WRITE
]), 1000);
379 put_unaligned_le64(host_reads
, &log
->hrc
[0]);
380 put_unaligned_le64(data_units_read
, &log
->dur
[0]);
381 put_unaligned_le64(host_writes
, &log
->hwc
[0]);
382 put_unaligned_le64(data_units_written
, &log
->duw
[0]);
384 status
= nvmet_copy_to_sgl(req
, 0, log
, sizeof(*log
));
387 nvmet_req_complete(req
, status
);
390 static void nvmet_execute_get_log_page_ana(struct nvmet_req
*req
)
392 struct nvme_ana_rsp_hdr hdr
= { 0, };
393 struct nvme_ana_group_desc
*desc
;
394 size_t offset
= sizeof(struct nvme_ana_rsp_hdr
); /* start beyond hdr */
400 status
= NVME_SC_INTERNAL
;
401 desc
= kmalloc(struct_size(desc
, nsids
, NVMET_MAX_NAMESPACES
),
406 down_read(&nvmet_ana_sem
);
407 for (grpid
= 1; grpid
<= NVMET_MAX_ANAGRPS
; grpid
++) {
408 if (!nvmet_ana_group_enabled
[grpid
])
410 len
= nvmet_format_ana_group(req
, grpid
, desc
);
411 status
= nvmet_copy_to_sgl(req
, offset
, desc
, len
);
417 for ( ; grpid
<= NVMET_MAX_ANAGRPS
; grpid
++) {
418 if (nvmet_ana_group_enabled
[grpid
])
422 hdr
.chgcnt
= cpu_to_le64(nvmet_ana_chgcnt
);
423 hdr
.ngrps
= cpu_to_le16(ngrps
);
424 nvmet_clear_aen_bit(req
, NVME_AEN_BIT_ANA_CHANGE
);
425 up_read(&nvmet_ana_sem
);
429 /* copy the header last once we know the number of groups */
430 status
= nvmet_copy_to_sgl(req
, 0, &hdr
, sizeof(hdr
));
432 nvmet_req_complete(req
, status
);
435 static void nvmet_execute_get_log_page_features(struct nvmet_req
*req
)
437 struct nvme_supported_features_log
*features
;
440 features
= kzalloc(sizeof(*features
), GFP_KERNEL
);
442 status
= NVME_SC_INTERNAL
;
446 features
->fis
[NVME_FEAT_NUM_QUEUES
] =
447 cpu_to_le32(NVME_FIS_FSUPP
| NVME_FIS_CSCPE
);
448 features
->fis
[NVME_FEAT_KATO
] =
449 cpu_to_le32(NVME_FIS_FSUPP
| NVME_FIS_CSCPE
);
450 features
->fis
[NVME_FEAT_ASYNC_EVENT
] =
451 cpu_to_le32(NVME_FIS_FSUPP
| NVME_FIS_CSCPE
);
452 features
->fis
[NVME_FEAT_HOST_ID
] =
453 cpu_to_le32(NVME_FIS_FSUPP
| NVME_FIS_CSCPE
);
454 features
->fis
[NVME_FEAT_WRITE_PROTECT
] =
455 cpu_to_le32(NVME_FIS_FSUPP
| NVME_FIS_NSCPE
);
456 features
->fis
[NVME_FEAT_RESV_MASK
] =
457 cpu_to_le32(NVME_FIS_FSUPP
| NVME_FIS_NSCPE
);
459 status
= nvmet_copy_to_sgl(req
, 0, features
, sizeof(*features
));
462 nvmet_req_complete(req
, status
);
465 static void nvmet_execute_get_log_page(struct nvmet_req
*req
)
467 if (!nvmet_check_transfer_len(req
, nvmet_get_log_page_len(req
->cmd
)))
470 switch (req
->cmd
->get_log_page
.lid
) {
471 case NVME_LOG_SUPPORTED
:
472 return nvmet_execute_get_supported_log_pages(req
);
474 return nvmet_execute_get_log_page_error(req
);
476 return nvmet_execute_get_log_page_smart(req
);
477 case NVME_LOG_FW_SLOT
:
479 * We only support a single firmware slot which always is
480 * active, so we can zero out the whole firmware slot log and
481 * still claim to fully implement this mandatory log page.
483 return nvmet_execute_get_log_page_noop(req
);
484 case NVME_LOG_CHANGED_NS
:
485 return nvmet_execute_get_log_changed_ns(req
);
486 case NVME_LOG_CMD_EFFECTS
:
487 return nvmet_execute_get_log_cmd_effects_ns(req
);
488 case NVME_LOG_ENDURANCE_GROUP
:
489 return nvmet_execute_get_log_page_endgrp(req
);
491 return nvmet_execute_get_log_page_ana(req
);
492 case NVME_LOG_FEATURES
:
493 return nvmet_execute_get_log_page_features(req
);
495 return nvmet_execute_get_log_page_rmi(req
);
496 case NVME_LOG_RESERVATION
:
497 return nvmet_execute_get_log_page_resv(req
);
499 pr_debug("unhandled lid %d on qid %d\n",
500 req
->cmd
->get_log_page
.lid
, req
->sq
->qid
);
501 req
->error_loc
= offsetof(struct nvme_get_log_page_command
, lid
);
502 nvmet_req_complete(req
, NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
);
505 static void nvmet_execute_identify_ctrl(struct nvmet_req
*req
)
507 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
508 struct nvmet_subsys
*subsys
= ctrl
->subsys
;
509 struct nvme_id_ctrl
*id
;
510 u32 cmd_capsule_size
;
513 if (!subsys
->subsys_discovered
) {
514 mutex_lock(&subsys
->lock
);
515 subsys
->subsys_discovered
= true;
516 mutex_unlock(&subsys
->lock
);
519 id
= kzalloc(sizeof(*id
), GFP_KERNEL
);
521 status
= NVME_SC_INTERNAL
;
525 /* XXX: figure out how to assign real vendors IDs. */
529 memcpy(id
->sn
, ctrl
->subsys
->serial
, NVMET_SN_MAX_SIZE
);
530 memcpy_and_pad(id
->mn
, sizeof(id
->mn
), subsys
->model_number
,
531 strlen(subsys
->model_number
), ' ');
532 memcpy_and_pad(id
->fr
, sizeof(id
->fr
),
533 subsys
->firmware_rev
, strlen(subsys
->firmware_rev
), ' ');
535 put_unaligned_le24(subsys
->ieee_oui
, id
->ieee
);
539 if (nvmet_is_disc_subsys(ctrl
->subsys
))
540 id
->cntrltype
= NVME_CTRL_DISC
;
542 id
->cntrltype
= NVME_CTRL_IO
;
544 /* we support multiple ports, multiples hosts and ANA: */
545 id
->cmic
= NVME_CTRL_CMIC_MULTI_PORT
| NVME_CTRL_CMIC_MULTI_CTRL
|
548 /* Limit MDTS according to transport capability */
549 if (ctrl
->ops
->get_mdts
)
550 id
->mdts
= ctrl
->ops
->get_mdts(ctrl
);
554 id
->cntlid
= cpu_to_le16(ctrl
->cntlid
);
555 id
->ver
= cpu_to_le32(ctrl
->subsys
->ver
);
557 /* XXX: figure out what to do about RTD3R/RTD3 */
558 id
->oaes
= cpu_to_le32(NVMET_AEN_CFG_OPTIONAL
);
559 id
->ctratt
= cpu_to_le32(NVME_CTRL_ATTR_HID_128_BIT
|
560 NVME_CTRL_ATTR_TBKAS
);
565 * We don't really have a practical limit on the number of abort
566 * comands. But we don't do anything useful for abort either, so
567 * no point in allowing more abort commands than the spec requires.
571 id
->aerl
= NVMET_ASYNC_EVENTS
- 1;
573 /* first slot is read-only, only one slot supported */
574 id
->frmw
= (1 << 0) | (1 << 1);
575 id
->lpa
= (1 << 0) | (1 << 1) | (1 << 2);
576 id
->elpe
= NVMET_ERROR_LOG_SLOTS
- 1;
579 /* We support keep-alive timeout in granularity of seconds */
580 id
->kas
= cpu_to_le16(NVMET_KAS
);
582 id
->sqes
= (0x6 << 4) | 0x6;
583 id
->cqes
= (0x4 << 4) | 0x4;
585 /* no enforcement soft-limit for maxcmd - pick arbitrary high value */
586 id
->maxcmd
= cpu_to_le16(NVMET_MAX_CMD(ctrl
));
588 id
->nn
= cpu_to_le32(NVMET_MAX_NAMESPACES
);
589 id
->mnan
= cpu_to_le32(NVMET_MAX_NAMESPACES
);
590 id
->oncs
= cpu_to_le16(NVME_CTRL_ONCS_DSM
|
591 NVME_CTRL_ONCS_WRITE_ZEROES
|
592 NVME_CTRL_ONCS_RESERVATIONS
);
594 /* XXX: don't report vwc if the underlying device is write through */
595 id
->vwc
= NVME_CTRL_VWC_PRESENT
;
598 * We can't support atomic writes bigger than a LBA without support
599 * from the backend device.
604 /* we always support SGLs */
605 id
->sgls
= cpu_to_le32(NVME_CTRL_SGLS_BYTE_ALIGNED
);
606 if (ctrl
->ops
->flags
& NVMF_KEYED_SGLS
)
607 id
->sgls
|= cpu_to_le32(NVME_CTRL_SGLS_KSDBDS
);
608 if (req
->port
->inline_data_size
)
609 id
->sgls
|= cpu_to_le32(NVME_CTRL_SGLS_SAOS
);
611 strscpy(id
->subnqn
, ctrl
->subsys
->subsysnqn
, sizeof(id
->subnqn
));
614 * Max command capsule size is sqe + in-capsule data size.
615 * Disable in-capsule data for Metadata capable controllers.
617 cmd_capsule_size
= sizeof(struct nvme_command
);
618 if (!ctrl
->pi_support
)
619 cmd_capsule_size
+= req
->port
->inline_data_size
;
620 id
->ioccsz
= cpu_to_le32(cmd_capsule_size
/ 16);
622 /* Max response capsule size is cqe */
623 id
->iorcsz
= cpu_to_le32(sizeof(struct nvme_completion
) / 16);
625 id
->msdbd
= ctrl
->ops
->msdbd
;
628 * Endurance group identifier is 16 bits, so we can't let namespaces
629 * overflow that since we reuse the nsid
631 BUILD_BUG_ON(NVMET_MAX_NAMESPACES
> USHRT_MAX
);
632 id
->endgidmax
= cpu_to_le16(NVMET_MAX_NAMESPACES
);
634 id
->anacap
= (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
635 id
->anatt
= 10; /* random value */
636 id
->anagrpmax
= cpu_to_le32(NVMET_MAX_ANAGRPS
);
637 id
->nanagrpid
= cpu_to_le32(NVMET_MAX_ANAGRPS
);
640 * Meh, we don't really support any power state. Fake up the same
641 * values that qemu does.
643 id
->psd
[0].max_power
= cpu_to_le16(0x9c4);
644 id
->psd
[0].entry_lat
= cpu_to_le32(0x10);
645 id
->psd
[0].exit_lat
= cpu_to_le32(0x4);
647 id
->nwpc
= 1 << 0; /* write protect and no write protect */
649 status
= nvmet_copy_to_sgl(req
, 0, id
, sizeof(*id
));
653 nvmet_req_complete(req
, status
);
656 static void nvmet_execute_identify_ns(struct nvmet_req
*req
)
658 struct nvme_id_ns
*id
;
661 if (le32_to_cpu(req
->cmd
->identify
.nsid
) == NVME_NSID_ALL
) {
662 req
->error_loc
= offsetof(struct nvme_identify
, nsid
);
663 status
= NVME_SC_INVALID_NS
| NVME_STATUS_DNR
;
667 id
= kzalloc(sizeof(*id
), GFP_KERNEL
);
669 status
= NVME_SC_INTERNAL
;
673 /* return an all zeroed buffer if we can't find an active namespace */
674 status
= nvmet_req_find_ns(req
);
680 if (nvmet_ns_revalidate(req
->ns
)) {
681 mutex_lock(&req
->ns
->subsys
->lock
);
682 nvmet_ns_changed(req
->ns
->subsys
, req
->ns
->nsid
);
683 mutex_unlock(&req
->ns
->subsys
->lock
);
687 * nuse = ncap = nsze isn't always true, but we have no way to find
688 * that out from the underlying device.
690 id
->ncap
= id
->nsze
=
691 cpu_to_le64(req
->ns
->size
>> req
->ns
->blksize_shift
);
692 switch (req
->port
->ana_state
[req
->ns
->anagrpid
]) {
693 case NVME_ANA_INACCESSIBLE
:
694 case NVME_ANA_PERSISTENT_LOSS
:
702 nvmet_bdev_set_limits(req
->ns
->bdev
, id
);
705 * We just provide a single LBA format that matches what the
706 * underlying device reports.
712 * Our namespace might always be shared. Not just with other
713 * controllers, but also with any other user of the block device.
715 id
->nmic
= NVME_NS_NMIC_SHARED
;
716 id
->anagrpid
= cpu_to_le32(req
->ns
->anagrpid
);
718 if (req
->ns
->pr
.enable
)
719 id
->rescap
= NVME_PR_SUPPORT_WRITE_EXCLUSIVE
|
720 NVME_PR_SUPPORT_EXCLUSIVE_ACCESS
|
721 NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY
|
722 NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY
|
723 NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS
|
724 NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS
|
725 NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF
;
728 * Since we don't know any better, every namespace is its own endurance
731 id
->endgid
= cpu_to_le16(req
->ns
->nsid
);
733 memcpy(&id
->nguid
, &req
->ns
->nguid
, sizeof(id
->nguid
));
735 id
->lbaf
[0].ds
= req
->ns
->blksize_shift
;
737 if (req
->sq
->ctrl
->pi_support
&& nvmet_ns_has_pi(req
->ns
)) {
738 id
->dpc
= NVME_NS_DPC_PI_FIRST
| NVME_NS_DPC_PI_LAST
|
739 NVME_NS_DPC_PI_TYPE1
| NVME_NS_DPC_PI_TYPE2
|
740 NVME_NS_DPC_PI_TYPE3
;
741 id
->mc
= NVME_MC_EXTENDED_LBA
;
742 id
->dps
= req
->ns
->pi_type
;
743 id
->flbas
= NVME_NS_FLBAS_META_EXT
;
744 id
->lbaf
[0].ms
= cpu_to_le16(req
->ns
->metadata_size
);
747 if (req
->ns
->readonly
)
748 id
->nsattr
|= NVME_NS_ATTR_RO
;
751 status
= nvmet_copy_to_sgl(req
, 0, id
, sizeof(*id
));
755 nvmet_req_complete(req
, status
);
758 static void nvmet_execute_identify_endgrp_list(struct nvmet_req
*req
)
760 u16 min_endgid
= le16_to_cpu(req
->cmd
->identify
.cnssid
);
761 static const int buf_size
= NVME_IDENTIFY_DATA_SIZE
;
762 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
769 list
= kzalloc(buf_size
, GFP_KERNEL
);
771 status
= NVME_SC_INTERNAL
;
775 xa_for_each(&ctrl
->subsys
->namespaces
, idx
, ns
) {
776 if (ns
->nsid
<= min_endgid
)
779 list
[i
++] = cpu_to_le16(ns
->nsid
);
780 if (i
== buf_size
/ sizeof(__le16
))
784 list
[0] = cpu_to_le16(i
- 1);
785 status
= nvmet_copy_to_sgl(req
, 0, list
, buf_size
);
788 nvmet_req_complete(req
, status
);
791 static void nvmet_execute_identify_nslist(struct nvmet_req
*req
, bool match_css
)
793 static const int buf_size
= NVME_IDENTIFY_DATA_SIZE
;
794 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
797 u32 min_nsid
= le32_to_cpu(req
->cmd
->identify
.nsid
);
803 * NSID values 0xFFFFFFFE and NVME_NSID_ALL are invalid
804 * See NVMe Base Specification, Active Namespace ID list (CNS 02h).
806 if (min_nsid
== 0xFFFFFFFE || min_nsid
== NVME_NSID_ALL
) {
807 req
->error_loc
= offsetof(struct nvme_identify
, nsid
);
808 status
= NVME_SC_INVALID_NS
| NVME_STATUS_DNR
;
812 list
= kzalloc(buf_size
, GFP_KERNEL
);
814 status
= NVME_SC_INTERNAL
;
818 xa_for_each(&ctrl
->subsys
->namespaces
, idx
, ns
) {
819 if (ns
->nsid
<= min_nsid
)
821 if (match_css
&& req
->ns
->csi
!= req
->cmd
->identify
.csi
)
823 list
[i
++] = cpu_to_le32(ns
->nsid
);
824 if (i
== buf_size
/ sizeof(__le32
))
828 status
= nvmet_copy_to_sgl(req
, 0, list
, buf_size
);
832 nvmet_req_complete(req
, status
);
835 static u16
nvmet_copy_ns_identifier(struct nvmet_req
*req
, u8 type
, u8 len
,
836 void *id
, off_t
*off
)
838 struct nvme_ns_id_desc desc
= {
844 status
= nvmet_copy_to_sgl(req
, *off
, &desc
, sizeof(desc
));
847 *off
+= sizeof(desc
);
849 status
= nvmet_copy_to_sgl(req
, *off
, id
, len
);
857 static void nvmet_execute_identify_desclist(struct nvmet_req
*req
)
862 status
= nvmet_req_find_ns(req
);
866 if (memchr_inv(&req
->ns
->uuid
, 0, sizeof(req
->ns
->uuid
))) {
867 status
= nvmet_copy_ns_identifier(req
, NVME_NIDT_UUID
,
869 &req
->ns
->uuid
, &off
);
873 if (memchr_inv(req
->ns
->nguid
, 0, sizeof(req
->ns
->nguid
))) {
874 status
= nvmet_copy_ns_identifier(req
, NVME_NIDT_NGUID
,
876 &req
->ns
->nguid
, &off
);
881 status
= nvmet_copy_ns_identifier(req
, NVME_NIDT_CSI
,
883 &req
->ns
->csi
, &off
);
887 if (sg_zero_buffer(req
->sg
, req
->sg_cnt
, NVME_IDENTIFY_DATA_SIZE
- off
,
888 off
) != NVME_IDENTIFY_DATA_SIZE
- off
)
889 status
= NVME_SC_INTERNAL
| NVME_STATUS_DNR
;
892 nvmet_req_complete(req
, status
);
895 static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req
*req
)
897 /* Not supported: return zeroes */
898 nvmet_req_complete(req
,
899 nvmet_zero_sgl(req
, 0, sizeof(struct nvme_id_ctrl_nvm
)));
902 static void nvme_execute_identify_ns_nvm(struct nvmet_req
*req
)
905 struct nvme_id_ns_nvm
*id
;
907 status
= nvmet_req_find_ns(req
);
911 id
= kzalloc(sizeof(*id
), GFP_KERNEL
);
913 status
= NVME_SC_INTERNAL
;
916 status
= nvmet_copy_to_sgl(req
, 0, id
, sizeof(*id
));
918 nvmet_req_complete(req
, status
);
921 static void nvmet_execute_id_cs_indep(struct nvmet_req
*req
)
923 struct nvme_id_ns_cs_indep
*id
;
926 status
= nvmet_req_find_ns(req
);
930 id
= kzalloc(sizeof(*id
), GFP_KERNEL
);
932 status
= NVME_SC_INTERNAL
;
936 id
->nstat
= NVME_NSTAT_NRDY
;
937 id
->anagrpid
= cpu_to_le32(req
->ns
->anagrpid
);
938 id
->nmic
= NVME_NS_NMIC_SHARED
;
939 if (req
->ns
->readonly
)
940 id
->nsattr
|= NVME_NS_ATTR_RO
;
941 if (req
->ns
->bdev
&& !bdev_nonrot(req
->ns
->bdev
))
942 id
->nsfeat
|= NVME_NS_ROTATIONAL
;
944 * We need flush command to flush the file's metadata,
945 * so report supporting vwc if backend is file, even
946 * though buffered_io is disable.
948 if (req
->ns
->bdev
&& !bdev_write_cache(req
->ns
->bdev
))
949 id
->nsfeat
|= NVME_NS_VWC_NOT_PRESENT
;
951 status
= nvmet_copy_to_sgl(req
, 0, id
, sizeof(*id
));
954 nvmet_req_complete(req
, status
);
957 static void nvmet_execute_identify(struct nvmet_req
*req
)
959 if (!nvmet_check_transfer_len(req
, NVME_IDENTIFY_DATA_SIZE
))
962 switch (req
->cmd
->identify
.cns
) {
964 nvmet_execute_identify_ns(req
);
966 case NVME_ID_CNS_CTRL
:
967 nvmet_execute_identify_ctrl(req
);
969 case NVME_ID_CNS_NS_ACTIVE_LIST
:
970 nvmet_execute_identify_nslist(req
, false);
972 case NVME_ID_CNS_NS_DESC_LIST
:
973 nvmet_execute_identify_desclist(req
);
975 case NVME_ID_CNS_CS_NS
:
976 switch (req
->cmd
->identify
.csi
) {
978 nvme_execute_identify_ns_nvm(req
);
981 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED
)) {
982 nvmet_execute_identify_ns_zns(req
);
988 case NVME_ID_CNS_CS_CTRL
:
989 switch (req
->cmd
->identify
.csi
) {
991 nvmet_execute_identify_ctrl_nvm(req
);
994 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED
)) {
995 nvmet_execute_identify_ctrl_zns(req
);
1001 case NVME_ID_CNS_NS_ACTIVE_LIST_CS
:
1002 nvmet_execute_identify_nslist(req
, true);
1004 case NVME_ID_CNS_NS_CS_INDEP
:
1005 nvmet_execute_id_cs_indep(req
);
1007 case NVME_ID_CNS_ENDGRP_LIST
:
1008 nvmet_execute_identify_endgrp_list(req
);
1012 pr_debug("unhandled identify cns %d on qid %d\n",
1013 req
->cmd
->identify
.cns
, req
->sq
->qid
);
1014 req
->error_loc
= offsetof(struct nvme_identify
, cns
);
1015 nvmet_req_complete(req
, NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
);
/*
 * A "minimum viable" abort implementation: the command is mandatory in the
 * spec, but we are not required to do any useful work.  We couldn't really
 * do a useful abort, so don't bother even with waiting for the command
 * to be executed and return immediately telling the command to abort
 * wasn't found.
 */
static void nvmet_execute_abort(struct nvmet_req *req)
{
	if (nvmet_check_transfer_len(req, 0)) {
		nvmet_set_result(req, 1);
		nvmet_req_complete(req, 0);
	}
}
1033 static u16
nvmet_write_protect_flush_sync(struct nvmet_req
*req
)
1038 status
= nvmet_file_flush(req
);
1040 status
= nvmet_bdev_flush(req
);
1043 pr_err("write protect flush failed nsid: %u\n", req
->ns
->nsid
);
1047 static u16
nvmet_set_feat_write_protect(struct nvmet_req
*req
)
1049 u32 write_protect
= le32_to_cpu(req
->cmd
->common
.cdw11
);
1050 struct nvmet_subsys
*subsys
= nvmet_req_subsys(req
);
1053 status
= nvmet_req_find_ns(req
);
1057 mutex_lock(&subsys
->lock
);
1058 switch (write_protect
) {
1059 case NVME_NS_WRITE_PROTECT
:
1060 req
->ns
->readonly
= true;
1061 status
= nvmet_write_protect_flush_sync(req
);
1063 req
->ns
->readonly
= false;
1065 case NVME_NS_NO_WRITE_PROTECT
:
1066 req
->ns
->readonly
= false;
1074 nvmet_ns_changed(subsys
, req
->ns
->nsid
);
1075 mutex_unlock(&subsys
->lock
);
1079 u16
nvmet_set_feat_kato(struct nvmet_req
*req
)
1081 u32 val32
= le32_to_cpu(req
->cmd
->common
.cdw11
);
1083 nvmet_stop_keep_alive_timer(req
->sq
->ctrl
);
1084 req
->sq
->ctrl
->kato
= DIV_ROUND_UP(val32
, 1000);
1085 nvmet_start_keep_alive_timer(req
->sq
->ctrl
);
1087 nvmet_set_result(req
, req
->sq
->ctrl
->kato
);
1092 u16
nvmet_set_feat_async_event(struct nvmet_req
*req
, u32 mask
)
1094 u32 val32
= le32_to_cpu(req
->cmd
->common
.cdw11
);
1096 if (val32
& ~mask
) {
1097 req
->error_loc
= offsetof(struct nvme_common_command
, cdw11
);
1098 return NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
1101 WRITE_ONCE(req
->sq
->ctrl
->aen_enabled
, val32
);
1102 nvmet_set_result(req
, val32
);
1107 void nvmet_execute_set_features(struct nvmet_req
*req
)
1109 struct nvmet_subsys
*subsys
= nvmet_req_subsys(req
);
1110 u32 cdw10
= le32_to_cpu(req
->cmd
->common
.cdw10
);
1111 u32 cdw11
= le32_to_cpu(req
->cmd
->common
.cdw11
);
1116 if (!nvmet_check_data_len_lte(req
, 0))
1119 switch (cdw10
& 0xff) {
1120 case NVME_FEAT_NUM_QUEUES
:
1121 ncqr
= (cdw11
>> 16) & 0xffff;
1122 nsqr
= cdw11
& 0xffff;
1123 if (ncqr
== 0xffff || nsqr
== 0xffff) {
1124 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
1127 nvmet_set_result(req
,
1128 (subsys
->max_qid
- 1) | ((subsys
->max_qid
- 1) << 16));
1130 case NVME_FEAT_KATO
:
1131 status
= nvmet_set_feat_kato(req
);
1133 case NVME_FEAT_ASYNC_EVENT
:
1134 status
= nvmet_set_feat_async_event(req
, NVMET_AEN_CFG_ALL
);
1136 case NVME_FEAT_HOST_ID
:
1137 status
= NVME_SC_CMD_SEQ_ERROR
| NVME_STATUS_DNR
;
1139 case NVME_FEAT_WRITE_PROTECT
:
1140 status
= nvmet_set_feat_write_protect(req
);
1142 case NVME_FEAT_RESV_MASK
:
1143 status
= nvmet_set_feat_resv_notif_mask(req
, cdw11
);
1146 req
->error_loc
= offsetof(struct nvme_common_command
, cdw10
);
1147 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
1151 nvmet_req_complete(req
, status
);
1154 static u16
nvmet_get_feat_write_protect(struct nvmet_req
*req
)
1156 struct nvmet_subsys
*subsys
= nvmet_req_subsys(req
);
1159 result
= nvmet_req_find_ns(req
);
1163 mutex_lock(&subsys
->lock
);
1164 if (req
->ns
->readonly
== true)
1165 result
= NVME_NS_WRITE_PROTECT
;
1167 result
= NVME_NS_NO_WRITE_PROTECT
;
1168 nvmet_set_result(req
, result
);
1169 mutex_unlock(&subsys
->lock
);
1174 void nvmet_get_feat_kato(struct nvmet_req
*req
)
1176 nvmet_set_result(req
, req
->sq
->ctrl
->kato
* 1000);
1179 void nvmet_get_feat_async_event(struct nvmet_req
*req
)
1181 nvmet_set_result(req
, READ_ONCE(req
->sq
->ctrl
->aen_enabled
));
1184 void nvmet_execute_get_features(struct nvmet_req
*req
)
1186 struct nvmet_subsys
*subsys
= nvmet_req_subsys(req
);
1187 u32 cdw10
= le32_to_cpu(req
->cmd
->common
.cdw10
);
1190 if (!nvmet_check_transfer_len(req
, nvmet_feat_data_len(req
, cdw10
)))
1193 switch (cdw10
& 0xff) {
1195 * These features are mandatory in the spec, but we don't
1196 * have a useful way to implement them. We'll eventually
1197 * need to come up with some fake values for these.
1200 case NVME_FEAT_ARBITRATION
:
1202 case NVME_FEAT_POWER_MGMT
:
1204 case NVME_FEAT_TEMP_THRESH
:
1206 case NVME_FEAT_ERR_RECOVERY
:
1208 case NVME_FEAT_IRQ_COALESCE
:
1210 case NVME_FEAT_IRQ_CONFIG
:
1212 case NVME_FEAT_WRITE_ATOMIC
:
1215 case NVME_FEAT_ASYNC_EVENT
:
1216 nvmet_get_feat_async_event(req
);
1218 case NVME_FEAT_VOLATILE_WC
:
1219 nvmet_set_result(req
, 1);
1221 case NVME_FEAT_NUM_QUEUES
:
1222 nvmet_set_result(req
,
1223 (subsys
->max_qid
-1) | ((subsys
->max_qid
-1) << 16));
1225 case NVME_FEAT_KATO
:
1226 nvmet_get_feat_kato(req
);
1228 case NVME_FEAT_HOST_ID
:
1229 /* need 128-bit host identifier flag */
1230 if (!(req
->cmd
->common
.cdw11
& cpu_to_le32(1 << 0))) {
1232 offsetof(struct nvme_common_command
, cdw11
);
1233 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
1237 status
= nvmet_copy_to_sgl(req
, 0, &req
->sq
->ctrl
->hostid
,
1238 sizeof(req
->sq
->ctrl
->hostid
));
1240 case NVME_FEAT_WRITE_PROTECT
:
1241 status
= nvmet_get_feat_write_protect(req
);
1243 case NVME_FEAT_RESV_MASK
:
1244 status
= nvmet_get_feat_resv_notif_mask(req
);
1248 offsetof(struct nvme_common_command
, cdw10
);
1249 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
1253 nvmet_req_complete(req
, status
);
1256 void nvmet_execute_async_event(struct nvmet_req
*req
)
1258 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
1260 if (!nvmet_check_transfer_len(req
, 0))
1263 mutex_lock(&ctrl
->lock
);
1264 if (ctrl
->nr_async_event_cmds
>= NVMET_ASYNC_EVENTS
) {
1265 mutex_unlock(&ctrl
->lock
);
1266 nvmet_req_complete(req
, NVME_SC_ASYNC_LIMIT
| NVME_STATUS_DNR
);
1269 ctrl
->async_event_cmds
[ctrl
->nr_async_event_cmds
++] = req
;
1270 mutex_unlock(&ctrl
->lock
);
1272 queue_work(nvmet_wq
, &ctrl
->async_event_work
);
1275 void nvmet_execute_keep_alive(struct nvmet_req
*req
)
1277 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
1280 if (!nvmet_check_transfer_len(req
, 0))
1284 status
= NVME_SC_KA_TIMEOUT_INVALID
;
1288 pr_debug("ctrl %d update keep-alive timer for %d secs\n",
1289 ctrl
->cntlid
, ctrl
->kato
);
1290 mod_delayed_work(system_wq
, &ctrl
->ka_work
, ctrl
->kato
* HZ
);
1292 nvmet_req_complete(req
, status
);
1295 u16
nvmet_parse_admin_cmd(struct nvmet_req
*req
)
1297 struct nvme_command
*cmd
= req
->cmd
;
1300 if (nvme_is_fabrics(cmd
))
1301 return nvmet_parse_fabrics_admin_cmd(req
);
1302 if (nvmet_is_disc_subsys(nvmet_req_subsys(req
)))
1303 return nvmet_parse_discovery_cmd(req
);
1305 ret
= nvmet_check_ctrl_status(req
);
1309 if (nvmet_is_passthru_req(req
))
1310 return nvmet_parse_passthru_admin_cmd(req
);
1312 switch (cmd
->common
.opcode
) {
1313 case nvme_admin_get_log_page
:
1314 req
->execute
= nvmet_execute_get_log_page
;
1316 case nvme_admin_identify
:
1317 req
->execute
= nvmet_execute_identify
;
1319 case nvme_admin_abort_cmd
:
1320 req
->execute
= nvmet_execute_abort
;
1322 case nvme_admin_set_features
:
1323 req
->execute
= nvmet_execute_set_features
;
1325 case nvme_admin_get_features
:
1326 req
->execute
= nvmet_execute_get_features
;
1328 case nvme_admin_async_event
:
1329 req
->execute
= nvmet_execute_async_event
;
1331 case nvme_admin_keep_alive
:
1332 req
->execute
= nvmet_execute_keep_alive
;
1335 return nvmet_report_invalid_opcode(req
);