/*
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch       <kbusch@kernel.org>
 *   Klaus Jensen      <k.jensen@samsung.com>
 *   Gollu Appalanaidu <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev   <dmitry.fomichev@wdc.com>
 *   Minwoo Im         <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H

#include "qemu/uuid.h"
#include "hw/pci/pci_device.h"
#include "hw/block/block.h"

#include "block/nvme.h"

#define NVME_MAX_CONTROLLERS 256
#define NVME_MAX_NAMESPACES  256
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
#define NVME_FDP_MAX_EVENTS 63
#define NVME_FDP_MAXPIDS 128

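/*
 * NVME_EUI64_DEFAULT appears to seed the default IEEE EUI-64 with the
 * locally administered 52:54:00 prefix that QEMU conventionally uses for
 * virtual MAC addresses; the low-order bytes are presumably filled in per
 * namespace by the device.
 */
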
/*
 * The controller only supports Submission and Completion Queue Entry Sizes of
 * 64 and 16 bytes respectively.
 */

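/*
 * Build-time sanity check: namespace identifiers are 1-based, so the highest
 * supported NSID must stay below NVME_NSID_BROADCAST (0xffffffff), which is
 * reserved for broadcast operations.
 */
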
QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {

#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
#define SUBSYS_SLOT_RSVD (void *)0xFFFF

typedef struct NvmeReclaimUnit {

typedef struct NvmeRuHandle {
    uint64_t event_filter;

    /* reclaim units indexed by reclaim group */

typedef struct NvmeFdpEventBuffer {
    NvmeFdpEvent events[NVME_FDP_MAX_EVENTS];

typedef struct NvmeEnduranceGroup {
    NvmeFdpEventBuffer host_events, ctrl_events;

typedef struct NvmeSubsystem {
    DeviceState parent_obj;

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];
    NvmeEnduranceGroup endgrp;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);

static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                          uint16_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

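/*
 * Illustrative use (not part of the original header): controller code can
 * resolve a shared namespace through the subsystem it is registered with,
 * e.g.
 *
 *     NvmeNamespace *ns = nvme_subsys_ns(n->subsys, nsid);
 *     if (!ns) {
 *         // nsid is not backed by any namespace in this subsystem
 *     }
 *
 * Both helpers return NULL for out-of-range identifiers; nvme_subsys_ctrl()
 * additionally returns NULL for controller slots that are merely reserved
 * (SUBSYS_SLOT_RSVD).
 */
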
#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    QTAILQ_ENTRY(NvmeZone) entry;

#define FDP_EVT_MAX 0xff
#define NVME_FDP_MAX_NS_RUHS 32u

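/*
 * Each supported FDP event type maps to a bit position in the table below;
 * event reporting is presumably enabled by testing
 * 1 << nvme_fdp_evf_shifts[evt] against a reclaim unit handle's
 * event_filter. Host-initiated events occupy the low 32 bits,
 * controller-initiated events start at bit 32.
 */
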
static const uint8_t nvme_fdp_evf_shifts[FDP_EVT_MAX] = {
    /* Host events */
    [FDP_EVT_RU_NOT_FULLY_WRITTEN]      = 0,
    [FDP_EVT_RU_ATL_EXCEEDED]           = 1,
    [FDP_EVT_CTRL_RESET_RUH]            = 2,
    [FDP_EVT_INVALID_PID]               = 3,
    /* CTRL events */
    [FDP_EVT_MEDIA_REALLOC]             = 32,
    [FDP_EVT_RUH_IMPLICIT_RU_CHANGE]    = 33,
};

#define NGUID_LEN 16

typedef struct {
    uint8_t data[NGUID_LEN];
} NvmeNGUID;

bool nvme_nguid_is_null(const NvmeNGUID *nguid);

extern const PropertyInfo qdev_prop_nguid;

#define DEFINE_PROP_NGUID_NODEFAULT(_name, _state, _field) \
    DEFINE_PROP(_name, _state, _field, qdev_prop_nguid, NvmeNGUID)

typedef struct NvmeNamespaceParams {
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;
} NvmeNamespaceParams;

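/*
 * The zoned parameters above are byte-granular configuration values (hence
 * the _bs suffix); namespace setup presumably converts them into the
 * LBA-based zone_size/zone_capacity fields of NvmeNamespace. A
 * max_active_zones/max_open_zones value of 0 means "no limit" (see the
 * nvme_aor_* helpers below).
 */
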
typedef struct NvmeAtomic {
    uint32_t atomic_max_write_size;

typedef struct NvmeNamespace {
    DeviceState parent_obj;
    NvmeIdNsNvm id_ns_nvm;
    const uint32_t *iocs;

    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;
    NvmeSubsystem       *subsys;
    NvmeEnduranceGroup  *endgrp;

    /* reclaim unit handle identifiers indexed by placement handle */

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
    return lba << ns->lbaf.ds;
}

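/*
 * lbaf.ds is the LBA data size expressed as a power of two, so nvme_l2b()
 * converts a block count into bytes. Illustrative: with 4 KiB logical
 * blocks (ds == 12), nvme_l2b(ns, 8) == 8 << 12 == 32768.
 */
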
static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
    return ns->lbaf.ms * lba;
}

static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

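/*
 * nvme_m2b() scales a block count by the per-LBA metadata size (lbaf.ms),
 * and nvme_moff() presumably locates that metadata at a fixed offset
 * (ns->moff) past the data blocks when metadata is stored out of band.
 */
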
static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

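/*
 * The zone state lives in the upper nibble of the Zone State (ZS) byte of
 * the zone descriptor, hence the shift by four in the two accessors above.
 */
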
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

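/*
 * A zone spans zone_size LBAs for reads, but only its capacity may be
 * written: the read boundary uses ns->zone_size while the write boundary
 * stops at zslba + zcap.
 */
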
static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

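/*
 * The nvme_aor_* helpers account for Active and Open Resources: the
 * open/active zone counters are only bumped and checked against the
 * configured limits when max_open_zones/max_active_zones are non-zero,
 * i.e. a value of 0 disables the corresponding limit.
 */
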
static inline void nvme_fdp_stat_inc(uint64_t *a, uint64_t b)
{
    uint64_t ret = *a + b;
    *a = ret < *a ? UINT64_MAX : ret;
}

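/*
 * nvme_fdp_stat_inc() is a saturating 64-bit add: if *a + b wraps around,
 * the statistic sticks at UINT64_MAX instead of overflowing. Illustrative:
 * with *a == UINT64_MAX - 1 and b == 4, *a becomes UINT64_MAX.
 */
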
void nvme_ns_init_format(NvmeNamespace *ns);
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);

typedef struct NvmeAsyncEvent {
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
    NvmeAerResult result;
} NvmeAsyncEvent;

enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

typedef struct NvmeSg {

typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

typedef struct NvmeRequest {
    struct NvmeSQueue    *sq;
    struct NvmeNamespace *ns;
    BlockAcctCookie      acct;
    QTAILQ_ENTRY(NvmeRequest) entry;

typedef struct NvmeBounceContext {

static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
    case NVME_ADM_CMD_DIRECTIVE_SEND:   return "NVME_ADM_CMD_DIRECTIVE_SEND";
    case NVME_ADM_CMD_VIRT_MNGMT:       return "NVME_ADM_CMD_VIRT_MNGMT";
    case NVME_ADM_CMD_DIRECTIVE_RECV:   return "NVME_ADM_CMD_DIRECTIVE_RECV";
    case NVME_ADM_CMD_DBBUF_CONFIG:     return "NVME_ADM_CMD_DBBUF_CONFIG";
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;

    EventNotifier notifier;
    bool ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
    QTAILQ_ENTRY(NvmeSQueue) entry;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint16_t irq_enabled;

    EventNotifier notifier;
    bool ioeventfd_enabled;
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;

#define TYPE_NVME "nvme"
#define NVME(obj) \
    OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

typedef struct NvmeParams {
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint32_t cmb_size_mb;
    uint32_t aer_max_queued;
    bool     auto_transition_zones;
    uint16_t sriov_max_vfs;
    uint16_t sriov_vq_flexible;
    uint16_t sriov_vi_flexible;
    uint32_t sriov_max_vq_per_vf;
    uint32_t sriov_max_vi_per_vf;
    bool     msix_exclusive_bar;

    uint16_t atomic_awun;
    uint16_t atomic_awupf;

typedef struct NvmeCtrl {
    PCIDevice parent_obj;

    uint16_t max_prp_ents;
    uint8_t  outstanding_aers;
    uint64_t host_timestamp;                 /* Timestamp sent by the host */
    uint64_t timestamp_set_qemu_clock_ms;    /* QEMU clock time */
    uint64_t starttime_ms;
    uint16_t temperature;
    uint8_t  smart_critical_warning;
    uint32_t conf_msix_qsize;
    uint32_t conf_ioqpairs;

    HostMemoryBackend *dev;

    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;

    /* Namespace IDs start at 1, so the bitmap must be 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

    NvmeSubsystem *subsys;

    NvmeNamespace namespace;
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    uint16_t temp_thresh_hi;
    uint16_t temp_thresh_low;

    uint32_t async_config;
    NvmeHostBehaviorSupport hbs;

    NvmePriCtrlCap pri_ctrl_cap;
    uint32_t nr_sec_ctrls;
    NvmeSecCtrlEntry *sec_ctrl_list;
    } next_pri_ctrl_cap;    /* These override pri_ctrl_cap after reset */
    uint32_t dn;    /* Disable Normal */

typedef enum NvmeResetType {
    NVME_RESET_FUNCTION   = 0,
    NVME_RESET_CONTROLLER = 1,
} NvmeResetType;

static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
{
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return n->namespaces[nsid];
}

static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;

    return sq->ctrl;
}

static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
{
    PCIDevice *pci_dev = &n->parent_obj;
    NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));

    if (pci_is_vf(pci_dev)) {
        return &pf->sec_ctrl_list[pcie_sriov_vf_number(pci_dev)];
    }

    return NULL;
}

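/*
 * For an SR-IOV virtual function, the secondary controller entry lives in
 * the physical function's sec_ctrl_list, indexed by VF number; for the
 * physical function itself there is presumably no entry to return.
 */
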
static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
                                                      uint16_t cntlid)
{
    NvmeSecCtrlEntry *list = n->sec_ctrl_list;
    uint8_t i;

    for (i = 0; i < n->nr_sec_ctrls; i++) {
        if (le16_to_cpu(list[i].scid) == cntlid) {
            return &list[i];
        }
    }

    return NULL;
}

void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
                          NvmeTxDirection dir, NvmeRequest *req);
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeRequest *req);

#endif /* HW_NVME_NVME_H */