2 * QEMU PAPR Storage Class Memory Interfaces
4 * Copyright (c) 2019-2020, IBM Corporation.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "qemu/osdep.h"
25 #include "qemu/cutils.h"
26 #include "qapi/error.h"
27 #include "hw/ppc/spapr_drc.h"
28 #include "hw/ppc/spapr_nvdimm.h"
29 #include "hw/mem/nvdimm.h"
30 #include "qemu/nvdimm-utils.h"
31 #include "hw/ppc/fdt.h"
32 #include "qemu/range.h"
33 #include "hw/ppc/spapr_numa.h"
34 #include "block/thread-pool.h"
35 #include "migration/vmstate.h"
36 #include "qemu/pmem.h"
37 #include "hw/qdev-properties.h"
39 /* DIMM health bitmap indicators. Taken from the kernel's papr_scm.c */
40 /* SCM device is unable to persist memory contents */
41 #define PAPR_PMEM_UNARMED PPC_BIT(0)
44 * The nvdimm size should be aligned to the SCM block size.
45 * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
46 * so that SCM regions do not overlap with dimm memory regions.
47 * SCM devices can have variable block sizes. For now, the block
48 * size is fixed to the minimum value.
50 #define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE
52 /* Have an explicit check for alignment */
53 QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE
% SPAPR_MEMORY_BLOCK_SIZE
);
55 #define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
56 OBJECT_DECLARE_TYPE(SpaprNVDIMMDevice
, SPAPRNVDIMMClass
, SPAPR_NVDIMM
)
58 struct SPAPRNVDIMMClass
{
60 NVDIMMClass parent_class
;
63 void (*realize
)(NVDIMMDevice
*dimm
, Error
**errp
);
64 void (*unrealize
)(NVDIMMDevice
*dimm
, Error
**errp
);
67 bool spapr_nvdimm_validate(HotplugHandler
*hotplug_dev
, NVDIMMDevice
*nvdimm
,
68 uint64_t size
, Error
**errp
)
70 const MachineClass
*mc
= MACHINE_GET_CLASS(hotplug_dev
);
71 const MachineState
*ms
= MACHINE(hotplug_dev
);
72 PCDIMMDevice
*dimm
= PC_DIMM(nvdimm
);
73 MemoryRegion
*mr
= host_memory_backend_get_memory(dimm
->hostmem
);
74 g_autofree
char *uuidstr
= NULL
;
78 if (!mc
->nvdimm_supported
) {
79 error_setg(errp
, "NVDIMM hotplug not supported for this machine");
83 if (!ms
->nvdimms_state
->is_enabled
) {
84 error_setg(errp
, "nvdimm device found but 'nvdimm=off' was set");
88 if (object_property_get_int(OBJECT(nvdimm
), NVDIMM_LABEL_SIZE_PROP
,
90 error_setg(errp
, "PAPR requires NVDIMM devices to have label-size set");
94 if (size
% SPAPR_MINIMUM_SCM_BLOCK_SIZE
) {
95 error_setg(errp
, "PAPR requires NVDIMM memory size (excluding label)"
96 " to be a multiple of %" PRIu64
"MB",
97 SPAPR_MINIMUM_SCM_BLOCK_SIZE
/ MiB
);
101 uuidstr
= object_property_get_str(OBJECT(nvdimm
), NVDIMM_UUID_PROP
,
103 ret
= qemu_uuid_parse(uuidstr
, &uuid
);
106 if (qemu_uuid_is_null(&uuid
)) {
107 error_setg(errp
, "NVDIMM device requires the uuid to be set");
111 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
) &&
112 (memory_region_get_fd(mr
) < 0)) {
113 error_setg(errp
, "spapr-nvdimm device requires the "
114 "memdev %s to be of memory-backend-file type",
115 object_get_canonical_path_component(OBJECT(dimm
->hostmem
)));
123 void spapr_add_nvdimm(DeviceState
*dev
, uint64_t slot
)
126 bool hotplugged
= spapr_drc_hotplugged(dev
);
128 drc
= spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM
, slot
);
132 * pc_dimm_get_free_slot() provided a free slot at pre-plug. The
133 * corresponding DRC is thus assumed to be attachable.
135 spapr_drc_attach(drc
, dev
);
138 spapr_hotplug_req_add_by_index(drc
);
142 static int spapr_dt_nvdimm(SpaprMachineState
*spapr
, void *fdt
,
143 int parent_offset
, NVDIMMDevice
*nvdimm
)
149 uint32_t node
= object_property_get_uint(OBJECT(nvdimm
), PC_DIMM_NODE_PROP
,
151 uint64_t slot
= object_property_get_uint(OBJECT(nvdimm
), PC_DIMM_SLOT_PROP
,
153 uint64_t lsize
= nvdimm
->label_size
;
154 uint64_t size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
157 drc
= spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM
, slot
);
160 drc_idx
= spapr_drc_index(drc
);
162 buf
= g_strdup_printf("ibm,pmemory@%x", drc_idx
);
163 child_offset
= fdt_add_subnode(fdt
, parent_offset
, buf
);
168 _FDT((fdt_setprop_cell(fdt
, child_offset
, "reg", drc_idx
)));
169 _FDT((fdt_setprop_string(fdt
, child_offset
, "compatible", "ibm,pmemory")));
170 _FDT((fdt_setprop_string(fdt
, child_offset
, "device_type", "ibm,pmemory")));
172 spapr_numa_write_associativity_dt(spapr
, fdt
, child_offset
, node
);
174 buf
= qemu_uuid_unparse_strdup(&nvdimm
->uuid
);
175 _FDT((fdt_setprop_string(fdt
, child_offset
, "ibm,unit-guid", buf
)));
178 _FDT((fdt_setprop_cell(fdt
, child_offset
, "ibm,my-drc-index", drc_idx
)));
180 _FDT((fdt_setprop_u64(fdt
, child_offset
, "ibm,block-size",
181 SPAPR_MINIMUM_SCM_BLOCK_SIZE
)));
182 _FDT((fdt_setprop_u64(fdt
, child_offset
, "ibm,number-of-blocks",
183 size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
)));
184 _FDT((fdt_setprop_cell(fdt
, child_offset
, "ibm,metadata-size", lsize
)));
186 _FDT((fdt_setprop_string(fdt
, child_offset
, "ibm,pmem-application",
187 "operating-system")));
188 _FDT(fdt_setprop(fdt
, child_offset
, "ibm,cache-flush-required", NULL
, 0));
190 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
)) {
191 bool is_pmem
= false, pmem_override
= false;
192 PCDIMMDevice
*dimm
= PC_DIMM(nvdimm
);
193 HostMemoryBackend
*hostmem
= dimm
->hostmem
;
195 is_pmem
= object_property_get_bool(OBJECT(hostmem
), "pmem", NULL
);
196 pmem_override
= object_property_get_bool(OBJECT(nvdimm
),
197 "pmem-override", NULL
);
198 if (!is_pmem
|| pmem_override
) {
199 _FDT(fdt_setprop(fdt
, child_offset
, "ibm,hcall-flush-required",
207 int spapr_pmem_dt_populate(SpaprDrc
*drc
, SpaprMachineState
*spapr
,
208 void *fdt
, int *fdt_start_offset
, Error
**errp
)
210 NVDIMMDevice
*nvdimm
= NVDIMM(drc
->dev
);
212 *fdt_start_offset
= spapr_dt_nvdimm(spapr
, fdt
, 0, nvdimm
);
217 void spapr_dt_persistent_memory(SpaprMachineState
*spapr
, void *fdt
)
219 int offset
= fdt_subnode_offset(fdt
, 0, "ibm,persistent-memory");
220 GSList
*iter
, *nvdimms
= nvdimm_get_device_list();
223 offset
= fdt_add_subnode(fdt
, 0, "ibm,persistent-memory");
225 _FDT((fdt_setprop_cell(fdt
, offset
, "#address-cells", 0x1)));
226 _FDT((fdt_setprop_cell(fdt
, offset
, "#size-cells", 0x0)));
227 _FDT((fdt_setprop_string(fdt
, offset
, "device_type",
228 "ibm,persistent-memory")));
231 /* Create DT entries for cold plugged NVDIMM devices */
232 for (iter
= nvdimms
; iter
; iter
= iter
->next
) {
233 NVDIMMDevice
*nvdimm
= iter
->data
;
235 spapr_dt_nvdimm(spapr
, fdt
, offset
, nvdimm
);
237 g_slist_free(nvdimms
);
242 static target_ulong
h_scm_read_metadata(PowerPCCPU
*cpu
,
243 SpaprMachineState
*spapr
,
247 uint32_t drc_index
= args
[0];
248 uint64_t offset
= args
[1];
249 uint64_t len
= args
[2];
250 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
251 NVDIMMDevice
*nvdimm
;
254 uint8_t buf
[8] = { 0 };
256 if (!drc
|| !drc
->dev
||
257 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
261 if (len
!= 1 && len
!= 2 &&
262 len
!= 4 && len
!= 8) {
266 nvdimm
= NVDIMM(drc
->dev
);
267 if ((offset
+ len
< offset
) ||
268 (nvdimm
->label_size
< len
+ offset
)) {
272 ddc
= NVDIMM_GET_CLASS(nvdimm
);
273 ddc
->read_label_data(nvdimm
, buf
, len
, offset
);
280 data
= lduw_be_p(buf
);
283 data
= ldl_be_p(buf
);
286 data
= ldq_be_p(buf
);
289 g_assert_not_reached();
297 static target_ulong
h_scm_write_metadata(PowerPCCPU
*cpu
,
298 SpaprMachineState
*spapr
,
302 uint32_t drc_index
= args
[0];
303 uint64_t offset
= args
[1];
304 uint64_t data
= args
[2];
305 uint64_t len
= args
[3];
306 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
307 NVDIMMDevice
*nvdimm
;
309 uint8_t buf
[8] = { 0 };
311 if (!drc
|| !drc
->dev
||
312 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
316 if (len
!= 1 && len
!= 2 &&
317 len
!= 4 && len
!= 8) {
321 nvdimm
= NVDIMM(drc
->dev
);
322 if ((offset
+ len
< offset
) ||
323 (nvdimm
->label_size
< len
+ offset
)) {
329 if (data
& 0xffffffffffffff00) {
335 if (data
& 0xffffffffffff0000) {
341 if (data
& 0xffffffff00000000) {
350 g_assert_not_reached();
353 ddc
= NVDIMM_GET_CLASS(nvdimm
);
354 ddc
->write_label_data(nvdimm
, buf
, len
, offset
);
359 static target_ulong
h_scm_bind_mem(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
360 target_ulong opcode
, target_ulong
*args
)
362 uint32_t drc_index
= args
[0];
363 uint64_t starting_idx
= args
[1];
364 uint64_t no_of_scm_blocks_to_bind
= args
[2];
365 uint64_t target_logical_mem_addr
= args
[3];
366 uint64_t continue_token
= args
[4];
368 uint64_t total_no_of_scm_blocks
;
369 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
371 NVDIMMDevice
*nvdimm
;
373 if (!drc
|| !drc
->dev
||
374 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
379 * Currently the continue token should be zero: qemu has already bound
380 * everything and this hcall doesn't return H_BUSY.
382 if (continue_token
> 0) {
386 /* Currently qemu assigns the address. */
387 if (target_logical_mem_addr
!= 0xffffffffffffffff) {
391 nvdimm
= NVDIMM(drc
->dev
);
393 size
= object_property_get_uint(OBJECT(nvdimm
),
394 PC_DIMM_SIZE_PROP
, &error_abort
);
396 total_no_of_scm_blocks
= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
398 if (starting_idx
> total_no_of_scm_blocks
) {
402 if (((starting_idx
+ no_of_scm_blocks_to_bind
) < starting_idx
) ||
403 ((starting_idx
+ no_of_scm_blocks_to_bind
) > total_no_of_scm_blocks
)) {
407 addr
= object_property_get_uint(OBJECT(nvdimm
),
408 PC_DIMM_ADDR_PROP
, &error_abort
);
410 addr
+= starting_idx
* SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
412 /* Already bound; return the target logical address in R5 */
414 args
[2] = no_of_scm_blocks_to_bind
;
419 typedef struct SpaprNVDIMMDeviceFlushState
{
420 uint64_t continue_token
;
424 QLIST_ENTRY(SpaprNVDIMMDeviceFlushState
) node
;
425 } SpaprNVDIMMDeviceFlushState
;
427 typedef struct SpaprNVDIMMDevice SpaprNVDIMMDevice
;
428 struct SpaprNVDIMMDevice
{
430 NVDIMMDevice parent_obj
;
432 bool hcall_flush_required
;
433 uint64_t nvdimm_flush_token
;
434 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState
) pending_nvdimm_flush_states
;
435 QLIST_HEAD(, SpaprNVDIMMDeviceFlushState
) completed_nvdimm_flush_states
;
440 * Setting this property to 'on' forces qemu to enable the hcall
441 * flush for the nvdimm device even if the backend is a pmem.
446 static int flush_worker_cb(void *opaque
)
448 SpaprNVDIMMDeviceFlushState
*state
= opaque
;
449 SpaprDrc
*drc
= spapr_drc_by_index(state
->drcidx
);
451 HostMemoryBackend
*backend
;
454 g_assert(drc
!= NULL
);
456 dimm
= PC_DIMM(drc
->dev
);
457 backend
= MEMORY_BACKEND(dimm
->hostmem
);
458 backend_fd
= memory_region_get_fd(&backend
->mr
);
460 if (object_property_get_bool(OBJECT(backend
), "pmem", NULL
)) {
461 MemoryRegion
*mr
= host_memory_backend_get_memory(dimm
->hostmem
);
462 void *ptr
= memory_region_get_ram_ptr(mr
);
463 size_t size
= object_property_get_uint(OBJECT(dimm
), PC_DIMM_SIZE_PROP
,
466 /* flush pmem backend */
467 pmem_persist(ptr
, size
);
469 /* flush raw backing image */
470 if (qemu_fdatasync(backend_fd
) < 0) {
471 error_report("papr_scm: Could not sync nvdimm to backend file: %s",
480 static void spapr_nvdimm_flush_completion_cb(void *opaque
, int hcall_ret
)
482 SpaprNVDIMMDeviceFlushState
*state
= opaque
;
483 SpaprDrc
*drc
= spapr_drc_by_index(state
->drcidx
);
484 SpaprNVDIMMDevice
*s_nvdimm
;
486 g_assert(drc
!= NULL
);
488 s_nvdimm
= SPAPR_NVDIMM(drc
->dev
);
490 state
->hcall_ret
= hcall_ret
;
491 QLIST_REMOVE(state
, node
);
492 QLIST_INSERT_HEAD(&s_nvdimm
->completed_nvdimm_flush_states
, state
, node
);
495 static int spapr_nvdimm_flush_post_load(void *opaque
, int version_id
)
497 SpaprNVDIMMDevice
*s_nvdimm
= (SpaprNVDIMMDevice
*)opaque
;
498 SpaprNVDIMMDeviceFlushState
*state
;
499 ThreadPool
*pool
= aio_get_thread_pool(qemu_get_aio_context());
500 HostMemoryBackend
*backend
= MEMORY_BACKEND(PC_DIMM(s_nvdimm
)->hostmem
);
501 bool is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
502 bool pmem_override
= object_property_get_bool(OBJECT(s_nvdimm
),
503 "pmem-override", NULL
);
504 bool dest_hcall_flush_required
= pmem_override
|| !is_pmem
;
506 if (!s_nvdimm
->hcall_flush_required
&& dest_hcall_flush_required
) {
507 error_report("The file backend for the spapr-nvdimm device %s at "
508 "source is a pmem, use pmem=on and pmem-override=off to "
509 "continue.", DEVICE(s_nvdimm
)->id
);
512 if (s_nvdimm
->hcall_flush_required
&& !dest_hcall_flush_required
) {
513 error_report("The guest expects hcall-flush support for the "
514 "spapr-nvdimm device %s, use pmem_override=on to "
515 "continue.", DEVICE(s_nvdimm
)->id
);
519 QLIST_FOREACH(state
, &s_nvdimm
->pending_nvdimm_flush_states
, node
) {
520 thread_pool_submit_aio(pool
, flush_worker_cb
, state
,
521 spapr_nvdimm_flush_completion_cb
, state
);
527 static const VMStateDescription vmstate_spapr_nvdimm_flush_state
= {
528 .name
= "spapr_nvdimm_flush_state",
530 .minimum_version_id
= 1,
531 .fields
= (VMStateField
[]) {
532 VMSTATE_UINT64(continue_token
, SpaprNVDIMMDeviceFlushState
),
533 VMSTATE_INT64(hcall_ret
, SpaprNVDIMMDeviceFlushState
),
534 VMSTATE_UINT32(drcidx
, SpaprNVDIMMDeviceFlushState
),
535 VMSTATE_END_OF_LIST()
539 const VMStateDescription vmstate_spapr_nvdimm_states
= {
540 .name
= "spapr_nvdimm_states",
542 .minimum_version_id
= 1,
543 .post_load
= spapr_nvdimm_flush_post_load
,
544 .fields
= (VMStateField
[]) {
545 VMSTATE_BOOL(hcall_flush_required
, SpaprNVDIMMDevice
),
546 VMSTATE_UINT64(nvdimm_flush_token
, SpaprNVDIMMDevice
),
547 VMSTATE_QLIST_V(completed_nvdimm_flush_states
, SpaprNVDIMMDevice
, 1,
548 vmstate_spapr_nvdimm_flush_state
,
549 SpaprNVDIMMDeviceFlushState
, node
),
550 VMSTATE_QLIST_V(pending_nvdimm_flush_states
, SpaprNVDIMMDevice
, 1,
551 vmstate_spapr_nvdimm_flush_state
,
552 SpaprNVDIMMDeviceFlushState
, node
),
553 VMSTATE_END_OF_LIST()
558 * Assign a token and reserve it for the new flush state.
560 static SpaprNVDIMMDeviceFlushState
*spapr_nvdimm_init_new_flush_state(
561 SpaprNVDIMMDevice
*spapr_nvdimm
)
563 SpaprNVDIMMDeviceFlushState
*state
;
565 state
= g_malloc0(sizeof(*state
));
567 spapr_nvdimm
->nvdimm_flush_token
++;
568 /* Token zero is presumed as no job pending. Assert on overflow to zero */
569 g_assert(spapr_nvdimm
->nvdimm_flush_token
!= 0);
571 state
->continue_token
= spapr_nvdimm
->nvdimm_flush_token
;
573 QLIST_INSERT_HEAD(&spapr_nvdimm
->pending_nvdimm_flush_states
, state
, node
);
579 * spapr_nvdimm_finish_flushes
580 * Waits for all pending flush requests to complete
581 * their execution and frees the states.
583 void spapr_nvdimm_finish_flushes(void)
585 SpaprNVDIMMDeviceFlushState
*state
, *next
;
586 GSList
*list
, *nvdimms
;
589 * Called on the reset path: the main loop thread, which calls
590 * the pending BHs, has left its loop and is running in the reset
591 * path, finally reaching here. The other code path is the guest's
592 * h_client_architecture_support, which is at early boot-up.
594 nvdimms
= nvdimm_get_device_list();
595 for (list
= nvdimms
; list
; list
= list
->next
) {
596 NVDIMMDevice
*nvdimm
= list
->data
;
597 if (object_dynamic_cast(OBJECT(nvdimm
), TYPE_SPAPR_NVDIMM
)) {
598 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(nvdimm
);
599 while (!QLIST_EMPTY(&s_nvdimm
->pending_nvdimm_flush_states
)) {
600 aio_poll(qemu_get_aio_context(), true);
603 QLIST_FOREACH_SAFE(state
, &s_nvdimm
->completed_nvdimm_flush_states
,
605 QLIST_REMOVE(state
, node
);
610 g_slist_free(nvdimms
);
614 * spapr_nvdimm_get_flush_status
615 * Fetches the status of the hcall worker and returns
616 * H_LONG_BUSY_ORDER_10_MSEC if the worker is still running.
618 static int spapr_nvdimm_get_flush_status(SpaprNVDIMMDevice
*s_nvdimm
,
621 SpaprNVDIMMDeviceFlushState
*state
, *node
;
623 QLIST_FOREACH(state
, &s_nvdimm
->pending_nvdimm_flush_states
, node
) {
624 if (state
->continue_token
== token
) {
625 return H_LONG_BUSY_ORDER_10_MSEC
;
629 QLIST_FOREACH_SAFE(state
, &s_nvdimm
->completed_nvdimm_flush_states
,
631 if (state
->continue_token
== token
) {
632 int ret
= state
->hcall_ret
;
633 QLIST_REMOVE(state
, node
);
639 /* If not found in the completed list either, the token is invalid */
645 * Input: drc_index, continue-token
646 * Out: continue-token
647 * Return Value: H_SUCCESS, H_Parameter, H_P2, H_LONG_BUSY_ORDER_10_MSEC,
650 * Given a DRC index, flush the data to the backend NVDIMM device. The hcall
651 * returns H_LONG_BUSY_ORDER_10_MSEC when the flush takes a long time and the
652 * hcall needs to be issued multiple times in order to be completely serviced.
653 * The continue-token from the output must be passed in the argument list of
654 * subsequent hcalls until the hcall is completely serviced, at which point
655 * H_SUCCESS or another error is returned.
657 static target_ulong
h_scm_flush(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
658 target_ulong opcode
, target_ulong
*args
)
661 uint32_t drc_index
= args
[0];
662 uint64_t continue_token
= args
[1];
663 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
665 HostMemoryBackend
*backend
= NULL
;
666 SpaprNVDIMMDeviceFlushState
*state
;
667 ThreadPool
*pool
= aio_get_thread_pool(qemu_get_aio_context());
670 if (!drc
|| !drc
->dev
||
671 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
675 dimm
= PC_DIMM(drc
->dev
);
676 if (!object_dynamic_cast(OBJECT(dimm
), TYPE_SPAPR_NVDIMM
)) {
679 if (continue_token
== 0) {
680 bool is_pmem
= false, pmem_override
= false;
681 backend
= MEMORY_BACKEND(dimm
->hostmem
);
682 fd
= memory_region_get_fd(&backend
->mr
);
685 return H_UNSUPPORTED
;
688 is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
689 pmem_override
= object_property_get_bool(OBJECT(dimm
),
690 "pmem-override", NULL
);
691 if (is_pmem
&& !pmem_override
) {
692 return H_UNSUPPORTED
;
695 state
= spapr_nvdimm_init_new_flush_state(SPAPR_NVDIMM(dimm
));
700 state
->drcidx
= drc_index
;
702 thread_pool_submit_aio(pool
, flush_worker_cb
, state
,
703 spapr_nvdimm_flush_completion_cb
, state
);
705 continue_token
= state
->continue_token
;
708 ret
= spapr_nvdimm_get_flush_status(SPAPR_NVDIMM(dimm
), continue_token
);
709 if (H_IS_LONG_BUSY(ret
)) {
710 args
[0] = continue_token
;
716 static target_ulong
h_scm_unbind_mem(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
717 target_ulong opcode
, target_ulong
*args
)
719 uint32_t drc_index
= args
[0];
720 uint64_t starting_scm_logical_addr
= args
[1];
721 uint64_t no_of_scm_blocks_to_unbind
= args
[2];
722 uint64_t continue_token
= args
[3];
723 uint64_t size_to_unbind
;
724 Range blockrange
= range_empty
;
725 Range nvdimmrange
= range_empty
;
726 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
727 NVDIMMDevice
*nvdimm
;
730 if (!drc
|| !drc
->dev
||
731 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
735 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
736 if (continue_token
> 0) {
740 /* Check if starting_scm_logical_addr is block aligned */
741 if (!QEMU_IS_ALIGNED(starting_scm_logical_addr
,
742 SPAPR_MINIMUM_SCM_BLOCK_SIZE
)) {
746 size_to_unbind
= no_of_scm_blocks_to_unbind
* SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
747 if (no_of_scm_blocks_to_unbind
== 0 || no_of_scm_blocks_to_unbind
!=
748 size_to_unbind
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
) {
752 nvdimm
= NVDIMM(drc
->dev
);
753 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
755 addr
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_ADDR_PROP
,
758 range_init_nofail(&nvdimmrange
, addr
, size
);
759 range_init_nofail(&blockrange
, starting_scm_logical_addr
, size_to_unbind
);
761 if (!range_contains_range(&nvdimmrange
, &blockrange
)) {
765 args
[1] = no_of_scm_blocks_to_unbind
;
767 /* let unplug take care of actual unbind */
771 #define H_UNBIND_SCOPE_ALL 0x1
772 #define H_UNBIND_SCOPE_DRC 0x2
774 static target_ulong
h_scm_unbind_all(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
775 target_ulong opcode
, target_ulong
*args
)
777 uint64_t target_scope
= args
[0];
778 uint32_t drc_index
= args
[1];
779 uint64_t continue_token
= args
[2];
780 NVDIMMDevice
*nvdimm
;
782 uint64_t no_of_scm_blocks_unbound
= 0;
784 /* continue_token should be zero as this hcall doesn't return H_BUSY. */
785 if (continue_token
> 0) {
789 if (target_scope
== H_UNBIND_SCOPE_DRC
) {
790 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
792 if (!drc
|| !drc
->dev
||
793 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
797 nvdimm
= NVDIMM(drc
->dev
);
798 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
801 no_of_scm_blocks_unbound
= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
802 } else if (target_scope
== H_UNBIND_SCOPE_ALL
) {
803 GSList
*list
, *nvdimms
;
805 nvdimms
= nvdimm_get_device_list();
806 for (list
= nvdimms
; list
; list
= list
->next
) {
808 size
= object_property_get_int(OBJECT(nvdimm
), PC_DIMM_SIZE_PROP
,
811 no_of_scm_blocks_unbound
+= size
/ SPAPR_MINIMUM_SCM_BLOCK_SIZE
;
813 g_slist_free(nvdimms
);
818 args
[1] = no_of_scm_blocks_unbound
;
820 /* let unplug take care of actual unbind */
824 static target_ulong
h_scm_health(PowerPCCPU
*cpu
, SpaprMachineState
*spapr
,
825 target_ulong opcode
, target_ulong
*args
)
828 NVDIMMDevice
*nvdimm
;
829 uint64_t hbitmap
= 0;
830 uint32_t drc_index
= args
[0];
831 SpaprDrc
*drc
= spapr_drc_by_index(drc_index
);
832 const uint64_t hbitmap_mask
= PAPR_PMEM_UNARMED
;
835 /* Ensure that the drc is valid, is of PMEM type and has a device plugged in */
836 if (!drc
|| !drc
->dev
||
837 spapr_drc_type(drc
) != SPAPR_DR_CONNECTOR_TYPE_PMEM
) {
841 nvdimm
= NVDIMM(drc
->dev
);
843 /* If the nvdimm is unarmed, report that status via the health bitmap */
844 if (object_property_get_bool(OBJECT(nvdimm
), NVDIMM_UNARMED_PROP
, NULL
)) {
845 hbitmap
|= PAPR_PMEM_UNARMED
;
848 /* Update the out args with health bitmap/mask */
850 args
[1] = hbitmap_mask
;
855 static void spapr_scm_register_types(void)
857 /* qemu/scm specific hcalls */
858 spapr_register_hypercall(H_SCM_READ_METADATA
, h_scm_read_metadata
);
859 spapr_register_hypercall(H_SCM_WRITE_METADATA
, h_scm_write_metadata
);
860 spapr_register_hypercall(H_SCM_BIND_MEM
, h_scm_bind_mem
);
861 spapr_register_hypercall(H_SCM_UNBIND_MEM
, h_scm_unbind_mem
);
862 spapr_register_hypercall(H_SCM_UNBIND_ALL
, h_scm_unbind_all
);
863 spapr_register_hypercall(H_SCM_HEALTH
, h_scm_health
);
864 spapr_register_hypercall(H_SCM_FLUSH
, h_scm_flush
);
867 type_init(spapr_scm_register_types
)
869 static void spapr_nvdimm_realize(NVDIMMDevice
*dimm
, Error
**errp
)
871 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(dimm
);
872 HostMemoryBackend
*backend
= MEMORY_BACKEND(PC_DIMM(dimm
)->hostmem
);
873 bool is_pmem
= object_property_get_bool(OBJECT(backend
), "pmem", NULL
);
874 bool pmem_override
= object_property_get_bool(OBJECT(dimm
), "pmem-override",
876 if (!is_pmem
|| pmem_override
) {
877 s_nvdimm
->hcall_flush_required
= true;
880 vmstate_register(NULL
, VMSTATE_INSTANCE_ID_ANY
,
881 &vmstate_spapr_nvdimm_states
, dimm
);
884 static void spapr_nvdimm_unrealize(NVDIMMDevice
*dimm
)
886 vmstate_unregister(NULL
, &vmstate_spapr_nvdimm_states
, dimm
);
889 static Property spapr_nvdimm_properties
[] = {
890 #ifdef CONFIG_LIBPMEM
891 DEFINE_PROP_BOOL("pmem-override", SpaprNVDIMMDevice
, pmem_override
, false),
893 DEFINE_PROP_END_OF_LIST(),
896 static void spapr_nvdimm_class_init(ObjectClass
*oc
, void *data
)
898 DeviceClass
*dc
= DEVICE_CLASS(oc
);
899 NVDIMMClass
*nvc
= NVDIMM_CLASS(oc
);
901 nvc
->realize
= spapr_nvdimm_realize
;
902 nvc
->unrealize
= spapr_nvdimm_unrealize
;
904 device_class_set_props(dc
, spapr_nvdimm_properties
);
907 static void spapr_nvdimm_init(Object
*obj
)
909 SpaprNVDIMMDevice
*s_nvdimm
= SPAPR_NVDIMM(obj
);
911 s_nvdimm
->hcall_flush_required
= false;
912 QLIST_INIT(&s_nvdimm
->pending_nvdimm_flush_states
);
913 QLIST_INIT(&s_nvdimm
->completed_nvdimm_flush_states
);
916 static TypeInfo spapr_nvdimm_info
= {
917 .name
= TYPE_SPAPR_NVDIMM
,
918 .parent
= TYPE_NVDIMM
,
919 .class_init
= spapr_nvdimm_class_init
,
920 .class_size
= sizeof(SPAPRNVDIMMClass
),
921 .instance_size
= sizeof(SpaprNVDIMMDevice
),
922 .instance_init
= spapr_nvdimm_init
,
925 static void spapr_nvdimm_register_types(void)
927 type_register_static(&spapr_nvdimm_info
);
930 type_init(spapr_nvdimm_register_types
)