/*
 * Implementation of SR/IOV emulation support.
 *
 * Copyright (c) 2015-2017 Knut Omang <knut.omang@oracle.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
13 #include "qemu/osdep.h"
14 #include "hw/pci/pci_device.h"
15 #include "hw/pci/pcie.h"
16 #include "hw/pci/pci_bus.h"
17 #include "hw/qdev-properties.h"
18 #include "qemu/error-report.h"
19 #include "qemu/range.h"
20 #include "qapi/error.h"
23 static PCIDevice
*register_vf(PCIDevice
*pf
, int devfn
,
24 const char *name
, uint16_t vf_num
);
25 static void unregister_vfs(PCIDevice
*dev
);
27 void pcie_sriov_pf_init(PCIDevice
*dev
, uint16_t offset
,
28 const char *vfname
, uint16_t vf_dev_id
,
29 uint16_t init_vfs
, uint16_t total_vfs
,
30 uint16_t vf_offset
, uint16_t vf_stride
)
32 uint8_t *cfg
= dev
->config
+ offset
;
35 pcie_add_capability(dev
, PCI_EXT_CAP_ID_SRIOV
, 1,
36 offset
, PCI_EXT_CAP_SRIOV_SIZEOF
);
37 dev
->exp
.sriov_cap
= offset
;
38 dev
->exp
.sriov_pf
.num_vfs
= 0;
39 dev
->exp
.sriov_pf
.vfname
= g_strdup(vfname
);
40 dev
->exp
.sriov_pf
.vf
= NULL
;
42 pci_set_word(cfg
+ PCI_SRIOV_VF_OFFSET
, vf_offset
);
43 pci_set_word(cfg
+ PCI_SRIOV_VF_STRIDE
, vf_stride
);
46 * Mandatory page sizes to support.
47 * Device implementations can call pcie_sriov_pf_add_sup_pgsize()
50 pci_set_word(cfg
+ PCI_SRIOV_SUP_PGSIZE
, SRIOV_SUP_PGSIZE_MINREQ
);
53 * Default is to use 4K pages, software can modify it
54 * to any of the supported bits
56 pci_set_word(cfg
+ PCI_SRIOV_SYS_PGSIZE
, 0x1);
58 /* Set up device ID and initial/total number of VFs available */
59 pci_set_word(cfg
+ PCI_SRIOV_VF_DID
, vf_dev_id
);
60 pci_set_word(cfg
+ PCI_SRIOV_INITIAL_VF
, init_vfs
);
61 pci_set_word(cfg
+ PCI_SRIOV_TOTAL_VF
, total_vfs
);
62 pci_set_word(cfg
+ PCI_SRIOV_NUM_VF
, 0);
64 /* Write enable control bits */
65 wmask
= dev
->wmask
+ offset
;
66 pci_set_word(wmask
+ PCI_SRIOV_CTRL
,
67 PCI_SRIOV_CTRL_VFE
| PCI_SRIOV_CTRL_MSE
| PCI_SRIOV_CTRL_ARI
);
68 pci_set_word(wmask
+ PCI_SRIOV_NUM_VF
, 0xffff);
69 pci_set_word(wmask
+ PCI_SRIOV_SYS_PGSIZE
, 0x553);
71 qdev_prop_set_bit(&dev
->qdev
, "multifunction", true);
74 void pcie_sriov_pf_exit(PCIDevice
*dev
)
77 g_free((char *)dev
->exp
.sriov_pf
.vfname
);
78 dev
->exp
.sriov_pf
.vfname
= NULL
;
81 void pcie_sriov_pf_init_vf_bar(PCIDevice
*dev
, int region_num
,
82 uint8_t type
, dma_addr_t size
)
86 uint16_t sriov_cap
= dev
->exp
.sriov_cap
;
88 assert(sriov_cap
> 0);
89 assert(region_num
>= 0);
90 assert(region_num
< PCI_NUM_REGIONS
);
91 assert(region_num
!= PCI_ROM_SLOT
);
94 addr
= sriov_cap
+ PCI_SRIOV_BAR
+ region_num
* 4;
96 pci_set_long(dev
->config
+ addr
, type
);
97 if (!(type
& PCI_BASE_ADDRESS_SPACE_IO
) &&
98 type
& PCI_BASE_ADDRESS_MEM_TYPE_64
) {
99 pci_set_quad(dev
->wmask
+ addr
, wmask
);
100 pci_set_quad(dev
->cmask
+ addr
, ~0ULL);
102 pci_set_long(dev
->wmask
+ addr
, wmask
& 0xffffffff);
103 pci_set_long(dev
->cmask
+ addr
, 0xffffffff);
105 dev
->exp
.sriov_pf
.vf_bar_type
[region_num
] = type
;
108 void pcie_sriov_vf_register_bar(PCIDevice
*dev
, int region_num
,
109 MemoryRegion
*memory
)
112 PCIBus
*bus
= pci_get_bus(dev
);
114 pcibus_t size
= memory_region_size(memory
);
116 assert(pci_is_vf(dev
)); /* PFs must use pci_register_bar */
117 assert(region_num
>= 0);
118 assert(region_num
< PCI_NUM_REGIONS
);
119 type
= dev
->exp
.sriov_vf
.pf
->exp
.sriov_pf
.vf_bar_type
[region_num
];
121 if (!is_power_of_2(size
)) {
122 error_report("%s: PCI region size must be a power"
123 " of two - type=0x%x, size=0x%"FMT_PCIBUS
,
124 __func__
, type
, size
);
128 r
= &dev
->io_regions
[region_num
];
131 type
& PCI_BASE_ADDRESS_SPACE_IO
132 ? bus
->address_space_io
133 : bus
->address_space_mem
;
137 r
->addr
= pci_bar_address(dev
, region_num
, r
->type
, r
->size
);
138 if (r
->addr
!= PCI_BAR_UNMAPPED
) {
139 memory_region_add_subregion_overlap(r
->address_space
,
140 r
->addr
, r
->memory
, 1);
144 static PCIDevice
*register_vf(PCIDevice
*pf
, int devfn
, const char *name
,
147 PCIDevice
*dev
= pci_new(devfn
, name
);
148 dev
->exp
.sriov_vf
.pf
= pf
;
149 dev
->exp
.sriov_vf
.vf_number
= vf_num
;
150 PCIBus
*bus
= pci_get_bus(pf
);
151 Error
*local_err
= NULL
;
153 qdev_realize(&dev
->qdev
, &bus
->qbus
, &local_err
);
155 error_report_err(local_err
);
159 /* set vid/did according to sr/iov spec - they are not used */
160 pci_config_set_vendor_id(dev
->config
, 0xffff);
161 pci_config_set_device_id(dev
->config
, 0xffff);
166 static void register_vfs(PCIDevice
*dev
)
170 uint16_t sriov_cap
= dev
->exp
.sriov_cap
;
172 pci_get_word(dev
->config
+ sriov_cap
+ PCI_SRIOV_VF_OFFSET
);
174 pci_get_word(dev
->config
+ sriov_cap
+ PCI_SRIOV_VF_STRIDE
);
175 int32_t devfn
= dev
->devfn
+ vf_offset
;
177 assert(sriov_cap
> 0);
178 num_vfs
= pci_get_word(dev
->config
+ sriov_cap
+ PCI_SRIOV_NUM_VF
);
179 if (num_vfs
> pci_get_word(dev
->config
+ sriov_cap
+ PCI_SRIOV_TOTAL_VF
)) {
183 dev
->exp
.sriov_pf
.vf
= g_new(PCIDevice
*, num_vfs
);
185 trace_sriov_register_vfs(dev
->name
, PCI_SLOT(dev
->devfn
),
186 PCI_FUNC(dev
->devfn
), num_vfs
);
187 for (i
= 0; i
< num_vfs
; i
++) {
188 dev
->exp
.sriov_pf
.vf
[i
] = register_vf(dev
, devfn
,
189 dev
->exp
.sriov_pf
.vfname
, i
);
190 if (!dev
->exp
.sriov_pf
.vf
[i
]) {
196 dev
->exp
.sriov_pf
.num_vfs
= num_vfs
;
199 static void unregister_vfs(PCIDevice
*dev
)
201 uint16_t num_vfs
= dev
->exp
.sriov_pf
.num_vfs
;
204 trace_sriov_unregister_vfs(dev
->name
, PCI_SLOT(dev
->devfn
),
205 PCI_FUNC(dev
->devfn
), num_vfs
);
206 for (i
= 0; i
< num_vfs
; i
++) {
208 PCIDevice
*vf
= dev
->exp
.sriov_pf
.vf
[i
];
209 if (!object_property_set_bool(OBJECT(vf
), "realized", false, &err
)) {
210 error_reportf_err(err
, "Failed to unplug: ");
212 object_unparent(OBJECT(vf
));
213 object_unref(OBJECT(vf
));
215 g_free(dev
->exp
.sriov_pf
.vf
);
216 dev
->exp
.sriov_pf
.vf
= NULL
;
217 dev
->exp
.sriov_pf
.num_vfs
= 0;
220 void pcie_sriov_config_write(PCIDevice
*dev
, uint32_t address
,
221 uint32_t val
, int len
)
224 uint16_t sriov_cap
= dev
->exp
.sriov_cap
;
226 if (!sriov_cap
|| address
< sriov_cap
) {
229 off
= address
- sriov_cap
;
230 if (off
>= PCI_EXT_CAP_SRIOV_SIZEOF
) {
234 trace_sriov_config_write(dev
->name
, PCI_SLOT(dev
->devfn
),
235 PCI_FUNC(dev
->devfn
), off
, val
, len
);
237 if (range_covers_byte(off
, len
, PCI_SRIOV_CTRL
)) {
238 if (dev
->exp
.sriov_pf
.num_vfs
) {
239 if (!(val
& PCI_SRIOV_CTRL_VFE
)) {
243 if (val
& PCI_SRIOV_CTRL_VFE
) {
252 void pcie_sriov_pf_reset(PCIDevice
*dev
)
254 uint16_t sriov_cap
= dev
->exp
.sriov_cap
;
259 pci_set_word(dev
->config
+ sriov_cap
+ PCI_SRIOV_CTRL
, 0);
262 pci_set_word(dev
->config
+ sriov_cap
+ PCI_SRIOV_NUM_VF
, 0);
265 * Default is to use 4K pages, software can modify it
266 * to any of the supported bits
268 pci_set_word(dev
->config
+ sriov_cap
+ PCI_SRIOV_SYS_PGSIZE
, 0x1);
270 for (uint16_t i
= 0; i
< PCI_NUM_REGIONS
; i
++) {
271 pci_set_quad(dev
->config
+ sriov_cap
+ PCI_SRIOV_BAR
+ i
* 4,
272 dev
->exp
.sriov_pf
.vf_bar_type
[i
]);
276 /* Add optional supported page sizes to the mask of supported page sizes */
277 void pcie_sriov_pf_add_sup_pgsize(PCIDevice
*dev
, uint16_t opt_sup_pgsize
)
279 uint8_t *cfg
= dev
->config
+ dev
->exp
.sriov_cap
;
280 uint8_t *wmask
= dev
->wmask
+ dev
->exp
.sriov_cap
;
282 uint16_t sup_pgsize
= pci_get_word(cfg
+ PCI_SRIOV_SUP_PGSIZE
);
284 sup_pgsize
|= opt_sup_pgsize
;
287 * Make sure the new bits are set, and that system page size
288 * also can be set to any of the new values according to spec:
290 pci_set_word(cfg
+ PCI_SRIOV_SUP_PGSIZE
, sup_pgsize
);
291 pci_set_word(wmask
+ PCI_SRIOV_SYS_PGSIZE
, sup_pgsize
);
295 uint16_t pcie_sriov_vf_number(PCIDevice
*dev
)
297 assert(pci_is_vf(dev
));
298 return dev
->exp
.sriov_vf
.vf_number
;
301 PCIDevice
*pcie_sriov_get_pf(PCIDevice
*dev
)
303 return dev
->exp
.sriov_vf
.pf
;
306 PCIDevice
*pcie_sriov_get_vf_at_index(PCIDevice
*dev
, int n
)
308 assert(!pci_is_vf(dev
));
309 if (n
< dev
->exp
.sriov_pf
.num_vfs
) {
310 return dev
->exp
.sriov_pf
.vf
[n
];
315 uint16_t pcie_sriov_num_vfs(PCIDevice
*dev
)
317 return dev
->exp
.sriov_pf
.num_vfs
;