// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *	Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>

#include "vfio_pci_priv.h"
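
/*
 * Userspace sees BAR contents as raw device-endian (little-endian for PCI)
 * bytes.  The plain ioreadN()/iowriteN() accessors byte-swap on big-endian
 * hosts, so the byte-swapping "be" variants are used there to keep the byte
 * layout that userspace supplies and expects.
 */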
#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#define VFIO_IOWRITE(size) \
int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
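
/*
 * Instantiate the write accessors for each access width used below.  This
 * sketch assumes the 64-bit accessor should only be built where the
 * architecture provides ioread64/iowrite64, mirroring the guard used for
 * the 64-bit path in vfio_pci_core_do_io_rw().
 */
VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#if defined(ioread64) && defined(iowrite64)
VFIO_IOWRITE(64)
#endif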

#define VFIO_IOREAD(size) \
int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}									\
EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
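
/*
 * Instantiate the read accessors to match the write side above; the same
 * assumption about 64-bit availability applies.
 */
VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)
#if defined(ioread64) && defined(iowrite64)
VFIO_IOREAD(64)
#endif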

#define VFIO_IORDWR(size)						\
static int vfio_pci_iordwr##size(struct vfio_pci_core_device *vdev,	\
				bool iswrite, bool test_mem,		\
				void __iomem *io, char __user *buf,	\
				loff_t off, size_t *filled)		\
{									\
	u##size val;							\
	int ret;							\
									\
	if (iswrite) {							\
		if (copy_from_user(&val, buf, sizeof(val)))		\
			return -EFAULT;					\
		ret = vfio_pci_core_iowrite##size(vdev, test_mem,	\
						  val, io);		\
		if (ret)						\
			return ret;					\
	} else {							\
		ret = vfio_pci_core_ioread##size(vdev, test_mem,	\
						 &val, io);		\
		if (ret)						\
			return ret;					\
		if (copy_to_user(buf, &val, sizeof(val)))		\
			return -EFAULT;					\
	}								\
									\
	*filled = sizeof(val);						\
	return 0;							\
}
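
/*
 * Instantiate the combined read/write helpers used by the access-size
 * dispatch in vfio_pci_core_do_io_rw() below (the 64-bit helper follows,
 * under the existing ioread64/iowrite64 guard).
 */
VFIO_IORDWR(8)
VFIO_IORDWR(16)
VFIO_IORDWR(32)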
#if defined(ioread64) && defined(iowrite64)
VFIO_IORDWR(64)
#endif

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			       void __iomem *io, char __user *buf,
			       loff_t off, size_t count, size_t x_start,
			       size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;
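
		/*
		 * Dispatch on the largest naturally aligned access size the
		 * fillable span allows: 8, then 4, 2 and finally 1 byte.
		 */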
#if defined(ioread64) && defined(iowrite64)
		if (fillable >= 8 && !(off % 8)) {
			ret = vfio_pci_iordwr64(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else
#endif
		if (fillable >= 4 && !(off % 4)) {
			ret = vfio_pci_iordwr32(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable >= 2 && !(off % 2)) {
			ret = vfio_pci_iordwr16(vdev, iswrite, test_mem,
						io, buf, off, &filled);
			if (ret)
				return ret;

		} else if (fillable) {
			ret = vfio_pci_iordwr8(vdev, iswrite, test_mem,
					       io, buf, off, &filled);
			if (ret)
				return ret;

		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
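
/*
 * Lazily request and iomap a BAR on first use; the mapping is cached in
 * vdev->barmap[] and reused by both the region read/write path and
 * ioeventfds.
 */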
int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	void __iomem *io;
	int ret;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);

ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io)
			return -ENOMEM;
		x_end = end;
	} else {
		int ret = vfio_pci_core_setup_barmap(vdev, bar);

		if (ret)
			return ret;

		io = vdev->barmap[bar];
	}
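
	/*
	 * If this BAR hosts the MSI-X vector table, carve the table out as
	 * the excluded range so reads of it are filled with -1 and writes
	 * are dropped rather than reaching the device.
	 */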
	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io,
				      buf, pos, count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);

	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	int ret;
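
	/*
	 * Legacy VGA is decoded as three fixed windows: the 0xa0000-0xbffff
	 * memory aperture and the 0x3b0-0x3bb and 0x3c0-0x3df I/O port
	 * ranges; any other offset is rejected.
	 */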
	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
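
/*
 * An ioeventfd ties an eventfd to a fixed write: the data value, access
 * width and BAR offset are recorded at registration time, and signaling the
 * eventfd performs that write from kernel context.
 */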
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
				       ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#if defined(ioread64) && defined(iowrite64)
	case 8:
		vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
					ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}
}
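
/*
 * The handler runs from the eventfd wakeup path and must not sleep: it only
 * tries to take memory_lock, and returns 1 on contention so the write is
 * retried from the sleepable thread handler below.
 */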
static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}
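
/*
 * Register an ioeventfd for a BAR offset, data value and access width; a
 * request matching an already registered entry tears that entry down
 * (fd == -1) rather than stacking a duplicate.
 */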
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

	ret = vfio_pci_core_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}