// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/staging/android/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *   Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *   Copyright 1999-2001 Jeff Garzik
 *   Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEmu. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEmu repository.
 */

/*
 * These constants are the KVM Inter-VM shared memory device
 * register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};
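
/*
 * The ioctl paths below signal the host by writing a region number to the
 * DOORBELL register; the exact doorbell encoding is defined by the ivshmem
 * spec referenced above rather than by this driver.
 */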

static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;

/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;

struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};

struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
	/* The underlying PCI device; set first in probe, cleared last in remove. */
	struct pci_dev *dev;
};

static struct vsoc_device vsoc_dev;
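
/*
 * All driver state lives in the single vsoc_dev instance above, so the driver
 * supports exactly one VSoC PCI device per guest.
 */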

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);

static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);

/**
 * Validate arguments on entry points to the driver.
 */
inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}

inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}

/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}

/**
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *filp)
{
	return vsoc_region_from_inode(file_inode(filp));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}

static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};

static struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};

static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;

	if (copy_from_user(&np->permission,
			   &arg->perm, sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_filp = fdget(managed_fd).file;
	/* Check that it's a valid fd, */
	if (!managed_filp || vsoc_validate_filep(managed_filp))
		return -EPERM;
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node)
		return -EEXIST;
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
		return -EPERM;
	/* The area must be well formed and have non-zero size */
	if (np->permission.begin_offset >= np->permission.end_offset)
		return -EINVAL;
	/* The area must fit in the memory window */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p))
		return -ERANGE;
	/* The area must be in the region data section */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data)
		return -EINVAL;
	/* The area must be page aligned */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset))
		return -EINVAL;
	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EADDRNOTAVAIL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;
	/* The owner value must change to claim the memory */
	if (np->permission.owned_value == VSOC_REGION_FREE)
		return -EINVAL;
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		return -EBUSY;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset) {
				managed_filp->f_pos = 0;
			} else {
				managed_filp->f_pos -=
				    np->permission.begin_offset;
			}
		}
	}
	return 0;
}
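
/*
 * Illustrative userspace sequence (a sketch, not part of the driver): a
 * process holding the managing region's fd can grant another fd access to a
 * sub-range of the managed region. Names follow uapi/vsoc_shm.h; the numeric
 * offsets are made-up examples.
 *
 *	struct fd_scoped_permission_arg arg = {
 *		.perm = {
 *			.begin_offset = 0x1000,	// page aligned, inside region data
 *			.end_offset   = 0x2000,
 *			.owner_offset = 0x10,	// owner word in the managing region
 *			.owned_value  = my_id,	// anything but VSOC_REGION_FREE
 *		},
 *		.managed_region_fd = client_fd,
 *	};
 *	ioctl(manager_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg);
 */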

static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}

static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
	    (owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}

static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}

/**
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	ktime_t wake_time;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timesec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		if (arg->wake_time_nsec >= NSEC_PER_SEC)
			return -EINVAL;
		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);

		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, wake_time,
					     current->timer_slack_ns);

		hrtimer_init_sleeper(to, current);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value)
			break;
		if (to) {
			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for profiling and debugging.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}
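
/*
 * Illustrative userspace pattern (a sketch, not part of the driver): the
 * cond_wait/cond_wake ioctls act like a futex on a 4-byte-aligned word in the
 * region's data section. A waiter sleeps while the word still equals the
 * expected value:
 *
 *	struct vsoc_cond_wait w = {
 *		.offset = word_off,		// offset of the word in the region
 *		.value = expected,		// sleep while *word == expected
 *		.wait_type = VSOC_WAIT_IF_EQUAL,
 *	};
 *	ioctl(fd, VSOC_COND_WAIT, &w);		// w.wakes counts wakeups
 *
 * A writer updates the word and then calls
 *	ioctl(fd, VSOC_COND_WAKE, word_off);
 */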

/**
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}

static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;

	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}

static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
				(region_p, node,
				 (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
				((struct vsoc_private_data *)filp->private_data)->
				fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		} else {
			return -EBUSY;
		}
		break;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;

	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
			(reg_data->interrupt_wait_queue,
			 (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
			(filp, (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);

	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}
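
/*
 * Illustrative guest-side signalling round trip (a sketch, not part of the
 * driver): a client typically rings the host doorbell only when the host has
 * not yet seen a pending signal, then blocks for the reply:
 *
 *	ioctl(fd, VSOC_MAYBE_SEND_INTERRUPT_TO_HOST, 0);
 *	ioctl(fd, VSOC_WAIT_FOR_INCOMING_INTERRUPT, 0);
 *
 * The "maybe" variant skips the DOORBELL write when outgoing_signalled was
 * already set, avoiding redundant interrupts to the host.
 */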

static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that.
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;

	filp->f_pos = offset;

	return offset;
}

static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num = region_data - vsoc_dev.regions_data;

	if (unlikely(!region_data))
		return IRQ_NONE;

	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}

static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	vsoc_dev.regions_data =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
			(&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed irq=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		if (!device_create(vsoc_dev.class, NULL,
				   MKDEV(vsoc_dev.major, i),
				   NULL, vsoc_dev.regions_data[i].name)) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}
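
/*
 * Note: vsoc_probe_device() does not unwind partial initialization itself;
 * every error path above calls vsoc_remove_device(), which serves as the
 * common unwind path and copes with partially initialized state.
 */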

/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 * The device may have been partially initialized, so double check
 * that the allocations happened.
 *
 * This function may be called multiple times, so mark resources as freed
 * as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector, NULL);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}

static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err = -ENOMEM;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}

static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}

static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}

/*
 * Returns the device relative offset and length of the area specified by the
 * fd scoped permission. If there is no fd scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is owned
 * by another one, in which case the default is a permission with zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission *perm;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	perm = &((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node->permission;
	if (perm) {
		off += perm->begin_offset;
		length = perm->end_offset - perm->begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}

static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	if (area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}
);
1146 module_exit(vsoc_cleanup_module
);
1148 MODULE_LICENSE("GPL");
1149 MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
1150 MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
1151 MODULE_VERSION("1.0");