// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/android/staging/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *   Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *   Copyright 1999-2001 Jeff Garzik
 *   Copyright 2001-2004 Jeff Garzik
 */
#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"
/*
 * Description of the ivshmem-doorbell PCI device used by QEmu. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEmu repository.
 */

/*
 * These constants are the KVM Inter-VM shared memory device
 * register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};

static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;
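
/*
 * Illustrative note: with this register layout the guest signals the host for
 * region N by writing N to the doorbell register, i.e.
 *
 *	writel(region_number, vsoc_dev.regs + DOORBELL);
 *
 * which is how vsoc_ioctl() implements VSOC_SEND_INTERRUPT_TO_HOST below.
 */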
struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};
struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset.
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	/* PCI device that provides the shared memory window. */
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

static struct vsoc_device vsoc_dev;
/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */
struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};
struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};
static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);
/*
 * Validate arguments on entry points to the driver.
 */
inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}
inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}
/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}
/*
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode)
{
	return vsoc_region_from_inode(file_inode(inode));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}
static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};
static struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);
static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;

	if (copy_from_user(&np->permission,
			   &arg->perm, sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_filp = fdget(managed_fd).file;
	/* Check that it's a valid fd, */
	if (!managed_filp || vsoc_validate_filep(managed_filp))
		return -EPERM;
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node)
		return -EEXIST;
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
		return -EPERM;
	/* The area must be well formed and have non-zero size */
	if (np->permission.begin_offset >= np->permission.end_offset)
		return -EINVAL;
	/* The area must fit in the memory window */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p))
		return -ERANGE;
	/* The area must be in the region data section */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data)
		return -ERANGE;
	/* The area must be page aligned */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset))
		return -EINVAL;
	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EADDRNOTAVAIL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;
	/* The owner value must change to claim the memory */
	if (np->permission.owned_value == VSOC_REGION_FREE)
		return -EINVAL;
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		return -EBUSY;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset) {
				managed_filp->f_pos = 0;
			} else {
				managed_filp->f_pos -=
				    np->permission.begin_offset;
			}
		}
	}
	return 0;
}
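
/*
 * Illustrative userspace sketch (not part of the driver), showing how a
 * manager process might grant an fd scoped permission over a managed region.
 * It assumes the VSOC_CREATE_FD_SCOPED_PERMISSION ioctl and the structure
 * layout exported by uapi/vsoc_shm.h; the fd values, offsets and error
 * handling are hypothetical:
 *
 *	struct fd_scoped_permission_arg arg = {0};
 *
 *	arg.managed_region_fd = managed_fd;	// open fd on the managed region
 *	arg.perm.begin_offset = begin_off;	// page aligned, inside region data
 *	arg.perm.end_offset = end_off;		// page aligned, > begin_offset
 *	arg.perm.owner_offset = owner_off;	// word aligned slot in this region
 *	arg.perm.owned_value = my_value;	// anything != VSOC_REGION_FREE
 *	if (ioctl(manager_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg) < 0)
 *		perror("VSOC_CREATE_FD_SCOPED_PERMISSION");
 */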
static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
	    (owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}
static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}
/*
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	ktime_t wake_time;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timesec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		if (arg->wake_time_nsec >= NSEC_PER_SEC)
			return -EINVAL;
		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);

		hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
					      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, wake_time,
					     current->timer_slack_ns);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for debugging.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}
/*
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}
static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;

	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}
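
/*
 * Illustrative userspace sketch (not part of the driver) of the futex-like
 * wait/wake pattern built on the two helpers above. The VSOC_COND_WAIT and
 * VSOC_COND_WAKE ioctl names and the exact struct vsoc_cond_wait layout are
 * assumed to come from uapi/vsoc_shm.h:
 *
 *	// Waiter: sleep while the word at 'off' still holds 'old_value'.
 *	struct vsoc_cond_wait w = {0};
 *
 *	w.offset = off;			// word-aligned offset inside the region
 *	w.value = old_value;
 *	w.wait_type = VSOC_WAIT_IF_EQUAL;
 *	ioctl(region_fd, VSOC_COND_WAIT, &w);	// w.wakes counts wakeups
 *
 *	// Waker: after changing the word, wake every sleeper on it.
 *	ioctl(region_fd, VSOC_COND_WAKE, off);
 */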
static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
				(region_p,
				 node,
				 (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
				((struct vsoc_private_data *)filp->private_data)->
				fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		} else {
			return -EBUSY;
		}
		break;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;

	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
			(reg_data->interrupt_wait_queue,
			 (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
			(filp,
			 (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);
	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}
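
/*
 * Illustrative userspace sketch (not part of the driver) of the doorbell
 * ioctls handled above; the fd and error handling are hypothetical:
 *
 *	// Signal the host only if the previous interrupt was consumed.
 *	if (ioctl(region_fd, VSOC_MAYBE_SEND_INTERRUPT_TO_HOST) < 0)
 *		; // host had not consumed the previous signal yet
 *
 *	// Block until the host signals this region.
 *	ioctl(region_fd, VSOC_WAIT_FOR_INCOMING_INTERRUPT);
 */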
static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * already past it, in which case the seek fails.
		 */
		if (offset < area_len)
			offset = area_len;
		else
			return -EINVAL;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;

	filp->f_pos = offset;
	return offset;
}
static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}
static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num = region_data - vsoc_dev.regions_data;

	if (unlikely(!region_data))
		return IRQ_NONE;

	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
	    kcalloc(vsoc_dev.layout->region_count,
		    sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	vsoc_dev.regions_data =
	    kcalloc(vsoc_dev.layout->region_count,
		    sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
		vsoc_remove_device(pdev);
		return result;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EINVAL;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EINVAL;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EINVAL;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
			(&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed irq=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return result;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		if (!device_create(vsoc_dev.class, NULL,
				   MKDEV(vsoc_dev.major, i),
				   NULL, vsoc_dev.regions_data[i].name)) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}
/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * The device may have been partially initialized, so double check
 * that the allocations happened.
 *
 * This function may be called multiple times, so mark resources as freed
 * as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;

	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			/* dev_id must match the one given to request_irq() */
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector,
					 vsoc_dev.regions_data + i);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}
static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}
static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}
static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}
/*
 * Returns the device relative offset and length of the area specified by the
 * fd scoped permission. If there is no fd scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is owned
 * by another one, in which case the default is a permission with zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission *perm;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	perm = &((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node->permission;
	if (perm) {
		off += perm->begin_offset;
		length = perm->end_offset - perm->begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}
static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	if (area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}
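
/*
 * Illustrative userspace sketch (not part of the driver): mapping a region's
 * permitted area and then using the cond-wait ioctls on it. The device node
 * name and sizes are hypothetical; the node is created per region in
 * vsoc_probe_device() from the region's device_name:
 *
 *	int fd = open("/dev/some_region", O_RDWR);
 *	size_t sz = 4096;		// must fit within vsoc_get_area()
 *	uint32_t *base = mmap(NULL, sz, PROT_READ | PROT_WRITE,
 *			      MAP_SHARED, fd, 0);
 *	// base[0] can now be used with VSOC_COND_WAIT / VSOC_COND_WAKE.
 */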
module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
MODULE_VERSION("1.0");