// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/android/staging/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *   Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *   Copyright 1999-2001 Jeff Garzik
 *   Copyright 2001-2004 Jeff Garzik
 */
#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"
/*
 * Description of the ivshmem-doorbell PCI device used by QEmu. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEmu repository. This was last reconciled with the version that
 * came out in 2.8.
 */
/*
 * These constants are the KVM Inter-VM shared memory device
 * register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};
static const int REGISTER_BAR;	/* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-x BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;
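
/*
 * BAR usage summary for the ivshmem-doorbell device: REGISTER_BAR (BAR 0)
 * holds the interrupt/doorbell registers enumerated above, BAR 1 carries the
 * MSI-X table and is not touched directly by this driver, and
 * SHARED_MEMORY_BAR (BAR 2) exposes the shared memory window that backs
 * every vsoc region.
 */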
struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};
struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	/* PCI device backing the shared memory window. */
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of msi-x entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list */
	struct mutex mtx;
	/* Major number assigned by the kernel */
	int major;
	/* Character device assigned by the kernel */
	struct cdev cdev;
	/* Device class assigned by the kernel */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};
static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */
struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};
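
/*
 * An fd-scoped permission grants one open file descriptor access to a
 * page-aligned sub-range of a managed region. Creating the permission claims
 * an owner word inside the managing region with atomic_cmpxchg(), and
 * vsoc_release() tears the claim down when the descriptor is closed.
 */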
static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *perm_off);
/**
 * Validate arguments on entry points to the driver.
 */
inline int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"describe_region: invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}
inline int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}
/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}
/**
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *inode)
{
	return vsoc_region_from_inode(file_inode(inode));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}
static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};
static struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;

	if (copy_from_user(&np->permission,
			   &arg->perm, sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_filp = fdget(managed_fd).file;
	/* Check that it's a valid fd, */
	if (!managed_filp || vsoc_validate_filep(managed_filp))
		return -EPERM;
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node)
		return -EEXIST;
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
		return -EPERM;
	/* The area must be well formed and have non-zero size */
	if (np->permission.begin_offset >= np->permission.end_offset)
		return -EINVAL;
	/* The area must fit in the memory window */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p))
		return -ERANGE;
	/* The area must be in the region data section */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data)
		return -EINVAL;
	/* The area must be page aligned */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset))
		return -EINVAL;
	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EINVAL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;
	/* The owner value must change to claim the memory */
	if (np->permission.owned_value == VSOC_REGION_FREE)
		return -EINVAL;
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		return -EBUSY;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else {
			/* If the offset is within the permission interval
			 * keep it there otherwise reset it to zero.
			 */
			if (managed_filp->f_pos < np->permission.begin_offset)
				managed_filp->f_pos = 0;
			else
				managed_filp->f_pos -=
					np->permission.begin_offset;
		}
	}
	return 0;
}
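
/*
 * Illustrative sketch only (not part of the driver): roughly how a userspace
 * caller might create an fd-scoped permission, assuming a managing region fd
 * (manager_fd) and a managed region fd (managed_fd) opened elsewhere. All
 * variable names here are examples, not guarantees.
 *
 *	struct fd_scoped_permission_arg arg = {0};
 *
 *	arg.managed_region_fd = managed_fd;
 *	arg.perm.begin_offset = begin;		// page aligned, in region data
 *	arg.perm.end_offset = end;		// page aligned, > begin_offset
 *	arg.perm.owner_offset = owner_word_offset;
 *	arg.perm.owned_value = my_owner_value;	// must differ from VSOC_REGION_FREE
 *	if (ioctl(manager_fd, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg) < 0)
 *		perror("VSOC_CREATE_FD_SCOPED_PERMISSION");
 */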
static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
		(owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}
static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}
/**
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	ktime_t wake_time;

	/* Ensure that the offset is aligned */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timesec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		if (arg->wake_time_nsec >= NSEC_PER_SEC)
			return -EINVAL;
		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);

		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
				      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, wake_time,
					     current->timer_slack_ns);
		hrtimer_init_sleeper(to, current);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * when tuning the code that signals this condition.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}
/**
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}
static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;

	/* Ensure that the offset is aligned */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}
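
/*
 * Illustrative sketch only: the futex-style pattern the cond_wait/cond_wake
 * ioctls support from userspace. Names other than the ioctl commands and
 * struct vsoc_cond_wait fields are examples.
 *
 *	struct vsoc_cond_wait wait = {0};
 *
 *	wait.offset = flag_offset;	// aligned offset inside the region
 *	wait.value = expected;		// sleep while the word equals this
 *	wait.wait_type = VSOC_WAIT_IF_EQUAL;
 *	ioctl(region_fd, VSOC_COND_WAIT, &wait);  // wait.wakes counts wakeups
 *
 *	// ...and the writer side, after updating the shared word:
 *	ioctl(region_fd, VSOC_COND_WAKE, flag_offset);
 */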
static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
				(region_p,
				 node,
				 (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
				((struct vsoc_private_data *)filp->private_data)->
				fd_scoped_permission_node;
			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (!atomic_xchg(reg_data->outgoing_signalled, 1)) {
			writel(reg_num, vsoc_dev.regs + DOORBELL);
			return 0;
		} else {
			return -EBUSY;
		}
		break;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;

	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
			(reg_data->interrupt_wait_queue,
			 (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
			(filp,
			 (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);
	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}
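
/*
 * read()/write()/lseek() below operate on the window returned by
 * vsoc_get_area(): the fd-scoped permission area if one is set, otherwise
 * the whole region (or nothing if the region is managed by another region).
 */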
static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that.
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;
	filp->f_pos = offset;

	return offset;
}
static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}
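
/*
 * Each region has its own MSI-X vector; the handler's dev_id is the
 * corresponding vsoc_region_data entry, so the sanity checks below recompute
 * and verify the region index before waking that region's wait queue.
 */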
static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num = region_data - vsoc_dev.regions_data;

	if (unlikely(!region_data))
		return IRQ_NONE;

	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}
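
/*
 * Probe order: enable the PCI device, claim its regions, map the register
 * and shared memory BARs, validate the shared memory layout, register the
 * character devices, enable one MSI-X vector per region, and finally wire up
 * per-region wait queues, signal flags, IRQ handlers and device nodes.
 * vsoc_remove_device() is used as the single error-unwind path throughout.
 */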
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	vsoc_dev.regions_data =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
			(&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed irq=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		if (!device_create(vsoc_dev.class, NULL,
				   MKDEV(vsoc_dev.major, i),
				   NULL, vsoc_dev.regions_data[i].name)) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}
/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 *   The device may have been partially initialized, so double check
 *   that the allocations happened.
 *
 *   This function may be called multiple times, so mark resources as freed
 *   as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector, NULL);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}
static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	int err;

	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	err = pci_register_driver(&vsoc_pci_driver);
	if (err < 0)
		return err;
	return 0;
}
static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}
static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}
/*
 * Returns the device relative offset and length of the area specified by the
 * fd scoped permission. If there is no fd scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is owned
 * by another one, in which case the default is a permission with zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission *perm;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	perm = &((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node->permission;
	if (perm) {
		off += perm->begin_offset;
		length = perm->end_offset - perm->begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* return zero length, access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}
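
/*
 * mmap() exposes the same fd-scoped window as read()/write(): the requested
 * pgoff is applied within the area returned by vsoc_get_area() and the pages
 * are mapped uncached straight from the shared memory BAR.
 */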
static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	if (area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}
module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEmu's ivshmem device");
MODULE_VERSION("1.0");