1 // SPDX-License-Identifier: GPL-2.0-only
3 /* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
4 /* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
6 #include <linux/bitfield.h>
7 #include <linux/bits.h>
8 #include <linux/completion.h>
9 #include <linux/delay.h>
10 #include <linux/dma-buf.h>
11 #include <linux/dma-mapping.h>
12 #include <linux/interrupt.h>
13 #include <linux/kref.h>
14 #include <linux/list.h>
15 #include <linux/math64.h>
17 #include <linux/moduleparam.h>
18 #include <linux/scatterlist.h>
19 #include <linux/spinlock.h>
20 #include <linux/srcu.h>
21 #include <linux/types.h>
22 #include <linux/uaccess.h>
23 #include <linux/wait.h>
24 #include <drm/drm_file.h>
25 #include <drm/drm_gem.h>
26 #include <drm/drm_prime.h>
27 #include <drm/drm_print.h>
28 #include <uapi/drm/qaic_accel.h>
/* Bits that may be set in a semaphore value / index; used to validate input */
#define SEM_VAL_MASK	GENMASK_ULL(11, 0)
#define SEM_INDEX_MASK	GENMASK_ULL(4, 0)
/* Flags and transfer direction values ORed into a request element's cmd byte */
#define BULK_XFER	BIT(3)
#define GEN_COMPLETION	BIT(4)
#define INBOUND_XFER	1
#define OUTBOUND_XFER	2
/* Offsets, from dbc_base, of the request/response queue head and tail pointers */
#define REQHP_OFF	0x0 /* we read this */
#define REQTP_OFF	0x4 /* we write this */
#define RSPHP_OFF	0x8 /* we write this */
#define RSPTP_OFF	0xc /* we read this */

/*
 * Pack the fields of one semaphore command into a single 32-bit value:
 * bits 11:0 value, 20:16 index, 22 presync, 26:24 command, 30:29 sync fence
 * flags, and bit 31 set whenever the command is non-zero.
 *
 * The expansion is a single parenthesized expression so that the macro can be
 * used directly as a function argument or as an operand of a larger
 * expression. Note that 'cmd' is evaluated twice.
 */
#define ENCODE_SEM(val, index, sync, cmd, flags) \
	(FIELD_PREP(GENMASK(11, 0), (val)) | \
	 FIELD_PREP(GENMASK(20, 16), (index)) | \
	 FIELD_PREP(BIT(22), (sync)) | \
	 FIELD_PREP(GENMASK(26, 24), (cmd)) | \
	 FIELD_PREP(GENMASK(30, 29), (flags)) | \
	 FIELD_PREP(BIT(31), (cmd) ? 1 : 0))

#define NUM_EVENTS	128

/* Address of the request element at index 'offset' in the queue at 'base' */
#define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size())
56 static unsigned int wait_exec_default_timeout_ms
= 5000; /* 5 sec default */
57 module_param(wait_exec_default_timeout_ms
, uint
, 0600);
58 MODULE_PARM_DESC(wait_exec_default_timeout_ms
, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO");
60 static unsigned int datapath_poll_interval_us
= 100; /* 100 usec default */
61 module_param(datapath_poll_interval_us
, uint
, 0600);
62 MODULE_PARM_DESC(datapath_poll_interval_us
,
63 "Amount of time to sleep between activity when datapath polling is enabled");
67 * A request ID is assigned to each memory handle going in DMA queue.
68 * As a single memory handle can enqueue multiple elements in DMA queue
69 * all of them will have the same request ID.
75 * Special encoded variable
76 * 7 0 - Do not force to generate MSI after DMA is completed
77 * 1 - Force to generate MSI after DMA is completed
79 * 4 1 - Generate completion element in the response queue
80 * 0 - No Completion Code
81 * 3 0 - DMA request is a Link list transfer
82 * 1 - DMA request is a Bulk transfer
84 * 1:0 00 - No DMA transfer involved
85 * 01 - DMA transfer is part of inbound transfer
86 * 10 - DMA transfer has outbound transfer
91 /* Source address for the transfer */
93 /* Destination address for the transfer */
95 /* Length of transfer request */
98 /* Doorbell address */
101 * Special encoded variable
102 * 7 1 - Doorbell(db) write
103 * 0 - No doorbell write
105 * 1:0 00 - 32 bit access, db address must be aligned to 32bit-boundary
106 * 01 - 16 bit access, db address must be aligned to 16bit-boundary
107 * 10 - 8 bit access, db address must be aligned to 8bit-boundary
113 /* 32 bit data written to doorbell address */
116 * Special encoded variable
117 * All the fields of sem_cmdX are passed from user and all are ORed
118 * together to form sem_cmd.
	 * 11:0		Semaphore value
121 * 20:16 Semaphore index
125 * 26:24 Semaphore command
127 * 29 Semaphore DMA out bound sync fence
128 * 30 Semaphore DMA in bound sync fence
129 * 31 Enable semaphore command
138 /* Request ID of the memory handle whose DMA transaction is completed */
140 /* Status of the DMA transaction. 0 : Success otherwise failure */
144 static inline bool bo_queued(struct qaic_bo
*bo
)
146 return !list_empty(&bo
->xfer_list
);
149 inline int get_dbc_req_elem_size(void)
151 return sizeof(struct dbc_req
);
154 inline int get_dbc_rsp_elem_size(void)
156 return sizeof(struct dbc_rsp
);
159 static void free_slice(struct kref
*kref
)
161 struct bo_slice
*slice
= container_of(kref
, struct bo_slice
, ref_count
);
163 slice
->bo
->total_slice_nents
-= slice
->nents
;
164 list_del(&slice
->slice
);
165 drm_gem_object_put(&slice
->bo
->base
);
166 sg_free_table(slice
->sgt
);
172 static int clone_range_of_sgt_for_slice(struct qaic_device
*qdev
, struct sg_table
**sgt_out
,
173 struct sg_table
*sgt_in
, u64 size
, u64 offset
)
175 int total_len
, len
, nents
, offf
= 0, offl
= 0;
176 struct scatterlist
*sg
, *sgn
, *sgf
, *sgl
;
177 struct sg_table
*sgt
;
180 /* find out number of relevant nents needed for this mem */
186 size
= size
? size
: PAGE_SIZE
;
187 for_each_sgtable_dma_sg(sgt_in
, sg
, j
) {
188 len
= sg_dma_len(sg
);
192 if (offset
>= total_len
&& offset
< total_len
+ len
) {
194 offf
= offset
- total_len
;
198 if (offset
+ size
>= total_len
&&
199 offset
+ size
<= total_len
+ len
) {
201 offl
= offset
+ size
- total_len
;
212 sgt
= kzalloc(sizeof(*sgt
), GFP_KERNEL
);
218 ret
= sg_alloc_table(sgt
, nents
, GFP_KERNEL
);
222 /* copy relevant sg node and fix page and length */
224 for_each_sgtable_dma_sg(sgt
, sg
, j
) {
225 memcpy(sg
, sgn
, sizeof(*sg
));
227 sg_dma_address(sg
) += offf
;
228 sg_dma_len(sg
) -= offf
;
229 sg_set_page(sg
, sg_page(sgn
), sg_dma_len(sg
), offf
);
234 sg_dma_len(sg
) = offl
- offf
;
235 sg_set_page(sg
, sg_page(sgn
), offl
- offf
, offf
);
252 static int encode_reqs(struct qaic_device
*qdev
, struct bo_slice
*slice
,
253 struct qaic_attach_slice_entry
*req
)
255 __le64 db_addr
= cpu_to_le64(req
->db_addr
);
256 __le32 db_data
= cpu_to_le32(req
->db_data
);
257 struct scatterlist
*sg
;
258 __u8 cmd
= BULK_XFER
;
265 cmd
|= (slice
->dir
== DMA_TO_DEVICE
? INBOUND_XFER
: OUTBOUND_XFER
);
267 if (req
->db_len
&& !IS_ALIGNED(req
->db_addr
, req
->db_len
/ 8))
270 presync_sem
= req
->sem0
.presync
+ req
->sem1
.presync
+ req
->sem2
.presync
+ req
->sem3
.presync
;
274 presync_sem
= req
->sem0
.presync
<< 0 | req
->sem1
.presync
<< 1 |
275 req
->sem2
.presync
<< 2 | req
->sem3
.presync
<< 3;
277 switch (req
->db_len
) {
288 db_len
= 0; /* doorbell is not active for this command */
291 return -EINVAL
; /* should never hit this */
295 * When we end up splitting up a single request (ie a buf slice) into
296 * multiple DMA requests, we have to manage the sync data carefully.
297 * There can only be one presync sem. That needs to be on every xfer
298 * so that the DMA engine doesn't transfer data before the receiver is
299 * ready. We only do the doorbell and postsync sems after the xfer.
300 * To guarantee previous xfers for the request are complete, we use a
303 dev_addr
= req
->dev_addr
;
304 for_each_sgtable_dma_sg(slice
->sgt
, sg
, i
) {
305 slice
->reqs
[i
].cmd
= cmd
;
306 slice
->reqs
[i
].src_addr
= cpu_to_le64(slice
->dir
== DMA_TO_DEVICE
?
307 sg_dma_address(sg
) : dev_addr
);
308 slice
->reqs
[i
].dest_addr
= cpu_to_le64(slice
->dir
== DMA_TO_DEVICE
?
309 dev_addr
: sg_dma_address(sg
));
311 * sg_dma_len(sg) returns size of a DMA segment, maximum DMA
312 * segment size is set to UINT_MAX by qaic and hence return
313 * values of sg_dma_len(sg) can never exceed u32 range. So,
314 * by down sizing we are not corrupting the value.
316 slice
->reqs
[i
].len
= cpu_to_le32((u32
)sg_dma_len(sg
));
317 switch (presync_sem
) {
319 slice
->reqs
[i
].sem_cmd0
= cpu_to_le32(ENCODE_SEM(req
->sem0
.val
,
326 slice
->reqs
[i
].sem_cmd1
= cpu_to_le32(ENCODE_SEM(req
->sem1
.val
,
333 slice
->reqs
[i
].sem_cmd2
= cpu_to_le32(ENCODE_SEM(req
->sem2
.val
,
340 slice
->reqs
[i
].sem_cmd3
= cpu_to_le32(ENCODE_SEM(req
->sem3
.val
,
347 dev_addr
+= sg_dma_len(sg
);
349 /* add post transfer stuff to last segment */
351 slice
->reqs
[i
].cmd
|= GEN_COMPLETION
;
352 slice
->reqs
[i
].db_addr
= db_addr
;
353 slice
->reqs
[i
].db_len
= db_len
;
354 slice
->reqs
[i
].db_data
= db_data
;
356 * Add a fence if we have more than one request going to the hardware
357 * representing the entirety of the user request, and the user request
358 * has no presync condition.
359 * Fences are expensive, so we try to avoid them. We rely on the
360 * hardware behavior to avoid needing one when there is a presync
361 * condition. When a presync exists, all requests for that same
362 * presync will be queued into a fifo. Thus, since we queue the
363 * post xfer activity only on the last request we queue, the hardware
364 * will ensure that the last queued request is processed last, thus
365 * making sure the post xfer activity happens at the right time without
368 if (i
&& !presync_sem
)
369 req
->sem0
.flags
|= (slice
->dir
== DMA_TO_DEVICE
?
370 QAIC_SEM_INSYNCFENCE
: QAIC_SEM_OUTSYNCFENCE
);
371 slice
->reqs
[i
].sem_cmd0
= cpu_to_le32(ENCODE_SEM(req
->sem0
.val
, req
->sem0
.index
,
372 req
->sem0
.presync
, req
->sem0
.cmd
,
374 slice
->reqs
[i
].sem_cmd1
= cpu_to_le32(ENCODE_SEM(req
->sem1
.val
, req
->sem1
.index
,
375 req
->sem1
.presync
, req
->sem1
.cmd
,
377 slice
->reqs
[i
].sem_cmd2
= cpu_to_le32(ENCODE_SEM(req
->sem2
.val
, req
->sem2
.index
,
378 req
->sem2
.presync
, req
->sem2
.cmd
,
380 slice
->reqs
[i
].sem_cmd3
= cpu_to_le32(ENCODE_SEM(req
->sem3
.val
, req
->sem3
.index
,
381 req
->sem3
.presync
, req
->sem3
.cmd
,
387 static int qaic_map_one_slice(struct qaic_device
*qdev
, struct qaic_bo
*bo
,
388 struct qaic_attach_slice_entry
*slice_ent
)
390 struct sg_table
*sgt
= NULL
;
391 struct bo_slice
*slice
;
394 ret
= clone_range_of_sgt_for_slice(qdev
, &sgt
, bo
->sgt
, slice_ent
->size
, slice_ent
->offset
);
398 slice
= kmalloc(sizeof(*slice
), GFP_KERNEL
);
404 slice
->reqs
= kcalloc(sgt
->nents
, sizeof(*slice
->reqs
), GFP_KERNEL
);
410 slice
->no_xfer
= !slice_ent
->size
;
412 slice
->nents
= sgt
->nents
;
413 slice
->dir
= bo
->dir
;
415 slice
->size
= slice_ent
->size
;
416 slice
->offset
= slice_ent
->offset
;
418 ret
= encode_reqs(qdev
, slice
, slice_ent
);
422 bo
->total_slice_nents
+= sgt
->nents
;
423 kref_init(&slice
->ref_count
);
424 drm_gem_object_get(&bo
->base
);
425 list_add_tail(&slice
->slice
, &bo
->slices
);
440 static int create_sgt(struct qaic_device
*qdev
, struct sg_table
**sgt_out
, u64 size
)
442 struct scatterlist
*sg
;
443 struct sg_table
*sgt
;
454 nr_pages
= DIV_ROUND_UP(size
, PAGE_SIZE
);
456 * calculate how much extra we are going to allocate, to remove
459 buf_extra
= (PAGE_SIZE
- size
% PAGE_SIZE
) % PAGE_SIZE
;
460 max_order
= min(MAX_PAGE_ORDER
, get_order(size
));
462 /* allocate a single page for book keeping */
468 pages
= kvmalloc_array(nr_pages
, sizeof(*pages
) + sizeof(*pages_order
), GFP_KERNEL
);
473 pages_order
= (void *)pages
+ sizeof(*pages
) * nr_pages
;
476 * Allocate requested memory using alloc_pages. It is possible to allocate
477 * the requested memory in multiple chunks by calling alloc_pages
478 * multiple times. Use SG table to handle multiple allocated pages.
481 while (nr_pages
> 0) {
482 order
= min(get_order(nr_pages
* PAGE_SIZE
), max_order
);
484 pages
[i
] = alloc_pages(GFP_KERNEL
| GFP_HIGHUSER
|
485 __GFP_NOWARN
| __GFP_ZERO
|
486 (order
? __GFP_NORETRY
: __GFP_RETRY_MAYFAIL
),
492 goto free_partial_alloc
;
497 pages_order
[i
] = order
;
499 nr_pages
-= 1 << order
;
501 /* account for over allocation */
502 buf_extra
+= abs(nr_pages
) * PAGE_SIZE
;
506 sgt
= kmalloc(sizeof(*sgt
), GFP_KERNEL
);
509 goto free_partial_alloc
;
512 if (sg_alloc_table(sgt
, i
, GFP_KERNEL
)) {
517 /* Populate the SG table with the allocated memory pages */
519 for (k
= 0; k
< i
; k
++, sg
= sg_next(sg
)) {
520 /* Last entry requires special handling */
522 sg_set_page(sg
, pages
[k
], PAGE_SIZE
<< pages_order
[k
], 0);
524 sg_set_page(sg
, pages
[k
], (PAGE_SIZE
<< pages_order
[k
]) - buf_extra
, 0);
536 for (j
= 0; j
< i
; j
++)
537 __free_pages(pages
[j
], pages_order
[j
]);
544 static bool invalid_sem(struct qaic_sem
*sem
)
546 if (sem
->val
& ~SEM_VAL_MASK
|| sem
->index
& ~SEM_INDEX_MASK
||
547 !(sem
->presync
== 0 || sem
->presync
== 1) || sem
->pad
||
548 sem
->flags
& ~(QAIC_SEM_INSYNCFENCE
| QAIC_SEM_OUTSYNCFENCE
) ||
549 sem
->cmd
> QAIC_SEM_WAIT_GT_0
)
554 static int qaic_validate_req(struct qaic_device
*qdev
, struct qaic_attach_slice_entry
*slice_ent
,
555 u32 count
, u64 total_size
)
559 for (i
= 0; i
< count
; i
++) {
560 if (!(slice_ent
[i
].db_len
== 32 || slice_ent
[i
].db_len
== 16 ||
561 slice_ent
[i
].db_len
== 8 || slice_ent
[i
].db_len
== 0) ||
562 invalid_sem(&slice_ent
[i
].sem0
) || invalid_sem(&slice_ent
[i
].sem1
) ||
563 invalid_sem(&slice_ent
[i
].sem2
) || invalid_sem(&slice_ent
[i
].sem3
))
566 if (slice_ent
[i
].offset
+ slice_ent
[i
].size
> total_size
)
573 static void qaic_free_sgt(struct sg_table
*sgt
)
575 struct scatterlist
*sg
;
580 for (sg
= sgt
->sgl
; sg
; sg
= sg_next(sg
))
582 __free_pages(sg_page(sg
), get_order(sg
->length
));
587 static void qaic_gem_print_info(struct drm_printer
*p
, unsigned int indent
,
588 const struct drm_gem_object
*obj
)
590 struct qaic_bo
*bo
= to_qaic_bo(obj
);
592 drm_printf_indent(p
, indent
, "BO DMA direction %d\n", bo
->dir
);
595 static const struct vm_operations_struct drm_vm_ops
= {
596 .open
= drm_gem_vm_open
,
597 .close
= drm_gem_vm_close
,
600 static int qaic_gem_object_mmap(struct drm_gem_object
*obj
, struct vm_area_struct
*vma
)
602 struct qaic_bo
*bo
= to_qaic_bo(obj
);
603 unsigned long offset
= 0;
604 struct scatterlist
*sg
;
607 if (obj
->import_attach
)
610 for (sg
= bo
->sgt
->sgl
; sg
; sg
= sg_next(sg
)) {
612 ret
= remap_pfn_range(vma
, vma
->vm_start
+ offset
, page_to_pfn(sg_page(sg
)),
613 sg
->length
, vma
->vm_page_prot
);
616 offset
+= sg
->length
;
624 static void qaic_free_object(struct drm_gem_object
*obj
)
626 struct qaic_bo
*bo
= to_qaic_bo(obj
);
628 if (obj
->import_attach
) {
629 /* DMABUF/PRIME Path */
630 drm_prime_gem_destroy(obj
, NULL
);
632 /* Private buffer allocation path */
633 qaic_free_sgt(bo
->sgt
);
636 mutex_destroy(&bo
->lock
);
637 drm_gem_object_release(obj
);
641 static const struct drm_gem_object_funcs qaic_gem_funcs
= {
642 .free
= qaic_free_object
,
643 .print_info
= qaic_gem_print_info
,
644 .mmap
= qaic_gem_object_mmap
,
645 .vm_ops
= &drm_vm_ops
,
648 static void qaic_init_bo(struct qaic_bo
*bo
, bool reinit
)
652 reinit_completion(&bo
->xfer_done
);
654 mutex_init(&bo
->lock
);
655 init_completion(&bo
->xfer_done
);
657 complete_all(&bo
->xfer_done
);
658 INIT_LIST_HEAD(&bo
->slices
);
659 INIT_LIST_HEAD(&bo
->xfer_list
);
662 static struct qaic_bo
*qaic_alloc_init_bo(void)
666 bo
= kzalloc(sizeof(*bo
), GFP_KERNEL
);
668 return ERR_PTR(-ENOMEM
);
670 qaic_init_bo(bo
, false);
675 int qaic_create_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
677 struct qaic_create_bo
*args
= data
;
678 int usr_rcu_id
, qdev_rcu_id
;
679 struct drm_gem_object
*obj
;
680 struct qaic_device
*qdev
;
681 struct qaic_user
*usr
;
689 size
= PAGE_ALIGN(args
->size
);
693 usr
= file_priv
->driver_priv
;
694 usr_rcu_id
= srcu_read_lock(&usr
->qddev_lock
);
697 goto unlock_usr_srcu
;
700 qdev
= usr
->qddev
->qdev
;
701 qdev_rcu_id
= srcu_read_lock(&qdev
->dev_lock
);
702 if (qdev
->dev_state
!= QAIC_ONLINE
) {
704 goto unlock_dev_srcu
;
707 bo
= qaic_alloc_init_bo();
710 goto unlock_dev_srcu
;
714 drm_gem_private_object_init(dev
, obj
, size
);
716 obj
->funcs
= &qaic_gem_funcs
;
717 ret
= create_sgt(qdev
, &bo
->sgt
, size
);
721 ret
= drm_gem_create_mmap_offset(obj
);
725 ret
= drm_gem_handle_create(file_priv
, obj
, &args
->handle
);
729 bo
->handle
= args
->handle
;
730 drm_gem_object_put(obj
);
731 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
732 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
737 drm_gem_object_put(obj
);
739 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
741 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
745 int qaic_mmap_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
747 struct qaic_mmap_bo
*args
= data
;
748 int usr_rcu_id
, qdev_rcu_id
;
749 struct drm_gem_object
*obj
;
750 struct qaic_device
*qdev
;
751 struct qaic_user
*usr
;
754 usr
= file_priv
->driver_priv
;
755 usr_rcu_id
= srcu_read_lock(&usr
->qddev_lock
);
758 goto unlock_usr_srcu
;
761 qdev
= usr
->qddev
->qdev
;
762 qdev_rcu_id
= srcu_read_lock(&qdev
->dev_lock
);
763 if (qdev
->dev_state
!= QAIC_ONLINE
) {
765 goto unlock_dev_srcu
;
768 obj
= drm_gem_object_lookup(file_priv
, args
->handle
);
771 goto unlock_dev_srcu
;
774 args
->offset
= drm_vma_node_offset_addr(&obj
->vma_node
);
776 drm_gem_object_put(obj
);
779 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
781 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
785 struct drm_gem_object
*qaic_gem_prime_import(struct drm_device
*dev
, struct dma_buf
*dma_buf
)
787 struct dma_buf_attachment
*attach
;
788 struct drm_gem_object
*obj
;
792 bo
= qaic_alloc_init_bo();
799 get_dma_buf(dma_buf
);
801 attach
= dma_buf_attach(dma_buf
, dev
->dev
);
802 if (IS_ERR(attach
)) {
803 ret
= PTR_ERR(attach
);
807 if (!attach
->dmabuf
->size
) {
809 goto size_align_fail
;
812 drm_gem_private_object_init(dev
, obj
, attach
->dmabuf
->size
);
814 * skipping dma_buf_map_attachment() as we do not know the direction
815 * just yet. Once the direction is known in the subsequent IOCTL to
816 * attach slicing, we can do it then.
819 obj
->funcs
= &qaic_gem_funcs
;
820 obj
->import_attach
= attach
;
821 obj
->resv
= dma_buf
->resv
;
826 dma_buf_detach(dma_buf
, attach
);
828 dma_buf_put(dma_buf
);
834 static int qaic_prepare_import_bo(struct qaic_bo
*bo
, struct qaic_attach_slice_hdr
*hdr
)
836 struct drm_gem_object
*obj
= &bo
->base
;
837 struct sg_table
*sgt
;
840 sgt
= dma_buf_map_attachment(obj
->import_attach
, hdr
->dir
);
851 static int qaic_prepare_export_bo(struct qaic_device
*qdev
, struct qaic_bo
*bo
,
852 struct qaic_attach_slice_hdr
*hdr
)
856 ret
= dma_map_sgtable(&qdev
->pdev
->dev
, bo
->sgt
, hdr
->dir
, 0);
863 static int qaic_prepare_bo(struct qaic_device
*qdev
, struct qaic_bo
*bo
,
864 struct qaic_attach_slice_hdr
*hdr
)
868 if (bo
->base
.import_attach
)
869 ret
= qaic_prepare_import_bo(bo
, hdr
);
871 ret
= qaic_prepare_export_bo(qdev
, bo
, hdr
);
873 bo
->dbc
= &qdev
->dbc
[hdr
->dbc_id
];
874 bo
->nr_slice
= hdr
->count
;
879 static void qaic_unprepare_import_bo(struct qaic_bo
*bo
)
881 dma_buf_unmap_attachment(bo
->base
.import_attach
, bo
->sgt
, bo
->dir
);
885 static void qaic_unprepare_export_bo(struct qaic_device
*qdev
, struct qaic_bo
*bo
)
887 dma_unmap_sgtable(&qdev
->pdev
->dev
, bo
->sgt
, bo
->dir
, 0);
890 static void qaic_unprepare_bo(struct qaic_device
*qdev
, struct qaic_bo
*bo
)
892 if (bo
->base
.import_attach
)
893 qaic_unprepare_import_bo(bo
);
895 qaic_unprepare_export_bo(qdev
, bo
);
902 static void qaic_free_slices_bo(struct qaic_bo
*bo
)
904 struct bo_slice
*slice
, *temp
;
906 list_for_each_entry_safe(slice
, temp
, &bo
->slices
, slice
)
907 kref_put(&slice
->ref_count
, free_slice
);
908 if (WARN_ON_ONCE(bo
->total_slice_nents
!= 0))
909 bo
->total_slice_nents
= 0;
913 static int qaic_attach_slicing_bo(struct qaic_device
*qdev
, struct qaic_bo
*bo
,
914 struct qaic_attach_slice_hdr
*hdr
,
915 struct qaic_attach_slice_entry
*slice_ent
)
919 for (i
= 0; i
< hdr
->count
; i
++) {
920 ret
= qaic_map_one_slice(qdev
, bo
, &slice_ent
[i
]);
922 qaic_free_slices_bo(bo
);
927 if (bo
->total_slice_nents
> bo
->dbc
->nelem
) {
928 qaic_free_slices_bo(bo
);
935 int qaic_attach_slice_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
937 struct qaic_attach_slice_entry
*slice_ent
;
938 struct qaic_attach_slice
*args
= data
;
939 int rcu_id
, usr_rcu_id
, qdev_rcu_id
;
940 struct dma_bridge_chan
*dbc
;
941 struct drm_gem_object
*obj
;
942 struct qaic_device
*qdev
;
943 unsigned long arg_size
;
944 struct qaic_user
*usr
;
945 u8 __user
*user_data
;
949 if (args
->hdr
.count
== 0)
952 arg_size
= args
->hdr
.count
* sizeof(*slice_ent
);
953 if (arg_size
/ args
->hdr
.count
!= sizeof(*slice_ent
))
956 if (!(args
->hdr
.dir
== DMA_TO_DEVICE
|| args
->hdr
.dir
== DMA_FROM_DEVICE
))
962 usr
= file_priv
->driver_priv
;
963 usr_rcu_id
= srcu_read_lock(&usr
->qddev_lock
);
966 goto unlock_usr_srcu
;
969 qdev
= usr
->qddev
->qdev
;
970 qdev_rcu_id
= srcu_read_lock(&qdev
->dev_lock
);
971 if (qdev
->dev_state
!= QAIC_ONLINE
) {
973 goto unlock_dev_srcu
;
976 if (args
->hdr
.dbc_id
>= qdev
->num_dbc
) {
978 goto unlock_dev_srcu
;
981 user_data
= u64_to_user_ptr(args
->data
);
983 slice_ent
= kzalloc(arg_size
, GFP_KERNEL
);
986 goto unlock_dev_srcu
;
989 ret
= copy_from_user(slice_ent
, user_data
, arg_size
);
995 obj
= drm_gem_object_lookup(file_priv
, args
->hdr
.handle
);
1001 ret
= qaic_validate_req(qdev
, slice_ent
, args
->hdr
.count
, obj
->size
);
1005 bo
= to_qaic_bo(obj
);
1006 ret
= mutex_lock_interruptible(&bo
->lock
);
1015 dbc
= &qdev
->dbc
[args
->hdr
.dbc_id
];
1016 rcu_id
= srcu_read_lock(&dbc
->ch_lock
);
1017 if (dbc
->usr
!= usr
) {
1019 goto unlock_ch_srcu
;
1022 ret
= qaic_prepare_bo(qdev
, bo
, &args
->hdr
);
1024 goto unlock_ch_srcu
;
1026 ret
= qaic_attach_slicing_bo(qdev
, bo
, &args
->hdr
, slice_ent
);
1030 if (args
->hdr
.dir
== DMA_TO_DEVICE
)
1031 dma_sync_sgtable_for_cpu(&qdev
->pdev
->dev
, bo
->sgt
, args
->hdr
.dir
);
1034 list_add_tail(&bo
->bo_list
, &bo
->dbc
->bo_lists
);
1035 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1036 mutex_unlock(&bo
->lock
);
1038 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
1039 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
1044 qaic_unprepare_bo(qdev
, bo
);
1046 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1048 mutex_unlock(&bo
->lock
);
1050 drm_gem_object_put(obj
);
1054 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
1056 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
1060 static inline u32
fifo_space_avail(u32 head
, u32 tail
, u32 q_size
)
1062 u32 avail
= head
- tail
- 1;
1070 static inline int copy_exec_reqs(struct qaic_device
*qdev
, struct bo_slice
*slice
, u32 dbc_id
,
1071 u32 head
, u32
*ptail
)
1073 struct dma_bridge_chan
*dbc
= &qdev
->dbc
[dbc_id
];
1074 struct dbc_req
*reqs
= slice
->reqs
;
1078 avail
= fifo_space_avail(head
, tail
, dbc
->nelem
);
1079 if (avail
< slice
->nents
)
1082 if (tail
+ slice
->nents
> dbc
->nelem
) {
1083 avail
= dbc
->nelem
- tail
;
1084 avail
= min_t(u32
, avail
, slice
->nents
);
1085 memcpy(fifo_at(dbc
->req_q_base
, tail
), reqs
, sizeof(*reqs
) * avail
);
1087 avail
= slice
->nents
- avail
;
1089 memcpy(dbc
->req_q_base
, reqs
, sizeof(*reqs
) * avail
);
1091 memcpy(fifo_at(dbc
->req_q_base
, tail
), reqs
, sizeof(*reqs
) * slice
->nents
);
1094 *ptail
= (tail
+ slice
->nents
) % dbc
->nelem
;
1099 static inline int copy_partial_exec_reqs(struct qaic_device
*qdev
, struct bo_slice
*slice
,
1100 u64 resize
, struct dma_bridge_chan
*dbc
, u32 head
,
1103 struct dbc_req
*reqs
= slice
->reqs
;
1104 struct dbc_req
*last_req
;
1110 avail
= fifo_space_avail(head
, tail
, dbc
->nelem
);
1113 * After this for loop is complete, first_n represents the index
1114 * of the last DMA request of this slice that needs to be
1115 * transferred after resizing and last_bytes represents DMA size
1118 last_bytes
= resize
;
1119 for (first_n
= 0; first_n
< slice
->nents
; first_n
++)
1120 if (last_bytes
> le32_to_cpu(reqs
[first_n
].len
))
1121 last_bytes
-= le32_to_cpu(reqs
[first_n
].len
);
1125 if (avail
< (first_n
+ 1))
1129 if (tail
+ first_n
> dbc
->nelem
) {
1130 avail
= dbc
->nelem
- tail
;
1131 avail
= min_t(u32
, avail
, first_n
);
1132 memcpy(fifo_at(dbc
->req_q_base
, tail
), reqs
, sizeof(*reqs
) * avail
);
1133 last_req
= reqs
+ avail
;
1134 avail
= first_n
- avail
;
1136 memcpy(dbc
->req_q_base
, last_req
, sizeof(*reqs
) * avail
);
1138 memcpy(fifo_at(dbc
->req_q_base
, tail
), reqs
, sizeof(*reqs
) * first_n
);
1143 * Copy over the last entry. Here we need to adjust len to the left over
1144 * size, and set src and dst to the entry it is copied to.
1146 last_req
= fifo_at(dbc
->req_q_base
, (tail
+ first_n
) % dbc
->nelem
);
1147 memcpy(last_req
, reqs
+ slice
->nents
- 1, sizeof(*reqs
));
1150 * last_bytes holds size of a DMA segment, maximum DMA segment size is
1151 * set to UINT_MAX by qaic and hence last_bytes can never exceed u32
1152 * range. So, by down sizing we are not corrupting the value.
1154 last_req
->len
= cpu_to_le32((u32
)last_bytes
);
1155 last_req
->src_addr
= reqs
[first_n
].src_addr
;
1156 last_req
->dest_addr
= reqs
[first_n
].dest_addr
;
1158 /* Disable DMA transfer */
1159 last_req
->cmd
= GENMASK(7, 2) & reqs
[first_n
].cmd
;
1161 *ptail
= (tail
+ first_n
+ 1) % dbc
->nelem
;
1166 static int send_bo_list_to_device(struct qaic_device
*qdev
, struct drm_file
*file_priv
,
1167 struct qaic_execute_entry
*exec
, unsigned int count
,
1168 bool is_partial
, struct dma_bridge_chan
*dbc
, u32 head
,
1171 struct qaic_partial_execute_entry
*pexec
= (struct qaic_partial_execute_entry
*)exec
;
1172 struct drm_gem_object
*obj
;
1173 struct bo_slice
*slice
;
1174 unsigned long flags
;
1179 for (i
= 0; i
< count
; i
++) {
1181 * ref count will be decremented when the transfer of this
1182 * buffer is complete. It is inside dbc_irq_threaded_fn().
1184 obj
= drm_gem_object_lookup(file_priv
,
1185 is_partial
? pexec
[i
].handle
: exec
[i
].handle
);
1188 goto failed_to_send_bo
;
1191 bo
= to_qaic_bo(obj
);
1192 ret
= mutex_lock_interruptible(&bo
->lock
);
1194 goto failed_to_send_bo
;
1201 if (is_partial
&& pexec
[i
].resize
> bo
->base
.size
) {
1206 spin_lock_irqsave(&dbc
->xfer_lock
, flags
);
1207 if (bo_queued(bo
)) {
1208 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1213 bo
->req_id
= dbc
->next_req_id
++;
1215 list_for_each_entry(slice
, &bo
->slices
, slice
) {
1216 for (j
= 0; j
< slice
->nents
; j
++)
1217 slice
->reqs
[j
].req_id
= cpu_to_le16(bo
->req_id
);
1219 if (is_partial
&& (!pexec
[i
].resize
|| pexec
[i
].resize
<= slice
->offset
))
1220 /* Configure the slice for no DMA transfer */
1221 ret
= copy_partial_exec_reqs(qdev
, slice
, 0, dbc
, head
, tail
);
1222 else if (is_partial
&& pexec
[i
].resize
< slice
->offset
+ slice
->size
)
1223 /* Configure the slice to be partially DMA transferred */
1224 ret
= copy_partial_exec_reqs(qdev
, slice
,
1225 pexec
[i
].resize
- slice
->offset
, dbc
,
1228 ret
= copy_exec_reqs(qdev
, slice
, dbc
->id
, head
, tail
);
1230 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1234 reinit_completion(&bo
->xfer_done
);
1235 list_add_tail(&bo
->xfer_list
, &dbc
->xfer_list
);
1236 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1237 dma_sync_sgtable_for_device(&qdev
->pdev
->dev
, bo
->sgt
, bo
->dir
);
1238 mutex_unlock(&bo
->lock
);
1244 mutex_unlock(&bo
->lock
);
1247 drm_gem_object_put(obj
);
1248 for (j
= 0; j
< i
; j
++) {
1249 spin_lock_irqsave(&dbc
->xfer_lock
, flags
);
1250 bo
= list_last_entry(&dbc
->xfer_list
, struct qaic_bo
, xfer_list
);
1252 list_del_init(&bo
->xfer_list
);
1253 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1254 dma_sync_sgtable_for_cpu(&qdev
->pdev
->dev
, bo
->sgt
, bo
->dir
);
1255 drm_gem_object_put(obj
);
1260 static void update_profiling_data(struct drm_file
*file_priv
,
1261 struct qaic_execute_entry
*exec
, unsigned int count
,
1262 bool is_partial
, u64 received_ts
, u64 submit_ts
, u32 queue_level
)
1264 struct qaic_partial_execute_entry
*pexec
= (struct qaic_partial_execute_entry
*)exec
;
1265 struct drm_gem_object
*obj
;
1269 for (i
= 0; i
< count
; i
++) {
1271 * Since we already committed the BO to hardware, the only way
1272 * this should fail is a pending signal. We can't cancel the
1273 * submit to hardware, so we have to just skip the profiling
1274 * data. In case the signal is not fatal to the process, we
1275 * return success so that the user doesn't try to resubmit.
1277 obj
= drm_gem_object_lookup(file_priv
,
1278 is_partial
? pexec
[i
].handle
: exec
[i
].handle
);
1281 bo
= to_qaic_bo(obj
);
1282 bo
->perf_stats
.req_received_ts
= received_ts
;
1283 bo
->perf_stats
.req_submit_ts
= submit_ts
;
1284 bo
->perf_stats
.queue_level_before
= queue_level
;
1285 queue_level
+= bo
->total_slice_nents
;
1286 drm_gem_object_put(obj
);
1290 static int __qaic_execute_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
,
1293 struct qaic_execute
*args
= data
;
1294 struct qaic_execute_entry
*exec
;
1295 struct dma_bridge_chan
*dbc
;
1296 int usr_rcu_id
, qdev_rcu_id
;
1297 struct qaic_device
*qdev
;
1298 struct qaic_user
*usr
;
1299 u8 __user
*user_data
;
1310 received_ts
= ktime_get_ns();
1312 size
= is_partial
? sizeof(struct qaic_partial_execute_entry
) : sizeof(*exec
);
1313 n
= (unsigned long)size
* args
->hdr
.count
;
1314 if (args
->hdr
.count
== 0 || n
/ args
->hdr
.count
!= size
)
1317 user_data
= u64_to_user_ptr(args
->data
);
1319 exec
= kcalloc(args
->hdr
.count
, size
, GFP_KERNEL
);
1323 if (copy_from_user(exec
, user_data
, n
)) {
1328 usr
= file_priv
->driver_priv
;
1329 usr_rcu_id
= srcu_read_lock(&usr
->qddev_lock
);
1332 goto unlock_usr_srcu
;
1335 qdev
= usr
->qddev
->qdev
;
1336 qdev_rcu_id
= srcu_read_lock(&qdev
->dev_lock
);
1337 if (qdev
->dev_state
!= QAIC_ONLINE
) {
1339 goto unlock_dev_srcu
;
1342 if (args
->hdr
.dbc_id
>= qdev
->num_dbc
) {
1344 goto unlock_dev_srcu
;
1347 dbc
= &qdev
->dbc
[args
->hdr
.dbc_id
];
1349 rcu_id
= srcu_read_lock(&dbc
->ch_lock
);
1350 if (!dbc
->usr
|| dbc
->usr
->handle
!= usr
->handle
) {
1352 goto release_ch_rcu
;
1355 head
= readl(dbc
->dbc_base
+ REQHP_OFF
);
1356 tail
= readl(dbc
->dbc_base
+ REQTP_OFF
);
1358 if (head
== U32_MAX
|| tail
== U32_MAX
) {
1359 /* PCI link error */
1361 goto release_ch_rcu
;
1364 queue_level
= head
<= tail
? tail
- head
: dbc
->nelem
- (head
- tail
);
1366 ret
= send_bo_list_to_device(qdev
, file_priv
, exec
, args
->hdr
.count
, is_partial
, dbc
,
1369 goto release_ch_rcu
;
1371 /* Finalize commit to hardware */
1372 submit_ts
= ktime_get_ns();
1373 writel(tail
, dbc
->dbc_base
+ REQTP_OFF
);
1375 update_profiling_data(file_priv
, exec
, args
->hdr
.count
, is_partial
, received_ts
,
1376 submit_ts
, queue_level
);
1378 if (datapath_polling
)
1379 schedule_work(&dbc
->poll_work
);
1382 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1384 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
1386 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
1392 int qaic_execute_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
1394 return __qaic_execute_bo_ioctl(dev
, data
, file_priv
, false);
1397 int qaic_partial_execute_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
1399 return __qaic_execute_bo_ioctl(dev
, data
, file_priv
, true);
1403 * Our interrupt handling is a bit more complicated than a simple ideal, but
1406 * Each dbc has a completion queue. Entries in the queue correspond to DMA
1407 * requests which the device has processed. The hardware already has a built
1408 * in irq mitigation. When the device puts an entry into the queue, it will
1409 * only trigger an interrupt if the queue was empty. Therefore, when adding
1410 * the Nth event to a non-empty queue, the hardware doesn't trigger an
1411 * interrupt. This means the host doesn't get additional interrupts signaling
1412 * the same thing - the queue has something to process.
1413 * This behavior can be overridden in the DMA request.
1414 * This means that when the host receives an interrupt, it is required to
1417 * This behavior is what NAPI attempts to accomplish, although we can't use
1418 * NAPI as we don't have a netdev. We use threaded irqs instead.
1420 * However, there is a situation where the host drains the queue fast enough
1421 * that every event causes an interrupt. Typically this is not a problem as
1422 * the rate of events would be low. However, that is not the case with
1423 * lprnet for example. On an Intel Xeon D-2191 where we run 8 instances of
1424 * lprnet, the host receives roughly 80k interrupts per second from the device
1425 * (per /proc/interrupts). While NAPI documentation indicates the host should
1426 * just chug along, sadly that behavior causes instability in some hosts.
1428 * Therefore, we implement an interrupt disable scheme similar to NAPI. The
1429 * key difference is that we will delay after draining the queue for a small
1430 * time to allow additional events to come in via polling. Using the above
1431 * lprnet workload, this reduces the number of interrupts processed from
1432 * ~80k/sec to about 64 in 5 minutes and appears to solve the system instability.
/*
 * dbc_irq_handler() - hard-irq half of the per-DBC interrupt.
 * @irq: interrupt number; passed to disable_irq_nosync() below.
 * @data: the struct dma_bridge_chan this interrupt belongs to.
 *
 * Runs under the dbc->ch_lock SRCU read lock and releases it on every exit
 * path. Visible checks, in order:
 *  - datapath_polling set: unlock and leave immediately (see the comment in
 *    the body for why the irq can still fire in that mode);
 *  - one more unlock-and-leave path whose condition is not visible in this
 *    view (TODO confirm what it tests);
 *  - response head (RSPHP_OFF) or tail (RSPTP_OFF) reads back as U32_MAX:
 *    treated as a PCI link error;
 *  - head == tail: the response queue is empty.
 * Otherwise the work is handed to the threaded handler.
 *
 * Return: IRQ_WAKE_THREAD when there are queue entries to process. The
 * values returned on the early-exit paths are not visible in this view.
 */
1435 irqreturn_t
dbc_irq_handler(int irq
, void *data
)
1437 struct dma_bridge_chan
*dbc
= data
;
1442 rcu_id
= srcu_read_lock(&dbc
->ch_lock
);
1444 if (datapath_polling
) {
1445 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1447 * Normally datapath_polling will not have irqs enabled, but
1448 * when running with only one MSI the interrupt is shared with
1449 * MHI so it cannot be disabled. Return ASAP instead.
1455 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1459 head
= readl(dbc
->dbc_base
+ RSPHP_OFF
);
1460 if (head
== U32_MAX
) { /* PCI link error */
1461 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1465 tail
= readl(dbc
->dbc_base
+ RSPTP_OFF
);
1466 if (tail
== U32_MAX
) { /* PCI link error */
1467 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1471 if (head
== tail
) { /* queue empty */
1472 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
/*
 * Unless the device runs with a single (shared) MSI, mask this irq without
 * waiting for running handlers before waking the threaded handler.
 */
1476 if (!dbc
->qdev
->single_msi
)
1477 disable_irq_nosync(irq
);
1478 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1479 return IRQ_WAKE_THREAD
;
/*
 * irq_polling_work() - work item that polls a DBC response queue.
 * @work: the poll_work member embedded in a struct dma_bridge_chan; the
 *        channel is recovered with container_of().
 *
 * Visible steps, all under the dbc->ch_lock SRCU read lock:
 *  - if the owning device is not QAIC_ONLINE, drop the lock (presumably the
 *    work then ends - the return is not visible, TODO confirm);
 *  - a second unlock path follows whose condition is not visible here;
 *  - under dbc->xfer_lock, if dbc->xfer_list is empty both locks are dropped;
 *  - read the response head (RSPHP_OFF) and tail (RSPTP_OFF); U32_MAX from
 *    either is treated as a PCI link error and the SRCU lock is dropped;
 *  - irq_wake_thread() kicks the threaded irq handler for dbc->irq, then the
 *    SRCU lock is dropped. NOTE(review): presumably guarded by head != tail;
 *    the guard is not visible in this view;
 *  - sleep between datapath_poll_interval_us and twice that value.
 * NOTE(review): the enclosing loop, if any, is not visible; the sleep suggests
 * the steps repeat - confirm.
 */
1482 void irq_polling_work(struct work_struct
*work
)
1484 struct dma_bridge_chan
*dbc
= container_of(work
, struct dma_bridge_chan
, poll_work
);
1485 unsigned long flags
;
1490 rcu_id
= srcu_read_lock(&dbc
->ch_lock
);
1493 if (dbc
->qdev
->dev_state
!= QAIC_ONLINE
) {
1494 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1498 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1501 spin_lock_irqsave(&dbc
->xfer_lock
, flags
);
1502 if (list_empty(&dbc
->xfer_list
)) {
1503 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1504 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1507 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1509 head
= readl(dbc
->dbc_base
+ RSPHP_OFF
);
1510 if (head
== U32_MAX
) { /* PCI link error */
1511 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1515 tail
= readl(dbc
->dbc_base
+ RSPTP_OFF
);
1516 if (tail
== U32_MAX
) { /* PCI link error */
1517 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1522 irq_wake_thread(dbc
->irq
, dbc
);
1523 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1528 usleep_range(datapath_poll_interval_us
, 2 * datapath_poll_interval_us
);
/*
 * dbc_irq_threaded_fn() - threaded half of the per-DBC interrupt; drains the
 * response queue of the channel passed in @data.
 * @irq: interrupt number; passed to disable_irq_nosync() on the race path.
 * @data: the struct dma_bridge_chan for this channel.
 *
 * What the visible lines establish:
 *  - the work runs under the dbc->ch_lock SRCU read lock;
 *  - the response head (RSPHP_OFF) and tail (RSPTP_OFF) registers are read
 *    and U32_MAX from either is treated as a PCI link error;
 *  - when head == tail the thread sleeps 100-200us and jumps back to
 *    read_fifo to look for a new event. NOTE(review): delay_count/NUM_DELAYS
 *    presumably bound these retries and event_count/NUM_EVENTS the work per
 *    pass; the lines that consume the counters are not visible - confirm;
 *  - for each element from head up to tail the little-endian req_id and
 *    status are decoded; a debug message is printed for a failed request
 *    (the status test guarding it is not visible);
 *  - under dbc->xfer_lock, BOs on dbc->xfer_list whose req_id matches get
 *    nr_slice_xfer_done incremented; once that count reaches nr_slice the BO
 *    is synced for the CPU, its counter reset, it is unlinked from the list,
 *    req_processed_ts is stamped with ktime_get_ns(), xfer_done is completed
 *    and one GEM reference is dropped;
 *  - head advances modulo dbc->nelem and is finally written back to
 *    RSPHP_OFF so the device knows the elements were consumed;
 *  - on exit: with datapath_polling, dbc->poll_work is scheduled; without it
 *    and without single_msi a statement not visible here runs (presumably
 *    the irq is re-enabled - TODO confirm). The tail is then re-read to catch
 *    an event that raced with that step, and if one is pending the irq is
 *    disabled again.
 *
 * Return: irqreturn_t; the return statements are not visible in this view.
 */
1532 irqreturn_t
dbc_irq_threaded_fn(int irq
, void *data
)
1534 struct dma_bridge_chan
*dbc
= data
;
1535 int event_count
= NUM_EVENTS
;
1536 int delay_count
= NUM_DELAYS
;
1537 struct qaic_device
*qdev
;
1538 struct qaic_bo
*bo
, *i
;
1539 struct dbc_rsp
*rsp
;
1540 unsigned long flags
;
1547 rcu_id
= srcu_read_lock(&dbc
->ch_lock
);
1550 head
= readl(dbc
->dbc_base
+ RSPHP_OFF
);
1551 if (head
== U32_MAX
) /* PCI link error */
1557 event_count
= NUM_EVENTS
;
1562 * if this channel isn't assigned or gets unassigned during processing
1563 * we have nothing further to do
1568 tail
= readl(dbc
->dbc_base
+ RSPTP_OFF
);
1569 if (tail
== U32_MAX
) /* PCI link error */
1572 if (head
== tail
) { /* queue empty */
1575 usleep_range(100, 200);
1576 goto read_fifo
; /* check for a new event */
1581 delay_count
= NUM_DELAYS
;
1582 while (head
!= tail
) {
1586 rsp
= dbc
->rsp_q_base
+ head
* sizeof(*rsp
);
1587 req_id
= le16_to_cpu(rsp
->req_id
);
1588 status
= le16_to_cpu(rsp
->status
);
1590 pci_dbg(qdev
->pdev
, "req_id %d failed with status %d\n", req_id
, status
);
1591 spin_lock_irqsave(&dbc
->xfer_lock
, flags
);
1593 * A BO can receive multiple interrupts, since a BO can be
1594 * divided into multiple slices and a buffer receives as many
1595 * interrupts as slices. So until it receives interrupts for
1596 * all the slices we cannot mark that buffer complete.
1598 list_for_each_entry_safe(bo
, i
, &dbc
->xfer_list
, xfer_list
) {
1599 if (bo
->req_id
== req_id
)
1600 bo
->nr_slice_xfer_done
++;
1604 if (bo
->nr_slice_xfer_done
< bo
->nr_slice
)
1608 * At this point we have received all the interrupts for
1609 * BO, which means BO execution is complete.
1611 dma_sync_sgtable_for_cpu(&qdev
->pdev
->dev
, bo
->sgt
, bo
->dir
);
1612 bo
->nr_slice_xfer_done
= 0;
1613 list_del_init(&bo
->xfer_list
);
1614 bo
->perf_stats
.req_processed_ts
= ktime_get_ns();
1615 complete_all(&bo
->xfer_done
);
1616 drm_gem_object_put(&bo
->base
);
1619 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1620 head
= (head
+ 1) % dbc
->nelem
;
1624 * Update the head pointer of response queue and let the device know
1625 * that we have consumed elements from the queue.
1627 writel(head
, dbc
->dbc_base
+ RSPHP_OFF
);
1629 /* elements might have been put in the queue while we were processing */
1633 if (!qdev
->single_msi
&& likely(!datapath_polling
))
1635 else if (unlikely(datapath_polling
))
1636 schedule_work(&dbc
->poll_work
);
1637 /* checking the fifo and enabling irqs is a race, missed event check */
1638 tail
= readl(dbc
->dbc_base
+ RSPTP_OFF
);
1639 if (tail
!= U32_MAX
&& head
!= tail
) {
1640 if (!qdev
->single_msi
&& likely(!datapath_polling
))
1641 disable_irq_nosync(irq
);
1644 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1648 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1649 if (!qdev
->single_msi
&& likely(!datapath_polling
))
1651 else if (unlikely(datapath_polling
))
1652 schedule_work(&dbc
->poll_work
);
/*
 * qaic_wait_bo_ioctl() - wait for a buffer object transfer to complete.
 * @dev: DRM device (not referenced in the visible lines).
 * @data: ioctl payload, used as a struct qaic_wait (dbc_id, handle, timeout).
 * @file_priv: DRM file; its driver_priv is the calling struct qaic_user.
 *
 * Visible flow:
 *  - take the SRCU read locks usr->qddev_lock, then qdev->dev_lock, then
 *    dbc->ch_lock; they are released in the reverse order at the end;
 *  - bail out if the device is not QAIC_ONLINE, if args->dbc_id is not below
 *    qdev->num_dbc, or if the selected DBC is not owned by this user;
 *  - look up the GEM object for args->handle and convert it to a qaic_bo;
 *  - wait interruptibly on bo->xfer_done with a timeout in milliseconds,
 *    converted to jiffies;
 *  - drop the GEM reference taken by the lookup.
 *
 * Return: int. The error codes assigned on each failure path and the
 * translation of the wait result are not visible in this view.
 */
1657 int qaic_wait_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
1659 struct qaic_wait
*args
= data
;
1660 int usr_rcu_id
, qdev_rcu_id
;
1661 struct dma_bridge_chan
*dbc
;
1662 struct drm_gem_object
*obj
;
1663 struct qaic_device
*qdev
;
1664 unsigned long timeout
;
1665 struct qaic_user
*usr
;
1673 usr
= file_priv
->driver_priv
;
1674 usr_rcu_id
= srcu_read_lock(&usr
->qddev_lock
);
1677 goto unlock_usr_srcu
;
1680 qdev
= usr
->qddev
->qdev
;
1681 qdev_rcu_id
= srcu_read_lock(&qdev
->dev_lock
);
1682 if (qdev
->dev_state
!= QAIC_ONLINE
) {
1684 goto unlock_dev_srcu
;
1687 if (args
->dbc_id
>= qdev
->num_dbc
) {
1689 goto unlock_dev_srcu
;
1692 dbc
= &qdev
->dbc
[args
->dbc_id
];
1694 rcu_id
= srcu_read_lock(&dbc
->ch_lock
);
1695 if (dbc
->usr
!= usr
) {
1697 goto unlock_ch_srcu
;
1700 obj
= drm_gem_object_lookup(file_priv
, args
->handle
);
1703 goto unlock_ch_srcu
;
1706 bo
= to_qaic_bo(obj
);
/* A zero timeout from userspace selects the module-parameter default. */
1707 timeout
= args
->timeout
? args
->timeout
: wait_exec_default_timeout_ms
;
1708 timeout
= msecs_to_jiffies(timeout
);
1709 ret
= wait_for_completion_interruptible_timeout(&bo
->xfer_done
, timeout
);
1721 drm_gem_object_put(obj
);
1723 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1725 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
1727 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
/*
 * qaic_perf_stats_bo_ioctl() - report per-BO latency statistics to userspace.
 * @dev: DRM device (not referenced in the visible lines).
 * @data: ioctl payload, used as a struct qaic_perf_stats; hdr.dbc_id and
 *        hdr.count are read, and data is the user pointer to the entry array.
 * @file_priv: DRM file; its driver_priv is the calling struct qaic_user.
 *
 * Visible flow:
 *  - take the SRCU read locks usr->qddev_lock and qdev->dev_lock (released in
 *    reverse order at the end);
 *  - bail out if the device is not QAIC_ONLINE or hdr.dbc_id is not below
 *    qdev->num_dbc;
 *  - allocate hdr.count zeroed entries and copy the user array into them;
 *  - for each entry, look up the GEM object by ent[i].handle and fill in
 *    device_latency_us, submit_latency_us, queue_level_before and
 *    num_queue_element from the BO, then drop the lookup reference;
 *  - copy the filled array back to the same user pointer.
 *
 * NOTE(review): the free of ent and the error codes on each failure path are
 * not visible in this view.
 *
 * Return: int; the values are not visible in this view.
 */
1731 int qaic_perf_stats_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
1733 struct qaic_perf_stats_entry
*ent
= NULL
;
1734 struct qaic_perf_stats
*args
= data
;
1735 int usr_rcu_id
, qdev_rcu_id
;
1736 struct drm_gem_object
*obj
;
1737 struct qaic_device
*qdev
;
1738 struct qaic_user
*usr
;
1742 usr
= file_priv
->driver_priv
;
1743 usr_rcu_id
= srcu_read_lock(&usr
->qddev_lock
);
1746 goto unlock_usr_srcu
;
1749 qdev
= usr
->qddev
->qdev
;
1750 qdev_rcu_id
= srcu_read_lock(&qdev
->dev_lock
);
1751 if (qdev
->dev_state
!= QAIC_ONLINE
) {
1753 goto unlock_dev_srcu
;
1756 if (args
->hdr
.dbc_id
>= qdev
->num_dbc
) {
1758 goto unlock_dev_srcu
;
1761 ent
= kcalloc(args
->hdr
.count
, sizeof(*ent
), GFP_KERNEL
);
1764 goto unlock_dev_srcu
;
1767 ret
= copy_from_user(ent
, u64_to_user_ptr(args
->data
), args
->hdr
.count
* sizeof(*ent
));
1773 for (i
= 0; i
< args
->hdr
.count
; i
++) {
1774 obj
= drm_gem_object_lookup(file_priv
, ent
[i
].handle
);
1779 bo
= to_qaic_bo(obj
);
1781 * perf stats ioctl is called before wait ioctl is complete then
1782 * the latency information is invalid.
/* Report zero rather than a negative (wrapped) difference in that case. */
1784 if (bo
->perf_stats
.req_processed_ts
< bo
->perf_stats
.req_submit_ts
) {
1785 ent
[i
].device_latency_us
= 0;
/* Timestamp differences are divided by 1000 to fill the *_us fields. */
1787 ent
[i
].device_latency_us
= div_u64((bo
->perf_stats
.req_processed_ts
-
1788 bo
->perf_stats
.req_submit_ts
), 1000);
1790 ent
[i
].submit_latency_us
= div_u64((bo
->perf_stats
.req_submit_ts
-
1791 bo
->perf_stats
.req_received_ts
), 1000);
1792 ent
[i
].queue_level_before
= bo
->perf_stats
.queue_level_before
;
1793 ent
[i
].num_queue_element
= bo
->total_slice_nents
;
1794 drm_gem_object_put(obj
);
1797 if (copy_to_user(u64_to_user_ptr(args
->data
), ent
, args
->hdr
.count
* sizeof(*ent
)))
1803 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
1805 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
/*
 * detach_slice_bo() - undo the slicing state of a BO.
 * @qdev: device the BO belongs to; passed to qaic_unprepare_bo().
 * @bo: buffer object to detach.
 *
 * In order: frees the slices, unprepares the BO, re-initialises it with
 * qaic_init_bo(bo, true), removes it from the list it is linked on through
 * bo->bo_list and drops one GEM reference.
 *
 * Both callers visible in this file hold bo->lock around the call. Because
 * the final put may release a reference, release_dbc() takes an extra
 * reference before calling and the ioctl path holds its lookup reference.
 */
1809 static void detach_slice_bo(struct qaic_device
*qdev
, struct qaic_bo
*bo
)
1811 qaic_free_slices_bo(bo
);
1812 qaic_unprepare_bo(qdev
, bo
);
1813 qaic_init_bo(bo
, true);
1814 list_del(&bo
->bo_list
);
1815 drm_gem_object_put(&bo
->base
);
/*
 * qaic_detach_slice_bo_ioctl() - detach slicing information from a BO on
 * request from userspace.
 * @dev: DRM device (not referenced in the visible lines).
 * @data: ioctl payload, used as a struct qaic_detach_slice (handle is read).
 * @file_priv: DRM file; its driver_priv is the calling struct qaic_user.
 *
 * Visible flow:
 *  - take the SRCU read locks usr->qddev_lock and qdev->dev_lock;
 *  - bail out if the device is not QAIC_ONLINE;
 *  - look up the GEM object for args->handle and take bo->lock
 *    interruptibly;
 *  - take the dbc->ch_lock SRCU read lock and bail out if the DBC is not
 *    owned by this user. NOTE(review): the assignment of dbc is not visible
 *    in this view - presumably it comes from the BO; confirm;
 *  - under dbc->xfer_lock, refuse to continue while bo_queued(bo) is true,
 *    i.e. the BO is committed to hardware for DMA;
 *  - call detach_slice_bo();
 *  - release everything in reverse order: ch_lock, bo->lock, the lookup
 *    reference, dev_lock, qddev_lock.
 *
 * Return: int; the error codes on each failure path are not visible here.
 */
1818 int qaic_detach_slice_bo_ioctl(struct drm_device
*dev
, void *data
, struct drm_file
*file_priv
)
1820 struct qaic_detach_slice
*args
= data
;
1821 int rcu_id
, usr_rcu_id
, qdev_rcu_id
;
1822 struct dma_bridge_chan
*dbc
;
1823 struct drm_gem_object
*obj
;
1824 struct qaic_device
*qdev
;
1825 struct qaic_user
*usr
;
1826 unsigned long flags
;
1833 usr
= file_priv
->driver_priv
;
1834 usr_rcu_id
= srcu_read_lock(&usr
->qddev_lock
);
1837 goto unlock_usr_srcu
;
1840 qdev
= usr
->qddev
->qdev
;
1841 qdev_rcu_id
= srcu_read_lock(&qdev
->dev_lock
);
1842 if (qdev
->dev_state
!= QAIC_ONLINE
) {
1844 goto unlock_dev_srcu
;
1847 obj
= drm_gem_object_lookup(file_priv
, args
->handle
);
1850 goto unlock_dev_srcu
;
1853 bo
= to_qaic_bo(obj
);
1854 ret
= mutex_lock_interruptible(&bo
->lock
);
1864 rcu_id
= srcu_read_lock(&dbc
->ch_lock
);
1865 if (dbc
->usr
!= usr
) {
1867 goto unlock_ch_srcu
;
1870 /* Check if BO is committed to H/W for DMA */
1871 spin_lock_irqsave(&dbc
->xfer_lock
, flags
);
1872 if (bo_queued(bo
)) {
1873 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1875 goto unlock_ch_srcu
;
1877 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1879 detach_slice_bo(qdev
, bo
);
1882 srcu_read_unlock(&dbc
->ch_lock
, rcu_id
);
1884 mutex_unlock(&bo
->lock
);
1886 drm_gem_object_put(obj
);
1888 srcu_read_unlock(&qdev
->dev_lock
, qdev_rcu_id
);
1890 srcu_read_unlock(&usr
->qddev_lock
, usr_rcu_id
);
/*
 * empty_xfer_list() - flush every BO still queued on a DBC transfer list.
 * @qdev: device owning the channel; used for the DMA sync.
 * @dbc: channel whose xfer_list is emptied.
 *
 * Repeatedly takes the first BO off dbc->xfer_list under dbc->xfer_lock, then
 * drops the lock while it resets the BO: nr_slice_xfer_done and all four
 * perf_stats fields are zeroed, the scatter table is synced for the CPU,
 * every waiter on xfer_done is released and one GEM reference is dropped.
 * The lock is re-taken before the list is tested again, and released once
 * the list is empty.
 *
 * NOTE(review): one original line between the nr_slice_xfer_done reset and
 * the perf_stats resets is missing from this view.
 */
1894 static void empty_xfer_list(struct qaic_device
*qdev
, struct dma_bridge_chan
*dbc
)
1896 unsigned long flags
;
1899 spin_lock_irqsave(&dbc
->xfer_lock
, flags
);
1900 while (!list_empty(&dbc
->xfer_list
)) {
1901 bo
= list_first_entry(&dbc
->xfer_list
, typeof(*bo
), xfer_list
);
1902 list_del_init(&bo
->xfer_list
);
/* The BO is already unlinked, so the rest runs without the spinlock held. */
1903 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
1904 bo
->nr_slice_xfer_done
= 0;
1906 bo
->perf_stats
.req_received_ts
= 0;
1907 bo
->perf_stats
.req_submit_ts
= 0;
1908 bo
->perf_stats
.req_processed_ts
= 0;
1909 bo
->perf_stats
.queue_level_before
= 0;
1910 dma_sync_sgtable_for_cpu(&qdev
->pdev
->dev
, bo
->sgt
, bo
->dir
);
1911 complete_all(&bo
->xfer_done
);
1912 drm_gem_object_put(&bo
->base
);
1913 spin_lock_irqsave(&dbc
->xfer_lock
, flags
);
1915 spin_unlock_irqrestore(&dbc
->xfer_lock
, flags
);
/*
 * disable_dbc() - detach the user context from a DBC.
 * @qdev: device owning the DBC array.
 * @dbc_id: index into qdev->dbc; not range-checked in the visible lines.
 * @usr: user expected to own the DBC.
 *
 * The first test fires when the DBC has no user or when its user handle
 * differs from usr->handle (presumably an early error return - the statement
 * is not visible, TODO confirm). Otherwise the user pointer of the DBC is
 * cleared and synchronize_srcu() waits for current ch_lock readers to finish.
 *
 * Return: int; the returned values are not visible in this view.
 */
1918 int disable_dbc(struct qaic_device
*qdev
, u32 dbc_id
, struct qaic_user
*usr
)
1920 if (!qdev
->dbc
[dbc_id
].usr
|| qdev
->dbc
[dbc_id
].usr
->handle
!= usr
->handle
)
1923 qdev
->dbc
[dbc_id
].usr
= NULL
;
1924 synchronize_srcu(&qdev
->dbc
[dbc_id
].ch_lock
);
1929 * enable_dbc - Enable the DBC. DBCs are disabled by removing the context of
1930 * user. Add user context back to DBC to enable it. This function trusts the
1931 * DBC ID passed and expects the DBC to be disabled.
1932 * @qdev: Qranium device handle
1933 * @dbc_id: ID of the DBC
1934 * @usr: User context
/*
 * Stores usr as the user context of qdev->dbc[dbc_id]. No validation of
 * dbc_id and no locking is done in the visible lines.
 */
1936 void enable_dbc(struct qaic_device
*qdev
, u32 dbc_id
, struct qaic_user
*usr
)
1938 qdev
->dbc
[dbc_id
].usr
= usr
;
/*
 * wakeup_dbc() - release everything waiting on a DBC transfer list.
 * @qdev: device owning the DBC array.
 * @dbc_id: index into qdev->dbc; not range-checked in the visible lines.
 *
 * Flushes the transfer list with empty_xfer_list(), waits for current
 * ch_lock SRCU readers with synchronize_srcu(), then flushes once more to
 * catch anything those readers queued in the meantime.
 */
1941 void wakeup_dbc(struct qaic_device
*qdev
, u32 dbc_id
)
1943 struct dma_bridge_chan
*dbc
= &qdev
->dbc
[dbc_id
];
1946 empty_xfer_list(qdev
, dbc
);
1947 synchronize_srcu(&dbc
->ch_lock
);
1949 * Threads holding channel lock, may add more elements in the xfer_list.
1950 * Flush out these elements from xfer_list.
1952 empty_xfer_list(qdev
, dbc
);
/*
 * release_dbc() - tear down a DBC and mark it free.
 * @qdev: device owning the DBC array.
 * @dbc_id: index into qdev->dbc; not range-checked in the visible lines.
 *
 * Visible steps:
 *  - wakeup_dbc() flushes the transfer list and releases waiters;
 *  - the coherent DMA allocation described by total_size, req_q_base and
 *    dma_addr is freed, and total_size/req_q_base are cleared;
 *  - every BO on dbc->bo_lists is detached with detach_slice_bo() while
 *    holding bo->lock; an extra GEM reference is held around the call
 *    because detach_slice_bo() itself drops a reference;
 *  - in_use is cleared and waiters on dbc->dbc_release are woken.
 *
 * NOTE(review): original lines between the visible statements are missing
 * from this view, so further teardown steps may exist.
 */
1955 void release_dbc(struct qaic_device
*qdev
, u32 dbc_id
)
1957 struct qaic_bo
*bo
, *bo_temp
;
1958 struct dma_bridge_chan
*dbc
;
1960 dbc
= &qdev
->dbc
[dbc_id
];
1964 wakeup_dbc(qdev
, dbc_id
);
1966 dma_free_coherent(&qdev
->pdev
->dev
, dbc
->total_size
, dbc
->req_q_base
, dbc
->dma_addr
);
1967 dbc
->total_size
= 0;
1968 dbc
->req_q_base
= NULL
;
1973 list_for_each_entry_safe(bo
, bo_temp
, &dbc
->bo_lists
, bo_list
) {
1974 drm_gem_object_get(&bo
->base
);
1975 mutex_lock(&bo
->lock
);
1976 detach_slice_bo(qdev
, bo
);
1977 mutex_unlock(&bo
->lock
);
1978 drm_gem_object_put(&bo
->base
);
1981 dbc
->in_use
= false;
1982 wake_up(&dbc
->dbc_release
);
/*
 * qaic_data_get_fifo_info() - read the request queue pointers of a DBC.
 * @dbc: channel to query.
 * @head: output; receives the register at REQHP_OFF.
 * @tail: output; receives the register at REQTP_OFF.
 *
 * The NULL test covers all three pointers (presumably followed by an early
 * return - the statement is not visible in this view). The values are stored
 * as read, with no check for the U32_MAX pattern that other functions in
 * this file treat as a PCI link error.
 */
1985 void qaic_data_get_fifo_info(struct dma_bridge_chan
*dbc
, u32
*head
, u32
*tail
)
1987 if (!dbc
|| !head
|| !tail
)
1990 *head
= readl(dbc
->dbc_base
+ REQHP_OFF
);
1991 *tail
= readl(dbc
->dbc_base
+ REQTP_OFF
);