// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
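
/*
 * Allocate a job and carve the trailing part of the allocation into the
 * relocation, unpin, command and DMA-address arrays. The typical client
 * flow (the submission step itself lives outside this file) is roughly:
 * host1x_job_alloc() -> host1x_job_add_gather()/host1x_job_add_wait() ->
 * host1x_job_pin() -> submit -> host1x_job_unpin() -> host1x_job_put().
 */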
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    bool skip_firewall)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_relocs;
	bool enable_firewall;
	u64 total;
	void *mem;

	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;

	if (!enable_firewall)
		num_unpins += num_cmdbufs;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	job->enable_firewall = enable_firewall;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->cmds = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);
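
/*
 * Jobs are reference-counted: host1x_job_get() takes a reference and
 * host1x_job_put() drops it, freeing the job via job_free() once the last
 * reference is gone.
 */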
struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	if (job->release)
		job->release(job);

	if (job->fence) {
		/*
		 * remove_callback is atomic w.r.t. fence signaling, so
		 * after the call returns, we know that the callback is not
		 * in execution, and the fence can be safely freed.
		 */
		dma_fence_remove_callback(job->fence, &job->fence_cb);
		dma_fence_put(job->fence);
	}

	if (job->syncpt)
		host1x_syncpt_put(job->syncpt);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
			 bool relative, u32 next_class)
{
	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];

	cmd->is_wait = true;
	cmd->wait.id = id;
	cmd->wait.threshold = thresh;
	cmd->wait.next_class = next_class;
	cmd->wait.relative = relative;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_wait);
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	unsigned int i;
	int err;

	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		enum dma_data_direction direction;
		struct host1x_bo_mapping *map;
		struct host1x_bo *bo;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		bo = reloc->target.bo;

		switch (reloc->flags & mask) {
		case HOST1X_RELOC_READ:
			direction = DMA_TO_DEVICE;
			break;

		case HOST1X_RELOC_WRITE:
			direction = DMA_FROM_DEVICE;
			break;

		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
			direction = DMA_BIDIRECTIONAL;
			break;

		default:
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(dev, bo, direction, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto unpin;
		}

		/*
		 * host1x clients are generally not able to do scatter-gather themselves, so fail
		 * if the buffer is discontiguous and we fail to map its SG table to a single
		 * contiguous chunk of I/O virtual memory.
		 */
		if (map->chunks > 1) {
			err = -EINVAL;
			goto unpin;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;
	}

	/*
	 * We will copy gathers BO content later, so there is no need to
	 * hold and pin them.
	 */
	if (job->enable_firewall)
		return 0;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_bo_mapping *map;
		size_t gather_size = 0;
		struct scatterlist *sg;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto put;
		}

		if (host->domain) {
			for_each_sgtable_sg(map->sgt, sg, j)
				gather_size += sg->length;

			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

			err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
						map->sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			map->phys = iova_dma_addr(&host->iova, alloc);
			map->size = gather_size;
		}

		job->addr_phys[job->num_unpins] = map->phys;
		job->unpins[job->num_unpins].map = map;
		job->num_unpins++;

		job->gather_addr_phys[i] = map->phys;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}
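
/*
 * Patch every relocation that targets this gather's command buffer, writing
 * the pinned DMA address (shifted as requested) into either the firewall's
 * gather copy or the mapped command buffer itself.
 */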
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (job->enable_firewall) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
						g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}
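
/*
 * The firewall below validates a copied gather word by word before it is
 * handed to hardware: relocations must line up with address registers and
 * only known opcodes and classes are accepted.
 */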
static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}
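
/* Parser state for the command stream firewall. */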
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};
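
/*
 * Writes to address registers must be backed by the next pending relocation;
 * check_register() consumes a relocation once it has been matched against the
 * current command buffer offset.
 */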
static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}
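
/*
 * check_mask(), check_incr() and check_nonincr() walk the register writes
 * described by a write mask, an incrementing burst or a non-incrementing
 * burst, validating each written register and consuming command words.
 */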
static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}
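
/*
 * Decode the gather copy word by word: the top nibble of each word selects
 * the opcode (setclass, incrementing write, non-incrementing write, mask
 * write, ...) and the remaining bits carry the class, register and count
 * fields that get checked.
 */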
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 4:
			break;
		case 14:
			break;
		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}
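
/*
 * With the firewall enabled, gathers are copied into one contiguous
 * write-combined buffer and validated there; the hardware later executes the
 * copy rather than the client-owned BOs.
 */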
static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from a higher priority pools first,
	 * as awaiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher priority allocation failed, try the generic-blocking */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;
		void *gather;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}
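
/*
 * Prepare a job for submission: pin (or copy and validate) all buffers,
 * resolve gather base addresses, deduplicate gathers that share a BO and
 * apply relocations.
 */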
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (job->enable_firewall) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets gathers base if firewall is enabled */
		if (!job->enable_firewall)
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_cmds; j++) {
			if (!job->cmds[j].is_wait &&
			    job->cmds[j].gather.bo == g->bo) {
				job->cmds[j].gather.handled = true;
				job->cmds[j].gather.base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);
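
/*
 * Undo host1x_job_pin(): release IOMMU mappings and IOVA space allocated for
 * gathers, unpin and drop every buffer, and free the firewall's gather copy
 * if one was allocated.
 */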
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_bo_mapping *map = job->unpins[i].map;
		struct host1x_bo *bo = map->bo;

		if (!job->enable_firewall && map->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i], map->size);
			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(map);
		host1x_bo_put(bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}