// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

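/*
 * A job is allocated as a single block: the struct host1x_job header is
 * followed in memory by the reloc, unpin, gather and DMA address tables,
 * so one kzalloc()/kfree() pair covers everything.
 */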
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
                                    u32 num_cmdbufs, u32 num_relocs)
{
        struct host1x_job *job = NULL;
        unsigned int num_unpins = num_relocs;
        u64 total;
        void *mem;

        if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
                num_unpins += num_cmdbufs;

        /* Check that we're not going to overflow */
        total = sizeof(struct host1x_job) +
                (u64)num_relocs * sizeof(struct host1x_reloc) +
                (u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
                (u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
                (u64)num_unpins * sizeof(dma_addr_t) +
                (u64)num_unpins * sizeof(u32 *);
        if (total > ULONG_MAX)
                return NULL;

        mem = job = kzalloc(total, GFP_KERNEL);
        if (!job)
                return NULL;

        kref_init(&job->ref);
        job->channel = ch;

        /* Redistribute memory to the structs */
        mem += sizeof(struct host1x_job);
        job->relocs = num_relocs ? mem : NULL;
        mem += num_relocs * sizeof(struct host1x_reloc);
        job->unpins = num_unpins ? mem : NULL;
        mem += num_unpins * sizeof(struct host1x_job_unpin_data);
        job->gathers = num_cmdbufs ? mem : NULL;
        mem += num_cmdbufs * sizeof(struct host1x_job_gather);
        job->addr_phys = num_unpins ? mem : NULL;

        job->reloc_addr_phys = job->addr_phys;
        job->gather_addr_phys = &job->addr_phys[num_relocs];

        return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

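/*
 * Jobs are reference counted: host1x_job_get()/host1x_job_put() adjust the
 * kref, and job_free() releases the allocation once the last reference is
 * dropped.
 */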
struct host1x_job *host1x_job_get(struct host1x_job *job)
{
        kref_get(&job->ref);
        return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
        struct host1x_job *job = container_of(ref, struct host1x_job, ref);

        kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
        kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
                           unsigned int words, unsigned int offset)
{
        struct host1x_job_gather *gather = &job->gathers[job->num_gathers];

        gather->words = words;
        gather->bo = bo;
        gather->offset = offset;

        job->num_gathers++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

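/*
 * Pin every relocation target and (unless the firewall is enabled) every
 * gather buffer for DMA. Each mapping is recorded in job->unpins[] and its
 * DMA address in job->addr_phys[] so that host1x_job_unpin() can undo it.
 */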
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
        struct host1x_client *client = job->client;
        struct device *dev = client->dev;
        struct host1x_job_gather *g;
        struct iommu_domain *domain;
        unsigned int i;
        int err;

        domain = iommu_get_domain_for_dev(dev);
        job->num_unpins = 0;

        for (i = 0; i < job->num_relocs; i++) {
                struct host1x_reloc *reloc = &job->relocs[i];
                dma_addr_t phys_addr, *phys;
                struct sg_table *sgt;

                reloc->target.bo = host1x_bo_get(reloc->target.bo);
                if (!reloc->target.bo) {
                        err = -EINVAL;
                        goto unpin;
                }

                /*
                 * If the client device is not attached to an IOMMU, the
                 * physical address of the buffer object can be used.
                 *
                 * Similarly, when an IOMMU domain is shared between all
                 * host1x clients, the IOVA is already available, so no
                 * need to map the buffer object again.
                 *
                 * XXX Note that this isn't always safe to do because it
                 * relies on an assumption that no cache maintenance is
                 * needed on the buffer objects.
                 */
                if (!domain || client->group)
                        phys = &phys_addr;
                else
                        phys = NULL;

                sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
                if (IS_ERR(sgt)) {
                        err = PTR_ERR(sgt);
                        goto unpin;
                }

                if (sgt) {
                        unsigned long mask = HOST1X_RELOC_READ |
                                             HOST1X_RELOC_WRITE;
                        enum dma_data_direction dir;

                        switch (reloc->flags & mask) {
                        case HOST1X_RELOC_READ:
                                dir = DMA_TO_DEVICE;
                                break;

                        case HOST1X_RELOC_WRITE:
                                dir = DMA_FROM_DEVICE;
                                break;

                        case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
                                dir = DMA_BIDIRECTIONAL;
                                break;

                        default:
                                err = -EINVAL;
                                goto unpin;
                        }

                        err = dma_map_sgtable(dev, sgt, dir, 0);
                        if (err)
                                goto unpin;

                        job->unpins[job->num_unpins].dev = dev;
                        job->unpins[job->num_unpins].dir = dir;
                        phys_addr = sg_dma_address(sgt->sgl);
                }

                job->addr_phys[job->num_unpins] = phys_addr;
                job->unpins[job->num_unpins].bo = reloc->target.bo;
                job->unpins[job->num_unpins].sgt = sgt;
                job->num_unpins++;
        }

        /*
         * We will copy gathers BO content later, so there is no need to
         * hold and pin them.
         */
        if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
                return 0;

        for (i = 0; i < job->num_gathers; i++) {
                size_t gather_size = 0;
                struct scatterlist *sg;
                struct sg_table *sgt;
                dma_addr_t phys_addr;
                unsigned long shift;
                struct iova *alloc;
                dma_addr_t *phys;
                unsigned int j;

                g = &job->gathers[i];
                g->bo = host1x_bo_get(g->bo);
                if (!g->bo) {
                        err = -EINVAL;
                        goto unpin;
                }

                /*
                 * If the host1x is not attached to an IOMMU, there is no need
                 * to map the buffer object for the host1x, since the physical
                 * address can simply be used.
                 */
                if (!iommu_get_domain_for_dev(host->dev))
                        phys = &phys_addr;
                else
                        phys = NULL;

                sgt = host1x_bo_pin(host->dev, g->bo, phys);
                if (IS_ERR(sgt)) {
                        err = PTR_ERR(sgt);
                        goto put;
                }

                if (sgt && host->domain) {
                        for_each_sgtable_sg(sgt, sg, j)
                                gather_size += sg->length;
                        gather_size = iova_align(&host->iova, gather_size);

                        shift = iova_shift(&host->iova);
                        alloc = alloc_iova(&host->iova, gather_size >> shift,
                                           host->iova_end >> shift, true);
                        if (!alloc) {
                                err = -ENOMEM;
                                goto put;
                        }

                        err = iommu_map_sgtable(host->domain,
                                        iova_dma_addr(&host->iova, alloc),
                                        sgt, IOMMU_READ);
                        if (err == 0) {
                                __free_iova(&host->iova, alloc);
                                err = -EINVAL;
                                goto put;
                        }

                        job->unpins[job->num_unpins].size = gather_size;
                        phys_addr = iova_dma_addr(&host->iova, alloc);
                } else if (sgt) {
                        err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
                        if (err)
                                goto put;

                        job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
                        job->unpins[job->num_unpins].dev = host->dev;
                        phys_addr = sg_dma_address(sgt->sgl);
                }

                job->addr_phys[job->num_unpins] = phys_addr;
                job->gather_addr_phys[i] = phys_addr;

                job->unpins[job->num_unpins].bo = g->bo;
                job->unpins[job->num_unpins].sgt = sgt;
                job->num_unpins++;
        }

        return 0;

put:
        host1x_bo_put(g->bo);
unpin:
        host1x_job_unpin(job);
        return err;
}

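/*
 * Patch every relocation that targets this gather's command buffer with the
 * DMA address computed during pinning.
 */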
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
        void *cmdbuf_addr = NULL;
        struct host1x_bo *cmdbuf = g->bo;
        unsigned int i;

        /* pin & patch the relocs for one gather */
        for (i = 0; i < job->num_relocs; i++) {
                struct host1x_reloc *reloc = &job->relocs[i];
                u32 reloc_addr = (job->reloc_addr_phys[i] +
                                  reloc->target.offset) >> reloc->shift;
                u32 *target;

                /* skip all other gathers */
                if (cmdbuf != reloc->cmdbuf.bo)
                        continue;

                if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
                        target = (u32 *)job->gather_copy_mapped +
                                        reloc->cmdbuf.offset / sizeof(u32) +
                                        g->offset / sizeof(u32);
                        goto patch_reloc;
                }

                if (!cmdbuf_addr) {
                        cmdbuf_addr = host1x_bo_mmap(cmdbuf);

                        if (unlikely(!cmdbuf_addr)) {
                                pr_err("Could not map cmdbuf for relocation\n");
                                return -ENOMEM;
                        }
                }

                target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
                *target = reloc_addr;
        }

        if (cmdbuf_addr)
                host1x_bo_munmap(cmdbuf, cmdbuf_addr);

        return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
                        unsigned int offset)
{
        offset *= sizeof(u32);

        if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
                return false;

        /* relocation shift value validation isn't implemented yet */
        if (reloc->shift)
                return false;

        return true;
}

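/*
 * State tracked by the command stream firewall while it walks one copied
 * gather.
 */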
struct host1x_firewall {
        struct host1x_job *job;
        struct device *dev;

        unsigned int num_relocs;
        struct host1x_reloc *reloc;

        struct host1x_bo *cmdbuf;
        unsigned int offset;

        u32 words;
        u32 class;
        u32 reg;
        u32 mask;
        u32 count;
};

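/*
 * A write to an address register is only allowed when it is backed by a
 * relocation that matches the current command buffer and offset.
 */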
static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
        if (!fw->job->is_addr_reg)
                return 0;

        if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
                if (!fw->num_relocs)
                        return -EINVAL;

                if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
                        return -EINVAL;

                fw->num_relocs--;
                fw->reloc++;
        }

        return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
        if (!fw->job->is_valid_class) {
                if (fw->class != class)
                        return -EINVAL;
        } else {
                if (!fw->job->is_valid_class(fw->class))
                        return -EINVAL;
        }

        return 0;
}

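/*
 * The helpers below walk the register writes described by a mask, an
 * incrementing or a non-incrementing opcode and run check_register() on
 * every register that would be written.
 */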
static int check_mask(struct host1x_firewall *fw)
{
        u32 mask = fw->mask;
        u32 reg = fw->reg;
        int ret;

        while (mask) {
                if (fw->words == 0)
                        return -EINVAL;

                if (mask & 1) {
                        ret = check_register(fw, reg);
                        if (ret < 0)
                                return ret;

                        fw->words--;
                        fw->offset++;
                }
                mask >>= 1;
                reg++;
        }

        return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
        u32 count = fw->count;
        u32 reg = fw->reg;
        int ret;

        while (count) {
                if (fw->words == 0)
                        return -EINVAL;

                ret = check_register(fw, reg);
                if (ret < 0)
                        return ret;

                reg++;
                fw->words--;
                fw->offset++;
                count--;
        }

        return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
        u32 count = fw->count;
        int ret;

        while (count) {
                if (fw->words == 0)
                        return -EINVAL;

                ret = check_register(fw, fw->reg);
                if (ret < 0)
                        return ret;

                fw->words--;
                fw->offset++;
                count--;
        }

        return 0;
}

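/*
 * Walk one copied gather word by word: the opcode lives in the top nibble of
 * each command word and determines how the class, register, mask and count
 * fields are decoded before the per-register checks run.
 */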
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
        u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
                (g->offset / sizeof(u32));
        u32 job_class = fw->class;
        int err = 0;

        fw->words = g->words;
        fw->cmdbuf = g->bo;
        fw->offset = 0;

        while (fw->words && !err) {
                u32 word = cmdbuf_base[fw->offset];
                u32 opcode = (word & 0xf0000000) >> 28;

                fw->mask = 0;
                fw->reg = 0;
                fw->count = 0;
                fw->words--;
                fw->offset++;

                switch (opcode) {
                case 0:
                        fw->class = word >> 6 & 0x3ff;
                        fw->mask = word & 0x3f;
                        fw->reg = word >> 16 & 0xfff;
                        err = check_class(fw, job_class);
                        if (!err)
                                err = check_mask(fw);
                        if (err)
                                goto out;
                        break;

                case 1:
                        fw->reg = word >> 16 & 0xfff;
                        fw->count = word & 0xffff;
                        err = check_incr(fw);
                        if (err)
                                goto out;
                        break;

                case 2:
                        fw->reg = word >> 16 & 0xfff;
                        fw->count = word & 0xffff;
                        err = check_nonincr(fw);
                        if (err)
                                goto out;
                        break;

                case 3:
                        fw->mask = word & 0xffff;
                        fw->reg = word >> 16 & 0xfff;
                        err = check_mask(fw);
                        if (err)
                                goto out;
                        break;

                case 4:
                        break;

                default:
                        err = -EINVAL;
                        break;
                }
        }

out:
        return err;
}

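/*
 * With the firewall enabled, all gathers are copied into one contiguous
 * write-combined buffer and validated there before being handed to the
 * hardware.
 */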
static inline int copy_gathers(struct device *host, struct host1x_job *job,
                               struct device *dev)
{
        struct host1x_firewall fw;
        size_t size = 0;
        size_t offset = 0;
        unsigned int i;

        fw.job = job;
        fw.dev = dev;
        fw.reloc = job->relocs;
        fw.num_relocs = job->num_relocs;
        fw.class = job->class;

        for (i = 0; i < job->num_gathers; i++) {
                struct host1x_job_gather *g = &job->gathers[i];

                size += g->words * sizeof(u32);
        }

        /*
         * Try a non-blocking allocation from the higher-priority pools first,
         * since waiting for the allocation here is a major performance hit.
         */
        job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
                                               GFP_NOWAIT);

        /* the higher priority allocation failed, try the generic-blocking */
        if (!job->gather_copy_mapped)
                job->gather_copy_mapped = dma_alloc_wc(host, size,
                                                       &job->gather_copy,
                                                       GFP_KERNEL);
        if (!job->gather_copy_mapped)
                return -ENOMEM;

        job->gather_copy_size = size;

        for (i = 0; i < job->num_gathers; i++) {
                struct host1x_job_gather *g = &job->gathers[i];
                void *gather;

                /* Copy the gather */
                gather = host1x_bo_mmap(g->bo);
                memcpy(job->gather_copy_mapped + offset, gather + g->offset,
                       g->words * sizeof(u32));
                host1x_bo_munmap(g->bo, gather);

                /* Store the location in the buffer */
                g->base = job->gather_copy;
                g->offset = offset;

                /* Validate the job */
                if (validate(&fw, g))
                        return -EINVAL;

                offset += g->words * sizeof(u32);
        }

        /* No relocs should remain at this point */
        if (fw.num_relocs)
                return -EINVAL;

        return 0;
}

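/*
 * Pin all buffers referenced by the job, copy and validate the gathers when
 * the firewall is enabled, and patch the relocations in each unique gather.
 */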
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
        int err;
        unsigned int i, j;
        struct host1x *host = dev_get_drvdata(dev->parent);

        /* pin memory */
        err = pin_job(host, job);
        if (err)
                goto out;

        if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
                err = copy_gathers(host->dev, job, dev);
                if (err)
                        goto out;
        }

        /* patch gathers */
        for (i = 0; i < job->num_gathers; i++) {
                struct host1x_job_gather *g = &job->gathers[i];

                /* process each gather mem only once */
                if (g->handled)
                        continue;

                /* copy_gathers() sets gathers base if firewall is enabled */
                if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
                        g->base = job->gather_addr_phys[i];

                for (j = i + 1; j < job->num_gathers; j++) {
                        if (job->gathers[j].bo == g->bo) {
                                job->gathers[j].handled = true;
                                job->gathers[j].base = g->base;
                        }
                }

                err = do_relocs(job, g);
                if (err)
                        break;
        }

out:
        if (err)
                host1x_job_unpin(job);
        wmb();

        return err;
}
EXPORT_SYMBOL(host1x_job_pin);

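/*
 * Release everything pin_job() set up: IOMMU mappings and IOVA space for
 * gathers, DMA mappings for relocation targets, and the buffer references.
 */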
void host1x_job_unpin(struct host1x_job *job)
{
        struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
        unsigned int i;

        for (i = 0; i < job->num_unpins; i++) {
                struct host1x_job_unpin_data *unpin = &job->unpins[i];
                struct device *dev = unpin->dev ?: host->dev;
                struct sg_table *sgt = unpin->sgt;

                if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
                    unpin->size && host->domain) {
                        iommu_unmap(host->domain, job->addr_phys[i],
                                    unpin->size);
                        free_iova(&host->iova,
                                  iova_pfn(&host->iova, job->addr_phys[i]));
                }

                if (unpin->dev && sgt)
                        dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);

                host1x_bo_unpin(dev, unpin->bo, sgt);
                host1x_bo_put(unpin->bo);
        }

        job->num_unpins = 0;

        if (job->gather_copy_size)
                dma_free_wc(host->dev, job->gather_copy_size,
                            job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
        dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
        dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
        dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
        dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
        dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
        dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}