4 * Copyright (c) 2010-2015, NVIDIA Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 #include <linux/dma-mapping.h>
20 #include <linux/err.h>
21 #include <linux/host1x.h>
22 #include <linux/kref.h>
23 #include <linux/module.h>
24 #include <linux/scatterlist.h>
25 #include <linux/slab.h>
26 #include <linux/vmalloc.h>
27 #include <trace/events/host1x.h>
/*
 * Not referenced anywhere in the visible portion of this file.
 * NOTE(review): judging by the name this is an offset related to the
 * wait-syncpt operation; confirm its meaning and units before relying on it.
 */
19 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
/*
 * host1x_job_alloc() - allocate a job and its per-job arrays in one block.
 *
 * @ch:          channel the job is created for (its use is not visible in
 *               this excerpt)
 * @num_cmdbufs: number of gather (command buffer) slots to reserve
 * @num_relocs:  number of relocation slots to reserve
 *
 * The job structure, the relocation array, the unpin array, the gather array
 * and the address array all live in a single kzalloc() allocation, which is
 * then carved up in that order.
 *
 * NOTE(review): the declarations of `total` and `mem`, the NULL checks after
 * the size test and after kzalloc(), and the return statements are missing
 * from this excerpt.
 */
36 struct host1x_job
*host1x_job_alloc(struct host1x_channel
*ch
,
37 u32 num_cmdbufs
, u32 num_relocs
)
39 struct host1x_job
*job
= NULL
;
/* One unpin slot is reserved for every gather and every relocation. */
40 unsigned int num_unpins
= num_cmdbufs
+ num_relocs
;
44 /* Check that we're not going to overflow */
/*
 * Every count is widened to u64 before it is multiplied by an element size,
 * so the sum itself is computed in 64 bits and can be compared against
 * ULONG_MAX below.
 * NOTE(review): the last term (num_unpins * sizeof(u32 *)) has no matching
 * carve-out in the visible lines further down.
 */
45 total
= sizeof(struct host1x_job
) +
46 (u64
)num_relocs
* sizeof(struct host1x_reloc
) +
47 (u64
)num_unpins
* sizeof(struct host1x_job_unpin_data
) +
48 (u64
)num_cmdbufs
* sizeof(struct host1x_job_gather
) +
49 (u64
)num_unpins
* sizeof(dma_addr_t
) +
50 (u64
)num_unpins
* sizeof(u32
*);
51 if (total
> ULONG_MAX
)
/* `mem` and `job` start out pointing at the same zeroed allocation. */
54 mem
= job
= kzalloc(total
, GFP_KERNEL
);
61 /* Redistribute memory to the structs */
/*
 * `mem` is advanced past each region in turn; an array pointer is left NULL
 * when its element count is zero.
 */
62 mem
+= sizeof(struct host1x_job
);
63 job
->relocs
= num_relocs
? mem
: NULL
;
64 mem
+= num_relocs
* sizeof(struct host1x_reloc
);
65 job
->unpins
= num_unpins
? mem
: NULL
;
66 mem
+= num_unpins
* sizeof(struct host1x_job_unpin_data
);
67 job
->gathers
= num_cmdbufs
? mem
: NULL
;
68 mem
+= num_cmdbufs
* sizeof(struct host1x_job_gather
);
69 job
->addr_phys
= num_unpins
? mem
: NULL
;
/*
 * addr_phys is shared: the first num_relocs entries are the relocation
 * addresses and the gather addresses start right after them.
 */
71 job
->reloc_addr_phys
= job
->addr_phys
;
72 job
->gather_addr_phys
= &job
->addr_phys
[num_relocs
];
76 EXPORT_SYMBOL(host1x_job_alloc
);
/*
 * host1x_job_get() - exported accessor taking and returning a job pointer.
 *
 * NOTE(review): the function body is missing from this excerpt; only the
 * signature and the export are visible. Presumably it takes a reference on
 * job->ref (the counterpart of host1x_job_put(), which calls kref_put()) and
 * returns the job - confirm against the full source.
 */
78 struct host1x_job
*host1x_job_get(struct host1x_job
*job
)
83 EXPORT_SYMBOL(host1x_job_get
);
/*
 * job_free() - kref release callback for a job.
 *
 * Passed to kref_put() by host1x_job_put(). Recovers the enclosing
 * struct host1x_job from the embedded `ref` member with container_of().
 *
 * NOTE(review): the statement that actually releases the job is missing from
 * this excerpt.
 */
85 static void job_free(struct kref
*ref
)
87 struct host1x_job
*job
= container_of(ref
, struct host1x_job
, ref
);
/*
 * host1x_job_put() - drop a reference on a job.
 *
 * @job: job whose reference count is decremented
 *
 * Calls kref_put() on job->ref with job_free() as the release function.
 */
92 void host1x_job_put(struct host1x_job
*job
)
94 kref_put(&job
->ref
, job_free
);
96 EXPORT_SYMBOL(host1x_job_put
);
/*
 * host1x_job_add_gather() - fill in the next gather slot of a job.
 *
 * @job:    job to add the gather to
 * @bo:     buffer object holding the commands
 * @words:  value stored in gather->words
 * @offset: value stored in gather->offset
 *
 * The slot used is job->gathers[job->num_gathers]. No bounds check against
 * the number of slots reserved at allocation time is visible here.
 *
 * NOTE(review): the lines that store @bo into the gather and advance
 * job->num_gathers are missing from this excerpt.
 */
98 void host1x_job_add_gather(struct host1x_job
*job
, struct host1x_bo
*bo
,
99 unsigned int words
, unsigned int offset
)
101 struct host1x_job_gather
*gather
= &job
->gathers
[job
->num_gathers
];
103 gather
->words
= words
;
105 gather
->offset
= offset
;
109 EXPORT_SYMBOL(host1x_job_add_gather
);
/*
 * pin_job() - reference and pin every buffer object used by a job.
 *
 * @host: host1x instance; its IOMMU domain and IOVA allocator are used
 * @job:  job whose relocation targets and gathers are pinned
 *
 * Two passes are made: first over job->relocs, then over job->gathers. Each
 * pass records the resulting address in job->addr_phys[] and the buffer
 * object plus its sg_table in job->unpins[], both indexed by
 * job->num_unpins.
 *
 * NOTE(review): local declarations, error handling, the increments of
 * job->num_unpins and the return statements are missing from this excerpt.
 * The declared return type is unsigned int.
 */
111 static unsigned int pin_job(struct host1x
*host
, struct host1x_job
*job
)
/* Pass 1: relocation targets. */
118 for (i
= 0; i
< job
->num_relocs
; i
++) {
119 struct host1x_reloc
*reloc
= &job
->relocs
[i
];
120 struct sg_table
*sgt
;
121 dma_addr_t phys_addr
;
/* The result of host1x_bo_get() is written back and then NULL-checked. */
123 reloc
->target
.bo
= host1x_bo_get(reloc
->target
.bo
);
124 if (!reloc
->target
.bo
) {
129 phys_addr
= host1x_bo_pin(reloc
->target
.bo
, &sgt
);
131 job
->addr_phys
[job
->num_unpins
] = phys_addr
;
132 job
->unpins
[job
->num_unpins
].bo
= reloc
->target
.bo
;
133 job
->unpins
[job
->num_unpins
].sgt
= sgt
;
/* Pass 2: gathers (command buffers). */
137 for (i
= 0; i
< job
->num_gathers
; i
++) {
138 struct host1x_job_gather
*g
= &job
->gathers
[i
];
139 size_t gather_size
= 0;
140 struct scatterlist
*sg
;
141 struct sg_table
*sgt
;
142 dma_addr_t phys_addr
;
147 g
->bo
= host1x_bo_get(g
->bo
);
153 phys_addr
= host1x_bo_pin(g
->bo
, &sgt
);
/*
 * Only when the firewall is compiled out and an IOMMU domain exists is the
 * gather mapped through the IOMMU: the scatterlist lengths are summed, the
 * total is aligned with iova_align(), an IOVA range is allocated and the
 * scatterlist is mapped with IOMMU_READ.
 */
155 if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL
) && host
->domain
) {
156 for_each_sg(sgt
->sgl
, sg
, sgt
->nents
, j
)
157 gather_size
+= sg
->length
;
158 gather_size
= iova_align(&host
->iova
, gather_size
);
160 shift
= iova_shift(&host
->iova
);
161 alloc
= alloc_iova(&host
->iova
, gather_size
>> shift
,
162 host
->iova_end
>> shift
, true);
168 err
= iommu_map_sg(host
->domain
,
169 iova_dma_addr(&host
->iova
, alloc
),
170 sgt
->sgl
, sgt
->nents
, IOMMU_READ
);
/* NOTE(review): the condition guarding this release is not visible. */
172 __free_iova(&host
->iova
, alloc
);
/*
 * The mapped size is remembered in the unpin entry; host1x_job_unpin() tests
 * unpin->size to decide whether an IOMMU mapping has to be torn down.
 */
177 job
->addr_phys
[job
->num_unpins
] =
178 iova_dma_addr(&host
->iova
, alloc
);
179 job
->unpins
[job
->num_unpins
].size
= gather_size
;
/* NOTE(review): presumably the else branch - the pinned address is used as is. */
181 job
->addr_phys
[job
->num_unpins
] = phys_addr
;
184 job
->gather_addr_phys
[i
] = job
->addr_phys
[job
->num_unpins
];
186 job
->unpins
[job
->num_unpins
].bo
= g
->bo
;
187 job
->unpins
[job
->num_unpins
].sgt
= sgt
;
/* NOTE(review): presumably the failure path; the surrounding label is not visible. */
194 host1x_job_unpin(job
);
/*
 * do_relocs() - patch the relocations that belong to one gather.
 *
 * @job: job owning the relocation table
 * @g:   gather whose buffer object is being patched
 *
 * For each relocation whose cmdbuf.bo matches g->bo, the value
 * (reloc_addr_phys[i] + target.offset) >> shift is written into the command
 * stream at the relocation's cmdbuf.offset.
 *
 * NOTE(review): local declarations, the continue/goto statements, a label
 * and the return statements are missing from this excerpt.
 */
198 static int do_relocs(struct host1x_job
*job
, struct host1x_job_gather
*g
)
201 void *cmdbuf_page_addr
= NULL
;
202 struct host1x_bo
*cmdbuf
= g
->bo
;
205 /* pin & patch the relocs for one gather */
206 for (i
= 0; i
< job
->num_relocs
; i
++) {
207 struct host1x_reloc
*reloc
= &job
->relocs
[i
];
/* The patched value is truncated to 32 bits by the u32 type. */
208 u32 reloc_addr
= (job
->reloc_addr_phys
[i
] +
209 reloc
->target
.offset
) >> reloc
->shift
;
212 /* skip all other gathers */
213 if (cmdbuf
!= reloc
->cmdbuf
.bo
)
/*
 * With the firewall enabled the patch target lies in the job's gather copy:
 * the copy base plus the relocation offset and the gather offset, both
 * converted from bytes to u32 words.
 */
216 if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL
)) {
217 target
= (u32
*)job
->gather_copy_mapped
+
218 reloc
->cmdbuf
.offset
/ sizeof(u32
) +
219 g
->offset
/ sizeof(u32
);
/*
 * Otherwise the page of the command buffer containing the offset is mapped.
 * The mapping is kept in cmdbuf_page_addr / last_page and only replaced
 * when the next relocation falls on a different page.
 */
223 if (last_page
!= reloc
->cmdbuf
.offset
>> PAGE_SHIFT
) {
224 if (cmdbuf_page_addr
)
225 host1x_bo_kunmap(cmdbuf
, last_page
,
228 cmdbuf_page_addr
= host1x_bo_kmap(cmdbuf
,
229 reloc
->cmdbuf
.offset
>> PAGE_SHIFT
);
230 last_page
= reloc
->cmdbuf
.offset
>> PAGE_SHIFT
;
232 if (unlikely(!cmdbuf_page_addr
)) {
233 pr_err("Could not map cmdbuf for relocation\n");
/* Offset of the relocation within the mapped page. */
238 target
= cmdbuf_page_addr
+ (reloc
->cmdbuf
.offset
& ~PAGE_MASK
);
240 *target
= reloc_addr
;
/* Release whichever page is still mapped once the loop is done. */
243 if (cmdbuf_page_addr
)
244 host1x_bo_kunmap(cmdbuf
, last_page
, cmdbuf_page_addr
);
/*
 * check_reloc() - test whether a relocation entry matches a command buffer
 * position.
 *
 * @reloc:  relocation entry to test
 * @cmdbuf: buffer object currently being validated
 *
 * The `offset` value is multiplied by sizeof(u32) before being compared with
 * reloc->cmdbuf.offset, i.e. it is converted from words to bytes. The test
 * compares both the buffer object and that byte offset.
 *
 * NOTE(review): the remainder of the parameter list (including `offset`) and
 * the return statements are missing from this excerpt.
 */
249 static bool check_reloc(struct host1x_reloc
*reloc
, struct host1x_bo
*cmdbuf
,
252 offset
*= sizeof(u32
);
254 if (reloc
->cmdbuf
.bo
!= cmdbuf
|| reloc
->cmdbuf
.offset
!= offset
)
257 /* relocation shift value validation isn't implemented yet */
/*
 * struct host1x_firewall - state carried through command stream validation.
 *
 * NOTE(review): other code in this file also accesses dev, class, offset,
 * words, mask, reg and count through this structure, but their declarations
 * and the closing brace are missing from this excerpt.
 */
264 struct host1x_firewall
{
/* Job being validated. */
265 struct host1x_job
*job
;
/* Set from job->num_relocs by copy_gathers(). */
268 unsigned int num_relocs
;
/* Set from job->relocs by copy_gathers(); passed to check_reloc(). */
269 struct host1x_reloc
*reloc
;
/* Buffer object passed to check_reloc() as the current command buffer. */
271 struct host1x_bo
*cmdbuf
;
/*
 * check_register() - validate one register write seen in the command stream.
 *
 * @fw:     firewall state
 * @offset: register offset being written
 *
 * If the job provides no is_addr_reg callback the first test fires. When the
 * callback reports the register as an address register, the current
 * relocation must match the current command buffer position, as tested by
 * check_reloc().
 *
 * NOTE(review): the return values and any bookkeeping on the relocation
 * cursor are missing from this excerpt.
 */
281 static int check_register(struct host1x_firewall
*fw
, unsigned long offset
)
283 if (!fw
->job
->is_addr_reg
)
286 if (fw
->job
->is_addr_reg(fw
->dev
, fw
->class, offset
)) {
290 if (!check_reloc(fw
->reloc
, fw
->cmdbuf
, fw
->offset
))
/*
 * check_class() - validate the class currently selected in the firewall.
 *
 * @fw:    firewall state; fw->class is the class being checked
 * @class: class to compare against when the job has no validator
 *
 * Without an is_valid_class callback, fw->class is compared with @class.
 * With a callback, the decision is delegated to it.
 *
 * NOTE(review): the return statements are missing from this excerpt.
 */
300 static int check_class(struct host1x_firewall
*fw
, u32
class)
302 if (!fw
->job
->is_valid_class
) {
303 if (fw
->class != class)
306 if (!fw
->job
->is_valid_class(fw
->class))
/*
 * check_mask() - validate the register writes of a masked command.
 *
 * @fw: firewall state
 *
 * NOTE(review): only the check_register() call is visible; the loop that
 * presumably walks the mask bits and derives `reg` is missing from this
 * excerpt.
 */
313 static int check_mask(struct host1x_firewall
*fw
)
324 ret
= check_register(fw
, reg
);
/*
 * check_incr() - validate the register writes of a counted command.
 *
 * @fw: firewall state; fw->count is copied into a local counter
 *
 * Each check goes through check_register() with a local `reg` value, unlike
 * check_nonincr(), which passes fw->reg directly.
 *
 * NOTE(review): the loop and the way `reg` advances are missing from this
 * excerpt.
 */
338 static int check_incr(struct host1x_firewall
*fw
)
340 u32 count
= fw
->count
;
348 ret
= check_register(fw
, reg
);
/*
 * check_nonincr() - validate a counted command that targets one register.
 *
 * @fw: firewall state; fw->count is copied into a local counter
 *
 * check_register() is called with fw->reg itself, so the same register is
 * checked every time.
 *
 * NOTE(review): the surrounding loop is missing from this excerpt.
 */
361 static int check_nonincr(struct host1x_firewall
*fw
)
363 u32 count
= fw
->count
;
370 ret
= check_register(fw
, fw
->reg
);
/*
 * validate() - walk one gather in the gather copy and check every command.
 *
 * @fw: firewall state
 * @g:  gather to validate
 *
 * Words are read from the job's gather copy starting at g->offset (converted
 * from bytes to u32 words). The loop runs while words remain and no error
 * has been recorded.
 *
 * NOTE(review): the switch statement, its case labels, the cursor updates
 * and the return statement are missing from this excerpt, so the opcode
 * value selecting each decode group below cannot be stated from here.
 */
382 static int validate(struct host1x_firewall
*fw
, struct host1x_job_gather
*g
)
384 u32
*cmdbuf_base
= (u32
*)fw
->job
->gather_copy_mapped
+
385 (g
->offset
/ sizeof(u32
));
/* Remember the class the firewall started with; passed to check_class(). */
386 u32 job_class
= fw
->class;
389 fw
->words
= g
->words
;
393 while (fw
->words
&& !err
) {
394 u32 word
= cmdbuf_base
[fw
->offset
];
/* The opcode is the top four bits of the command word. */
395 u32 opcode
= (word
& 0xf0000000) >> 28;
/* Group 1: 10-bit class (bits 6..15), 6-bit mask, 12-bit register. */
405 fw
->class = word
>> 6 & 0x3ff;
406 fw
->mask
= word
& 0x3f;
407 fw
->reg
= word
>> 16 & 0xfff;
408 err
= check_class(fw
, job_class
);
410 err
= check_mask(fw
);
/* Group 2: 12-bit register and 16-bit count, checked by check_incr(). */
415 fw
->reg
= word
>> 16 & 0xfff;
416 fw
->count
= word
& 0xffff;
417 err
= check_incr(fw
);
/* Group 3: same fields, checked by check_nonincr(). */
423 fw
->reg
= word
>> 16 & 0xfff;
424 fw
->count
= word
& 0xffff;
425 err
= check_nonincr(fw
);
/* Group 4: 16-bit mask and 12-bit register, checked by check_mask(). */
431 fw
->mask
= word
& 0xffff;
432 fw
->reg
= word
>> 16 & 0xfff;
433 err
= check_mask(fw
);
/*
 * copy_gathers() - copy all gathers of a job into one buffer and validate
 * them.
 *
 * @host: device used for the write-combined DMA allocation
 * @job:  job whose gathers are copied
 *
 * The sizes of all gathers (words * sizeof(u32)) are summed, one buffer of
 * that size is allocated with dma_alloc_wc(), and every gather is copied
 * into it back to back and passed to validate().
 *
 * NOTE(review): the rest of the parameter list, local declarations, the
 * allocation flags, the remaining firewall initialisation and all return
 * statements are missing from this excerpt.
 */
450 static inline int copy_gathers(struct device
*host
, struct host1x_job
*job
,
453 struct host1x_firewall fw
;
460 fw
.reloc
= job
->relocs
;
461 fw
.num_relocs
= job
->num_relocs
;
462 fw
.class = job
->class;
/* First pass: total size of the copy buffer in bytes. */
464 for (i
= 0; i
< job
->num_gathers
; i
++) {
465 struct host1x_job_gather
*g
= &job
->gathers
[i
];
467 size
+= g
->words
* sizeof(u32
);
471 * Try a non-blocking allocation from a higher priority pools first,
472 * as awaiting for the allocation here is a major performance hit.
474 job
->gather_copy_mapped
= dma_alloc_wc(host
, size
, &job
->gather_copy
,
477 /* the higher priority allocation failed, try the generic-blocking */
478 if (!job
->gather_copy_mapped
)
479 job
->gather_copy_mapped
= dma_alloc_wc(host
, size
,
482 if (!job
->gather_copy_mapped
)
/* A non-zero size is what host1x_job_unpin() tests before freeing the copy. */
485 job
->gather_copy_size
= size
;
/* Second pass: copy and validate, `offset` being the write position. */
487 for (i
= 0; i
< job
->num_gathers
; i
++) {
488 struct host1x_job_gather
*g
= &job
->gathers
[i
];
491 /* Copy the gather */
492 gather
= host1x_bo_mmap(g
->bo
);
493 memcpy(job
->gather_copy_mapped
+ offset
, gather
+ g
->offset
,
494 g
->words
* sizeof(u32
));
495 host1x_bo_munmap(g
->bo
, gather
);
497 /* Store the location in the buffer */
/*
 * NOTE(review): only the base update is visible; validate() and do_relocs()
 * index the copy with g->offset, so presumably g->offset is set to `offset`
 * on a line missing here - confirm.
 */
498 g
->base
= job
->gather_copy
;
501 /* Validate the job */
502 if (validate(&fw
, g
))
505 offset
+= g
->words
* sizeof(u32
);
508 /* No relocs should remain at this point */
/*
 * host1x_job_pin() - pin a job's buffers and patch its relocations.
 *
 * @job: job to prepare
 * @dev: client device; the host1x instance is the drvdata of dev->parent
 *
 * Steps visible here: pin_job(), then copy_gathers() when the firewall is
 * enabled, then for every gather a base address is set and do_relocs() is
 * called.
 *
 * NOTE(review): local declarations, error checks, the test of the `handled`
 * flag and the return statements are missing from this excerpt.
 */
515 int host1x_job_pin(struct host1x_job
*job
, struct device
*dev
)
519 struct host1x
*host
= dev_get_drvdata(dev
->parent
);
522 err
= pin_job(host
, job
);
526 if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL
)) {
527 err
= copy_gathers(host
->dev
, job
, dev
);
533 for (i
= 0; i
< job
->num_gathers
; i
++) {
534 struct host1x_job_gather
*g
= &job
->gathers
[i
];
536 /* process each gather mem only once */
540 /* copy_gathers() sets gathers base if firewall is enabled */
541 if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL
))
542 g
->base
= job
->gather_addr_phys
[i
];
/*
 * Later gathers that use the same buffer object are flagged as handled and
 * given the same base.
 */
544 for (j
= i
+ 1; j
< job
->num_gathers
; j
++) {
545 if (job
->gathers
[j
].bo
== g
->bo
) {
546 job
->gathers
[j
].handled
= true;
547 job
->gathers
[j
].base
= g
->base
;
551 err
= do_relocs(job
, g
);
/* NOTE(review): presumably the failure path; the surrounding condition is not visible. */
558 host1x_job_unpin(job
);
563 EXPORT_SYMBOL(host1x_job_pin
);
/*
 * host1x_job_unpin() - release everything recorded in job->unpins.
 *
 * @job: job to unpin; the host1x instance is the drvdata of
 *       job->channel->dev->parent
 *
 * Every recorded buffer object is unpinned and its reference dropped. The
 * gather copy is freed when job->gather_copy_size is non-zero.
 *
 * NOTE(review): the size argument of iommu_unmap() and any resetting of
 * job->num_unpins are missing from this excerpt.
 */
565 void host1x_job_unpin(struct host1x_job
*job
)
567 struct host1x
*host
= dev_get_drvdata(job
->channel
->dev
->parent
);
570 for (i
= 0; i
< job
->num_unpins
; i
++) {
571 struct host1x_job_unpin_data
*unpin
= &job
->unpins
[i
];
/*
 * Tear down the IOMMU mapping only for entries with a non-zero size, and
 * only when the firewall is compiled out and a domain exists.
 */
573 if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL
) &&
574 unpin
->size
&& host
->domain
) {
575 iommu_unmap(host
->domain
, job
->addr_phys
[i
],
577 free_iova(&host
->iova
,
578 iova_pfn(&host
->iova
, job
->addr_phys
[i
]));
581 host1x_bo_unpin(unpin
->bo
, unpin
->sgt
);
582 host1x_bo_put(unpin
->bo
);
587 if (job
->gather_copy_size
)
588 dma_free_wc(host
->dev
, job
->gather_copy_size
,
589 job
->gather_copy_mapped
, job
->gather_copy
);
591 EXPORT_SYMBOL(host1x_job_unpin
);
/*
 * host1x_job_dump() - print selected job fields through dev_dbg().
 *
 * @dev: device used as the logging context
 * @job: job whose fields are printed
 *
 * Prints syncpt_id, syncpt_end, first_get, timeout, num_slots and
 * num_unpins, one per message.
 */
594 * Debug routine used to dump job entries
596 void host1x_job_dump(struct device
*dev
, struct host1x_job
*job
)
598 dev_dbg(dev
, " SYNCPT_ID %d\n", job
->syncpt_id
);
599 dev_dbg(dev
, " SYNCPT_VAL %d\n", job
->syncpt_end
);
600 dev_dbg(dev
, " FIRST_GET 0x%x\n", job
->first_get
);
601 dev_dbg(dev
, " TIMEOUT %d\n", job
->timeout
);
602 dev_dbg(dev
, " NUM_SLOTS %d\n", job
->num_slots
);
603 dev_dbg(dev
, " NUM_HANDLES %d\n", job
->num_unpins
);