/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

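/*
 * Allocate a job together with all of its fixed-size arrays. The reloc,
 * unpin, waitchk, gather and address tables are carved out of the same
 * kzalloc()ed block as the job struct itself, so a single kfree() in
 * job_free() releases everything once the last reference is dropped.
 */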
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    u32 num_waitchks)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_cmdbufs + num_relocs;
	u64 total;
	void *mem;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_waitchks * sizeof(struct host1x_waitchk) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocarray = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->waitchk = num_waitchks ? mem : NULL;
	mem += num_waitchks * sizeof(struct host1x_waitchk);
	job->gathers = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

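/*
 * Illustrative job lifecycle as driven by a host1x client (for example the
 * Tegra DRM driver); the exact call sites live outside this file:
 *
 *	job = host1x_job_alloc(channel, num_cmdbufs, num_relocs, num_waitchks);
 *	host1x_job_add_gather(job, bo, words, offset);
 *	err = host1x_job_pin(job, dev);	(pin BOs, patch relocs and waits)
 *	err = host1x_job_submit(job);	(queue the job on its channel)
 *	host1x_job_put(job);		(drop the allocation reference)
 */
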
struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

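/*
 * Record one gather (a slice of a command buffer BO) in the job. The BO is
 * only referenced and pinned later, in host1x_job_pin().
 */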
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   u32 words, u32 offset)
{
	struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers];

	cur_gather->words = words;
	cur_gather->bo = bo;
	cur_gather->offset = offset;
	job->num_gathers++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

/*
 * NULL an already satisfied WAIT_SYNCPT host method, by patching its
 * args in the command stream. The method data is changed to reference
 * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt
 * with a matching threshold value of 0, so is guaranteed to be popped
 * by the host HW.
 */
static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
				       struct host1x_bo *h, u32 offset)
{
	void *patch_addr = NULL;

	/* patch the wait */
	patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT);
	if (patch_addr) {
		host1x_syncpt_patch_wait(sp,
					 patch_addr + (offset & ~PAGE_MASK));
		host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr);
	} else {
		pr_err("Could not map cmdbuf for wait check\n");
	}
}

/*
 * Check driver supplied waitchk structs for syncpt thresholds
 * that have already been satisfied and NULL the comparison (to
 * avoid a wrap condition in the HW).
 */
static int do_waitchks(struct host1x_job *job, struct host1x *host,
		       struct host1x_job_gather *g)
{
	struct host1x_bo *patch = g->bo;
	unsigned int i;

	/* compare syncpt vs wait threshold */
	for (i = 0; i < job->num_waitchk; i++) {
		struct host1x_waitchk *wait = &job->waitchk[i];
		struct host1x_syncpt *sp =
			host1x_syncpt_get(host, wait->syncpt_id);

		/* validate syncpt id */
		if (wait->syncpt_id > host1x_syncpt_nb_pts(host))
			continue;

		/* skip all other gathers */
		if (patch != wait->bo)
			continue;

		trace_host1x_syncpt_wait_check(wait->bo, wait->offset,
					       wait->syncpt_id, wait->thresh,
					       host1x_syncpt_read_min(sp));

		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
			dev_dbg(host->dev,
				"drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n",
				wait->syncpt_id, sp->name, wait->thresh,
				host1x_syncpt_read_min(sp));

			host1x_syncpt_patch_offset(sp, patch,
						   g->offset + wait->offset);
			wait->relocated = true;
		}
	}

	return 0;
}

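/*
 * Pin every buffer object referenced by the job: first the relocation
 * targets, then the gather command buffers. Each pinned BO is recorded in
 * job->unpins[] along with its sg_table so host1x_job_unpin() can undo the
 * work. Without the firewall built in, gathers are also mapped read-only
 * into the host1x IOVA space when an IOMMU domain is available.
 */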
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	unsigned int i;
	int err;

	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocarray[i];
		struct sg_table *sgt;
		dma_addr_t phys_addr;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);

		job->addr_phys[job->num_unpins] = phys_addr;
		job->unpins[job->num_unpins].bo = reloc->target.bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		size_t gather_size = 0;
		struct scatterlist *sg;
		struct sg_table *sgt;
		dma_addr_t phys_addr;
		unsigned long shift;
		struct iova *alloc;
		unsigned int j;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		phys_addr = host1x_bo_pin(g->bo, &sgt);

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
			for_each_sg(sgt->sgl, sg, sgt->nents, j)
				gather_size += sg->length;
			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto unpin;
			}

			err = iommu_map_sg(host->domain,
					   iova_dma_addr(&host->iova, alloc),
					   sgt->sgl, sgt->nents, IOMMU_READ);
			if (!err) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto unpin;
			}

			job->addr_phys[job->num_unpins] =
				iova_dma_addr(&host->iova, alloc);
			job->unpins[job->num_unpins].size = gather_size;
		} else {
			job->addr_phys[job->num_unpins] = phys_addr;
		}

		job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];

		job->unpins[job->num_unpins].bo = g->bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	return 0;

unpin:
	host1x_job_unpin(job);
	return err;
}

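/*
 * Patch all relocations that point into one gather: every reloc slot is
 * overwritten with the device address of its target BO. With the firewall
 * enabled the patch is applied to the gather copy rather than the original
 * command buffer.
 */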
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	u32 last_page = ~0;
	void *cmdbuf_page_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocarray[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
					g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
			if (cmdbuf_page_addr)
				host1x_bo_kunmap(cmdbuf, last_page,
						 cmdbuf_page_addr);

			cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
					reloc->cmdbuf.offset >> PAGE_SHIFT);
			last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;

			if (unlikely(!cmdbuf_page_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_page_addr)
		host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);

	return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf,
		       unsigned int offset)
{
	offset *= sizeof(u32);

	if (wait->bo != cmdbuf || wait->offset != offset)
		return false;

	return true;
}

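/*
 * State carried by the command stream firewall while it walks a gather:
 * the remaining reloc and waitchk tables, the buffer and offset currently
 * being inspected, and the class/reg/mask/count fields decoded from the
 * current opcode.
 */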
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	unsigned int num_waitchks;
	struct host1x_waitchk *waitchk;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

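/*
 * A write to an address register must be covered by the next pending
 * relocation, and a write to the WAIT_SYNCPT method must be covered by the
 * next pending waitchk; anything else passes through unchanged.
 */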
static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	if (offset == HOST1X_WAIT_SYNCPT_OFFSET) {
		if (fw->class != HOST1X_CLASS_HOST1X)
			return -EINVAL;

		if (!fw->num_waitchks)
			return -EINVAL;

		if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_waitchks--;
		fw->waitchk++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

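/*
 * check_mask(), check_incr() and check_nonincr() walk the data words that
 * follow a mask, incrementing or non-incrementing write opcode, running
 * check_register() for every register actually written and consuming the
 * corresponding words from the gather.
 */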
static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

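/*
 * Decode one (copied) gather word by word and dispatch each opcode to the
 * checkers above; unknown opcodes abort validation with -EINVAL.
 */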
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;
		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;
		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 4:
		case 14:
			break;
		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}

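/*
 * With the firewall enabled, all gathers are copied into one contiguous
 * write-combined DMA buffer and validated there, so userspace cannot alter
 * the commands after they have been checked.
 */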
static inline int copy_gathers(struct host1x_job *job, struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocarray;
	fw.num_relocs = job->num_relocs;
	fw.waitchk = job->waitchk;
	fw.num_waitchks = job->num_waitchk;
	fw.class = job->class;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from a higher priority pool first,
	 * as waiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* the higher priority allocation failed, try the generic-blocking */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(dev, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];
		void *gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs and waitchks should remain at this point */
	if (fw.num_relocs || fw.num_waitchks)
		return -EINVAL;

	return 0;
}

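/*
 * Prepare a job for submission: snapshot the syncpt values needed by the
 * wait checks, pin all buffers, optionally copy and validate the gathers
 * through the firewall, then patch relocations and already-expired waits.
 * On failure everything pinned so far is released again.
 */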
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);
	DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));

	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
	for (i = 0; i < job->num_waitchk; i++) {
		u32 syncpt_id = job->waitchk[i].syncpt_id;

		if (syncpt_id < host1x_syncpt_nb_pts(host))
			set_bit(syncpt_id, waitchk_mask);
	}

	/* get current syncpt values for waitchk */
	for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host))
		host1x_syncpt_load(host->syncpt + i);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
		err = copy_gathers(job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct host1x_job_gather *g = &job->gathers[i];

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets gathers base if firewall is enabled */
		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_gathers; j++) {
			if (job->gathers[j].bo == g->bo) {
				job->gathers[j].handled = true;
				job->gathers[j].base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;

		err = do_waitchks(job, host, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

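/*
 * Release everything host1x_job_pin() acquired: IOMMU mappings and IOVA
 * ranges (when used), BO pins and references, and the gather copy buffer.
 */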
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];

		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				  iova_pfn(&host->iova, job->addr_phys[i]));
		}

		host1x_bo_unpin(unpin->bo, unpin->sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(job->channel->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
	dev_dbg(dev, "    NUM_SLOTS   %d\n", job->num_slots);
	dev_dbg(dev, "    NUM_HANDLES %d\n", job->num_unpins);
}