1 // SPDX-License-Identifier: GPL-2.0-only
5 * Copyright (c) 2010-2013, NVIDIA Corporation.
8 #include <linux/host1x.h>
9 #include <linux/iommu.h>
10 #include <linux/slab.h>
12 #include <trace/events/host1x.h>
14 #include "../channel.h"
19 #define TRACE_MAX_LENGTH 128U
/*
 * trace_write_gather() - emit the words of a gather buffer to the host1x trace
 * @cdma: CDMA the gather belongs to; the name of its channel device is passed
 *        to the tracepoint
 * @bo: buffer object holding the gather
 * @offset: position of the gather inside @bo (presumably in bytes, since it is
 *          advanced in units of sizeof(u32) - confirm against callers)
 * @words: number of 32-bit words in the gather
 *
 * The BO is mapped with host1x_bo_mmap() only when host1x_debug_trace_cmdbuf
 * is set. The words are then handed to trace_host1x_cdma_push_gather() in
 * batches of at most TRACE_MAX_LENGTH words, and the mapping is released with
 * host1x_bo_munmap().
 */
21 static void trace_write_gather(struct host1x_cdma
*cdma
, struct host1x_bo
*bo
,
22 u32 offset
, u32 words
)
24 struct device
*dev
= cdma_to_channel(cdma
)->dev
;
27 if (host1x_debug_trace_cmdbuf
)
28 mem
= host1x_bo_mmap(bo
);
33 * Write in batches of 128 as there seems to be a limit
34 * of how much you can output to ftrace at once.
36 for (i
= 0; i
< words
; i
+= TRACE_MAX_LENGTH
) {
37 u32 num_words
= min(words
- i
, TRACE_MAX_LENGTH
);
/*
 * NOTE(review): i is the running word index, not the batch size, so this
 * adds 0, 128, 256, ... words worth of bytes on top of what earlier
 * iterations already added. From the third batch on the offset overshoots
 * (unless it is reset in code not visible here). Advancing by one batch per
 * iteration, or deriving the offset from the initial value plus
 * i * sizeof(u32) without accumulating, looks like the intent - confirm.
 */
39 offset
+= i
* sizeof(u32
);
41 trace_host1x_cdma_push_gather(dev_name(dev
), bo
,
46 host1x_bo_munmap(bo
, mem
);
/*
 * submit_wait() - push commands that make the channel wait on a syncpoint
 * @job: job whose channel CDMA receives the commands
 * @id: ID of the syncpoint to wait on
 * @threshold: syncpoint value to wait for
 *
 * The remainder of the parameter list is not visible in this view; the body
 * also uses next_class, the class that is set again after the wait.
 *
 * Three push sequences are visible below: two host1x_cdma_push_wide()
 * variants built around HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 (one of them
 * followed by a push that programs the stream ID, taken from
 * job->memory_context when set and from job->engine_fallback_streamid
 * otherwise), and a host1x_cdma_push() variant that uses
 * host1x_uclass_wait_syncpt_r() followed by a setclass back to next_class.
 * NOTE(review): the conditions selecting between these variants are not
 * visible here - presumably hardware-generation dependent, confirm.
 */
50 static void submit_wait(struct host1x_job
*job
, u32 id
, u32 threshold
,
53 struct host1x_cdma
*cdma
= &job
->channel
->cdma
;
59 * If a memory context has been set, use it. Otherwise
60 * (if context isolation is disabled) use the engine's
63 if (job
->memory_context
)
64 stream_id
= job
->memory_context
->stream_id
;
66 stream_id
= job
->engine_fallback_streamid
;
68 host1x_cdma_push_wide(cdma
,
69 host1x_opcode_setclass(
71 HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32
,
72 /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
79 host1x_cdma_push_wide(&job
->channel
->cdma
,
80 host1x_opcode_setclass(job
->class, 0, 0),
81 host1x_opcode_setpayload(stream_id
),
82 host1x_opcode_setstreamid(job
->engine_streamid_offset
/ 4),
85 host1x_cdma_push_wide(cdma
,
86 host1x_opcode_setclass(
88 HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32
,
89 /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
94 host1x_opcode_setclass(next_class
, 0, 0)
97 /* TODO add waitchk or use waitbases or other mitigation */
98 host1x_cdma_push(cdma
,
99 host1x_opcode_setclass(
101 host1x_uclass_wait_syncpt_r(),
104 host1x_class_host_wait_syncpt(id
, threshold
)
106 host1x_cdma_push(cdma
,
107 host1x_opcode_setclass(next_class
, 0, 0),
/*
 * submit_gathers() - push all commands of a job to its channel CDMA
 * @job: job whose cmds[] array (num_cmds entries) is walked in order
 * @job_syncpt_base: syncpoint value that relative wait thresholds are added to
 *
 * Two kinds of command are handled. For a wait, the threshold is
 * job_syncpt_base + cmd->wait.threshold when cmd->wait.relative is set and
 * cmd->wait.threshold otherwise, and the wait is emitted with submit_wait().
 * For a gather, the DMA address g->base + g->offset is split into its lower
 * and upper 32 bits, the gather is passed to trace_write_gather(), and it is
 * pushed either as a wide gather opcode (padded with HOST1X_OPCODE_NOP) or as
 * a regular two-word gather.
 * NOTE(review): the tests that tell waits from gathers, that pick the wide
 * form, and that lead to the dev_err() below are not visible in this view.
 */
113 static void submit_gathers(struct host1x_job
*job
, u32 job_syncpt_base
)
115 struct host1x_cdma
*cdma
= &job
->channel
->cdma
;
117 struct device
*dev
= job
->channel
->dev
;
122 for (i
= 0; i
< job
->num_cmds
; i
++) {
123 struct host1x_job_cmd
*cmd
= &job
->cmds
[i
];
126 if (cmd
->wait
.relative
)
127 threshold
= job_syncpt_base
+ cmd
->wait
.threshold
;
129 threshold
= cmd
->wait
.threshold
;
131 submit_wait(job
, cmd
->wait
.id
, threshold
, cmd
->wait
.next_class
);
133 struct host1x_job_gather
*g
= &cmd
->gather
;
135 dma_addr_t addr
= g
->base
+ g
->offset
;
/* op2 and op3 carry the low and high halves of the gather address. */
138 op2
= lower_32_bits(addr
);
139 op3
= upper_32_bits(addr
);
141 trace_write_gather(cdma
, g
->bo
, g
->offset
, g
->words
);
145 u32 op1
= host1x_opcode_gather_wide(g
->words
);
146 u32 op4
= HOST1X_OPCODE_NOP
;
148 host1x_cdma_push_wide(cdma
, op1
, op2
, op3
, op4
);
150 dev_err(dev
, "invalid gather for push buffer %pad\n",
155 u32 op1
= host1x_opcode_gather(g
->words
);
157 host1x_cdma_push(cdma
, op1
, op2
);
/*
 * synchronize_syncpt_base() - load a syncpoint base register with the current
 * maximum value of the job syncpoint
 * @job: job whose syncpt and channel CDMA are used
 *
 * Reads host1x_syncpt_read_max() for job->syncpt and pushes a
 * HOST1X_UCLASS_LOAD_SYNCPT_BASE write in class HOST1X_CLASS_HOST1X that
 * carries this value together with the base index.
 * NOTE(review): where the base index id comes from is not visible in this
 * view.
 */
163 static inline void synchronize_syncpt_base(struct host1x_job
*job
)
165 struct host1x_syncpt
*sp
= job
->syncpt
;
169 value
= host1x_syncpt_read_max(sp
);
172 host1x_cdma_push(&job
->channel
->cdma
,
173 host1x_opcode_setclass(HOST1X_CLASS_HOST1X
,
174 HOST1X_UCLASS_LOAD_SYNCPT_BASE
, 1),
175 HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(id
) |
176 HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value
));
/*
 * host1x_channel_set_streamid() - program the SMMU stream ID of a channel
 * @channel: channel to configure
 *
 * Queries the stream ID of the parent of the channel device with
 * tegra_dev_iommu_get_stream_id(). If that call returns a false value,
 * TEGRA_STREAM_ID_BYPASS is used instead. The result is written to the
 * HOST1X_CHANNEL_SMMU_STREAMID channel register.
 */
179 static void host1x_channel_set_streamid(struct host1x_channel
*channel
)
184 if (!tegra_dev_iommu_get_stream_id(channel
->dev
->parent
, &stream_id
))
185 stream_id
= TEGRA_STREAM_ID_BYPASS
;
187 host1x_ch_writel(channel
, stream_id
, HOST1X_CHANNEL_SMMU_STREAMID
);
/*
 * host1x_enable_gather_filter() - set the KERNEL_FILTER_GBUFFER control for a
 * channel
 * @ch: channel to configure
 *
 * Two register sequences are visible: a read-modify-write that sets bit
 * (ch->id % 32) in hypervisor register
 * HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32), and a write of
 * HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1) to
 * HOST1X_CHANNEL_CHANNELCTRL.
 * NOTE(review): what selects between them, and any early exit, is not
 * visible in this view.
 */
191 static void host1x_enable_gather_filter(struct host1x_channel
*ch
)
194 struct host1x
*host
= dev_get_drvdata(ch
->dev
->parent
);
/* Each hypervisor filter register covers 32 channels, one bit per channel. */
200 val
= host1x_hypervisor_readl(
201 host
, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch
->id
/ 32));
202 val
|= BIT(ch
->id
% 32);
203 host1x_hypervisor_writel(
204 host
, val
, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch
->id
/ 32));
207 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
208 HOST1X_CHANNEL_CHANNELCTRL
);
/*
 * channel_program_cdma() - push the complete command stream of a job
 * @job: job to program into its channel CDMA
 *
 * Two sequences are visible below.
 *
 * The first wraps the job in an MLOCK: it acquires the MLOCK for job->class
 * and enters the class with stream ID payload 0, pushes a syncpoint
 * increment (condition 4) and a wait for the resulting value, reserves
 * job->syncpt_incrs increments (recording the new maximum in
 * job->syncpt_end), submits the gathers, pushes another increment and wait,
 * and finally releases the MLOCK.
 *
 * The second inserts a host wait on the current syncpoint maximum when
 * job->serialize is set, calls synchronize_syncpt_base(), pushes a setclass
 * for job->class, reserves the increments and submits the gathers.
 *
 * In both, submit_gathers() receives job->syncpt_end - job->syncpt_incrs,
 * i.e. the syncpoint maximum before this job reserved its increments.
 * NOTE(review): the conditions choosing between the sequences, and those
 * guarding individual steps of the second one, are not visible in this view.
 */
212 static void channel_program_cdma(struct host1x_job
*job
)
214 struct host1x_cdma
*cdma
= &job
->channel
->cdma
;
215 struct host1x_syncpt
*sp
= job
->syncpt
;
220 /* Enter engine class with invalid stream ID. */
221 host1x_cdma_push_wide(cdma
,
222 host1x_opcode_acquire_mlock(job
->class),
223 host1x_opcode_setclass(job
->class, 0, 0),
224 host1x_opcode_setpayload(0),
225 host1x_opcode_setstreamid(job
->engine_streamid_offset
/ 4));
227 /* Before switching stream ID to real stream ID, ensure engine is idle. */
228 fence
= host1x_syncpt_incr_max(sp
, 1);
229 host1x_cdma_push(&job
->channel
->cdma
,
230 host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT
, 1),
231 HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job
->syncpt
->id
) |
232 HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
233 submit_wait(job
, job
->syncpt
->id
, fence
, job
->class);
236 job
->syncpt_end
= host1x_syncpt_incr_max(sp
, job
->syncpt_incrs
);
237 submit_gathers(job
, job
->syncpt_end
- job
->syncpt_incrs
);
239 /* Before releasing MLOCK, ensure engine is idle again. */
240 fence
= host1x_syncpt_incr_max(sp
, 1);
241 host1x_cdma_push(&job
->channel
->cdma
,
242 host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT
, 1),
243 HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job
->syncpt
->id
) |
244 HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
245 submit_wait(job
, job
->syncpt
->id
, fence
, job
->class);
248 host1x_cdma_push(cdma
,
249 HOST1X_OPCODE_NOP
, host1x_opcode_release_mlock(job
->class));
251 if (job
->serialize
) {
253 * Force serialization by inserting a host wait for the
254 * previous job to finish before this one can commence.
256 host1x_cdma_push(cdma
,
257 host1x_opcode_setclass(HOST1X_CLASS_HOST1X
,
258 host1x_uclass_wait_syncpt_r(), 1),
259 host1x_class_host_wait_syncpt(job
->syncpt
->id
,
260 host1x_syncpt_read_max(sp
)));
263 /* Synchronize base register to allow using it for relative waiting */
265 synchronize_syncpt_base(job
);
267 /* add a setclass for modules that require it */
269 host1x_cdma_push(cdma
,
270 host1x_opcode_setclass(job
->class, 0, 0),
273 job
->syncpt_end
= host1x_syncpt_incr_max(sp
, job
->syncpt_incrs
);
275 submit_gathers(job
, job
->syncpt_end
- job
->syncpt_incrs
);
/*
 * job_complete_callback() - dma_fence callback that kicks a CDMA update
 * @fence: fence the callback fired for (not used in the visible body)
 * @cb: callback node embedded in struct host1x_job as fence_cb
 *
 * Recovers the owning job from @cb with container_of() and calls
 * host1x_cdma_update() on the CDMA of the job channel.
 */
279 static void job_complete_callback(struct dma_fence
*fence
, struct dma_fence_cb
*cb
)
281 struct host1x_job
*job
= container_of(cb
, struct host1x_job
, fence_cb
);
283 /* Schedules CDMA update. */
284 host1x_cdma_update(&job
->channel
->cdma
);
/*
 * channel_submit() - submit a job to its channel
 * @job: job to submit
 *
 * Visible steps, in order: emit the submit tracepoint; record the current
 * syncpoint maximum in prev_max and job->syncpt_end; take ch->submitlock
 * interruptibly; set the channel stream ID, enable the gather filter and
 * assign the syncpoint to the channel; begin the CDMA submit (the lock is
 * dropped on a path following host1x_cdma_begin(), presumably its failure
 * path - confirm); program the command stream; create the completion fence
 * for the final syncpoint value and register job_complete_callback() on it;
 * end the CDMA submit; emit the submitted tracepoint; and release the lock.
 *
 * NOTE(review): return statements and error branches are not visible in
 * this view, so the return values cannot be stated here. The calls to
 * host1x_cdma_update() and the final WARN() sit after the unlock; the
 * conditions under which they run are likewise not visible.
 */
287 static int channel_submit(struct host1x_job
*job
)
289 struct host1x_channel
*ch
= job
->channel
;
290 struct host1x_syncpt
*sp
= job
->syncpt
;
294 struct host1x
*host
= dev_get_drvdata(ch
->dev
->parent
);
296 trace_host1x_channel_submit(dev_name(ch
->dev
),
297 job
->num_cmds
, job
->num_relocs
,
298 job
->syncpt
->id
, job
->syncpt_incrs
);
300 /* before error checks, return current max */
301 prev_max
= job
->syncpt_end
= host1x_syncpt_read_max(sp
);
303 /* get submit lock */
304 err
= mutex_lock_interruptible(&ch
->submitlock
);
308 host1x_channel_set_streamid(ch
);
309 host1x_enable_gather_filter(ch
);
310 host1x_hw_syncpt_assign_to_channel(host
, sp
, ch
);
312 /* begin a CDMA submit */
313 err
= host1x_cdma_begin(&ch
->cdma
, job
);
315 mutex_unlock(&ch
->submitlock
);
319 channel_program_cdma(job
);
/* syncval is the syncpoint maximum after all increments were reserved. */
320 syncval
= host1x_syncpt_read_max(sp
);
323 * Create fence before submitting job to HW to avoid job completing
324 * before the fence is set up.
326 job
->fence
= host1x_fence_create(sp
, syncval
, true);
327 if (WARN(IS_ERR(job
->fence
), "Failed to create submit complete fence")) {
330 err
= dma_fence_add_callback(job
->fence
, &job
->fence_cb
,
331 job_complete_callback
);
334 /* end CDMA submit & stash pinned hMems into sync queue */
335 host1x_cdma_end(&ch
->cdma
, job
);
337 trace_host1x_channel_submitted(dev_name(ch
->dev
), prev_max
, syncval
);
339 mutex_unlock(&ch
->submitlock
);
342 host1x_cdma_update(&ch
->cdma
);
344 WARN(err
, "Failed to set submit complete interrupt");
/*
 * host1x_channel_init() - set up the register window of a channel
 * @ch: channel to initialise
 * @dev: host1x instance providing the register base
 *
 * The rest of the parameter list is not visible in this view; the body uses
 * index to compute the window. ch->regs is set to dev->regs plus index
 * times a per-channel stride; assignments with strides 0x4000 and 0x100 are
 * both visible.
 * NOTE(review): what selects the stride, and the return value, are not
 * visible here.
 */
349 static int host1x_channel_init(struct host1x_channel
*ch
, struct host1x
*dev
,
353 ch
->regs
= dev
->regs
+ index
* 0x4000;
355 ch
->regs
= dev
->regs
+ index
* 0x100;
/*
 * Channel operation table: .init is host1x_channel_init() and .submit is
 * channel_submit().
 */
360 static const struct host1x_channel_ops host1x_channel_ops
= {
361 .init
= host1x_channel_init
,
362 .submit
= channel_submit
,