/* drivers/gpu/drm/amd/amdgpu/vce_v4_0.c */

/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"

#include "vce/vce_4_0_offset.h"
#include "vce/vce_4_0_default.h"
#include "vce/vce_4_0_sh_mask.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"

#include "ivsrcid/vce/irqsrcs_vce_4_0.h"

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK   0x02

#define VCE_V4_0_FW_SIZE    (384 * 1024)
#define VCE_V4_0_STACK_SIZE (64 * 1024)
#define VCE_V4_0_DATA_SIZE  ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
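
/*
 * The VCPU view of memory is split into three regions that are programmed
 * into mmVCE_VCPU_CACHE_OFFSET0/1/2 and mmVCE_VCPU_CACHE_SIZE0/1/2 below:
 * the firmware image (VCE_V4_0_FW_SIZE), the stack (VCE_V4_0_STACK_SIZE),
 * and the data area (VCE_V4_0_DATA_SIZE: 16 KB per VCE handle plus an
 * extra 52 KB).
 */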

static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);

/**
 * vce_v4_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;

    if (ring->me == 0)
        return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
    else if (ring->me == 1)
        return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
    else
        return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
}

/**
 * vce_v4_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;

    if (ring->use_doorbell)
        return adev->wb.wb[ring->wptr_offs];

    if (ring->me == 0)
        return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
    else if (ring->me == 1)
        return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
    else
        return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
}

/**
 * vce_v4_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;

    if (ring->use_doorbell) {
        /* XXX check if swapping is necessary on BE */
        adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
        WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
        return;
    }

    if (ring->me == 0)
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
               lower_32_bits(ring->wptr));
    else if (ring->me == 1)
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
               lower_32_bits(ring->wptr));
    else
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
               lower_32_bits(ring->wptr));
}
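
/*
 * Poll VCE_STATUS until the VCPU firmware reports that it has loaded,
 * kicking the ECPU through a soft reset between polling rounds. Returns
 * 0 on success or -ETIMEDOUT if the firmware never comes up.
 */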
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
    int i, j;

    for (i = 0; i < 10; ++i) {
        for (j = 0; j < 100; ++j) {
            uint32_t status =
                RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

            if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
                return 0;
            mdelay(10);
        }

        DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(10);
        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
                 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
        mdelay(10);
    }

    return -ETIMEDOUT;
}

static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
                                struct amdgpu_mm_table *table)
{
    uint32_t data = 0, loop;
    uint64_t addr = table->gpu_addr;
    struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
    uint32_t size;

    size = header->header_size + header->vce_table_size + header->uvd_table_size;

    /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

    /* 2, update vmid of descriptor */
    data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
    data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
    data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

    /* 3, notify mmsch about the size of this descriptor */
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

    /* 4, set resp to zero */
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

    WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
    adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
    adev->vce.ring[0].wptr = 0;
    adev->vce.ring[0].wptr_old = 0;

    /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
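
    /*
     * Poll the mailbox response for the MMSCH acknowledgement: the loop
     * below waits until both bits of the 0x10000002 pattern are set,
     * which appears to signal that the MMSCH has consumed the descriptor
     * table written above.
     */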
    data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
    loop = 1000;
    while ((data & 0x10000002) != 0x10000002) {
        udelay(10);
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
        loop--;
        if (!loop)
            break;
    }

    if (!loop) {
        dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
        return -EBUSY;
    }

    return 0;
}

static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring;
    uint32_t offset, size;
    uint32_t table_size = 0;
    struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
    struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
    struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
    struct mmsch_v1_0_cmd_end end = { { 0 } };
    uint32_t *init_table = adev->virt.mm_table.cpu_addr;
    struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

    direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
    direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
    direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
    end.cmd_header.command_type = MMSCH_COMMAND__END;

    if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
        header->version = MMSCH_VERSION;
        header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

        if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
            header->vce_table_offset = header->header_size;
        else
            header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

        init_table += header->vce_table_offset;

        ring = &adev->vce.ring[0];
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
                                    lower_32_bits(ring->gpu_addr));
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
                                    upper_32_bits(ring->gpu_addr));
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
                                    ring->ring_size / 4);

        /* BEGIN OF MC_RESUME */
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
        MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

        offset = AMDGPU_VCE_FIRMWARE_OFFSET;
        if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
            uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
            uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
            uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

            MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                        mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
            MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                        mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                        (tmr_mc_addr >> 40) & 0xff);
            MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
        } else {
            MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                        mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
                                        adev->vce.gpu_addr >> 8);
            MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                        mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
                                        (adev->vce.gpu_addr >> 40) & 0xff);
            MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
                                        offset & ~0x0f000000);
        }

        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                    mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
                                    adev->vce.gpu_addr >> 8);
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                    mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
                                    (adev->vce.gpu_addr >> 40) & 0xff);
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                    mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
                                    adev->vce.gpu_addr >> 8);
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
                                    mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
                                    (adev->vce.gpu_addr >> 40) & 0xff);

        size = VCE_V4_0_FW_SIZE;
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
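
        /*
         * Note: bits 24+ of VCE_VCPU_CACHE_OFFSETn appear to select the
         * region index (1 = stack, 2 = data), while the low bits carry
         * the byte offset into the VCPU BO.
         */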
        offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
        size = VCE_V4_0_STACK_SIZE;
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
                                    (offset & ~0x0f000000) | (1 << 24));
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

        offset += size;
        size = VCE_V4_0_DATA_SIZE;
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
                                    (offset & ~0x0f000000) | (2 << 24));
        MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

        MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
        MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
                                           VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
                                           VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

        /* end of MC_RESUME */
        MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                           VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
        MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
                                           ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
        MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
                                           ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

        MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
                                      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

        /* clear BUSY flag */
        MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
                                           ~VCE_STATUS__JOB_BUSY_MASK, 0);

        /* add end packet */
        memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
        table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
        header->vce_table_size = table_size;
    }

    return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}

/**
 * vce_v4_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring;
    int r;

    ring = &adev->vce.ring[0];

    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

    ring = &adev->vce.ring[1];

    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

    ring = &adev->vce.ring[2];

    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

    vce_v4_0_mc_resume(adev);
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
             ~VCE_STATUS__JOB_BUSY_MASK);

    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
             ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
    mdelay(100);

    r = vce_v4_0_firmware_loaded(adev);

    /* clear BUSY flag */
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

    if (r) {
        DRM_ERROR("VCE not responding, giving up!!!\n");
        return r;
    }

    return 0;
}

static int vce_v4_0_stop(struct amdgpu_device *adev)
{

    /* Disable VCPU */
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

    /* hold on ECPU */
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
             VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
             ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

    /* clear VCE_STATUS */
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);

    /* Set Clock-Gating off */
    /*
     * if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
     *     vce_v4_0_set_vce_sw_clock_gating(adev, false);
     */

    return 0;
}

static int vce_v4_0_early_init(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    if (amdgpu_sriov_vf(adev)) /* currently only the first VCE ring supports SRIOV */
        adev->vce.num_rings = 1;
    else
        adev->vce.num_rings = 3;

    vce_v4_0_set_ring_funcs(adev);
    vce_v4_0_set_irq_funcs(adev);

    return 0;
}

static int vce_v4_0_sw_init(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    struct amdgpu_ring *ring;

    unsigned size;
    int r, i;

    r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
    if (r)
        return r;

    size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
    if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
        size += VCE_V4_0_FW_SIZE;

    r = amdgpu_vce_sw_init(adev, size);
    if (r)
        return r;

    if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
        const struct common_firmware_header *hdr;
        unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);

        adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
        if (!adev->vce.saved_bo)
            return -ENOMEM;

        hdr = (const struct common_firmware_header *)adev->vce.fw->data;
        adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
        adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
        adev->firmware.fw_size +=
            ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
        DRM_INFO("PSP loading VCE firmware\n");
    } else {
        r = amdgpu_vce_resume(adev);
        if (r)
            return r;
    }

    for (i = 0; i < adev->vce.num_rings; i++) {
        ring = &adev->vce.ring[i];
        sprintf(ring->name, "vce%d", i);
        if (amdgpu_sriov_vf(adev)) {
            /* DOORBELL only works under SRIOV */
            ring->use_doorbell = true;

            /* currently only use the first encoding ring for sriov,
             * so set unused location for other unused rings.
             */
            if (i == 0)
                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
            else
                ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
        }
        r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
        if (r)
            return r;
    }

    r = amdgpu_vce_entity_init(adev);
    if (r)
        return r;

    r = amdgpu_virt_alloc_mm_table(adev);
    if (r)
        return r;

    return r;
}

static int vce_v4_0_sw_fini(void *handle)
{
    int r;
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    /* free MM table */
    amdgpu_virt_free_mm_table(adev);

    if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
        kvfree(adev->vce.saved_bo);
        adev->vce.saved_bo = NULL;
    }

    r = amdgpu_vce_suspend(adev);
    if (r)
        return r;

    return amdgpu_vce_sw_fini(adev);
}

static int vce_v4_0_hw_init(void *handle)
{
    int r, i;
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    if (amdgpu_sriov_vf(adev))
        r = vce_v4_0_sriov_start(adev);
    else
        r = vce_v4_0_start(adev);
    if (r)
        return r;

    for (i = 0; i < adev->vce.num_rings; i++) {
        r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
        if (r)
            return r;
    }

    DRM_INFO("VCE initialized successfully.\n");

    return 0;
}

static int vce_v4_0_hw_fini(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    int i;

    if (!amdgpu_sriov_vf(adev)) {
        /* vce_v4_0_wait_for_idle(handle); */
        vce_v4_0_stop(adev);
    } else {
        /* full access mode, so don't touch any VCE register */
        DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
    }

    for (i = 0; i < adev->vce.num_rings; i++)
        adev->vce.ring[i].sched.ready = false;

    return 0;
}

static int vce_v4_0_suspend(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    int r;

    if (adev->vce.vcpu_bo == NULL)
        return 0;
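
    /*
     * With PSP firmware loading, the VCPU BO contents are saved to system
     * memory here and written back in vce_v4_0_resume(); otherwise
     * amdgpu_vce_resume() reloads the firmware image on resume.
     */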
    if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
        unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
        void *ptr = adev->vce.cpu_addr;

        memcpy_fromio(adev->vce.saved_bo, ptr, size);
    }

    r = vce_v4_0_hw_fini(adev);
    if (r)
        return r;

    return amdgpu_vce_suspend(adev);
}

static int vce_v4_0_resume(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    int r;

    if (adev->vce.vcpu_bo == NULL)
        return -EINVAL;

    if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
        unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
        void *ptr = adev->vce.cpu_addr;

        memcpy_toio(ptr, adev->vce.saved_bo, size);
    } else {
        r = amdgpu_vce_resume(adev);
        if (r)
            return r;
    }

    return vce_v4_0_hw_init(adev);
}

static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
    uint32_t offset, size;
    uint64_t tmr_mc_addr;

    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

    offset = AMDGPU_VCE_FIRMWARE_OFFSET;
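
    /*
     * When the firmware is loaded by the PSP it lives in the TMR region,
     * so the VCPU cache BAR is pointed at the TMR address with offset 0;
     * otherwise it points at the VCE BO at AMDGPU_VCE_FIRMWARE_OFFSET.
     */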
    if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
        tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
                      adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
               (tmr_mc_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
               (tmr_mc_addr >> 40) & 0xff);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
    } else {
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
               (adev->vce.gpu_addr >> 8));
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
               (adev->vce.gpu_addr >> 40) & 0xff);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
    }

    size = VCE_V4_0_FW_SIZE;
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
    offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
    size = VCE_V4_0_STACK_SIZE;
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
    offset += size;
    size = VCE_V4_0_DATA_SIZE;
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
    WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
    WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
             VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
             ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
    /* needed for driver unload */
    return 0;
}

#if 0
static bool vce_v4_0_is_idle(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    u32 mask = 0;

    mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
    mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

    return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v4_0_wait_for_idle(void *handle)
{
    unsigned i;
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    for (i = 0; i < adev->usec_timeout; i++)
        if (vce_v4_0_is_idle(handle))
            return 0;

    return -ETIMEDOUT;
}

#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK   0x00000008L   /* AUTO_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK    0x00000010L   /* RB0_BUSY */
#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK    0x00000020L   /* RB1_BUSY */
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                     VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v4_0_check_soft_reset(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    u32 srbm_soft_reset = 0;

    /* According to the VCE team, we should use VCE_STATUS instead of
     * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
     * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
     * instance's registers are accessed
     * (0 for the 1st instance, 0x10 for the 2nd instance).
     *
     * VCE_STATUS
     * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
     * |----+----+-----------+----+----+----+----------+---------+----|
     * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
     *
     * The VCE team suggests using bits 3-6 for the busy status check.
     */
    mutex_lock(&adev->grbm_idx_mutex);
    WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
    if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
        srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
        srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
    }
    WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
    if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
        srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
        srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
    }
    WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
    mutex_unlock(&adev->grbm_idx_mutex);

    if (srbm_soft_reset) {
        adev->vce.srbm_soft_reset = srbm_soft_reset;
        return true;
    } else {
        adev->vce.srbm_soft_reset = 0;
        return false;
    }
}

static int vce_v4_0_soft_reset(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    u32 srbm_soft_reset;

    if (!adev->vce.srbm_soft_reset)
        return 0;
    srbm_soft_reset = adev->vce.srbm_soft_reset;

    if (srbm_soft_reset) {
        u32 tmp;

        tmp = RREG32(mmSRBM_SOFT_RESET);
        tmp |= srbm_soft_reset;
        dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
        WREG32(mmSRBM_SOFT_RESET, tmp);
        tmp = RREG32(mmSRBM_SOFT_RESET);

        udelay(50);

        tmp &= ~srbm_soft_reset;
        WREG32(mmSRBM_SOFT_RESET, tmp);
        tmp = RREG32(mmSRBM_SOFT_RESET);

        /* Wait a little for things to settle down */
        udelay(50);
    }

    return 0;
}

static int vce_v4_0_pre_soft_reset(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    if (!adev->vce.srbm_soft_reset)
        return 0;

    mdelay(5);

    return vce_v4_0_suspend(adev);
}

static int vce_v4_0_post_soft_reset(void *handle)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    if (!adev->vce.srbm_soft_reset)
        return 0;

    mdelay(5);

    return vce_v4_0_resume(adev);
}

static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
    u32 tmp, data;

    tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
    if (override)
        data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
    else
        data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;

    if (tmp != data)
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
}

static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
                                             bool gated)
{
    u32 data;

    /* Set Override to disable Clock Gating */
    vce_v4_0_override_vce_clock_gating(adev, true);

    /* This function enables MGCG which is controlled by firmware.
     * With the clocks in the gated state the core is still
     * accessible but the firmware will throttle the clocks on the
     * fly as necessary.
     */
    if (gated) {
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
        data |= 0x1ff;
        data &= ~0xef0000;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
        data |= 0x3ff000;
        data &= ~0xffc00000;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
        data |= 0x2;
        data &= ~0x00010000;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
        data |= 0x37f;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
        data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
                0x8;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
    } else {
        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
        data &= ~0x80010;
        data |= 0xe70008;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
        data |= 0xffc00000;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
        data |= 0x10000;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
        data &= ~0xffc00000;
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

        data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
        data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
                  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
                  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
                  0x8);
        WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
    }
    vce_v4_0_override_vce_clock_gating(adev, false);
}

static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
    u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

    if (enable)
        tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
    else
        tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

    WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v4_0_set_clockgating_state(void *handle,
                                          enum amd_clockgating_state state)
{
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;
    bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
    int i;

    if ((adev->asic_type == CHIP_POLARIS10) ||
        (adev->asic_type == CHIP_TONGA) ||
        (adev->asic_type == CHIP_FIJI))
        vce_v4_0_set_bypass_mode(adev, enable);

    if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
        return 0;

    mutex_lock(&adev->grbm_idx_mutex);
    for (i = 0; i < 2; i++) {
        /* Program VCE Instance 0 or 1 if not harvested */
        if (adev->vce.harvest_config & (1 << i))
            continue;

        WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

        if (enable) {
            /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
            uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
            data &= ~(0xf | 0xff0);
            data |= ((0x0 << 0) | (0x04 << 4));
            WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);

            /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
            data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
            data &= ~(0xf | 0xff0);
            data |= ((0x0 << 0) | (0x04 << 4));
            WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
        }

        vce_v4_0_set_vce_sw_clock_gating(adev, enable);
    }

    WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
    mutex_unlock(&adev->grbm_idx_mutex);

    return 0;
}
#endif

static int vce_v4_0_set_powergating_state(void *handle,
                                          enum amd_powergating_state state)
{
    /* This doesn't actually powergate the VCE block.
     * That's done in the dpm code via the SMC. This
     * just re-inits the block as necessary. The actual
     * gating still happens in the dpm code. We should
     * revisit this when there is a cleaner line between
     * the smc and the hw blocks.
     */
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    if (state == AMD_PG_STATE_GATE)
        return vce_v4_0_stop(adev);
    else
        return vce_v4_0_start(adev);
}

static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
                                  struct amdgpu_ib *ib, uint32_t flags)
{
    unsigned vmid = AMDGPU_JOB_GET_VMID(job);

    amdgpu_ring_write(ring, VCE_CMD_IB_VM);
    amdgpu_ring_write(ring, vmid);
    amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
    amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
                                     u64 seq, unsigned flags)
{
    WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

    amdgpu_ring_write(ring, VCE_CMD_FENCE);
    amdgpu_ring_write(ring, addr);
    amdgpu_ring_write(ring, upper_32_bits(addr));
    amdgpu_ring_write(ring, seq);
    amdgpu_ring_write(ring, VCE_CMD_TRAP);
}

static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
    amdgpu_ring_write(ring, VCE_CMD_END);
}

static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                   uint32_t val, uint32_t mask)
{
    amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
    amdgpu_ring_write(ring, reg << 2);
    amdgpu_ring_write(ring, mask);
    amdgpu_ring_write(ring, val);
}

static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
                                   unsigned int vmid, uint64_t pd_addr)
{
    struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

    pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

    /* wait for reg writes */
    vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
                           lower_32_bits(pd_addr), 0xffffffff);
}

static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
                               uint32_t reg, uint32_t val)
{
    amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
    amdgpu_ring_write(ring, reg << 2);
    amdgpu_ring_write(ring, val);
}

static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
                                        struct amdgpu_irq_src *source,
                                        unsigned type,
                                        enum amdgpu_interrupt_state state)
{
    uint32_t val = 0;
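
    /*
     * Under SRIOV the VCE registers are owned by the host (see
     * vce_v4_0_hw_fini(), which likewise avoids touching them), so the
     * interrupt enable is only programmed on bare metal.
     */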
    if (!amdgpu_sriov_vf(adev)) {
        if (state == AMDGPU_IRQ_STATE_ENABLE)
            val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

        WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
                 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
    }
    return 0;
}

static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_irq_src *source,
                                      struct amdgpu_iv_entry *entry)
{
    DRM_DEBUG("IH: VCE\n");

    switch (entry->src_data[0]) {
    case 0:
    case 1:
    case 2:
        amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
        break;
    default:
        DRM_ERROR("Unhandled interrupt: %d %d\n",
                  entry->src_id, entry->src_data[0]);
        break;
    }

    return 0;
}

const struct amd_ip_funcs vce_v4_0_ip_funcs = {
    .name = "vce_v4_0",
    .early_init = vce_v4_0_early_init,
    .late_init = NULL,
    .sw_init = vce_v4_0_sw_init,
    .sw_fini = vce_v4_0_sw_fini,
    .hw_init = vce_v4_0_hw_init,
    .hw_fini = vce_v4_0_hw_fini,
    .suspend = vce_v4_0_suspend,
    .resume = vce_v4_0_resume,
    .is_idle = NULL /* vce_v4_0_is_idle */,
    .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
    .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
    .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
    .soft_reset = NULL /* vce_v4_0_soft_reset */,
    .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
    .set_clockgating_state = vce_v4_0_set_clockgating_state,
    .set_powergating_state = vce_v4_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
    .type = AMDGPU_RING_TYPE_VCE,
    .align_mask = 0x3f,
    .nop = VCE_CMD_NO_OP,
    .support_64bit_ptrs = false,
    .no_user_fence = true,
    .vmhub = AMDGPU_MMHUB_0,
    .get_rptr = vce_v4_0_ring_get_rptr,
    .get_wptr = vce_v4_0_ring_get_wptr,
    .set_wptr = vce_v4_0_ring_set_wptr,
    .parse_cs = amdgpu_vce_ring_parse_cs_vm,
    .emit_frame_size =
        SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
        SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
        4 + /* vce_v4_0_emit_vm_flush */
        5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
        1, /* vce_v4_0_ring_insert_end */
    .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
    .emit_ib = vce_v4_0_ring_emit_ib,
    .emit_vm_flush = vce_v4_0_emit_vm_flush,
    .emit_fence = vce_v4_0_ring_emit_fence,
    .test_ring = amdgpu_vce_ring_test_ring,
    .test_ib = amdgpu_vce_ring_test_ib,
    .insert_nop = amdgpu_ring_insert_nop,
    .insert_end = vce_v4_0_ring_insert_end,
    .pad_ib = amdgpu_ring_generic_pad_ib,
    .begin_use = amdgpu_vce_ring_begin_use,
    .end_use = amdgpu_vce_ring_end_use,
    .emit_wreg = vce_v4_0_emit_wreg,
    .emit_reg_wait = vce_v4_0_emit_reg_wait,
    .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
    int i;

    for (i = 0; i < adev->vce.num_rings; i++) {
        adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
        adev->vce.ring[i].me = i;
    }
    DRM_INFO("VCE enabled in VM mode\n");
}

static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
    .set = vce_v4_0_set_interrupt_state,
    .process = vce_v4_0_process_interrupt,
};

static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
    adev->vce.irq.num_types = 1;
    adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
    .type = AMD_IP_BLOCK_TYPE_VCE,
    .major = 4,
    .minor = 0,
    .rev = 0,
    .funcs = &vce_v4_0_ip_funcs,
};