/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "si.h"
#include "sid.h"

const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	DMA0_REGISTER_OFFSET,
	DMA1_REGISTER_OFFSET
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);

static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs >> 2];
}

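/*
 * DMA_RB_WPTR holds the write pointer as a byte offset into the ring, while
 * ring->wptr counts dwords; hence the << 2 / >> 2 conversions and the
 * 0x3fffc mask below.
 */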
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}

static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	WREG32(DMA_RB_WPTR + sdma_offsets[me],
	       (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
}

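/*
 * The INDIRECT_BUFFER packet emitted below is 3 dwords long, so padding the
 * ring until (wptr & 7) == 5 makes the packet end exactly on an 8 DW
 * boundary, as the engine requires.
 */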
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_ib *ib,
				unsigned vmid, bool ctx_switch)
{
	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((lower_32_bits(ring->wptr) & 7) != 5)
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}

/**
 * si_dma_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: fence flags (e.g. AMDGPU_FENCE_FLAG_64BIT)
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and a DMA trap packet to generate
 * an interrupt if needed (SI).
 */
static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				   unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
	amdgpu_ring_write(ring, seq);
	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
		amdgpu_ring_write(ring, addr & 0xfffffffc);
		amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}
	/* generate an interrupt */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
}

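/*
 * si_dma_stop - halt the async DMA engines
 *
 * Clear DMA_RB_ENABLE on every instance and detach the engine from the TTM
 * buffer-move path so nothing else gets submitted while it is down.
 */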
static void si_dma_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl;
	unsigned i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
		rb_cntl &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, false);
	}
}

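/*
 * si_dma_start - bring up both DMA engines
 *
 * For each instance: program the ring buffer size, read-pointer writeback
 * address and ring base, enable indirect buffers, mask the context-empty
 * interrupt, enable the ring, then run the ring test before registering the
 * ring for TTM buffer moves.
 */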
static int si_dma_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
	int i, r;
	uint64_t rptr_addr;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
		WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);

		/* set the read pointer writeback address */
		rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);

		WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
		WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);

		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);

		r = amdgpu_ring_test_ring(ring);
		if (r)
			return r;

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return 0;
}

/**
 * si_dma_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (SI).
 * Returns 0 for success, error for failure.
 */
static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 4);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	/* write 0xDEADBEEF to the wb slot through the ring */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_device_wb_free(adev, index);

	return r;
}

/**
 * si_dma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Test a simple IB in the DMA ring (SI).
 * Returns 0 on success, error on failure.
 */
static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * si_dma_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using DMA (SI).
 */
static void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
			       uint64_t pe, uint64_t src,
			       unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, bytes);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
}

/**
 * si_dma_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using DMA (SI).
 */
static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				uint64_t value, unsigned count,
				uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * si_dma_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (SI).
 */
static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
				  uint64_t pe,
				  uint64_t addr, unsigned count,
				  uint32_t incr, uint64_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
		ib->ptr[ib->length_dw++] = upper_32_bits(flags);
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;

		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}

/**
 * si_dma_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords.
 */
static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
}

/**
 * si_dma_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (SI).
 */
static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
			  (1 << 27)); /* Poll memory */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, seq); /* value */
	amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
}

/**
 * si_dma_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM id to flush
 * @pd_addr: page directory address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (SI).
 */
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
				      unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for invalidate to complete */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0xff << 16); /* retry */
	amdgpu_ring_write(ring, 1 << vmid); /* mask */
	amdgpu_ring_write(ring, 0); /* value */
	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}

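/*
 * si_dma_ring_emit_wreg - write a register from the ring
 *
 * Emits an SRBM_WRITE packet; the GMC TLB-flush helper used above issues its
 * register writes through this callback.
 */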
static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
				  uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | reg);
	amdgpu_ring_write(ring, val);
}

static int si_dma_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->sdma.num_instances = 2;

	si_dma_set_ring_funcs(adev);
	si_dma_set_buffer_funcs(adev);
	si_dma_set_vm_pte_funcs(adev);
	si_dma_set_irq_funcs(adev);

	return 0;
}

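/*
 * SI routes the DMA trap events through the legacy interrupt client:
 * source id 224 is the DMA0 trap, 244 the DMA1 trap. Both rings are created
 * against the same trap_irq source, distinguished by the irq type.
 */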
static int si_dma_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* DMA0 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq);
	if (r)
		return r;

	/* DMA1 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_TRAP0 :
				     AMDGPU_SDMA_IRQ_TRAP1);
		if (r)
			return r;
	}

	return r;
}

static int si_dma_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}

static int si_dma_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_start(adev);
}

static int si_dma_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	si_dma_stop(adev);

	return 0;
}

static int si_dma_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_fini(adev);
}

static int si_dma_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_init(adev);
}

static bool si_dma_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(SRBM_STATUS2);

	if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
		return false;

	return true;
}

static int si_dma_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (si_dma_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int si_dma_soft_reset(void *handle)
{
	DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
	return 0;
}

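/*
 * Interrupt handling: the trap interrupt is enabled or disabled per engine
 * by toggling TRAP_ENABLE in that instance's DMA_CNTL register.
 */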
static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *src,
				     unsigned type,
				     enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_TRAP0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_TRAP1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}

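/*
 * Each DMA instance raises its own trap interrupt once a fence/trap packet
 * has executed; the handlers simply run fence processing on the matching
 * ring.
 */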
static int si_dma_process_trap_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	amdgpu_fence_process(&adev->sdma.instance[0].ring);

	return 0;
}

static int si_dma_process_trap_irq_1(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	amdgpu_fence_process(&adev->sdma.instance[1].ring);

	return 0;
}

static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

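/*
 * Medium-grain clock gating: when gating is requested and SDMA MGCG is
 * supported, MEM_POWER_OVERRIDE is cleared and DMA_CLK_CTRL is programmed
 * for dynamic gating; otherwise the override is set and gating is backed
 * out.
 */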
static int si_dma_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	u32 orig, data, offset;
	int i;
	bool enable;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	enable = (state == AMD_CG_STATE_GATE) ? true : false;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data &= ~MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);
			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data |= MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);

			orig = data = RREG32(DMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(DMA_CLK_CTRL + offset, data);
		}
	}

	return 0;
}

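/*
 * Powergating on the SI DMA blocks is driven by a fixed PGFSM register write
 * sequence; note that the requested power state is not evaluated here.
 */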
static int si_dma_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	u32 tmp;

	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);

	return 0;
}

static const struct amd_ip_funcs si_dma_ip_funcs = {
	.name = "si_dma",
	.early_init = si_dma_early_init,
	.late_init = NULL,
	.sw_init = si_dma_sw_init,
	.sw_fini = si_dma_sw_fini,
	.hw_init = si_dma_hw_init,
	.hw_fini = si_dma_hw_fini,
	.suspend = si_dma_suspend,
	.resume = si_dma_resume,
	.is_idle = si_dma_is_idle,
	.wait_for_idle = si_dma_wait_for_idle,
	.soft_reset = si_dma_soft_reset,
	.set_clockgating_state = si_dma_set_clockgating_state,
	.set_powergating_state = si_dma_set_powergating_state,
};

static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
	.support_64bit_ptrs = false,
	.get_rptr = si_dma_ring_get_rptr,
	.get_wptr = si_dma_ring_get_wptr,
	.set_wptr = si_dma_ring_set_wptr,
	.emit_frame_size =
		3 + 3 + /* hdp flush / invalidate */
		6 + /* si_dma_ring_emit_pipeline_sync */
		SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
		9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
	.emit_ib = si_dma_ring_emit_ib,
	.emit_fence = si_dma_ring_emit_fence,
	.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
	.emit_vm_flush = si_dma_ring_emit_vm_flush,
	.test_ring = si_dma_ring_test_ring,
	.test_ib = si_dma_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = si_dma_ring_pad_ib,
	.emit_wreg = si_dma_ring_emit_wreg,
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
}

static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq_1,
};

static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
	.process = si_dma_process_illegal_inst_irq,
};

static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
	adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1;
	adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
}

/**
 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (SI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
				    uint64_t src_offset,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, byte_count);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
}

/**
 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (SI).
 */
static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
				    uint32_t src_data,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
					      0, 0, 0, byte_count / 4);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
}

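/*
 * The num_dw fields below must match the number of dwords emitted by
 * si_dma_emit_copy_buffer (5) and si_dma_emit_fill_buffer (4); the max byte
 * counts stay within the DMA packet's count field.
 */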
static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
	.copy_max_bytes = 0xffff8,
	.copy_num_dw = 5,
	.emit_copy_buffer = si_dma_emit_copy_buffer,

	.fill_max_bytes = 0xffff8,
	.fill_num_dw = 4,
	.emit_fill_buffer = si_dma_emit_fill_buffer,
};

static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
{
	if (adev->mman.buffer_funcs == NULL) {
		adev->mman.buffer_funcs = &si_dma_buffer_funcs;
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
	}
}

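/*
 * VM page table update callbacks. copy_pte_num_dw = 5 corresponds to the
 * five dwords emitted per COPY packet in si_dma_vm_copy_pte above.
 */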
static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
	.copy_pte_num_dw = 5,
	.copy_pte = si_dma_vm_copy_pte,

	.write_pte = si_dma_vm_write_pte,
	.set_pte_pde = si_dma_vm_set_pte_pde,
};

static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	if (adev->vm_manager.vm_pte_funcs == NULL) {
		adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
		for (i = 0; i < adev->sdma.num_instances; i++)
			adev->vm_manager.vm_pte_rings[i] =
				&adev->sdma.instance[i].ring;

		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
	}
}

const struct amdgpu_ip_block_version si_dma_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &si_dma_ip_funcs,
};