/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "si.h"
#include "sid.h"
const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	DMA0_REGISTER_OFFSET,
	DMA1_REGISTER_OFFSET
};
static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs >> 2];
}
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	WREG32(DMA_RB_WPTR + sdma_offsets[me],
	       (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
}
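/* Both accessors above shift the pointer by 2 and mask with 0x3fffc: the
 * DMA_RB_WPTR/DMA_RB_RPTR registers presumably hold a dword-aligned byte
 * offset into the ring, while ring->wptr/rptr are counted in dwords, so the
 * pointer is converted to bytes (<< 2) on write and back to dwords (>> 2)
 * on read.
 */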
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((lower_32_bits(ring->wptr) & 7) != 5)
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));

	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
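/* Layout of the INDIRECT_BUFFER packet above, read off the writes themselves
 * (bit positions inferred from the shifts and masks, not from a register spec):
 * dw0: DMA_IB_PACKET header carrying the vmid
 * dw1: IB base address, low 32 bits (the & 0xFFFFFFE0 keeps 32-byte alignment)
 * dw2: IB size in dwords in bits 31:12, IB address bits 39:32 in bits 7:0
 */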
/**
 * si_dma_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: fence flags (e.g. AMDGPU_FENCE_FLAG_64BIT)
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (SI).
 */
static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				   unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
	amdgpu_ring_write(ring, seq);
	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
		amdgpu_ring_write(ring, addr & 0xfffffffc);
		amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}
	/* generate an interrupt */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
}
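/* Summary of the fence sequence above: one FENCE packet writes the low 32 bits
 * of the sequence number to addr; when AMDGPU_FENCE_FLAG_64BIT is set a second
 * FENCE packet writes the high 32 bits to addr + 4; the trailing TRAP packet
 * raises the DMA trap interrupt serviced by si_dma_process_trap_irq().
 */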
static void si_dma_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl;
	unsigned i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
		rb_cntl &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, false);
		ring->sched.ready = false;
	}
}
static int si_dma_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
	int i, r;
	uint64_t rptr_addr;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
		WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);

		rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);

		WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
		WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);

		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);

		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);

		ring->sched.ready = true;

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return 0;
}
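/* Notes on the bring-up sequence above, inferred from the register writes
 * rather than from documentation: the ring size is programmed as
 * order_base_2(ring_size / 4), i.e. log2 of the ring size in dwords, shifted
 * into DMA_RB_CNTL; the read pointer is mirrored to a writeback slot so that
 * si_dma_ring_get_rptr() can avoid an MMIO read; and the ring is only marked
 * ready and handed to TTM after amdgpu_ring_test_helper() confirms the engine
 * actually executes packets.
 */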
/**
 * si_dma_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (SI).
 * Returns 0 for success, error for failure.
 */
static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 4);
	if (r)
		goto error_free_wb;

	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_wb:
	amdgpu_device_wb_free(adev, index);
	return r;
}
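/* The ring test above follows the usual amdgpu pattern: seed a writeback slot
 * with 0xCAFEDEAD, submit a single DMA WRITE packet that stores 0xDEADBEEF to
 * that slot, then poll (up to adev->usec_timeout iterations) until the new
 * value appears, which proves the engine is fetching and executing ring
 * packets.
 */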
/**
 * si_dma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Test a simple IB in the DMA ring (SI).
 * Returns 0 on success, error on failure.
 */
static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r)
		goto err0;

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}
/**
 * si_dma_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using DMA (SI).
 */
static void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
			       uint64_t pe, uint64_t src,
			       unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, bytes);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
}
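/* Each GART page table entry is 8 bytes, so the COPY packet above moves
 * count * 8 bytes and always emits exactly five dwords; that dword count is
 * what .copy_pte_num_dw = 5 in si_dma_vm_pte_funcs below refers to.
 */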
/**
 * si_dma_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using DMA (SI).
 */
static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				uint64_t value, unsigned count,
				uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}
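/* Each entry is written as a lo/hi dword pair, hence ndw = count * 2 and the
 * WRITE packet header above carries the payload length in dwords; value is
 * advanced by incr bytes per entry so consecutive PTEs map consecutive pages.
 */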
/**
 * si_dma_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (SI).
 */
static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
				  uint64_t pe,
				  uint64_t addr, unsigned count,
				  uint32_t incr, uint64_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
		ib->ptr[ib->length_dw++] = upper_32_bits(flags);
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}
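/* The PTE_PDE packet lets the engine generate ndw/2 entries from a base value
 * plus an increment, so each packet costs a fixed ten dwords no matter how
 * many entries it covers; ndw is clamped per packet and pe/addr/count are
 * advanced until the whole range has been emitted.
 */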
/**
 * si_dma_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 */
static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
}
/**
 * si_dma_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (SI).
 */
static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
			  (1 << 27)); /* Poll memory */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, seq); /* value */
	amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
}
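/* The POLL_REG_MEM packet above (bit 27 selecting memory polling) makes the
 * engine re-read the fence address until it equals sync_seq, i.e. until every
 * previously emitted fence on this ring has signalled, before any later
 * packets are processed.
 */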
/**
 * si_dma_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VMID to flush
 * @pd_addr: page directory address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (SI).
 */
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
				      unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for invalidate to complete */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0xff << 16); /* retry */
	amdgpu_ring_write(ring, 1 << vmid); /* mask */
	amdgpu_ring_write(ring, 0); /* value */
	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}
static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
				  uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | reg);
	amdgpu_ring_write(ring, val);
}
static int si_dma_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->sdma.num_instances = 2;

	si_dma_set_ring_funcs(adev);
	si_dma_set_buffer_funcs(adev);
	si_dma_set_vm_pte_funcs(adev);
	si_dma_set_irq_funcs(adev);

	return 0;
}
static int si_dma_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* DMA0 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	/* DMA1 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ?
				     AMDGPU_SDMA_IRQ_TRAP0 :
				     AMDGPU_SDMA_IRQ_TRAP1);
		if (r)
			return r;
	}

	return r;
}
static int si_dma_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}
static int si_dma_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_start(adev);
}
static int si_dma_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	si_dma_stop(adev);

	return 0;
}
static int si_dma_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_fini(adev);
}
static int si_dma_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_init(adev);
}
static bool si_dma_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(SRBM_STATUS2);

	if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
		return false;

	return true;
}
static int si_dma_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (si_dma_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
static int si_dma_soft_reset(void *handle)
{
	DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
	return 0;
}
static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *src,
				     unsigned type,
				     enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_TRAP0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_TRAP1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}
static int si_dma_process_trap_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	if (entry->src_id == 224)
		amdgpu_fence_process(&adev->sdma.instance[0].ring);
	else
		amdgpu_fence_process(&adev->sdma.instance[1].ring);
	return 0;
}
static int si_dma_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	u32 orig, data, offset;
	int i;
	bool enable;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	enable = (state == AMD_CG_STATE_GATE) ? true : false;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data &= ~MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);
			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data |= MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);

			orig = data = RREG32(DMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(DMA_CLK_CTRL + offset, data);
		}
	}

	return 0;
}
static int si_dma_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);

	return 0;
}
static const struct amd_ip_funcs si_dma_ip_funcs = {
	.name = "si_dma",
	.early_init = si_dma_early_init,
	.sw_init = si_dma_sw_init,
	.sw_fini = si_dma_sw_fini,
	.hw_init = si_dma_hw_init,
	.hw_fini = si_dma_hw_fini,
	.suspend = si_dma_suspend,
	.resume = si_dma_resume,
	.is_idle = si_dma_is_idle,
	.wait_for_idle = si_dma_wait_for_idle,
	.soft_reset = si_dma_soft_reset,
	.set_clockgating_state = si_dma_set_clockgating_state,
	.set_powergating_state = si_dma_set_powergating_state,
};
static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
	.support_64bit_ptrs = false,
	.get_rptr = si_dma_ring_get_rptr,
	.get_wptr = si_dma_ring_get_wptr,
	.set_wptr = si_dma_ring_set_wptr,
	.emit_frame_size =
		3 + 3 + /* hdp flush / invalidate */
		6 + /* si_dma_ring_emit_pipeline_sync */
		SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
		9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
	.emit_ib = si_dma_ring_emit_ib,
	.emit_fence = si_dma_ring_emit_fence,
	.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
	.emit_vm_flush = si_dma_ring_emit_vm_flush,
	.test_ring = si_dma_ring_test_ring,
	.test_ib = si_dma_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = si_dma_ring_pad_ib,
	.emit_wreg = si_dma_ring_emit_wreg,
};
static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
}
static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq,
};
static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
}
/**
 * si_dma_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Copy GPU buffers using the DMA engine (SI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
				    uint64_t src_offset,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, byte_count);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
}
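/* The COPY packet's count field is in bytes and only bits 39:32 of the upper
 * source/destination address halves are kept (40-bit addressing, inferred
 * from the & 0xff masks); the packet size and limit correspond to
 * .copy_num_dw = 5 and .copy_max_bytes = 0xffff8 in si_dma_buffer_funcs
 * below.
 */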
/**
 * si_dma_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (SI).
 */
static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
				    uint32_t src_data,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
					      0, 0, 0, byte_count / 4);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
}
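/* CONSTANT_FILL counts in dwords (byte_count / 4, so fills are expected to be
 * a multiple of four bytes) and, unlike the copy packet, packs the upper
 * destination address bits into bits 31:16 of the final dword.
 */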
static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
	.copy_max_bytes = 0xffff8,
	.copy_num_dw = 5,
	.emit_copy_buffer = si_dma_emit_copy_buffer,

	.fill_max_bytes = 0xffff8,
	.fill_num_dw = 4,
	.emit_fill_buffer = si_dma_emit_fill_buffer,
};
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &si_dma_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
	.copy_pte_num_dw = 5,
	.copy_pte = si_dma_vm_copy_pte,

	.write_pte = si_dma_vm_write_pte,
	.set_pte_pde = si_dma_vm_set_pte_pde,
};
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	struct drm_gpu_scheduler *sched;
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		sched = &adev->sdma.instance[i].ring.sched;
		adev->vm_manager.vm_pte_rqs[i] =
			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
	}
	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version si_dma_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &si_dma_ip_funcs,
};