/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/firmware.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v10_0.h"

#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "mmhub/mmhub_2_0_0_sh_mask.h"
#include "athub/athub_2_0_0_sh_mask.h"
#include "athub/athub_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_offset.h"
#include "dcn/dcn_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "navi10_enum.h"

#include "soc15.h"
#include "soc15_common.h"

#include "nbio_v2_3.h"

#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
#include "athub_v2_0.h"

/* XXX Move this macro to a navi10 header file, which is like vid.h for VI. */
#define AMDGPU_NUM_OF_VMIDS			8
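/*
 * Note: only AMDGPU_NUM_OF_VMIDS IDs per hub are handed to the amdgpu VMID
 * manager in gmc_v10_0_sw_init() below; VMID 0 stays reserved for the kernel
 * and VMIDs 8-15 are left to amdkfd.
 */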
static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
{
	/* TODO add golden setting for hdp */
};
static int
gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *src, unsigned type,
				   enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i;

	bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
		MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		/* MM HUB */
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp &= ~bits[AMDGPU_MMHUB_0];
			WREG32(reg, tmp);
		}

		/* GFX HUB */
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp &= ~bits[AMDGPU_GFXHUB_0];
			WREG32(reg, tmp);
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		/* MM HUB */
		hub = &adev->vmhub[AMDGPU_MMHUB_0];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp |= bits[AMDGPU_MMHUB_0];
			WREG32(reg, tmp);
		}

		/* GFX HUB */
		hub = &adev->vmhub[AMDGPU_GFXHUB_0];
		for (i = 0; i < 16; i++) {
			reg = hub->vm_context0_cntl + i;
			tmp = RREG32(reg);
			tmp |= bits[AMDGPU_GFXHUB_0];
			WREG32(reg, tmp);
		}
		break;
	default:
		break;
	}

	return 0;
}
static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
				       struct amdgpu_irq_src *source,
				       struct amdgpu_iv_entry *entry)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
	uint32_t status = 0;
	u64 addr;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (!amdgpu_sriov_vf(adev)) {
		/*
		 * Issue a dummy read to wait for the status register to
		 * be updated to avoid reading an incorrect value due to
		 * the new fast GRBM interface.
		 */
		if (entry->vmid_src == AMDGPU_GFXHUB_0)
			RREG32(hub->vm_l2_pro_fault_status);

		status = RREG32(hub->vm_l2_pro_fault_status);
		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
	}

	if (printk_ratelimit()) {
		struct amdgpu_task_info task_info;

		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

		dev_err(adev->dev,
			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
			"for process %s pid %d thread %s pid %d)\n",
			entry->vmid_src ? "mmhub" : "gfxhub",
			entry->src_id, entry->ring_id, entry->vmid,
			entry->pasid, task_info.process_name, task_info.tgid,
			task_info.task_name, task_info.pid);
		dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
			addr, entry->client_id);
		if (!amdgpu_sriov_vf(adev)) {
			dev_err(adev->dev,
				"GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
				status);
			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
				GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
				GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
				REG_GET_FIELD(status,
				GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
				REG_GET_FIELD(status,
				GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
			dev_err(adev->dev, "\t RW: 0x%lx\n",
				REG_GET_FIELD(status,
				GCVM_L2_PROTECTION_FAULT_STATUS, RW));
		}
	}

	return 0;
}
static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = {
	.set = gmc_v10_0_vm_fault_interrupt_state,
	.process = gmc_v10_0_process_interrupt,
};
static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs;
}
static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
					     uint32_t flush_type)
{
	u32 req = 0;

	/* invalidate using legacy mode on vmid */
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
			    PER_VMID_INVALIDATE_REQ, 1 << vmid);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
	req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
			    CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);

	return req;
}
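/*
 * Illustrative example of the request word built above: a legacy flush of
 * VMID 3 with flush_type 0 sets PER_VMID_INVALIDATE_REQ to 1 << 3, asks the
 * L2 to drop its cached PTEs and PDE0/1/2 entries plus the L1 PTEs, and
 * leaves the protection fault status address untouched.
 */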
/**
 * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore
 *
 * @adev: amdgpu_device pointer
 * @vmhub: vmhub type
 *
 */
static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
					       uint32_t vmhub)
{
	return ((vmhub == AMDGPU_MMHUB_0 ||
		 vmhub == AMDGPU_MMHUB_1) &&
		(!amdgpu_sriov_vf(adev)));
}
static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
					struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	uint32_t value;

	value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
		       + vmid);
	*p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;

	return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
}
/*
 * VMID 0 covers the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */
static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
				   unsigned int vmhub, uint32_t flush_type)
{
	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
	/* Use register 17 for GART */
	const unsigned eng = 17;
	unsigned int i;
	u32 tmp;

	spin_lock(&adev->gmc.invalidate_lock);
	/*
	 * The gpuvm invalidate acknowledge state may be lost across a
	 * power-gating off cycle, so add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering
	 * the power gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore) {
		for (i = 0; i < adev->usec_timeout; i++) {
			/* a read return value of 1 means semaphore acquire */
			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
			if (tmp & 0x1)
				break;
			udelay(1);
		}

		if (i >= adev->usec_timeout)
			DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
	}

	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);

	/*
	 * Issue a dummy read to wait for the ACK register to be cleared
	 * to avoid a false ACK due to the new fast GRBM interface.
	 */
	if (vmhub == AMDGPU_GFXHUB_0)
		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);

	/* Wait for ACK with a delay.*/
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
		if (tmp & (1 << vmid))
			break;
		udelay(1);
	}

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0);

	spin_unlock(&adev->gmc.invalidate_lock);

	if (i < adev->usec_timeout)
		return;

	DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
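/*
 * In short, the flush sequence above is: optionally take the per-engine
 * invalidate semaphore, write the invalidate request, dummy-read the request
 * register back on the GFXHUB to flush the write through the fast GRBM path,
 * poll the ACK bit for this VMID, then release the semaphore and report a
 * timeout if the ACK never arrived.
 */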
/**
 * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table.
 */
static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				    uint32_t vmhub, uint32_t flush_type)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct dma_fence *fence;
	struct amdgpu_job *job;
	int r;

	/* flush hdp cache */
	adev->nbio.funcs->hdp_flush(adev, NULL);

	mutex_lock(&adev->mman.gtt_window_lock);

	if (vmhub == AMDGPU_MMHUB_0) {
		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
		mutex_unlock(&adev->mman.gtt_window_lock);
		return;
	}

	BUG_ON(vmhub != AMDGPU_GFXHUB_0);

	if (!adev->mman.buffer_funcs_enabled ||
	    !adev->ib_pool_ready ||
	    adev->in_gpu_reset ||
	    ring->sched.ready == false) {
		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
		mutex_unlock(&adev->mman.gtt_window_lock);
		return;
	}

	/* The SDMA on Navi has a bug which can theoretically result in memory
	 * corruption if an invalidation happens at the same time as a VA
	 * translation. Avoid this by doing the invalidation from the SDMA
	 * itself.
	 */
	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
	if (r)
		goto error_alloc;

	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
	job->vm_needs_flush = true;
	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_submit;

	mutex_unlock(&adev->mman.gtt_window_lock);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return;

error_submit:
	amdgpu_job_free(job);

error_alloc:
	mutex_unlock(&adev->mman.gtt_window_lock);
	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
/**
 * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 *
 * Flush the TLB for the requested pasid.
 */
static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	int vmid, i;
	signed long r;
	uint32_t seq;
	uint16_t queried_pasid;
	bool ret;
	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	if (amdgpu_emu_mode == 0 && ring->sched.ready) {
		spin_lock(&adev->gfx.kiq.ring_lock);
		amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
		kiq->pmf->kiq_invalidate_tlbs(ring,
					      pasid, flush_type, all_hub);
		amdgpu_fence_emit_polling(ring, &seq);
		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq.ring_lock);
		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
		if (r < 1) {
			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
			return -ETIME;
		}

		return 0;
	}

	for (vmid = 1; vmid < 16; vmid++) {
		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
								&queried_pasid);
		if (ret && queried_pasid == pasid) {
			if (all_hub) {
				for (i = 0; i < adev->num_vmhubs; i++)
					gmc_v10_0_flush_gpu_tlb(adev, vmid,
								i, 0);
			} else {
				gmc_v10_0_flush_gpu_tlb(adev, vmid,
							AMDGPU_GFXHUB_0, 0);
			}
			break;
		}
	}

	return 0;
}
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					     unsigned vmid, uint64_t pd_addr)
{
	bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	/*
	 * The gpuvm invalidate acknowledge state may be lost across a
	 * power-gating off cycle, so add a semaphore acquire before the
	 * invalidation and a semaphore release after it to avoid entering
	 * the power gated state in between.
	 */

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/* a read return value of 1 means semaphore acquire */
		amdgpu_ring_emit_reg_wait(ring,
					  hub->vm_inv_eng0_sem + eng, 0x1, 0x1);

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
					    hub->vm_inv_eng0_ack + eng,
					    req, 1 << vmid);

	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
	if (use_semaphore)
		/*
		 * add semaphore release after invalidation,
		 * write with 0 means semaphore release
		 */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0);

	return pd_addr;
}
static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
					 unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}
/*
 * PTE format on NAVI 10:
 * 47:12 4k physical page base address
 *
 * PDE format on NAVI 10:
 * 63:59 block fragment size
 * 47:6 physical base address of PD or PTE
 */
static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
	switch (flags) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_NC:
		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
	case AMDGPU_VM_MTYPE_WC:
		return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
	case AMDGPU_VM_MTYPE_CC:
		return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
	case AMDGPU_VM_MTYPE_UC:
		return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
	default:
		return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
	}
}
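/*
 * Note: the AMDGPU_VM_MTYPE_* values handled above come in through the
 * userspace VM mapping flags; anything else falls back to MTYPE_NC in the
 * NV10 PTE encoding, matching the default case.
 */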
static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
				 uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE))
			*flags |= AMDGPU_PDE_BFS(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE)
			*flags &= ~AMDGPU_PDE_PTE;
		else
			*flags |= AMDGPU_PTE_TF;
	}
}
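/*
 * With translate_further enabled, a PDB1 entry that is not itself a huge-page
 * PTE advertises a 512-page (2MB) block fragment size via AMDGPU_PDE_BFS(0x9),
 * while a PDB0 entry either drops the PDE-as-PTE marker or sets the
 * translate-further bit so the walker descends one more level.
 */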
static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
				 struct amdgpu_bo_va_mapping *mapping,
				 uint64_t *flags)
{
	*flags &= ~AMDGPU_PTE_EXECUTABLE;
	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	*flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
	*flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);

	if (mapping->flags & AMDGPU_PTE_PRT) {
		*flags |= AMDGPU_PTE_PRT;
		*flags |= AMDGPU_PTE_SNOOPED;
		*flags |= AMDGPU_PTE_LOG;
		*flags |= AMDGPU_PTE_SYSTEM;
		*flags &= ~AMDGPU_PTE_VALID;
	}
}
static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
	.flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
	.map_mtype = gmc_v10_0_map_mtype,
	.get_vm_pde = gmc_v10_0_get_vm_pde,
	.get_vm_pte = gmc_v10_0_get_vm_pte
};
static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
{
	if (adev->gmc.gmc_funcs == NULL)
		adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs;
}
static int gmc_v10_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v10_0_set_gmc_funcs(adev);
	gmc_v10_0_set_irq_funcs(adev);

	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
	adev->gmc.shared_aperture_end =
		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
	adev->gmc.private_aperture_end =
		adev->gmc.private_aperture_start + (4ULL << 30) - 1;

	return 0;
}
static int gmc_v10_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/*
	 * Can't free the stolen VGA memory when it might be used for memory
	 * training again.
	 */
	if (!adev->fw_vram_usage.mem_train_support)
		amdgpu_bo_late_init(adev);

	r = amdgpu_gmc_allocate_vm_inv_eng(adev);
	if (r)
		return r;

	return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
					struct amdgpu_gmc *mc)
{
	u64 base = 0;

	base = gfxhub_v2_0_get_fb_location(adev);

	amdgpu_gmc_vram_location(adev, &adev->gmc, base);
	amdgpu_gmc_gart_location(adev, mc);

	/* base offset of vram pages */
	adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
}
/**
 * gmc_v10_0_mc_init - initialize the memory controller driver params
 *
 * @adev: amdgpu_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space.
 * Returns 0 for success.
 */
static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
{
	/* Could aper size report 0 ? */
	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);

	/* size in MB */
	adev->gmc.mc_vram_size =
		adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
	adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
	adev->gmc.visible_vram_size = adev->gmc.aper_size;

	/* In case the PCI BAR is larger than the actual amount of vram */
	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
		adev->gmc.visible_vram_size = adev->gmc.real_vram_size;

	/* set the gart size */
	if (amdgpu_gart_size == -1) {
		switch (adev->asic_type) {
		case CHIP_NAVI10:
		case CHIP_NAVI14:
		case CHIP_NAVI12:
		default:
			adev->gmc.gart_size = 512ULL << 20;
			break;
		}
	} else {
		adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
	}

	gmc_v10_0_vram_gtt_location(adev, &adev->gmc);

	return 0;
}
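/*
 * Unless overridden with the amdgpu.gart_size module parameter, Navi1x
 * defaults to a 512MB GART aperture here.
 */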
static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->gart.bo) {
		WARN(1, "NAVI10 PCIE GART already initialized\n");
		return 0;
	}

	/* Initialize common gart structure */
	r = amdgpu_gart_init(adev);
	if (r)
		return r;

	adev->gart.table_size = adev->gart.num_gpu_pages * 8;
	adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
				    AMDGPU_PTE_EXECUTABLE;

	return amdgpu_gart_table_vram_alloc(adev);
}
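/*
 * Each GART entry above is 8 bytes; entries are created uncached (MTYPE_UC)
 * and marked executable by default.
 */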
static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
	unsigned size;

	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
	} else {
		u32 viewport;
		u32 pitch;

		viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
		pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
		size = (REG_GET_FIELD(viewport,
				      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
			REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
			4);
	}

	/* return 0 if the pre-OS buffer uses up most of vram */
	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) {
		DRM_ERROR("Warning: pre-OS buffer uses most of vram, "
			  "be aware of gart table overwrite\n");
		return 0;
	}

	return size;
}
static int gmc_v10_0_sw_init(void *handle)
{
	int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfxhub_v2_0_init(adev);
	mmhub_v2_0_init(adev);

	spin_lock_init(&adev->gmc.invalidate_lock);

	r = amdgpu_atomfirmware_get_vram_info(adev,
					      &vram_width, &vram_type, &vram_vendor);
	if (!amdgpu_emu_mode)
		adev->gmc.vram_width = vram_width;
	else
		adev->gmc.vram_width = 1 * 128; /* numchan * chansize */

	adev->gmc.vram_type = vram_type;
	adev->gmc.vram_vendor = vram_vendor;
	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		adev->num_vmhubs = 2;
		/*
		 * To fulfill 4-level page support,
		 * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,
		 * block size 512 (9bit)
		 */
		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
		break;
	default:
		break;
	}

	/* This interrupt is VMC page fault.*/
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
			      VMC_1_0__SRCID__VM_FAULT,
			      &adev->gmc.vm_fault);
	if (r)
		return r;

	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
			      UTCL2_1_0__SRCID__FAULT,
			      &adev->gmc.vm_fault);
	if (r)
		return r;

	/*
	 * Set the internal MC address mask. This is the max address of the
	 * GPU's internal address space.
	 */
	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */

	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
	if (r) {
		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
		return r;
	}

	r = gmc_v10_0_mc_init(adev);
	if (r)
		return r;

	adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);

	/*
	 * In a dual GPU scenario, stolen_size is assigned to zero on the
	 * secondary GPU, since there is no pre-OS console using that memory.
	 * The bottom region of VRAM is then handed out as GTT, but a small
	 * part of it is encroached by UMC firmware during GDDR6 BIST training,
	 * which causes a page fault.
	 * Forcing stolen_size to 3MB keeps that bottom region of VRAM reserved
	 * as stolen memory, so the GTT corruption is avoided.
	 */
	adev->gmc.stolen_size = max(adev->gmc.stolen_size,
				    AMDGPU_STOLEN_BIST_TRAINING_DEFAULT_SIZE);

	r = amdgpu_bo_init(adev);
	if (r)
		return r;

	r = gmc_v10_0_gart_init(adev);
	if (r)
		return r;

	/*
	 * VMID 0 is reserved for System
	 * amdgpu graphics/compute will use VMIDs 1-7
	 * amdkfd will use VMIDs 8-15
	 */
	adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;
	adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;

	amdgpu_vm_manager_init(adev);

	return 0;
}
/**
 * gmc_v10_0_gart_fini - vm fini callback
 *
 * @adev: amdgpu_device pointer
 *
 * Tears down the driver GART/VM setup.
 */
static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
{
	amdgpu_gart_table_vram_free(adev);
	amdgpu_gart_fini(adev);
}
static int gmc_v10_0_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	void *stolen_vga_buf;

	/*
	 * Free the stolen memory if it wasn't already freed in late_init
	 * because of memory training.
	 */
	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);

	amdgpu_vm_manager_fini(adev);
	gmc_v10_0_gart_fini(adev);
	amdgpu_gem_force_release(adev);
	amdgpu_bo_fini(adev);

	return 0;
}
static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_NAVI10:
	case CHIP_NAVI14:
	case CHIP_NAVI12:
		break;
	default:
		break;
	}
}
/**
 * gmc_v10_0_gart_enable - gart enable
 *
 * @adev: amdgpu_device pointer
 */
static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
{
	int r;
	bool value;
	u32 tmp;

	if (adev->gart.bo == NULL) {
		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}

	r = amdgpu_gart_table_vram_pin(adev);
	if (r)
		return r;

	r = gfxhub_v2_0_gart_enable(adev);
	if (r)
		return r;

	r = mmhub_v2_0_gart_enable(adev);
	if (r)
		return r;

	tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
	tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
	WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);

	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);

	/* Flush HDP after it is initialized */
	adev->nbio.funcs->hdp_flush(adev, NULL);

	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
		false : true;

	gfxhub_v2_0_set_fault_enable_default(adev, value);
	mmhub_v2_0_set_fault_enable_default(adev, value);
	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);

	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(adev->gmc.gart_size >> 20),
		 (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));

	adev->gart.ready = true;

	return 0;
}
static int gmc_v10_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/* The sequence of these two function calls matters.*/
	gmc_v10_0_init_golden_registers(adev);

	r = gmc_v10_0_gart_enable(adev);
	if (r)
		return r;

	return 0;
}
/**
 * gmc_v10_0_gart_disable - gart disable
 *
 * @adev: amdgpu_device pointer
 *
 * This disables all VM page tables.
 */
static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
{
	gfxhub_v2_0_gart_disable(adev);
	mmhub_v2_0_gart_disable(adev);
	amdgpu_gart_table_vram_unpin(adev);
}
static int gmc_v10_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		/* full access mode, so don't touch any GMC register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
	gmc_v10_0_gart_disable(adev);

	return 0;
}
static int gmc_v10_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gmc_v10_0_hw_fini(adev);

	return 0;
}
)
1024 struct amdgpu_device
*adev
= (struct amdgpu_device
*)handle
;
1026 r
= gmc_v10_0_hw_init(adev
);
1030 amdgpu_vmid_reset_all(adev
);
1035 static bool gmc_v10_0_is_idle(void *handle
)
1037 /* MC is always ready in GMC v10.*/
1041 static int gmc_v10_0_wait_for_idle(void *handle
)
1043 /* There is no need to wait for MC idle in GMC v10.*/
1047 static int gmc_v10_0_soft_reset(void *handle
)
static int gmc_v10_0_set_clockgating_state(void *handle,
					   enum amd_clockgating_state state)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = mmhub_v2_0_set_clockgating(adev, state);
	if (r)
		return r;

	return athub_v2_0_set_clockgating(adev, state);
}
static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	mmhub_v2_0_get_clockgating(adev, flags);

	athub_v2_0_get_clockgating(adev, flags);
}
static int gmc_v10_0_set_powergating_state(void *handle,
					   enum amd_powergating_state state)
{
	return 0;
}
const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
	.name = "gmc_v10_0",
	.early_init = gmc_v10_0_early_init,
	.late_init = gmc_v10_0_late_init,
	.sw_init = gmc_v10_0_sw_init,
	.sw_fini = gmc_v10_0_sw_fini,
	.hw_init = gmc_v10_0_hw_init,
	.hw_fini = gmc_v10_0_hw_fini,
	.suspend = gmc_v10_0_suspend,
	.resume = gmc_v10_0_resume,
	.is_idle = gmc_v10_0_is_idle,
	.wait_for_idle = gmc_v10_0_wait_for_idle,
	.soft_reset = gmc_v10_0_soft_reset,
	.set_clockgating_state = gmc_v10_0_set_clockgating_state,
	.set_powergating_state = gmc_v10_0_set_powergating_state,
	.get_clockgating_state = gmc_v10_0_get_clockgating_state,
};
const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GMC,
	.major = 10,
	.minor = 0,
	.rev = 0,
	.funcs = &gmc_v10_0_ip_funcs,
};