// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_0.h"
#include "../include/goya/asic_reg/goya_masks.h"
#include "../include/goya/goya_reg_map.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, the driver is parsing the DMA CB:
 *     - checks DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and the driver doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured but because CP is secured, the driver still needs to
 * parse the CB, but doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA
 * and the driver doesn't map memory in MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled
 * mode).
 *
 * DMA RR does NOT protect host because DMA is not secured.
 *
 */
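/*
 * Illustrative sketch, not part of the original driver: the CB parsing rule
 * described above (WREG and MSG_PROT rejected, MSG_LONG/SHORT allowed) boils
 * down to a per-opcode check like the hypothetical helper below. The real
 * parser in this driver also validates DMA pointers and patches the CB; only
 * the packet_id values here are taken from the source.
 */
#ifdef GOYA_DOC_EXAMPLE	/* hypothetical guard, documentation only */
static bool goya_example_user_dma_pkt_allowed(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:	/* WREG is not allowed */
	case PACKET_WREG_BULK:
	case PACKET_MSG_PROT:	/* MSG_PROT is reserved for the driver */
		return false;
	default:		/* MSG_LONG/SHORT and the rest are allowed */
		return true;
	}
}
#endif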
#define GOYA_BOOT_FIT_FILE	"habanalabs/goya/goya-boot-fit.itb"
#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"

#define GOYA_MMU_REGS_NUM		63

#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GOYA_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */
#define IS_QM_IDLE(engine, qm_glbl_sts0) \
	(((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
#define IS_DMA_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(DMA, qm_glbl_sts0)
#define IS_TPC_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(TPC, qm_glbl_sts0)
#define IS_MME_QM_IDLE(qm_glbl_sts0)	IS_QM_IDLE(MME, qm_glbl_sts0)

#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
	(((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
			engine##_CMDQ_IDLE_MASK)
#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
	IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)

#define IS_DMA_IDLE(dma_core_sts0) \
	!((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) \
	(((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)

#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
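/*
 * Illustrative sketch, not from the original source: the idle macros above
 * are meant to be AND-ed together per engine, so an engine counts as idle
 * only if its QMAN, CMDQ and core all report idle. The helper below is
 * hypothetical; the register names follow the conventions used elsewhere in
 * this file.
 */
#ifdef GOYA_DOC_EXAMPLE	/* hypothetical guard, documentation only */
static bool goya_example_tpc0_engine_idle(void)
{
	return IS_TPC_QM_IDLE(RREG32(mmTPC0_QM_GLBL_STS0)) &&
		IS_TPC_CMDQ_IDLE(RREG32(mmTPC0_CMDQ_GLBL_STS0)) &&
		IS_TPC_IDLE(RREG32(mmTPC0_CFG_STATUS));
}
#endif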
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
		"goya cq 4", "goya cpu eq"
};
static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
		return true;
	default:
		return false;
	}
}
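/*
 * Illustrative sketch, not part of the original file: together,
 * validate_packet_id() and goya_packet_sizes[] let a parser walk a CB one
 * packet at a time. The opcode extraction below (shift and mask on the
 * first control word) is a schematic assumption, not the authoritative
 * packet header layout.
 */
#ifdef GOYA_DOC_EXAMPLE	/* hypothetical guard, documentation only */
static bool goya_example_cb_lengths_ok(const __le32 *cb, u32 cb_size)
{
	u32 off = 0;

	while (off < cb_size) {
		/* opcode position/mask are assumptions for illustration */
		u32 ctl = le32_to_cpu(cb[off / sizeof(u32)]);
		enum packet_id id = (ctl >> 24) & 0x1F;

		if (!validate_packet_id(id) || !goya_packet_sizes[id])
			return false;

		off += goya_packet_sizes[id];
	}

	return off == cb_size;
}
#endif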
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA,
	mmPCIE_WRAP_PSOC_ARUSER,
	mmPCIE_WRAP_PSOC_AWUSER
};
static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};
static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
int goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	prop->max_queues = GOYA_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].driver_only = 1;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].driver_only = 0;
		prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = true;

	prop->dmmu.hop0_shift = HOP0_SHIFT;
	prop->dmmu.hop1_shift = HOP1_SHIFT;
	prop->dmmu.hop2_shift = HOP2_SHIFT;
	prop->dmmu.hop3_shift = HOP3_SHIFT;
	prop->dmmu.hop4_shift = HOP4_SHIFT;
	prop->dmmu.hop0_mask = HOP0_MASK;
	prop->dmmu.hop1_mask = HOP1_MASK;
	prop->dmmu.hop2_mask = HOP2_MASK;
	prop->dmmu.hop3_mask = HOP3_MASK;
	prop->dmmu.hop4_mask = HOP4_MASK;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.num_hops = MMU_ARCH_5_HOPS;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
		CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GOYA_MAX_PENDING_CS;

	/* disable fw security for now, set it in a later stage */
	prop->fw_security_disabled = true;
	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}
/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;
}
static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

	if (goya) {
		old_addr = goya->ddr_bar_cur_addr;
		goya->ddr_bar_cur_addr = addr;
	}

	return old_addr;
}
/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	rc = goya_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}
static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (secure)
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);

	RREG32(mmDMA_QM_0_GLBL_PROT);
}
/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_disabled) {
		div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
		div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
		nr = RREG32(mmPSOC_PCI_PLL_NR);
		nf = RREG32(mmPSOC_PCI_PLL_NF);
		od = RREG32(mmPSOC_PCI_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	} else {
		rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);

		if (rc)
			return;

		freq = pll_freq_arr[1];
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;
}
int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}
/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}
/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);
	hdev->supports_coresight = true;
	hdev->supports_soft_reset = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}
static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
				dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
	u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

	if (dma_id)
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
	else
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;

	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
	WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
}
/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		q->cq_id = q->msi_vec = i;
		goya_init_dma_qman(hdev, i, q->bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}
/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same*/

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return retval;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}
/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_CQ_BASE_ADDR_LOW,
		lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
		upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_EQ_CI, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_PQ_INIT_STATUS,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}
static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}
static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443 :
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
				tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	val = RREG32(tpc_slm_offset);
}
static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (hdev->pldm)
		return;

	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
		return;

	/* Workaround for H2 #2443 */

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		_goya_tpc_mbist_workaround(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}
/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;

	/* Mask all arithmetic interrupts from TPC */
	tpc_intr_mask = 0x7FFF;

	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);


		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
	}

	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
	WREG32(mmMME_AGU, 0x0f0f0f10);
	WREG32(mmMME_SEI_MASK, ~0x0);

	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
	}

	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
		/*
		 * Workaround for Bug H2 #2441 :
		 * "ST.NOP set trace event illegal opcode"
		 */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);

		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	/*
	 * Workaround for H2 #HW-23 bug
	 * Set DMA max outstanding read requests to 240 on DMA CH 1.
	 * This limitation is still large enough to not affect Gen4 bandwidth.
	 * We need to only limit that DMA channel because the user can only read
	 * from Host using DMA CH 1
	 */
	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
}
static void goya_init_mme_qman(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	qman_base_addr = hdev->asic_prop.sram_base_address +
				MME_QMAN_BASE_OFFSET;

	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
	WREG32(mmMME_QM_PQ_PI, 0);
	WREG32(mmMME_QM_PQ_CI, 0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);

	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* QMAN CQ has 8 cache lines */
	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);

	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);

	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);

	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);

	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
}
static void goya_init_mme_cmdq(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* CMDQ CQ has 20 cache lines */
	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);

	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);

	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);

	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);

	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}
void goya_init_mme_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 so_base_lo, so_base_hi;

	if (goya->hw_cap_initialized & HW_CAP_MME)
		return;

	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);

	goya_init_mme_qman(hdev);
	goya_init_mme_cmdq(hdev);

	goya->hw_cap_initialized |= HW_CAP_MME;
}
static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;
	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}
static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}
void goya_init_tpc_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 so_base_lo, so_base_hi;
	u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
			mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_TPC)
		return;

	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
				so_base_lo);
		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
				so_base_hi);
	}

	goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
	goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
	goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
	goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
	goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
	goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
	goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
	goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		goya_init_tpc_cmdq(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC;
}

/*
 * goya_disable_internal_queues - Disable internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_internal_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		goto disable_tpc;

	WREG32(mmMME_QM_GLBL_CFG0, 0);
	WREG32(mmMME_CMDQ_GLBL_CFG0, 0);

disable_tpc:
	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG0, 0);
	WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC1_QM_GLBL_CFG0, 0);
	WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC2_QM_GLBL_CFG0, 0);
	WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC3_QM_GLBL_CFG0, 0);
	WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC4_QM_GLBL_CFG0, 0);
	WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC5_QM_GLBL_CFG0, 0);
	WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC6_QM_GLBL_CFG0, 0);
	WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);

	WREG32(mmTPC7_QM_GLBL_CFG0, 0);
	WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
}

/*
 * goya_stop_internal_queues - Stop internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_internal_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc, retval = 0;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		goto stop_tpc;

	/*
	 * Each queue (QMAN) is a separate H/W logic. That means that each
	 * QMAN can be stopped independently and failure to stop one does NOT
	 * mandate we should not try to stop other QMANs
	 */

	rc = goya_stop_queue(hdev,
			mmMME_QM_GLBL_CFG1,
			mmMME_QM_CP_STS,
			mmMME_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop MME QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmMME_CMDQ_GLBL_CFG1,
			mmMME_CMDQ_CP_STS,
			mmMME_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop MME CMDQ\n");
		retval = -EIO;
	}

stop_tpc:
	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return retval;

	rc = goya_stop_queue(hdev,
			mmTPC0_QM_GLBL_CFG1,
			mmTPC0_QM_CP_STS,
			mmTPC0_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC0_CMDQ_GLBL_CFG1,
			mmTPC0_CMDQ_CP_STS,
			mmTPC0_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC1_QM_GLBL_CFG1,
			mmTPC1_QM_CP_STS,
			mmTPC1_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC1_CMDQ_GLBL_CFG1,
			mmTPC1_CMDQ_CP_STS,
			mmTPC1_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC2_QM_GLBL_CFG1,
			mmTPC2_QM_CP_STS,
			mmTPC2_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC2_CMDQ_GLBL_CFG1,
			mmTPC2_CMDQ_CP_STS,
			mmTPC2_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC3_QM_GLBL_CFG1,
			mmTPC3_QM_CP_STS,
			mmTPC3_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC3_CMDQ_GLBL_CFG1,
			mmTPC3_CMDQ_CP_STS,
			mmTPC3_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC4_QM_GLBL_CFG1,
			mmTPC4_QM_CP_STS,
			mmTPC4_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC4_CMDQ_GLBL_CFG1,
			mmTPC4_CMDQ_CP_STS,
			mmTPC4_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC5_QM_GLBL_CFG1,
			mmTPC5_QM_CP_STS,
			mmTPC5_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC5_CMDQ_GLBL_CFG1,
			mmTPC5_CMDQ_CP_STS,
			mmTPC5_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC6_QM_GLBL_CFG1,
			mmTPC6_QM_CP_STS,
			mmTPC6_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC6_CMDQ_GLBL_CFG1,
			mmTPC6_CMDQ_CP_STS,
			mmTPC6_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC7_QM_GLBL_CFG1,
			mmTPC7_QM_CP_STS,
			mmTPC7_QM_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmTPC7_CMDQ_GLBL_CFG1,
			mmTPC7_CMDQ_CP_STS,
			mmTPC7_CMDQ_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
		retval = -EIO;
	}

	return retval;
}

static void goya_dma_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_DMA))
		return;

	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}

static void goya_tpc_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_TPC))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}

static void goya_mme_stall(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME_STALL, 0xFFFFFFFF);
}

static int goya_enable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	if (goya->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support -- %d/%d\n",
			GOYA_MSIX_ENTRIES, rc);
		return rc;
	}

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = pci_irq_vector(hdev->pdev, i);
		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

	rc = request_irq(irq, hl_irq_handler_eq, 0,
			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	goya->hw_cap_initialized |= HW_CAP_MSIX;
	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(pci_irq_vector(hdev->pdev, i),
			&hdev->completion_queue[i]);

	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

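/*
 * MSI-X layout used above: one vector per completion queue (vectors
 * 0..cq_cnt-1, all serviced by hl_irq_handler_cq) plus a dedicated event
 * queue vector at GOYA_EVENT_QUEUE_MSIX_IDX serviced by hl_irq_handler_eq.
 * On a request_irq() failure only the irq_cnt_init vectors that were
 * actually requested are freed before MSI-X is disabled again.
 */
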
static void goya_sync_irqs(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	/* Wait for all pending IRQs to be finished */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		synchronize_irq(pci_irq_vector(hdev->pdev, i));

	synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
}

static void goya_disable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i, irq;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	goya_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
	free_irq(irq, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->completion_queue[i]);
	}

	pci_free_irq_vectors(hdev->pdev);

	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}

static void goya_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

static void goya_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}

static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	if (hdev->pldm)
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;

	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	goya_disable_timestamp(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		goya_sync_irqs(hdev);
	}
}

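/*
 * The quiesce order above is deliberate: first ask the QMANs to stop
 * fetching new work (stop_*_queues), wait, then stall the DMA/TPC/MME
 * engines themselves, wait again, and only then disable the queues.
 * The msleep() between the phases presumably gives in-flight transactions
 * a chance to drain before the next, more forceful, step.
 */
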
/*
 * goya_load_firmware_to_device() - Load LINUX FW code to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy LINUX fw code from firmware file to DDR BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
}

/*
 * goya_load_boot_fit_to_device() - Load boot fit to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy boot fit file to SRAM BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
}

/*
 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
 * The version string should be located by that offset.
 */
static int goya_read_device_fw_version(struct hl_device *hdev,
					enum hl_fw_component fwc)
{
	const char *name;
	u32 ver_off;
	char *dest;

	switch (fwc) {
	case FW_COMP_UBOOT:
		ver_off = RREG32(mmUBOOT_VER_OFFSET);
		dest = hdev->asic_prop.uboot_ver;
		name = "U-Boot";
		break;
	case FW_COMP_PREBOOT:
		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
		dest = hdev->asic_prop.preboot_ver;
		name = "Preboot";
		break;
	default:
		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
		return -EIO;
	}

	ver_off &= ~((u32)SRAM_BASE_ADDR);

	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
		memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
							VERSION_MAX_LEN);
	} else {
		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
								name, ver_off);
		strcpy(dest, "unavailable");
		return -EIO;
	}

	return 0;
}

static int goya_init_cpu(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if (!hdev->cpu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * Before pushing u-boot/linux to device, need to set the ddr bar to
	 * base address of dram
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to DRAM base address\n");
		return -EIO;
	}

	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
			mmCPU_CMD_STATUS_TO_HOST,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			false, GOYA_CPU_TIMEOUT_USEC,
			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);

	if (rc)
		return rc;

	goya->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
						u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

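/*
 * Hop0 update handshake: the hop0 physical address (bits 12..49) is split
 * across the PA43_12/PA49_44 registers, then bit 31 of MMU_ASID_BUSY is
 * set together with the ASID to latch the new address. The H/W clears the
 * busy bit when it is done, which is what the poll above waits for.
 */
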
int goya_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	hdev->dram_default_page_mapping = true;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			return rc;
		}
	}

	goya->hw_cap_initialized |= HW_CAP_MMU;

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8,
				lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	/* Remove follower feature due to performance bug */
	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
				VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);

	WREG32(mmMMU_MMU_ENABLE, 1);
	WREG32(mmMMU_SPI_MASK, 0xF);

	return 0;
}

/*
 * goya_hw_init - Goya hardware initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	/* Perform read from the device to make sure device is up */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	rc = goya_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	goya_enable_timestamp(hdev);

	/* MSI-X must be enabled before CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all MSI-X configuration */
	RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}

/*
 * goya_hw_fini - Goya hardware tear-down code
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 */
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, cpu_timeout_ms, status;

	if (hdev->pldm) {
		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/* I don't know what is the state of the CPU so make sure it is
		 * stopped in any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);

		msleep(cpu_timeout_ms);

		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
		goya_disable_clk_rlx(hdev);
		goya_set_pll_refclk(hdev);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
		dev_info(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
		dev_info(hdev->dev,
			"Issued SOFT reset command, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. In either reset we need to wait until the reset
	 * is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	if (!hard_reset && goya) {
		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
						HW_CAP_GOLDEN | HW_CAP_TPC);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
		return;
	}

	/* Chicken bit to re-initiate boot sequencer flow */
	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
	/* Move boot manager FSM to pre boot sequencer init state */
	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

	if (goya) {
		goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
				HW_CAP_DDR_0 | HW_CAP_DDR_1 |
				HW_CAP_DMA | HW_CAP_MME |
				HW_CAP_MMU | HW_CAP_TPC_MBIST |
				HW_CAP_GOLDEN | HW_CAP_TPC);

		memset(goya->events_stat, 0, sizeof(goya->events_stat));
	}
}

int goya_suspend(struct hl_device *hdev)
{
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
	if (rc)
		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

	return rc;
}

int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}

static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

	return rc;
}

void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}

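/*
 * For the CPU primary queue the doorbell register alone is not enough:
 * the embedded CPU needs an explicit interrupt to notice the new PI, so
 * the driver also raises a GIC SPI (GOYA_ASYNC_EVENT_ID_PI_UPDATE) to tell
 * the CPU F/W that the PI has moved.
 */
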
void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	/* The QMANs are on the SRAM so need to copy to IO space */
	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}

static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}

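/*
 * The HOST_PHYS_BASE bias applied in the alloc/free pair above (and in the
 * SG mapping helpers below) reflects how Goya sees host memory: the device
 * accesses it through an aperture based at HOST_PHYS_BASE, so every host
 * DMA address handed to the H/W must be shifted into that aperture, and
 * shifted back before being returned to the kernel DMA API.
 */
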
int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
{
	return 0;
}

void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	base += offset;
	*dma_handle += offset;

	return base;
}

static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	if (rc == -ETIMEDOUT)
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);

free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);

	goya_qman0_set_security(hdev, false);

	return rc;
}

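/*
 * Completion of a QMAN0 job is detected with a fence: a MSG_PROT packet is
 * planted at the end of the patched CB that writes GOYA_QMAN0_FENCE_VAL to
 * a host address, and the driver polls that address. Seeing the value means
 * the CP consumed the whole CB. QMAN0 security is raised around the
 * submission since this path is driver-only.
 */
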
int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
				u32 timeout, u64 *result)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
					timeout, result);
}

int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}

int goya_test_cpu_queue(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/*
	 * check capability here as send_cpu_message() won't update the result
	 * value if no capability
	 */
	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_test_cpu_queue(hdev);
}

int goya_test_queues(struct hl_device *hdev)
{
	int i, rc, ret_val = 0;

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		rc = goya_test_queue(hdev, i);
		if (rc)
			ret_val = -EINVAL;
	}

	return ret_val;
}

static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *kernel_addr;

	if (size > GOYA_DMA_POOL_BLK_SIZE)
		return NULL;

	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;

	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}

void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle)
{
	void *vaddr;

	vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
	*dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
			VA_CPU_ACCESSIBLE_MEM_ADDR;

	return vaddr;
}

void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}

static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
				int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
		return -ENOMEM;

	/* Shift to the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address += HOST_PHYS_BASE;

	return 0;
}

static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
				int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	/* Cancel the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address -= HOST_PHYS_BASE;

	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}

u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {

		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}

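/*
 * This sizing pass mirrors the merge logic in goya_patch_dma_packet():
 * physically contiguous SG entries are coalesced as long as the combined
 * length stays within DMA_MAX_TRANSFER_SIZE, and each resulting chunk costs
 * one LIN_DMA descriptor in the patched CB. Running the same algorithm here
 * keeps the patched CB size reservation exact.
 */
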
static int goya_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
				userptr->sgt->nents, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	parser->patched_cb_size +=
			goya_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}

static int goya_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	enum goya_dma_direction user_dir;
	bool sram_addr = true;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 ctl;
	int rc = 0;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	switch (user_dir) {
	case DMA_HOST_TO_DRAM:
		dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
		dir = DMA_TO_DEVICE;
		sram_addr = false;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		if (user_memset)
			skip_host_mem_pin = true;
		break;

	case DMA_DRAM_TO_HOST:
		dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
		dir = DMA_FROM_DEVICE;
		sram_addr = false;
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		break;

	case DMA_HOST_TO_SRAM:
		dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		if (user_memset)
			skip_host_mem_pin = true;
		break;

	case DMA_SRAM_TO_HOST:
		dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		break;
	default:
		dev_err(hdev->dev, "DMA direction is undefined\n");
		return -EFAULT;
	}

	if (sram_addr) {
		if (!hl_mem_area_inside_range(device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.sram_user_base_address,
				hdev->asic_prop.sram_end_address)) {

			dev_err(hdev->dev,
				"SRAM address 0x%llx + 0x%x is invalid\n",
				device_memory_addr,
				user_dma_pkt->tsize);
			return -EFAULT;
		}
	} else {
		if (!hl_mem_area_inside_range(device_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.dram_user_base_address,
				hdev->asic_prop.dram_end_address)) {

			dev_err(hdev->dev,
				"DRAM address 0x%llx + 0x%x is invalid\n",
				device_memory_addr,
				user_dma_pkt->tsize);
			return -EFAULT;
		}
	}

	if (skip_host_mem_pin)
		parser->patched_cb_size += sizeof(*user_dma_pkt);
	else {
		if ((dir == DMA_TO_DEVICE) &&
				(parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
			dev_err(hdev->dev,
				"Can't DMA from host on queue other than 1\n");
			return -EFAULT;
		}

		rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);
	}

	return rc;
}

static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	u64 sram_memory_addr, dram_memory_addr;
	enum goya_dma_direction user_dir;
	u32 ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	if (user_dir == DMA_DRAM_TO_SRAM) {
		dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
		dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
	} else {
		dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
		sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
	}

	if (!hl_mem_area_inside_range(sram_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.sram_user_base_address,
				hdev->asic_prop.sram_end_address)) {
		dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
			sram_memory_addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	if (!hl_mem_area_inside_range(dram_memory_addr,
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.dram_user_base_address,
				hdev->asic_prop.dram_end_address)) {
		dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
			dram_memory_addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}

static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	enum goya_dma_direction user_dir;
	u32 ctl;
	int rc;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	ctl = le32_to_cpu(user_dma_pkt->ctl);
	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	/*
	 * Special handling for DMA with size 0. The H/W has a bug where
	 * this can cause the QMAN DMA to get stuck, so block it here.
	 */
	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
	else
		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);

	return rc;
}

static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->dst_addr));
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/*
	 * We can't allow user to read from Host using QMANs other than 1.
	 * PMMU and HPMMU addresses are equal, check only one of them.
	 */
	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.pmmu.start_addr,
				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other than 1\n");
		return -EFAULT;
	}

	if (user_dma_pkt->tsize == 0) {
		dev_err(hdev->dev,
			"Got DMA with size 0, might reset the device\n");
		return -EINVAL;
	}

	parser->patched_cb_size += sizeof(*user_dma_pkt);

	return 0;
}

static int goya_validate_wreg32(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_wreg32 *wreg_pkt)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 sob_start_addr, sob_end_addr;
	u16 reg_offset;

	reg_offset = le32_to_cpu(wreg_pkt->ctl) &
			GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;

	dev_dbg(hdev->dev, "WREG32 packet details:\n");
	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
	dev_dbg(hdev->dev, "value == 0x%x\n",
		le32_to_cpu(wreg_pkt->value));

	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
			reg_offset);
		return -EPERM;
	}

	/*
	 * With MMU, DMA channels are not secured, so it doesn't matter where
	 * the WR COMP will be written to because it will go out with
	 * non-secured property
	 */
	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);

	if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
			(le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {

		dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
			le32_to_cpu(wreg_pkt->value));
		return -EPERM;
	}

	return 0;
}

static int goya_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct goya_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_WREG_32:
			/*
			 * Although it is validated after copy in patch_cb(),
			 * need to validate here as well because patch_cb() is
			 * not called in MMU path while this function is called
			 */
			rc = goya_validate_wreg32(hdev,
				parser, (struct packet_wreg32 *) user_pkt);
			parser->patched_cb_size += pkt_size;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_LIN_DMA:
			if (is_mmu)
				rc = goya_validate_dma_pkt_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			else
				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}

static int goya_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	enum goya_dma_direction user_dir;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool skip_host_mem_pin = false;
	bool user_memset;
	u32 user_rdcomp_mask, user_wrcomp_mask, ctl;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;

	user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
			(user_dma_pkt->tsize == 0)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
		*new_dma_pkt_size = sizeof(*new_dma_pkt);
		return 0;
	}

	if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(hl_userptr_is_pinned(hdev, addr,
			le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr) == false)) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;

	user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		if (likely(dma_desc_cnt))
			ctl &= ~GOYA_PKT_CTL_EB_MASK;
		ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
				GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32((u32) len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}

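/*
 * Two details of the patching above are easy to miss: every descriptor
 * except the first has its engine-barrier (EB) bit cleared so the chain
 * streams without stalling, and the user's rdcomp/wrcomp bits are masked
 * off all descriptors and restored only on the last one, so completion
 * semantics fire once for the whole transfer, as the user intended.
 */
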
static int goya_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct goya_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = goya_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			rc = goya_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_WREG_32:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			rc = goya_validate_wreg32(hdev, parser,
					(struct packet_wreg32 *) kernel_pkt);
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_FENCE:
		case PACKET_NOP:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}

static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

	return rc;
}

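/*
 * In the MMU path the user CB is first copied into a kernel-owned patched
 * CB and validation runs on the copy, not on the original. This closes the
 * obvious time-of-check/time-of-use window in which userspace could modify
 * packets between validation and execution.
 */
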
static int goya_parse_cb_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	int rc;

	rc = goya_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	rc = goya_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}

static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	/* For internal queue jobs, just check if CB address is valid */
	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->sram_user_base_address,
			asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range(
			(u64) (uintptr_t) parser->user_cb,
			parser->user_cb_size,
			asic_prop->dram_user_base_address,
			asic_prop->dram_end_address))
		return 0;

	dev_err(hdev->dev,
		"Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}

int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct goya_device *goya = hdev->asic_specific;

	if (parser->queue_type == QUEUE_TYPE_INT)
		return goya_parse_cb_no_ext_queue(hdev, parser);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		return goya_parse_cb_mmu(hdev, parser);
	else
		return goya_parse_cb_no_mmu(hdev, parser);
}

void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
				bool eb)
{
	struct packet_msg_prot *cq_pkt;
	u32 tmp;

	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
}

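/*
 * These are the two MSG_PROT packets every patched CB reserves space for
 * (see goya_validate_cb()/goya_parse_cb_mmu()): the first writes the
 * completion value to the CQ, the second writes the MSI-X vector number
 * to the PCIe doorbell to raise the interrupt.
 */
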
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_EQ_CI, val);
}

void goya_restore_phase_topology(struct hl_device *hdev)
{

}

static void goya_clear_sm_regs(struct hl_device *hdev)
{
	int i, num_of_sob_in_longs, num_of_mon_in_longs;

	num_of_sob_in_longs =
		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);

	num_of_mon_in_longs =
		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);

	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);

	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);

	/* Flush all WREG to prevent race */
	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
}

/*
 * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
 *                       address
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: returned value
 *
 * In case of DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows reading from the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore, should be done with extreme care
 *
 */
static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		*val = RREG32(addr - CFG_BASE);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}

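/*
 * The DRAM branch above temporarily re-points the DDR BAR: the BAR only
 * exposes dram_pci_bar_size bytes at a time, so the target address is
 * rounded down to a BAR-sized boundary, the iATU is moved there, the access
 * is done through the BAR, and the previous BAR base is restored. The same
 * dance appears in the write32/read64/write64 variants below.
 */
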
/*
 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
 *                        address
 *
 * @hdev: pointer to hl_device structure
 * @addr: device or host mapped address
 * @val: value to write
 *
 * In case of DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows writing to the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore, should be done with extreme care
 *
 */
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		WREG32(addr - CFG_BASE, val);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		u32 val_l = RREG32(addr - CFG_BASE);
		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

		*val = (((u64) val_h) << 32) | val_l;

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if (addr <=
		DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		WREG32(addr - CFG_BASE, lower_32_bits(val));
		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if (addr <=
		DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {

		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}


static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}

static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
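
/*
 * Unlike the debugfs accessors, the PTE accessors above do not move the DDR
 * bar per access; they assume it is already positioned over the page-tables
 * region and use goya->ddr_bar_cur_addr as the current bar base. While a
 * hard reset is pending the device may be inaccessible, so reads return
 * U64_MAX and writes are silently dropped.
 */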

static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cmdq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cmdq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	default:
		return "N/A";
	}
}
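
/*
 * The per-engine strings above deliberately keep a %d conversion. The caller
 * below derives the engine index from the event ID spacing (e.g. consecutive
 * TPC ECC event IDs are 3 apart, hence the "/ 3") and expands the template
 * with snprintf(), so GOYA_ASYNC_EVENT_ID_TPC2_ECC ends up as "TPC2_ecc".
 */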

static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
{
	int index;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
		snprintf(desc, size, _goya_get_event_desc(event_type), index);
		break;
	default:
		snprintf(desc, size, _goya_get_event_desc(event_type));
		break;
	}
}

static void goya_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
	}
}
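
/*
 * RAZWI (read-as-zero, write-ignored) is the device's response to an illegal
 * transaction. The *_VLD capture registers stay latched until cleared, which
 * is why each report above is followed by a write-to-clear.
 */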

static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}

static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
				bool razwi)
{
	char desc[20] = "";

	goya_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		goya_print_razwi_info(hdev);
		goya_print_mmu_error_info(hdev);
	}
}

static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
		size_t irq_arr_size)
{
	struct cpucp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	u64 result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes so that the CPU-CP can copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* total_pkt_size is cast to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* We must perform any necessary endianness conversion on the irq
	 * array being passed to the goya hardware
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						CPUCP_PKT_CTL_OPCODE_SHIFT);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
						total_pkt_size, 0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}

static int goya_soft_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}

static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
{
	struct cpucp_packet pkt;
	u64 result;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.value = cpu_to_le64(event_type);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
						0, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);

	return rc;
}

static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;
	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}

void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
				>> EQ_CTL_EVENT_TYPE_SHIFT);
	struct goya_device *goya = hdev->asic_specific;

	goya->events_stat[event_type]++;
	goya->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
	case GOYA_ASYNC_EVENT_ID_GIC500:
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		goya_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
	case GOYA_ASYNC_EVENT_ID_PSOC:
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
	case GOYA_ASYNC_EVENT_ID_MME_QM:
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		goya_print_irq_info(hdev, event_type, true);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		goya_print_irq_info(hdev, event_type, false);
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		goya_print_clk_change_info(hdev, event_type);
		goya_unmask_irq(hdev, event_type);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}

void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(goya->events_stat_aggregate);
		return goya->events_stat_aggregate;
	}

	*size = (u32) sizeof(goya->events_stat);
	return goya->events_stat;
}

static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
					u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = cb->kernel_address;

	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = goya_send_job_on_qman0(hdev, job);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}
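
/*
 * Each LIN_DMA packet can cover at most 2GB, hence the DIV_ROUND_UP_ULL()
 * split above, and the MEMSET bit makes the DMA engine replicate the 64-bit
 * value placed in src_addr rather than treat it as a source pointer. A
 * typical use is goya_context_switch() below, which fills the whole SRAM
 * with 0x7777777777777777 through this helper.
 */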

int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
			mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
				lower_32_bits(sob_addr));
	}

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_mmu_prepare(hdev, asid);

	goya_clear_sm_regs(hdev);

	return 0;
}

static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, 0, true);
}

static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
	u64 val = 0x9999999999999999ull;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, val, true);
}

static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
				prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR,
				hdev->cpu_accessible_dma_address,
				PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}
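
/*
 * The CPU-accessible DMA buffer is mapped with a single 2MB page only when
 * its bus address happens to be 2MB aligned; otherwise the same 2MB range is
 * covered by 512 4KB pages. The unwind labels above and the removal path
 * below therefore mirror both cases.
 */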

void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (!goya->device_cpu_mmu_mappings_done)
		return;

	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap_page(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap_page(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}

static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
		WARN(1, "asid %u is too big\n", asid);
		return;
	}

	/* zero the MMBP and ASID bits and then set the ASID */
	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}
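
/*
 * goya_mmu_regs holds the GOYA_MMU_REGS_NUM per-engine MMU configuration
 * registers (the QMAN secure props and similar per-engine registers).
 * Clearing MMBP (MMU bypass) and programming the ASID there is what routes
 * subsequent engine transactions through the new context's translations.
 */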

static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return 0;

	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines
	 * with mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bits wide */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
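
/*
 * The range variant above uses the STLB producer/consumer handshake: a new
 * 8-bit producer index is written to STLB_CACHE_INV and the code then polls
 * STLB_INV_CONSUMER_INDEX until the MMU has caught up. As the TODO notes,
 * the asid/va/size arguments are accepted but not yet used to narrow the
 * invalidation.
 */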

int goya_send_heartbeat(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

int goya_cpucp_info_get(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 dram_size;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
	if (dram_size) {
		if ((!is_power_of_2(dram_size)) ||
				(dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
			dev_err(hdev->dev,
				"F/W reported invalid DRAM size %llu. Trying to use default size\n",
				dram_size);
			dram_size = DRAM_PHYS_DEFAULT_SIZE;
		}

		prop->dram_size = dram_size;
		prop->dram_end_address = prop->dram_base_address + dram_size;
	}

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	return 0;
}

static void goya_set_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}

static void goya_disable_clock_gating(struct hl_device *hdev)
{
	/* clock gating not supported in Goya */
}

static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
				struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
				"---  -------  ------------  -------------\n");

	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
			"---  -------  ------------  --------------  ----------\n");

	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
						(GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
			"---  -------  ------------  --------------  -----------\n");

	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}

static void goya_hw_queues_lock(struct hl_device *hdev)
	__acquires(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_lock(&goya->hw_queues_lock);
}

static void goya_hw_queues_unlock(struct hl_device *hdev)
	__releases(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

	spin_unlock(&goya->hw_queues_lock);
}

static u32 goya_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct goya_device *goya = hdev->asic_specific;

	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

static int goya_ctx_init(struct hl_ctx *ctx)
{
	return 0;
}

u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return cq_idx;
}

static u32 goya_get_signal_cb_size(struct hl_device *hdev)
{
	return 0;
}

static u32 goya_get_wait_cb_size(struct hl_device *hdev)
{
	return 0;
}

static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size)
{
	return 0;
}

static u32 goya_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	return 0;
}

static void goya_reset_sob(struct hl_device *hdev, void *data)
{

}

static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{

}

static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
							HL_POWER9_HOST_MAGIC) {
		dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

u64 goya_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
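
/*
 * The timestamp above is assembled from two 32-bit halves, upper (CNTCVU)
 * first. Note this is not protected against the low word rolling over
 * between the two register reads, so a sample taken exactly at a 32-bit
 * carry can be off; presumably acceptable for the coarse time queries this
 * serves.
 */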

static void goya_collective_wait_init_cs(struct hl_cs *cs)
{

}

static int goya_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	return -EINVAL;
}

static void goya_ctx_fini(struct hl_ctx *ctx)
{

}

static const struct hl_asic_funcs goya_funcs = {
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	.cb_mmap = goya_cb_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.scrub_device_mem = goya_scrub_device_mem,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.debugfs_read64 = goya_debugfs_read64,
	.debugfs_write64 = goya_debugfs_write64,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	.send_heartbeat = goya_send_heartbeat,
	.set_clock_gating = goya_set_clock_gating,
	.disable_clock_gating = goya_disable_clock_gating,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.pci_bars_map = goya_pci_bars_map,
	.init_iatu = goya_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight,
	.ctx_init = goya_ctx_init,
	.ctx_fini = goya_ctx_fini,
	.get_clk_rate = goya_get_clk_rate,
	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
	.read_device_fw_version = goya_read_device_fw_version,
	.load_firmware_to_device = goya_load_firmware_to_device,
	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
	.get_signal_cb_size = goya_get_signal_cb_size,
	.get_wait_cb_size = goya_get_wait_cb_size,
	.gen_signal_cb = goya_gen_signal_cb,
	.gen_wait_cb = goya_gen_wait_cb,
	.reset_sob = goya_reset_sob,
	.reset_sob_group = goya_reset_sob_group,
	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
	.get_device_time = goya_get_device_time,
	.collective_wait_init_cs = goya_collective_wait_init_cs,
	.collective_wait_create_jobs = goya_collective_wait_create_jobs
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &goya_funcs;
}