Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / drivers / accel / habanalabs / gaudi2 / gaudi2.c
bloba38b88baadf2bae76519c1937257a7da935cb7ea
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2020-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
/*
 * Pool sizing, reset and messaging timeouts. The unit of every timeout is
 * carried by the name suffix (_MSEC / _USEC) and repeated in the trailing
 * comment.
 */
#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3

/*
 * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
 * and the code relies on that value (for array size etc..) we define another value
 * for MAX faulty TPCs which reflects the cluster binning requirements
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

/* All-ones masks: 25 bits for TPC, 16 bits for HIF/HMMU, 10 bits for decoders */
#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

/*
 * Per-event-type cause counts.
 * NOTE(review): presumably these bound the cause-description tables and the
 * bit scans done by the event handlers later in the driver - confirm there.
 */
#define GAUDI2_NA_EVENT_CAUSE			0xFF
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE	25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

/* MMU timeouts are expressed as multiples of the common MMU_CONFIG_TIMEOUT_USEC */
#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
/* PLDM variant is 100x the regular VDEC timeout, i.e. 1s */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

/*
 * Idle/halt predicates over raw status register values. Each macro takes the
 * value already read from the register and evaluates to non-zero when the
 * condition holds.
 */

/* DMA core is idle when the BUSY bit of STS0 is clear */
#define IS_DMA_IDLE(dma_core_sts0)	\
	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))

/* DMA core is halted when the IS_HALT bit of STS1 is set */
#define IS_DMA_HALTED(dma_core_sts1)	\
	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))

/* MME/TPC are idle only when every bit of the respective idle mask is set */
#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

/* A QMAN is idle only when all three status words report all of their idle bits */
#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK			0x300
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3
/* A decoder counts as idle in both the IDLE and the PEND work states */
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
/* Bit positions used when composing an MMU range-invalidation request */
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
 * a 2 entries FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)

#define GAUDI2_MAX_STRING_LEN			64

/* Number of MSI-X entries from the first to the last decoder interrupt, inclusive */
#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

/* Distance between the engine IDs of the same engine in two consecutive DCOREs */
#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)

/* RAZWI initiator coordinates */

/*
 * Extract the coordinate fields (bits 4-12 and 27-31) of a raw AXUSER value
 * and shift them down by 4, which yields the layout produced by
 * RAZWI_INITIATOR_ID_X_Y(). The argument is parenthesized so that compound
 * expressions are masked as a whole.
 */
#define RAZWI_GET_AXUSER_XY(x) \
	(((x) & 0xF8001FF0) >> 4)

/* Same as RAZWI_GET_AXUSER_XY() but keeps only the low X/Y fields (bits 4-12) */
#define RAZWI_GET_AXUSER_LOW_XY(x) \
	(((x) & 0x00001FF0) >> 4)

#define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
#define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
#define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF

#define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
#define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F

/* Pack low X into bits 0-4 and low Y into bits 5-8 */
#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
		(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))

/* Pack high X into bits 23-27 */
#define RAZWI_INITIATOR_ID_X_HIGH(x) \
		(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

/* Full initiator ID: low X, low Y and high X combined */
#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))

#define PSOC_RAZWI_ENG_STR_SIZE			128
#define PSOC_RAZWI_MAX_ENG_PER_RTR		5

/* HW scrambles only bits 0-25 */
#define HW_UNSCRAMBLED_BITS_MASK		GENMASK_ULL(63, 26)

#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM		17

169 struct gaudi2_razwi_info {
170 u32 axuser_xy;
171 u32 rtr_ctrl;
172 u16 eng_id;
173 char *eng_name;
176 static struct gaudi2_razwi_info common_razwi_info[] = {
177 {RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
178 GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
179 {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
180 GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
181 {RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
182 GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
183 {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
184 GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
185 {RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
186 GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
187 {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
188 GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
189 {RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
190 GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
191 {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
192 GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
193 {RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
194 GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
195 {RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
196 GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
197 {RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
198 GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
199 {RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
200 GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
201 {RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
202 GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
203 {RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
204 GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
205 {RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
206 GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
207 {RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
208 GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
209 {RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
210 GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
211 {RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
212 GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
213 {RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
214 GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
215 {RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
216 GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
217 {RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
218 GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
219 {RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
220 GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
221 {RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
222 GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
223 {RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
224 GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
225 {RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
226 GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
227 {RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
228 GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
229 {RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
230 GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
231 {RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
232 GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
233 {RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
234 GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
235 {RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
236 GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
237 {RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
238 GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
239 {RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
240 GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
241 {RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
242 GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
243 {RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
244 GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
245 {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
246 GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
247 {RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
248 GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
249 {RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
250 GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
251 {RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
252 GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
253 {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
254 GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
255 {RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
256 GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
257 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
258 GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
259 {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
260 GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
261 {RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
262 GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
263 {RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
264 GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
265 {RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
266 GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
267 {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
268 GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
269 {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
270 GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
271 {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
272 GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
273 {RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
274 GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
275 {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
276 GAUDI2_ENGINE_ID_SIZE, "PMMU"},
277 {RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
278 GAUDI2_ENGINE_ID_SIZE, "PCIE"},
279 {RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
280 GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
281 {RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
282 GAUDI2_ENGINE_ID_KDMA, "KDMA"},
283 {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
284 GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
285 {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
286 GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
287 {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
288 GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
289 {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
290 GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
291 {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
292 GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
293 {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
294 GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
295 {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
296 GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
297 {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
298 GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
299 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
300 GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
301 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
302 GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
303 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
304 GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
305 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
306 GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
307 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
308 GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
309 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
310 GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
311 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
312 GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
313 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
314 GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
315 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
316 GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
317 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
318 GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
319 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
320 GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
321 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
322 GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
323 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
324 GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
325 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
326 GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
327 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
328 GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
329 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
330 GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
331 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
332 GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
333 {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
334 GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
335 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
336 GAUDI2_ENGINE_ID_PSOC, "CPU"},
337 {RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
338 GAUDI2_ENGINE_ID_PSOC, "PSOC"}
341 static struct gaudi2_razwi_info mme_razwi_info[] = {
342 /* MME X high coordinate is N/A, hence using only low coordinates */
343 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
344 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
345 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
346 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
347 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
348 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
349 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
350 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
351 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
352 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
353 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
354 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
355 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
356 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
357 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
358 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
359 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
360 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
361 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
362 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
363 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
364 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
365 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
366 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
367 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
368 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
369 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
370 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
371 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
372 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
373 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
374 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
375 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
376 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
377 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
378 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
379 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
380 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
381 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
382 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
383 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
384 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
385 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
386 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
387 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
388 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
389 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
390 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
391 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
392 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
393 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
394 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
395 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
396 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
397 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
398 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
399 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
400 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
401 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
402 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
403 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
404 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
405 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
406 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
407 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
408 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
409 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
410 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
411 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
412 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
413 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
414 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
/* PMMU fatal error causes; enumerators take the implicit values 0 and 1 */
enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN,
	LATENCY_WR_OUT_FIFO_OVERRUN,
};

/* PCIe AXI drain indication causes; enumerators take the implicit values 0 and 1 */
enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND,
	HBW_AXI_DRAIN_IND
};

427 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
428 [HBM_ID0] = 0xFFFC,
429 [HBM_ID1] = 0xFFCF,
430 [HBM_ID2] = 0xF7F7,
431 [HBM_ID3] = 0x7F7F,
432 [HBM_ID4] = 0xFCFF,
433 [HBM_ID5] = 0xCFFF,
436 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
437 [0] = HBM_ID0,
438 [1] = HBM_ID1,
439 [2] = HBM_ID4,
440 [3] = HBM_ID5,
443 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
444 [EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
445 [EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
446 [EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
447 [EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
448 [EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
449 [EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
450 [EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
451 [EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
454 static const int gaudi2_qman_async_event_id[] = {
455 [GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
456 [GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
457 [GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
458 [GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
459 [GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
460 [GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
461 [GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
462 [GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
463 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
464 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
465 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
466 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
467 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
468 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
469 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
470 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
471 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
472 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
473 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
474 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
475 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
476 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
477 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
478 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
479 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
480 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
481 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
482 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
483 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
484 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
485 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
486 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
487 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
488 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
489 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
490 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
491 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
492 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
493 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
494 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
495 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
496 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
497 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
498 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
499 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
500 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
501 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
502 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
503 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
504 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
505 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
506 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
507 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
508 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
509 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
510 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
511 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
512 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
513 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
514 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
515 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
516 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
517 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
518 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
519 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
520 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
521 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
522 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
523 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
524 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
525 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
526 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
527 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
528 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
529 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
530 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
531 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
532 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
533 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
534 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
535 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
536 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
537 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
538 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
539 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
540 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
541 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
542 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
543 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
544 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
545 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
546 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
547 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
548 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
549 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
550 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
551 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
552 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
553 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
554 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
555 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
556 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
557 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
558 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
559 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
560 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
561 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
562 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
563 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
564 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
565 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
566 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
567 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
568 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
569 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
570 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
571 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
572 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
573 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
574 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
575 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
576 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
577 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
578 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
579 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
580 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
581 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
582 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
583 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
584 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
585 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
586 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
587 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
588 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
589 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
590 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
591 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
592 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
593 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
594 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
595 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
596 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
597 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
598 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
599 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
600 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
601 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
602 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
603 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
604 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
605 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
606 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
607 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
608 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
609 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
610 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
611 [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
612 [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
613 [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
614 [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
615 [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
616 [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
617 [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
618 [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
619 [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
620 [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
621 [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
622 [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
623 [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
624 [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
625 [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
626 [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
627 [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
628 [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
629 [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
630 [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
631 [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
632 [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
633 [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
634 [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
635 [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
636 [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
637 [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
638 [GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
639 [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
640 [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
641 [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
642 [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
643 [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
644 [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
645 [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
646 [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
647 [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
648 [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
649 [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
650 [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
651 [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
652 [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
653 [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
654 [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
655 [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
656 [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
657 [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
658 [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
659 [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
660 [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
661 [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
662 [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
663 [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
664 [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
665 [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
666 [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
667 [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
668 [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
669 [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
670 [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
671 [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
672 [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
673 [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
674 [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
675 [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
676 [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
677 [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
678 [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
679 [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
680 [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
681 [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
682 [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
683 [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
684 [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
685 [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
686 [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
687 [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
688 [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
689 [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
690 [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
691 [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
692 [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
693 [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
694 [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
695 [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
696 [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
697 [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
698 [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
699 [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
700 [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
701 [GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
702 [GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
703 [GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
704 [GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
705 [GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
706 [GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
707 [GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
708 [GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
709 [GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
710 [GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
711 [GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
712 [GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
713 [GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
714 [GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
717 static const int gaudi2_dma_core_async_event_id[] = {
718 [DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
719 [DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
720 [DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
721 [DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
722 [DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
723 [DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
724 [DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
725 [DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
726 [DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
727 [DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
728 [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
731 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
732 "qman sei intr",
733 "arc sei intr"
736 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
737 "AXI_TERMINATOR WR",
738 "AXI_TERMINATOR RD",
739 "AXI SPLIT SEI Status"
742 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
743 "cbu_bresp_sei_intr_cause",
744 "cbu_rresp_sei_intr_cause",
745 "lbu_bresp_sei_intr_cause",
746 "lbu_rresp_sei_intr_cause",
747 "cbu_axi_split_intr_cause",
748 "lbu_axi_split_intr_cause",
749 "arc_ip_excptn_sei_intr_cause",
750 "dmi_bresp_sei_intr_cause",
751 "aux2apb_err_sei_intr_cause",
752 "cfg_lbw_wr_terminated_intr_cause",
753 "cfg_lbw_rd_terminated_intr_cause",
754 "cfg_dccm_wr_terminated_intr_cause",
755 "cfg_dccm_rd_terminated_intr_cause",
756 "cfg_hbw_rd_terminated_intr_cause"
759 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
760 "msix_vcd_hbw_sei",
761 "msix_l2c_hbw_sei",
762 "msix_nrm_hbw_sei",
763 "msix_abnrm_hbw_sei",
764 "msix_vcd_lbw_sei",
765 "msix_l2c_lbw_sei",
766 "msix_nrm_lbw_sei",
767 "msix_abnrm_lbw_sei",
768 "apb_vcd_lbw_sei",
769 "apb_l2c_lbw_sei",
770 "apb_nrm_lbw_sei",
771 "apb_abnrm_lbw_sei",
772 "dec_sei",
773 "dec_apb_sei",
774 "trc_apb_sei",
775 "lbw_mstr_if_sei",
776 "axi_split_bresp_err_sei",
777 "hbw_axi_wr_viol_sei",
778 "hbw_axi_rd_viol_sei",
779 "lbw_axi_wr_viol_sei",
780 "lbw_axi_rd_viol_sei",
781 "vcd_spi",
782 "l2c_spi",
783 "nrm_spi",
784 "abnrm_spi",
787 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
788 "PQ AXI HBW error",
789 "CQ AXI HBW error",
790 "CP AXI HBW error",
791 "CP error due to undefined OPCODE",
792 "CP encountered STOP OPCODE",
793 "CP AXI LBW error",
794 "CP WRREG32 or WRBULK returned error",
795 "N/A",
796 "FENCE 0 inc over max value and clipped",
797 "FENCE 1 inc over max value and clipped",
798 "FENCE 2 inc over max value and clipped",
799 "FENCE 3 inc over max value and clipped",
800 "FENCE 0 dec under min value and clipped",
801 "FENCE 1 dec under min value and clipped",
802 "FENCE 2 dec under min value and clipped",
803 "FENCE 3 dec under min value and clipped",
804 "CPDMA Up overflow",
805 "PQC L2H error"
808 static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
809 "RSVD0",
810 "CQ AXI HBW error",
811 "CP AXI HBW error",
812 "CP error due to undefined OPCODE",
813 "CP encountered STOP OPCODE",
814 "CP AXI LBW error",
815 "CP WRREG32 or WRBULK returned error",
816 "N/A",
817 "FENCE 0 inc over max value and clipped",
818 "FENCE 1 inc over max value and clipped",
819 "FENCE 2 inc over max value and clipped",
820 "FENCE 3 inc over max value and clipped",
821 "FENCE 0 dec under min value and clipped",
822 "FENCE 1 dec under min value and clipped",
823 "FENCE 2 dec under min value and clipped",
824 "FENCE 3 dec under min value and clipped",
825 "CPDMA Up overflow",
826 "RSVD17",
827 "CQ_WR_IFIFO_CI_ERR",
828 "CQ_WR_CTL_CI_ERR",
829 "ARC_CQF_RD_ERR",
830 "ARC_CQ_WR_IFIFO_CI_ERR",
831 "ARC_CQ_WR_CTL_CI_ERR",
832 "ARC_AXI_ERR",
833 "CP_SWITCH_WDT_ERR"
836 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
837 "Choice push while full error",
838 "Choice Q watchdog error",
839 "MSG AXI LBW returned with error"
842 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
843 "qm_axi_err",
844 "qm_trace_fence_events",
845 "qm_sw_err",
846 "qm_cp_sw_stop",
847 "lbw_mstr_rresp_err",
848 "lbw_mstr_bresp_err",
849 "lbw_msg_slverr",
850 "hbw_msg_slverr",
851 "wbc_slverr",
852 "hbw_mstr_rresp_err",
853 "hbw_mstr_bresp_err",
854 "sb_resp_intr",
855 "mrsb_resp_intr",
856 "core_dw_status_0",
857 "core_dw_status_1",
858 "core_dw_status_2",
859 "core_dw_status_3",
860 "core_dw_status_4",
861 "core_dw_status_5",
862 "core_dw_status_6",
863 "core_dw_status_7",
864 "async_arc2cpu_sei_intr",
867 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
868 "tpc_address_exceed_slm",
869 "tpc_div_by_0",
870 "tpc_spu_mac_overflow",
871 "tpc_spu_addsub_overflow",
872 "tpc_spu_abs_overflow",
873 "tpc_spu_fma_fp_dst_nan",
874 "tpc_spu_fma_fp_dst_inf",
875 "tpc_spu_convert_fp_dst_nan",
876 "tpc_spu_convert_fp_dst_inf",
877 "tpc_spu_fp_dst_denorm",
878 "tpc_vpu_mac_overflow",
879 "tpc_vpu_addsub_overflow",
880 "tpc_vpu_abs_overflow",
881 "tpc_vpu_convert_fp_dst_nan",
882 "tpc_vpu_convert_fp_dst_inf",
883 "tpc_vpu_fma_fp_dst_nan",
884 "tpc_vpu_fma_fp_dst_inf",
885 "tpc_vpu_fp_dst_denorm",
886 "tpc_assertions",
887 "tpc_illegal_instruction",
888 "tpc_pc_wrap_around",
889 "tpc_qm_sw_err",
890 "tpc_hbw_rresp_err",
891 "tpc_hbw_bresp_err",
892 "tpc_lbw_rresp_err",
893 "tpc_lbw_bresp_err",
894 "st_unlock_already_locked",
895 "invalid_lock_access",
896 "LD_L protection violation",
897 "ST_L protection violation",
898 "D$ L0CS mismatch",
901 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
902 "agu_resp_intr",
903 "qman_axi_err",
904 "wap sei (wbc axi err)",
905 "arc sei",
906 "cfg access error",
907 "qm_sw_err",
908 "sbte_dbg_intr_0",
909 "sbte_dbg_intr_1",
910 "sbte_dbg_intr_2",
911 "sbte_dbg_intr_3",
912 "sbte_dbg_intr_4",
913 "sbte_prtn_intr_0",
914 "sbte_prtn_intr_1",
915 "sbte_prtn_intr_2",
916 "sbte_prtn_intr_3",
917 "sbte_prtn_intr_4",
920 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
921 "WBC ERR RESP_0",
922 "WBC ERR RESP_1",
923 "AP SOURCE POS INF",
924 "AP SOURCE NEG INF",
925 "AP SOURCE NAN",
926 "AP RESULT POS INF",
927 "AP RESULT NEG INF",
930 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
931 "HBW Read returned with error RRESP",
932 "HBW write returned with error BRESP",
933 "LBW write returned with error BRESP",
934 "descriptor_fifo_overflow",
935 "KDMA SB LBW Read returned with error",
936 "KDMA WBC LBW Write returned with error",
937 "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
938 "WRONG CFG FOR COMMIT IN LIN DMA"
941 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
942 "HBW/LBW Read returned with error RRESP",
943 "HBW/LBW write returned with error BRESP",
944 "LBW write returned with error BRESP",
945 "descriptor_fifo_overflow",
946 "KDMA SB LBW Read returned with error",
947 "KDMA WBC LBW Write returned with error",
948 "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
949 "WRONG CFG FOR COMMIT IN LIN DMA"
/**
 * struct gaudi2_sm_sei_cause_data - text describing one SM SEI error cause.
 * @cause_name: description of the error cause.
 * @log_name: short label for the value tied to the cause (e.g. "SOB ID");
 *            NOTE(review): presumably printed next to the logged value -
 *            confirm at the use site.
 */
struct gaudi2_sm_sei_cause_data {
	const char *cause_name;
	const char *log_name;
};
957 static const struct gaudi2_sm_sei_cause_data
958 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
959 {"calculated SO value overflow/underflow", "SOB ID"},
960 {"payload address of monitor is not aligned to 4B", "monitor addr"},
961 {"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
964 static const char * const
965 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
966 "LATENCY_RD_OUT_FIFO_OVERRUN",
967 "LATENCY_WR_OUT_FIFO_OVERRUN",
970 static const char * const
971 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
972 "LATENCY_RD_OUT_FIFO_OVERRUN",
973 "LATENCY_WR_OUT_FIFO_OVERRUN",
976 static const char * const
977 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
978 "AXI drain HBW",
979 "AXI drain LBW",
982 static const char * const
983 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
984 "HBW error response",
985 "LBW error response",
986 "TLP is blocked by RR"
989 static const int gaudi2_queue_id_to_engine_id[] = {
990 [GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
991 [GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
992 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
993 GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
994 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
995 GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
996 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
997 GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
998 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
999 GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
1000 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
1001 GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
1002 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
1003 GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
1004 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
1005 GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
1006 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
1007 GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
1008 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
1009 GAUDI2_DCORE0_ENGINE_ID_MME,
1010 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
1011 GAUDI2_DCORE1_ENGINE_ID_MME,
1012 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
1013 GAUDI2_DCORE2_ENGINE_ID_MME,
1014 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
1015 GAUDI2_DCORE3_ENGINE_ID_MME,
1016 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
1017 GAUDI2_DCORE0_ENGINE_ID_TPC_0,
1018 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
1019 GAUDI2_DCORE0_ENGINE_ID_TPC_1,
1020 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
1021 GAUDI2_DCORE0_ENGINE_ID_TPC_2,
1022 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
1023 GAUDI2_DCORE0_ENGINE_ID_TPC_3,
1024 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
1025 GAUDI2_DCORE0_ENGINE_ID_TPC_4,
1026 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
1027 GAUDI2_DCORE0_ENGINE_ID_TPC_5,
1028 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
1029 GAUDI2_DCORE0_ENGINE_ID_TPC_6,
1030 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
1031 GAUDI2_DCORE1_ENGINE_ID_TPC_0,
1032 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
1033 GAUDI2_DCORE1_ENGINE_ID_TPC_1,
1034 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
1035 GAUDI2_DCORE1_ENGINE_ID_TPC_2,
1036 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
1037 GAUDI2_DCORE1_ENGINE_ID_TPC_3,
1038 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
1039 GAUDI2_DCORE1_ENGINE_ID_TPC_4,
1040 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
1041 GAUDI2_DCORE1_ENGINE_ID_TPC_5,
1042 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
1043 GAUDI2_DCORE2_ENGINE_ID_TPC_0,
1044 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
1045 GAUDI2_DCORE2_ENGINE_ID_TPC_1,
1046 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
1047 GAUDI2_DCORE2_ENGINE_ID_TPC_2,
1048 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
1049 GAUDI2_DCORE2_ENGINE_ID_TPC_3,
1050 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
1051 GAUDI2_DCORE2_ENGINE_ID_TPC_4,
1052 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
1053 GAUDI2_DCORE2_ENGINE_ID_TPC_5,
1054 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
1055 GAUDI2_DCORE3_ENGINE_ID_TPC_0,
1056 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
1057 GAUDI2_DCORE3_ENGINE_ID_TPC_1,
1058 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
1059 GAUDI2_DCORE3_ENGINE_ID_TPC_2,
1060 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
1061 GAUDI2_DCORE3_ENGINE_ID_TPC_3,
1062 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
1063 GAUDI2_DCORE3_ENGINE_ID_TPC_4,
1064 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
1065 GAUDI2_DCORE3_ENGINE_ID_TPC_5,
1066 [GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
1067 [GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
1068 [GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
1069 [GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
1070 [GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
1071 [GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
1072 [GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
1073 [GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
1074 [GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
1075 [GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
1076 [GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
1077 [GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
1078 [GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
1079 [GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
1080 [GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
1081 [GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
1082 [GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
1083 [GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
1084 [GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
1085 [GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
1086 [GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
1087 [GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
1088 [GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
1089 [GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
1090 [GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
1091 [GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
1094 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
1095 [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
1096 [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
1097 [GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
1098 [GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
1099 [GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
1100 [GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
1101 [GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1102 [GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1103 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1104 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1105 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1106 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1107 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1108 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1109 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1110 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1111 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1112 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1113 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1114 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1115 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1116 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1117 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1118 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1119 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1120 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1121 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1122 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1123 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1124 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1125 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1126 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1127 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1128 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1129 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1130 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1131 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1132 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1133 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1134 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1135 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1136 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1137 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1138 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1139 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1140 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1141 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1142 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1143 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1144 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1145 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1146 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1147 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1148 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1149 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1150 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1151 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1152 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1153 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1154 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1155 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1156 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1157 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1158 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1159 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1160 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1161 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1162 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1163 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1164 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1165 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1166 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1167 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1168 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1169 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1170 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1171 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1172 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1173 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1174 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1175 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1176 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1177 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1178 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1179 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1180 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1181 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1182 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1183 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1184 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1185 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1186 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1187 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1188 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1189 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1190 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1191 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1192 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1193 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1194 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1195 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1196 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1197 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1198 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1199 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1200 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1201 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1202 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1203 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1204 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1205 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1206 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1207 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1208 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1209 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1210 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1211 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1212 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1213 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1214 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1215 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1216 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1217 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1218 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1219 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1220 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1221 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1222 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1223 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1224 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1225 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1226 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1227 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1228 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1229 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1230 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1231 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1232 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1233 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1234 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1235 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1236 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1237 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1238 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1239 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1240 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1241 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1242 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1243 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1244 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1245 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1246 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1247 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1248 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1249 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1250 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1251 [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1252 [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1253 [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1254 [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1255 [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1256 [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1257 [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1258 [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1259 [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1260 [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1261 [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1262 [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1263 [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1264 [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1265 [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1266 [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1267 [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1268 [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1269 [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1270 [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1271 [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1272 [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1273 [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1274 [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1275 [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1276 [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1277 [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1278 [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1279 [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1280 [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1281 [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1282 [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1283 [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1284 [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1285 [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1286 [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1287 [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1288 [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1289 [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1290 [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1291 [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1292 [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1293 [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1294 [GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1295 [GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1296 [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1297 [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1298 [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1299 [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1300 [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1301 [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1302 [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1303 [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1304 [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1305 [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1306 [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1307 [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1308 [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1309 [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1310 [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1311 [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1312 [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1313 [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1314 [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1315 [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1316 [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1317 [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1318 [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1319 [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1320 [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1321 [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1322 [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1323 [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1324 [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1325 [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1326 [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1327 [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1328 [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1329 [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1330 [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1331 [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1332 [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1333 [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1334 [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1335 [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1336 [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1337 [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1338 [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1339 [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1340 [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1341 [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1342 [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1343 [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1344 [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1345 [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1346 [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1347 [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1348 [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1349 [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1350 [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1351 [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1352 [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1353 [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1354 [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
/*
 * Per-ARC-CPU table of AUX block base constants, indexed by CPU_ID_*.
 *
 * Every slot holds an mm*_AUX*_BASE value. Points worth knowing when reading
 * the table:
 *  - CPU_ID_SCHED_ARC0..3 use the ARC_FARM AUX bases, while
 *    CPU_ID_SCHED_ARC4/5 use the DCORE1/DCORE3 MME QM ARC AUX bases.
 *  - The DCORE0/DCORE2 MME QM ARCs are listed separately under
 *    CPU_ID_MME_QMAN_ARC0/1.
 *  - CPU_ID_TPC_QMAN_ARC24 is DCORE0 TPC6 and is listed after the DCORE3 TPCs.
 *  - Each NICn contributes two entries (AUX0 and AUX1) on consecutive
 *    CPU_ID_NIC_QMAN_ARC ids.
 */
1357 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1358 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1359 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1360 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1361 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1362 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1363 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1364 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1365 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1366 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1367 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1368 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1369 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1370 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1371 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1372 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1373 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1374 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1375 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1376 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1377 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1378 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1379 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1380 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1381 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1382 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1383 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1384 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1385 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1386 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1387 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1388 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1389 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1390 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1391 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1392 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1393 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1394 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1395 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1396 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1397 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1398 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1399 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1400 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1401 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1402 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1403 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1404 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1405 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1406 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1407 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1408 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1409 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1410 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1411 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1412 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1413 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1414 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1415 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1416 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1417 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1418 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1419 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1420 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1421 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1422 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1423 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1424 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1425 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1426 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
/*
 * Per-ARC-CPU table of DCCM block base constants, indexed by CPU_ID_*.
 *
 * The layout follows gaudi2_arc_blocks_bases entry for entry, with the
 * *_DCCM*_BASE constant in place of the AUX one:
 *  - CPU_ID_SCHED_ARC0..3 use the ARC_FARM DCCM0 bases, CPU_ID_SCHED_ARC4/5
 *    use the DCORE1/DCORE3 MME QM ARC DCCM bases.
 *  - Each NICn contributes two entries (DCCM0 and DCCM1) on consecutive
 *    CPU_ID_NIC_QMAN_ARC ids.
 */
1429 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1430 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1431 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1432 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1433 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1434 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1435 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1436 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1437 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1438 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1439 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1440 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1441 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1442 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1443 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1444 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1445 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1446 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1447 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1448 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1449 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1450 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1451 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1452 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1453 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1454 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1455 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1456 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1457 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1458 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1459 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1460 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1461 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1462 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1463 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1464 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1465 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1466 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1467 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1468 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1469 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1470 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1471 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1472 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1473 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1474 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1475 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1476 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1477 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1478 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1479 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1480 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1481 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1482 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1483 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1484 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1485 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1486 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1487 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1488 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1489 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1490 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1491 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1492 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1493 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1494 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1495 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1496 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1497 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1498 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
/*
 * MME CTRL_LO block base per MME, indexed by MME_ID_DCORE0..3 (one MME per
 * DCORE). Declared without 'static', so it has external linkage.
 */
1501 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1502 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1503 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1504 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1505 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
/*
 * Maps every GAUDI2_QUEUE_ID_* to the CPU_ID_* of the ARC that goes with it.
 *
 * Queue ids come in groups of four (suffix _0.._3) and all four members of a
 * group map to the same ARC id. Two mappings do not follow the obvious
 * numbering and are easy to misread:
 *  - The DCORE1 and DCORE3 MME queues map to CPU_ID_SCHED_ARC4 and
 *    CPU_ID_SCHED_ARC5, whereas the DCORE0 and DCORE2 MME queues map to
 *    CPU_ID_MME_QMAN_ARC0 and CPU_ID_MME_QMAN_ARC1.
 *  - DCORE0 TPC_6 maps to CPU_ID_TPC_QMAN_ARC24, i.e. after the ids used by
 *    the DCORE1..3 TPCs (ARC6..ARC23).
 */
1508 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
/* PDMA queues */
1509 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1510 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1511 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1512 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1513 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1514 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1515 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1516 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
/* DCORE0: EDMA 0-1, MME, TPC 0-6 */
1517 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1518 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1519 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1520 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1521 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1522 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1523 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1524 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1525 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1526 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1527 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1528 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1529 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1530 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1531 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1532 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1533 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1534 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1535 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1536 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1537 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1538 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1539 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1540 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1541 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1542 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1543 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1544 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1545 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1546 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1547 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1548 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1549 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1550 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1551 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1552 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
/* DCORE0 TPC_6 uses ARC24, not ARC6 */
1553 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1554 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1555 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1556 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
/* DCORE1: EDMA 0-1, MME, TPC 0-5 */
1557 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1558 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1559 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1560 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1561 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1562 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1563 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1564 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
/* DCORE1 MME queues map to a SCHED ARC id */
1565 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1566 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1567 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1568 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1569 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1570 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1571 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1572 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1573 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1574 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1575 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1576 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1577 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1578 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1579 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1580 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1581 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1582 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1583 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1584 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1585 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1586 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1587 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1588 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1589 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1590 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1591 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1592 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
/* DCORE2: EDMA 0-1, MME, TPC 0-5 */
1593 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1594 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1595 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1596 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1597 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1598 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1599 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1600 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1601 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1602 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1603 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1604 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1605 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1606 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1607 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1608 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1609 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1610 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1611 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1612 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1613 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1614 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1615 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1616 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1617 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1618 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1619 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1620 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1621 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1622 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1623 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1624 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1625 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1626 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1627 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1628 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
/* DCORE3: EDMA 0-1, MME, TPC 0-5 */
1629 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1630 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1631 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1632 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1633 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1634 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1635 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1636 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
/* DCORE3 MME queues map to a SCHED ARC id */
1637 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1638 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1639 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1640 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1641 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1642 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1643 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1644 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1645 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1646 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1647 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1648 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1649 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1650 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1651 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1652 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1653 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1654 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1655 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1656 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1657 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1658 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1659 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1660 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1661 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1662 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1663 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1664 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
/* NIC queues: NIC_n maps to CPU_ID_NIC_QMAN_ARCn */
1665 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1666 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1667 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1668 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1669 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1670 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1671 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1672 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1673 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1674 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1675 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1676 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1677 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1678 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1679 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1680 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1681 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1682 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1683 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1684 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1685 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1686 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1687 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1688 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1689 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1690 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1691 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1692 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1693 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1694 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1695 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1696 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1697 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1698 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1699 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1700 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1701 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1702 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1703 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1704 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1705 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1706 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1707 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1708 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1709 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1710 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1711 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1712 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1713 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1714 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1715 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1716 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1717 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1718 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1719 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1720 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1721 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1722 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1723 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1724 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1725 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1726 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1727 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1728 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1729 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1730 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1731 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1732 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1733 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1734 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1735 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1736 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1737 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1738 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1739 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1740 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1741 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1742 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1743 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1744 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1745 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1746 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1747 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1748 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1749 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1750 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1751 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1752 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1753 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1754 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1755 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1756 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1757 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1758 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1759 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1760 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
/* Rotator queues */
1761 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1762 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1763 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1764 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1765 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1766 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1767 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1768 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
/*
 * DMA core block base per DMA engine, indexed by DMA_CORE_ID_*.
 *
 * Order of the entries: the two PDMAs, then EDMA0..7 (two per DCORE, so
 * DMA_CORE_ID_EDMA<2*d + e> is DCORE<d> EDMA<e>), then the KDMA from the
 * ARC farm. Declared without 'static', so it has external linkage.
 */
1771 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1772 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1773 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1774 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1775 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1776 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1777 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1778 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1779 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1780 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1781 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1782 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
/*
 * MME ACC block base per MME, indexed by MME_ID_DCORE0..3 (one MME per
 * DCORE). Declared without 'static', so it has external linkage.
 */
1785 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1786 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1787 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1788 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1789 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
/*
 * TPC CFG block base per TPC, indexed by TPC_ID_*.
 *
 * Six TPCs (TPC0..5) are listed for each of DCORE0..3; the extra DCORE0 TPC6
 * is written last in the initializer rather than next to the other DCORE0
 * entries.
 */
1792 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1793 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1794 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1795 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1796 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1797 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1798 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1799 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1800 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1801 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1802 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1803 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1804 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1805 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1806 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1807 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1808 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1809 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1810 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1811 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1812 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1813 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1814 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1815 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1816 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1817 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
/*
 * TPC EML_CFG block base per TPC, indexed by TPC_ID_*.
 *
 * Same shape as gaudi2_tpc_cfg_blocks_bases: TPC0..5 for each of DCORE0..3,
 * with the extra DCORE0 TPC6 written last in the initializer.
 */
1820 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1821 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1822 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1823 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1824 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1825 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1826 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1827 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1828 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1829 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1830 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1831 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1832 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1833 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1834 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1835 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1836 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1837 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1838 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1839 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1840 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1841 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1842 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1843 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1844 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1845 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
/*
 * Rotator block base per rotator, indexed by ROTATOR_ID_*. Declared without
 * 'static', so it has external linkage.
 */
1848 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1849 [ROTATOR_ID_0] = mmROT0_BASE,
1850 [ROTATOR_ID_1] = mmROT1_BASE
/*
 * Maps a TPC_ID_* to the "_0" queue id of that TPC (for example
 * TPC_ID_DCORE1_TPC3 -> GAUDI2_QUEUE_ID_DCORE1_TPC_3_0).
 *
 * Only the _0 member of each queue group is stored here.
 * NOTE(review): callers presumably add an offset to reach the _1.._3 queues
 * of the same TPC - confirm against the users of this table.
 */
1853 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1854 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1855 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1856 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1857 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1858 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1859 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1860 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1861 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1862 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1863 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1864 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1865 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1866 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1867 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1868 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1869 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1870 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1871 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1872 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1873 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1874 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1875 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1876 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1877 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1878 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
/*
 * Maps a ROTATOR_ID_* to the "_0" queue id of that rotator
 * (GAUDI2_QUEUE_ID_ROT_<n>_0).
 */
1881 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1882 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1883 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
/*
 * Translates a TPC engine id (GAUDI2_DCORE<d>_ENGINE_ID_TPC_<n>) into the
 * matching TPC_ID_* value.
 *
 * The array has no explicit size, so its length is one more than the largest
 * engine id used as a designator below. Any slot in that range that is not
 * named here is implicitly zero-initialized.
 * NOTE(review): if the engine-id enum interleaves non-TPC engines between the
 * TPC ids (not visible in this file), those slots read as 0 and are
 * indistinguishable from a real TPC id of 0 - callers must index this table
 * with TPC engine ids only.
 */
1886 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1887 [GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1888 [GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1889 [GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1890 [GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1891 [GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1892 [GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1893 [GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1894 [GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1895 [GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1896 [GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1897 [GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1898 [GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1899 [GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1900 [GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1901 [GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1902 [GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1903 [GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1904 [GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1905 [GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1906 [GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1907 [GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1908 [GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1909 [GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1910 [GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
1911 /* the PCI TPC (DCORE0 TPC6) is placed last, mirroring how the HW maps it */
1912 [GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1915 static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1916 [GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1917 [GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1918 [GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1919 [GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1922 static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1923 [GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1924 [GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1925 [GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1926 [GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1927 [GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1928 [GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1929 [GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1930 [GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1931 [GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1932 [GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1933 [GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1936 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1937 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1938 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1939 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1940 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1941 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1942 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1943 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1944 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1947 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1948 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1949 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1950 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1951 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1952 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1953 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1954 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1955 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1956 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1957 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
/*
 * Router IDs: eight routers per DCORE, numbered consecutively from 0
 * (DCORE0_RTR0) to 31 (DCORE3_RTR7).
 */
enum rtr_id {
	DCORE0_RTR0, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR3,
	DCORE0_RTR4, DCORE0_RTR5, DCORE0_RTR6, DCORE0_RTR7,

	DCORE1_RTR0, DCORE1_RTR1, DCORE1_RTR2, DCORE1_RTR3,
	DCORE1_RTR4, DCORE1_RTR5, DCORE1_RTR6, DCORE1_RTR7,

	DCORE2_RTR0, DCORE2_RTR1, DCORE2_RTR2, DCORE2_RTR3,
	DCORE2_RTR4, DCORE2_RTR5, DCORE2_RTR6, DCORE2_RTR7,

	DCORE3_RTR0, DCORE3_RTR1, DCORE3_RTR2, DCORE3_RTR3,
	DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR7,
};
1995 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1996 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1997 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1998 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1999 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
2000 DCORE0_RTR0
2003 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
2004 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
2005 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
2006 DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
2007 DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
2008 DCORE0_RTR0
2011 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
2012 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
2013 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
2016 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
2017 DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
2018 DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
2021 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
2022 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
2023 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
2026 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
2027 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
2028 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
2031 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
2032 mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2033 mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2034 mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2035 mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2036 mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2037 mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2038 mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2039 mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
2042 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
2043 DCORE0_RTR0, DCORE0_RTR0
2046 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
2047 DCORE0_RTR2, DCORE0_RTR2
2050 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
2051 DCORE2_RTR0, DCORE3_RTR7
2054 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
2055 DCORE2_RTR2, DCORE3_RTR5
2058 struct mme_initiators_rtr_id {
2059 u32 wap0;
2060 u32 wap1;
2061 u32 write;
2062 u32 read;
2063 u32 sbte0;
2064 u32 sbte1;
2065 u32 sbte2;
2066 u32 sbte3;
2067 u32 sbte4;
/*
 * MME initiator indices, numbered from 0. MME_INITIATORS_MAX is the number of
 * initiators.
 */
enum mme_initiators {
	MME_WAP0 = 0, MME_WAP1,
	MME_WRITE, MME_READ,
	MME_SBTE0, MME_SBTE1, MME_SBTE2, MME_SBTE3, MME_SBTE4,
	MME_INITIATORS_MAX
};
2083 static const struct mme_initiators_rtr_id
2084 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
2085 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
2086 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
2087 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
2088 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
2089 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
2090 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
2091 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
2092 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
/* Kinds of engines that can be the source of a RAZWI event. */
enum razwi_event_sources {
	RAZWI_TPC, RAZWI_MME,
	RAZWI_EDMA, RAZWI_PDMA,
	RAZWI_NIC, RAZWI_DEC,
	RAZWI_ROT, RAZWI_ARC_FARM
};
2106 struct hbm_mc_error_causes {
2107 u32 mask;
2108 char cause[50];
2111 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
/*
 * The special blocks iterator is currently used both to configure the security
 * protection bits and to read global errors. Most HW blocks are addressable;
 * those that are not (N/A) must be skipped. The following configurations are
 * shared by PB configuration and global error reading, since both currently
 * use the same settings. If that ever changes, each use must get its own
 * separate configuration.
 */
2119 static int gaudi2_iterator_skip_block_types[] = {
2120 GAUDI2_BLOCK_TYPE_PLL,
2121 GAUDI2_BLOCK_TYPE_EU_BIST,
2122 GAUDI2_BLOCK_TYPE_HBM,
2123 GAUDI2_BLOCK_TYPE_XFT
2126 static struct range gaudi2_iterator_skip_block_ranges[] = {
2127 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2128 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2129 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2130 /* Skip all CPU blocks except for CPU_IF */
2131 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2132 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2135 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2136 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2137 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2138 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2139 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2140 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2143 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2144 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2145 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2146 [HBM_SEI_READ_ERR] = "SEI read data error",
2147 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2148 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2149 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2150 [HBM_SEI_DFI] = "SEI DFI error",
2151 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2152 [HBM_SEI_BIST_FAIL] = "SEI BIST fail"
/*
 * Describes one MMU SPI/SEI cause: its text and a clear-bit number
 * (the table that uses this type stores -1 where there is no clear bit).
 */
struct mmu_spi_sei_cause {
	char	cause[50];
	int	clear_bit;
};
2160 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2161 {"page fault", 1}, /* INTERRUPT_CLR[1] */
2162 {"page access", 1}, /* INTERRUPT_CLR[1] */
2163 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */
2164 {"multi hit", 2}, /* INTERRUPT_CLR[2] */
2165 {"mmu rei0", -1}, /* no clear register bit */
2166 {"mmu rei1", -1}, /* no clear register bit */
2167 {"stlb rei0", -1}, /* no clear register bit */
2168 {"stlb rei1", -1}, /* no clear register bit */
2169 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */
2170 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */
2171 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */
2172 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */
2173 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2174 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2175 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2176 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2177 {"slave error", 16}, /* INTERRUPT_CLR[16] */
2178 {"dec error", 17}, /* INTERRUPT_CLR[17] */
2179 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */
2182 struct gaudi2_cache_invld_params {
2183 u64 start_va;
2184 u64 end_va;
2185 u32 inv_start_val;
2186 u32 flags;
2187 bool range_invalidation;
2190 struct gaudi2_tpc_idle_data {
2191 struct engines_data *e;
2192 unsigned long *mask;
2193 bool *is_idle;
2194 const char *tpc_fmt;
2197 struct gaudi2_tpc_mmu_data {
2198 u32 rw_asid;
2201 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2203 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2204 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2205 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2206 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2207 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2208 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2209 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2210 bool is_memset);
2211 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2212 struct engines_data *e);
2213 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2214 struct engines_data *e);
2215 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2216 struct engines_data *e);
2217 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2218 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
/* No-op: the function body contains no statements. */
static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
{
}
2225 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2227 return sizeof(struct packet_msg_short);
2230 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2232 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2235 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2237 struct asic_fixed_properties *prop = &hdev->asic_prop;
2238 int dcore, inst, tpc_seq;
2239 u32 offset;
2241 /* init the return code */
2242 ctx->rc = 0;
2244 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2245 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2246 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2248 if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2249 continue;
2251 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2253 ctx->fn(hdev, dcore, inst, offset, ctx);
2254 if (ctx->rc) {
2255 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2256 dcore, inst);
2257 return;
2262 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2263 return;
2265 /* special check for PCI TPC (DCORE0_TPC6) */
2266 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2267 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2268 if (ctx->rc)
2269 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2272 static bool gaudi2_host_phys_addr_valid(u64 addr)
2274 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2275 return true;
2277 return false;
2280 static int set_number_of_functional_hbms(struct hl_device *hdev)
2282 struct asic_fixed_properties *prop = &hdev->asic_prop;
2283 u8 faulty_hbms = hweight64(hdev->dram_binning);
2285 /* check if all HBMs should be used */
2286 if (!faulty_hbms) {
2287 dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
2288 prop->num_functional_hbms = GAUDI2_HBM_NUM;
2289 return 0;
2293 * check for error condition in which number of binning
2294 * candidates is higher than the maximum supported by the
2295 * driver (in which case binning mask shall be ignored and driver will
2296 * set the default)
2298 if (faulty_hbms > MAX_FAULTY_HBMS) {
2299 dev_err(hdev->dev,
2300 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2301 MAX_FAULTY_HBMS, hdev->dram_binning);
2302 return -EINVAL;
2306 * by default, number of functional HBMs in Gaudi2 is always
2307 * GAUDI2_HBM_NUM - 1.
2309 prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2310 return 0;
2313 static bool gaudi2_is_edma_queue_id(u32 queue_id)
2316 switch (queue_id) {
2317 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
2318 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
2319 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
2320 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
2321 return true;
2322 default:
2323 return false;
2327 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2329 struct asic_fixed_properties *prop = &hdev->asic_prop;
2330 u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
2331 u32 basic_hbm_page_size, edma_idx = 0;
2332 int rc, i;
2334 rc = set_number_of_functional_hbms(hdev);
2335 if (rc)
2336 return -EINVAL;
2339 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
2340 * in which we are using x16 bigger page size to be able to populate the entire
2341 * HBM mappings in the TLB
2343 basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2344 prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
2345 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2346 prop->dram_size = prop->num_functional_hbms * SZ_16G;
2347 prop->dram_base_address = DRAM_PHYS_BASE;
2348 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2349 prop->dram_supports_virtual_memory = true;
2351 prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2352 prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2353 prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2354 prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2356 /* since DRAM page size differs from DMMU page size we need to allocate
2357 * DRAM memory in units of dram_page size and mapping this memory in
2358 * units of DMMU page size. we overcome this size mismatch using a
2359 * scrambling routine which takes a DRAM page and converts it to a DMMU
2360 * page.
2361 * We therefore:
2362 * 1. partition the virtual address space to DRAM-page (whole) pages.
2363 * (suppose we get n such pages)
2364 * 2. limit the amount of virtual address space we got from 1 above to
2365 * a multiple of 64M as we don't want the scrambled address to cross
2366 * the DRAM virtual address space.
2367 * ( m = (n * DRAM_page_size) / DMMU_page_size).
2368 * 3. determine the and address accordingly
2369 * end_addr = start_addr + m * 48M
2371 * the DRAM address MSBs (63:48) are not part of the roundup calculation
2373 prop->dmmu.start_addr = prop->dram_base_address +
2374 (prop->dram_page_size *
2375 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2376 prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2377 div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2379 * Driver can't share an (48MB) HBM page with the F/W in order to prevent FW to block
2380 * the driver part by range register, so it must start at the next (48MB) page
2382 hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);
2385 * The NIC driver section size and the HMMU page tables section in the HBM needs
2386 * to be the remaining size in the first dram page after taking into
2387 * account the F/W image size
2390 /* Reserve region in HBM for HMMU page tables */
2391 prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
2392 ((prop->dram_page_size - hbm_drv_base_offset) -
2393 (HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
2395 /* Set EDMA PQs HBM addresses */
2396 edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;
2398 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2399 if (gaudi2_is_edma_queue_id(i)) {
2400 prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
2401 (edma_idx * HL_QUEUE_SIZE_IN_BYTES);
2402 edma_idx++;
2406 return 0;
2409 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2411 struct asic_fixed_properties *prop = &hdev->asic_prop;
2412 struct hw_queue_properties *q_props;
2413 u32 num_sync_stream_queues = 0;
2414 int i, rc;
2416 prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2417 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2418 GFP_KERNEL);
2420 if (!prop->hw_queues_props)
2421 return -ENOMEM;
2423 q_props = prop->hw_queues_props;
2425 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2426 q_props[i].type = QUEUE_TYPE_HW;
2427 q_props[i].driver_only = 0;
2429 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2430 q_props[i].supports_sync_stream = 0;
2431 } else {
2432 q_props[i].supports_sync_stream = 1;
2433 num_sync_stream_queues++;
2436 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2438 if (gaudi2_is_edma_queue_id(i))
2439 q_props[i].dram_bd = 1;
2442 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2443 q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2444 q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2446 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2447 prop->cfg_base_address = CFG_BASE;
2448 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2449 prop->host_base_address = HOST_PHYS_BASE_0;
2450 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2451 prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2452 prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2453 prop->user_dec_intr_count = NUMBER_OF_DEC;
2454 prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2455 prop->completion_mode = HL_COMPLETION_MODE_CS;
2456 prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2457 prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2459 prop->sram_base_address = SRAM_BASE_ADDR;
2460 prop->sram_size = SRAM_SIZE;
2461 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2462 prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2464 prop->hints_range_reservation = true;
2466 prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2468 prop->max_asid = 2;
2470 prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
2471 prop->mmu_pte_size = HL_PTE_SIZE;
2473 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2474 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2475 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2476 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2477 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2478 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2479 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2480 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2481 prop->dmmu.page_size = PAGE_SIZE_1GB;
2482 prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
2483 prop->dmmu.last_mask = LAST_MASK;
2484 prop->dmmu.host_resident = 0;
2485 prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
2486 prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
2488 /* As we need to set the pgt address in dram for HMMU init so we cannot
2489 * wait to the fw cpucp info to set the dram props as mmu init comes before
2490 * hw init
2492 rc = hdev->asic_funcs->set_dram_properties(hdev);
2493 if (rc)
2494 goto free_qprops;
2496 prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;
2498 prop->pmmu.pgt_size = prop->mmu_pgt_size;
2499 hdev->pmmu_huge_range = true;
2500 prop->pmmu.host_resident = 1;
2501 prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2502 prop->pmmu.last_mask = LAST_MASK;
2503 prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
2504 prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
2506 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2507 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2508 prop->hints_host_hpage_reserved_va_range.start_addr =
2509 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2510 prop->hints_host_hpage_reserved_va_range.end_addr =
2511 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2513 if (PAGE_SIZE == SZ_64K) {
2514 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2515 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2516 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2517 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2518 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2519 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2520 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2521 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2522 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2523 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2524 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2525 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2526 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2527 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2528 prop->pmmu.page_size = PAGE_SIZE_64KB;
2530 /* shifts and masks are the same in PMMU and HPMMU */
2531 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2532 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2533 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2534 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2535 } else {
2536 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2537 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2538 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2539 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2540 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2541 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2542 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2543 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2544 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2545 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2546 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2547 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2548 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2549 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2550 prop->pmmu.page_size = PAGE_SIZE_4KB;
2552 /* shifts and masks are the same in PMMU and HPMMU */
2553 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2554 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2555 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2556 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2559 prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2560 prop->num_engine_cores = CPU_ID_MAX;
2561 prop->cfg_size = CFG_SIZE;
2562 prop->num_of_events = GAUDI2_EVENT_SIZE;
2564 prop->supports_engine_modes = true;
2566 prop->dc_power_default = DC_POWER_DEFAULT;
2568 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2569 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2570 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2571 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2573 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2575 prop->mme_master_slave_mode = 1;
2577 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2578 (num_sync_stream_queues * HL_RSVD_SOBS);
2580 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2581 (num_sync_stream_queues * HL_RSVD_MONS);
2583 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2584 prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2585 prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2587 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2589 prop->fw_cpu_boot_dev_sts0_valid = false;
2590 prop->fw_cpu_boot_dev_sts1_valid = false;
2591 prop->hard_reset_done_by_fw = false;
2592 prop->gic_interrupts_enable = true;
2594 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2596 prop->max_dec = NUMBER_OF_DEC;
2598 prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2600 prop->dma_mask = 64;
2602 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2604 prop->supports_advanced_cpucp_rc = true;
2606 return 0;
2608 free_qprops:
2609 kfree(prop->hw_queues_props);
2610 return rc;
2613 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2615 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2616 bool is_wc[3] = {false, false, true};
2617 int rc;
2619 rc = hl_pci_bars_map(hdev, name, is_wc);
2620 if (rc)
2621 return rc;
2623 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2625 return 0;
2628 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2630 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2631 struct hl_inbound_pci_region pci_region;
2632 u64 old_addr = addr;
2633 int rc;
2635 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2636 return old_addr;
2638 if (hdev->asic_prop.iatu_done_by_fw)
2639 return U64_MAX;
2641 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2642 pci_region.mode = PCI_BAR_MATCH_MODE;
2643 pci_region.bar = DRAM_BAR_ID;
2644 pci_region.addr = addr;
2645 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2646 if (rc)
2647 return U64_MAX;
2649 if (gaudi2) {
2650 old_addr = gaudi2->dram_bar_cur_addr;
2651 gaudi2->dram_bar_cur_addr = addr;
2654 return old_addr;
2657 static int gaudi2_init_iatu(struct hl_device *hdev)
2659 struct hl_inbound_pci_region inbound_region;
2660 struct hl_outbound_pci_region outbound_region;
2661 u32 bar_addr_low, bar_addr_high;
2662 int rc;
2664 if (hdev->asic_prop.iatu_done_by_fw)
2665 return 0;
2667 /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2668 * We must map this region in BAR match mode in order to
2669 * fetch BAR physical base address
2671 inbound_region.mode = PCI_BAR_MATCH_MODE;
2672 inbound_region.bar = SRAM_CFG_BAR_ID;
2673 /* Base address must be aligned to Bar size which is 256 MB */
2674 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2675 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2676 if (rc)
2677 return rc;
2679 /* Fetch physical BAR address */
2680 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2681 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2683 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2685 /* Inbound Region 0 - Bar 0 - Point to CFG */
2686 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2687 inbound_region.bar = SRAM_CFG_BAR_ID;
2688 inbound_region.offset_in_bar = 0;
2689 inbound_region.addr = STM_FLASH_BASE_ADDR;
2690 inbound_region.size = CFG_REGION_SIZE;
2691 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2692 if (rc)
2693 return rc;
2695 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2696 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2697 inbound_region.bar = SRAM_CFG_BAR_ID;
2698 inbound_region.offset_in_bar = CFG_REGION_SIZE;
2699 inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2700 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2701 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2702 if (rc)
2703 return rc;
2705 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2706 inbound_region.mode = PCI_BAR_MATCH_MODE;
2707 inbound_region.bar = DRAM_BAR_ID;
2708 inbound_region.addr = DRAM_PHYS_BASE;
2709 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2710 if (rc)
2711 return rc;
2713 /* Outbound Region 0 - Point to Host */
2714 outbound_region.addr = HOST_PHYS_BASE_0;
2715 outbound_region.size = HOST_PHYS_SIZE_0;
2716 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2718 return rc;
2721 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2723 return RREG32(mmHW_STATE);
2726 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2728 struct asic_fixed_properties *prop = &hdev->asic_prop;
2731 * check for error condition in which number of binning candidates
2732 * is higher than the maximum supported by the driver
2734 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2735 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2736 MAX_CLUSTER_BINNING_FAULTY_TPCS,
2737 hdev->tpc_binning);
2738 return -EINVAL;
2741 prop->tpc_binning_mask = hdev->tpc_binning;
2742 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2744 return 0;
2747 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2749 struct asic_fixed_properties *prop = &hdev->asic_prop;
2750 struct hw_queue_properties *q_props = prop->hw_queues_props;
2751 u64 tpc_binning_mask;
2752 u8 subst_idx = 0;
2753 int i, rc;
2755 rc = gaudi2_tpc_binning_init_prop(hdev);
2756 if (rc)
2757 return rc;
2759 tpc_binning_mask = prop->tpc_binning_mask;
2761 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2762 u8 subst_seq, binned, qid_base;
2764 if (tpc_binning_mask == 0)
2765 break;
2767 if (subst_idx == 0) {
2768 subst_seq = TPC_ID_DCORE0_TPC6;
2769 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2770 } else {
2771 subst_seq = TPC_ID_DCORE3_TPC5;
2772 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2776 /* clear bit from mask */
2777 binned = __ffs(tpc_binning_mask);
2779 * Coverity complains about possible out-of-bound access in
2780 * clear_bit
2782 if (binned >= TPC_ID_SIZE) {
2783 dev_err(hdev->dev,
2784 "Invalid binned TPC (binning mask: %llx)\n",
2785 tpc_binning_mask);
2786 return -EINVAL;
2788 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2790 /* also clear replacing TPC bit from enabled mask */
2791 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2793 /* bin substite TPC's Qs */
2794 q_props[qid_base].binned = 1;
2795 q_props[qid_base + 1].binned = 1;
2796 q_props[qid_base + 2].binned = 1;
2797 q_props[qid_base + 3].binned = 1;
2799 subst_idx++;
2802 return 0;
2805 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2807 struct asic_fixed_properties *prop = &hdev->asic_prop;
2808 u8 num_faulty;
2810 num_faulty = hweight32(hdev->decoder_binning);
2813 * check for error condition in which number of binning candidates
2814 * is higher than the maximum supported by the driver
2816 if (num_faulty > MAX_FAULTY_DECODERS) {
2817 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2818 hdev->decoder_binning);
2819 return -EINVAL;
2822 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2824 if (prop->decoder_binning_mask)
2825 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2826 else
2827 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2829 return 0;
2832 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2834 struct asic_fixed_properties *prop = &hdev->asic_prop;
2836 /* check if we should override default binning */
2837 if (!hdev->dram_binning) {
2838 prop->dram_binning_mask = 0;
2839 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2840 return;
2843 /* set DRAM binning constraints */
2844 prop->faulty_dram_cluster_map |= hdev->dram_binning;
2845 prop->dram_binning_mask = hdev->dram_binning;
2846 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2849 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2851 struct asic_fixed_properties *prop = &hdev->asic_prop;
2852 struct hw_queue_properties *q_props;
2853 u8 seq, num_faulty;
2855 num_faulty = hweight32(hdev->edma_binning);
2858 * check for error condition in which number of binning candidates
2859 * is higher than the maximum supported by the driver
2861 if (num_faulty > MAX_FAULTY_EDMAS) {
2862 dev_err(hdev->dev,
2863 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2864 hdev->edma_binning);
2865 return -EINVAL;
2868 if (!hdev->edma_binning) {
2869 prop->edma_binning_mask = 0;
2870 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2871 return 0;
2874 seq = __ffs((unsigned long)hdev->edma_binning);
2876 /* set binning constraints */
2877 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2878 prop->edma_binning_mask = hdev->edma_binning;
2879 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2881 /* bin substitute EDMA's queue */
2882 q_props = prop->hw_queues_props;
2883 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2884 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2885 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2886 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2888 return 0;
2891 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2893 struct asic_fixed_properties *prop = &hdev->asic_prop;
2894 u8 num_faulty, seq;
2896 /* check if we should override default binning */
2897 if (!xbar_edge_iso_mask) {
2898 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2899 return 0;
2903 * note that it can be set to value other than 0 only after cpucp packet (i.e.
2904 * only the FW can set a redundancy value). for user it'll always be 0.
2906 num_faulty = hweight32(xbar_edge_iso_mask);
2909 * check for error condition in which number of binning candidates
2910 * is higher than the maximum supported by the driver
2912 if (num_faulty > MAX_FAULTY_XBARS) {
2913 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2914 MAX_FAULTY_XBARS);
2915 return -EINVAL;
2918 seq = __ffs((unsigned long)xbar_edge_iso_mask);
2920 /* set binning constraints */
2921 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2922 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2924 return 0;
2927 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2929 int rc;
2932 * mark all clusters as good, each component will "fail" cluster
2933 * based on eFuse/user values.
2934 * If more than single cluster is faulty- the chip is unusable
2936 hdev->asic_prop.faulty_dram_cluster_map = 0;
2938 gaudi2_set_dram_binning_masks(hdev);
2940 rc = gaudi2_set_edma_binning_masks(hdev);
2941 if (rc)
2942 return rc;
2944 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2945 if (rc)
2946 return rc;
2949 /* always initially set to full mask */
2950 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2952 return 0;
2955 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2957 struct asic_fixed_properties *prop = &hdev->asic_prop;
2958 int rc;
2960 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2961 if (rc)
2962 return rc;
2964 /* if we have DRAM binning reported by FW we should perform cluster config */
2965 if (prop->faulty_dram_cluster_map) {
2966 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2968 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2971 return 0;
/*
 * Set all binning masks: cluster (DRAM/EDMA/XBAR) first, then TPC, then
 * decoder. Stops at the first failure.
 *
 * Return: 0 on success, or the error code of the step that failed.
 */
static int gaudi2_set_binning_masks(struct hl_device *hdev)
{
	int rc;

	rc = gaudi2_set_cluster_binning_masks(hdev);
	if (!rc)
		rc = gaudi2_set_tpc_binning_masks(hdev);
	if (!rc)
		rc = gaudi2_set_dec_binning_masks(hdev);

	return rc;
}
2993 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2995 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2996 struct asic_fixed_properties *prop = &hdev->asic_prop;
2997 long max_power;
2998 u64 dram_size;
2999 int rc;
3001 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3002 return 0;
3004 /* No point of asking this information again when not doing hard reset, as the device
3005 * CPU hasn't been reset
3007 if (hdev->reset_info.in_compute_reset)
3008 return 0;
3010 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
3011 mmCPU_BOOT_ERR1);
3012 if (rc)
3013 return rc;
3015 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
3016 if (dram_size) {
3017 /* we can have wither 5 or 6 HBMs. other values are invalid */
3019 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
3020 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
3021 dev_err(hdev->dev,
3022 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
3023 dram_size, prop->dram_size);
3024 dram_size = prop->dram_size;
3027 prop->dram_size = dram_size;
3028 prop->dram_end_address = prop->dram_base_address + dram_size;
3031 if (!strlen(prop->cpucp_info.card_name))
3032 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
3033 CARD_NAME_MAX_LEN);
3035 /* Overwrite binning masks with the actual binning values from F/W */
3036 hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
3037 hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
3038 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
3039 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
3041 dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
3042 hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
3043 hdev->decoder_binning);
3046 * at this point the DRAM parameters need to be updated according to data obtained
3047 * from the FW
3049 rc = hdev->asic_funcs->set_dram_properties(hdev);
3050 if (rc)
3051 return rc;
3053 rc = hdev->asic_funcs->set_binning_masks(hdev);
3054 if (rc)
3055 return rc;
3057 max_power = hl_fw_get_max_power(hdev);
3058 if (max_power < 0)
3059 return max_power;
3061 prop->max_power_default = (u64) max_power;
3063 return 0;
3066 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
3068 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3069 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
3070 int rc;
3072 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3073 return 0;
3075 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
3076 if (rc)
3077 return rc;
3079 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
3081 return 0;
3084 static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev)
3086 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3087 struct asic_fixed_properties *prop = &hdev->asic_prop;
3088 int rc;
3090 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
3091 return 0;
3093 if (prop->dmmu.host_resident)
3094 return 0;
3096 rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0);
3097 if (rc)
3098 dev_err(hdev->dev, "Failed to clear mmu pgt");
3100 return rc;
3103 static int gaudi2_early_init(struct hl_device *hdev)
3105 struct asic_fixed_properties *prop = &hdev->asic_prop;
3106 struct pci_dev *pdev = hdev->pdev;
3107 resource_size_t pci_bar_size;
3108 int rc;
3110 rc = gaudi2_set_fixed_properties(hdev);
3111 if (rc)
3112 return rc;
3114 /* Check BAR sizes */
3115 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
3117 if (pci_bar_size != CFG_BAR_SIZE) {
3118 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3119 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
3120 rc = -ENODEV;
3121 goto free_queue_props;
3124 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
3125 if (pci_bar_size != MSIX_BAR_SIZE) {
3126 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3127 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
3128 rc = -ENODEV;
3129 goto free_queue_props;
3132 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
3133 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
3136 * Only in pldm driver config iATU
3138 if (hdev->pldm)
3139 hdev->asic_prop.iatu_done_by_fw = false;
3140 else
3141 hdev->asic_prop.iatu_done_by_fw = true;
3143 rc = hl_pci_init(hdev);
3144 if (rc)
3145 goto free_queue_props;
3147 /* Before continuing in the initialization, we need to read the preboot
3148 * version to determine whether we run with a security-enabled firmware
3150 rc = hl_fw_read_preboot_status(hdev);
3151 if (rc) {
3152 if (hdev->reset_on_preboot_fail)
3153 /* we are already on failure flow, so don't check if hw_fini fails. */
3154 hdev->asic_funcs->hw_fini(hdev, true, false);
3155 goto pci_fini;
3158 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
3159 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
3160 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
3161 if (rc) {
3162 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
3163 goto pci_fini;
3167 return 0;
3169 pci_fini:
3170 hl_pci_fini(hdev);
3171 free_queue_props:
3172 kfree(hdev->asic_prop.hw_queues_props);
3173 return rc;
3176 static int gaudi2_early_fini(struct hl_device *hdev)
3178 kfree(hdev->asic_prop.hw_queues_props);
3179 hl_pci_fini(hdev);
3181 return 0;
3184 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3186 switch (arc_id) {
3187 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3188 return true;
3189 default:
3190 return false;
3194 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3196 switch (arc_id) {
3197 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3198 return true;
3199 default:
3200 return false;
3204 static void gaudi2_init_arcs(struct hl_device *hdev)
3206 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3207 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3208 u64 arc_id;
3209 u32 i;
3211 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3212 if (gaudi2_is_arc_enabled(hdev, i))
3213 continue;
3215 gaudi2_set_arc_id_cap(hdev, i);
3218 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3219 if (!gaudi2_is_queue_enabled(hdev, i))
3220 continue;
3222 arc_id = gaudi2_queue_id_to_arc_id[i];
3223 if (gaudi2_is_arc_enabled(hdev, arc_id))
3224 continue;
3226 if (gaudi2_is_arc_nic_owned(arc_id) &&
3227 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3228 continue;
3230 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3231 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3232 continue;
3234 gaudi2_set_arc_id_cap(hdev, arc_id);
3237 /* Fetch ARC scratchpad address */
3238 hdev->asic_prop.engine_core_interrupt_reg_addr =
3239 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3242 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3244 u32 reg_base, reg_val;
3245 int rc;
3247 switch (cpu_id) {
3248 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3249 /* Each ARC scheduler has 2 consecutive DCCM blocks */
3250 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3251 ARC_DCCM_BLOCK_SIZE * 2, true);
3252 if (rc)
3253 return rc;
3254 break;
3255 case CPU_ID_SCHED_ARC4:
3256 case CPU_ID_SCHED_ARC5:
3257 case CPU_ID_MME_QMAN_ARC0:
3258 case CPU_ID_MME_QMAN_ARC1:
3259 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3261 /* Scrub lower DCCM block */
3262 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3263 ARC_DCCM_BLOCK_SIZE, true);
3264 if (rc)
3265 return rc;
3267 /* Switch to upper DCCM block */
3268 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3269 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3271 /* Scrub upper DCCM block */
3272 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3273 ARC_DCCM_BLOCK_SIZE, true);
3274 if (rc)
3275 return rc;
3277 /* Switch to lower DCCM block */
3278 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3279 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3280 break;
3281 default:
3282 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3283 ARC_DCCM_BLOCK_SIZE, true);
3284 if (rc)
3285 return rc;
3288 return 0;
3291 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3293 u16 arc_id;
3294 int rc;
3296 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3297 if (!gaudi2_is_arc_enabled(hdev, arc_id))
3298 continue;
3300 rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3301 if (rc)
3302 return rc;
3305 return 0;
3308 static int gaudi2_late_init(struct hl_device *hdev)
3310 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3311 int rc;
3313 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3314 gaudi2->virt_msix_db_dma_addr);
3315 if (rc)
3316 return rc;
3318 rc = gaudi2_fetch_psoc_frequency(hdev);
3319 if (rc) {
3320 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3321 goto disable_pci_access;
3324 rc = gaudi2_mmu_clear_pgt_range(hdev);
3325 if (rc) {
3326 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
3327 goto disable_pci_access;
3330 gaudi2_init_arcs(hdev);
3332 rc = gaudi2_scrub_arcs_dccm(hdev);
3333 if (rc) {
3334 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3335 goto disable_pci_access;
3338 gaudi2_init_security(hdev);
3340 return 0;
3342 disable_pci_access:
3343 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3345 return rc;
/* Late finalization: release the hwmon resources of the device. */
static void gaudi2_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
3353 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3355 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3357 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3358 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3359 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3360 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3361 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3362 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3363 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3364 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3365 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3366 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3369 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3371 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3372 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3373 u32 block_size, umr_start_idx, num_umr_blocks;
3374 int i;
3376 for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3377 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3378 block_size = ARC_DCCM_BLOCK_SIZE * 2;
3379 else
3380 block_size = ARC_DCCM_BLOCK_SIZE;
3382 blocks[i].address = gaudi2_arc_dccm_bases[i];
3383 blocks[i].size = block_size;
3386 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3387 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3389 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3390 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3392 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3393 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3395 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3396 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3398 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3399 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3401 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3402 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3404 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3405 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3407 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3408 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3410 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3411 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3412 for (i = 0 ; i < num_umr_blocks ; i++) {
3413 u8 nic_id, umr_block_id;
3415 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3416 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3418 blocks[umr_start_idx + i].address =
3419 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3420 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3421 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3422 umr_block_id * NIC_UMR_OFFSET;
3423 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3426 /* Expose decoder HW configuration block to user */
3427 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3429 for (i = 1; i < NUM_OF_DCORES; ++i) {
3430 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3431 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3433 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3434 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3436 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3437 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3441 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3443 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3444 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3445 int i, j, rc = 0;
3447 /* The device ARC works with 32-bits addresses, and because there is a single HW register
3448 * that holds the extension bits (49..28), these bits must be identical in all the allocated
3449 * range.
3452 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3453 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3454 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3455 if (!virt_addr_arr[i]) {
3456 rc = -ENOMEM;
3457 goto free_dma_mem_arr;
3460 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3461 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3462 break;
3465 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3466 dev_err(hdev->dev,
3467 "MSB of ARC accessible DMA memory are not identical in all range\n");
3468 rc = -EFAULT;
3469 goto free_dma_mem_arr;
3472 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3473 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3475 free_dma_mem_arr:
3476 for (j = 0 ; j < i ; j++)
3477 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3478 dma_addr_arr[j]);
3480 return rc;
3483 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3485 struct asic_fixed_properties *prop = &hdev->asic_prop;
3486 struct pci_mem_region *region;
3488 /* CFG */
3489 region = &hdev->pci_mem_region[PCI_REGION_CFG];
3490 region->region_base = CFG_BASE;
3491 region->region_size = CFG_SIZE;
3492 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3493 region->bar_size = CFG_BAR_SIZE;
3494 region->bar_id = SRAM_CFG_BAR_ID;
3495 region->used = 1;
3497 /* SRAM */
3498 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3499 region->region_base = SRAM_BASE_ADDR;
3500 region->region_size = SRAM_SIZE;
3501 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3502 region->bar_size = CFG_BAR_SIZE;
3503 region->bar_id = SRAM_CFG_BAR_ID;
3504 region->used = 1;
3506 /* DRAM */
3507 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3508 region->region_base = DRAM_PHYS_BASE;
3509 region->region_size = hdev->asic_prop.dram_size;
3510 region->offset_in_bar = 0;
3511 region->bar_size = prop->dram_pci_bar_size;
3512 region->bar_id = DRAM_BAR_ID;
3513 region->used = 1;
3516 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3518 struct asic_fixed_properties *prop = &hdev->asic_prop;
3519 int i, j, k;
3521 /* Initialize TPC interrupt */
3522 HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3524 /* Initialize unexpected error interrupt */
3525 HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3526 HL_USR_INTERRUPT_UNEXPECTED);
3528 /* Initialize common user CQ interrupt */
3529 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3530 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3532 /* Initialize common decoder interrupt */
3533 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3534 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3536 /* User interrupts structure holds both decoder and user interrupts from various engines.
3537 * We first initialize the decoder interrupts and then we add the user interrupts.
3538 * The only limitation is that the last decoder interrupt id must be smaller
3539 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3542 /* Initialize decoder interrupts, expose only normal interrupts,
3543 * error interrupts to be handled by driver
3545 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3546 i += 2, j++)
3547 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3548 HL_USR_INTERRUPT_DECODER);
3550 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3551 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
/* Return a random int from get_random_u32(), substituting 1 for 0. */
static inline int gaudi2_get_non_zero_random_int(void)
{
	int value = get_random_u32();

	if (!value)
		return 1;

	return value;
}
3561 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3563 struct asic_fixed_properties *prop = &hdev->asic_prop;
3564 struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3565 &prop->skip_special_blocks_cfg;
3567 kfree(prop->special_blocks);
3568 kfree(skip_special_blocks_cfg->block_types);
3569 kfree(skip_special_blocks_cfg->block_ranges);
/* Release everything set up for the special blocks iterator. */
static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
{
	gaudi2_special_blocks_free(hdev);
}
3577 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3578 struct hl_special_blocks_cfg *special_blocks_cfg,
3579 u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3581 return false;
3584 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3586 struct asic_fixed_properties *prop = &hdev->asic_prop;
3587 int i, rc;
3589 /* Configure Special blocks */
3590 prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
3591 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3592 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3593 sizeof(*prop->special_blocks), GFP_KERNEL);
3594 if (!prop->special_blocks)
3595 return -ENOMEM;
3597 for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3598 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3599 sizeof(*prop->special_blocks));
3601 /* Configure when to skip Special blocks */
3602 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3603 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3605 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3606 prop->skip_special_blocks_cfg.block_types =
3607 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3608 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3609 if (!prop->skip_special_blocks_cfg.block_types) {
3610 rc = -ENOMEM;
3611 goto free_special_blocks;
3614 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3615 sizeof(gaudi2_iterator_skip_block_types));
3617 prop->skip_special_blocks_cfg.block_types_len =
3618 ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3621 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3622 prop->skip_special_blocks_cfg.block_ranges =
3623 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3624 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3625 if (!prop->skip_special_blocks_cfg.block_ranges) {
3626 rc = -ENOMEM;
3627 goto free_skip_special_blocks_types;
3630 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3631 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3632 &gaudi2_iterator_skip_block_ranges[i],
3633 sizeof(struct range));
3635 prop->skip_special_blocks_cfg.block_ranges_len =
3636 ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3639 return 0;
3641 free_skip_special_blocks_types:
3642 kfree(prop->skip_special_blocks_cfg.block_types);
3643 free_special_blocks:
3644 kfree(prop->special_blocks);
3646 return rc;
/*
 * Configure the special blocks iterator.
 *
 * Return: the result of gaudi2_special_blocks_config().
 */
static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
{
	int rc = gaudi2_special_blocks_config(hdev);

	return rc;
}
3654 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3656 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3657 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3658 int i;
3660 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3661 /* bail-out if this is an allocation failure point */
3662 if (!msg_info[i].kern_addr)
3663 break;
3665 hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3666 msg_info[i].kern_addr = NULL;
3670 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3672 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3673 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3674 int i, rc;
3676 /* allocate a message-short buf for each Q we intend to test */
3677 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3678 msg_info[i].kern_addr =
3679 (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3680 GFP_KERNEL, &msg_info[i].dma_addr);
3681 if (!msg_info[i].kern_addr) {
3682 dev_err(hdev->dev,
3683 "Failed to allocate dma memory for H/W queue %d testing\n", i);
3684 rc = -ENOMEM;
3685 goto err_exit;
3689 return 0;
3691 err_exit:
3692 gaudi2_test_queues_msgs_free(hdev);
3693 return rc;
3696 static int gaudi2_sw_init(struct hl_device *hdev)
3698 struct asic_fixed_properties *prop = &hdev->asic_prop;
3699 struct gaudi2_device *gaudi2;
3700 int i, rc;
3702 /* Allocate device structure */
3703 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3704 if (!gaudi2)
3705 return -ENOMEM;
3707 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3708 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3709 continue;
3711 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3712 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3713 GAUDI2_EVENT_SIZE);
3714 rc = -EINVAL;
3715 goto free_gaudi2_device;
3718 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3721 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3722 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3724 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3726 hdev->asic_specific = gaudi2;
3728 /* Create DMA pool for small allocations.
3729 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3730 * PI/CI registers allocated from this pool have this restriction
3732 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3733 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3734 if (!hdev->dma_pool) {
3735 dev_err(hdev->dev, "failed to create DMA pool\n");
3736 rc = -ENOMEM;
3737 goto free_gaudi2_device;
3740 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3741 if (rc)
3742 goto free_dma_pool;
3744 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3745 if (!hdev->cpu_accessible_dma_pool) {
3746 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3747 rc = -ENOMEM;
3748 goto free_cpu_dma_mem;
3751 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3752 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3753 if (rc) {
3754 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3755 rc = -EFAULT;
3756 goto free_cpu_accessible_dma_pool;
3759 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3760 &gaudi2->virt_msix_db_dma_addr);
3761 if (!gaudi2->virt_msix_db_cpu_addr) {
3762 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3763 rc = -ENOMEM;
3764 goto free_cpu_accessible_dma_pool;
3767 spin_lock_init(&gaudi2->hw_queues_lock);
3769 gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE;
3771 gaudi2_user_mapped_blocks_init(hdev);
3773 /* Initialize user interrupts */
3774 gaudi2_user_interrupt_setup(hdev);
3776 hdev->supports_coresight = true;
3777 hdev->supports_sync_stream = true;
3778 hdev->supports_cb_mapping = true;
3779 hdev->supports_wait_for_multi_cs = false;
3781 prop->supports_compute_reset = true;
3783 /* Event queue sanity check added in FW version 1.11 */
3784 if (hl_fw_version_cmp(hdev, 1, 11, 0) < 0)
3785 hdev->event_queue.check_eqe_index = false;
3786 else
3787 hdev->event_queue.check_eqe_index = true;
3789 hdev->asic_funcs->set_pci_memory_regions(hdev);
3791 rc = gaudi2_special_blocks_iterator_config(hdev);
3792 if (rc)
3793 goto free_virt_msix_db_mem;
3795 rc = gaudi2_test_queues_msgs_alloc(hdev);
3796 if (rc)
3797 goto special_blocks_free;
3799 hdev->heartbeat_debug_info.cpu_queue_id = GAUDI2_QUEUE_ID_CPU_PQ;
3801 return 0;
3803 special_blocks_free:
3804 gaudi2_special_blocks_iterator_free(hdev);
3805 free_virt_msix_db_mem:
3806 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3807 free_cpu_accessible_dma_pool:
3808 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3809 free_cpu_dma_mem:
3810 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3811 hdev->cpu_accessible_dma_address);
3812 free_dma_pool:
3813 dma_pool_destroy(hdev->dma_pool);
3814 free_gaudi2_device:
3815 kfree(gaudi2);
3816 return rc;
3819 static int gaudi2_sw_fini(struct hl_device *hdev)
3821 struct asic_fixed_properties *prop = &hdev->asic_prop;
3822 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3824 gaudi2_test_queues_msgs_free(hdev);
3826 gaudi2_special_blocks_iterator_free(hdev);
3828 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3830 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3832 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3833 hdev->cpu_accessible_dma_address);
3835 dma_pool_destroy(hdev->dma_pool);
3837 kfree(gaudi2);
3839 return 0;
3842 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3844 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3845 QM_GLBL_CFG1_CQF_STOP |
3846 QM_GLBL_CFG1_CP_STOP);
3848 /* stop also the ARC */
3849 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3852 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3854 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3855 QM_GLBL_CFG1_CQF_FLUSH |
3856 QM_GLBL_CFG1_CP_FLUSH);
3859 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3861 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3865 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3867 * @hdev: pointer to the habanalabs device structure
3868 * @queue_id: queue to clear fence counters to
3869 * @skip_fence: if true set maximum fence value to all fence counters to avoid
3870 * getting stuck on any fence value. otherwise set all fence
3871 * counters to 0 (standard clear of fence counters)
3873 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3874 bool skip_fence)
3876 u32 size, reg_base;
3877 u32 addr, val;
3879 reg_base = gaudi2_qm_blocks_bases[queue_id];
3881 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3882 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3885 * in case we want to make sure that QM that is stuck on a fence will
3886 * be released we should set the fence counter to a higher value that
3887 * the value the QM waiting for. to comply with any fence counter of
3888 * any value we set maximum fence value to all counters
3890 val = skip_fence ? U32_MAX : 0;
3891 gaudi2_memset_device_lbw(hdev, addr, size, val);
3894 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3896 u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3898 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3899 gaudi2_flush_qman_common(hdev, reg_base);
3900 gaudi2_flush_qman_arc_common(hdev, reg_base);
3903 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3905 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3906 int dcore, inst;
3908 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3909 goto stop_edma_qmans;
3911 /* Stop CPs of PDMA QMANs */
3912 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3913 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3915 stop_edma_qmans:
3916 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3917 return;
3919 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3920 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3921 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3922 u32 qm_base;
3924 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3925 continue;
3927 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3928 inst * DCORE_EDMA_OFFSET;
3930 /* Stop CPs of EDMA QMANs */
3931 gaudi2_stop_qman_common(hdev, qm_base);
3936 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3938 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3939 u32 offset, i;
3941 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3943 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3944 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3945 continue;
3947 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3951 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3953 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3954 u32 reg_base;
3955 int i;
3957 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3958 return;
3960 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3961 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3962 continue;
3964 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3965 gaudi2_stop_qman_common(hdev, reg_base);
3969 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3971 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3972 u32 reg_base;
3973 int i;
3975 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3976 return;
3978 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3979 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3980 continue;
3982 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3983 gaudi2_stop_qman_common(hdev, reg_base);
3987 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3989 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3990 u32 reg_base, queue_id;
3991 int i;
3993 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3994 return;
3996 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3998 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3999 if (!(hdev->nic_ports_mask & BIT(i)))
4000 continue;
4002 reg_base = gaudi2_qm_blocks_bases[queue_id];
4003 gaudi2_stop_qman_common(hdev, reg_base);
4007 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
4009 u32 reg_val;
4011 reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
4012 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
4015 static void gaudi2_dma_stall(struct hl_device *hdev)
4017 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4018 int dcore, inst;
4020 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4021 goto stall_edma;
4023 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
4024 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
4026 stall_edma:
4027 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4028 return;
4030 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4031 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4032 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4033 u32 core_base;
4035 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4036 continue;
4038 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
4039 inst * DCORE_EDMA_OFFSET;
4041 /* Stall CPs of EDMA QMANs */
4042 gaudi2_stall_dma_common(hdev, core_base);
4047 static void gaudi2_mme_stall(struct hl_device *hdev)
4049 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4050 u32 offset, i;
4052 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
4054 for (i = 0 ; i < NUM_OF_DCORES ; i++)
4055 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4056 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
4059 static void gaudi2_tpc_stall(struct hl_device *hdev)
4061 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4062 u32 reg_base;
4063 int i;
4065 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4066 return;
4068 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4069 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4070 continue;
4072 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
4073 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
4077 static void gaudi2_rotator_stall(struct hl_device *hdev)
4079 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4080 u32 reg_val;
4081 int i;
4083 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4084 return;
4086 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
4087 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
4088 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
4090 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4091 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4092 continue;
4094 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
4098 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
4100 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
4103 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
4105 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4106 int dcore, inst;
4108 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4109 goto stop_edma_qmans;
4111 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
4112 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
4114 stop_edma_qmans:
4115 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4116 return;
4118 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4119 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4120 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4121 u32 qm_base;
4123 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4124 continue;
4126 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
4127 inst * DCORE_EDMA_OFFSET;
4129 /* Disable CPs of EDMA QMANs */
4130 gaudi2_disable_qman_common(hdev, qm_base);
4135 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
4137 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4138 u32 offset, i;
4140 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
4142 for (i = 0 ; i < NUM_OF_DCORES ; i++)
4143 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4144 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
4147 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
4149 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4150 u32 reg_base;
4151 int i;
4153 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4154 return;
4156 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4157 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4158 continue;
4160 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
4161 gaudi2_disable_qman_common(hdev, reg_base);
4165 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4167 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4168 u32 reg_base;
4169 int i;
4171 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4172 return;
4174 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4175 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4176 continue;
4178 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4179 gaudi2_disable_qman_common(hdev, reg_base);
4183 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4185 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4186 u32 reg_base, queue_id;
4187 int i;
4189 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4190 return;
4192 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4194 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4195 if (!(hdev->nic_ports_mask & BIT(i)))
4196 continue;
4198 reg_base = gaudi2_qm_blocks_bases[queue_id];
4199 gaudi2_disable_qman_common(hdev, reg_base);
4203 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4205 /* Disable the timestamp counter */
4206 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4208 /* Zero the lower/upper parts of the 64-bit counter */
4209 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4210 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4212 /* Enable the counter */
4213 WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4216 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4218 /* Disable the timestamp counter */
4219 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4222 static const char *gaudi2_irq_name(u16 irq_number)
4224 switch (irq_number) {
4225 case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4226 return "gaudi2 cpu eq";
4227 case GAUDI2_IRQ_NUM_COMPLETION:
4228 return "gaudi2 completion";
4229 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4230 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4231 case GAUDI2_IRQ_NUM_TPC_ASSERT:
4232 return "gaudi2 tpc assert";
4233 case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4234 return "gaudi2 unexpected error";
4235 case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4236 return "gaudi2 user completion";
4237 case GAUDI2_IRQ_NUM_EQ_ERROR:
4238 return "gaudi2 eq error";
4239 default:
4240 return "invalid";
4244 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4246 int i, irq, relative_idx;
4247 struct hl_dec *dec;
4249 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4250 irq = pci_irq_vector(hdev->pdev, i);
4251 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4253 dec = hdev->dec + relative_idx / 2;
4255 /* We pass different structures depending on the irq handler. For the abnormal
4256 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4257 * user_interrupt entry
4259 free_irq(irq, ((relative_idx % 2) ?
4260 (void *) dec :
4261 (void *) &hdev->user_interrupt[dec->core_id]));
4265 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4267 int rc, i, irq_init_cnt, irq, relative_idx;
4268 struct hl_dec *dec;
4270 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4271 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4272 i++, irq_init_cnt++) {
4274 irq = pci_irq_vector(hdev->pdev, i);
4275 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4277 /* We pass different structures depending on the irq handler. For the abnormal
4278 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4279 * user_interrupt entry
4281 * TODO: change the dec abnrm to threaded irq
4284 dec = hdev->dec + relative_idx / 2;
4285 if (relative_idx % 2) {
4286 rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4287 gaudi2_irq_name(i), (void *) dec);
4288 } else {
4289 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4290 (void *) &hdev->user_interrupt[dec->core_id]);
4293 if (rc) {
4294 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4295 goto free_dec_irqs;
4299 return 0;
4301 free_dec_irqs:
4302 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4303 return rc;
4306 static int gaudi2_enable_msix(struct hl_device *hdev)
4308 struct asic_fixed_properties *prop = &hdev->asic_prop;
4309 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4310 int rc, irq, i, j, user_irq_init_cnt;
4311 struct hl_cq *cq;
4313 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4314 return 0;
4316 hl_init_cpu_for_irq(hdev);
4318 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4319 PCI_IRQ_MSIX);
4320 if (rc < 0) {
4321 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4322 GAUDI2_MSIX_ENTRIES, rc);
4323 return rc;
4326 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4327 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4328 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4329 if (rc) {
4330 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4331 goto free_irq_vectors;
4334 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4335 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4336 &hdev->event_queue);
4337 if (rc) {
4338 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4339 goto free_completion_irq;
4342 rc = gaudi2_dec_enable_msix(hdev);
4343 if (rc) {
4344 dev_err(hdev->dev, "Failed to enable decoder IRQ");
4345 goto free_event_irq;
4348 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4349 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4350 gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
4351 &hdev->tpc_interrupt);
4352 if (rc) {
4353 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4354 goto free_dec_irq;
4357 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4358 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4359 gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4360 &hdev->unexpected_error_interrupt);
4361 if (rc) {
4362 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4363 goto free_tpc_irq;
4366 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4367 user_irq_init_cnt < prop->user_interrupt_count;
4368 i++, j++, user_irq_init_cnt++) {
4370 irq = pci_irq_vector(hdev->pdev, i);
4371 hl_set_irq_affinity(hdev, irq);
4372 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4373 &hdev->user_interrupt[j]);
4374 if (rc) {
4375 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4376 goto free_user_irq;
4380 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4381 rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
4382 IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
4383 hdev);
4384 if (rc) {
4385 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4386 goto free_user_irq;
4389 gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4391 return 0;
4393 free_user_irq:
4394 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4395 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4397 irq = pci_irq_vector(hdev->pdev, i);
4398 irq_set_affinity_and_hint(irq, NULL);
4399 free_irq(irq, &hdev->user_interrupt[j]);
4401 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4402 free_irq(irq, &hdev->unexpected_error_interrupt);
4403 free_tpc_irq:
4404 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4405 free_irq(irq, &hdev->tpc_interrupt);
4406 free_dec_irq:
4407 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4408 free_event_irq:
4409 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4410 free_irq(irq, cq);
4412 free_completion_irq:
4413 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4414 free_irq(irq, cq);
4416 free_irq_vectors:
4417 pci_free_irq_vectors(hdev->pdev);
4419 return rc;
4422 static void gaudi2_sync_irqs(struct hl_device *hdev)
4424 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4425 int i, j;
4426 int irq;
4428 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4429 return;
4431 /* Wait for all pending IRQs to be finished */
4432 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4434 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4435 irq = pci_irq_vector(hdev->pdev, i);
4436 synchronize_irq(irq);
4439 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4440 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4442 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4443 i++, j++) {
4444 irq = pci_irq_vector(hdev->pdev, i);
4445 synchronize_irq(irq);
4448 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4449 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
4452 static void gaudi2_disable_msix(struct hl_device *hdev)
4454 struct asic_fixed_properties *prop = &hdev->asic_prop;
4455 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4456 struct hl_cq *cq;
4457 int irq, i, j, k;
4459 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4460 return;
4462 gaudi2_sync_irqs(hdev);
4464 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4465 free_irq(irq, &hdev->event_queue);
4467 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4469 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4470 free_irq(irq, &hdev->tpc_interrupt);
4472 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4473 free_irq(irq, &hdev->unexpected_error_interrupt);
4475 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4476 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4478 irq = pci_irq_vector(hdev->pdev, i);
4479 irq_set_affinity_and_hint(irq, NULL);
4480 free_irq(irq, &hdev->user_interrupt[j]);
4483 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4484 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4485 free_irq(irq, cq);
4487 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4488 free_irq(irq, hdev);
4490 pci_free_irq_vectors(hdev->pdev);
4492 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4495 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4497 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4498 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4499 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4500 int rc;
4502 if (hdev->pldm)
4503 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4504 else
4505 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4507 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4508 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4509 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4510 continue;
4512 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4514 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4516 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4518 /* Wait till all traffic from decoder stops
4519 * before apply core reset.
4521 rc = hl_poll_timeout(
4522 hdev,
4523 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4524 graceful,
4525 (graceful & graceful_pend_mask),
4526 100,
4527 timeout_usec);
4528 if (rc)
4529 dev_err(hdev->dev,
4530 "Failed to stop traffic from DCORE%d Decoder %d\n",
4531 dcore_id, dec_id);
4535 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4537 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4538 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4539 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4540 int rc;
4542 if (hdev->pldm)
4543 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4544 else
4545 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4547 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4548 dec_bit = PCIE_DEC_SHIFT + dec_id;
4549 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4550 continue;
4552 offset = dec_id * PCIE_VDEC_OFFSET;
4554 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4556 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4558 /* Wait till all traffic from decoder stops
4559 * before apply core reset.
4561 rc = hl_poll_timeout(
4562 hdev,
4563 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4564 graceful,
4565 (graceful & graceful_pend_mask),
4566 100,
4567 timeout_usec);
4568 if (rc)
4569 dev_err(hdev->dev,
4570 "Failed to stop traffic from PCIe Decoder %d\n",
4571 dec_id);
4575 static void gaudi2_stop_dec(struct hl_device *hdev)
4577 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4578 int dcore_id;
4580 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4581 return;
4583 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4584 gaudi2_stop_dcore_dec(hdev, dcore_id);
4586 gaudi2_stop_pcie_dec(hdev);
4589 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4591 u32 reg_base, reg_val;
4593 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4594 if (run_mode == HL_ENGINE_CORE_RUN)
4595 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4596 else
4597 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4599 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4602 static void gaudi2_halt_arcs(struct hl_device *hdev)
4604 u16 arc_id;
4606 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4607 if (gaudi2_is_arc_enabled(hdev, arc_id))
4608 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4612 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4614 int rc;
4615 u32 reg_base, val, ack_mask, timeout_usec = 100000;
4617 if (hdev->pldm)
4618 timeout_usec *= 100;
4620 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4621 if (run_mode == HL_ENGINE_CORE_RUN)
4622 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4623 else
4624 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4626 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4627 val, ((val & ack_mask) == ack_mask),
4628 1000, timeout_usec);
4630 if (!rc) {
4631 /* Clear */
4632 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4633 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4636 return rc;
4639 static void gaudi2_reset_arcs(struct hl_device *hdev)
4641 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4642 u16 arc_id;
4644 if (!gaudi2)
4645 return;
4647 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4648 if (gaudi2_is_arc_enabled(hdev, arc_id))
4649 gaudi2_clr_arc_id_cap(hdev, arc_id);
4652 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4654 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4655 u32 queue_id;
4656 int i;
4658 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4659 return;
4661 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4663 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4664 if (!(hdev->nic_ports_mask & BIT(i)))
4665 continue;
4667 gaudi2_qman_manual_flush_common(hdev, queue_id);
4671 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4672 u32 num_cores, u32 core_command)
4674 int i, rc;
4676 for (i = 0 ; i < num_cores ; i++) {
4677 if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4678 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4681 for (i = 0 ; i < num_cores ; i++) {
4682 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4683 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4685 if (rc) {
4686 dev_err(hdev->dev, "failed to %s arc: %d\n",
4687 (core_command == HL_ENGINE_CORE_HALT) ?
4688 "HALT" : "RUN", core_ids[i]);
4689 return -1;
4694 return 0;
4697 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4699 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4700 u32 reg_base, reg_addr, reg_val, tpc_id;
4702 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4703 return 0;
4705 tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4706 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4707 return 0;
4709 reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4710 reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4711 reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4712 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4713 WREG32(reg_addr, reg_val);
4715 if (engine_command == HL_ENGINE_RESUME) {
4716 reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4717 reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4718 RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4721 return 0;
4724 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4726 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4727 u32 reg_base, reg_addr, reg_val, mme_id;
4729 mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4730 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4731 return 0;
4733 reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4734 reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4735 reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4736 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4737 WREG32(reg_addr, reg_val);
4739 return 0;
4742 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4744 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4745 u32 reg_base, reg_addr, reg_val, edma_id;
4747 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4748 return 0;
4750 edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4751 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4752 return 0;
4754 reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4755 reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4756 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4757 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4758 WREG32(reg_addr, reg_val);
4760 if (engine_command == HL_ENGINE_STALL) {
4761 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4762 FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4763 WREG32(reg_addr, reg_val);
4766 return 0;
4769 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4770 u32 *engine_ids, u32 num_engines, u32 engine_command)
4772 int i, rc;
4774 for (i = 0 ; i < num_engines ; ++i) {
4775 switch (engine_ids[i]) {
4776 case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4777 case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4778 case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4779 case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4780 rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4781 if (rc)
4782 return rc;
4784 break;
4785 case GAUDI2_DCORE0_ENGINE_ID_MME:
4786 case GAUDI2_DCORE1_ENGINE_ID_MME:
4787 case GAUDI2_DCORE2_ENGINE_ID_MME:
4788 case GAUDI2_DCORE3_ENGINE_ID_MME:
4789 rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4790 if (rc)
4791 return rc;
4793 break;
4794 case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4795 case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4796 case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4797 case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4798 rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4799 if (rc)
4800 return rc;
4802 break;
4803 default:
4804 dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4805 return -EINVAL;
4809 return 0;
4812 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4813 u32 num_engines, u32 engine_command)
4815 switch (engine_command) {
4816 case HL_ENGINE_CORE_HALT:
4817 case HL_ENGINE_CORE_RUN:
4818 return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4820 case HL_ENGINE_STALL:
4821 case HL_ENGINE_RESUME:
4822 return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4824 default:
4825 dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4826 return -EINVAL;
4830 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4832 u32 wait_timeout_ms;
4834 if (hdev->pldm)
4835 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4836 else
4837 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4839 if (fw_reset)
4840 goto skip_engines;
4842 gaudi2_stop_dma_qmans(hdev);
4843 gaudi2_stop_mme_qmans(hdev);
4844 gaudi2_stop_tpc_qmans(hdev);
4845 gaudi2_stop_rot_qmans(hdev);
4846 gaudi2_stop_nic_qmans(hdev);
4847 msleep(wait_timeout_ms);
4849 gaudi2_halt_arcs(hdev);
4850 gaudi2_dma_stall(hdev);
4851 gaudi2_mme_stall(hdev);
4852 gaudi2_tpc_stall(hdev);
4853 gaudi2_rotator_stall(hdev);
4855 msleep(wait_timeout_ms);
4857 gaudi2_stop_dec(hdev);
4860 * in case of soft reset do a manual flush for QMANs (currently called
4861 * only for NIC QMANs
4863 if (!hard_reset)
4864 gaudi2_nic_qmans_manual_flush(hdev);
4866 gaudi2_disable_dma_qmans(hdev);
4867 gaudi2_disable_mme_qmans(hdev);
4868 gaudi2_disable_tpc_qmans(hdev);
4869 gaudi2_disable_rot_qmans(hdev);
4870 gaudi2_disable_nic_qmans(hdev);
4871 gaudi2_disable_timestamp(hdev);
4873 skip_engines:
4874 if (hard_reset) {
4875 gaudi2_disable_msix(hdev);
4876 return;
4879 gaudi2_sync_irqs(hdev);
4882 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4884 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4886 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4887 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4888 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4889 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4890 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4891 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4892 pre_fw_load->wait_for_preboot_extended_timeout =
4893 GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
4896 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4898 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4899 struct dynamic_fw_load_mgr *dynamic_loader;
4900 struct cpu_dyn_regs *dyn_regs;
4902 /* fill common fields */
4903 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4904 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4905 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4906 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4907 fw_loader->skip_bmc = false;
4908 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4909 fw_loader->dram_bar_id = DRAM_BAR_ID;
4910 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4912 /* here we update initial values for few specific dynamic regs (as
4913 * before reading the first descriptor from FW those value has to be
4914 * hard-coded). in later stages of the protocol those values will be
4915 * updated automatically by reading the FW descriptor so data there
4916 * will always be up-to-date
4918 dynamic_loader = &hdev->fw_loader.dynamic_loader;
4919 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4920 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4921 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4922 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4925 static int gaudi2_init_cpu(struct hl_device *hdev)
4927 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4928 int rc;
4930 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4931 return 0;
4933 if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4934 return 0;
4936 rc = hl_fw_init_cpu(hdev);
4937 if (rc)
4938 return rc;
4940 gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4942 return 0;
4945 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4947 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4948 struct asic_fixed_properties *prop = &hdev->asic_prop;
4949 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4950 struct cpu_dyn_regs *dyn_regs;
4951 struct hl_eq *eq;
4952 u32 status;
4953 int err;
4955 if (!hdev->cpu_queues_enable)
4956 return 0;
4958 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4959 return 0;
4961 eq = &hdev->event_queue;
4963 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4965 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4966 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4968 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4969 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4971 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4972 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4974 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4975 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4976 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4978 /* Used for EQ CI */
4979 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4981 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4983 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4985 /* Let the ARC know we are ready as it is now handling those queues */
4987 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4988 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4990 err = hl_poll_timeout(
4991 hdev,
4992 mmCPU_IF_QUEUE_INIT,
4993 status,
4994 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4995 1000,
4996 cpu_timeout);
4998 if (err) {
4999 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
5000 return -EIO;
5003 /* update FW application security bits */
5004 if (prop->fw_cpu_boot_dev_sts0_valid)
5005 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
5007 if (prop->fw_cpu_boot_dev_sts1_valid)
5008 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
5010 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
5011 return 0;
5014 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
5015 u32 queue_id_base)
5017 struct hl_hw_queue *q;
5018 u32 pq_id, pq_offset;
5020 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
5021 q = &hdev->kernel_queues[queue_id_base + pq_id];
5022 pq_offset = pq_id * 4;
5024 if (q->dram_bd) {
5025 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
5026 lower_32_bits(q->pq_dram_address));
5027 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
5028 upper_32_bits(q->pq_dram_address));
5029 } else {
5030 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
5031 lower_32_bits(q->bus_address));
5032 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
5033 upper_32_bits(q->bus_address));
5035 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
5036 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
5037 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
5041 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
5043 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
5045 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
5046 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
5047 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5048 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5050 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
5051 cp_offset = cp_id * 4;
5053 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
5054 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
5055 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
5056 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
5059 /* allow QMANs to accept work from ARC CQF */
5060 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
5063 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
5064 u32 queue_id_base)
5066 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5067 u32 pq_id, pq_offset, so_base_lo, so_base_hi;
5069 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5070 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5072 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
5073 pq_offset = pq_id * 4;
5075 /* Configure QMAN HBW to scratchpad as it is not needed */
5076 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
5077 lower_32_bits(gaudi2->scratchpad_bus_address));
5078 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
5079 upper_32_bits(gaudi2->scratchpad_bus_address));
5080 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
5081 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
5083 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
5084 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
5085 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
5086 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
5089 /* Enable QMAN H/W completion */
5090 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
5093 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
5095 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5096 u32 sp_reg_addr;
5098 switch (queue_id_base) {
5099 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
5100 fallthrough;
5101 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5102 fallthrough;
5103 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5104 fallthrough;
5105 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5106 fallthrough;
5107 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5108 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
5109 break;
5110 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5111 fallthrough;
5112 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5113 fallthrough;
5114 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5115 fallthrough;
5116 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5117 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
5118 break;
5119 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5120 fallthrough;
5121 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5122 fallthrough;
5123 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5124 fallthrough;
5125 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5126 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
5127 break;
5128 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
5129 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
5130 break;
5131 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
5132 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
5133 break;
5134 default:
5135 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
5136 return 0;
5139 return sp_reg_addr;
5142 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
5143 u32 queue_id_base)
5145 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
5146 int map_table_entry;
5148 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
5150 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
5151 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
5152 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
5154 map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
5155 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
5156 gaudi2_irq_map_table[map_table_entry].cpu_id);
5158 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
5160 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
5161 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
5162 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
5164 /* Enable the QMAN channel.
5165 * PDMA QMAN configuration is different, as we do not allow user to
5166 * access some of the CPs.
5167 * PDMA0: CP2/3 are reserved for the ARC usage.
5168 * PDMA1: CP1/2/3 are reserved for the ARC usage.
5170 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
5171 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
5172 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
5173 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
5174 else
5175 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
5178 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
5179 u32 queue_id_base)
5181 u32 pq_id;
5183 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
5184 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
5186 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
5187 gaudi2_init_qman_cp(hdev, reg_base);
5188 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
5189 gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
5192 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
5193 u32 dma_core_id, bool is_secure)
5195 u32 prot, irq_handler_offset;
5196 struct cpu_dyn_regs *dyn_regs;
5197 int map_table_entry;
5199 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5200 if (is_secure)
5201 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5203 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5205 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5206 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5208 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5209 lower_32_bits(CFG_BASE + irq_handler_offset));
5211 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5212 upper_32_bits(CFG_BASE + irq_handler_offset));
5214 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5215 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5216 gaudi2_irq_map_table[map_table_entry].cpu_id);
5218 /* Enable the DMA channel */
5219 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5222 static void gaudi2_init_kdma(struct hl_device *hdev)
5224 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5225 u32 reg_base;
5227 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5228 return;
5230 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5232 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5234 gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5237 static void gaudi2_init_pdma(struct hl_device *hdev)
5239 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5240 u32 reg_base;
5242 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5243 return;
5245 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5246 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5248 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5249 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5251 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5252 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5254 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5255 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5257 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5260 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5262 u32 reg_base, base_edma_core_id, base_edma_qman_id;
5264 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5265 base_edma_qman_id = edma_stream_base[seq];
5267 reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5268 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5270 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5271 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5274 static void gaudi2_init_edma(struct hl_device *hdev)
5276 struct asic_fixed_properties *prop = &hdev->asic_prop;
5277 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5278 int dcore, inst;
5280 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5281 return;
5283 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5284 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5285 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5287 if (!(prop->edma_enabled_mask & BIT(seq)))
5288 continue;
5290 gaudi2_init_edma_instance(hdev, seq);
5292 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5298 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5299 * @hdev: pointer to habanalabs device structure.
5300 * @sob_id: sync object ID.
5301 * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5302 * @interrupt_id: interrupt ID.
5304 * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5305 * write directly to the HBW host memory of the virtual MSI-X doorbell.
5306 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5308 * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5309 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5310 * completion, by decrementing the sync object value and re-arming the monitor.
5312 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5313 u32 first_mon_id, u32 interrupt_id)
5315 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5316 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5317 u64 addr;
5318 u8 mask;
5320 /* Reset the SOB value */
5321 sob_offset = sob_id * sizeof(u32);
5322 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5324 /* Configure 3 monitors:
5325 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5326 * 2. Decrement SOB value by 1.
5327 * 3. Re-arm the master monitor.
5330 first_mon_offset = first_mon_id * sizeof(u32);
5332 /* 2nd monitor: Decrement SOB value by 1 */
5333 mon_offset = first_mon_offset + sizeof(u32);
5335 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5336 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5337 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5339 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5340 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5341 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5342 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5344 /* 3rd monitor: Re-arm the master monitor */
5345 mon_offset = first_mon_offset + 2 * sizeof(u32);
5347 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5348 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5349 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5351 sob_group = sob_id / 8;
5352 mask = ~BIT(sob_id & 0x7);
5353 mode = 0; /* comparison mode is "greater than or equal to" */
5354 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5355 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5356 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5357 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5359 payload = arm;
5360 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5362 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5363 mon_offset = first_mon_offset;
5365 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5366 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5368 addr = gaudi2->virt_msix_db_dma_addr;
5369 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5370 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5372 payload = interrupt_id;
5373 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5375 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5378 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5380 u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5381 struct asic_fixed_properties *prop = &hdev->asic_prop;
5383 /* Decoder normal/abnormal interrupts */
5384 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5385 if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5386 continue;
5388 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5389 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5390 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5391 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5393 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5394 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5395 interrupt_id += 1;
5396 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5400 static void gaudi2_init_sm(struct hl_device *hdev)
5402 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5403 u64 cq_address;
5404 u32 reg_val;
5405 int i;
5407 /* Enable HBW/LBW CQ for completion monitors */
5408 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5409 reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5411 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5412 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5414 /* Enable only HBW CQ for KDMA completion monitor */
5415 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5416 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5418 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5419 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5420 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5421 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5423 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5424 cq_address =
5425 hdev->completion_queue[i].bus_address;
5427 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5428 lower_32_bits(cq_address));
5429 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5430 upper_32_bits(cq_address));
5431 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5432 ilog2(HL_CQ_SIZE_IN_BYTES));
5435 /* Configure kernel ASID and MMU BP*/
5436 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5437 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5439 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5440 gaudi2_prepare_sm_for_virt_msix_db(hdev);
5443 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5445 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5446 u32 reg_val;
5447 int i;
5449 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5450 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5451 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5452 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5453 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5454 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5456 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5457 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5459 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5460 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5461 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5465 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5466 bool config_qman_only)
5468 u32 queue_id_base, reg_base;
5470 switch (dcore_id) {
5471 case 0:
5472 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5473 break;
5474 case 1:
5475 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5476 break;
5477 case 2:
5478 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5479 break;
5480 case 3:
5481 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5482 break;
5483 default:
5484 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5485 return;
5488 if (!config_qman_only) {
5489 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5490 gaudi2_init_mme_acc(hdev, reg_base);
5493 reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5494 gaudi2_init_qman(hdev, reg_base, queue_id_base);
5497 static void gaudi2_init_mme(struct hl_device *hdev)
5499 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5500 int i;
5502 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5503 return;
5505 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5506 gaudi2_init_dcore_mme(hdev, i, false);
5508 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5512 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5514 /* Mask arithmetic and QM interrupts in TPC */
5515 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5517 /* Set 16 cache lines */
5518 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5519 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5522 struct gaudi2_tpc_init_cfg_data {
5523 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5526 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5527 u32 offset, struct iterate_module_ctx *ctx)
5529 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5530 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5531 u32 queue_id_base;
5532 u8 seq;
5534 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5536 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5537 /* gets last sequence number */
5538 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5539 else
5540 seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5542 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5543 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5545 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5548 static void gaudi2_init_tpc(struct hl_device *hdev)
5550 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5551 struct gaudi2_tpc_init_cfg_data init_cfg_data;
5552 struct iterate_module_ctx tpc_iter;
5554 if (!hdev->asic_prop.tpc_enabled_mask)
5555 return;
5557 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5558 return;
5560 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5561 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5562 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5563 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5564 tpc_iter.fn = &gaudi2_init_tpc_config;
5565 tpc_iter.data = &init_cfg_data;
5566 gaudi2_iterate_tpcs(hdev, &tpc_iter);
5569 static void gaudi2_init_rotator(struct hl_device *hdev)
5571 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5572 u32 i, reg_base, queue_id;
5574 queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5576 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5577 reg_base = gaudi2_qm_blocks_bases[queue_id];
5578 gaudi2_init_qman(hdev, reg_base, queue_id);
5580 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5584 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5586 u32 sob_id;
5588 /* VCMD normal interrupt */
5589 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5590 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5591 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5592 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5594 /* VCMD abnormal interrupt */
5595 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5596 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5597 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5598 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5601 static void gaudi2_init_dec(struct hl_device *hdev)
5603 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5604 u32 dcore_id, dec_id, dec_bit;
5605 u64 base_addr;
5607 if (!hdev->asic_prop.decoder_enabled_mask)
5608 return;
5610 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5611 return;
5613 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5614 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5615 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5617 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5618 continue;
5620 base_addr = mmDCORE0_DEC0_CMD_BASE +
5621 BRDG_CTRL_BLOCK_OFFSET +
5622 dcore_id * DCORE_OFFSET +
5623 dec_id * DCORE_VDEC_OFFSET;
5625 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5627 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5630 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5631 dec_bit = PCIE_DEC_SHIFT + dec_id;
5632 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5633 continue;
5635 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5636 dec_id * DCORE_VDEC_OFFSET;
5638 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5640 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5644 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5645 u32 stlb_base, u32 asid, u64 phys_addr)
5647 u32 status, timeout_usec;
5648 int rc;
5650 if (hdev->pldm || !hdev->pdev)
5651 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5652 else
5653 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5655 WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5656 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5657 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5658 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5660 rc = hl_poll_timeout(
5661 hdev,
5662 stlb_base + STLB_BUSY_OFFSET,
5663 status,
5664 !(status & 0x80000000),
5665 1000,
5666 timeout_usec);
5668 if (rc) {
5669 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5670 return rc;
5673 return 0;
5676 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5677 u32 start_offset, u32 inv_start_val,
5678 u32 flags)
5680 /* clear PMMU mem line cache (only needed in mmu range invalidation) */
5681 if (flags & MMU_OP_CLEAR_MEMCACHE)
5682 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5684 if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5685 return;
5687 WREG32(stlb_base + start_offset, inv_start_val);
5690 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5691 struct gaudi2_cache_invld_params *inv_params)
5693 u32 status, timeout_usec, start_offset;
5694 int rc;
5696 timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5697 GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5699 /* poll PMMU mem line cache (only needed in mmu range invalidation) */
5700 if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5701 rc = hl_poll_timeout(
5702 hdev,
5703 mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5704 status,
5705 status & 0x1,
5706 1000,
5707 timeout_usec);
5709 if (rc)
5710 return rc;
5712 /* Need to manually reset the status to 0 */
5713 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5716 /* Lower cache does not work with cache lines, hence we can skip its
5717 * invalidation upon map and invalidate only upon unmap
5719 if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5720 return 0;
5722 start_offset = inv_params->range_invalidation ?
5723 STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5725 rc = hl_poll_timeout(
5726 hdev,
5727 stlb_base + start_offset,
5728 status,
5729 !(status & 0x1),
5730 1000,
5731 timeout_usec);
5733 return rc;
5736 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5738 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5739 u32 hw_cap;
5741 hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5743 if (gaudi2->hw_cap_initialized & hw_cap)
5744 return true;
5746 return false;
5749 /* this function shall be called only for HMMUs for which capability bit is set */
5750 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5752 u32 offset;
5754 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5755 return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5758 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5759 struct gaudi2_cache_invld_params *inv_params)
5761 u32 start_offset;
5763 if (inv_params->range_invalidation) {
5764 /* Set the addresses range
5765 * Note: that the start address we set in register, is not included in
5766 * the range of the invalidation, by design.
5767 * that's why we need to set lower address than the one we actually
5768 * want to be included in the range invalidation.
5770 u64 start = inv_params->start_va - 1;
5772 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5774 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5775 start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5777 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5778 start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5780 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5781 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5783 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5784 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5785 } else {
5786 start_offset = STLB_INV_ALL_START_OFFSET;
5789 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5790 inv_params->inv_start_val, inv_params->flags);
/*
 * Trigger a cache invalidation on HMMU @hmmu_id of dcore @dcore_id by
 * resolving its STLB base and delegating to
 * gaudi2_mmu_invalidate_cache_trigger().
 */
static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	gaudi2_mmu_invalidate_cache_trigger(hdev, get_hmmu_stlb_base(dcore_id, hmmu_id),
						inv_params);
}
/*
 * Wait for the cache invalidation on HMMU @hmmu_id of dcore @dcore_id by
 * resolving its STLB base and delegating to
 * gaudi2_mmu_invalidate_cache_status_poll().
 *
 * Return: the value returned by gaudi2_mmu_invalidate_cache_status_poll().
 */
static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	return gaudi2_mmu_invalidate_cache_status_poll(hdev,
						get_hmmu_stlb_base(dcore_id, hmmu_id),
						inv_params);
}
5811 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5812 struct gaudi2_cache_invld_params *inv_params)
5814 int dcore_id, hmmu_id;
5816 /* first send all invalidation commands */
5817 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5818 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5819 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5820 continue;
5822 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5826 /* next, poll all invalidations status */
5827 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5828 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5829 int rc;
5831 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5832 continue;
5834 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5835 inv_params);
5836 if (rc)
5837 return rc;
5841 return 0;
5844 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5846 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5847 struct gaudi2_cache_invld_params invld_params;
5848 int rc = 0;
5850 if (hdev->reset_info.hard_reset_pending)
5851 return rc;
5853 invld_params.range_invalidation = false;
5854 invld_params.inv_start_val = 1;
5856 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5857 invld_params.flags = flags;
5858 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5859 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5860 &invld_params);
5861 } else if (flags & MMU_OP_PHYS_PACK) {
5862 invld_params.flags = 0;
5863 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5866 return rc;
5869 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5870 u32 flags, u32 asid, u64 va, u64 size)
5872 struct gaudi2_cache_invld_params invld_params = {0};
5873 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5874 u64 start_va, end_va;
5875 u32 inv_start_val;
5876 int rc = 0;
5878 if (hdev->reset_info.hard_reset_pending)
5879 return 0;
5881 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5882 1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5883 asid << MMU_RANGE_INV_ASID_SHIFT);
5884 start_va = va;
5885 end_va = start_va + size;
5887 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5888 /* As range invalidation does not support zero address we will
5889 * do full invalidation in this case
5891 if (start_va) {
5892 invld_params.range_invalidation = true;
5893 invld_params.start_va = start_va;
5894 invld_params.end_va = end_va;
5895 invld_params.inv_start_val = inv_start_val;
5896 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5897 } else {
5898 invld_params.range_invalidation = false;
5899 invld_params.inv_start_val = 1;
5900 invld_params.flags = flags;
5904 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5905 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5906 &invld_params);
5907 if (rc)
5908 return rc;
5910 } else if (flags & MMU_OP_PHYS_PACK) {
5911 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5912 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5913 invld_params.inv_start_val = inv_start_val;
5914 invld_params.flags = flags;
5915 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5918 return rc;
5921 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base,
5922 bool host_resident_pgt)
5924 struct asic_fixed_properties *prop = &hdev->asic_prop;
5925 u64 hop0_addr;
5926 u32 asid, max_asid = prop->max_asid;
5927 int rc;
5929 /* it takes too much time to init all of the ASIDs on palladium */
5930 if (hdev->pldm)
5931 max_asid = min((u32) 8, max_asid);
5933 for (asid = 0 ; asid < max_asid ; asid++) {
5934 if (host_resident_pgt)
5935 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5936 else
5937 hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size);
5939 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5940 if (rc) {
5941 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5942 return rc;
5946 return 0;
5949 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base,
5950 bool host_resident_pgt)
5952 u32 status, timeout_usec;
5953 int rc;
5955 if (hdev->pldm || !hdev->pdev)
5956 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5957 else
5958 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5960 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5962 rc = hl_poll_timeout(
5963 hdev,
5964 stlb_base + STLB_SRAM_INIT_OFFSET,
5965 status,
5966 !status,
5967 1000,
5968 timeout_usec);
5970 if (rc)
5971 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5973 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt);
5974 if (rc)
5975 return rc;
5977 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5979 rc = hl_poll_timeout(
5980 hdev,
5981 stlb_base + STLB_INV_ALL_START_OFFSET,
5982 status,
5983 !status,
5984 1000,
5985 timeout_usec);
5987 if (rc)
5988 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5990 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5992 return rc;
5995 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5997 struct asic_fixed_properties *prop = &hdev->asic_prop;
5998 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5999 u32 mmu_base, stlb_base;
6000 int rc;
6002 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
6003 return 0;
6005 mmu_base = mmPMMU_HBW_MMU_BASE;
6006 stlb_base = mmPMMU_HBW_STLB_BASE;
6008 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
6009 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
6010 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
6011 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
6012 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
6013 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
6014 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
6015 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
6016 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
6017 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
6018 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
6020 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
6022 if (PAGE_SIZE == SZ_64K) {
6023 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
6024 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
6025 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
6026 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
6027 FIELD_PREP(
6028 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
6030 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
6031 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
6032 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
6035 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
6037 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident);
6038 if (rc)
6039 return rc;
6041 gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
6043 return 0;
6046 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
6047 int hmmu_id)
6049 struct asic_fixed_properties *prop = &hdev->asic_prop;
6050 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6051 u32 offset, mmu_base, stlb_base, hw_cap;
6052 u8 dmmu_seq;
6053 int rc;
6055 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
6056 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
6059 * return if DMMU is already initialized or if it's not out of
6060 * isolation (due to cluster binning)
6062 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
6063 return 0;
6065 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
6066 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
6067 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
6069 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
6070 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
6072 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
6073 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
6074 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
6075 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
6076 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
6077 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
6078 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
6079 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
6080 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
6081 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
6082 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
6084 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
6085 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
6087 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
6089 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident);
6090 if (rc)
6091 return rc;
6093 gaudi2->hw_cap_initialized |= hw_cap;
6095 return 0;
6098 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
6100 int rc, dcore_id, hmmu_id;
6102 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
6103 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
6104 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
6105 if (rc)
6106 return rc;
6109 return 0;
/*
 * Initialize all MMUs: first the PCI MMU, then the HBM MMUs.
 *
 * Return: 0 on success, otherwise the error of the stage that failed. The HBM
 * MMUs are not initialized if the PCI MMU init failed.
 */
static int gaudi2_mmu_init(struct hl_device *hdev)
{
	int rc = gaudi2_pci_mmu_init(hdev);

	if (rc)
		return rc;

	return gaudi2_hbm_mmu_init(hdev);
}
6127 static int gaudi2_hw_init(struct hl_device *hdev)
6129 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6130 int rc;
6132 /* Let's mark in the H/W that we have reached this point. We check
6133 * this value in the reset_before_init function to understand whether
6134 * we need to reset the chip before doing H/W init. This register is
6135 * cleared by the H/W upon H/W reset
6137 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
6139 /* Perform read from the device to make sure device is up */
6140 RREG32(mmHW_STATE);
6142 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
6143 * So we set it here and if anyone tries to move it later to
6144 * a different address, there will be an error
6146 if (hdev->asic_prop.iatu_done_by_fw)
6147 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
6150 * Before pushing u-boot/linux to device, need to set the hbm bar to
6151 * base address of dram
6153 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
6154 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
6155 return -EIO;
6158 rc = gaudi2_init_cpu(hdev);
6159 if (rc) {
6160 dev_err(hdev->dev, "failed to initialize CPU\n");
6161 return rc;
6164 gaudi2_init_scrambler_hbm(hdev);
6165 gaudi2_init_kdma(hdev);
6167 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
6168 if (rc) {
6169 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
6170 return rc;
6173 rc = gaudi2->cpucp_info_get(hdev);
6174 if (rc) {
6175 dev_err(hdev->dev, "Failed to get cpucp info\n");
6176 return rc;
6179 rc = gaudi2_mmu_init(hdev);
6180 if (rc)
6181 return rc;
6183 gaudi2_init_pdma(hdev);
6184 gaudi2_init_edma(hdev);
6185 gaudi2_init_sm(hdev);
6186 gaudi2_init_tpc(hdev);
6187 gaudi2_init_mme(hdev);
6188 gaudi2_init_rotator(hdev);
6189 gaudi2_init_dec(hdev);
6190 gaudi2_enable_timestamp(hdev);
6192 rc = gaudi2_coresight_init(hdev);
6193 if (rc)
6194 goto disable_queues;
6196 rc = gaudi2_enable_msix(hdev);
6197 if (rc)
6198 goto disable_queues;
6200 /* Perform read from the device to flush all configuration */
6201 RREG32(mmHW_STATE);
6203 return 0;
6205 disable_queues:
6206 gaudi2_disable_dma_qmans(hdev);
6207 gaudi2_disable_mme_qmans(hdev);
6208 gaudi2_disable_tpc_qmans(hdev);
6209 gaudi2_disable_rot_qmans(hdev);
6210 gaudi2_disable_nic_qmans(hdev);
6212 gaudi2_disable_timestamp(hdev);
6214 return rc;
6218 * gaudi2_send_hard_reset_cmd - common function to handle reset
6220 * @hdev: pointer to the habanalabs device structure
6222 * This function handles the various possible scenarios for reset.
6223 * It considers if reset is handled by driver\FW and what FW components are loaded
6225 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6227 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6228 bool heartbeat_reset, preboot_only, cpu_initialized = false;
6229 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6230 u32 cpu_boot_status;
6232 preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6233 heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6236 * Handle corner case where failure was at cpu management app load,
6237 * and driver didn't detect any failure while loading the FW,
6238 * then at such scenario driver will send only HALT_MACHINE
6239 * and no one will respond to this request since FW already back to preboot
6240 * and it cannot handle such cmd.
6241 * In this case next time the management app loads it'll check on events register
6242 * which will still have the halt indication, and will reboot the device.
6243 * The solution is to let preboot clear all relevant registers before next boot
6244 * once driver send COMMS_RST_DEV.
6246 cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6248 if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6249 (cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6250 cpu_initialized = true;
6253 * when Linux/Bootfit exist this write to the SP can be interpreted in 2 ways:
6254 * 1. FW reset: FW initiate the reset sequence
6255 * 2. driver reset: FW will start HALT sequence (the preparations for the
6256 * reset but not the reset itself as it is not implemented
6257 * on their part) and LKD will wait to let FW complete the
6258 * sequence before issuing the reset
6260 if (!preboot_only && cpu_initialized) {
6261 WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6262 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6264 msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6268 * When working with preboot (without Linux/Boot fit) we can
6269 * communicate only using the COMMS commands to issue halt/reset.
6271 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6272 * attempt to revive the card in the small chance that the f/w has
6273 * experienced a watchdog event, which caused it to return back to preboot.
6274 * In that case, triggering reset through GIC won't help. We need to
6275 * trigger the reset as if Linux wasn't loaded.
6277 * We do it only if the reset cause was HB, because that would be the
6278 * indication of such an event.
6280 * In case watchdog hasn't expired but we still got HB, then this won't
6281 * do any damage.
6284 if (heartbeat_reset || preboot_only || !cpu_initialized) {
6285 if (hdev->asic_prop.hard_reset_done_by_fw)
6286 hl_fw_ask_hard_reset_without_linux(hdev);
6287 else
6288 hl_fw_ask_halt_machine_without_linux(hdev);
6293 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6295 * @hdev: pointer to the habanalabs device structure
6297 * This function executes hard reset based on if driver/FW should do the reset
6299 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6301 if (hdev->asic_prop.hard_reset_done_by_fw) {
6302 gaudi2_send_hard_reset_cmd(hdev);
6303 return;
6306 /* Set device to handle FLR by H/W as we will put the device
6307 * CPU to halt mode
6309 WREG32(mmPCIE_AUX_FLR_CTRL,
6310 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6312 gaudi2_send_hard_reset_cmd(hdev);
6314 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6318 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6320 * @hdev: pointer to the habanalabs device structure
6321 * @driver_performs_reset: true if driver should perform reset instead of f/w.
6322 * @poll_timeout_us: time to wait for response from f/w.
6324 * This function executes soft reset based on if driver/FW should do the reset
6326 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6327 u32 poll_timeout_us)
6329 if (!driver_performs_reset)
6330 return hl_fw_send_soft_reset(hdev);
6332 /* Block access to engines, QMANs and SM during reset, these
6333 * RRs will be reconfigured after soft reset.
6334 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
6336 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6337 mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6339 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6340 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6341 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6343 WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6344 return 0;
6347 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6349 int i, rc = 0;
6350 u32 reg_val;
6352 /* We poll the BTM done indication multiple times after reset due to
6353 * a HW errata 'GAUDI2_0300'
6355 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6356 rc = hl_poll_timeout(
6357 hdev,
6358 mmPSOC_GLOBAL_CONF_BTM_FSM,
6359 reg_val,
6360 reg_val == 0,
6361 1000,
6362 poll_timeout_us);
6364 if (rc)
6365 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6368 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6370 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6371 u32 poll_timeout_us, reset_sleep_ms;
6372 bool driver_performs_reset = false;
6373 int rc;
6375 if (hdev->pldm) {
6376 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6377 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6378 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6379 } else {
6380 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6381 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6384 if (fw_reset)
6385 goto skip_reset;
6387 gaudi2_reset_arcs(hdev);
6389 if (hard_reset) {
6390 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6391 gaudi2_execute_hard_reset(hdev);
6392 } else {
6394 * As we have to support also work with preboot only (which does not supports
6395 * soft reset) we have to make sure that security is disabled before letting driver
6396 * do the reset. user shall control the BFE flags to avoid asking soft reset in
6397 * secured device with preboot only.
6399 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6400 !hdev->asic_prop.fw_security_enabled);
6401 rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6402 if (rc)
6403 return rc;
6406 skip_reset:
6407 if (driver_performs_reset || hard_reset) {
6409 * Instead of waiting for BTM indication we should wait for preboot ready:
6410 * Consider the below scenario:
6411 * 1. FW update is being triggered
6412 * - setting the dirty bit
6413 * 2. hard reset will be triggered due to the dirty bit
6414 * 3. FW initiates the reset:
6415 * - dirty bit cleared
6416 * - BTM indication cleared
6417 * - preboot ready indication cleared
6418 * 4. during hard reset:
6419 * - BTM indication will be set
6420 * - BIST test performed and another reset triggered
6421 * 5. only after this reset the preboot will set the preboot ready
6423 * when polling on BTM indication alone we can lose sync with FW while trying to
6424 * communicate with FW that is during reset.
6425 * to overcome this we will always wait to preboot ready indication
6428 /* without this sleep reset will not work */
6429 msleep(reset_sleep_ms);
6431 if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6432 hl_fw_wait_preboot_ready(hdev);
6433 else
6434 gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6437 if (!gaudi2)
6438 return 0;
6440 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6441 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6444 * Clear NIC capability mask in order for driver to re-configure
6445 * NIC QMANs. NIC ports will not be re-configured during soft
6446 * reset as we call gaudi2_nic_init only during hard reset
6448 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6450 if (hard_reset) {
6451 gaudi2->hw_cap_initialized &=
6452 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6453 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6454 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6455 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6456 HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6458 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6459 } else {
6460 gaudi2->hw_cap_initialized &=
6461 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6462 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6463 HW_CAP_ROT_MASK);
6465 return 0;
6468 static int gaudi2_suspend(struct hl_device *hdev)
6470 return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
/* Resume handler: delegates to gaudi2_init_iatu() and returns its result. */
static int gaudi2_resume(struct hl_device *hdev)
{
	int rc = gaudi2_init_iatu(hdev);

	return rc;
}
6478 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6479 void *cpu_addr, dma_addr_t dma_addr, size_t size)
6481 int rc;
6483 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6484 VM_DONTCOPY | VM_NORESERVE);
6486 #ifdef _HAS_DMA_MMAP_COHERENT
6488 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6489 if (rc)
6490 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6492 #else
6494 rc = remap_pfn_range(vma, vma->vm_start,
6495 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6496 size, vma->vm_page_prot);
6497 if (rc)
6498 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6500 #endif
6502 return rc;
6505 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6507 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6508 u64 hw_cap_mask = 0;
6509 u64 hw_tpc_cap_bit = 0;
6510 u64 hw_nic_cap_bit = 0;
6511 u64 hw_test_cap_bit = 0;
6513 switch (hw_queue_id) {
6514 case GAUDI2_QUEUE_ID_PDMA_0_0:
6515 case GAUDI2_QUEUE_ID_PDMA_0_1:
6516 case GAUDI2_QUEUE_ID_PDMA_1_0:
6517 hw_cap_mask = HW_CAP_PDMA_MASK;
6518 break;
6519 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6520 hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6521 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6522 break;
6523 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6524 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6525 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6526 break;
6527 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6528 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6529 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6530 break;
6531 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6532 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6533 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6534 break;
6536 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6537 hw_test_cap_bit = HW_CAP_MME_SHIFT;
6538 break;
6540 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6541 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6542 break;
6544 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6545 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6546 break;
6548 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6549 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6550 break;
6552 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6553 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6554 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6556 /* special case where cap bit refers to the first queue id */
6557 if (!hw_tpc_cap_bit)
6558 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6559 break;
6561 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6562 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6563 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6564 break;
6566 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6567 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6568 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6569 break;
6571 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6572 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6573 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6574 break;
6576 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6577 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6578 break;
6580 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6581 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6582 break;
6584 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6585 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6587 /* special case where cap bit refers to the first queue id */
6588 if (!hw_nic_cap_bit)
6589 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6590 break;
6592 case GAUDI2_QUEUE_ID_CPU_PQ:
6593 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6595 default:
6596 return false;
6599 if (hw_tpc_cap_bit)
6600 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6602 if (hw_nic_cap_bit)
6603 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6605 if (hw_test_cap_bit)
6606 hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6608 return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6611 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6613 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6615 switch (arc_id) {
6616 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6617 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6618 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6620 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6621 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6623 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6624 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6626 default:
6627 return false;
6631 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6633 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6635 switch (arc_id) {
6636 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6637 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6638 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6639 break;
6641 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6642 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6643 break;
6645 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6646 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6647 break;
6649 default:
6650 return;
6654 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6656 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6658 switch (arc_id) {
6659 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6660 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6661 gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6662 break;
6664 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6665 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6666 break;
6668 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6669 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6670 break;
6672 default:
6673 return;
6677 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6679 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6680 u32 pq_offset, reg_base, db_reg_offset, db_value;
6682 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6684 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6685 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6686 * number.
6688 pq_offset = (hw_queue_id & 0x3) * 4;
6689 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6690 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6691 } else {
6692 db_reg_offset = mmCPU_IF_PF_PQ_PI;
6695 db_value = pi;
6697 /* ring the doorbell */
6698 WREG32(db_reg_offset, db_value);
6700 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6701 /* make sure device CPU will read latest data from host */
6702 mb();
6703 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6704 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6708 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6710 __le64 *pbd = (__le64 *) bd;
6712 /* The QMANs are on the host memory so a simple copy suffice */
6713 pqe[0] = pbd[0];
6714 pqe[1] = pbd[1];
6717 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6718 dma_addr_t *dma_handle, gfp_t flags)
6720 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6723 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6724 void *cpu_addr, dma_addr_t dma_handle)
6726 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6729 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6730 u32 timeout, u64 *result)
6732 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6734 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6735 if (result)
6736 *result = 0;
6737 return 0;
6740 if (!timeout)
6741 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6743 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6746 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6747 gfp_t mem_flags, dma_addr_t *dma_handle)
6749 if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6750 return NULL;
6752 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6755 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6757 dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6760 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6761 dma_addr_t *dma_handle)
6763 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6766 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6768 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6771 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6773 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6774 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6776 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6777 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6778 return -EINVAL;
6781 /* Just check if CB address is valid */
6783 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6784 parser->user_cb_size,
6785 asic_prop->sram_user_base_address,
6786 asic_prop->sram_end_address))
6787 return 0;
6789 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6790 parser->user_cb_size,
6791 asic_prop->dram_user_base_address,
6792 asic_prop->dram_end_address))
6793 return 0;
6795 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6796 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6797 parser->user_cb_size,
6798 asic_prop->dmmu.start_addr,
6799 asic_prop->dmmu.end_addr))
6800 return 0;
6802 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6803 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6804 parser->user_cb_size,
6805 asic_prop->pmmu.start_addr,
6806 asic_prop->pmmu.end_addr) ||
6807 hl_mem_area_inside_range(
6808 (u64) (uintptr_t) parser->user_cb,
6809 parser->user_cb_size,
6810 asic_prop->pmmu_huge.start_addr,
6811 asic_prop->pmmu_huge.end_addr))
6812 return 0;
6814 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6815 if (!hdev->pdev)
6816 return 0;
6818 if (!device_iommu_mapped(&hdev->pdev->dev))
6819 return 0;
6822 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6823 parser->user_cb, parser->user_cb_size);
6825 return -EFAULT;
6828 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6830 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6832 if (!parser->is_kernel_allocated_cb)
6833 return gaudi2_validate_cb_address(hdev, parser);
6835 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6836 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6837 return -EINVAL;
6840 return 0;
6843 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6845 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6847 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6848 return 0;
6850 return hl_fw_send_heartbeat(hdev);
6853 /* This is an internal helper function, used to update the KDMA mmu props.
6854 * Should be called with a proper kdma lock.
6856 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6857 bool mmu_bypass, u32 asid)
6859 u32 rw_asid, rw_mmu_bp;
6861 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6862 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6864 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6865 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6867 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6868 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6871 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6872 u32 mon_payload, u32 sync_value)
6874 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6875 u8 mask;
6877 sob_offset = sob_id * 4;
6878 mon_offset = mon_id * 4;
6880 /* Reset the SOB value */
6881 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6883 /* Configure this address with CQ_ID 0 because CQ_EN is set */
6884 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6886 /* Configure this address with CS index because CQ_EN is set */
6887 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6889 sync_group_id = sob_id / 8;
6890 mask = ~(1 << (sob_id & 0x7));
6891 mode = 1; /* comparison mode is "equal to" */
6893 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6894 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6895 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6896 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6897 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6900 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */
6901 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6902 u64 src_addr, u64 dst_addr,
6903 u32 size, bool is_memset)
6905 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6906 struct hl_cq_entry *cq_base;
6907 struct hl_cq *cq;
6908 u64 comp_addr;
6909 int rc;
6911 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6912 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6913 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6915 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6916 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6918 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6919 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6921 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6922 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6923 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6924 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6925 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6926 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6927 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6928 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6930 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6931 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6933 if (is_memset)
6934 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6936 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6938 /* Wait for completion */
6939 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6940 cq_base = cq->kernel_address;
6941 polling_addr = (u32 *)&cq_base[cq->ci];
6943 if (hdev->pldm)
6944 /* for each 1MB 20 second of timeout */
6945 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6946 else
6947 timeout = KDMA_TIMEOUT_USEC;
6949 /* Polling */
6950 rc = hl_poll_timeout_memory(
6951 hdev,
6952 polling_addr,
6953 status,
6954 (status == 1),
6955 1000,
6956 timeout,
6957 true);
6959 *polling_addr = 0;
6961 if (rc) {
6962 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6963 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6964 return rc;
6967 cq->ci = hl_cq_inc_ptr(cq->ci);
6969 return 0;
6972 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6974 u32 i;
6976 for (i = 0 ; i < size ; i += sizeof(u32))
6977 WREG32(addr + i, val);
6980 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6982 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6984 if (enable) {
6985 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6986 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6987 } else {
6988 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6989 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6993 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6995 return hdev->asic_prop.first_available_user_sob[0] +
6996 hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6999 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
7001 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7002 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
7004 /* Reset the SOB value */
7005 WREG32(sob_addr, 0);
7008 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
7009 struct gaudi2_queues_test_info *msg_info)
7011 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7012 u32 tmp, sob_base = 1;
7013 struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
7014 size_t pkt_size = sizeof(struct packet_msg_short);
7015 int rc;
7017 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
7018 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
7019 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
7020 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
7021 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
7023 msg_short_pkt->value = cpu_to_le32(sob_val);
7024 msg_short_pkt->ctl = cpu_to_le32(tmp);
7026 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
7027 if (rc)
7028 dev_err(hdev->dev,
7029 "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
7031 return rc;
7034 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
7036 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7037 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
7038 u32 timeout_usec, tmp;
7039 int rc;
7041 if (hdev->pldm)
7042 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
7043 else
7044 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
7046 rc = hl_poll_timeout(
7047 hdev,
7048 sob_addr,
7049 tmp,
7050 (tmp == sob_val),
7051 1000,
7052 timeout_usec);
7054 if (rc == -ETIMEDOUT) {
7055 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
7056 hw_queue_id, tmp);
7057 rc = -EIO;
7060 return rc;
7063 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
7065 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7068 * check capability here as send_cpu_message() won't update the result
7069 * value if no capability
7071 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7072 return 0;
7074 return hl_fw_test_cpu_queue(hdev);
7077 static int gaudi2_test_queues(struct hl_device *hdev)
7079 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7080 struct gaudi2_queues_test_info *msg_info;
7081 u32 sob_val = 0x5a5a;
7082 int i, rc;
7084 /* send test message on all enabled Qs */
7085 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7086 if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7087 continue;
7089 msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
7090 gaudi2_qman_set_test_mode(hdev, i, true);
7091 gaudi2_test_queue_clear(hdev, i);
7092 rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
7093 if (rc)
7094 goto done;
7097 rc = gaudi2_test_cpu_queue(hdev);
7098 if (rc)
7099 goto done;
7101 /* verify that all messages were processed */
7102 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7103 if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7104 continue;
7106 rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
7107 if (rc)
7108 /* chip is not usable, no need for cleanups, just bail-out with error */
7109 goto done;
7111 gaudi2_test_queue_clear(hdev, i);
7112 gaudi2_qman_set_test_mode(hdev, i, false);
7115 done:
7116 return rc;
7119 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
7121 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7122 size_t irq_arr_size;
7123 int rc;
7125 gaudi2_init_arcs(hdev);
7127 rc = gaudi2_scrub_arcs_dccm(hdev);
7128 if (rc) {
7129 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
7130 return rc;
7133 gaudi2_init_security(hdev);
7135 /* Unmask all IRQs since some could have been received during the soft reset */
7136 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7137 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7140 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7141 struct engines_data *e)
7143 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7144 struct asic_fixed_properties *prop = &hdev->asic_prop;
7145 unsigned long *mask = (unsigned long *) mask_arr;
7146 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7147 bool is_idle = true, is_eng_idle;
7148 int engine_idx, i, j;
7149 u64 offset;
7151 if (e)
7152 hl_engine_data_sprintf(e,
7153 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7154 "---- ---- ------- ------------ ------------- -------------\n");
7156 for (i = 0; i < NUM_OF_DCORES; i++) {
7157 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7158 int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7160 if (!(prop->edma_enabled_mask & BIT(seq)))
7161 continue;
7163 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7164 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7165 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7167 dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7168 dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7170 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7171 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7172 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7174 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7175 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7176 is_idle &= is_eng_idle;
7178 if (mask && !is_eng_idle)
7179 set_bit(engine_idx, mask);
7181 if (e)
7182 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7183 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7187 return is_idle;
7190 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7191 struct engines_data *e)
7193 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7194 unsigned long *mask = (unsigned long *) mask_arr;
7195 const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7196 bool is_idle = true, is_eng_idle;
7197 int engine_idx, i;
7198 u64 offset;
7200 if (e)
7201 hl_engine_data_sprintf(e,
7202 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7203 "---- ------- ------------ ------------- -------------\n");
7205 for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7206 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7207 offset = i * PDMA_OFFSET;
7208 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7209 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7211 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7212 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7213 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7215 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7216 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7217 is_idle &= is_eng_idle;
7219 if (mask && !is_eng_idle)
7220 set_bit(engine_idx, mask);
7222 if (e)
7223 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7224 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7227 return is_idle;
7230 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7231 struct engines_data *e)
7233 unsigned long *mask = (unsigned long *) mask_arr;
7234 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7235 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7236 bool is_idle = true, is_eng_idle;
7237 int engine_idx, i;
7238 u64 offset = 0;
7240 /* NIC, twelve macros in Full chip */
7241 if (e && hdev->nic_ports_mask)
7242 hl_engine_data_sprintf(e,
7243 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7244 "--- ------- ------------ ----------\n");
7246 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7247 if (!(i & 1))
7248 offset = i / 2 * NIC_OFFSET;
7249 else
7250 offset += NIC_QM_OFFSET;
7252 if (!(hdev->nic_ports_mask & BIT(i)))
7253 continue;
7255 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7258 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7259 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7260 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7262 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7263 is_idle &= is_eng_idle;
7265 if (mask && !is_eng_idle)
7266 set_bit(engine_idx, mask);
7268 if (e)
7269 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7270 qm_glbl_sts0, qm_cgm_sts);
7273 return is_idle;
7276 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7277 struct engines_data *e)
7279 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7280 unsigned long *mask = (unsigned long *) mask_arr;
7281 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7282 bool is_idle = true, is_eng_idle;
7283 int engine_idx, i;
7284 u64 offset;
7286 if (e)
7287 hl_engine_data_sprintf(e,
7288 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n"
7289 "--- ---- ------- ------------ ---------------\n");
7290 /* MME, one per Dcore */
7291 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7292 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7293 offset = i * DCORE_OFFSET;
7295 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7296 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7297 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7299 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7300 is_idle &= is_eng_idle;
7302 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7303 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7304 is_idle &= is_eng_idle;
7306 if (e)
7307 hl_engine_data_sprintf(e, mme_fmt, i, "N",
7308 is_eng_idle ? "Y" : "N",
7309 qm_glbl_sts0,
7310 mme_arch_sts);
7312 if (mask && !is_eng_idle)
7313 set_bit(engine_idx, mask);
7316 return is_idle;
7319 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7320 struct iterate_module_ctx *ctx)
7322 struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7323 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7324 bool is_eng_idle;
7325 int engine_idx;
7327 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7328 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7329 else
7330 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7331 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7333 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7334 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7335 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7336 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7338 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7339 IS_TPC_IDLE(tpc_cfg_sts);
7340 *(idle_data->is_idle) &= is_eng_idle;
7342 if (idle_data->mask && !is_eng_idle)
7343 set_bit(engine_idx, idle_data->mask);
7345 if (idle_data->e)
7346 hl_engine_data_sprintf(idle_data->e,
7347 idle_data->tpc_fmt, dcore, inst,
7348 is_eng_idle ? "Y" : "N",
7349 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7352 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7353 struct engines_data *e)
7355 struct asic_fixed_properties *prop = &hdev->asic_prop;
7356 unsigned long *mask = (unsigned long *) mask_arr;
7357 bool is_idle = true;
7359 struct gaudi2_tpc_idle_data tpc_idle_data = {
7360 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7361 .e = e,
7362 .mask = mask,
7363 .is_idle = &is_idle,
7365 struct iterate_module_ctx tpc_iter = {
7366 .fn = &gaudi2_is_tpc_engine_idle,
7367 .data = &tpc_idle_data,
7370 if (e && prop->tpc_enabled_mask)
7371 hl_engine_data_sprintf(e,
7372 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n"
7373 "---- --- ------- ------------ ---------- ------\n");
7375 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7377 return *tpc_idle_data.is_idle;
7380 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7381 struct engines_data *e)
7383 struct asic_fixed_properties *prop = &hdev->asic_prop;
7384 unsigned long *mask = (unsigned long *) mask_arr;
7385 const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7386 const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7387 bool is_idle = true, is_eng_idle;
7388 u32 dec_swreg15, dec_enabled_bit;
7389 int engine_idx, i, j;
7390 u64 offset;
7392 /* Decoders, two each Dcore and two shared PCIe decoders */
7393 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7394 hl_engine_data_sprintf(e,
7395 "\nCORE DEC is_idle VSI_CMD_SWREG15\n"
7396 "---- --- ------- ---------------\n");
7398 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7399 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7400 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7401 if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7402 continue;
7404 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7405 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7406 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7408 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7409 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7410 is_idle &= is_eng_idle;
7412 if (mask && !is_eng_idle)
7413 set_bit(engine_idx, mask);
7415 if (e)
7416 hl_engine_data_sprintf(e, dec_fmt, i, j,
7417 is_eng_idle ? "Y" : "N", dec_swreg15);
7421 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7422 hl_engine_data_sprintf(e,
7423 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n"
7424 "-------- ------- ---------------\n");
7426 /* Check shared(PCIe) decoders */
7427 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7428 dec_enabled_bit = PCIE_DEC_SHIFT + i;
7429 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7430 continue;
7432 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7433 offset = i * DCORE_DEC_OFFSET;
7434 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7435 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7436 is_idle &= is_eng_idle;
7438 if (mask && !is_eng_idle)
7439 set_bit(engine_idx, mask);
7441 if (e)
7442 hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7443 is_eng_idle ? "Y" : "N", dec_swreg15);
7446 return is_idle;
7449 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7450 struct engines_data *e)
7452 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7453 unsigned long *mask = (unsigned long *) mask_arr;
7454 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7455 bool is_idle = true, is_eng_idle;
7456 int engine_idx, i;
7457 u64 offset;
7459 if (e)
7460 hl_engine_data_sprintf(e,
7461 "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n"
7462 "---- --- ------- ------------ ------------ ----------\n");
7464 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7465 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7467 offset = i * ROT_OFFSET;
7469 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7470 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7471 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7473 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7474 is_idle &= is_eng_idle;
7476 if (mask && !is_eng_idle)
7477 set_bit(engine_idx, mask);
7479 if (e)
7480 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7481 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7484 return is_idle;
7487 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7488 struct engines_data *e)
7490 bool is_idle = true;
7492 is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7493 is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7494 is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7495 is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7496 is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7497 is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7498 is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7500 return is_idle;
7503 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7504 __acquires(&gaudi2->hw_queues_lock)
7506 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7508 spin_lock(&gaudi2->hw_queues_lock);
7511 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7512 __releases(&gaudi2->hw_queues_lock)
7514 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7516 spin_unlock(&gaudi2->hw_queues_lock);
7519 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7521 return hdev->pdev->device;
7524 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7526 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7528 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7529 return 0;
7531 return hl_fw_get_eeprom_data(hdev, data, max_size);
7534 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7536 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7539 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7541 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7543 if (aggregate) {
7544 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
7545 return gaudi2->events_stat_aggregate;
7548 *size = (u32) sizeof(gaudi2->events_stat);
7549 return gaudi2->events_stat;
7552 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7553 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7555 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7556 dcore_vdec_id + DCORE_OFFSET * dcore_id;
7558 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7559 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7561 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7562 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7564 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7565 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7567 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7568 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7570 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7571 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7574 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7576 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7577 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7578 struct asic_fixed_properties *prop = &hdev->asic_prop;
7579 u32 dcore_offset = dcore_id * DCORE_OFFSET;
7580 u32 vdec_id, i, ports_offset, reg_val;
7581 u8 edma_seq_base;
7583 /* EDMA */
7584 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7585 if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7586 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7587 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7588 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7589 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7592 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7593 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7594 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7595 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7596 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7599 /* Sync Mngr */
7600 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7602 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
7603 * for any access type
7605 if (dcore_id > 0) {
7606 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7607 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7608 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7609 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7612 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7613 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7615 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7616 ports_offset = i * DCORE_MME_SBTE_OFFSET;
7617 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7618 dcore_offset + ports_offset, 0);
7619 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7620 dcore_offset + ports_offset, rw_asid);
7623 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7624 ports_offset = i * DCORE_MME_WB_OFFSET;
7625 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7626 dcore_offset + ports_offset, 0);
7627 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7628 dcore_offset + ports_offset, rw_asid);
7631 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7632 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7635 * Decoders
7637 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7638 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7639 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7643 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7644 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7646 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7648 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7649 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7651 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7652 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7654 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7655 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7657 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7658 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7660 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7661 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7664 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7665 u32 rw_asid, u32 rw_mmu_bp)
7667 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7669 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7670 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7673 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7675 u32 reg_base, reg_offset, reg_val = 0;
7677 reg_base = gaudi2_arc_blocks_bases[cpu_id];
7679 /* Enable MMU and configure asid for all relevant ARC regions */
7680 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7681 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7683 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7684 WREG32(reg_base + reg_offset, reg_val);
7686 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7687 WREG32(reg_base + reg_offset, reg_val);
7689 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7690 WREG32(reg_base + reg_offset, reg_val);
7692 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7693 WREG32(reg_base + reg_offset, reg_val);
7695 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7696 WREG32(reg_base + reg_offset, reg_val);
7698 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7699 WREG32(reg_base + reg_offset, reg_val);
7701 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7702 WREG32(reg_base + reg_offset, reg_val);
7704 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7705 WREG32(reg_base + reg_offset, reg_val);
7707 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7708 WREG32(reg_base + reg_offset, reg_val);
7710 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7711 WREG32(reg_base + reg_offset, reg_val);
7713 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7714 WREG32(reg_base + reg_offset, reg_val);
7717 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7719 int i;
7721 if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7722 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7724 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7725 gaudi2_arc_mmu_prepare(hdev, i, asid);
7727 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7728 if (!gaudi2_is_queue_enabled(hdev, i))
7729 continue;
7731 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7734 return 0;
7737 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7739 struct asic_fixed_properties *prop = &hdev->asic_prop;
7740 u32 rw_asid, offset;
7741 int rc, i;
7743 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7744 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7746 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7747 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7748 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7749 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7751 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7752 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7753 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7754 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7756 /* ROT */
7757 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7758 offset = i * ROT_OFFSET;
7759 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7760 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7761 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7762 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7763 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7766 /* Shared Decoders are the last bits in the decoders mask */
7767 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7768 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7770 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7771 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7773 /* arc farm arc dup eng */
7774 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7775 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7777 rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7778 if (rc)
7779 return rc;
7781 return 0;
7784 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7785 struct iterate_module_ctx *ctx)
7787 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7789 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7790 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7791 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7792 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7795 /* zero the MMUBP and set the ASID */
7796 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7798 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7799 struct gaudi2_tpc_mmu_data tpc_mmu_data;
7800 struct iterate_module_ctx tpc_iter = {
7801 .fn = &gaudi2_tpc_mmu_prepare,
7802 .data = &tpc_mmu_data,
7804 int rc, i;
7806 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7807 dev_crit(hdev->dev, "asid %u is too big\n", asid);
7808 return -EINVAL;
7811 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7812 return 0;
7814 rc = gaudi2_mmu_shared_prepare(hdev, asid);
7815 if (rc)
7816 return rc;
7818 /* configure DCORE MMUs */
7819 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7820 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7821 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7822 for (i = 0 ; i < NUM_OF_DCORES ; i++)
7823 gaudi2_mmu_dcore_prepare(hdev, i, asid);
7825 return 0;
7828 static inline bool is_info_event(u32 event)
7830 switch (event) {
7831 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7832 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7833 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
7835 /* return in case of NIC status event - these events are received periodically and not as
7836 * an indication to an error.
7838 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7839 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
7840 return true;
7841 default:
7842 return false;
7846 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7847 bool ratelimited, const char *fmt, ...)
7849 struct va_format vaf;
7850 va_list args;
7852 va_start(args, fmt);
7853 vaf.fmt = fmt;
7854 vaf.va = &args;
7856 if (ratelimited)
7857 dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7858 gaudi2_irq_map_table[event_type].valid ?
7859 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7860 else
7861 dev_err(hdev->dev, "%s: %pV\n",
7862 gaudi2_irq_map_table[event_type].valid ?
7863 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7865 va_end(args);
7868 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7869 struct hl_eq_ecc_data *ecc_data)
7871 u64 ecc_address = 0, ecc_syndrome = 0;
7872 u8 memory_wrapper_idx = 0;
7873 bool has_block_id = false;
7874 u16 block_id;
7876 if (hl_fw_version_cmp(hdev, 1, 12, 0) >= 0)
7877 has_block_id = true;
7879 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7880 ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
7881 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7883 if (has_block_id) {
7884 block_id = le16_to_cpu(ecc_data->block_id);
7885 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7886 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
7887 ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
7888 ecc_data->is_critical);
7889 } else {
7890 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7891 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
7892 ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
7895 return !!ecc_data->is_critical;
7898 static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
7900 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
7901 u64 cq_ptr, cp_current_inst;
7902 u32 lo, hi, cq_size, cp_sts;
7903 bool is_arc_cq;
7905 cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
7906 is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
7908 if (is_arc_cq) {
7909 lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
7910 hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
7911 cq_ptr = ((u64) hi) << 32 | lo;
7912 cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
7913 } else {
7914 lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
7915 hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
7916 cq_ptr = ((u64) hi) << 32 | lo;
7917 cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
7920 lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
7921 hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
7922 cp_current_inst = ((u64) hi) << 32 | lo;
7924 dev_info(hdev->dev,
7925 "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
7926 is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
7928 if (undef_opcode->write_enable) {
7929 memset(undef_opcode, 0, sizeof(*undef_opcode));
7930 undef_opcode->timestamp = ktime_get();
7931 undef_opcode->cq_addr = cq_ptr;
7932 undef_opcode->cq_size = cq_size;
7933 undef_opcode->engine_id = engine_id;
7934 undef_opcode->stream_id = QMAN_STREAMS;
7935 undef_opcode->write_enable = 0;
7939 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7940 u64 qman_base, u32 qid_base, u64 *event_mask)
7942 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7943 u64 glbl_sts_addr, arb_err_addr;
7944 char reg_desc[32];
7946 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7947 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7949 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7950 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7951 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7953 if (!glbl_sts_val)
7954 continue;
7956 if (i == QMAN_STREAMS) {
7957 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
7958 num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
7959 } else {
7960 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7961 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7964 for (j = 0 ; j < num_error_causes ; j++)
7965 if (glbl_sts_val & BIT(j)) {
7966 gaudi2_print_event(hdev, event_type, true,
7967 "%s. err cause: %s", reg_desc,
7968 i == QMAN_STREAMS ?
7969 gaudi2_lower_qman_error_cause[j] :
7970 gaudi2_qman_error_cause[j]);
7971 error_count++;
7974 /* Check for undefined opcode error in lower QM */
7975 if ((i == QMAN_STREAMS) &&
7976 (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
7977 handle_lower_qman_data_on_err(hdev, qman_base,
7978 gaudi2_queue_id_to_engine_id[qid_base]);
7979 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7983 arb_err_val = RREG32(arb_err_addr);
7985 if (!arb_err_val)
7986 goto out;
7988 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7989 if (arb_err_val & BIT(j)) {
7990 gaudi2_print_event(hdev, event_type, true,
7991 "ARB_ERR. err cause: %s",
7992 gaudi2_qman_arb_error_cause[j]);
7993 error_count++;
7997 out:
7998 return error_count;
8001 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
8002 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
8003 enum gaudi2_engine_id id, u64 *event_mask)
8005 u32 razwi_hi, razwi_lo, razwi_xy;
8006 u16 eng_id = id;
8007 u8 rd_wr_flag;
8009 if (is_write) {
8010 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
8011 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
8012 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
8013 rd_wr_flag = HL_RAZWI_WRITE;
8014 } else {
8015 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
8016 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
8017 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
8018 rd_wr_flag = HL_RAZWI_READ;
8021 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
8022 rd_wr_flag | HL_RAZWI_HBW, event_mask);
8024 dev_err_ratelimited(hdev->dev,
8025 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
8026 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
8029 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
8030 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
8031 enum gaudi2_engine_id id, u64 *event_mask)
8033 u64 razwi_addr = CFG_BASE;
8034 u32 razwi_xy;
8035 u16 eng_id = id;
8036 u8 rd_wr_flag;
8038 if (is_write) {
8039 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
8040 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
8041 rd_wr_flag = HL_RAZWI_WRITE;
8042 } else {
8043 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
8044 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
8045 rd_wr_flag = HL_RAZWI_READ;
8048 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
8049 dev_err_ratelimited(hdev->dev,
8050 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
8051 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
8052 razwi_xy);
8055 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
8056 enum razwi_event_sources module, u8 module_idx)
8058 switch (module) {
8059 case RAZWI_TPC:
8060 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
8061 return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
8062 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8063 (module_idx % NUM_OF_TPC_PER_DCORE) +
8064 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8066 case RAZWI_MME:
8067 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
8068 (module_idx * ENGINE_ID_DCORE_OFFSET));
8070 case RAZWI_EDMA:
8071 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8072 (module_idx % NUM_OF_EDMA_PER_DCORE));
8074 case RAZWI_PDMA:
8075 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8077 case RAZWI_NIC:
8078 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8080 case RAZWI_DEC:
8081 if (module_idx == 8)
8082 return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8084 if (module_idx == 9)
8085 return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8087 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8088 (module_idx % NUM_OF_DEC_PER_DCORE) +
8089 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8091 case RAZWI_ROT:
8092 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8094 case RAZWI_ARC_FARM:
8095 return GAUDI2_ENGINE_ID_ARC_FARM;
8097 default:
8098 return GAUDI2_ENGINE_ID_SIZE;
8103 * This function handles RR(Range register) hit events.
8104 * raised be initiators not PSOC RAZWI.
8106 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8107 enum razwi_event_sources module, u8 module_idx,
8108 u8 module_sub_idx, u64 *event_mask)
8110 bool via_sft = false;
8111 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
8112 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8113 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8114 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8115 char initiator_name[64];
8117 switch (module) {
8118 case RAZWI_TPC:
8119 sprintf(initiator_name, "TPC_%u", module_idx);
8120 if (hdev->tpc_binning) {
8121 binned_idx = __ffs(hdev->tpc_binning);
8122 if (binned_idx == module_idx)
8123 module_idx = TPC_ID_DCORE0_TPC6;
8126 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8127 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8128 break;
8129 case RAZWI_MME:
8130 sprintf(initiator_name, "MME_%u", module_idx);
8131 switch (module_sub_idx) {
8132 case MME_WAP0:
8133 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8134 break;
8135 case MME_WAP1:
8136 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8137 break;
8138 case MME_WRITE:
8139 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8140 break;
8141 case MME_READ:
8142 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8143 break;
8144 case MME_SBTE0:
8145 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8146 break;
8147 case MME_SBTE1:
8148 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8149 break;
8150 case MME_SBTE2:
8151 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8152 break;
8153 case MME_SBTE3:
8154 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8155 break;
8156 case MME_SBTE4:
8157 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8158 break;
8159 default:
8160 return;
8162 lbw_rtr_id = hbw_rtr_id;
8163 break;
8164 case RAZWI_EDMA:
8165 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8166 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8167 /* SFT has separate MSTR_IF for LBW, only there we can
8168 * read the LBW razwi related registers
8170 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8171 dcore_id * SFT_DCORE_OFFSET;
8172 via_sft = true;
8173 sprintf(initiator_name, "EDMA_%u", module_idx);
8174 break;
8175 case RAZWI_PDMA:
8176 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8177 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8178 sprintf(initiator_name, "PDMA_%u", module_idx);
8179 break;
8180 case RAZWI_NIC:
8181 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8182 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8183 sprintf(initiator_name, "NIC_%u", module_idx);
8184 break;
8185 case RAZWI_DEC:
8186 sprintf(initiator_name, "DEC_%u", module_idx);
8187 if (hdev->decoder_binning) {
8188 binned_idx = __ffs(hdev->decoder_binning);
8189 if (binned_idx == module_idx)
8190 module_idx = DEC_ID_PCIE_VDEC1;
8192 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8193 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8194 break;
8195 case RAZWI_ROT:
8196 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8197 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8198 sprintf(initiator_name, "ROT_%u", module_idx);
8199 break;
8200 case RAZWI_ARC_FARM:
8201 lbw_rtr_id = DCORE1_RTR5;
8202 hbw_rtr_id = DCORE1_RTR7;
8203 sprintf(initiator_name, "ARC_FARM_%u", module_idx);
8204 break;
8205 default:
8206 return;
8209 /* Find router mstr_if register base */
8210 if (!via_sft) {
8211 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8212 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8213 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8214 dcore_id * DCORE_OFFSET +
8215 dcore_rtr_id * DCORE_RTR_OFFSET +
8216 RTR_MSTR_IF_OFFSET;
8217 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8218 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8221 /* Find out event cause by reading "RAZWI_HAPPENED" registers */
8222 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8223 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8224 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8225 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8227 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8228 if (hbw_shrd_aw) {
8229 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8230 initiator_name, eng_id, event_mask);
8232 /* Clear event indication */
8233 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8236 if (hbw_shrd_ar) {
8237 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8238 initiator_name, eng_id, event_mask);
8240 /* Clear event indication */
8241 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8244 if (lbw_shrd_aw) {
8245 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8246 initiator_name, eng_id, event_mask);
8248 /* Clear event indication */
8249 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8252 if (lbw_shrd_ar) {
8253 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8254 initiator_name, eng_id, event_mask);
8256 /* Clear event indication */
8257 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8261 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8263 struct asic_fixed_properties *prop = &hdev->asic_prop;
8264 u8 mod_idx, sub_mod;
8266 /* check all TPCs */
8267 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8268 if (prop->tpc_enabled_mask & BIT(mod_idx))
8269 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8272 /* check all MMEs */
8273 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8274 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8275 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8276 sub_mod, NULL);
8278 /* check all EDMAs */
8279 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8280 if (prop->edma_enabled_mask & BIT(mod_idx))
8281 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8283 /* check all PDMAs */
8284 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8285 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8287 /* check all NICs */
8288 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8289 if (hdev->nic_ports_mask & BIT(mod_idx))
8290 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8291 NULL);
8293 /* check all DECs */
8294 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8295 if (prop->decoder_enabled_mask & BIT(mod_idx))
8296 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8298 /* check all ROTs */
8299 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8300 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8303 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8304 u32 axuser_xy, u32 *base, u16 *eng_id,
8305 char *eng_name)
8308 int i, num_of_eng = 0;
8309 u16 str_size = 0;
8311 for (i = 0 ; i < array_size ; i++) {
8312 if (axuser_xy != razwi_info[i].axuser_xy)
8313 continue;
8315 eng_id[num_of_eng] = razwi_info[i].eng_id;
8316 base[num_of_eng] = razwi_info[i].rtr_ctrl;
8317 if (!num_of_eng)
8318 str_size += scnprintf(eng_name + str_size,
8319 PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8320 razwi_info[i].eng_name);
8321 else
8322 str_size += scnprintf(eng_name + str_size,
8323 PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8324 razwi_info[i].eng_name);
8325 num_of_eng++;
8328 return num_of_eng;
8331 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8332 u64 *event_mask)
8334 u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8335 u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8336 u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8337 char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8338 bool razwi_happened = false;
8339 u64 addr;
8340 int i;
8342 num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8343 axuser_xy, base, eng_id, eng_name_str);
8345 /* If no match for XY coordinates, try to find it in MME razwi table */
8346 if (!num_of_eng) {
8347 axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8348 num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8349 ARRAY_SIZE(mme_razwi_info),
8350 axuser_xy, base, eng_id,
8351 eng_name_str);
8354 for (i = 0 ; i < num_of_eng ; i++) {
8355 if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8356 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8357 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8358 addr = ((u64)addr_hi << 32) + addr_lo;
8359 if (addr) {
8360 dev_err(hdev->dev,
8361 "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8362 eng_name_str, addr);
8363 hl_handle_razwi(hdev, addr, &eng_id[0],
8364 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8365 razwi_happened = true;
8369 if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8370 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8371 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8372 addr = ((u64)addr_hi << 32) + addr_lo;
8373 if (addr) {
8374 dev_err(hdev->dev,
8375 "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8376 eng_name_str, addr);
8377 hl_handle_razwi(hdev, addr, &eng_id[0],
8378 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8379 razwi_happened = true;
8383 if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8384 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8385 if (addr_lo) {
8386 dev_err(hdev->dev,
8387 "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8388 eng_name_str, addr_lo);
8389 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8390 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8391 razwi_happened = true;
8395 if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8396 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8397 if (addr_lo) {
8398 dev_err(hdev->dev,
8399 "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8400 eng_name_str, addr_lo);
8401 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8402 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8403 razwi_happened = true;
8406 /* In common case the loop will break, when there is only one engine id, or
8407 * several engines with the same router. The exceptional case is with psoc razwi
8408 * from EDMA, where it's possible to get axuser id which fits 2 routers (2
8409 * interfaces of sft router). In this case, maybe the first router won't hold info
8410 * and we will need to iterate on the other router.
8412 if (razwi_happened)
8413 break;
8416 return razwi_happened;
8419 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8420 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8422 u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8424 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8425 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8426 if (!razwi_intr)
8427 return 0;
8430 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8432 dev_err_ratelimited(hdev->dev,
8433 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8434 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8435 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8436 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8437 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8438 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8440 if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8441 error_count++;
8442 else
8443 dev_err_ratelimited(hdev->dev,
8444 "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8445 razwi_mask_info);
8447 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8448 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8449 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8451 return error_count;
8454 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8456 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8458 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8460 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8461 if (sts_val & BIT(i)) {
8462 gaudi2_print_event(hdev, event_type, true,
8463 "err cause: %s", gaudi2_qm_sei_error_cause[i]);
8464 sts_clr_val |= BIT(i);
8465 error_count++;
8469 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8471 return error_count;
8474 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8475 bool extended_err_check, u64 *event_mask)
8477 enum razwi_event_sources module;
8478 u32 error_count = 0;
8479 u64 qman_base;
8480 u8 index;
8482 switch (event_type) {
8483 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8484 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8485 qman_base = mmDCORE0_TPC0_QM_BASE +
8486 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8487 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8488 module = RAZWI_TPC;
8489 break;
8490 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8491 qman_base = mmDCORE0_TPC6_QM_BASE;
8492 module = RAZWI_TPC;
8493 break;
8494 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8495 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8496 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8497 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8498 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8499 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8500 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8501 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8502 module = RAZWI_MME;
8503 break;
8504 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8505 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8506 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8507 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8508 module = RAZWI_PDMA;
8509 break;
8510 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8511 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8512 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8513 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8514 module = RAZWI_ROT;
8515 break;
8516 default:
8517 return 0;
8520 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8522 /* There is a single event per NIC macro, so should check its both QMAN blocks */
8523 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8524 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8525 error_count += _gaudi2_handle_qm_sei_err(hdev,
8526 qman_base + NIC_QM_OFFSET, event_type);
8528 if (extended_err_check) {
8529 /* check if RAZWI happened */
8530 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8531 hl_check_for_glbl_errors(hdev);
8534 return error_count;
8537 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8539 u32 qid_base, error_count = 0;
8540 u64 qman_base;
8541 u8 index = 0;
8543 switch (event_type) {
8544 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8545 index = event_type - GAUDI2_EVENT_TPC0_QM;
8546 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8547 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8548 break;
8549 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8550 index = event_type - GAUDI2_EVENT_TPC6_QM;
8551 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8552 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8553 break;
8554 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8555 index = event_type - GAUDI2_EVENT_TPC12_QM;
8556 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8557 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8558 break;
8559 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8560 index = event_type - GAUDI2_EVENT_TPC18_QM;
8561 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8562 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8563 break;
8564 case GAUDI2_EVENT_TPC24_QM:
8565 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8566 qman_base = mmDCORE0_TPC6_QM_BASE;
8567 break;
8568 case GAUDI2_EVENT_MME0_QM:
8569 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8570 qman_base = mmDCORE0_MME_QM_BASE;
8571 break;
8572 case GAUDI2_EVENT_MME1_QM:
8573 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8574 qman_base = mmDCORE1_MME_QM_BASE;
8575 break;
8576 case GAUDI2_EVENT_MME2_QM:
8577 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8578 qman_base = mmDCORE2_MME_QM_BASE;
8579 break;
8580 case GAUDI2_EVENT_MME3_QM:
8581 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8582 qman_base = mmDCORE3_MME_QM_BASE;
8583 break;
8584 case GAUDI2_EVENT_HDMA0_QM:
8585 index = 0;
8586 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8587 qman_base = mmDCORE0_EDMA0_QM_BASE;
8588 break;
8589 case GAUDI2_EVENT_HDMA1_QM:
8590 index = 1;
8591 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8592 qman_base = mmDCORE0_EDMA1_QM_BASE;
8593 break;
8594 case GAUDI2_EVENT_HDMA2_QM:
8595 index = 2;
8596 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8597 qman_base = mmDCORE1_EDMA0_QM_BASE;
8598 break;
8599 case GAUDI2_EVENT_HDMA3_QM:
8600 index = 3;
8601 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8602 qman_base = mmDCORE1_EDMA1_QM_BASE;
8603 break;
8604 case GAUDI2_EVENT_HDMA4_QM:
8605 index = 4;
8606 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8607 qman_base = mmDCORE2_EDMA0_QM_BASE;
8608 break;
8609 case GAUDI2_EVENT_HDMA5_QM:
8610 index = 5;
8611 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8612 qman_base = mmDCORE2_EDMA1_QM_BASE;
8613 break;
8614 case GAUDI2_EVENT_HDMA6_QM:
8615 index = 6;
8616 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8617 qman_base = mmDCORE3_EDMA0_QM_BASE;
8618 break;
8619 case GAUDI2_EVENT_HDMA7_QM:
8620 index = 7;
8621 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8622 qman_base = mmDCORE3_EDMA1_QM_BASE;
8623 break;
8624 case GAUDI2_EVENT_PDMA0_QM:
8625 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8626 qman_base = mmPDMA0_QM_BASE;
8627 break;
8628 case GAUDI2_EVENT_PDMA1_QM:
8629 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8630 qman_base = mmPDMA1_QM_BASE;
8631 break;
8632 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8633 qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8634 qman_base = mmROT0_QM_BASE;
8635 break;
8636 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8637 qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8638 qman_base = mmROT1_QM_BASE;
8639 break;
8640 default:
8641 return 0;
8644 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
8645 qid_base, event_mask);
8647 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8648 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8649 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8650 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8653 hl_check_for_glbl_errors(hdev);
8655 return error_count;
8658 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8660 u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
8662 for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
8663 sts_clr_val = 0;
8664 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8665 (arc_farm * ARC_FARM_OFFSET));
8667 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8668 if (sts_val & BIT(i)) {
8669 gaudi2_print_event(hdev, event_type, true,
8670 "ARC FARM ARC %u err cause: %s",
8671 arc_farm, gaudi2_arc_sei_error_cause[i]);
8672 sts_clr_val |= BIT(i);
8673 error_count++;
8676 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
8677 sts_clr_val);
8680 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
8681 hl_check_for_glbl_errors(hdev);
8683 return error_count;
8686 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8688 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8690 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8692 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8693 if (sts_val & BIT(i)) {
8694 gaudi2_print_event(hdev, event_type, true,
8695 "err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8696 sts_clr_val |= BIT(i);
8697 error_count++;
8701 hl_check_for_glbl_errors(hdev);
8703 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8705 return error_count;
8708 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8709 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8710 u64 *event_mask)
8712 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8713 u32 error_count = 0;
8714 int i;
8716 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8717 if (intr_cause_data & BIT(i)) {
8718 gaudi2_print_event(hdev, event_type, true,
8719 "err cause: %s", guadi2_rot_error_cause[i]);
8720 error_count++;
8723 /* check if RAZWI happened */
8724 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8725 hl_check_for_glbl_errors(hdev);
8727 return error_count;
8730 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8731 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8732 u64 *event_mask)
8734 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8735 u32 error_count = 0;
8736 int i;
8738 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8739 if (intr_cause_data & BIT(i)) {
8740 gaudi2_print_event(hdev, event_type, true,
8741 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]);
8742 error_count++;
8745 /* check if RAZWI happened */
8746 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8747 hl_check_for_glbl_errors(hdev);
8749 return error_count;
8752 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8753 u64 *event_mask)
8755 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8756 int i;
8758 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8759 /* DCORE DEC */
8760 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8761 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8762 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8763 else
8764 /* PCIE DEC */
8765 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8766 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8768 sts_val = RREG32(sts_addr);
8770 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8771 if (sts_val & BIT(i)) {
8772 gaudi2_print_event(hdev, event_type, true,
8773 "err cause: %s", gaudi2_dec_error_cause[i]);
8774 sts_clr_val |= BIT(i);
8775 error_count++;
8779 /* check if RAZWI happened */
8780 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8781 hl_check_for_glbl_errors(hdev);
8783 /* Write 1 clear errors */
8784 WREG32(sts_addr, sts_clr_val);
8786 return error_count;
8789 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8790 u64 *event_mask)
8792 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8793 int i;
8795 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8796 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8798 sts_val = RREG32(sts_addr);
8800 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8801 if (sts_val & BIT(i)) {
8802 gaudi2_print_event(hdev, event_type, true,
8803 "err cause: %s", guadi2_mme_error_cause[i]);
8804 sts_clr_val |= BIT(i);
8805 error_count++;
8809 /* check if RAZWI happened */
8810 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8811 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8813 hl_check_for_glbl_errors(hdev);
8815 WREG32(sts_clr_addr, sts_clr_val);
8817 return error_count;
8820 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
8823 * We have a single error cause here but the report mechanism is
8824 * buggy. Hence there is no good reason to fetch the cause so we
8825 * just check for glbl_errors and exit.
8827 hl_check_for_glbl_errors(hdev);
8829 return GAUDI2_NA_EVENT_CAUSE;
8832 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8833 u64 *event_mask)
8835 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8836 int i;
8838 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8839 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8841 sts_val = RREG32(sts_addr);
8843 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8844 if (sts_val & BIT(i)) {
8845 gaudi2_print_event(hdev, event_type, true,
8846 "err cause: %s", guadi2_mme_wap_error_cause[i]);
8847 sts_clr_val |= BIT(i);
8848 error_count++;
8852 /* check if RAZWI happened on WAP0/1 */
8853 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8854 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8855 hl_check_for_glbl_errors(hdev);
8857 WREG32(sts_clr_addr, sts_clr_val);
8859 return error_count;
8862 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8863 u64 intr_cause_data)
8865 u32 error_count = 0;
8866 int i;
8868 /* If an AXI read or write error is received, an error is reported and
8869 * interrupt message is sent. Due to an HW errata, when reading the cause
8870 * register of the KDMA engine, the reported error is always HBW even if
8871 * the actual error caused by a LBW KDMA transaction.
8873 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8874 if (intr_cause_data & BIT(i)) {
8875 gaudi2_print_event(hdev, event_type, true,
8876 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8877 error_count++;
8880 hl_check_for_glbl_errors(hdev);
8882 return error_count;
8885 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
8887 u32 error_count = 0;
8888 int i;
8890 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8891 if (intr_cause & BIT(i)) {
8892 gaudi2_print_event(hdev, event_type, true,
8893 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8894 error_count++;
8897 hl_check_for_glbl_errors(hdev);
8899 return error_count;
8902 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8904 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8906 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8907 if (RREG32(razwi_happened_addr)) {
8908 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8909 GAUDI2_ENGINE_ID_PCIE, event_mask);
8910 WREG32(razwi_happened_addr, 0x1);
8913 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8914 if (RREG32(razwi_happened_addr)) {
8915 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8916 GAUDI2_ENGINE_ID_PCIE, event_mask);
8917 WREG32(razwi_happened_addr, 0x1);
8920 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8921 if (RREG32(razwi_happened_addr)) {
8922 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8923 GAUDI2_ENGINE_ID_PCIE, event_mask);
8924 WREG32(razwi_happened_addr, 0x1);
8927 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8928 if (RREG32(razwi_happened_addr)) {
8929 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8930 GAUDI2_ENGINE_ID_PCIE, event_mask);
8931 WREG32(razwi_happened_addr, 0x1);
8935 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8936 u64 intr_cause_data, u64 *event_mask)
8938 u32 error_count = 0;
8939 int i;
8941 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8942 if (!(intr_cause_data & BIT_ULL(i)))
8943 continue;
8945 gaudi2_print_event(hdev, event_type, true,
8946 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8947 error_count++;
8949 switch (intr_cause_data & BIT_ULL(i)) {
8950 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8951 hl_check_for_glbl_errors(hdev);
8952 break;
8953 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8954 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8955 break;
8959 return error_count;
8962 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8963 u64 intr_cause_data)
8966 u32 error_count = 0;
8967 int i;
8969 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8970 if (intr_cause_data & BIT_ULL(i)) {
8971 gaudi2_print_event(hdev, event_type, true,
8972 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8973 error_count++;
8977 return error_count;
8980 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8982 u32 error_count = 0;
8983 int i;
8985 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8986 if (intr_cause_data & BIT_ULL(i)) {
8987 gaudi2_print_event(hdev, event_type, true,
8988 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8989 error_count++;
8993 return error_count;
8996 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8997 u64 *event_mask)
8999 u32 valid, val;
9000 u64 addr;
9002 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
9004 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
9005 return;
9007 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
9008 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
9009 addr <<= 32;
9010 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
9012 if (is_pmmu) {
9013 dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
9014 } else {
9015 addr = gaudi2_mmu_descramble_addr(hdev, addr);
9016 addr &= HW_UNSCRAMBLED_BITS_MASK;
9017 dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
9018 addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
9021 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
9023 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
9026 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
9028 u32 valid, val;
9029 u64 addr;
9031 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
9033 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
9034 return;
9036 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
9037 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
9038 addr <<= 32;
9039 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
9041 if (!is_pmmu)
9042 addr = gaudi2_mmu_descramble_addr(hdev, addr);
9044 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
9045 is_pmmu ? "PMMU" : "HMMU", addr);
9046 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
9049 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
9050 u64 mmu_base, bool is_pmmu, u64 *event_mask)
9052 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
9053 int i;
9055 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
9057 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
9058 if (spi_sei_cause & BIT(i)) {
9059 gaudi2_print_event(hdev, event_type, true,
9060 "err cause: %s", gaudi2_mmu_spi_sei[i].cause);
9062 if (i == 0)
9063 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
9064 else if (i == 1)
9065 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
9067 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
9068 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
9070 error_count++;
9074 /* Clear cause */
9075 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
9077 /* Clear interrupt */
9078 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
9080 return error_count;
9083 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
9085 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
9086 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
9087 int i;
9089 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
9090 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
9092 sei_cause_val = RREG32(sei_cause_addr);
9093 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
9094 cq_intr_val = RREG32(cq_intr_addr);
9096 /* SEI interrupt */
9097 if (sei_cause_cause) {
9098 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
9099 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
9100 sei_cause_val);
9102 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
9103 if (!(sei_cause_cause & BIT(i)))
9104 continue;
9106 gaudi2_print_event(hdev, event_type, true,
9107 "err cause: %s. %s: 0x%X",
9108 gaudi2_sm_sei_cause[i].cause_name,
9109 gaudi2_sm_sei_cause[i].log_name,
9110 sei_cause_log);
9111 error_count++;
9112 break;
9115 /* Clear SM_SEI_CAUSE */
9116 WREG32(sei_cause_addr, 0);
9119 /* CQ interrupt */
9120 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
9121 cq_intr_queue_index =
9122 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
9123 cq_intr_val);
9125 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
9126 sm_index, cq_intr_queue_index);
9127 error_count++;
9129 /* Clear CQ_INTR */
9130 WREG32(cq_intr_addr, 0);
9133 hl_check_for_glbl_errors(hdev);
9135 return error_count;
9138 static u64 get_hmmu_base(u16 event_type)
9140 u8 dcore, index_in_dcore;
9142 switch (event_type) {
9143 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
9144 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
9145 dcore = 0;
9146 index_in_dcore = 0;
9147 break;
9148 case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
9149 case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
9150 dcore = 1;
9151 index_in_dcore = 0;
9152 break;
9153 case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
9154 case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
9155 dcore = 0;
9156 index_in_dcore = 1;
9157 break;
9158 case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
9159 case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9160 dcore = 1;
9161 index_in_dcore = 1;
9162 break;
9163 case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9164 case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9165 dcore = 3;
9166 index_in_dcore = 2;
9167 break;
9168 case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9169 case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9170 dcore = 2;
9171 index_in_dcore = 2;
9172 break;
9173 case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9174 case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9175 dcore = 3;
9176 index_in_dcore = 3;
9177 break;
9178 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9179 case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9180 dcore = 2;
9181 index_in_dcore = 3;
9182 break;
9183 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9184 case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9185 dcore = 0;
9186 index_in_dcore = 2;
9187 break;
9188 case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9189 case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9190 dcore = 1;
9191 index_in_dcore = 2;
9192 break;
9193 case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9194 case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9195 dcore = 0;
9196 index_in_dcore = 3;
9197 break;
9198 case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9199 case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9200 dcore = 1;
9201 index_in_dcore = 3;
9202 break;
9203 case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9204 case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9205 dcore = 3;
9206 index_in_dcore = 0;
9207 break;
9208 case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9209 case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9210 dcore = 2;
9211 index_in_dcore = 0;
9212 break;
9213 case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9214 case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9215 dcore = 3;
9216 index_in_dcore = 1;
9217 break;
9218 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9219 case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
9220 dcore = 2;
9221 index_in_dcore = 1;
9222 break;
9223 default:
9224 return ULONG_MAX;
9227 return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
9230 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9232 bool is_pmmu = false;
9233 u32 error_count = 0;
9234 u64 mmu_base;
9236 switch (event_type) {
9237 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9238 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9239 mmu_base = get_hmmu_base(event_type);
9240 break;
9242 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9243 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9244 is_pmmu = true;
9245 mmu_base = mmPMMU_HBW_MMU_BASE;
9246 break;
9247 default:
9248 return 0;
9251 if (mmu_base == ULONG_MAX)
9252 return 0;
9254 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9255 is_pmmu, event_mask);
9256 hl_check_for_glbl_errors(hdev);
9258 return error_count;
9262 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9263 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9264 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9266 bool require_hard_reset = false;
9267 u32 addr, beat, beat_shift;
9269 dev_err_ratelimited(hdev->dev,
9270 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9271 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9272 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9273 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9275 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9276 dev_err_ratelimited(hdev->dev,
9277 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9278 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9279 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9280 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9281 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9282 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9284 /* For each beat (RDQS edge), look for possible errors and print relevant info */
9285 for (beat = 0 ; beat < 4 ; beat++) {
9286 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9287 (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9288 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9289 beat,
9290 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9291 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9293 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9294 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9295 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9296 beat,
9297 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9298 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9299 require_hard_reset = true;
9302 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9303 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9304 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9305 dev_err_ratelimited(hdev->dev,
9306 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9307 beat,
9308 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9309 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9310 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9311 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9312 require_hard_reset = true;
9315 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9316 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9317 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9318 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9319 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9322 return require_hard_reset;
9325 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9326 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9328 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9329 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9331 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9333 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9334 derr & 0x3, derr & 0xc);
9336 /* JIRA H6-3286 - the following prints may not be valid */
9337 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9338 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9339 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9340 dev_err_ratelimited(hdev->dev,
9341 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9343 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9344 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9345 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9346 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9350 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9351 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9353 __le32 *col_cmd = ca_par_err_data->dbg_col;
9354 __le16 *row_cmd = ca_par_err_data->dbg_row;
9355 u32 i;
9357 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9359 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9360 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9361 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9362 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9363 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9366 /* Returns true if hard reset is needed or false otherwise */
9367 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9368 struct hl_eq_hbm_sei_data *sei_data)
9370 bool require_hard_reset = false;
9371 u32 hbm_id, mc_id, cause_idx;
9373 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9374 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9376 cause_idx = sei_data->hdr.sei_cause;
9377 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9378 gaudi2_print_event(hdev, event_type, true,
9379 "err cause: %s",
9380 "Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
9381 return true;
9384 gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9385 "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9386 sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9387 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9388 hbm_mc_sei_cause[cause_idx]);
9390 /* Print error-specific info */
9391 switch (cause_idx) {
9392 case HBM_SEI_CATTRIP:
9393 require_hard_reset = true;
9394 break;
9396 case HBM_SEI_CMD_PARITY_EVEN:
9397 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9398 le32_to_cpu(sei_data->hdr.cnt));
9399 require_hard_reset = true;
9400 break;
9402 case HBM_SEI_CMD_PARITY_ODD:
9403 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9404 le32_to_cpu(sei_data->hdr.cnt));
9405 require_hard_reset = true;
9406 break;
9408 case HBM_SEI_WRITE_DATA_PARITY_ERR:
9409 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9410 le32_to_cpu(sei_data->hdr.cnt));
9411 require_hard_reset = true;
9412 break;
9414 case HBM_SEI_READ_ERR:
9415 /* Unlike other SEI events, read error requires further processing of the
9416 * raw data in order to determine the root cause.
9418 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9419 &sei_data->read_err_info,
9420 le32_to_cpu(sei_data->hdr.cnt));
9421 break;
9423 default:
9424 break;
9427 require_hard_reset |= !!sei_data->hdr.is_critical;
9429 return require_hard_reset;
9432 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9433 u64 intr_cause_data)
9435 if (intr_cause_data) {
9436 gaudi2_print_event(hdev, event_type, true,
9437 "temperature error cause: %#llx", intr_cause_data);
9438 return 1;
9441 return 0;
9444 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9446 u32 i, error_count = 0;
9448 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9449 if (intr_cause_data & hbm_mc_spi[i].mask) {
9450 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9451 hbm_mc_spi[i].cause);
9452 error_count++;
9455 return error_count;
9458 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9460 ktime_t zero_time = ktime_set(0, 0);
9462 mutex_lock(&hdev->clk_throttling.lock);
9464 switch (event_type) {
9465 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9466 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9467 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9468 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9469 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9470 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9471 break;
9473 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9474 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9475 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9476 dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
9477 break;
9479 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9480 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9481 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9482 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9483 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9484 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9485 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9486 break;
9488 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9489 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9490 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9491 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9492 dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
9493 break;
9495 default:
9496 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9497 break;
9500 mutex_unlock(&hdev->clk_throttling.lock);
9503 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9504 struct cpucp_pkt_sync_err *sync_err)
9506 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9508 gaudi2_print_event(hdev, event_type, false,
9509 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9510 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9511 q->pi, atomic_read(&q->ci));
9514 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9516 u32 p2p_intr, msix_gw_intr, error_count = 0;
9518 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9519 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9521 if (p2p_intr) {
9522 gaudi2_print_event(hdev, event_type, true,
9523 "pcie p2p transaction terminated due to security, req_id(0x%x)",
9524 RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9526 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9527 error_count++;
9530 if (msix_gw_intr) {
9531 gaudi2_print_event(hdev, event_type, true,
9532 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9533 RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9535 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9536 error_count++;
9539 return error_count;
9542 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9543 struct hl_eq_pcie_drain_ind_data *drain_data)
9545 u64 cause, error_count = 0;
9547 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9549 if (cause & BIT_ULL(0)) {
9550 dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n");
9551 error_count++;
9554 if (cause & BIT_ULL(1)) {
9555 dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n");
9556 error_count++;
9559 return error_count;
9562 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9564 u32 error_count = 0;
9565 int i;
9567 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9568 if (intr_cause_data & BIT_ULL(i)) {
9569 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9570 gaudi2_psoc_axi_drain_interrupts_cause[i]);
9571 error_count++;
9575 hl_check_for_glbl_errors(hdev);
9577 return error_count;
9580 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9581 struct cpucp_pkt_sync_err *sync_err)
9583 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9585 gaudi2_print_event(hdev, event_type, false,
9586 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9587 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9590 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9591 struct hl_eq_engine_arc_intr_data *data)
9593 struct hl_engine_arc_dccm_queue_full_irq *q;
9594 u32 intr_type, engine_id;
9595 u64 payload;
9597 intr_type = le32_to_cpu(data->intr_type);
9598 engine_id = le32_to_cpu(data->engine_id);
9599 payload = le64_to_cpu(data->payload);
9601 switch (intr_type) {
9602 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9603 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9605 gaudi2_print_event(hdev, event_type, true,
9606 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9607 engine_id, intr_type, q->queue_index);
9608 return 1;
9609 default:
9610 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9611 return 0;
9615 static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
9617 enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
9618 u16 index;
9620 switch (event_type) {
9621 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9622 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9623 type = GAUDI2_BLOCK_TYPE_TPC;
9624 break;
9625 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
9626 index = event_type - GAUDI2_EVENT_TPC0_QM;
9627 type = GAUDI2_BLOCK_TYPE_TPC;
9628 break;
9629 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9630 case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9631 case GAUDI2_EVENT_MME0_QM:
9632 index = 0;
9633 type = GAUDI2_BLOCK_TYPE_MME;
9634 break;
9635 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9636 case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9637 case GAUDI2_EVENT_MME1_QM:
9638 index = 1;
9639 type = GAUDI2_BLOCK_TYPE_MME;
9640 break;
9641 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9642 case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9643 case GAUDI2_EVENT_MME2_QM:
9644 index = 2;
9645 type = GAUDI2_BLOCK_TYPE_MME;
9646 break;
9647 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9648 case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9649 case GAUDI2_EVENT_MME3_QM:
9650 index = 3;
9651 type = GAUDI2_BLOCK_TYPE_MME;
9652 break;
9653 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9654 case GAUDI2_EVENT_KDMA_BM_SPMU:
9655 case GAUDI2_EVENT_KDMA0_CORE:
9656 return GAUDI2_ENGINE_ID_KDMA;
9657 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9658 case GAUDI2_EVENT_PDMA0_CORE:
9659 case GAUDI2_EVENT_PDMA0_BM_SPMU:
9660 case GAUDI2_EVENT_PDMA0_QM:
9661 return GAUDI2_ENGINE_ID_PDMA_0;
9662 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9663 case GAUDI2_EVENT_PDMA1_CORE:
9664 case GAUDI2_EVENT_PDMA1_BM_SPMU:
9665 case GAUDI2_EVENT_PDMA1_QM:
9666 return GAUDI2_ENGINE_ID_PDMA_1;
9667 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9668 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9669 type = GAUDI2_BLOCK_TYPE_DEC;
9670 break;
9671 case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
9672 index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
9673 type = GAUDI2_BLOCK_TYPE_DEC;
9674 break;
9675 case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
9676 index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
9677 return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
9678 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9679 index = event_type - GAUDI2_EVENT_NIC0_QM0;
9680 return GAUDI2_ENGINE_ID_NIC0_0 + index;
9681 case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
9682 index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
9683 return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
9684 case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
9685 index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
9686 type = GAUDI2_BLOCK_TYPE_TPC;
9687 break;
9688 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9689 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
9690 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
9691 return GAUDI2_ENGINE_ID_ROT_0;
9692 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9693 case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
9694 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9695 return GAUDI2_ENGINE_ID_ROT_1;
9696 case GAUDI2_EVENT_HDMA0_BM_SPMU:
9697 case GAUDI2_EVENT_HDMA0_QM:
9698 case GAUDI2_EVENT_HDMA0_CORE:
9699 return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
9700 case GAUDI2_EVENT_HDMA1_BM_SPMU:
9701 case GAUDI2_EVENT_HDMA1_QM:
9702 case GAUDI2_EVENT_HDMA1_CORE:
9703 return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
9704 case GAUDI2_EVENT_HDMA2_BM_SPMU:
9705 case GAUDI2_EVENT_HDMA2_QM:
9706 case GAUDI2_EVENT_HDMA2_CORE:
9707 return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
9708 case GAUDI2_EVENT_HDMA3_BM_SPMU:
9709 case GAUDI2_EVENT_HDMA3_QM:
9710 case GAUDI2_EVENT_HDMA3_CORE:
9711 return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
9712 case GAUDI2_EVENT_HDMA4_BM_SPMU:
9713 case GAUDI2_EVENT_HDMA4_QM:
9714 case GAUDI2_EVENT_HDMA4_CORE:
9715 return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
9716 case GAUDI2_EVENT_HDMA5_BM_SPMU:
9717 case GAUDI2_EVENT_HDMA5_QM:
9718 case GAUDI2_EVENT_HDMA5_CORE:
9719 return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
9720 case GAUDI2_EVENT_HDMA6_BM_SPMU:
9721 case GAUDI2_EVENT_HDMA6_QM:
9722 case GAUDI2_EVENT_HDMA6_CORE:
9723 return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
9724 case GAUDI2_EVENT_HDMA7_BM_SPMU:
9725 case GAUDI2_EVENT_HDMA7_QM:
9726 case GAUDI2_EVENT_HDMA7_CORE:
9727 return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
9728 default:
9729 break;
9732 switch (type) {
9733 case GAUDI2_BLOCK_TYPE_TPC:
9734 switch (index) {
9735 case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
9736 return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
9737 case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
9738 return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
9739 case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
9740 return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
9741 case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
9742 return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
9743 default:
9744 break;
9746 break;
9747 case GAUDI2_BLOCK_TYPE_MME:
9748 switch (index) {
9749 case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
9750 case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
9751 case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
9752 case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
9753 default:
9754 break;
9756 break;
9757 case GAUDI2_BLOCK_TYPE_DEC:
9758 switch (index) {
9759 case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
9760 case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
9761 case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
9762 case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
9763 case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
9764 case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
9765 case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
9766 case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
9767 case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
9768 case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
9769 default:
9770 break;
9772 break;
9773 default:
9774 break;
9777 return U16_MAX;
/*
 * gaudi2_handle_eqe() - process one event-queue entry (eq_entry) for hdev.
 *
 * Flow, as visible in this function:
 *  1. Decode the event type from the entry's ctl word and drop entries whose
 *     type is >= GAUDI2_EVENT_SIZE.
 *  2. Increment both per-event statistics counters.
 *  3. Dispatch on the event type. Each case sets error_count, ORs notifier
 *     bits into event_mask and may set reset_flags, reset_required or
 *     is_critical.
 *  4. Report user-engine errors, print the event if no handler printed a
 *     cause, then either branch to the reset path or unmask the IRQ (for
 *     events not classified as MSG) and send the accumulated notifier mask.
 */
9780 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9782 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9783 bool reset_required = false, is_critical = false;
9784 u32 index, ctl, reset_flags = 0, error_count = 0;
9785 u64 event_mask = 0;
9786 u16 event_type;
/* The control word is stored little-endian; the event type is a bit-field inside it */
9788 ctl = le32_to_cpu(eq_entry->hdr.ctl);
9789 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
/* Bail out before event_type is used as an index into the statistics arrays and gaudi2_irq_map_table */
9791 if (event_type >= GAUDI2_EVENT_SIZE) {
9792 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9793 event_type, GAUDI2_EVENT_SIZE - 1);
9794 return;
9797 gaudi2->events_stat[event_type]++;
9798 gaudi2->events_stat_aggregate[event_type]++;
/*
 * Per-event dispatch. error_count is later used only as a boolean, with
 * GAUDI2_NA_EVENT_CAUSE marking events that have no dedicated cause handler.
 */
9800 switch (event_type) {
9801 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9802 fallthrough;
9803 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9804 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9805 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9806 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9807 is_critical = eq_entry->ecc_data.is_critical;
9808 error_count++;
9809 break;
9811 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9812 fallthrough;
9813 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9814 fallthrough;
9815 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9816 error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9817 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9818 break;
9820 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9821 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
9822 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9823 break;
9825 case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9826 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9827 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9828 event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9829 break;
9831 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9832 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9833 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9834 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9835 break;
9837 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9838 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9839 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9840 error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9841 &eq_entry->razwi_with_intr_cause, &event_mask);
9842 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9843 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9844 break;
9846 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9847 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9848 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9849 &eq_entry->razwi_with_intr_cause, &event_mask);
9850 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9851 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9852 break;
9854 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9855 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9856 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9857 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9858 break;
9860 case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9861 case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9862 case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9863 case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9864 case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9865 case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9866 case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9867 case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9868 case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9869 case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9870 case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9871 case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9872 case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9873 case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9874 case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9875 case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9876 case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9877 case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9878 case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9879 case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9880 case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9881 case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9882 case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9883 case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9884 case GAUDI2_EVENT_TPC24_KERNEL_ERR:
/* Engine index = distance from TPC0's event divided by the ID stride between consecutive TPCs */
9885 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9886 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9887 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9888 &eq_entry->razwi_with_intr_cause, &event_mask);
9889 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9890 break;
9892 case GAUDI2_EVENT_DEC0_SPI:
9893 case GAUDI2_EVENT_DEC1_SPI:
9894 case GAUDI2_EVENT_DEC2_SPI:
9895 case GAUDI2_EVENT_DEC3_SPI:
9896 case GAUDI2_EVENT_DEC4_SPI:
9897 case GAUDI2_EVENT_DEC5_SPI:
9898 case GAUDI2_EVENT_DEC6_SPI:
9899 case GAUDI2_EVENT_DEC7_SPI:
9900 case GAUDI2_EVENT_DEC8_SPI:
9901 case GAUDI2_EVENT_DEC9_SPI:
/* Same stride-based index computation, using the spacing between decoder SPI event IDs */
9902 index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9903 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9904 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9905 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9906 break;
9908 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9909 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9910 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9911 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9912 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9913 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9914 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9915 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9916 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9917 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9918 break;
9920 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9921 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9922 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9923 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9924 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9925 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9926 GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9927 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9928 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9929 break;
9931 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9932 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9933 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9934 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9935 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9936 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9937 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9938 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9939 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9940 break;
9942 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9943 case GAUDI2_EVENT_KDMA0_CORE:
9944 error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9945 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9946 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9947 break;
9949 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9950 error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9951 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9952 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9953 break;
9955 case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9956 error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9957 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9958 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9959 break;
9961 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9962 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9963 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9964 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9965 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9966 break;
9968 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9969 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9970 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9971 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9972 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9973 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9974 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9975 break;
9977 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9978 error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9979 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9980 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9981 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9982 break;
9984 case GAUDI2_EVENT_PMMU_FATAL_0:
9985 error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9986 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9987 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9988 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9989 break;
9991 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9992 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9993 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9994 break;
9996 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9997 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
/* A reset is requested only when the HBM SEI handler returns non-zero */
9998 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9999 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10000 reset_required = true;
10001 is_critical = eq_entry->sei_data.hdr.is_critical;
10003 error_count++;
10004 break;
10006 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
10007 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
10008 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10009 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10010 break;
10012 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
10013 error_count = gaudi2_handle_hbm_mc_spi(hdev,
10014 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10015 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10016 break;
10018 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
10019 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
10020 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10021 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
/* Treated as critical only when hl_fw_version_cmp() against 1.13.0 returns >= 0 */
10022 if (hl_fw_version_cmp(hdev, 1, 13, 0) >= 0)
10023 is_critical = true;
10024 break;
10026 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
10027 error_count = gaudi2_handle_psoc_drain(hdev,
10028 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10029 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10030 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10031 break;
10033 case GAUDI2_EVENT_CPU_AXI_ECC:
10034 error_count = GAUDI2_NA_EVENT_CAUSE;
10035 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10036 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10037 break;
10038 case GAUDI2_EVENT_CPU_L2_RAM_ECC:
10039 error_count = GAUDI2_NA_EVENT_CAUSE;
10040 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10041 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10042 break;
10043 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
10044 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
10045 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
10046 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
10047 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
10048 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10049 break;
10050 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
10051 error_count = GAUDI2_NA_EVENT_CAUSE;
10052 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10053 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10054 break;
10055 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
10056 error_count = GAUDI2_NA_EVENT_CAUSE;
10057 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10058 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10059 break;
10060 case GAUDI2_EVENT_PSOC_PRSTN_FALL:
10061 error_count = GAUDI2_NA_EVENT_CAUSE;
10062 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10063 break;
10064 case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
10065 error_count = GAUDI2_NA_EVENT_CAUSE;
10066 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10067 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10068 break;
10069 case GAUDI2_EVENT_PCIE_FATAL_ERR:
10070 error_count = GAUDI2_NA_EVENT_CAUSE;
10071 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10072 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10073 break;
10074 case GAUDI2_EVENT_TPC0_BMON_SPMU:
10075 case GAUDI2_EVENT_TPC1_BMON_SPMU:
10076 case GAUDI2_EVENT_TPC2_BMON_SPMU:
10077 case GAUDI2_EVENT_TPC3_BMON_SPMU:
10078 case GAUDI2_EVENT_TPC4_BMON_SPMU:
10079 case GAUDI2_EVENT_TPC5_BMON_SPMU:
10080 case GAUDI2_EVENT_TPC6_BMON_SPMU:
10081 case GAUDI2_EVENT_TPC7_BMON_SPMU:
10082 case GAUDI2_EVENT_TPC8_BMON_SPMU:
10083 case GAUDI2_EVENT_TPC9_BMON_SPMU:
10084 case GAUDI2_EVENT_TPC10_BMON_SPMU:
10085 case GAUDI2_EVENT_TPC11_BMON_SPMU:
10086 case GAUDI2_EVENT_TPC12_BMON_SPMU:
10087 case GAUDI2_EVENT_TPC13_BMON_SPMU:
10088 case GAUDI2_EVENT_TPC14_BMON_SPMU:
10089 case GAUDI2_EVENT_TPC15_BMON_SPMU:
10090 case GAUDI2_EVENT_TPC16_BMON_SPMU:
10091 case GAUDI2_EVENT_TPC17_BMON_SPMU:
10092 case GAUDI2_EVENT_TPC18_BMON_SPMU:
10093 case GAUDI2_EVENT_TPC19_BMON_SPMU:
10094 case GAUDI2_EVENT_TPC20_BMON_SPMU:
10095 case GAUDI2_EVENT_TPC21_BMON_SPMU:
10096 case GAUDI2_EVENT_TPC22_BMON_SPMU:
10097 case GAUDI2_EVENT_TPC23_BMON_SPMU:
10098 case GAUDI2_EVENT_TPC24_BMON_SPMU:
10099 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
10100 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
10101 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
10102 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
10103 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
10104 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
10105 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
10106 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
10107 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
10108 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
10109 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
10110 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
10111 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
10112 fallthrough;
10113 case GAUDI2_EVENT_DEC0_BMON_SPMU:
10114 case GAUDI2_EVENT_DEC1_BMON_SPMU:
10115 case GAUDI2_EVENT_DEC2_BMON_SPMU:
10116 case GAUDI2_EVENT_DEC3_BMON_SPMU:
10117 case GAUDI2_EVENT_DEC4_BMON_SPMU:
10118 case GAUDI2_EVENT_DEC5_BMON_SPMU:
10119 case GAUDI2_EVENT_DEC6_BMON_SPMU:
10120 case GAUDI2_EVENT_DEC7_BMON_SPMU:
10121 case GAUDI2_EVENT_DEC8_BMON_SPMU:
10122 case GAUDI2_EVENT_DEC9_BMON_SPMU:
10123 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
10124 error_count = GAUDI2_NA_EVENT_CAUSE;
10125 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10126 break;
10128 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
10129 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
10130 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
10131 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
10132 gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
10133 error_count = GAUDI2_NA_EVENT_CAUSE;
10134 break;
10136 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
10137 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
10138 error_count = GAUDI2_NA_EVENT_CAUSE;
10139 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10140 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10141 break;
10143 case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
10144 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10145 error_count = GAUDI2_NA_EVENT_CAUSE;
10146 /* Do nothing- FW will handle it */
10147 break;
10149 case GAUDI2_EVENT_PCIE_P2P_MSIX:
10150 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
10151 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10152 break;
10154 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
10155 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
10156 error_count = gaudi2_handle_sm_err(hdev, event_type, index);
10157 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10158 break;
10160 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
10161 error_count = GAUDI2_NA_EVENT_CAUSE;
10162 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10163 break;
10165 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
10166 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
10167 le64_to_cpu(eq_entry->data[0]));
10168 error_count = GAUDI2_NA_EVENT_CAUSE;
10169 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10170 break;
10171 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
10172 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
10173 le64_to_cpu(eq_entry->data[0]));
10174 error_count = GAUDI2_NA_EVENT_CAUSE;
10175 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10176 break;
10178 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
10179 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
10180 error_count = GAUDI2_NA_EVENT_CAUSE;
10181 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10182 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10183 break;
10185 case GAUDI2_EVENT_ARC_DCCM_FULL:
10186 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
10187 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10188 break;
10190 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
10191 case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
10192 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10193 error_count = GAUDI2_NA_EVENT_CAUSE;
10194 is_critical = true;
10195 break;
10197 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
10198 case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
10199 case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
10200 case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
10201 case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
10202 case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
10203 error_count = GAUDI2_NA_EVENT_CAUSE;
10204 dev_info_ratelimited(hdev->dev, "%s event received\n",
10205 gaudi2_irq_map_table[event_type].name);
10206 break;
10208 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
10209 hl_eq_heartbeat_event_handle(hdev);
10210 error_count = GAUDI2_NA_EVENT_CAUSE;
10211 break;
10212 default:
/* Unhandled type: complain only if the IRQ map table marks the event as valid */
10213 if (gaudi2_irq_map_table[event_type].valid) {
10214 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
10215 event_type);
10216 error_count = GAUDI2_NA_EVENT_CAUSE;
/* Engine-error capture runs only when some case above set the USER_ENGINE_ERR notifier bit */
10220 if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
10221 hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
10223 /* Make sure to dump an error in case no error cause was printed so far.
10224 * Note that although we have counted the errors, we use this number as
10225 * a boolean.
10227 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
10228 gaudi2_print_event(hdev, event_type, true, "%d", event_type);
10229 else if (error_count == 0)
10230 gaudi2_print_event(hdev, event_type, true,
10231 "No error cause for H/W event %u", event_type);
10233 if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
10234 if (reset_required ||
10235 (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
10236 reset_flags |= HL_DRV_RESET_HARD;
10238 if (hdev->hard_reset_on_fw_events ||
10239 (hdev->asic_prop.fw_security_enabled && is_critical))
10240 goto reset_device;
10243 /* Send unmask irq only for interrupts not classified as MSG */
10244 if (!gaudi2_irq_map_table[event_type].msg)
10245 hl_fw_unmask_irq(hdev, event_type);
10247 if (event_mask)
10248 hl_notifier_event_send_all(hdev, event_mask);
10250 return;
10252 reset_device:
/*
 * Reset path: a critical event with FW security enabled sets
 * HL_DRV_RESET_BYPASS_REQ_TO_FW and the DEVICE_UNAVAILABLE notifier bit;
 * every other case sets HL_DRV_RESET_DELAY instead.
 */
10253 if (hdev->asic_prop.fw_security_enabled && is_critical) {
10254 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
10255 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
10256 } else {
10257 reset_flags |= HL_DRV_RESET_DELAY;
10259 /* escalate general hw errors to critical/fatal error */
10260 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10261 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10263 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10264 hl_device_cond_reset(hdev, reset_flags, event_mask);
10267 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
10268 struct packet_lin_dma *lin_dma_pkt,
10269 u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val)
10271 u32 ctl, pkt_size;
10272 int rc = 0, i;
10274 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
10275 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
10276 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
10277 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
10279 lin_dma_pkt->ctl = cpu_to_le32(ctl);
10280 lin_dma_pkt->src_addr = cpu_to_le64(val);
10281 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
10282 lin_dma_pkt->tsize = cpu_to_le32(size);
10284 pkt_size = sizeof(struct packet_lin_dma);
10286 for (i = 0; i < 3; i++) {
10287 rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
10288 phys_addr + (i * sizeof(u64)),
10289 ((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64);
10290 if (rc) {
10291 dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n",
10292 phys_addr);
10293 return rc;
10297 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
10298 if (rc)
10299 dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
10300 hw_queue_id);
10302 return rc;
10305 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
10307 u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
10308 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
10309 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
10310 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
10311 u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
10312 old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len;
10313 u64 comp_addr, cur_addr = addr, end_addr = addr + size;
10314 struct asic_fixed_properties *prop = &hdev->asic_prop;
10315 int rc = 0, dma_num = 0, i;
10316 void *lin_dma_pkts_arr;
10318 if (prop->edma_enabled_mask == 0) {
10319 dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
10320 return -EIO;
10323 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10324 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
10325 comp_addr = CFG_BASE + sob_addr;
10326 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
10327 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
10328 mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
10329 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
10331 /* Calculate how many lin dma pkts we'll need */
10332 num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
10333 pkt_size = sizeof(struct packet_lin_dma);
10334 cb_len = pkt_size * num_of_pkts;
10337 * if we're not scrubing HMMU or NIC reserved sections in hbm,
10338 * then it the scrubing of the user section, as we use the start of the user section
10339 * to store the CB of the EDMA QM, so shift the start address of the scrubbing accordingly
10340 * and scrub the CB section before leaving this function.
10342 if ((addr >= prop->dram_user_base_address) &&
10343 (addr < prop->dram_user_base_address + cb_len))
10344 cur_addr += (prop->dram_user_base_address + cb_len) - addr;
10346 lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL);
10347 if (!lin_dma_pkts_arr)
10348 return -ENOMEM;
10351 * set mmu bypass for the scrubbing - all ddmas are configured the same so save
10352 * only the first one to restore later
10353 * also set the sob addr for all edma cores for completion.
10354 * set QM as trusted to allow it to access physical address with MMU bp.
10356 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10357 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10358 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10359 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10360 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10362 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10363 continue;
10365 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10366 edma_offset, mmubp);
10367 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10368 lower_32_bits(comp_addr));
10369 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10370 upper_32_bits(comp_addr));
10371 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10372 comp_val);
10373 gaudi2_qman_set_test_mode(hdev,
10374 edma_queues_id[dcore] + 4 * edma_idx, true);
10378 WREG32(sob_addr, 0);
10380 while (cur_addr < end_addr) {
10381 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10382 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10383 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10385 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10386 continue;
10388 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10390 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10391 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10392 prop->dram_user_base_address + (dma_num * pkt_size),
10393 edma_queues_id[dcore] + edma_idx * 4,
10394 chunk_size, cur_addr, val);
10395 if (rc)
10396 goto end;
10398 dma_num++;
10399 cur_addr += chunk_size;
10400 if (cur_addr == end_addr)
10401 goto edma_wait;
10406 edma_wait:
10407 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10408 if (rc) {
10409 dev_err(hdev->dev, "DMA Timeout during HBM scrubbing(sob: 0x%x, dma_num: 0x%x)\n",
10410 busy, dma_num);
10411 goto end;
10413 end:
10414 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10415 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10416 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10417 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10419 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10420 continue;
10422 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10423 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10424 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10425 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10426 gaudi2_qman_set_test_mode(hdev,
10427 edma_queues_id[dcore] + 4 * edma_idx, false);
10431 memset(lin_dma_pkts_arr, 0, sizeof(u64));
10433 /* Zero the HBM area where we copied the CB */
10434 for (i = 0; i < cb_len / sizeof(u64); i += sizeof(u64))
10435 rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
10436 prop->dram_user_base_address + i,
10437 (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);
10438 WREG32(sob_addr, 0);
10440 kfree(lin_dma_pkts_arr);
10442 return rc;
10445 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10447 int rc;
10448 struct asic_fixed_properties *prop = &hdev->asic_prop;
10449 u64 size = prop->dram_end_address - prop->dram_user_base_address;
10451 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10453 if (rc)
10454 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10455 prop->dram_user_base_address, size);
10456 return rc;
10459 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10461 int rc;
10462 struct asic_fixed_properties *prop = &hdev->asic_prop;
10463 u64 val = hdev->memory_scrub_val;
10464 u64 addr, size;
10466 if (!hdev->memory_scrub)
10467 return 0;
10469 /* scrub SRAM */
10470 addr = prop->sram_user_base_address;
10471 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10472 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10473 addr, addr + size, val);
10474 rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10475 if (rc) {
10476 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10477 return rc;
10480 /* scrub DRAM */
10481 rc = gaudi2_scrub_device_dram(hdev, val);
10482 if (rc) {
10483 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10484 return rc;
10486 return 0;
10489 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10491 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10492 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10493 u32 val, size, offset;
10494 int dcore_id;
10496 offset = hdev->asic_prop.first_available_cq[0] * 4;
10497 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10498 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10499 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10500 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10501 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10502 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10503 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10504 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
10506 /* memset dcore0 CQ registers */
10507 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10508 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10509 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10510 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10511 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10512 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10514 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10515 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10516 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10517 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10518 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10519 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10520 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10522 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10523 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10524 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10525 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10526 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10527 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10528 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10530 cq_lbw_l_addr += DCORE_OFFSET;
10531 cq_lbw_h_addr += DCORE_OFFSET;
10532 cq_lbw_data_addr += DCORE_OFFSET;
10533 cq_base_l_addr += DCORE_OFFSET;
10534 cq_base_h_addr += DCORE_OFFSET;
10535 cq_size_addr += DCORE_OFFSET;
10538 offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10539 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10540 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10541 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10543 /* memset dcore0 monitors */
10544 gaudi2_memset_device_lbw(hdev, addr, size, val);
10546 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10547 gaudi2_memset_device_lbw(hdev, addr, size, 0);
10549 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10550 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10551 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10553 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10554 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10555 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10556 mon_sts_addr += DCORE_OFFSET;
10557 mon_cfg_addr += DCORE_OFFSET;
10560 offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10561 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10562 val = 0;
10563 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10564 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10566 /* memset dcore0 sobs */
10567 gaudi2_memset_device_lbw(hdev, addr, size, val);
10569 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10570 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10572 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10573 gaudi2_memset_device_lbw(hdev, addr, size, val);
10574 addr += DCORE_OFFSET;
10577 /* Flush all WREG to prevent race */
10578 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10581 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10583 u32 reg_base, hw_queue_id;
10585 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10586 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10587 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10588 continue;
10590 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10592 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10593 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10596 /* Flush all WREG to prevent race */
10597 RREG32(mmPDMA0_QM_ARB_CFG_0);
10600 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10602 u32 reg_base, hw_queue_id;
10604 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10605 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10606 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10607 continue;
10609 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10611 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10612 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10615 /* Flush all WREG to prevent race */
10616 RREG32(mmPDMA0_QM_ARB_CFG_0);
10619 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10621 return 0;
/*
 * gaudi2_restore_phase_topology() - restore-phase topology hook.
 * @hdev: pointer to the habanalabs device structure (unused).
 *
 * Intentionally empty.
 */
static void gaudi2_restore_phase_topology(struct hl_device *hdev)
{
}
10628 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10629 struct dup_block_ctx *cfg_ctx)
10631 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10632 u8 seq;
10633 int i;
10635 for (i = 0 ; i < cfg_ctx->instances ; i++) {
10636 seq = block_idx * cfg_ctx->instances + i;
10638 /* skip disabled instance */
10639 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10640 continue;
10642 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10643 cfg_ctx->data);
10647 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10648 u64 mask)
10650 int i;
10652 cfg_ctx->enabled_mask = mask;
10654 for (i = 0 ; i < cfg_ctx->blocks ; i++)
10655 gaudi2_init_block_instances(hdev, i, cfg_ctx);
10658 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10660 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10663 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10665 void *host_mem_virtual_addr;
10666 dma_addr_t host_mem_dma_addr;
10667 u64 reserved_va_base;
10668 u32 pos, size_left, size_to_dma;
10669 struct hl_ctx *ctx;
10670 int rc = 0;
10672 /* Fetch the ctx */
10673 ctx = hl_get_compute_ctx(hdev);
10674 if (!ctx) {
10675 dev_err(hdev->dev, "No ctx available\n");
10676 return -EINVAL;
10679 /* Allocate buffers for read and for poll */
10680 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10681 GFP_KERNEL | __GFP_ZERO);
10682 if (host_mem_virtual_addr == NULL) {
10683 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10684 rc = -ENOMEM;
10685 goto put_ctx;
10688 /* Reserve VM region on asic side */
10689 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10690 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10691 if (!reserved_va_base) {
10692 dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10693 rc = -ENOMEM;
10694 goto free_data_buffer;
10697 /* Create mapping on asic side */
10698 mutex_lock(&hdev->mmu_lock);
10700 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10701 if (rc) {
10702 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10703 goto unreserve_va;
10706 rc = hl_mmu_invalidate_cache_range(hdev, false,
10707 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10708 ctx->asid, reserved_va_base, SZ_2M);
10709 if (rc) {
10710 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10711 goto unreserve_va;
10714 mutex_unlock(&hdev->mmu_lock);
10716 /* Enable MMU on KDMA */
10717 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10719 pos = 0;
10720 size_left = size;
10721 size_to_dma = SZ_2M;
10723 while (size_left > 0) {
10724 if (size_left < SZ_2M)
10725 size_to_dma = size_left;
10727 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10728 if (rc)
10729 break;
10731 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10733 if (size_left <= SZ_2M)
10734 break;
10736 pos += SZ_2M;
10737 addr += SZ_2M;
10738 size_left -= SZ_2M;
10741 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10743 mutex_lock(&hdev->mmu_lock);
10745 rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10746 if (rc)
10747 goto unreserve_va;
10749 rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10750 ctx->asid, reserved_va_base, SZ_2M);
10752 unreserve_va:
10753 mutex_unlock(&hdev->mmu_lock);
10754 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10755 free_data_buffer:
10756 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10757 put_ctx:
10758 hl_ctx_put(ctx);
10760 return rc;
10763 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10765 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10766 int min_alloc_order, rc;
10768 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10769 return 0;
10771 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10772 HOST_SPACE_INTERNAL_CB_SZ,
10773 &hdev->internal_cb_pool_dma_addr,
10774 GFP_KERNEL | __GFP_ZERO);
10776 if (!hdev->internal_cb_pool_virt_addr)
10777 return -ENOMEM;
10779 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10780 gaudi2_get_wait_cb_size(hdev)));
10782 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10783 if (!hdev->internal_cb_pool) {
10784 dev_err(hdev->dev, "Failed to create internal CB pool\n");
10785 rc = -ENOMEM;
10786 goto free_internal_cb_pool;
10789 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10790 HOST_SPACE_INTERNAL_CB_SZ, -1);
10791 if (rc) {
10792 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10793 rc = -EFAULT;
10794 goto destroy_internal_cb_pool;
10797 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10798 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10800 if (!hdev->internal_cb_va_base) {
10801 rc = -ENOMEM;
10802 goto destroy_internal_cb_pool;
10805 mutex_lock(&hdev->mmu_lock);
10807 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10808 HOST_SPACE_INTERNAL_CB_SZ);
10809 if (rc)
10810 goto unreserve_internal_cb_pool;
10812 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10813 if (rc)
10814 goto unmap_internal_cb_pool;
10816 mutex_unlock(&hdev->mmu_lock);
10818 return 0;
10820 unmap_internal_cb_pool:
10821 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10822 unreserve_internal_cb_pool:
10823 mutex_unlock(&hdev->mmu_lock);
10824 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10825 destroy_internal_cb_pool:
10826 gen_pool_destroy(hdev->internal_cb_pool);
10827 free_internal_cb_pool:
10828 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10829 hdev->internal_cb_pool_dma_addr);
10831 return rc;
10834 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10836 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10838 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10839 return;
10841 mutex_lock(&hdev->mmu_lock);
10842 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10843 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10844 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10845 mutex_unlock(&hdev->mmu_lock);
10847 gen_pool_destroy(hdev->internal_cb_pool);
10849 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10850 hdev->internal_cb_pool_dma_addr);
/*
 * gaudi2_restore_user_registers() - reset the user-visible sync manager and
 * QMAN registers.
 * @hdev: pointer to the habanalabs device structure.
 *
 * The sync manager registers are restored first, the QMAN registers second.
 */
static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	/* sync manager state */
	gaudi2_restore_user_sm_registers(hdev);

	/* queue manager state */
	gaudi2_restore_user_qm_registers(hdev);
}
10859 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10861 struct hl_device *hdev = ctx->hdev;
10862 struct asic_fixed_properties *prop = &hdev->asic_prop;
10863 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10864 int rc;
10866 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10867 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10868 if (rc)
10869 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10870 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10872 return rc;
10875 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10877 struct hl_device *hdev = ctx->hdev;
10878 struct asic_fixed_properties *prop = &hdev->asic_prop;
10879 int rc;
10881 rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10882 prop->pmmu.page_size, true);
10883 if (rc)
10884 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10885 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10888 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10890 int rc;
10892 if (ctx->asid == HL_KERNEL_ASID_ID)
10893 return 0;
10895 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10896 if (rc)
10897 return rc;
10899 /* No need to clear user registers if the device has just
10900 * performed reset, we restore only nic qm registers
10902 if (ctx->hdev->reset_upon_device_release)
10903 gaudi2_restore_nic_qm_registers(ctx->hdev);
10904 else
10905 gaudi2_restore_user_registers(ctx->hdev);
10907 rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10908 if (rc)
10909 return rc;
10911 rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10912 if (rc)
10913 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10915 return rc;
10918 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10920 if (ctx->asid == HL_KERNEL_ASID_ID)
10921 return;
10923 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10925 gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10928 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10930 struct hl_device *hdev = cs->ctx->hdev;
10931 int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10932 u32 mon_payload, sob_id, mon_id;
10934 if (!cs_needs_completion(cs))
10935 return 0;
10938 * First 64 SOB/MON are reserved for driver for QMAN auto completion
10939 * mechanism. Each SOB/MON pair are used for a pending CS with the same
10940 * cyclic index. The SOB value is increased when each of the CS jobs is
10941 * completed. When the SOB reaches the number of CS jobs, the monitor
10942 * generates MSI-X interrupt.
10945 sob_id = mon_id = index;
10946 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10947 (1 << CQ_ENTRY_READY_SHIFT) | index;
10949 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10950 cs->jobs_cnt);
10952 return 0;
10955 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10957 return HL_INVALID_QUEUE;
10960 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10962 struct hl_cb *cb = data;
10963 struct packet_msg_short *pkt;
10964 u32 value, ctl, pkt_size = sizeof(*pkt);
10966 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10967 memset(pkt, 0, pkt_size);
10969 /* Inc by 1, Mode ADD */
10970 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10971 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10973 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10974 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10975 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10976 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10977 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10979 pkt->value = cpu_to_le32(value);
10980 pkt->ctl = cpu_to_le32(ctl);
10982 return size + pkt_size;
10985 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10987 u32 ctl, pkt_size = sizeof(*pkt);
10989 memset(pkt, 0, pkt_size);
10991 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10992 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10993 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10994 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10995 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10997 pkt->value = cpu_to_le32(value);
10998 pkt->ctl = cpu_to_le32(ctl);
11000 return pkt_size;
11003 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
11004 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
11006 u32 ctl, value, pkt_size = sizeof(*pkt);
11007 u8 mask;
11009 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
11010 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
11011 return 0;
11014 memset(pkt, 0, pkt_size);
11016 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
11017 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
11018 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/
11019 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
11021 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
11022 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
11023 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
11024 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
11025 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
11027 pkt->value = cpu_to_le32(value);
11028 pkt->ctl = cpu_to_le32(ctl);
11030 return pkt_size;
11033 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
11035 u32 ctl, cfg, pkt_size = sizeof(*pkt);
11037 memset(pkt, 0, pkt_size);
11039 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
11040 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
11041 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
11043 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
11044 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
11045 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
11047 pkt->cfg = cpu_to_le32(cfg);
11048 pkt->ctl = cpu_to_le32(ctl);
11050 return pkt_size;
11053 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
11055 struct hl_cb *cb = prop->data;
11056 void *buf = (void *) (uintptr_t) (cb->kernel_address);
11058 u64 monitor_base, fence_addr = 0;
11059 u32 stream_index, size = prop->size;
11060 u16 msg_addr_offset;
11062 stream_index = prop->q_idx % 4;
11063 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
11064 QM_FENCE2_OFFSET + stream_index * 4;
11067 * monitor_base should be the content of the base0 address registers,
11068 * so it will be added to the msg short offsets
11070 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
11072 /* First monitor config packet: low address of the sync */
11073 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
11074 monitor_base;
11076 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
11078 /* Second monitor config packet: high address of the sync */
11079 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
11080 monitor_base;
11082 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
11085 * Third monitor config packet: the payload, i.e. what to write when the
11086 * sync triggers
11088 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
11089 monitor_base;
11091 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
11093 /* Fourth monitor config packet: bind the monitor to a sync object */
11094 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
11096 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
11097 prop->sob_val, msg_addr_offset);
11099 /* Fence packet */
11100 size += gaudi2_add_fence_pkt(buf + size);
11102 return size;
11105 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
11107 struct hl_hw_sob *hw_sob = data;
11109 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
11111 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
11113 kref_init(&hw_sob->kref);
11116 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
11120 static u64 gaudi2_get_device_time(struct hl_device *hdev)
11122 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
11124 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
/*
 * gaudi2_collective_wait_init_cs() - collective-wait CS init hook.
 * @cs: command submission (unused).
 *
 * No work is done here.
 *
 * Return: always 0.
 */
static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}
11132 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
11133 struct hl_cs *cs, u32 wait_queue_id,
11134 u32 collective_engine_id, u32 encaps_signal_offset)
11136 return -EINVAL;
11140 * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address
11141 * to DMMU page-size address (64MB) before mapping it in
11142 * the MMU.
11143 * The operation is performed on both the virtual and physical addresses.
11144 * for device with 6 HBMs the scramble is:
11145 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
11147 * Example:
11148 * =============================================================================
11149 * Allocated DRAM Reserved VA scrambled VA for MMU mapping Scrambled PA
11150 * Phys address in MMU last
11151 * HOP
11152 * =============================================================================
11153 * PA1 0x3000000 VA1 0x9C000000 SVA1= (VA1/48M)*64M 0xD0000000 <- PA1/48M 0x1
11154 * PA2 0x9000000 VA2 0x9F000000 SVA2= (VA2/48M)*64M 0xD4000000 <- PA2/48M 0x3
11155 * =============================================================================
11157 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
11159 struct asic_fixed_properties *prop = &hdev->asic_prop;
11160 u32 divisor, mod_va;
11161 u64 div_va;
11163 /* accept any address in the DRAM address space */
11164 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
11165 VA_HBM_SPACE_END)) {
11167 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
11168 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
11169 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
11170 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
11171 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
11174 return raw_addr;
11177 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
11179 struct asic_fixed_properties *prop = &hdev->asic_prop;
11180 u32 divisor, mod_va;
11181 u64 div_va;
11183 /* accept any address in the DRAM address space */
11184 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
11185 VA_HBM_SPACE_END)) {
11187 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
11188 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
11189 PAGE_SIZE_64MB, &mod_va);
11191 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
11192 (div_va * divisor + mod_va));
11195 return scrambled_addr;
11198 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
11200 u32 base = 0, dcore_id, dec_id;
11202 if (core_id >= NUMBER_OF_DEC) {
11203 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
11204 goto out;
11207 if (core_id < 8) {
11208 dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
11209 dec_id = core_id % NUM_OF_DEC_PER_DCORE;
11211 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
11212 dec_id * DCORE_VDEC_OFFSET;
11213 } else {
11214 /* PCIe Shared Decoder */
11215 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
11217 out:
11218 return base;
11221 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
11222 u32 *block_size, u32 *block_id)
11224 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11225 int i;
11227 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
11228 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
11229 *block_id = i;
11230 if (block_size)
11231 *block_size = gaudi2->mapped_blocks[i].size;
11232 return 0;
11236 dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
11238 return -EINVAL;
11241 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
11242 u32 block_id, u32 block_size)
11244 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11245 u64 offset_in_bar;
11246 u64 address;
11247 int rc;
11249 if (block_id >= NUM_USER_MAPPED_BLOCKS) {
11250 dev_err(hdev->dev, "Invalid block id %u", block_id);
11251 return -EINVAL;
11254 /* we allow mapping only an entire block */
11255 if (block_size != gaudi2->mapped_blocks[block_id].size) {
11256 dev_err(hdev->dev, "Invalid block size %u", block_size);
11257 return -EINVAL;
11260 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
11262 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
11264 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
11265 VM_DONTCOPY | VM_NORESERVE);
11267 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
11268 block_size, vma->vm_page_prot);
11269 if (rc)
11270 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
11272 return rc;
11275 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
11277 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11279 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
11280 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
11282 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
11283 WREG32(irq_handler_offset,
11284 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
11287 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
11289 switch (mmu_id) {
11290 case HW_CAP_DCORE0_DMMU0:
11291 *mmu_base = mmDCORE0_HMMU0_MMU_BASE;
11292 break;
11293 case HW_CAP_DCORE0_DMMU1:
11294 *mmu_base = mmDCORE0_HMMU1_MMU_BASE;
11295 break;
11296 case HW_CAP_DCORE0_DMMU2:
11297 *mmu_base = mmDCORE0_HMMU2_MMU_BASE;
11298 break;
11299 case HW_CAP_DCORE0_DMMU3:
11300 *mmu_base = mmDCORE0_HMMU3_MMU_BASE;
11301 break;
11302 case HW_CAP_DCORE1_DMMU0:
11303 *mmu_base = mmDCORE1_HMMU0_MMU_BASE;
11304 break;
11305 case HW_CAP_DCORE1_DMMU1:
11306 *mmu_base = mmDCORE1_HMMU1_MMU_BASE;
11307 break;
11308 case HW_CAP_DCORE1_DMMU2:
11309 *mmu_base = mmDCORE1_HMMU2_MMU_BASE;
11310 break;
11311 case HW_CAP_DCORE1_DMMU3:
11312 *mmu_base = mmDCORE1_HMMU3_MMU_BASE;
11313 break;
11314 case HW_CAP_DCORE2_DMMU0:
11315 *mmu_base = mmDCORE2_HMMU0_MMU_BASE;
11316 break;
11317 case HW_CAP_DCORE2_DMMU1:
11318 *mmu_base = mmDCORE2_HMMU1_MMU_BASE;
11319 break;
11320 case HW_CAP_DCORE2_DMMU2:
11321 *mmu_base = mmDCORE2_HMMU2_MMU_BASE;
11322 break;
11323 case HW_CAP_DCORE2_DMMU3:
11324 *mmu_base = mmDCORE2_HMMU3_MMU_BASE;
11325 break;
11326 case HW_CAP_DCORE3_DMMU0:
11327 *mmu_base = mmDCORE3_HMMU0_MMU_BASE;
11328 break;
11329 case HW_CAP_DCORE3_DMMU1:
11330 *mmu_base = mmDCORE3_HMMU1_MMU_BASE;
11331 break;
11332 case HW_CAP_DCORE3_DMMU2:
11333 *mmu_base = mmDCORE3_HMMU2_MMU_BASE;
11334 break;
11335 case HW_CAP_DCORE3_DMMU3:
11336 *mmu_base = mmDCORE3_HMMU3_MMU_BASE;
11337 break;
11338 case HW_CAP_PMMU:
11339 *mmu_base = mmPMMU_HBW_MMU_BASE;
11340 break;
11341 default:
11342 return -EINVAL;
11345 return 0;
11348 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
11350 bool is_pmmu = (mmu_id == HW_CAP_PMMU);
11351 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11352 u32 mmu_base;
11354 if (!(gaudi2->hw_cap_initialized & mmu_id))
11355 return;
11357 if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
11358 return;
11360 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
11361 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
11364 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
11366 u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
11368 /* check all HMMUs */
11369 for (i = 0 ; i < num_of_hmmus ; i++) {
11370 mmu_id = HW_CAP_DCORE0_DMMU0 << i;
11372 if (mmu_cap_mask & mmu_id)
11373 gaudi2_ack_mmu_error(hdev, mmu_id);
11376 /* check PMMU */
11377 if (mmu_cap_mask & HW_CAP_PMMU)
11378 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
11380 return 0;
11383 static void gaudi2_get_msi_info(__le32 *table)
11385 table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
11386 table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
11389 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
11391 switch (pll_idx) {
11392 case HL_GAUDI2_CPU_PLL: return CPU_PLL;
11393 case HL_GAUDI2_PCI_PLL: return PCI_PLL;
11394 case HL_GAUDI2_NIC_PLL: return NIC_PLL;
11395 case HL_GAUDI2_DMA_PLL: return DMA_PLL;
11396 case HL_GAUDI2_MESH_PLL: return MESH_PLL;
11397 case HL_GAUDI2_MME_PLL: return MME_PLL;
11398 case HL_GAUDI2_TPC_PLL: return TPC_PLL;
11399 case HL_GAUDI2_IF_PLL: return IF_PLL;
11400 case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
11401 case HL_GAUDI2_HBM_PLL: return HBM_PLL;
11402 case HL_GAUDI2_VID_PLL: return VID_PLL;
11403 case HL_GAUDI2_MSS_PLL: return MSS_PLL;
11404 default: return -EINVAL;
/*
 * gaudi2_gen_sync_to_engine_map() - state dump hook, not implemented.
 * @hdev: pointer to the habanalabs device structure (unused).
 * @map: sync-to-engine map (unused, left untouched).
 *
 * Return: always 0.
 */
static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}
/*
 * gaudi2_monitor_valid() - state dump hook, not implemented.
 * @mon: monitor state dump entry (unused).
 *
 * Return: always 0.
 */
static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}
11420 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
11421 struct hl_device *hdev, struct hl_mon_state_dump *mon)
11423 /* Not implemented */
11424 return 0;
11428 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
11429 u64 status_base_offset, enum hl_sync_engine_type engine_type,
11430 u32 engine_id, char **buf, size_t *size, size_t *offset)
11432 /* Not implemented */
11433 return 0;
11437 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
11438 .monitor_valid = gaudi2_monitor_valid,
11439 .print_single_monitor = gaudi2_print_single_monitor,
11440 .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11441 .print_fences_single_engine = gaudi2_print_fences_single_engine,
11444 static void gaudi2_state_dump_init(struct hl_device *hdev)
11446 /* Not implemented */
11447 hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11448 hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11451 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11453 return 0;
11456 static u32 *gaudi2_get_stream_master_qid_arr(void)
11458 return NULL;
/*
 * gaudi2_add_device_attr() - add the clock and VRM sysfs attributes.
 * @hdev: pointer to the habanalabs device structure.
 * @dev_clk_attr_grp: attribute group that receives the clock attributes.
 * @dev_vrm_attr_grp: attribute group that receives the VRM attributes.
 */
static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
					struct attribute_group *dev_vrm_attr_grp)
{
	/* clock attributes */
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);

	/* VRM attributes */
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}
11468 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11469 u32 page_size, u32 *real_page_size, bool is_dram_addr)
11471 struct asic_fixed_properties *prop = &hdev->asic_prop;
11473 /* for host pages the page size must be */
11474 if (!is_dram_addr) {
11475 if (page_size % mmu_prop->page_size)
11476 goto page_size_err;
11478 *real_page_size = mmu_prop->page_size;
11479 return 0;
11482 if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11483 goto page_size_err;
11486 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
11487 * than DRAM page size).
11488 * for this reason work with the DRAM page size and let the MMU scrambling routine handle
11489 * this mismatch when calculating the address to place in the MMU page table.
11490 * (in that case also make sure that the dram_page_size is not greater than the
11491 * mmu page size)
11493 *real_page_size = prop->dram_page_size;
11495 return 0;
11497 page_size_err:
11498 dev_err(hdev->dev, "page size of 0x%X is not 0x%X aligned, can't map\n",
11499 page_size, mmu_prop->page_size >> 10);
11500 return -EFAULT;
11503 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
11505 return -EOPNOTSUPP;
11508 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11510 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11512 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11513 return 0;
11515 return hl_fw_send_device_activity(hdev, open);
11518 static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr)
11520 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11521 u64 val;
11523 if (hdev->reset_info.hard_reset_pending)
11524 return U64_MAX;
11526 val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
11528 return val;
11531 static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val)
11533 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11535 if (hdev->reset_info.hard_reset_pending)
11536 return;
11538 writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
11541 static const struct hl_asic_funcs gaudi2_funcs = {
11542 .early_init = gaudi2_early_init,
11543 .early_fini = gaudi2_early_fini,
11544 .late_init = gaudi2_late_init,
11545 .late_fini = gaudi2_late_fini,
11546 .sw_init = gaudi2_sw_init,
11547 .sw_fini = gaudi2_sw_fini,
11548 .hw_init = gaudi2_hw_init,
11549 .hw_fini = gaudi2_hw_fini,
11550 .halt_engines = gaudi2_halt_engines,
11551 .suspend = gaudi2_suspend,
11552 .resume = gaudi2_resume,
11553 .mmap = gaudi2_mmap,
11554 .ring_doorbell = gaudi2_ring_doorbell,
11555 .pqe_write = gaudi2_pqe_write,
11556 .asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
11557 .asic_dma_free_coherent = gaudi2_dma_free_coherent,
11558 .scrub_device_mem = gaudi2_scrub_device_mem,
11559 .scrub_device_dram = gaudi2_scrub_device_dram,
11560 .get_int_queue_base = NULL,
11561 .test_queues = gaudi2_test_queues,
11562 .asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
11563 .asic_dma_pool_free = gaudi2_dma_pool_free,
11564 .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
11565 .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
11566 .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
11567 .cs_parser = gaudi2_cs_parser,
11568 .dma_map_sgtable = hl_asic_dma_map_sgtable,
11569 .add_end_of_cb_packets = NULL,
11570 .update_eq_ci = gaudi2_update_eq_ci,
11571 .context_switch = gaudi2_context_switch,
11572 .restore_phase_topology = gaudi2_restore_phase_topology,
11573 .debugfs_read_dma = gaudi2_debugfs_read_dma,
11574 .add_device_attr = gaudi2_add_device_attr,
11575 .handle_eqe = gaudi2_handle_eqe,
11576 .get_events_stat = gaudi2_get_events_stat,
11577 .read_pte = gaudi2_read_pte,
11578 .write_pte = gaudi2_write_pte,
11579 .mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
11580 .mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
11581 .mmu_prefetch_cache_range = NULL,
11582 .send_heartbeat = gaudi2_send_heartbeat,
11583 .debug_coresight = gaudi2_debug_coresight,
11584 .is_device_idle = gaudi2_is_device_idle,
11585 .compute_reset_late_init = gaudi2_compute_reset_late_init,
11586 .hw_queues_lock = gaudi2_hw_queues_lock,
11587 .hw_queues_unlock = gaudi2_hw_queues_unlock,
11588 .get_pci_id = gaudi2_get_pci_id,
11589 .get_eeprom_data = gaudi2_get_eeprom_data,
11590 .get_monitor_dump = gaudi2_get_monitor_dump,
11591 .send_cpu_message = gaudi2_send_cpu_message,
11592 .pci_bars_map = gaudi2_pci_bars_map,
11593 .init_iatu = gaudi2_init_iatu,
11594 .rreg = hl_rreg,
11595 .wreg = hl_wreg,
11596 .halt_coresight = gaudi2_halt_coresight,
11597 .ctx_init = gaudi2_ctx_init,
11598 .ctx_fini = gaudi2_ctx_fini,
11599 .pre_schedule_cs = gaudi2_pre_schedule_cs,
11600 .get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
11601 .load_firmware_to_device = NULL,
11602 .load_boot_fit_to_device = NULL,
11603 .get_signal_cb_size = gaudi2_get_signal_cb_size,
11604 .get_wait_cb_size = gaudi2_get_wait_cb_size,
11605 .gen_signal_cb = gaudi2_gen_signal_cb,
11606 .gen_wait_cb = gaudi2_gen_wait_cb,
11607 .reset_sob = gaudi2_reset_sob,
11608 .reset_sob_group = gaudi2_reset_sob_group,
11609 .get_device_time = gaudi2_get_device_time,
11610 .pb_print_security_errors = gaudi2_pb_print_security_errors,
11611 .collective_wait_init_cs = gaudi2_collective_wait_init_cs,
11612 .collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
11613 .get_dec_base_addr = gaudi2_get_dec_base_addr,
11614 .scramble_addr = gaudi2_mmu_scramble_addr,
11615 .descramble_addr = gaudi2_mmu_descramble_addr,
11616 .ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
11617 .get_hw_block_id = gaudi2_get_hw_block_id,
11618 .hw_block_mmap = gaudi2_block_mmap,
11619 .enable_events_from_fw = gaudi2_enable_events_from_fw,
11620 .ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
11621 .get_msi_info = gaudi2_get_msi_info,
11622 .map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
11623 .init_firmware_preload_params = gaudi2_init_firmware_preload_params,
11624 .init_firmware_loader = gaudi2_init_firmware_loader,
11625 .init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
11626 .state_dump_init = gaudi2_state_dump_init,
11627 .get_sob_addr = &gaudi2_get_sob_addr,
11628 .set_pci_memory_regions = gaudi2_set_pci_memory_regions,
11629 .get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
11630 .check_if_razwi_happened = gaudi2_check_if_razwi_happened,
11631 .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
11632 .access_dev_mem = hl_access_dev_mem,
11633 .set_dram_bar_base = gaudi2_set_hbm_bar_base,
11634 .set_engine_cores = gaudi2_set_engine_cores,
11635 .set_engines = gaudi2_set_engines,
11636 .send_device_activity = gaudi2_send_device_activity,
11637 .set_dram_properties = gaudi2_set_dram_properties,
11638 .set_binning_masks = gaudi2_set_binning_masks,
11641 void gaudi2_set_asic_funcs(struct hl_device *hdev)
11643 hdev->asic_funcs = &gaudi2_funcs;