1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2022 HabanaLabs, Ltd.
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
24 * Gaudi security scheme:
26 * 1. Host is protected by:
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
33 * 3. Configuration is protected by:
37 * MMU is always enabled.
39 * QMAN DMA channels 0,1 (PCI DMAN):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is always not
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
54 * - MMU page tables area clear (happens on init)
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE
);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE
);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE
);
70 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
72 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
77 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
87 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
89 #define GAUDI_MAX_STRING_LEN 20
91 #define GAUDI_CB_POOL_CB_CNT 512
92 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
102 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
104 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
106 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
108 #define MONITOR_SOB_STRING_SIZE 256
110 static u32 gaudi_stream_master
[GAUDI_STREAM_MASTER_ARR_SIZE
] = {
111 GAUDI_QUEUE_ID_DMA_0_0
,
112 GAUDI_QUEUE_ID_DMA_0_1
,
113 GAUDI_QUEUE_ID_DMA_0_2
,
114 GAUDI_QUEUE_ID_DMA_0_3
,
115 GAUDI_QUEUE_ID_DMA_1_0
,
116 GAUDI_QUEUE_ID_DMA_1_1
,
117 GAUDI_QUEUE_ID_DMA_1_2
,
118 GAUDI_QUEUE_ID_DMA_1_3
121 static const u8 gaudi_dma_assignment
[GAUDI_DMA_MAX
] = {
122 [GAUDI_PCI_DMA_1
] = GAUDI_ENGINE_ID_DMA_0
,
123 [GAUDI_PCI_DMA_2
] = GAUDI_ENGINE_ID_DMA_1
,
124 [GAUDI_HBM_DMA_1
] = GAUDI_ENGINE_ID_DMA_2
,
125 [GAUDI_HBM_DMA_2
] = GAUDI_ENGINE_ID_DMA_3
,
126 [GAUDI_HBM_DMA_3
] = GAUDI_ENGINE_ID_DMA_4
,
127 [GAUDI_HBM_DMA_4
] = GAUDI_ENGINE_ID_DMA_5
,
128 [GAUDI_HBM_DMA_5
] = GAUDI_ENGINE_ID_DMA_6
,
129 [GAUDI_HBM_DMA_6
] = GAUDI_ENGINE_ID_DMA_7
132 static const u8 gaudi_cq_assignment
[NUMBER_OF_CMPLT_QUEUES
] = {
133 [0] = GAUDI_QUEUE_ID_DMA_0_0
,
134 [1] = GAUDI_QUEUE_ID_DMA_0_1
,
135 [2] = GAUDI_QUEUE_ID_DMA_0_2
,
136 [3] = GAUDI_QUEUE_ID_DMA_0_3
,
137 [4] = GAUDI_QUEUE_ID_DMA_1_0
,
138 [5] = GAUDI_QUEUE_ID_DMA_1_1
,
139 [6] = GAUDI_QUEUE_ID_DMA_1_2
,
140 [7] = GAUDI_QUEUE_ID_DMA_1_3
,
143 static const u16 gaudi_packet_sizes
[MAX_PACKET_ID
] = {
144 [PACKET_WREG_32
] = sizeof(struct packet_wreg32
),
145 [PACKET_WREG_BULK
] = sizeof(struct packet_wreg_bulk
),
146 [PACKET_MSG_LONG
] = sizeof(struct packet_msg_long
),
147 [PACKET_MSG_SHORT
] = sizeof(struct packet_msg_short
),
148 [PACKET_CP_DMA
] = sizeof(struct packet_cp_dma
),
149 [PACKET_REPEAT
] = sizeof(struct packet_repeat
),
150 [PACKET_MSG_PROT
] = sizeof(struct packet_msg_prot
),
151 [PACKET_FENCE
] = sizeof(struct packet_fence
),
152 [PACKET_LIN_DMA
] = sizeof(struct packet_lin_dma
),
153 [PACKET_NOP
] = sizeof(struct packet_nop
),
154 [PACKET_STOP
] = sizeof(struct packet_stop
),
155 [PACKET_ARB_POINT
] = sizeof(struct packet_arb_point
),
156 [PACKET_WAIT
] = sizeof(struct packet_wait
),
157 [PACKET_LOAD_AND_EXE
] = sizeof(struct packet_load_and_exe
)
160 static inline bool validate_packet_id(enum packet_id id
)
164 case PACKET_WREG_BULK
:
165 case PACKET_MSG_LONG
:
166 case PACKET_MSG_SHORT
:
169 case PACKET_MSG_PROT
:
174 case PACKET_ARB_POINT
:
176 case PACKET_LOAD_AND_EXE
:
183 static const char * const
184 gaudi_tpc_interrupts_cause
[GAUDI_NUM_OF_TPC_INTR_CAUSE
] = {
185 "tpc_address_exceed_slm",
187 "tpc_spu_mac_overflow",
188 "tpc_spu_addsub_overflow",
189 "tpc_spu_abs_overflow",
190 "tpc_spu_fp_dst_nan_inf",
191 "tpc_spu_fp_dst_denorm",
192 "tpc_vpu_mac_overflow",
193 "tpc_vpu_addsub_overflow",
194 "tpc_vpu_abs_overflow",
195 "tpc_vpu_fp_dst_nan_inf",
196 "tpc_vpu_fp_dst_denorm",
198 "tpc_illegal_instruction",
199 "tpc_pc_wrap_around",
207 static const char * const
208 gaudi_qman_error_cause
[GAUDI_NUM_OF_QM_ERR_CAUSE
] = {
212 "CP error due to undefined OPCODE",
213 "CP encountered STOP OPCODE",
215 "CP WRREG32 or WRBULK returned error",
217 "FENCE 0 inc over max value and clipped",
218 "FENCE 1 inc over max value and clipped",
219 "FENCE 2 inc over max value and clipped",
220 "FENCE 3 inc over max value and clipped",
221 "FENCE 0 dec under min value and clipped",
222 "FENCE 1 dec under min value and clipped",
223 "FENCE 2 dec under min value and clipped",
224 "FENCE 3 dec under min value and clipped"
227 static const char * const
228 gaudi_qman_arb_error_cause
[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE
] = {
229 "Choice push while full error",
230 "Choice Q watchdog error",
231 "MSG AXI LBW returned with error"
234 static enum hl_queue_type gaudi_queue_type
[GAUDI_QUEUE_ID_SIZE
] = {
235 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_0_0 */
236 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_0_1 */
237 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_0_2 */
238 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_0_3 */
239 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_1_0 */
240 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_1_1 */
241 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_1_2 */
242 QUEUE_TYPE_EXT
, /* GAUDI_QUEUE_ID_DMA_1_3 */
243 QUEUE_TYPE_CPU
, /* GAUDI_QUEUE_ID_CPU_PQ */
244 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_2_0 */
245 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_2_1 */
246 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_2_2 */
247 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_2_3 */
248 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_3_0 */
249 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_3_1 */
250 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_3_2 */
251 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_3_3 */
252 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_4_0 */
253 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_4_1 */
254 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_4_2 */
255 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_4_3 */
256 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_5_0 */
257 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_5_1 */
258 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_5_2 */
259 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_5_3 */
260 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_6_0 */
261 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_6_1 */
262 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_6_2 */
263 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_6_3 */
264 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_7_0 */
265 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_7_1 */
266 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_7_2 */
267 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_DMA_7_3 */
268 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_0_0 */
269 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_0_1 */
270 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_0_2 */
271 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_0_3 */
272 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_1_0 */
273 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_1_1 */
274 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_1_2 */
275 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_MME_1_3 */
276 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_0_0 */
277 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_0_1 */
278 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_0_2 */
279 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_0_3 */
280 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_1_0 */
281 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_1_1 */
282 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_1_2 */
283 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_1_3 */
284 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_2_0 */
285 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_2_1 */
286 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_2_2 */
287 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_2_3 */
288 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_3_0 */
289 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_3_1 */
290 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_3_2 */
291 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_3_3 */
292 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_4_0 */
293 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_4_1 */
294 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_4_2 */
295 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_4_3 */
296 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_5_0 */
297 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_5_1 */
298 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_5_2 */
299 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_5_3 */
300 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_6_0 */
301 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_6_1 */
302 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_6_2 */
303 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_6_3 */
304 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_7_0 */
305 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_7_1 */
306 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_7_2 */
307 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_TPC_7_3 */
308 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_0_0 */
309 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_0_1 */
310 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_0_2 */
311 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_0_3 */
312 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_1_0 */
313 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_1_1 */
314 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_1_2 */
315 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_1_3 */
316 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_2_0 */
317 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_2_1 */
318 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_2_2 */
319 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_2_3 */
320 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_3_0 */
321 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_3_1 */
322 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_3_2 */
323 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_3_3 */
324 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_4_0 */
325 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_4_1 */
326 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_4_2 */
327 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_4_3 */
328 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_5_0 */
329 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_5_1 */
330 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_5_2 */
331 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_5_3 */
332 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_6_0 */
333 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_6_1 */
334 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_6_2 */
335 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_6_3 */
336 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_7_0 */
337 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_7_1 */
338 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_7_2 */
339 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_7_3 */
340 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_8_0 */
341 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_8_1 */
342 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_8_2 */
343 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_8_3 */
344 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_9_0 */
345 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_9_1 */
346 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_9_2 */
347 QUEUE_TYPE_INT
, /* GAUDI_QUEUE_ID_NIC_9_3 */
350 static struct hl_hw_obj_name_entry gaudi_so_id_to_str
[] = {
351 { .id
= 0, .name
= "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352 { .id
= 1, .name
= "SYNC_OBJ_DMA_UP_FEEDBACK" },
353 { .id
= 2, .name
= "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354 { .id
= 3, .name
= "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355 { .id
= 4, .name
= "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356 { .id
= 5, .name
= "SYNC_OBJ_HOST_DRAM_DONE" },
357 { .id
= 6, .name
= "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358 { .id
= 7, .name
= "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359 { .id
= 8, .name
= "SYNC_OBJ_ENGINE_SEM_MME_0" },
360 { .id
= 9, .name
= "SYNC_OBJ_ENGINE_SEM_MME_1" },
361 { .id
= 10, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362 { .id
= 11, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363 { .id
= 12, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364 { .id
= 13, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365 { .id
= 14, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366 { .id
= 15, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367 { .id
= 16, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368 { .id
= 17, .name
= "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369 { .id
= 18, .name
= "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370 { .id
= 19, .name
= "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371 { .id
= 20, .name
= "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372 { .id
= 21, .name
= "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373 { .id
= 22, .name
= "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374 { .id
= 23, .name
= "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375 { .id
= 24, .name
= "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376 { .id
= 25, .name
= "SYNC_OBJ_DBG_CTR_0" },
377 { .id
= 26, .name
= "SYNC_OBJ_DBG_CTR_1" },
380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str
[] = {
381 { .id
= 200, .name
= "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382 { .id
= 201, .name
= "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383 { .id
= 203, .name
= "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384 { .id
= 204, .name
= "MON_OBJ_TPC_0_CLK_GATE" },
385 { .id
= 205, .name
= "MON_OBJ_TPC_1_CLK_GATE" },
386 { .id
= 206, .name
= "MON_OBJ_TPC_2_CLK_GATE" },
387 { .id
= 207, .name
= "MON_OBJ_TPC_3_CLK_GATE" },
388 { .id
= 208, .name
= "MON_OBJ_TPC_4_CLK_GATE" },
389 { .id
= 209, .name
= "MON_OBJ_TPC_5_CLK_GATE" },
390 { .id
= 210, .name
= "MON_OBJ_TPC_6_CLK_GATE" },
391 { .id
= 211, .name
= "MON_OBJ_TPC_7_CLK_GATE" },
394 static s64 gaudi_state_dump_specs_props
[] = {
395 [SP_SYNC_OBJ_BASE_ADDR
] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
,
396 [SP_NEXT_SYNC_OBJ_ADDR
] = NEXT_SYNC_OBJ_ADDR_INTERVAL
,
397 [SP_SYNC_OBJ_AMOUNT
] = NUM_OF_SOB_IN_BLOCK
,
398 [SP_MON_OBJ_WR_ADDR_LOW
] =
399 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
,
400 [SP_MON_OBJ_WR_ADDR_HIGH
] =
401 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0
,
402 [SP_MON_OBJ_WR_DATA
] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0
,
403 [SP_MON_OBJ_ARM_DATA
] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0
,
404 [SP_MON_OBJ_STATUS
] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0
,
405 [SP_MONITORS_AMOUNT
] = NUM_OF_MONITORS_IN_BLOCK
,
406 [SP_TPC0_CMDQ
] = mmTPC0_QM_GLBL_CFG0
,
407 [SP_TPC0_CFG_SO
] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR
,
408 [SP_NEXT_TPC
] = mmTPC1_QM_GLBL_CFG0
- mmTPC0_QM_GLBL_CFG0
,
409 [SP_MME_CMDQ
] = mmMME0_QM_GLBL_CFG0
,
410 [SP_MME_CFG_SO
] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL
,
411 [SP_NEXT_MME
] = mmMME2_QM_GLBL_CFG0
- mmMME0_QM_GLBL_CFG0
,
412 [SP_DMA_CMDQ
] = mmDMA0_QM_GLBL_CFG0
,
413 [SP_DMA_CFG_SO
] = mmDMA0_CORE_WR_COMP_ADDR_LO
,
414 [SP_DMA_QUEUES_OFFSET
] = mmDMA1_QM_GLBL_CFG0
- mmDMA0_QM_GLBL_CFG0
,
415 [SP_NUM_OF_MME_ENGINES
] = NUM_OF_MME_ENGINES
,
416 [SP_SUB_MME_ENG_NUM
] = NUM_OF_MME_SUB_ENGINES
,
417 [SP_NUM_OF_DMA_ENGINES
] = NUM_OF_DMA_ENGINES
,
418 [SP_NUM_OF_TPC_ENGINES
] = NUM_OF_TPC_ENGINES
,
419 [SP_ENGINE_NUM_OF_QUEUES
] = NUM_OF_QUEUES
,
420 [SP_ENGINE_NUM_OF_STREAMS
] = NUM_OF_STREAMS
,
421 [SP_ENGINE_NUM_OF_FENCES
] = NUM_OF_FENCES
,
422 [SP_FENCE0_CNT_OFFSET
] =
423 mmDMA0_QM_CP_FENCE0_CNT_0
- mmDMA0_QM_GLBL_CFG0
,
424 [SP_FENCE0_RDATA_OFFSET
] =
425 mmDMA0_QM_CP_FENCE0_RDATA_0
- mmDMA0_QM_GLBL_CFG0
,
426 [SP_CP_STS_OFFSET
] = mmDMA0_QM_CP_STS_0
- mmDMA0_QM_GLBL_CFG0
,
430 static const int gaudi_queue_id_to_engine_id
[] = {
431 [GAUDI_QUEUE_ID_DMA_0_0
...GAUDI_QUEUE_ID_DMA_0_3
] = GAUDI_ENGINE_ID_DMA_0
,
432 [GAUDI_QUEUE_ID_DMA_1_0
...GAUDI_QUEUE_ID_DMA_1_3
] = GAUDI_ENGINE_ID_DMA_1
,
433 [GAUDI_QUEUE_ID_CPU_PQ
] = GAUDI_ENGINE_ID_SIZE
,
434 [GAUDI_QUEUE_ID_DMA_2_0
...GAUDI_QUEUE_ID_DMA_2_3
] = GAUDI_ENGINE_ID_DMA_2
,
435 [GAUDI_QUEUE_ID_DMA_3_0
...GAUDI_QUEUE_ID_DMA_3_3
] = GAUDI_ENGINE_ID_DMA_3
,
436 [GAUDI_QUEUE_ID_DMA_4_0
...GAUDI_QUEUE_ID_DMA_4_3
] = GAUDI_ENGINE_ID_DMA_4
,
437 [GAUDI_QUEUE_ID_DMA_5_0
...GAUDI_QUEUE_ID_DMA_5_3
] = GAUDI_ENGINE_ID_DMA_5
,
438 [GAUDI_QUEUE_ID_DMA_6_0
...GAUDI_QUEUE_ID_DMA_6_3
] = GAUDI_ENGINE_ID_DMA_6
,
439 [GAUDI_QUEUE_ID_DMA_7_0
...GAUDI_QUEUE_ID_DMA_7_3
] = GAUDI_ENGINE_ID_DMA_7
,
440 [GAUDI_QUEUE_ID_MME_0_0
...GAUDI_QUEUE_ID_MME_0_3
] = GAUDI_ENGINE_ID_MME_0
,
441 [GAUDI_QUEUE_ID_MME_1_0
...GAUDI_QUEUE_ID_MME_1_3
] = GAUDI_ENGINE_ID_MME_2
,
442 [GAUDI_QUEUE_ID_TPC_0_0
...GAUDI_QUEUE_ID_TPC_0_3
] = GAUDI_ENGINE_ID_TPC_0
,
443 [GAUDI_QUEUE_ID_TPC_1_0
...GAUDI_QUEUE_ID_TPC_1_3
] = GAUDI_ENGINE_ID_TPC_1
,
444 [GAUDI_QUEUE_ID_TPC_2_0
...GAUDI_QUEUE_ID_TPC_2_3
] = GAUDI_ENGINE_ID_TPC_2
,
445 [GAUDI_QUEUE_ID_TPC_3_0
...GAUDI_QUEUE_ID_TPC_3_3
] = GAUDI_ENGINE_ID_TPC_3
,
446 [GAUDI_QUEUE_ID_TPC_4_0
...GAUDI_QUEUE_ID_TPC_4_3
] = GAUDI_ENGINE_ID_TPC_4
,
447 [GAUDI_QUEUE_ID_TPC_5_0
...GAUDI_QUEUE_ID_TPC_5_3
] = GAUDI_ENGINE_ID_TPC_5
,
448 [GAUDI_QUEUE_ID_TPC_6_0
...GAUDI_QUEUE_ID_TPC_6_3
] = GAUDI_ENGINE_ID_TPC_6
,
449 [GAUDI_QUEUE_ID_TPC_7_0
...GAUDI_QUEUE_ID_TPC_7_3
] = GAUDI_ENGINE_ID_TPC_7
,
450 [GAUDI_QUEUE_ID_NIC_0_0
...GAUDI_QUEUE_ID_NIC_0_3
] = GAUDI_ENGINE_ID_NIC_0
,
451 [GAUDI_QUEUE_ID_NIC_1_0
...GAUDI_QUEUE_ID_NIC_1_3
] = GAUDI_ENGINE_ID_NIC_1
,
452 [GAUDI_QUEUE_ID_NIC_2_0
...GAUDI_QUEUE_ID_NIC_2_3
] = GAUDI_ENGINE_ID_NIC_2
,
453 [GAUDI_QUEUE_ID_NIC_3_0
...GAUDI_QUEUE_ID_NIC_3_3
] = GAUDI_ENGINE_ID_NIC_3
,
454 [GAUDI_QUEUE_ID_NIC_4_0
...GAUDI_QUEUE_ID_NIC_4_3
] = GAUDI_ENGINE_ID_NIC_4
,
455 [GAUDI_QUEUE_ID_NIC_5_0
...GAUDI_QUEUE_ID_NIC_5_3
] = GAUDI_ENGINE_ID_NIC_5
,
456 [GAUDI_QUEUE_ID_NIC_6_0
...GAUDI_QUEUE_ID_NIC_6_3
] = GAUDI_ENGINE_ID_NIC_6
,
457 [GAUDI_QUEUE_ID_NIC_7_0
...GAUDI_QUEUE_ID_NIC_7_3
] = GAUDI_ENGINE_ID_NIC_7
,
458 [GAUDI_QUEUE_ID_NIC_8_0
...GAUDI_QUEUE_ID_NIC_8_3
] = GAUDI_ENGINE_ID_NIC_8
,
459 [GAUDI_QUEUE_ID_NIC_9_0
...GAUDI_QUEUE_ID_NIC_9_3
] = GAUDI_ENGINE_ID_NIC_9
,
462 /* The order here is opposite to the order of the indexing in the h/w.
463 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
465 static const char * const gaudi_sync_manager_names
[] = {
473 struct ecc_info_extract_params
{
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device
*hdev
, u32 asid
,
481 static int gaudi_send_job_on_qman0(struct hl_device
*hdev
,
482 struct hl_cs_job
*job
);
483 static int gaudi_memset_device_memory(struct hl_device
*hdev
, u64 addr
,
485 static int gaudi_memset_registers(struct hl_device
*hdev
, u64 reg_base
,
486 u32 num_regs
, u32 val
);
487 static int gaudi_run_tpc_kernel(struct hl_device
*hdev
, u64 tpc_kernel
,
489 static int gaudi_mmu_clear_pgt_range(struct hl_device
*hdev
);
490 static int gaudi_cpucp_info_get(struct hl_device
*hdev
);
491 static void gaudi_disable_clock_gating(struct hl_device
*hdev
);
492 static void gaudi_mmu_prepare(struct hl_device
*hdev
, u32 asid
);
493 static u32
gaudi_gen_signal_cb(struct hl_device
*hdev
, void *data
, u16 sob_id
,
495 static u32
gaudi_gen_wait_cb(struct hl_device
*hdev
,
496 struct hl_gen_wait_properties
*prop
);
497 static inline enum hl_collective_mode
498 get_collective_mode(struct hl_device
*hdev
, u32 queue_id
)
500 if (gaudi_queue_type
[queue_id
] == QUEUE_TYPE_EXT
)
501 return HL_COLLECTIVE_MASTER
;
503 if (queue_id
>= GAUDI_QUEUE_ID_DMA_5_0
&&
504 queue_id
<= GAUDI_QUEUE_ID_DMA_5_3
)
505 return HL_COLLECTIVE_SLAVE
;
507 if (queue_id
>= GAUDI_QUEUE_ID_TPC_7_0
&&
508 queue_id
<= GAUDI_QUEUE_ID_TPC_7_3
)
509 return HL_COLLECTIVE_SLAVE
;
511 if (queue_id
>= GAUDI_QUEUE_ID_NIC_0_0
&&
512 queue_id
<= GAUDI_QUEUE_ID_NIC_9_3
)
513 return HL_COLLECTIVE_SLAVE
;
515 return HL_COLLECTIVE_NOT_SUPPORTED
;
518 static inline void set_default_power_values(struct hl_device
*hdev
)
520 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
522 if (hdev
->card_type
== cpucp_card_type_pmc
) {
523 prop
->max_power_default
= MAX_POWER_DEFAULT_PMC
;
525 if (prop
->fw_security_enabled
)
526 prop
->dc_power_default
= DC_POWER_DEFAULT_PMC_SEC
;
528 prop
->dc_power_default
= DC_POWER_DEFAULT_PMC
;
530 prop
->max_power_default
= MAX_POWER_DEFAULT_PCI
;
531 prop
->dc_power_default
= DC_POWER_DEFAULT_PCI
;
535 static int gaudi_set_fixed_properties(struct hl_device
*hdev
)
537 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
538 u32 num_sync_stream_queues
= 0;
541 prop
->max_queues
= GAUDI_QUEUE_ID_SIZE
;
542 prop
->hw_queues_props
= kcalloc(prop
->max_queues
,
543 sizeof(struct hw_queue_properties
),
546 if (!prop
->hw_queues_props
)
549 for (i
= 0 ; i
< prop
->max_queues
; i
++) {
550 if (gaudi_queue_type
[i
] == QUEUE_TYPE_EXT
) {
551 prop
->hw_queues_props
[i
].type
= QUEUE_TYPE_EXT
;
552 prop
->hw_queues_props
[i
].driver_only
= 0;
553 prop
->hw_queues_props
[i
].supports_sync_stream
= 1;
554 prop
->hw_queues_props
[i
].cb_alloc_flags
=
556 num_sync_stream_queues
++;
557 } else if (gaudi_queue_type
[i
] == QUEUE_TYPE_CPU
) {
558 prop
->hw_queues_props
[i
].type
= QUEUE_TYPE_CPU
;
559 prop
->hw_queues_props
[i
].driver_only
= 1;
560 prop
->hw_queues_props
[i
].supports_sync_stream
= 0;
561 prop
->hw_queues_props
[i
].cb_alloc_flags
=
563 } else if (gaudi_queue_type
[i
] == QUEUE_TYPE_INT
) {
564 prop
->hw_queues_props
[i
].type
= QUEUE_TYPE_INT
;
565 prop
->hw_queues_props
[i
].driver_only
= 0;
566 prop
->hw_queues_props
[i
].supports_sync_stream
= 0;
567 prop
->hw_queues_props
[i
].cb_alloc_flags
=
571 prop
->hw_queues_props
[i
].collective_mode
=
572 get_collective_mode(hdev
, i
);
575 prop
->cache_line_size
= DEVICE_CACHE_LINE_SIZE
;
576 prop
->cfg_base_address
= CFG_BASE
;
577 prop
->device_dma_offset_for_host_access
= HOST_PHYS_BASE
;
578 prop
->host_base_address
= HOST_PHYS_BASE
;
579 prop
->host_end_address
= prop
->host_base_address
+ HOST_PHYS_SIZE
;
580 prop
->completion_queues_count
= NUMBER_OF_CMPLT_QUEUES
;
581 prop
->completion_mode
= HL_COMPLETION_MODE_JOB
;
582 prop
->collective_first_sob
= 0;
583 prop
->collective_first_mon
= 0;
585 /* 2 SOBs per internal queue stream are reserved for collective */
586 prop
->sync_stream_first_sob
=
587 ALIGN(NUMBER_OF_SOBS_IN_GRP
, HL_MAX_SOBS_PER_MONITOR
)
588 * QMAN_STREAMS
* HL_RSVD_SOBS
;
590 /* 1 monitor per internal queue stream are reserved for collective
591 * 2 monitors per external queue stream are reserved for collective
593 prop
->sync_stream_first_mon
=
594 (NUMBER_OF_COLLECTIVE_QUEUES
* QMAN_STREAMS
) +
595 (NUMBER_OF_EXT_HW_QUEUES
* 2);
597 prop
->dram_base_address
= DRAM_PHYS_BASE
;
598 prop
->dram_size
= GAUDI_HBM_SIZE_32GB
;
599 prop
->dram_end_address
= prop
->dram_base_address
+ prop
->dram_size
;
600 prop
->dram_user_base_address
= DRAM_BASE_ADDR_USER
;
602 prop
->sram_base_address
= SRAM_BASE_ADDR
;
603 prop
->sram_size
= SRAM_SIZE
;
604 prop
->sram_end_address
= prop
->sram_base_address
+ prop
->sram_size
;
605 prop
->sram_user_base_address
=
606 prop
->sram_base_address
+ SRAM_USER_BASE_OFFSET
;
608 prop
->mmu_cache_mng_addr
= MMU_CACHE_MNG_ADDR
;
609 prop
->mmu_cache_mng_size
= MMU_CACHE_MNG_SIZE
;
611 prop
->mmu_pgt_addr
= MMU_PAGE_TABLES_ADDR
;
613 prop
->mmu_pgt_size
= 0x800000; /* 8MB */
615 prop
->mmu_pgt_size
= MMU_PAGE_TABLES_SIZE
;
616 prop
->mmu_pte_size
= HL_PTE_SIZE
;
617 prop
->dram_page_size
= PAGE_SIZE_2MB
;
618 prop
->device_mem_alloc_default_page_size
= prop
->dram_page_size
;
619 prop
->dram_supports_virtual_memory
= false;
621 prop
->pmmu
.hop_shifts
[MMU_HOP0
] = MMU_V1_1_HOP0_SHIFT
;
622 prop
->pmmu
.hop_shifts
[MMU_HOP1
] = MMU_V1_1_HOP1_SHIFT
;
623 prop
->pmmu
.hop_shifts
[MMU_HOP2
] = MMU_V1_1_HOP2_SHIFT
;
624 prop
->pmmu
.hop_shifts
[MMU_HOP3
] = MMU_V1_1_HOP3_SHIFT
;
625 prop
->pmmu
.hop_shifts
[MMU_HOP4
] = MMU_V1_1_HOP4_SHIFT
;
626 prop
->pmmu
.hop_masks
[MMU_HOP0
] = MMU_V1_1_HOP0_MASK
;
627 prop
->pmmu
.hop_masks
[MMU_HOP1
] = MMU_V1_1_HOP1_MASK
;
628 prop
->pmmu
.hop_masks
[MMU_HOP2
] = MMU_V1_1_HOP2_MASK
;
629 prop
->pmmu
.hop_masks
[MMU_HOP3
] = MMU_V1_1_HOP3_MASK
;
630 prop
->pmmu
.hop_masks
[MMU_HOP4
] = MMU_V1_1_HOP4_MASK
;
631 prop
->pmmu
.start_addr
= VA_HOST_SPACE_START
;
632 prop
->pmmu
.end_addr
=
633 (VA_HOST_SPACE_START
+ VA_HOST_SPACE_SIZE
/ 2) - 1;
634 prop
->pmmu
.page_size
= PAGE_SIZE_4KB
;
635 prop
->pmmu
.num_hops
= MMU_ARCH_5_HOPS
;
636 prop
->pmmu
.last_mask
= LAST_MASK
;
637 /* TODO: will be duplicated until implementing per-MMU props */
638 prop
->pmmu
.hop_table_size
= HOP_TABLE_SIZE_512_PTE
;
639 prop
->pmmu
.hop0_tables_total_size
= HOP0_512_PTE_TABLES_TOTAL_SIZE
;
641 /* PMMU and HPMMU are the same except of page size */
642 memcpy(&prop
->pmmu_huge
, &prop
->pmmu
, sizeof(prop
->pmmu
));
643 prop
->pmmu_huge
.page_size
= PAGE_SIZE_2MB
;
645 /* shifts and masks are the same in PMMU and DMMU */
646 memcpy(&prop
->dmmu
, &prop
->pmmu
, sizeof(prop
->pmmu
));
647 prop
->dmmu
.start_addr
= (VA_HOST_SPACE_START
+ VA_HOST_SPACE_SIZE
/ 2);
648 prop
->dmmu
.end_addr
= VA_HOST_SPACE_END
;
649 prop
->dmmu
.page_size
= PAGE_SIZE_2MB
;
650 prop
->dmmu
.pgt_size
= prop
->mmu_pgt_size
;
652 prop
->cfg_size
= CFG_SIZE
;
653 prop
->max_asid
= MAX_ASID
;
654 prop
->num_of_events
= GAUDI_EVENT_SIZE
;
655 prop
->max_num_of_engines
= GAUDI_ENGINE_ID_SIZE
;
656 prop
->tpc_enabled_mask
= TPC_ENABLED_MASK
;
658 set_default_power_values(hdev
);
660 prop
->cb_pool_cb_cnt
= GAUDI_CB_POOL_CB_CNT
;
661 prop
->cb_pool_cb_size
= GAUDI_CB_POOL_CB_SIZE
;
663 prop
->pcie_dbi_base_address
= mmPCIE_DBI_BASE
;
664 prop
->pcie_aux_dbi_reg_addr
= CFG_BASE
+ mmPCIE_AUX_DBI
;
666 strscpy_pad(prop
->cpucp_info
.card_name
, GAUDI_DEFAULT_CARD_NAME
,
669 prop
->max_pending_cs
= GAUDI_MAX_PENDING_CS
;
671 prop
->first_available_user_sob
[HL_GAUDI_WS_DCORE
] =
672 prop
->sync_stream_first_sob
+
673 (num_sync_stream_queues
* HL_RSVD_SOBS
);
674 prop
->first_available_user_mon
[HL_GAUDI_WS_DCORE
] =
675 prop
->sync_stream_first_mon
+
676 (num_sync_stream_queues
* HL_RSVD_MONS
);
678 prop
->first_available_user_interrupt
= USHRT_MAX
;
679 prop
->tpc_interrupt_id
= USHRT_MAX
;
682 prop
->eq_interrupt_id
= 0;
684 for (i
= 0 ; i
< HL_MAX_DCORES
; i
++)
685 prop
->first_available_cq
[i
] = USHRT_MAX
;
687 prop
->fw_cpu_boot_dev_sts0_valid
= false;
688 prop
->fw_cpu_boot_dev_sts1_valid
= false;
689 prop
->hard_reset_done_by_fw
= false;
690 prop
->gic_interrupts_enable
= true;
692 prop
->server_type
= HL_SERVER_TYPE_UNKNOWN
;
694 prop
->clk_pll_index
= HL_GAUDI_MME_PLL
;
695 prop
->max_freq_value
= GAUDI_MAX_CLK_FREQ
;
697 prop
->use_get_power_for_reset_history
= true;
699 prop
->configurable_stop_on_err
= true;
701 prop
->set_max_power_on_device_init
= true;
705 prop
->hbw_flush_reg
= mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL
;
710 static int gaudi_pci_bars_map(struct hl_device
*hdev
)
712 static const char * const name
[] = {"SRAM", "CFG", "HBM"};
713 bool is_wc
[3] = {false, false, true};
716 rc
= hl_pci_bars_map(hdev
, name
, is_wc
);
720 hdev
->rmmio
= hdev
->pcie_bar
[CFG_BAR_ID
] +
721 (CFG_BASE
- SPI_FLASH_BASE_ADDR
);
726 static u64
gaudi_set_hbm_bar_base(struct hl_device
*hdev
, u64 addr
)
728 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
729 struct hl_inbound_pci_region pci_region
;
733 if ((gaudi
) && (gaudi
->hbm_bar_cur_addr
== addr
))
736 if (hdev
->asic_prop
.iatu_done_by_fw
)
739 /* Inbound Region 2 - Bar 4 - Point to HBM */
740 pci_region
.mode
= PCI_BAR_MATCH_MODE
;
741 pci_region
.bar
= HBM_BAR_ID
;
742 pci_region
.addr
= addr
;
743 rc
= hl_pci_set_inbound_region(hdev
, 2, &pci_region
);
748 old_addr
= gaudi
->hbm_bar_cur_addr
;
749 gaudi
->hbm_bar_cur_addr
= addr
;
755 static int gaudi_init_iatu(struct hl_device
*hdev
)
757 struct hl_inbound_pci_region inbound_region
;
758 struct hl_outbound_pci_region outbound_region
;
761 if (hdev
->asic_prop
.iatu_done_by_fw
)
764 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 inbound_region
.mode
= PCI_BAR_MATCH_MODE
;
766 inbound_region
.bar
= SRAM_BAR_ID
;
767 inbound_region
.addr
= SRAM_BASE_ADDR
;
768 rc
= hl_pci_set_inbound_region(hdev
, 0, &inbound_region
);
772 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 inbound_region
.mode
= PCI_BAR_MATCH_MODE
;
774 inbound_region
.bar
= CFG_BAR_ID
;
775 inbound_region
.addr
= SPI_FLASH_BASE_ADDR
;
776 rc
= hl_pci_set_inbound_region(hdev
, 1, &inbound_region
);
780 /* Inbound Region 2 - Bar 4 - Point to HBM */
781 inbound_region
.mode
= PCI_BAR_MATCH_MODE
;
782 inbound_region
.bar
= HBM_BAR_ID
;
783 inbound_region
.addr
= DRAM_PHYS_BASE
;
784 rc
= hl_pci_set_inbound_region(hdev
, 2, &inbound_region
);
788 /* Outbound Region 0 - Point to Host */
789 outbound_region
.addr
= HOST_PHYS_BASE
;
790 outbound_region
.size
= HOST_PHYS_SIZE
;
791 rc
= hl_pci_set_outbound_region(hdev
, &outbound_region
);
797 static enum hl_device_hw_state
gaudi_get_hw_state(struct hl_device
*hdev
)
799 return RREG32(mmHW_STATE
);
802 static int gaudi_early_init(struct hl_device
*hdev
)
804 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
805 struct pci_dev
*pdev
= hdev
->pdev
;
806 resource_size_t pci_bar_size
;
810 rc
= gaudi_set_fixed_properties(hdev
);
812 dev_err(hdev
->dev
, "Failed setting fixed properties\n");
816 /* Check BAR sizes */
817 pci_bar_size
= pci_resource_len(pdev
, SRAM_BAR_ID
);
819 if (pci_bar_size
!= SRAM_BAR_SIZE
) {
820 dev_err(hdev
->dev
, "Not " HL_NAME
"? BAR %d size %pa, expecting %llu\n",
821 SRAM_BAR_ID
, &pci_bar_size
, SRAM_BAR_SIZE
);
823 goto free_queue_props
;
826 pci_bar_size
= pci_resource_len(pdev
, CFG_BAR_ID
);
828 if (pci_bar_size
!= CFG_BAR_SIZE
) {
829 dev_err(hdev
->dev
, "Not " HL_NAME
"? BAR %d size %pa, expecting %llu\n",
830 CFG_BAR_ID
, &pci_bar_size
, CFG_BAR_SIZE
);
832 goto free_queue_props
;
835 prop
->dram_pci_bar_size
= pci_resource_len(pdev
, HBM_BAR_ID
);
836 hdev
->dram_pci_bar_start
= pci_resource_start(pdev
, HBM_BAR_ID
);
838 /* If FW security is enabled at this point it means no access to ELBI */
839 if (hdev
->asic_prop
.fw_security_enabled
) {
840 hdev
->asic_prop
.iatu_done_by_fw
= true;
843 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
844 * decision can only be taken based on PCI ID security.
846 hdev
->asic_prop
.gic_interrupts_enable
= false;
850 rc
= hl_pci_elbi_read(hdev
, CFG_BASE
+ mmCPU_BOOT_DEV_STS0
,
853 goto free_queue_props
;
855 /* Check whether FW is configuring iATU */
856 if ((fw_boot_status
& CPU_BOOT_DEV_STS0_ENABLED
) &&
857 (fw_boot_status
& CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN
))
858 hdev
->asic_prop
.iatu_done_by_fw
= true;
861 rc
= hl_pci_init(hdev
);
863 goto free_queue_props
;
865 /* Before continuing in the initialization, we need to read the preboot
866 * version to determine whether we run with a security-enabled firmware
868 rc
= hl_fw_read_preboot_status(hdev
);
870 if (hdev
->reset_on_preboot_fail
)
871 /* we are already on failure flow, so don't check if hw_fini fails. */
872 hdev
->asic_funcs
->hw_fini(hdev
, true, false);
876 if (gaudi_get_hw_state(hdev
) == HL_DEVICE_HW_STATE_DIRTY
) {
877 dev_dbg(hdev
->dev
, "H/W state is dirty, must reset before initializing\n");
878 rc
= hdev
->asic_funcs
->hw_fini(hdev
, true, false);
880 dev_err(hdev
->dev
, "failed to reset HW in dirty state (%d)\n", rc
);
890 kfree(hdev
->asic_prop
.hw_queues_props
);
894 static int gaudi_early_fini(struct hl_device
*hdev
)
896 kfree(hdev
->asic_prop
.hw_queues_props
);
903 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
905 * @hdev: pointer to hl_device structure
908 static int gaudi_fetch_psoc_frequency(struct hl_device
*hdev
)
910 u32 nr
= 0, nf
= 0, od
= 0, div_fctr
= 0, pll_clk
, div_sel
;
911 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
912 u16 pll_freq_arr
[HL_PLL_NUM_OUTPUTS
], freq
;
915 if ((hdev
->fw_components
& FW_TYPE_LINUX
) &&
916 (prop
->fw_app_cpu_boot_dev_sts0
& CPU_BOOT_DEV_STS0_PLL_INFO_EN
)) {
917 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
919 if (!(gaudi
->hw_cap_initialized
& HW_CAP_CPU_Q
))
922 rc
= hl_fw_cpucp_pll_info_get(hdev
, HL_GAUDI_CPU_PLL
, pll_freq_arr
);
927 freq
= pll_freq_arr
[2];
929 /* Backward compatibility */
930 div_fctr
= RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2
);
931 div_sel
= RREG32(mmPSOC_CPU_PLL_DIV_SEL_2
);
932 nr
= RREG32(mmPSOC_CPU_PLL_NR
);
933 nf
= RREG32(mmPSOC_CPU_PLL_NF
);
934 od
= RREG32(mmPSOC_CPU_PLL_OD
);
936 if (div_sel
== DIV_SEL_REF_CLK
||
937 div_sel
== DIV_SEL_DIVIDED_REF
) {
938 if (div_sel
== DIV_SEL_REF_CLK
)
941 freq
= PLL_REF_CLK
/ (div_fctr
+ 1);
942 } else if (div_sel
== DIV_SEL_PLL_CLK
||
943 div_sel
== DIV_SEL_DIVIDED_PLL
) {
944 pll_clk
= PLL_REF_CLK
* (nf
+ 1) /
945 ((nr
+ 1) * (od
+ 1));
946 if (div_sel
== DIV_SEL_PLL_CLK
)
949 freq
= pll_clk
/ (div_fctr
+ 1);
951 dev_warn(hdev
->dev
, "Received invalid div select value: %#x", div_sel
);
956 prop
->psoc_timestamp_frequency
= freq
;
957 prop
->psoc_pci_pll_nr
= nr
;
958 prop
->psoc_pci_pll_nf
= nf
;
959 prop
->psoc_pci_pll_od
= od
;
960 prop
->psoc_pci_pll_div_factor
= div_fctr
;
965 static int _gaudi_init_tpc_mem(struct hl_device
*hdev
,
966 dma_addr_t tpc_kernel_src_addr
, u32 tpc_kernel_size
)
968 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
969 struct packet_lin_dma
*init_tpc_mem_pkt
;
970 struct hl_cs_job
*job
;
977 cb
= hl_cb_kernel_create(hdev
, PAGE_SIZE
, false);
981 init_tpc_mem_pkt
= cb
->kernel_address
;
982 cb_size
= sizeof(*init_tpc_mem_pkt
);
983 memset(init_tpc_mem_pkt
, 0, cb_size
);
985 init_tpc_mem_pkt
->tsize
= cpu_to_le32(tpc_kernel_size
);
987 ctl
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_LIN_DMA
);
988 ctl
|= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK
, 1);
989 ctl
|= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK
, 1);
990 ctl
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
992 init_tpc_mem_pkt
->ctl
= cpu_to_le32(ctl
);
994 init_tpc_mem_pkt
->src_addr
= cpu_to_le64(tpc_kernel_src_addr
);
996 /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
997 dst_addr
= FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK
,
998 round_up(prop
->sram_user_base_address
, SZ_8K
));
999 init_tpc_mem_pkt
->dst_addr
|= cpu_to_le64(dst_addr
);
1001 job
= hl_cs_allocate_job(hdev
, QUEUE_TYPE_EXT
, true);
1003 dev_err(hdev
->dev
, "Failed to allocate a new job\n");
1010 atomic_inc(&job
->user_cb
->cs_cnt
);
1011 job
->user_cb_size
= cb_size
;
1012 job
->hw_queue_id
= GAUDI_QUEUE_ID_DMA_0_0
;
1013 job
->patched_cb
= job
->user_cb
;
1014 job
->job_cb_size
= job
->user_cb_size
+ sizeof(struct packet_msg_prot
);
1016 hl_debugfs_add_job(hdev
, job
);
1018 rc
= gaudi_send_job_on_qman0(hdev
, job
);
1023 for (tpc_id
= 0 ; tpc_id
< TPC_NUMBER_OF_ENGINES
; tpc_id
++) {
1024 rc
= gaudi_run_tpc_kernel(hdev
, dst_addr
, tpc_id
);
1030 hl_userptr_delete_list(hdev
, &job
->userptr_list
);
1031 hl_debugfs_remove_job(hdev
, job
);
1033 atomic_dec(&cb
->cs_cnt
);
1037 hl_cb_destroy(&hdev
->kernel_mem_mgr
, cb
->buf
->handle
);
1043 * gaudi_init_tpc_mem() - Initialize TPC memories.
1044 * @hdev: Pointer to hl_device structure.
1046 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1048 * Return: 0 for success, negative value for error.
1050 static int gaudi_init_tpc_mem(struct hl_device
*hdev
)
1052 const struct firmware
*fw
;
1055 dma_addr_t dma_handle
;
1059 rc
= request_firmware(&fw
, GAUDI_TPC_FW_FILE
, hdev
->dev
);
1060 if (rc
== -EINTR
&& count
-- > 0) {
1066 dev_err(hdev
->dev
, "Failed to load firmware file %s\n",
1072 cpu_addr
= hl_asic_dma_alloc_coherent(hdev
, fw_size
, &dma_handle
, GFP_KERNEL
| __GFP_ZERO
);
1075 "Failed to allocate %zu of dma memory for TPC kernel\n",
1081 memcpy(cpu_addr
, fw
->data
, fw_size
);
1083 rc
= _gaudi_init_tpc_mem(hdev
, dma_handle
, fw_size
);
1085 hl_asic_dma_free_coherent(hdev
, fw
->size
, cpu_addr
, dma_handle
);
1088 release_firmware(fw
);
1092 static void gaudi_collective_map_sobs(struct hl_device
*hdev
, u32 stream
)
1094 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
1095 struct gaudi_collective_properties
*prop
= &gaudi
->collective_props
;
1096 struct hl_hw_queue
*q
;
1097 u32 i
, sob_id
, sob_group_id
, queue_id
;
1099 /* Iterate through SOB groups and assign a SOB for each slave queue */
1101 stream
* HL_RSVD_SOBS
+ prop
->curr_sob_group_idx
[stream
];
1102 sob_id
= prop
->hw_sob_group
[sob_group_id
].base_sob_id
;
1104 queue_id
= GAUDI_QUEUE_ID_NIC_0_0
+ stream
;
1105 for (i
= 0 ; i
< NIC_NUMBER_OF_ENGINES
; i
++) {
1106 q
= &hdev
->kernel_queues
[queue_id
+ (4 * i
)];
1107 q
->sync_stream_prop
.collective_sob_id
= sob_id
+ i
;
1110 /* Both DMA5 and TPC7 use the same resources since only a single
1111 * engine need to participate in the reduction process
1113 queue_id
= GAUDI_QUEUE_ID_DMA_5_0
+ stream
;
1114 q
= &hdev
->kernel_queues
[queue_id
];
1115 q
->sync_stream_prop
.collective_sob_id
=
1116 sob_id
+ NIC_NUMBER_OF_ENGINES
;
1118 queue_id
= GAUDI_QUEUE_ID_TPC_7_0
+ stream
;
1119 q
= &hdev
->kernel_queues
[queue_id
];
1120 q
->sync_stream_prop
.collective_sob_id
=
1121 sob_id
+ NIC_NUMBER_OF_ENGINES
;
1124 static void gaudi_sob_group_hw_reset(struct kref
*ref
)
1126 struct gaudi_hw_sob_group
*hw_sob_group
=
1127 container_of(ref
, struct gaudi_hw_sob_group
, kref
);
1128 struct hl_device
*hdev
= hw_sob_group
->hdev
;
1131 for (i
= 0 ; i
< NUMBER_OF_SOBS_IN_GRP
; i
++)
1132 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
+
1133 (hw_sob_group
->base_sob_id
* 4) + (i
* 4)), 0);
1135 kref_init(&hw_sob_group
->kref
);
1138 static void gaudi_sob_group_reset_error(struct kref
*ref
)
1140 struct gaudi_hw_sob_group
*hw_sob_group
=
1141 container_of(ref
, struct gaudi_hw_sob_group
, kref
);
1142 struct hl_device
*hdev
= hw_sob_group
->hdev
;
1145 "SOB release shouldn't be called here, base_sob_id: %d\n",
1146 hw_sob_group
->base_sob_id
);
1149 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device
*gaudi
)
1151 struct gaudi_collective_properties
*prop
;
1154 prop
= &gaudi
->collective_props
;
1156 memset(prop
->mstr_sob_mask
, 0, sizeof(prop
->mstr_sob_mask
));
1158 for (i
= 0 ; i
< NIC_NUMBER_OF_ENGINES
; i
++)
1159 if (gaudi
->hw_cap_initialized
& BIT(HW_CAP_NIC_SHIFT
+ i
))
1160 prop
->mstr_sob_mask
[i
/ HL_MAX_SOBS_PER_MONITOR
] |=
1161 BIT(i
% HL_MAX_SOBS_PER_MONITOR
);
1162 /* Set collective engine bit */
1163 prop
->mstr_sob_mask
[i
/ HL_MAX_SOBS_PER_MONITOR
] |=
1164 BIT(i
% HL_MAX_SOBS_PER_MONITOR
);
1167 static int gaudi_collective_init(struct hl_device
*hdev
)
1169 u32 i
, sob_id
, reserved_sobs_per_group
;
1170 struct gaudi_collective_properties
*prop
;
1171 struct gaudi_device
*gaudi
;
1173 gaudi
= hdev
->asic_specific
;
1174 prop
= &gaudi
->collective_props
;
1175 sob_id
= hdev
->asic_prop
.collective_first_sob
;
1177 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 reserved_sobs_per_group
=
1179 ALIGN(NUMBER_OF_SOBS_IN_GRP
, HL_MAX_SOBS_PER_MONITOR
);
1181 /* Init SOB groups */
1182 for (i
= 0 ; i
< NUM_SOB_GROUPS
; i
++) {
1183 prop
->hw_sob_group
[i
].hdev
= hdev
;
1184 prop
->hw_sob_group
[i
].base_sob_id
= sob_id
;
1185 sob_id
+= reserved_sobs_per_group
;
1186 gaudi_sob_group_hw_reset(&prop
->hw_sob_group
[i
].kref
);
1189 for (i
= 0 ; i
< QMAN_STREAMS
; i
++) {
1190 prop
->next_sob_group_val
[i
] = 1;
1191 prop
->curr_sob_group_idx
[i
] = 0;
1192 gaudi_collective_map_sobs(hdev
, i
);
1195 gaudi_collective_mstr_sob_mask_set(gaudi
);
1200 static void gaudi_reset_sob_group(struct hl_device
*hdev
, u16 sob_group
)
1202 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
1203 struct gaudi_collective_properties
*cprop
= &gaudi
->collective_props
;
1205 kref_put(&cprop
->hw_sob_group
[sob_group
].kref
,
1206 gaudi_sob_group_hw_reset
);
1209 static void gaudi_collective_master_init_job(struct hl_device
*hdev
,
1210 struct hl_cs_job
*job
, u32 stream
, u32 sob_group_offset
)
1212 u32 master_sob_base
, master_monitor
, queue_id
, cb_size
= 0;
1213 struct gaudi_collective_properties
*cprop
;
1214 struct hl_gen_wait_properties wait_prop
;
1215 struct hl_sync_stream_properties
*prop
;
1216 struct gaudi_device
*gaudi
;
1218 gaudi
= hdev
->asic_specific
;
1219 cprop
= &gaudi
->collective_props
;
1220 queue_id
= job
->hw_queue_id
;
1221 prop
= &hdev
->kernel_queues
[queue_id
].sync_stream_prop
;
1224 cprop
->hw_sob_group
[sob_group_offset
].base_sob_id
;
1225 master_monitor
= prop
->collective_mstr_mon_id
[0];
1227 cprop
->hw_sob_group
[sob_group_offset
].queue_id
= queue_id
;
1230 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1231 master_sob_base
, cprop
->mstr_sob_mask
[0],
1232 cprop
->next_sob_group_val
[stream
],
1233 master_monitor
, queue_id
);
1235 wait_prop
.data
= (void *) job
->patched_cb
;
1236 wait_prop
.sob_base
= master_sob_base
;
1237 wait_prop
.sob_mask
= cprop
->mstr_sob_mask
[0];
1238 wait_prop
.sob_val
= cprop
->next_sob_group_val
[stream
];
1239 wait_prop
.mon_id
= master_monitor
;
1240 wait_prop
.q_idx
= queue_id
;
1241 wait_prop
.size
= cb_size
;
1242 cb_size
+= gaudi_gen_wait_cb(hdev
, &wait_prop
);
1244 master_sob_base
+= HL_MAX_SOBS_PER_MONITOR
;
1245 master_monitor
= prop
->collective_mstr_mon_id
[1];
1248 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1249 master_sob_base
, cprop
->mstr_sob_mask
[1],
1250 cprop
->next_sob_group_val
[stream
],
1251 master_monitor
, queue_id
);
1253 wait_prop
.sob_base
= master_sob_base
;
1254 wait_prop
.sob_mask
= cprop
->mstr_sob_mask
[1];
1255 wait_prop
.mon_id
= master_monitor
;
1256 wait_prop
.size
= cb_size
;
1257 cb_size
+= gaudi_gen_wait_cb(hdev
, &wait_prop
);
1260 static void gaudi_collective_slave_init_job(struct hl_device
*hdev
,
1261 struct hl_cs_job
*job
, struct hl_cs_compl
*cs_cmpl
)
1263 struct hl_gen_wait_properties wait_prop
;
1264 struct hl_sync_stream_properties
*prop
;
1265 u32 queue_id
, cb_size
= 0;
1267 queue_id
= job
->hw_queue_id
;
1268 prop
= &hdev
->kernel_queues
[queue_id
].sync_stream_prop
;
1270 if (job
->cs
->encaps_signals
) {
1271 /* use the encaps signal handle store earlier in the flow
1272 * and set the SOB information from the encaps
1275 hl_hw_queue_encaps_sig_set_sob_info(hdev
, job
->cs
, job
,
1278 dev_dbg(hdev
->dev
, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1280 cs_cmpl
->hw_sob
->sob_id
,
1284 /* Add to wait CBs using slave monitor */
1285 wait_prop
.data
= (void *) job
->user_cb
;
1286 wait_prop
.sob_base
= cs_cmpl
->hw_sob
->sob_id
;
1287 wait_prop
.sob_mask
= 0x1;
1288 wait_prop
.sob_val
= cs_cmpl
->sob_val
;
1289 wait_prop
.mon_id
= prop
->collective_slave_mon_id
;
1290 wait_prop
.q_idx
= queue_id
;
1291 wait_prop
.size
= cb_size
;
1294 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1295 cs_cmpl
->hw_sob
->sob_id
, cs_cmpl
->sob_val
,
1296 prop
->collective_slave_mon_id
, queue_id
);
1298 cb_size
+= gaudi_gen_wait_cb(hdev
, &wait_prop
);
1301 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1302 prop
->collective_sob_id
, queue_id
);
1304 cb_size
+= gaudi_gen_signal_cb(hdev
, job
->user_cb
,
1305 prop
->collective_sob_id
, cb_size
, false);
1308 static int gaudi_collective_wait_init_cs(struct hl_cs
*cs
)
1310 struct hl_cs_compl
*signal_cs_cmpl
=
1311 container_of(cs
->signal_fence
, struct hl_cs_compl
, base_fence
);
1312 struct hl_cs_compl
*cs_cmpl
=
1313 container_of(cs
->fence
, struct hl_cs_compl
, base_fence
);
1314 struct hl_cs_encaps_sig_handle
*handle
= cs
->encaps_sig_hdl
;
1315 struct gaudi_collective_properties
*cprop
;
1316 u32 stream
, queue_id
, sob_group_offset
;
1317 struct gaudi_device
*gaudi
;
1318 struct hl_device
*hdev
;
1319 struct hl_cs_job
*job
;
1324 gaudi
= hdev
->asic_specific
;
1325 cprop
= &gaudi
->collective_props
;
1327 if (cs
->encaps_signals
) {
1328 cs_cmpl
->hw_sob
= handle
->hw_sob
;
1329 /* at this checkpoint we only need the hw_sob pointer
1330 * for the completion check before start going over the jobs
1331 * of the master/slaves, the sob_value will be taken later on
1332 * in gaudi_collective_slave_init_job depends on each
1333 * job wait offset value.
1335 cs_cmpl
->sob_val
= 0;
1337 /* copy the SOB id and value of the signal CS */
1338 cs_cmpl
->hw_sob
= signal_cs_cmpl
->hw_sob
;
1339 cs_cmpl
->sob_val
= signal_cs_cmpl
->sob_val
;
1342 /* check again if the signal cs already completed.
1343 * if yes then don't send any wait cs since the hw_sob
1344 * could be in reset already. if signal is not completed
1345 * then get refcount to hw_sob to prevent resetting the sob
1346 * while wait cs is not submitted.
1347 * note that this check is protected by two locks,
1348 * hw queue lock and completion object lock,
1349 * and the same completion object lock also protects
1350 * the hw_sob reset handler function.
1351 * The hw_queue lock prevent out of sync of hw_sob
1352 * refcount value, changed by signal/wait flows.
1354 spin_lock(&signal_cs_cmpl
->lock
);
1356 if (completion_done(&cs
->signal_fence
->completion
)) {
1357 spin_unlock(&signal_cs_cmpl
->lock
);
1360 /* Increment kref since all slave queues are now waiting on it */
1361 kref_get(&cs_cmpl
->hw_sob
->kref
);
1363 spin_unlock(&signal_cs_cmpl
->lock
);
1365 /* Calculate the stream from collective master queue (1st job) */
1366 job
= list_first_entry(&cs
->job_list
, struct hl_cs_job
, cs_node
);
1367 stream
= job
->hw_queue_id
% 4;
1369 stream
* HL_RSVD_SOBS
+ cprop
->curr_sob_group_idx
[stream
];
1371 list_for_each_entry(job
, &cs
->job_list
, cs_node
) {
1372 queue_id
= job
->hw_queue_id
;
1374 if (hdev
->kernel_queues
[queue_id
].collective_mode
==
1375 HL_COLLECTIVE_MASTER
)
1376 gaudi_collective_master_init_job(hdev
, job
, stream
,
1379 gaudi_collective_slave_init_job(hdev
, job
, cs_cmpl
);
1382 cs_cmpl
->sob_group
= sob_group_offset
;
1384 /* Handle sob group kref and wraparound */
1385 kref_get(&cprop
->hw_sob_group
[sob_group_offset
].kref
);
1386 cprop
->next_sob_group_val
[stream
]++;
1388 if (cprop
->next_sob_group_val
[stream
] == HL_MAX_SOB_VAL
) {
1390 * Decrement as we reached the max value.
1391 * The release function won't be called here as we've
1392 * just incremented the refcount.
1394 kref_put(&cprop
->hw_sob_group
[sob_group_offset
].kref
,
1395 gaudi_sob_group_reset_error
);
1396 cprop
->next_sob_group_val
[stream
] = 1;
1397 /* only two SOBs are currently in use */
1398 cprop
->curr_sob_group_idx
[stream
] =
1399 (cprop
->curr_sob_group_idx
[stream
] + 1) &
1402 gaudi_collective_map_sobs(hdev
, stream
);
1404 dev_dbg(hdev
->dev
, "switched to SOB group %d, stream: %d\n",
1405 cprop
->curr_sob_group_idx
[stream
], stream
);
1409 hl_fence_put(cs
->signal_fence
);
1410 cs
->signal_fence
= NULL
;
1415 static u32
gaudi_get_patched_cb_extra_size(u32 user_cb_size
)
1417 u32 cacheline_end
, additional_commands
;
1419 cacheline_end
= round_up(user_cb_size
, DEVICE_CACHE_LINE_SIZE
);
1420 additional_commands
= sizeof(struct packet_msg_prot
) * 2;
1422 if (user_cb_size
+ additional_commands
> cacheline_end
)
1423 return cacheline_end
- user_cb_size
+ additional_commands
;
1425 return additional_commands
;
1428 static int gaudi_collective_wait_create_job(struct hl_device
*hdev
,
1429 struct hl_ctx
*ctx
, struct hl_cs
*cs
,
1430 enum hl_collective_mode mode
, u32 queue_id
, u32 wait_queue_id
,
1431 u32 encaps_signal_offset
)
1433 struct hw_queue_properties
*hw_queue_prop
;
1434 struct hl_cs_counters_atomic
*cntr
;
1435 struct hl_cs_job
*job
;
1440 cntr
= &hdev
->aggregated_cs_counters
;
1442 if (mode
== HL_COLLECTIVE_MASTER
) {
1443 /* CB size of collective master queue contains
1444 * 4 msg short packets for monitor 1 configuration
1446 * 4 msg short packets for monitor 2 configuration
1448 * 2 msg prot packets for completion and MSI
1450 cb_size
= sizeof(struct packet_msg_short
) * 8 +
1451 sizeof(struct packet_fence
) * 2 +
1452 sizeof(struct packet_msg_prot
) * 2;
1455 /* CB size of collective slave queues contains
1456 * 4 msg short packets for monitor configuration
1458 * 1 additional msg short packet for sob signal
1460 cb_size
= sizeof(struct packet_msg_short
) * 5 +
1461 sizeof(struct packet_fence
);
1465 hw_queue_prop
= &hdev
->asic_prop
.hw_queues_props
[queue_id
];
1466 job
= hl_cs_allocate_job(hdev
, hw_queue_prop
->type
, true);
1468 atomic64_inc(&ctx
->cs_counters
.out_of_mem_drop_cnt
);
1469 atomic64_inc(&cntr
->out_of_mem_drop_cnt
);
1470 dev_err(hdev
->dev
, "Failed to allocate a new job\n");
1474 /* Allocate internal mapped CB for non patched CBs */
1475 cb
= hl_cb_kernel_create(hdev
, cb_size
, !patched_cb
);
1477 atomic64_inc(&ctx
->cs_counters
.out_of_mem_drop_cnt
);
1478 atomic64_inc(&cntr
->out_of_mem_drop_cnt
);
1486 atomic_inc(&job
->user_cb
->cs_cnt
);
1487 job
->user_cb_size
= cb_size
;
1488 job
->hw_queue_id
= queue_id
;
1490 /* since its guaranteed to have only one chunk in the collective wait
1491 * cs, we can use this chunk to set the encapsulated signal offset
1494 if (cs
->encaps_signals
)
1495 job
->encaps_sig_wait_offset
= encaps_signal_offset
;
1498 * No need in parsing, user CB is the patched CB.
1499 * We call hl_cb_destroy() out of two reasons - we don't need
1500 * the CB in the CB idr anymore and to decrement its refcount as
1501 * it was incremented inside hl_cb_kernel_create().
1504 job
->patched_cb
= job
->user_cb
;
1506 job
->patched_cb
= NULL
;
1508 job
->job_cb_size
= job
->user_cb_size
;
1509 hl_cb_destroy(&hdev
->kernel_mem_mgr
, cb
->buf
->handle
);
1511 /* increment refcount as for external queues we get completion */
1512 if (hw_queue_prop
->type
== QUEUE_TYPE_EXT
)
1515 cs
->jobs_in_queue_cnt
[job
->hw_queue_id
]++;
1517 list_add_tail(&job
->cs_node
, &cs
->job_list
);
1519 hl_debugfs_add_job(hdev
, job
);
1524 static int gaudi_collective_wait_create_jobs(struct hl_device
*hdev
,
1525 struct hl_ctx
*ctx
, struct hl_cs
*cs
,
1526 u32 wait_queue_id
, u32 collective_engine_id
,
1527 u32 encaps_signal_offset
)
1529 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
1530 struct hw_queue_properties
*hw_queue_prop
;
1531 u32 queue_id
, collective_queue
, num_jobs
;
1532 u32 stream
, nic_queue
, nic_idx
= 0;
1536 /* Verify wait queue id is configured as master */
1537 hw_queue_prop
= &hdev
->asic_prop
.hw_queues_props
[wait_queue_id
];
1538 if (!(hw_queue_prop
->collective_mode
== HL_COLLECTIVE_MASTER
)) {
1540 "Queue %d is not configured as collective master\n",
1545 /* Verify engine id is supported */
1546 if (collective_engine_id
!= GAUDI_ENGINE_ID_DMA_5
&&
1547 collective_engine_id
!= GAUDI_ENGINE_ID_TPC_7
) {
1549 "Collective wait does not support engine %u\n",
1550 collective_engine_id
);
1554 stream
= wait_queue_id
% 4;
1556 if (collective_engine_id
== GAUDI_ENGINE_ID_DMA_5
)
1557 collective_queue
= GAUDI_QUEUE_ID_DMA_5_0
+ stream
;
1559 collective_queue
= GAUDI_QUEUE_ID_TPC_7_0
+ stream
;
1561 num_jobs
= NUMBER_OF_SOBS_IN_GRP
+ 1;
1562 nic_queue
= GAUDI_QUEUE_ID_NIC_0_0
+ stream
;
1564 /* First job goes to the collective master queue, it will wait for
1565 * the collective slave queues to finish execution.
1566 * The synchronization is done using two monitors:
1567 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1568 * reduction engine (DMA5/TPC7).
1570 * Rest of the jobs goes to the collective slave queues which will
1571 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1573 for (i
= 0 ; i
< num_jobs
; i
++) {
1575 queue_id
= wait_queue_id
;
1576 rc
= gaudi_collective_wait_create_job(hdev
, ctx
, cs
,
1577 HL_COLLECTIVE_MASTER
, queue_id
,
1578 wait_queue_id
, encaps_signal_offset
);
1580 if (nic_idx
< NIC_NUMBER_OF_ENGINES
) {
1581 if (gaudi
->hw_cap_initialized
&
1582 BIT(HW_CAP_NIC_SHIFT
+ nic_idx
))
1587 queue_id
= nic_queue
;
1594 queue_id
= collective_queue
;
1597 rc
= gaudi_collective_wait_create_job(hdev
, ctx
, cs
,
1598 HL_COLLECTIVE_SLAVE
, queue_id
,
1599 wait_queue_id
, encaps_signal_offset
);
1609 static int gaudi_late_init(struct hl_device
*hdev
)
1611 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
1614 rc
= gaudi
->cpucp_info_get(hdev
);
1616 dev_err(hdev
->dev
, "Failed to get cpucp info\n");
1620 if ((hdev
->card_type
== cpucp_card_type_pci
) &&
1621 (hdev
->nic_ports_mask
& 0x3)) {
1623 "PCI card detected, only 8 ports are enabled\n");
1624 hdev
->nic_ports_mask
&= ~0x3;
1626 /* Stop and disable unused NIC QMANs */
1627 WREG32(mmNIC0_QM0_GLBL_CFG1
, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
1628 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
1629 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
1631 WREG32(mmNIC0_QM1_GLBL_CFG1
, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
1632 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
1633 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
1635 WREG32(mmNIC0_QM0_GLBL_CFG0
, 0);
1636 WREG32(mmNIC0_QM1_GLBL_CFG0
, 0);
1638 gaudi
->hw_cap_initialized
&= ~(HW_CAP_NIC0
| HW_CAP_NIC1
);
1641 rc
= hl_fw_send_pci_access_msg(hdev
, CPUCP_PACKET_ENABLE_PCI_ACCESS
, 0x0);
1645 /* Scrub both SRAM and DRAM */
1646 rc
= hdev
->asic_funcs
->scrub_device_mem(hdev
);
1648 goto disable_pci_access
;
1650 rc
= gaudi_fetch_psoc_frequency(hdev
);
1652 dev_err(hdev
->dev
, "Failed to fetch psoc frequency\n");
1653 goto disable_pci_access
;
1656 rc
= gaudi_mmu_clear_pgt_range(hdev
);
1658 dev_err(hdev
->dev
, "Failed to clear MMU page tables range\n");
1659 goto disable_pci_access
;
1662 rc
= gaudi_init_tpc_mem(hdev
);
1664 dev_err(hdev
->dev
, "Failed to initialize TPC memories\n");
1665 goto disable_pci_access
;
1668 rc
= gaudi_collective_init(hdev
);
1670 dev_err(hdev
->dev
, "Failed to init collective\n");
1671 goto disable_pci_access
;
1674 /* We only support a single ASID for the user, so for the sake of optimization, just
1675 * initialize the ASID one time during device initialization with the fixed value of 1
1677 gaudi_mmu_prepare(hdev
, 1);
1679 hl_fw_set_pll_profile(hdev
);
1684 hl_fw_send_pci_access_msg(hdev
, CPUCP_PACKET_DISABLE_PCI_ACCESS
, 0x0);
1689 static void gaudi_late_fini(struct hl_device
*hdev
)
1691 hl_hwmon_release_resources(hdev
);
1694 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device
*hdev
)
1696 dma_addr_t dma_addr_arr
[GAUDI_ALLOC_CPU_MEM_RETRY_CNT
] = {}, end_addr
;
1697 void *virt_addr_arr
[GAUDI_ALLOC_CPU_MEM_RETRY_CNT
] = {};
1701 * The device CPU works with 40-bits addresses, while bit 39 must be set
1702 * to '1' when accessing the host.
1703 * Bits 49:39 of the full host address are saved for a later
1704 * configuration of the HW to perform extension to 50 bits.
1705 * Because there is a single HW register that holds the extension bits,
1706 * these bits must be identical in all allocated range.
1709 for (i
= 0 ; i
< GAUDI_ALLOC_CPU_MEM_RETRY_CNT
; i
++) {
1710 virt_addr_arr
[i
] = hl_asic_dma_alloc_coherent(hdev
, HL_CPU_ACCESSIBLE_MEM_SIZE
,
1712 GFP_KERNEL
| __GFP_ZERO
);
1713 if (!virt_addr_arr
[i
]) {
1715 goto free_dma_mem_arr
;
1718 end_addr
= dma_addr_arr
[i
] + HL_CPU_ACCESSIBLE_MEM_SIZE
- 1;
1719 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr
[i
]) ==
1720 GAUDI_CPU_PCI_MSB_ADDR(end_addr
))
1724 if (i
== GAUDI_ALLOC_CPU_MEM_RETRY_CNT
) {
1726 "MSB of CPU accessible DMA memory are not identical in all range\n");
1728 goto free_dma_mem_arr
;
1731 hdev
->cpu_accessible_dma_mem
= virt_addr_arr
[i
];
1732 hdev
->cpu_accessible_dma_address
= dma_addr_arr
[i
];
1733 hdev
->cpu_pci_msb_addr
=
1734 GAUDI_CPU_PCI_MSB_ADDR(hdev
->cpu_accessible_dma_address
);
1736 if (!hdev
->asic_prop
.fw_security_enabled
)
1737 GAUDI_PCI_TO_CPU_ADDR(hdev
->cpu_accessible_dma_address
);
1740 for (j
= 0 ; j
< i
; j
++)
1741 hl_asic_dma_free_coherent(hdev
, HL_CPU_ACCESSIBLE_MEM_SIZE
, virt_addr_arr
[j
],
1747 static void gaudi_free_internal_qmans_pq_mem(struct hl_device
*hdev
)
1749 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
1750 struct gaudi_internal_qman_info
*q
;
1753 for (i
= 0 ; i
< GAUDI_QUEUE_ID_SIZE
; i
++) {
1754 q
= &gaudi
->internal_qmans
[i
];
1755 if (!q
->pq_kernel_addr
)
1757 hl_asic_dma_free_coherent(hdev
, q
->pq_size
, q
->pq_kernel_addr
, q
->pq_dma_addr
);
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}
static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}
static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}
/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}
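/*
 * Worked example (illustration only, with assumed values): if
 * GAUDI_EVENT_QUEUE_MSI_IDX were 8 and NIC_NUMBER_OF_ENGINES is 10, the
 * mapping above would give:
 *
 *	gaudi_pci_irq_vector(hdev, 0, false) -> MSI vector 0   (a completion queue)
 *	gaudi_pci_irq_vector(hdev, 8, true)  -> MSI vector 8   (the CPU event queue)
 *	gaudi_pci_irq_vector(hdev, 9, false) -> MSI vector 20  (9 + 10 + 1)
 *
 * i.e. any newer interrupt source is pushed past the CPU EQ and the NIC
 * interrupts, which is what the comment above refers to.
 */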
static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	rc = gaudi_enable_msi_single(hdev);
	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
}
static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);
	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}
2068 static void gaudi_init_scrambler_sram(struct hl_device
*hdev
)
2070 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
2072 if (hdev
->asic_prop
.fw_security_enabled
)
2075 if (hdev
->asic_prop
.fw_app_cpu_boot_dev_sts0
&
2076 CPU_BOOT_DEV_STS0_SRAM_SCR_EN
)
2079 if (gaudi
->hw_cap_initialized
& HW_CAP_SRAM_SCRAMBLER
)
2082 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN
,
2083 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2084 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN
,
2085 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2086 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN
,
2087 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2088 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN
,
2089 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2090 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN
,
2091 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2092 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN
,
2093 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2094 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN
,
2095 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2096 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN
,
2097 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2099 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN
,
2100 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2101 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN
,
2102 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2103 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN
,
2104 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2105 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN
,
2106 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2107 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN
,
2108 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2109 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN
,
2110 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2111 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN
,
2112 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2113 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN
,
2114 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT
);
2116 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN
,
2117 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2118 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN
,
2119 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2120 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN
,
2121 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2122 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN
,
2123 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2124 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN
,
2125 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2126 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN
,
2127 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2128 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN
,
2129 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2130 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN
,
2131 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT
);
2133 gaudi
->hw_cap_initialized
|= HW_CAP_SRAM_SCRAMBLER
;
2136 static void gaudi_init_scrambler_hbm(struct hl_device
*hdev
)
2138 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
2140 if (hdev
->asic_prop
.fw_security_enabled
)
2143 if (hdev
->asic_prop
.fw_bootfit_cpu_boot_dev_sts0
&
2144 CPU_BOOT_DEV_STS0_DRAM_SCR_EN
)
2147 if (gaudi
->hw_cap_initialized
& HW_CAP_HBM_SCRAMBLER
)
2150 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN
,
2151 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2152 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN
,
2153 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2154 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN
,
2155 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2156 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN
,
2157 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2158 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN
,
2159 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2160 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN
,
2161 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2162 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN
,
2163 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2164 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN
,
2165 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2167 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN
,
2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2169 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN
,
2170 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2171 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN
,
2172 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2173 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN
,
2174 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2175 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN
,
2176 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2177 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN
,
2178 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2179 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN
,
2180 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2181 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN
,
2182 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT
);
2184 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN
,
2185 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2186 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN
,
2187 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2188 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN
,
2189 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2190 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN
,
2191 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2192 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN
,
2193 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2194 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN
,
2195 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2196 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN
,
2197 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2198 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN
,
2199 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT
);
2201 gaudi
->hw_cap_initialized
|= HW_CAP_HBM_SCRAMBLER
;
2204 static void gaudi_init_e2e(struct hl_device
*hdev
)
2206 if (hdev
->asic_prop
.fw_security_enabled
)
2209 if (hdev
->asic_prop
.fw_bootfit_cpu_boot_dev_sts0
&
2210 CPU_BOOT_DEV_STS0_E2E_CRED_EN
)
2213 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE
, 247 >> 3);
2214 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE
, 785 >> 3);
2215 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE
, 49);
2216 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE
, 101);
2218 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE
, 275 >> 3);
2219 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE
, 614 >> 3);
2220 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE
, 1);
2221 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE
, 39);
2223 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE
, 1);
2224 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE
, 1);
2225 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE
, 1);
2226 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE
, 32);
2228 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE
, 176 >> 3);
2229 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE
, 32 >> 3);
2230 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE
, 19);
2231 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE
, 32);
2233 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE
, 176 >> 3);
2234 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE
, 32 >> 3);
2235 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE
, 19);
2236 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE
, 32);
2238 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE
, 1);
2239 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE
, 1);
2240 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE
, 1);
2241 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE
, 32);
2243 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE
, 275 >> 3);
2244 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE
, 614 >> 3);
2245 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE
, 1);
2246 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE
, 39);
2248 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE
, 297 >> 3);
2249 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE
, 908 >> 3);
2250 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE
, 19);
2251 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE
, 19);
2253 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE
, 318 >> 3);
2254 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE
, 956 >> 3);
2255 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE
, 79);
2256 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE
, 163);
2258 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE
, 275 >> 3);
2259 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE
, 614 >> 3);
2260 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE
, 1);
2261 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE
, 39);
2263 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE
, 1);
2264 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE
, 1);
2265 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE
, 1);
2266 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE
, 32);
2268 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE
, 176 >> 3);
2269 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE
, 32 >> 3);
2270 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE
, 19);
2271 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE
, 32);
2273 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE
, 176 >> 3);
2274 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE
, 32 >> 3);
2275 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE
, 19);
2276 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE
, 32);
2278 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE
, 1);
2279 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE
, 1);
2280 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE
, 1);
2281 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE
, 32);
2283 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE
, 275 >> 3);
2284 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE
, 614 >> 3);
2285 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE
, 1);
2286 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE
, 39);
2288 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE
, 318 >> 3);
2289 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE
, 956 >> 3);
2290 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE
, 79);
2291 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE
, 79);
2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2294 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2296 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2299 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2301 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2304 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2306 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2309 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2311 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2314 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2316 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2319 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2321 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2324 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2326 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2329 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2331 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2333 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN
,
2334 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2335 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN
,
2336 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2338 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN
,
2339 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2340 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN
,
2341 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2343 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN
,
2344 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2345 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN
,
2346 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2348 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN
,
2349 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2350 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN
,
2351 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2353 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN
,
2354 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2355 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN
,
2356 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2358 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN
,
2359 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2360 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN
,
2361 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2363 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN
,
2364 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2365 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN
,
2366 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2368 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN
,
2369 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2370 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN
,
2371 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2373 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN
,
2374 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2375 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN
,
2376 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2378 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN
,
2379 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2380 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN
,
2381 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2383 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN
,
2384 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2385 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN
,
2386 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2388 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN
,
2389 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2390 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN
,
2391 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2393 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN
,
2394 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2395 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN
,
2396 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2398 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN
,
2399 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2400 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN
,
2401 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2403 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN
,
2404 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2405 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN
,
2406 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2408 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN
,
2409 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2410 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN
,
2411 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2413 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN
,
2414 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2415 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN
,
2416 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2418 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN
,
2419 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2420 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN
,
2421 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2423 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN
,
2424 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2425 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN
,
2426 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2428 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN
,
2429 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2430 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN
,
2431 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2433 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN
,
2434 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2435 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN
,
2436 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2438 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN
,
2439 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2440 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN
,
2441 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2443 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN
,
2444 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2445 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN
,
2446 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2448 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN
,
2449 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2450 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN
,
2451 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2454 static void gaudi_init_hbm_cred(struct hl_device
*hdev
)
2456 u32 hbm0_wr
, hbm1_wr
, hbm0_rd
, hbm1_rd
;
2458 if (hdev
->asic_prop
.fw_security_enabled
)
2461 if (hdev
->asic_prop
.fw_bootfit_cpu_boot_dev_sts0
&
2462 CPU_BOOT_DEV_STS0_HBM_CRED_EN
)
2465 hbm0_wr
= 0x33333333;
2466 hbm0_rd
= 0x77777777;
2467 hbm1_wr
= 0x55555555;
2468 hbm1_rd
= 0xDDDDDDDD;
2470 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT
, hbm0_wr
);
2471 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT
, hbm1_wr
);
2472 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT
, hbm0_rd
);
2473 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT
, hbm1_rd
);
2475 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT
, hbm0_wr
);
2476 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT
, hbm1_wr
);
2477 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT
, hbm0_rd
);
2478 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT
, hbm1_rd
);
2480 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT
, hbm0_wr
);
2481 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT
, hbm1_wr
);
2482 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT
, hbm0_rd
);
2483 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT
, hbm1_rd
);
2485 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT
, hbm0_wr
);
2486 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT
, hbm1_wr
);
2487 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT
, hbm0_rd
);
2488 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT
, hbm1_rd
);
2490 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0
,
2491 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2492 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2493 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0
,
2494 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2495 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2496 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0
,
2497 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2498 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2499 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0
,
2500 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2501 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2503 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1
,
2504 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2505 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2506 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1
,
2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2509 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1
,
2510 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2511 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2512 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1
,
2513 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2514 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}
2545 static void gaudi_init_pci_dma_qman(struct hl_device
*hdev
, int dma_id
,
2546 int qman_id
, dma_addr_t qman_pq_addr
)
2548 struct cpu_dyn_regs
*dyn_regs
=
2549 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
2550 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
2551 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
2552 u32 q_off
, dma_qm_offset
;
2553 u32 dma_qm_err_cfg
, irq_handler_offset
;
2555 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
2557 mtr_base_en_lo
= lower_32_bits(CFG_BASE
+
2558 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2559 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
2560 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2561 so_base_en_lo
= lower_32_bits(CFG_BASE
+
2562 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2563 so_base_en_hi
= upper_32_bits(CFG_BASE
+
2564 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2565 mtr_base_ws_lo
= lower_32_bits(CFG_BASE
+
2566 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2567 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
2568 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2569 so_base_ws_lo
= lower_32_bits(CFG_BASE
+
2570 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2571 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
2572 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2574 q_off
= dma_qm_offset
+ qman_id
* 4;
2576 WREG32(mmDMA0_QM_PQ_BASE_LO_0
+ q_off
, lower_32_bits(qman_pq_addr
));
2577 WREG32(mmDMA0_QM_PQ_BASE_HI_0
+ q_off
, upper_32_bits(qman_pq_addr
));
2579 WREG32(mmDMA0_QM_PQ_SIZE_0
+ q_off
, ilog2(HL_QUEUE_LENGTH
));
2580 WREG32(mmDMA0_QM_PQ_PI_0
+ q_off
, 0);
2581 WREG32(mmDMA0_QM_PQ_CI_0
+ q_off
, 0);
2583 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
, QMAN_LDMA_SIZE_OFFSET
);
2584 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2585 QMAN_LDMA_SRC_OFFSET
);
2586 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2587 QMAN_LDMA_DST_OFFSET
);
2589 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
2590 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
2591 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
2592 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
2593 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0
+ q_off
, mtr_base_ws_lo
);
2594 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0
+ q_off
, mtr_base_ws_hi
);
2595 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0
+ q_off
, so_base_ws_lo
);
2596 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0
+ q_off
, so_base_ws_hi
);
2598 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0
+ q_off
, 0x100);
2600 /* The following configuration is needed only once per QMAN */
2602 irq_handler_offset
= hdev
->asic_prop
.gic_interrupts_enable
?
2603 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
2604 le32_to_cpu(dyn_regs
->gic_dma_qm_irq_ctrl
);
2606 /* Configure RAZWI IRQ */
2607 dma_qm_err_cfg
= PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
2608 if (hdev
->stop_on_err
)
2610 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
2612 WREG32(mmDMA0_QM_GLBL_ERR_CFG
+ dma_qm_offset
, dma_qm_err_cfg
);
2614 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO
+ dma_qm_offset
,
2615 lower_32_bits(CFG_BASE
+ irq_handler_offset
));
2616 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI
+ dma_qm_offset
,
2617 upper_32_bits(CFG_BASE
+ irq_handler_offset
));
2619 WREG32(mmDMA0_QM_GLBL_ERR_WDATA
+ dma_qm_offset
,
2620 gaudi_irq_map_table
[GAUDI_EVENT_DMA0_QM
].cpu_id
+
2623 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN
+ dma_qm_offset
,
2624 QM_ARB_ERR_MSG_EN_MASK
);
2626 /* Set timeout to maximum */
2627 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT
+ dma_qm_offset
, GAUDI_ARB_WDT_TIMEOUT
);
2629 WREG32(mmDMA0_QM_GLBL_PROT
+ dma_qm_offset
,
2630 QMAN_EXTERNAL_MAKE_TRUSTED
);
2632 WREG32(mmDMA0_QM_GLBL_CFG1
+ dma_qm_offset
, 0);
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
}
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q need to add 1 to get the correct
		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
		 * order to get the correct MSI register.
		 */
		if (dma_id) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
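/*
 * Worked example (illustration only, assuming dma_id == 1 for the second
 * PCI DMA channel and NIC_NUMBER_OF_ENGINES == 10): stream j == 2 of that
 * channel is placed at
 *
 *	q_idx   = 4 * 1 + 2 + 1 = 7          (one slot past the CPU queue)
 *	msi_vec = 10 + 1 + <running count>   (past the CPU EQ and NIC IRQs)
 *
 * which is the "add 1 for the CPU Q, add the CPU EQ and NIC IRQs" rule
 * stated in the loop above.
 */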
2725 static void gaudi_init_hbm_dma_qman(struct hl_device
*hdev
, int dma_id
,
2726 int qman_id
, u64 qman_base_addr
)
2728 struct cpu_dyn_regs
*dyn_regs
=
2729 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
2730 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
2731 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
2732 u32 dma_qm_err_cfg
, irq_handler_offset
;
2733 u32 q_off
, dma_qm_offset
;
2735 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
2737 mtr_base_en_lo
= lower_32_bits(CFG_BASE
+
2738 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2739 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
2740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2741 so_base_en_lo
= lower_32_bits(CFG_BASE
+
2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2743 so_base_en_hi
= upper_32_bits(CFG_BASE
+
2744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2745 mtr_base_ws_lo
= lower_32_bits(CFG_BASE
+
2746 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2747 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
2748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2749 so_base_ws_lo
= lower_32_bits(CFG_BASE
+
2750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2751 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
2752 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2754 q_off
= dma_qm_offset
+ qman_id
* 4;
2757 WREG32(mmDMA0_QM_PQ_BASE_LO_0
+ q_off
,
2758 lower_32_bits(qman_base_addr
));
2759 WREG32(mmDMA0_QM_PQ_BASE_HI_0
+ q_off
,
2760 upper_32_bits(qman_base_addr
));
2762 WREG32(mmDMA0_QM_PQ_SIZE_0
+ q_off
, ilog2(HBM_DMA_QMAN_LENGTH
));
2763 WREG32(mmDMA0_QM_PQ_PI_0
+ q_off
, 0);
2764 WREG32(mmDMA0_QM_PQ_CI_0
+ q_off
, 0);
2766 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2767 QMAN_CPDMA_SIZE_OFFSET
);
2768 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2769 QMAN_CPDMA_SRC_OFFSET
);
2770 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2771 QMAN_CPDMA_DST_OFFSET
);
2773 irq_handler_offset
= hdev
->asic_prop
.gic_interrupts_enable
?
2774 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
2775 le32_to_cpu(dyn_regs
->gic_dma_qm_irq_ctrl
);
2777 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2778 QMAN_LDMA_SIZE_OFFSET
);
2779 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2780 QMAN_LDMA_SRC_OFFSET
);
2781 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2782 QMAN_LDMA_DST_OFFSET
);
2784 /* Configure RAZWI IRQ */
2785 dma_qm_err_cfg
= HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
2786 if (hdev
->stop_on_err
)
2788 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
2790 WREG32(mmDMA0_QM_GLBL_ERR_CFG
+ dma_qm_offset
, dma_qm_err_cfg
);
2792 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO
+ dma_qm_offset
,
2793 lower_32_bits(CFG_BASE
+ irq_handler_offset
));
2794 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI
+ dma_qm_offset
,
2795 upper_32_bits(CFG_BASE
+ irq_handler_offset
));
2797 WREG32(mmDMA0_QM_GLBL_ERR_WDATA
+ dma_qm_offset
,
2798 gaudi_irq_map_table
[GAUDI_EVENT_DMA0_QM
].cpu_id
+
2801 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN
+ dma_qm_offset
,
2802 QM_ARB_ERR_MSG_EN_MASK
);
2804 /* Set timeout to maximum */
2805 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT
+ dma_qm_offset
, GAUDI_ARB_WDT_TIMEOUT
);
2807 WREG32(mmDMA0_QM_GLBL_CFG1
+ dma_qm_offset
, 0);
2808 WREG32(mmDMA0_QM_GLBL_PROT
+ dma_qm_offset
,
2809 QMAN_INTERNAL_MAKE_TRUSTED
);
2812 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
2813 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
2814 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
2815 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
2817 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2818 if (gaudi_dma_assignment
[dma_id
] == GAUDI_ENGINE_ID_DMA_5
) {
2819 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0
+ q_off
,
2821 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0
+ q_off
,
2823 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0
+ q_off
,
2825 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0
+ q_off
,
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			 /*
			  * Add the CPU queue in order to get the correct queue
			  * number as all internal queue are placed after it
			  */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
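/*
 * Worked example (illustration only, assuming dma_id == 2 and
 * QMAN_STREAMS == 4): stream j == 0 of that engine uses
 *
 *	internal_q_index = 2 * 4 + 0 + 1 = 9
 *
 * where the "+ 1" skips the CPU queue, since all internal queues are
 * placed after it, as noted in the loop above.
 */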
2867 static void gaudi_init_mme_qman(struct hl_device
*hdev
, u32 mme_offset
,
2868 int qman_id
, u64 qman_base_addr
)
2870 struct cpu_dyn_regs
*dyn_regs
=
2871 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
2872 u32 mtr_base_lo
, mtr_base_hi
;
2873 u32 so_base_lo
, so_base_hi
;
2874 u32 irq_handler_offset
;
2878 mtr_base_lo
= lower_32_bits(CFG_BASE
+
2879 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2880 mtr_base_hi
= upper_32_bits(CFG_BASE
+
2881 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2882 so_base_lo
= lower_32_bits(CFG_BASE
+
2883 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2884 so_base_hi
= upper_32_bits(CFG_BASE
+
2885 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2887 q_off
= mme_offset
+ qman_id
* 4;
2890 WREG32(mmMME0_QM_PQ_BASE_LO_0
+ q_off
,
2891 lower_32_bits(qman_base_addr
));
2892 WREG32(mmMME0_QM_PQ_BASE_HI_0
+ q_off
,
2893 upper_32_bits(qman_base_addr
));
2895 WREG32(mmMME0_QM_PQ_SIZE_0
+ q_off
, ilog2(MME_QMAN_LENGTH
));
2896 WREG32(mmMME0_QM_PQ_PI_0
+ q_off
, 0);
2897 WREG32(mmMME0_QM_PQ_CI_0
+ q_off
, 0);
2899 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2900 QMAN_CPDMA_SIZE_OFFSET
);
2901 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2902 QMAN_CPDMA_SRC_OFFSET
);
2903 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2904 QMAN_CPDMA_DST_OFFSET
);
2906 irq_handler_offset
= hdev
->asic_prop
.gic_interrupts_enable
?
2907 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
2908 le32_to_cpu(dyn_regs
->gic_mme_qm_irq_ctrl
);
2910 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2911 QMAN_LDMA_SIZE_OFFSET
);
2912 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2913 QMAN_LDMA_SRC_OFFSET
);
2914 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2915 QMAN_LDMA_DST_OFFSET
);
2917 /* Configure RAZWI IRQ */
2918 mme_id
= mme_offset
/
2919 (mmMME1_QM_GLBL_CFG0
- mmMME0_QM_GLBL_CFG0
) / 2;
2921 mme_qm_err_cfg
= MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
2922 if (hdev
->stop_on_err
)
2924 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
2926 WREG32(mmMME0_QM_GLBL_ERR_CFG
+ mme_offset
, mme_qm_err_cfg
);
2928 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO
+ mme_offset
,
2929 lower_32_bits(CFG_BASE
+ irq_handler_offset
));
2930 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI
+ mme_offset
,
2931 upper_32_bits(CFG_BASE
+ irq_handler_offset
));
2933 WREG32(mmMME0_QM_GLBL_ERR_WDATA
+ mme_offset
,
2934 gaudi_irq_map_table
[GAUDI_EVENT_MME0_QM
].cpu_id
+
2937 WREG32(mmMME0_QM_ARB_ERR_MSG_EN
+ mme_offset
,
2938 QM_ARB_ERR_MSG_EN_MASK
);
2940 /* Set timeout to maximum */
2941 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT
+ mme_offset
, GAUDI_ARB_WDT_TIMEOUT
);
2943 WREG32(mmMME0_QM_GLBL_CFG1
+ mme_offset
, 0);
2944 WREG32(mmMME0_QM_GLBL_PROT
+ mme_offset
,
2945 QMAN_INTERNAL_MAKE_TRUSTED
);
2948 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_lo
);
2949 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_hi
);
2950 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_lo
);
2951 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_hi
);
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
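/*
 * Summary of the mapping above (illustration only): the loop starts with
 * mme_offset pointing at the MME2 QMAN block, so
 *
 *	GAUDI_QUEUE_ID_MME_0_0..3 -> mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0 (N_W MME)
 *	GAUDI_QUEUE_ID_MME_1_0..3 -> mme_offset = 0                                         (S_W MME)
 *
 * and the lower CPs are then initialized for both engines explicitly.
 */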
2993 static void gaudi_init_tpc_qman(struct hl_device
*hdev
, u32 tpc_offset
,
2994 int qman_id
, u64 qman_base_addr
)
2996 struct cpu_dyn_regs
*dyn_regs
=
2997 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
2998 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
2999 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
3000 u32 tpc_qm_err_cfg
, irq_handler_offset
;
3003 mtr_base_en_lo
= lower_32_bits(CFG_BASE
+
3004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3005 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
3006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3007 so_base_en_lo
= lower_32_bits(CFG_BASE
+
3008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3009 so_base_en_hi
= upper_32_bits(CFG_BASE
+
3010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3011 mtr_base_ws_lo
= lower_32_bits(CFG_BASE
+
3012 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3013 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
3014 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3015 so_base_ws_lo
= lower_32_bits(CFG_BASE
+
3016 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3017 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
3018 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3020 q_off
= tpc_offset
+ qman_id
* 4;
3022 tpc_id
= tpc_offset
/
3023 (mmTPC1_QM_GLBL_CFG0
- mmTPC0_QM_GLBL_CFG0
);
3026 WREG32(mmTPC0_QM_PQ_BASE_LO_0
+ q_off
,
3027 lower_32_bits(qman_base_addr
));
3028 WREG32(mmTPC0_QM_PQ_BASE_HI_0
+ q_off
,
3029 upper_32_bits(qman_base_addr
));
3031 WREG32(mmTPC0_QM_PQ_SIZE_0
+ q_off
, ilog2(TPC_QMAN_LENGTH
));
3032 WREG32(mmTPC0_QM_PQ_PI_0
+ q_off
, 0);
3033 WREG32(mmTPC0_QM_PQ_CI_0
+ q_off
, 0);
3035 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
3036 QMAN_CPDMA_SIZE_OFFSET
);
3037 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
3038 QMAN_CPDMA_SRC_OFFSET
);
3039 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
3040 QMAN_CPDMA_DST_OFFSET
);
3042 irq_handler_offset
= hdev
->asic_prop
.gic_interrupts_enable
?
3043 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
3044 le32_to_cpu(dyn_regs
->gic_tpc_qm_irq_ctrl
);
3046 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
3047 QMAN_LDMA_SIZE_OFFSET
);
3048 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
3049 QMAN_LDMA_SRC_OFFSET
);
3050 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
3051 QMAN_LDMA_DST_OFFSET
);
3053 /* Configure RAZWI IRQ */
3054 tpc_qm_err_cfg
= TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
3055 if (hdev
->stop_on_err
)
3057 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
3059 WREG32(mmTPC0_QM_GLBL_ERR_CFG
+ tpc_offset
, tpc_qm_err_cfg
);
3061 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO
+ tpc_offset
,
3062 lower_32_bits(CFG_BASE
+ irq_handler_offset
));
3063 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI
+ tpc_offset
,
3064 upper_32_bits(CFG_BASE
+ irq_handler_offset
));
3066 WREG32(mmTPC0_QM_GLBL_ERR_WDATA
+ tpc_offset
,
3067 gaudi_irq_map_table
[GAUDI_EVENT_TPC0_QM
].cpu_id
+
3070 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN
+ tpc_offset
,
3071 QM_ARB_ERR_MSG_EN_MASK
);
3073 /* Set timeout to maximum */
3074 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT
+ tpc_offset
, GAUDI_ARB_WDT_TIMEOUT
);
3076 WREG32(mmTPC0_QM_GLBL_CFG1
+ tpc_offset
, 0);
3077 WREG32(mmTPC0_QM_GLBL_PROT
+ tpc_offset
,
3078 QMAN_INTERNAL_MAKE_TRUSTED
);
3081 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
3082 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
3083 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
3084 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
3086 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3088 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0
+ q_off
,
3090 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0
+ q_off
,
3092 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0
+ q_off
,
3094 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0
+ q_off
,
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);
		}

		/* Initializing lower CP for TPC QMAN */
		gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

		/* Enable the QMAN and TPC channel */
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
				QMAN_TPC_ENABLE);

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
3144 static void gaudi_init_nic_qman(struct hl_device
*hdev
, u32 nic_offset
,
3145 int qman_id
, u64 qman_base_addr
, int nic_id
)
3147 struct cpu_dyn_regs
*dyn_regs
=
3148 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
3149 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
3150 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
3151 u32 nic_qm_err_cfg
, irq_handler_offset
;
3154 mtr_base_en_lo
= lower_32_bits((CFG_BASE
& U32_MAX
) +
3155 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3156 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
3157 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3158 so_base_en_lo
= lower_32_bits((CFG_BASE
& U32_MAX
) +
3159 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3160 so_base_en_hi
= upper_32_bits(CFG_BASE
+
3161 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3162 mtr_base_ws_lo
= lower_32_bits((CFG_BASE
& U32_MAX
) +
3163 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3164 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
3165 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3166 so_base_ws_lo
= lower_32_bits((CFG_BASE
& U32_MAX
) +
3167 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3168 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
3169 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3171 q_off
= nic_offset
+ qman_id
* 4;
3173 WREG32(mmNIC0_QM0_PQ_BASE_LO_0
+ q_off
, lower_32_bits(qman_base_addr
));
3174 WREG32(mmNIC0_QM0_PQ_BASE_HI_0
+ q_off
, upper_32_bits(qman_base_addr
));
3176 WREG32(mmNIC0_QM0_PQ_SIZE_0
+ q_off
, ilog2(NIC_QMAN_LENGTH
));
3177 WREG32(mmNIC0_QM0_PQ_PI_0
+ q_off
, 0);
3178 WREG32(mmNIC0_QM0_PQ_CI_0
+ q_off
, 0);
3180 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
3181 QMAN_LDMA_SIZE_OFFSET
);
3182 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
3183 QMAN_LDMA_SRC_OFFSET
);
3184 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
3185 QMAN_LDMA_DST_OFFSET
);
3187 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
3188 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
3189 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
3190 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
3192 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3193 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0
+ q_off
, mtr_base_ws_lo
);
3194 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0
+ q_off
, mtr_base_ws_hi
);
3195 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0
+ q_off
, so_base_ws_lo
);
3196 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0
+ q_off
, so_base_ws_hi
);
3199 irq_handler_offset
= hdev
->asic_prop
.gic_interrupts_enable
?
3200 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
3201 le32_to_cpu(dyn_regs
->gic_nic_qm_irq_ctrl
);
3203 /* Configure RAZWI IRQ */
3204 nic_qm_err_cfg
= NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
3205 if (hdev
->stop_on_err
)
3207 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
3209 WREG32(mmNIC0_QM0_GLBL_ERR_CFG
+ nic_offset
, nic_qm_err_cfg
);
3211 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO
+ nic_offset
,
3212 lower_32_bits(CFG_BASE
+ irq_handler_offset
));
3213 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI
+ nic_offset
,
3214 upper_32_bits(CFG_BASE
+ irq_handler_offset
));
3216 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA
+ nic_offset
,
3217 gaudi_irq_map_table
[GAUDI_EVENT_NIC0_QM0
].cpu_id
+
3220 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN
+ nic_offset
,
3221 QM_ARB_ERR_MSG_EN_MASK
);
3223 /* Set timeout to maximum */
3224 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT
+ nic_offset
, GAUDI_ARB_WDT_TIMEOUT
);
3226 WREG32(mmNIC0_QM0_GLBL_CFG1
+ nic_offset
, 0);
3227 WREG32(mmNIC0_QM0_GLBL_PROT
+ nic_offset
,
3228 QMAN_INTERNAL_MAKE_TRUSTED
);
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
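/*
 * Worked example (illustration only) of the nic_offset arithmetic above:
 * each NIC block hosts two QMANs, so after an odd nic_id the offset is
 * rewound by two QMAN strides and advanced by one NIC stride, e.g. for
 * nic_id == 1:
 *
 *	nic_offset += nic_delta_between_qmans;       advance past QM1
 *	nic_offset -= 2 * nic_delta_between_qmans;   back to the start of NIC0
 *	nic_offset += nic_delta_between_nics;        jump to mmNIC1_QM0
 *
 * so nic_id 2 starts at mmNIC1_QM0_GLBL_CFG0, nic_id 4 at mmNIC2_QM0, etc.
 */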
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}
static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}
static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}
static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
3418 static void gaudi_stop_nic_qmans(struct hl_device
*hdev
)
3420 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3422 /* Stop upper CPs of QMANs */
3424 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC0
)
3425 WREG32(mmNIC0_QM0_GLBL_CFG1
,
3426 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3427 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3428 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3430 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC1
)
3431 WREG32(mmNIC0_QM1_GLBL_CFG1
,
3432 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3433 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3434 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3436 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC2
)
3437 WREG32(mmNIC1_QM0_GLBL_CFG1
,
3438 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3439 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3440 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3442 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC3
)
3443 WREG32(mmNIC1_QM1_GLBL_CFG1
,
3444 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3445 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3446 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3448 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC4
)
3449 WREG32(mmNIC2_QM0_GLBL_CFG1
,
3450 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3451 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3452 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3454 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC5
)
3455 WREG32(mmNIC2_QM1_GLBL_CFG1
,
3456 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3457 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3458 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3460 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC6
)
3461 WREG32(mmNIC3_QM0_GLBL_CFG1
,
3462 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3463 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3464 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3466 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC7
)
3467 WREG32(mmNIC3_QM1_GLBL_CFG1
,
3468 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3469 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3470 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3472 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC8
)
3473 WREG32(mmNIC4_QM0_GLBL_CFG1
,
3474 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3475 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3476 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3478 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC9
)
3479 WREG32(mmNIC4_QM1_GLBL_CFG1
,
3480 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3481 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3482 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3485 static void gaudi_pci_dma_stall(struct hl_device
*hdev
)
3487 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3489 if (!(gaudi
->hw_cap_initialized
& HW_CAP_PCI_DMA
))
3492 WREG32(mmDMA0_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3493 WREG32(mmDMA1_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3494 WREG32(mmDMA5_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3497 static void gaudi_hbm_dma_stall(struct hl_device
*hdev
)
3499 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3501 if (!(gaudi
->hw_cap_initialized
& HW_CAP_HBM_DMA
))
3504 WREG32(mmDMA2_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3505 WREG32(mmDMA3_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3506 WREG32(mmDMA4_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3507 WREG32(mmDMA6_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3508 WREG32(mmDMA7_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3511 static void gaudi_mme_stall(struct hl_device
*hdev
)
3513 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3515 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MME
))
3518 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3519 WREG32(mmMME0_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3520 WREG32(mmMME0_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3521 WREG32(mmMME0_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3522 WREG32(mmMME0_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3523 WREG32(mmMME1_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3524 WREG32(mmMME1_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3525 WREG32(mmMME1_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3526 WREG32(mmMME1_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3527 WREG32(mmMME2_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3528 WREG32(mmMME2_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3529 WREG32(mmMME2_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3530 WREG32(mmMME2_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3531 WREG32(mmMME3_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3532 WREG32(mmMME3_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3533 WREG32(mmMME3_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3534 WREG32(mmMME3_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3537 static void gaudi_tpc_stall(struct hl_device
*hdev
)
3539 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3541 if (!(gaudi
->hw_cap_initialized
& HW_CAP_TPC_MASK
))
3544 WREG32(mmTPC0_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3545 WREG32(mmTPC1_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3546 WREG32(mmTPC2_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3547 WREG32(mmTPC3_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3548 WREG32(mmTPC4_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3549 WREG32(mmTPC5_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3550 WREG32(mmTPC6_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3551 WREG32(mmTPC7_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3554 static void gaudi_disable_clock_gating(struct hl_device
*hdev
)
3559 if (hdev
->asic_prop
.fw_security_enabled
)
3562 for (i
= 0, qman_offset
= 0 ; i
< DMA_NUMBER_OF_CHANNELS
; i
++) {
3563 WREG32(mmDMA0_QM_CGM_CFG
+ qman_offset
, 0);
3564 WREG32(mmDMA0_QM_CGM_CFG1
+ qman_offset
, 0);
3566 qman_offset
+= (mmDMA1_QM_CGM_CFG
- mmDMA0_QM_CGM_CFG
);
3569 WREG32(mmMME0_QM_CGM_CFG
, 0);
3570 WREG32(mmMME0_QM_CGM_CFG1
, 0);
3571 WREG32(mmMME2_QM_CGM_CFG
, 0);
3572 WREG32(mmMME2_QM_CGM_CFG1
, 0);
3574 for (i
= 0, qman_offset
= 0 ; i
< TPC_NUMBER_OF_ENGINES
; i
++) {
3575 WREG32(mmTPC0_QM_CGM_CFG
+ qman_offset
, 0);
3576 WREG32(mmTPC0_QM_CGM_CFG1
+ qman_offset
, 0);
3578 qman_offset
+= (mmTPC1_QM_CGM_CFG
- mmTPC0_QM_CGM_CFG
);
3582 static void gaudi_enable_timestamp(struct hl_device
*hdev
)
3584 /* Disable the timestamp counter */
3585 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
, 0);
3587 /* Zero the lower/upper parts of the 64-bit counter */
3588 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
+ 0xC, 0);
3589 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
+ 0x8, 0);
3591 /* Enable the counter */
3592 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
, 1);
3595 static void gaudi_disable_timestamp(struct hl_device
*hdev
)
3597 /* Disable the timestamp counter */
3598 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
, 0);
3601 static void gaudi_halt_engines(struct hl_device
*hdev
, bool hard_reset
, bool fw_reset
)
3603 u32 wait_timeout_ms
;
3606 wait_timeout_ms
= GAUDI_PLDM_RESET_WAIT_MSEC
;
3608 wait_timeout_ms
= GAUDI_RESET_WAIT_MSEC
;
3613 gaudi_stop_nic_qmans(hdev
);
3614 gaudi_stop_mme_qmans(hdev
);
3615 gaudi_stop_tpc_qmans(hdev
);
3616 gaudi_stop_hbm_dma_qmans(hdev
);
3617 gaudi_stop_pci_dma_qmans(hdev
);
3619 msleep(wait_timeout_ms
);
3621 gaudi_pci_dma_stall(hdev
);
3622 gaudi_hbm_dma_stall(hdev
);
3623 gaudi_tpc_stall(hdev
);
3624 gaudi_mme_stall(hdev
);
3626 msleep(wait_timeout_ms
);
3628 gaudi_disable_nic_qmans(hdev
);
3629 gaudi_disable_mme_qmans(hdev
);
3630 gaudi_disable_tpc_qmans(hdev
);
3631 gaudi_disable_hbm_dma_qmans(hdev
);
3632 gaudi_disable_pci_dma_qmans(hdev
);
3634 gaudi_disable_timestamp(hdev
);
3637 gaudi_disable_msi(hdev
);
3640 static int gaudi_mmu_init(struct hl_device
*hdev
)
3642 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
3643 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3647 if (gaudi
->hw_cap_initialized
& HW_CAP_MMU
)
3650 for (i
= 0 ; i
< prop
->max_asid
; i
++) {
3651 hop0_addr
= prop
->mmu_pgt_addr
+
3652 (i
* prop
->dmmu
.hop_table_size
);
3654 rc
= gaudi_mmu_update_asid_hop0_addr(hdev
, i
, hop0_addr
);
3657 "failed to set hop0 addr for asid %d\n", i
);
3662 /* init MMU cache manage page */
3663 WREG32(mmSTLB_CACHE_INV_BASE_39_8
, prop
->mmu_cache_mng_addr
>> 8);
3664 WREG32(mmSTLB_CACHE_INV_BASE_49_40
, prop
->mmu_cache_mng_addr
>> 40);
3666 /* mem cache invalidation */
3667 WREG32(mmSTLB_MEM_CACHE_INVALIDATION
, 1);
3669 rc
= hl_mmu_invalidate_cache(hdev
, true, 0);
3673 WREG32(mmMMU_UP_MMU_ENABLE
, 1);
3674 WREG32(mmMMU_UP_SPI_MASK
, 0xF);
3676 WREG32(mmSTLB_HOP_CONFIGURATION
, 0x30440);
3679 * The H/W expects the first PI after init to be 1. After wraparound
3682 gaudi
->mmu_cache_inv_pi
= 1;
3684 gaudi
->hw_cap_initialized
|= HW_CAP_MMU
;
3689 static int gaudi_load_firmware_to_device(struct hl_device
*hdev
)
3693 dst
= hdev
->pcie_bar
[HBM_BAR_ID
] + LINUX_FW_OFFSET
;
3695 return hl_fw_load_fw_to_device(hdev
, GAUDI_LINUX_FW_FILE
, dst
, 0, 0);
3698 static int gaudi_load_boot_fit_to_device(struct hl_device
*hdev
)
3702 dst
= hdev
->pcie_bar
[SRAM_BAR_ID
] + BOOT_FIT_SRAM_OFFSET
;
3704 return hl_fw_load_fw_to_device(hdev
, GAUDI_BOOT_FIT_FILE
, dst
, 0, 0);
3707 static void gaudi_init_dynamic_firmware_loader(struct hl_device
*hdev
)
3709 struct dynamic_fw_load_mgr
*dynamic_loader
;
3710 struct cpu_dyn_regs
*dyn_regs
;
3712 dynamic_loader
= &hdev
->fw_loader
.dynamic_loader
;
3715 * here we update initial values for few specific dynamic regs (as
3716 * before reading the first descriptor from FW those value has to be
3717 * hard-coded) in later stages of the protocol those values will be
3718 * updated automatically by reading the FW descriptor so data there
3719 * will always be up-to-date
3721 dyn_regs
= &dynamic_loader
->comm_desc
.cpu_dyn_regs
;
3722 dyn_regs
->kmd_msg_to_cpu
=
3723 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU
);
3724 dyn_regs
->cpu_cmd_status_to_host
=
3725 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST
);
3727 dynamic_loader
->wait_for_bl_timeout
= GAUDI_WAIT_FOR_BL_TIMEOUT_USEC
;
3730 static void gaudi_init_static_firmware_loader(struct hl_device
*hdev
)
3732 struct static_fw_load_mgr
*static_loader
;
3734 static_loader
= &hdev
->fw_loader
.static_loader
;
3736 static_loader
->preboot_version_max_off
= SRAM_SIZE
- VERSION_MAX_LEN
;
3737 static_loader
->boot_fit_version_max_off
= SRAM_SIZE
- VERSION_MAX_LEN
;
3738 static_loader
->kmd_msg_to_cpu_reg
= mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU
;
3739 static_loader
->cpu_cmd_status_to_host_reg
= mmCPU_CMD_STATUS_TO_HOST
;
3740 static_loader
->cpu_boot_status_reg
= mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS
;
3741 static_loader
->cpu_boot_dev_status0_reg
= mmCPU_BOOT_DEV_STS0
;
3742 static_loader
->cpu_boot_dev_status1_reg
= mmCPU_BOOT_DEV_STS1
;
3743 static_loader
->boot_err0_reg
= mmCPU_BOOT_ERR0
;
3744 static_loader
->boot_err1_reg
= mmCPU_BOOT_ERR1
;
3745 static_loader
->preboot_version_offset_reg
= mmPREBOOT_VER_OFFSET
;
3746 static_loader
->boot_fit_version_offset_reg
= mmUBOOT_VER_OFFSET
;
3747 static_loader
->sram_offset_mask
= ~(lower_32_bits(SRAM_BASE_ADDR
));
3748 static_loader
->cpu_reset_wait_msec
= hdev
->pldm
?
3749 GAUDI_PLDM_RESET_WAIT_MSEC
:
3750 GAUDI_CPU_RESET_WAIT_MSEC
;
3753 static void gaudi_init_firmware_preload_params(struct hl_device
*hdev
)
3755 struct pre_fw_load_props
*pre_fw_load
= &hdev
->fw_loader
.pre_fw_load
;
3757 pre_fw_load
->cpu_boot_status_reg
= mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS
;
3758 pre_fw_load
->sts_boot_dev_sts0_reg
= mmCPU_BOOT_DEV_STS0
;
3759 pre_fw_load
->sts_boot_dev_sts1_reg
= mmCPU_BOOT_DEV_STS1
;
3760 pre_fw_load
->boot_err0_reg
= mmCPU_BOOT_ERR0
;
3761 pre_fw_load
->boot_err1_reg
= mmCPU_BOOT_ERR1
;
3762 pre_fw_load
->wait_for_preboot_timeout
= GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC
;
3765 static void gaudi_init_firmware_loader(struct hl_device
*hdev
)
3767 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
3768 struct fw_load_mgr
*fw_loader
= &hdev
->fw_loader
;
3770 /* fill common fields */
3771 fw_loader
->fw_comp_loaded
= FW_TYPE_NONE
;
3772 fw_loader
->boot_fit_img
.image_name
= GAUDI_BOOT_FIT_FILE
;
3773 fw_loader
->linux_img
.image_name
= GAUDI_LINUX_FW_FILE
;
3774 fw_loader
->cpu_timeout
= GAUDI_CPU_TIMEOUT_USEC
;
3775 fw_loader
->boot_fit_timeout
= GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC
;
3776 fw_loader
->skip_bmc
= !hdev
->bmc_enable
;
3777 fw_loader
->sram_bar_id
= SRAM_BAR_ID
;
3778 fw_loader
->dram_bar_id
= HBM_BAR_ID
;
3780 if (prop
->dynamic_fw_load
)
3781 gaudi_init_dynamic_firmware_loader(hdev
);
3783 gaudi_init_static_firmware_loader(hdev
);
3786 static int gaudi_init_cpu(struct hl_device
*hdev
)
3788 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3791 if (!(hdev
->fw_components
& FW_TYPE_PREBOOT_CPU
))
3794 if (gaudi
->hw_cap_initialized
& HW_CAP_CPU
)
3798 * The device CPU works with 40 bits addresses.
3799 * This register sets the extension to 50 bits.
3801 if (!hdev
->asic_prop
.fw_security_enabled
)
3802 WREG32(mmCPU_IF_CPU_MSB_ADDR
, hdev
->cpu_pci_msb_addr
);
3804 rc
= hl_fw_init_cpu(hdev
);
3809 gaudi
->hw_cap_initialized
|= HW_CAP_CPU
;
3814 static int gaudi_init_cpu_queues(struct hl_device
*hdev
, u32 cpu_timeout
)
3816 struct cpu_dyn_regs
*dyn_regs
=
3817 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
3818 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
3819 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3820 u32 status
, irq_handler_offset
;
3822 struct hl_hw_queue
*cpu_pq
=
3823 &hdev
->kernel_queues
[GAUDI_QUEUE_ID_CPU_PQ
];
3826 if (!hdev
->cpu_queues_enable
)
3829 if (gaudi
->hw_cap_initialized
& HW_CAP_CPU_Q
)
3832 eq
= &hdev
->event_queue
;
3834 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW
, lower_32_bits(cpu_pq
->bus_address
));
3835 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH
, upper_32_bits(cpu_pq
->bus_address
));
3837 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW
, lower_32_bits(eq
->bus_address
));
3838 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH
, upper_32_bits(eq
->bus_address
));
3840 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW
,
3841 lower_32_bits(hdev
->cpu_accessible_dma_address
));
3842 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH
,
3843 upper_32_bits(hdev
->cpu_accessible_dma_address
));
3845 WREG32(mmCPU_IF_PQ_LENGTH
, HL_QUEUE_SIZE_IN_BYTES
);
3846 WREG32(mmCPU_IF_EQ_LENGTH
, HL_EQ_SIZE_IN_BYTES
);
3847 WREG32(mmCPU_IF_CQ_LENGTH
, HL_CPU_ACCESSIBLE_MEM_SIZE
);
3849 /* Used for EQ CI */
3850 WREG32(mmCPU_IF_EQ_RD_OFFS
, 0);
3852 WREG32(mmCPU_IF_PF_PQ_PI
, 0);
3854 WREG32(mmCPU_IF_QUEUE_INIT
, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
);
3856 irq_handler_offset
= prop
->gic_interrupts_enable
?
3857 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
3858 le32_to_cpu(dyn_regs
->gic_host_pi_upd_irq
);
3860 WREG32(irq_handler_offset
,
3861 gaudi_irq_map_table
[GAUDI_EVENT_PI_UPDATE
].cpu_id
);
3863 err
= hl_poll_timeout(
3865 mmCPU_IF_QUEUE_INIT
,
3867 (status
== PQ_INIT_STATUS_READY_FOR_HOST
),
3873 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3877 /* update FW application security bits */
3878 if (prop
->fw_cpu_boot_dev_sts0_valid
)
3879 prop
->fw_app_cpu_boot_dev_sts0
= RREG32(mmCPU_BOOT_DEV_STS0
);
3880 if (prop
->fw_cpu_boot_dev_sts1_valid
)
3881 prop
->fw_app_cpu_boot_dev_sts1
= RREG32(mmCPU_BOOT_DEV_STS1
);
3883 gaudi
->hw_cap_initialized
|= HW_CAP_CPU_Q
;
3887 static void gaudi_pre_hw_init(struct hl_device
*hdev
)
3889 /* Perform read from the device to make sure device is up */
3892 if (!hdev
->asic_prop
.fw_security_enabled
) {
3893 /* Set the access through PCI bars (Linux driver only) as
3896 WREG32(mmPCIE_WRAP_LBW_PROT_OVR
,
3897 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK
|
3898 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK
));
3900 /* Perform read to flush the waiting writes to ensure
3901 * configuration was set in the device
3903 RREG32(mmPCIE_WRAP_LBW_PROT_OVR
);
3907 * Let's mark in the H/W that we have reached this point. We check
3908 * this value in the reset_before_init function to understand whether
3909 * we need to reset the chip before doing H/W init. This register is
3910 * cleared by the H/W upon H/W reset
3912 WREG32(mmHW_STATE
, HL_DEVICE_HW_STATE_DIRTY
);
3915 static int gaudi_hw_init(struct hl_device
*hdev
)
3917 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3920 gaudi_pre_hw_init(hdev
);
3922 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3923 * So we set it here and if anyone tries to move it later to
3924 * a different address, there will be an error
3926 if (hdev
->asic_prop
.iatu_done_by_fw
)
3927 gaudi
->hbm_bar_cur_addr
= DRAM_PHYS_BASE
;
3930 * Before pushing u-boot/linux to device, need to set the hbm bar to
3931 * base address of dram
3933 if (gaudi_set_hbm_bar_base(hdev
, DRAM_PHYS_BASE
) == U64_MAX
) {
3935 "failed to map HBM bar to DRAM base address\n");
3939 rc
= gaudi_init_cpu(hdev
);
3941 dev_err(hdev
->dev
, "failed to initialize CPU\n");
3945 /* In case the clock gating was enabled in preboot we need to disable
3946 * it here before touching the MME/TPC registers.
3948 gaudi_disable_clock_gating(hdev
);
3950 /* SRAM scrambler must be initialized after CPU is running from HBM */
3951 gaudi_init_scrambler_sram(hdev
);
3953 /* This is here just in case we are working without CPU */
3954 gaudi_init_scrambler_hbm(hdev
);
3956 gaudi_init_golden_registers(hdev
);
3958 rc
= gaudi_mmu_init(hdev
);
3962 gaudi_init_security(hdev
);
3964 gaudi_init_pci_dma_qmans(hdev
);
3966 gaudi_init_hbm_dma_qmans(hdev
);
3968 gaudi_init_mme_qmans(hdev
);
3970 gaudi_init_tpc_qmans(hdev
);
3972 gaudi_init_nic_qmans(hdev
);
3974 gaudi_enable_timestamp(hdev
);
3976 /* MSI must be enabled before CPU queues and NIC are initialized */
3977 rc
= gaudi_enable_msi(hdev
);
3979 goto disable_queues
;
3981 /* must be called after MSI was enabled */
3982 rc
= gaudi_init_cpu_queues(hdev
, GAUDI_CPU_TIMEOUT_USEC
);
3984 dev_err(hdev
->dev
, "failed to initialize CPU H/W queues %d\n",
3989 /* Perform read from the device to flush all configuration */
3995 gaudi_disable_msi(hdev
);
3997 gaudi_disable_mme_qmans(hdev
);
3998 gaudi_disable_pci_dma_qmans(hdev
);
4003 static int gaudi_hw_fini(struct hl_device
*hdev
, bool hard_reset
, bool fw_reset
)
4005 struct cpu_dyn_regs
*dyn_regs
=
4006 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
4007 u32 status
, reset_timeout_ms
, cpu_timeout_ms
, irq_handler_offset
;
4008 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
4009 bool driver_performs_reset
;
4012 dev_err(hdev
->dev
, "GAUDI doesn't support soft-reset\n");
4017 reset_timeout_ms
= GAUDI_PLDM_HRESET_TIMEOUT_MSEC
;
4018 cpu_timeout_ms
= GAUDI_PLDM_RESET_WAIT_MSEC
;
4020 reset_timeout_ms
= GAUDI_RESET_TIMEOUT_MSEC
;
4021 cpu_timeout_ms
= GAUDI_CPU_RESET_WAIT_MSEC
;
4026 "Firmware performs HARD reset, going to wait %dms\n",
4032 driver_performs_reset
= !!(!hdev
->asic_prop
.fw_security_enabled
&&
4033 !hdev
->asic_prop
.hard_reset_done_by_fw
);
4035 /* Set device to handle FLR by H/W as we will put the device CPU to
4038 if (driver_performs_reset
)
4039 WREG32(mmPCIE_AUX_FLR_CTRL
, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK
|
4040 PCIE_AUX_FLR_CTRL_INT_MASK_MASK
));
4042 /* If linux is loaded in the device CPU we need to communicate with it
4043 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4044 * registers in case of old F/Ws
4046 if (hdev
->fw_loader
.fw_comp_loaded
& FW_TYPE_LINUX
) {
4047 irq_handler_offset
= hdev
->asic_prop
.gic_interrupts_enable
?
4048 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
4049 le32_to_cpu(dyn_regs
->gic_host_halt_irq
);
4051 WREG32(irq_handler_offset
,
4052 gaudi_irq_map_table
[GAUDI_EVENT_HALT_MACHINE
].cpu_id
);
4054 /* This is a hail-mary attempt to revive the card in the small chance that the
4055 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4056 * In that case, triggering reset through GIC won't help. We need to trigger the
4057 * reset as if Linux wasn't loaded.
4059 * We do it only if the reset cause was HB, because that would be the indication
4062 * In case watchdog hasn't expired but we still got HB, then this won't do any
4065 if (hdev
->reset_info
.curr_reset_cause
== HL_RESET_CAUSE_HEARTBEAT
) {
4066 if (hdev
->asic_prop
.hard_reset_done_by_fw
)
4067 hl_fw_ask_hard_reset_without_linux(hdev
);
4069 hl_fw_ask_halt_machine_without_linux(hdev
);
4072 if (hdev
->asic_prop
.hard_reset_done_by_fw
)
4073 hl_fw_ask_hard_reset_without_linux(hdev
);
4075 hl_fw_ask_halt_machine_without_linux(hdev
);
4078 if (driver_performs_reset
) {
4080 /* Configure the reset registers. Must be done as early as
4081 * possible in case we fail during H/W initialization
4083 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H
,
4084 (CFG_RST_H_DMA_MASK
|
4085 CFG_RST_H_MME_MASK
|
4087 CFG_RST_H_TPC_7_MASK
));
4089 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L
, CFG_RST_L_TPC_MASK
);
4091 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H
,
4092 (CFG_RST_H_HBM_MASK
|
4093 CFG_RST_H_TPC_7_MASK
|
4094 CFG_RST_H_NIC_MASK
|
4096 CFG_RST_H_DMA_MASK
|
4097 CFG_RST_H_MME_MASK
|
4098 CFG_RST_H_CPU_MASK
|
4099 CFG_RST_H_MMU_MASK
));
4101 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L
,
4102 (CFG_RST_L_IF_MASK
|
4103 CFG_RST_L_PSOC_MASK
|
4104 CFG_RST_L_TPC_MASK
));
4106 msleep(cpu_timeout_ms
);
4108 /* Tell ASIC not to re-initialize PCIe */
4109 WREG32(mmPREBOOT_PCIE_EN
, LKD_HARD_RESET_MAGIC
);
4111 /* Restart BTL/BLR upon hard-reset */
4112 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START
, 1);
4114 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST
,
4115 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT
);
4118 "Issued HARD reset command, going to wait %dms\n",
4122 "Firmware performs HARD reset, going to wait %dms\n",
4128 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4129 * itself is in reset. Need to wait until the reset is deasserted
4131 msleep(reset_timeout_ms
);
4133 status
= RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM
);
4134 if (status
& PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK
) {
4135 dev_err(hdev
->dev
, "Timeout while waiting for device to reset 0x%x\n", status
);
4140 gaudi
->hw_cap_initialized
&= ~(HW_CAP_CPU
| HW_CAP_CPU_Q
| HW_CAP_HBM
|
4141 HW_CAP_PCI_DMA
| HW_CAP_MME
| HW_CAP_TPC_MASK
|
4142 HW_CAP_HBM_DMA
| HW_CAP_PLL
| HW_CAP_NIC_MASK
|
4143 HW_CAP_MMU
| HW_CAP_SRAM_SCRAMBLER
|
4144 HW_CAP_HBM_SCRAMBLER
);
4146 memset(gaudi
->events_stat
, 0, sizeof(gaudi
->events_stat
));
4148 hdev
->device_cpu_is_halted
= false;
4153 static int gaudi_suspend(struct hl_device
*hdev
)
4155 return hl_fw_send_pci_access_msg(hdev
, CPUCP_PACKET_DISABLE_PCI_ACCESS
, 0x0);
4158 static int gaudi_resume(struct hl_device
*hdev
)
4160 return gaudi_init_iatu(hdev
);
4163 static int gaudi_mmap(struct hl_device
*hdev
, struct vm_area_struct
*vma
,
4164 void *cpu_addr
, dma_addr_t dma_addr
, size_t size
)
4168 vm_flags_set(vma
, VM_IO
| VM_PFNMAP
| VM_DONTEXPAND
| VM_DONTDUMP
|
4169 VM_DONTCOPY
| VM_NORESERVE
);
4171 rc
= dma_mmap_coherent(hdev
->dev
, vma
, cpu_addr
,
4172 (dma_addr
- HOST_PHYS_BASE
), size
);
4174 dev_err(hdev
->dev
, "dma_mmap_coherent error %d", rc
);
4179 static void gaudi_ring_doorbell(struct hl_device
*hdev
, u32 hw_queue_id
, u32 pi
)
4181 struct cpu_dyn_regs
*dyn_regs
=
4182 &hdev
->fw_loader
.dynamic_loader
.comm_desc
.cpu_dyn_regs
;
4183 u32 db_reg_offset
, db_value
, dma_qm_offset
, q_off
, irq_handler_offset
;
4184 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
4185 bool invalid_queue
= false;
4188 switch (hw_queue_id
) {
4189 case GAUDI_QUEUE_ID_DMA_0_0
...GAUDI_QUEUE_ID_DMA_0_3
:
4190 dma_id
= gaudi_dma_assignment
[GAUDI_PCI_DMA_1
];
4191 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4192 q_off
= dma_qm_offset
+ (hw_queue_id
& 0x3) * 4;
4193 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4196 case GAUDI_QUEUE_ID_DMA_1_0
...GAUDI_QUEUE_ID_DMA_1_3
:
4197 dma_id
= gaudi_dma_assignment
[GAUDI_PCI_DMA_2
];
4198 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4199 q_off
= dma_qm_offset
+ (hw_queue_id
& 0x3) * 4;
4200 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4203 case GAUDI_QUEUE_ID_DMA_2_0
...GAUDI_QUEUE_ID_DMA_2_3
:
4204 dma_id
= gaudi_dma_assignment
[GAUDI_HBM_DMA_1
];
4205 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4206 q_off
= dma_qm_offset
+ ((hw_queue_id
- 1) & 0x3) * 4;
4207 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4210 case GAUDI_QUEUE_ID_DMA_3_0
...GAUDI_QUEUE_ID_DMA_3_3
:
4211 dma_id
= gaudi_dma_assignment
[GAUDI_HBM_DMA_2
];
4212 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4213 q_off
= dma_qm_offset
+ ((hw_queue_id
- 1) & 0x3) * 4;
4214 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4217 case GAUDI_QUEUE_ID_DMA_4_0
...GAUDI_QUEUE_ID_DMA_4_3
:
4218 dma_id
= gaudi_dma_assignment
[GAUDI_HBM_DMA_3
];
4219 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4220 q_off
= dma_qm_offset
+ ((hw_queue_id
- 1) & 0x3) * 4;
4221 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4224 case GAUDI_QUEUE_ID_DMA_5_0
...GAUDI_QUEUE_ID_DMA_5_3
:
4225 dma_id
= gaudi_dma_assignment
[GAUDI_HBM_DMA_4
];
4226 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4227 q_off
= dma_qm_offset
+ ((hw_queue_id
- 1) & 0x3) * 4;
4228 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4231 case GAUDI_QUEUE_ID_DMA_6_0
...GAUDI_QUEUE_ID_DMA_6_3
:
4232 dma_id
= gaudi_dma_assignment
[GAUDI_HBM_DMA_5
];
4233 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4234 q_off
= dma_qm_offset
+ ((hw_queue_id
- 1) & 0x3) * 4;
4235 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4238 case GAUDI_QUEUE_ID_DMA_7_0
...GAUDI_QUEUE_ID_DMA_7_3
:
4239 dma_id
= gaudi_dma_assignment
[GAUDI_HBM_DMA_6
];
4240 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
4241 q_off
= dma_qm_offset
+ ((hw_queue_id
- 1) & 0x3) * 4;
4242 db_reg_offset
= mmDMA0_QM_PQ_PI_0
+ q_off
;
4245 case GAUDI_QUEUE_ID_CPU_PQ
:
4246 if (gaudi
->hw_cap_initialized
& HW_CAP_CPU_Q
)
4247 db_reg_offset
= mmCPU_IF_PF_PQ_PI
;
4249 invalid_queue
= true;
4252 case GAUDI_QUEUE_ID_MME_0_0
:
4253 db_reg_offset
= mmMME2_QM_PQ_PI_0
;
4256 case GAUDI_QUEUE_ID_MME_0_1
:
4257 db_reg_offset
= mmMME2_QM_PQ_PI_1
;
4260 case GAUDI_QUEUE_ID_MME_0_2
:
4261 db_reg_offset
= mmMME2_QM_PQ_PI_2
;
4264 case GAUDI_QUEUE_ID_MME_0_3
:
4265 db_reg_offset
= mmMME2_QM_PQ_PI_3
;
4268 case GAUDI_QUEUE_ID_MME_1_0
:
4269 db_reg_offset
= mmMME0_QM_PQ_PI_0
;
4272 case GAUDI_QUEUE_ID_MME_1_1
:
4273 db_reg_offset
= mmMME0_QM_PQ_PI_1
;
4276 case GAUDI_QUEUE_ID_MME_1_2
:
4277 db_reg_offset
= mmMME0_QM_PQ_PI_2
;
4280 case GAUDI_QUEUE_ID_MME_1_3
:
4281 db_reg_offset
= mmMME0_QM_PQ_PI_3
;
4284 case GAUDI_QUEUE_ID_TPC_0_0
:
4285 db_reg_offset
= mmTPC0_QM_PQ_PI_0
;
4288 case GAUDI_QUEUE_ID_TPC_0_1
:
4289 db_reg_offset
= mmTPC0_QM_PQ_PI_1
;
4292 case GAUDI_QUEUE_ID_TPC_0_2
:
4293 db_reg_offset
= mmTPC0_QM_PQ_PI_2
;
4296 case GAUDI_QUEUE_ID_TPC_0_3
:
4297 db_reg_offset
= mmTPC0_QM_PQ_PI_3
;
4300 case GAUDI_QUEUE_ID_TPC_1_0
:
4301 db_reg_offset
= mmTPC1_QM_PQ_PI_0
;
4304 case GAUDI_QUEUE_ID_TPC_1_1
:
4305 db_reg_offset
= mmTPC1_QM_PQ_PI_1
;
4308 case GAUDI_QUEUE_ID_TPC_1_2
:
4309 db_reg_offset
= mmTPC1_QM_PQ_PI_2
;
4312 case GAUDI_QUEUE_ID_TPC_1_3
:
4313 db_reg_offset
= mmTPC1_QM_PQ_PI_3
;
4316 case GAUDI_QUEUE_ID_TPC_2_0
:
4317 db_reg_offset
= mmTPC2_QM_PQ_PI_0
;
4320 case GAUDI_QUEUE_ID_TPC_2_1
:
4321 db_reg_offset
= mmTPC2_QM_PQ_PI_1
;
4324 case GAUDI_QUEUE_ID_TPC_2_2
:
4325 db_reg_offset
= mmTPC2_QM_PQ_PI_2
;
4328 case GAUDI_QUEUE_ID_TPC_2_3
:
4329 db_reg_offset
= mmTPC2_QM_PQ_PI_3
;
4332 case GAUDI_QUEUE_ID_TPC_3_0
:
4333 db_reg_offset
= mmTPC3_QM_PQ_PI_0
;
4336 case GAUDI_QUEUE_ID_TPC_3_1
:
4337 db_reg_offset
= mmTPC3_QM_PQ_PI_1
;
4340 case GAUDI_QUEUE_ID_TPC_3_2
:
4341 db_reg_offset
= mmTPC3_QM_PQ_PI_2
;
4344 case GAUDI_QUEUE_ID_TPC_3_3
:
4345 db_reg_offset
= mmTPC3_QM_PQ_PI_3
;
4348 case GAUDI_QUEUE_ID_TPC_4_0
:
4349 db_reg_offset
= mmTPC4_QM_PQ_PI_0
;
4352 case GAUDI_QUEUE_ID_TPC_4_1
:
4353 db_reg_offset
= mmTPC4_QM_PQ_PI_1
;
4356 case GAUDI_QUEUE_ID_TPC_4_2
:
4357 db_reg_offset
= mmTPC4_QM_PQ_PI_2
;
4360 case GAUDI_QUEUE_ID_TPC_4_3
:
4361 db_reg_offset
= mmTPC4_QM_PQ_PI_3
;
4364 case GAUDI_QUEUE_ID_TPC_5_0
:
4365 db_reg_offset
= mmTPC5_QM_PQ_PI_0
;
4368 case GAUDI_QUEUE_ID_TPC_5_1
:
4369 db_reg_offset
= mmTPC5_QM_PQ_PI_1
;
4372 case GAUDI_QUEUE_ID_TPC_5_2
:
4373 db_reg_offset
= mmTPC5_QM_PQ_PI_2
;
4376 case GAUDI_QUEUE_ID_TPC_5_3
:
4377 db_reg_offset
= mmTPC5_QM_PQ_PI_3
;
4380 case GAUDI_QUEUE_ID_TPC_6_0
:
4381 db_reg_offset
= mmTPC6_QM_PQ_PI_0
;
4384 case GAUDI_QUEUE_ID_TPC_6_1
:
4385 db_reg_offset
= mmTPC6_QM_PQ_PI_1
;
4388 case GAUDI_QUEUE_ID_TPC_6_2
:
4389 db_reg_offset
= mmTPC6_QM_PQ_PI_2
;
4392 case GAUDI_QUEUE_ID_TPC_6_3
:
4393 db_reg_offset
= mmTPC6_QM_PQ_PI_3
;
4396 case GAUDI_QUEUE_ID_TPC_7_0
:
4397 db_reg_offset
= mmTPC7_QM_PQ_PI_0
;
4400 case GAUDI_QUEUE_ID_TPC_7_1
:
4401 db_reg_offset
= mmTPC7_QM_PQ_PI_1
;
4404 case GAUDI_QUEUE_ID_TPC_7_2
:
4405 db_reg_offset
= mmTPC7_QM_PQ_PI_2
;
4408 case GAUDI_QUEUE_ID_TPC_7_3
:
4409 db_reg_offset
= mmTPC7_QM_PQ_PI_3
;
4412 case GAUDI_QUEUE_ID_NIC_0_0
...GAUDI_QUEUE_ID_NIC_0_3
:
4413 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC0
))
4414 invalid_queue
= true;
4416 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4417 db_reg_offset
= mmNIC0_QM0_PQ_PI_0
+ q_off
;
4420 case GAUDI_QUEUE_ID_NIC_1_0
...GAUDI_QUEUE_ID_NIC_1_3
:
4421 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC1
))
4422 invalid_queue
= true;
4424 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4425 db_reg_offset
= mmNIC0_QM1_PQ_PI_0
+ q_off
;
4428 case GAUDI_QUEUE_ID_NIC_2_0
...GAUDI_QUEUE_ID_NIC_2_3
:
4429 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC2
))
4430 invalid_queue
= true;
4432 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4433 db_reg_offset
= mmNIC1_QM0_PQ_PI_0
+ q_off
;
4436 case GAUDI_QUEUE_ID_NIC_3_0
...GAUDI_QUEUE_ID_NIC_3_3
:
4437 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC3
))
4438 invalid_queue
= true;
4440 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4441 db_reg_offset
= mmNIC1_QM1_PQ_PI_0
+ q_off
;
4444 case GAUDI_QUEUE_ID_NIC_4_0
...GAUDI_QUEUE_ID_NIC_4_3
:
4445 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC4
))
4446 invalid_queue
= true;
4448 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4449 db_reg_offset
= mmNIC2_QM0_PQ_PI_0
+ q_off
;
4452 case GAUDI_QUEUE_ID_NIC_5_0
...GAUDI_QUEUE_ID_NIC_5_3
:
4453 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC5
))
4454 invalid_queue
= true;
4456 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4457 db_reg_offset
= mmNIC2_QM1_PQ_PI_0
+ q_off
;
4460 case GAUDI_QUEUE_ID_NIC_6_0
...GAUDI_QUEUE_ID_NIC_6_3
:
4461 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC6
))
4462 invalid_queue
= true;
4464 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4465 db_reg_offset
= mmNIC3_QM0_PQ_PI_0
+ q_off
;
4468 case GAUDI_QUEUE_ID_NIC_7_0
...GAUDI_QUEUE_ID_NIC_7_3
:
4469 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC7
))
4470 invalid_queue
= true;
4472 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4473 db_reg_offset
= mmNIC3_QM1_PQ_PI_0
+ q_off
;
4476 case GAUDI_QUEUE_ID_NIC_8_0
...GAUDI_QUEUE_ID_NIC_8_3
:
4477 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC8
))
4478 invalid_queue
= true;
4480 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4481 db_reg_offset
= mmNIC4_QM0_PQ_PI_0
+ q_off
;
4484 case GAUDI_QUEUE_ID_NIC_9_0
...GAUDI_QUEUE_ID_NIC_9_3
:
4485 if (!(gaudi
->hw_cap_initialized
& HW_CAP_NIC9
))
4486 invalid_queue
= true;
4488 q_off
= ((hw_queue_id
- 1) & 0x3) * 4;
4489 db_reg_offset
= mmNIC4_QM1_PQ_PI_0
+ q_off
;
4493 invalid_queue
= true;
4496 if (invalid_queue
) {
4497 /* Should never get here */
4498 dev_err(hdev
->dev
, "h/w queue %d is invalid. Can't set pi\n",
4505 /* ring the doorbell */
4506 WREG32(db_reg_offset
, db_value
);
4508 if (hw_queue_id
== GAUDI_QUEUE_ID_CPU_PQ
) {
4509 /* make sure device CPU will read latest data from host */
4512 irq_handler_offset
= hdev
->asic_prop
.gic_interrupts_enable
?
4513 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
:
4514 le32_to_cpu(dyn_regs
->gic_host_pi_upd_irq
);
4516 WREG32(irq_handler_offset
,
4517 gaudi_irq_map_table
[GAUDI_EVENT_PI_UPDATE
].cpu_id
);
4521 static void gaudi_pqe_write(struct hl_device
*hdev
, __le64
*pqe
,
4524 __le64
*pbd
= (__le64
*) bd
;
4526 /* The QMANs are on the host memory so a simple copy suffice */
4531 static void *gaudi_dma_alloc_coherent(struct hl_device
*hdev
, size_t size
,
4532 dma_addr_t
*dma_handle
, gfp_t flags
)
4534 void *kernel_addr
= dma_alloc_coherent(&hdev
->pdev
->dev
, size
,
4537 /* Shift to the device's base physical address of host memory */
4539 *dma_handle
+= HOST_PHYS_BASE
;
4544 static void gaudi_dma_free_coherent(struct hl_device
*hdev
, size_t size
,
4545 void *cpu_addr
, dma_addr_t dma_handle
)
4547 /* Cancel the device's base physical address of host memory */
4548 dma_addr_t fixed_dma_handle
= dma_handle
- HOST_PHYS_BASE
;
4550 dma_free_coherent(&hdev
->pdev
->dev
, size
, cpu_addr
, fixed_dma_handle
);
4553 static int gaudi_scrub_device_dram(struct hl_device
*hdev
, u64 val
)
4555 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
4556 u64 cur_addr
= prop
->dram_user_base_address
;
4557 u32 chunk_size
, busy
;
4560 while (cur_addr
< prop
->dram_end_address
) {
4561 for (dma_id
= 0 ; dma_id
< DMA_NUMBER_OF_CHANNELS
; dma_id
++) {
4562 u32 dma_offset
= dma_id
* DMA_CORE_OFFSET
;
4565 min((u64
)SZ_2G
, prop
->dram_end_address
- cur_addr
);
4568 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4569 cur_addr
, cur_addr
+ chunk_size
);
4571 WREG32(mmDMA0_CORE_SRC_BASE_LO
+ dma_offset
,
4572 lower_32_bits(val
));
4573 WREG32(mmDMA0_CORE_SRC_BASE_HI
+ dma_offset
,
4574 upper_32_bits(val
));
4575 WREG32(mmDMA0_CORE_DST_BASE_LO
+ dma_offset
,
4576 lower_32_bits(cur_addr
));
4577 WREG32(mmDMA0_CORE_DST_BASE_HI
+ dma_offset
,
4578 upper_32_bits(cur_addr
));
4579 WREG32(mmDMA0_CORE_DST_TSIZE_0
+ dma_offset
,
4581 WREG32(mmDMA0_CORE_COMMIT
+ dma_offset
,
4582 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT
) |
4583 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT
)));
4585 cur_addr
+= chunk_size
;
4587 if (cur_addr
== prop
->dram_end_address
)
4591 for (dma_id
= 0 ; dma_id
< DMA_NUMBER_OF_CHANNELS
; dma_id
++) {
4592 u32 dma_offset
= dma_id
* DMA_CORE_OFFSET
;
4594 rc
= hl_poll_timeout(
4596 mmDMA0_CORE_STS0
+ dma_offset
,
4598 ((busy
& DMA0_CORE_STS0_BUSY_MASK
) == 0),
4600 HBM_SCRUBBING_TIMEOUT_US
);
4604 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4614 static int gaudi_scrub_device_mem(struct hl_device
*hdev
)
4616 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
4617 u64 wait_to_idle_time
= HBM_SCRUBBING_TIMEOUT_US
;
4618 u64 addr
, size
, val
= hdev
->memory_scrub_val
;
4622 if (!hdev
->memory_scrub
)
4625 timeout
= ktime_add_us(ktime_get(), wait_to_idle_time
);
4626 while (!hdev
->asic_funcs
->is_device_idle(hdev
, NULL
, 0, NULL
)) {
4627 if (ktime_compare(ktime_get(), timeout
) > 0) {
4628 dev_err(hdev
->dev
, "waiting for idle timeout\n");
4631 usleep_range((1000 >> 2) + 1, 1000);
4635 addr
= prop
->sram_user_base_address
;
4636 size
= hdev
->pldm
? 0x10000 : prop
->sram_size
- SRAM_USER_BASE_OFFSET
;
4638 dev_dbg(hdev
->dev
, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4639 addr
, addr
+ size
, val
);
4640 rc
= gaudi_memset_device_memory(hdev
, addr
, size
, val
);
4642 dev_err(hdev
->dev
, "Failed to clear SRAM (%d)\n", rc
);
4646 /* Scrub HBM using all DMA channels in parallel */
4647 rc
= gaudi_scrub_device_dram(hdev
, val
);
4649 dev_err(hdev
->dev
, "Failed to clear HBM (%d)\n", rc
);
4656 static void *gaudi_get_int_queue_base(struct hl_device
*hdev
,
4657 u32 queue_id
, dma_addr_t
*dma_handle
,
4660 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
4661 struct gaudi_internal_qman_info
*q
;
4663 if (queue_id
>= GAUDI_QUEUE_ID_SIZE
||
4664 gaudi_queue_type
[queue_id
] != QUEUE_TYPE_INT
) {
4665 dev_err(hdev
->dev
, "Got invalid queue id %d\n", queue_id
);
4669 q
= &gaudi
->internal_qmans
[queue_id
];
4670 *dma_handle
= q
->pq_dma_addr
;
4671 *queue_len
= q
->pq_size
/ QMAN_PQ_ENTRY_SIZE
;
4673 return q
->pq_kernel_addr
;
4676 static int gaudi_send_cpu_message(struct hl_device
*hdev
, u32
*msg
,
4677 u16 len
, u32 timeout
, u64
*result
)
4679 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
4681 if (!(gaudi
->hw_cap_initialized
& HW_CAP_CPU_Q
)) {
4688 timeout
= GAUDI_MSG_TO_CPU_TIMEOUT_USEC
;
4690 return hl_fw_send_cpu_message(hdev
, GAUDI_QUEUE_ID_CPU_PQ
, msg
, len
,
4694 static int gaudi_test_queue(struct hl_device
*hdev
, u32 hw_queue_id
)
4696 struct packet_msg_prot
*fence_pkt
;
4697 dma_addr_t pkt_dma_addr
;
4698 u32 fence_val
, tmp
, timeout_usec
;
4699 dma_addr_t fence_dma_addr
;
4704 timeout_usec
= GAUDI_PLDM_TEST_QUEUE_WAIT_USEC
;
4706 timeout_usec
= GAUDI_TEST_QUEUE_WAIT_USEC
;
4708 fence_val
= GAUDI_QMAN0_FENCE_VAL
;
4710 fence_ptr
= hl_asic_dma_pool_zalloc(hdev
, 4, GFP_KERNEL
, &fence_dma_addr
);
4713 "Failed to allocate memory for H/W queue %d testing\n",
4720 fence_pkt
= hl_asic_dma_pool_zalloc(hdev
, sizeof(struct packet_msg_prot
), GFP_KERNEL
,
4724 "Failed to allocate packet for H/W queue %d testing\n",
4727 goto free_fence_ptr
;
4730 tmp
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_MSG_PROT
);
4731 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK
, 1);
4732 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
4734 fence_pkt
->ctl
= cpu_to_le32(tmp
);
4735 fence_pkt
->value
= cpu_to_le32(fence_val
);
4736 fence_pkt
->addr
= cpu_to_le64(fence_dma_addr
);
4738 rc
= hl_hw_queue_send_cb_no_cmpl(hdev
, hw_queue_id
,
4739 sizeof(struct packet_msg_prot
),
4743 "Failed to send fence packet to H/W queue %d\n",
4748 rc
= hl_poll_timeout_memory(hdev
, fence_ptr
, tmp
, (tmp
== fence_val
),
4749 1000, timeout_usec
, true);
4751 hl_hw_queue_inc_ci_kernel(hdev
, hw_queue_id
);
4753 if (rc
== -ETIMEDOUT
) {
4755 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4756 hw_queue_id
, (unsigned long long) fence_dma_addr
, tmp
);
4761 hl_asic_dma_pool_free(hdev
, (void *) fence_pkt
, pkt_dma_addr
);
4763 hl_asic_dma_pool_free(hdev
, (void *) fence_ptr
, fence_dma_addr
);
4767 static int gaudi_test_cpu_queue(struct hl_device
*hdev
)
4769 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
4772 * check capability here as send_cpu_message() won't update the result
4773 * value if no capability
4775 if (!(gaudi
->hw_cap_initialized
& HW_CAP_CPU_Q
))
4778 return hl_fw_test_cpu_queue(hdev
);
4781 static int gaudi_test_queues(struct hl_device
*hdev
)
4783 int i
, rc
, ret_val
= 0;
4785 for (i
= 0 ; i
< hdev
->asic_prop
.max_queues
; i
++) {
4786 if (hdev
->asic_prop
.hw_queues_props
[i
].type
== QUEUE_TYPE_EXT
) {
4787 rc
= gaudi_test_queue(hdev
, i
);
4793 rc
= gaudi_test_cpu_queue(hdev
);
4800 static void *gaudi_dma_pool_zalloc(struct hl_device
*hdev
, size_t size
,
4801 gfp_t mem_flags
, dma_addr_t
*dma_handle
)
4805 if (size
> GAUDI_DMA_POOL_BLK_SIZE
)
4808 kernel_addr
= dma_pool_zalloc(hdev
->dma_pool
, mem_flags
, dma_handle
);
4810 /* Shift to the device's base physical address of host memory */
4812 *dma_handle
+= HOST_PHYS_BASE
;
4817 static void gaudi_dma_pool_free(struct hl_device
*hdev
, void *vaddr
,
4818 dma_addr_t dma_addr
)
4820 /* Cancel the device's base physical address of host memory */
4821 dma_addr_t fixed_dma_addr
= dma_addr
- HOST_PHYS_BASE
;
4823 dma_pool_free(hdev
->dma_pool
, vaddr
, fixed_dma_addr
);
4826 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device
*hdev
,
4827 size_t size
, dma_addr_t
*dma_handle
)
4829 return hl_fw_cpu_accessible_dma_pool_alloc(hdev
, size
, dma_handle
);
4832 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device
*hdev
,
4833 size_t size
, void *vaddr
)
4835 hl_fw_cpu_accessible_dma_pool_free(hdev
, size
, vaddr
);
4838 static u32
gaudi_get_dma_desc_list_size(struct hl_device
*hdev
, struct sg_table
*sgt
)
4840 struct scatterlist
*sg
, *sg_next_iter
;
4841 u32 count
, dma_desc_cnt
;
4843 dma_addr_t addr
, addr_next
;
4847 for_each_sgtable_dma_sg(sgt
, sg
, count
) {
4848 len
= sg_dma_len(sg
);
4849 addr
= sg_dma_address(sg
);
4854 while ((count
+ 1) < sgt
->nents
) {
4855 sg_next_iter
= sg_next(sg
);
4856 len_next
= sg_dma_len(sg_next_iter
);
4857 addr_next
= sg_dma_address(sg_next_iter
);
4862 if ((addr
+ len
== addr_next
) &&
4863 (len
+ len_next
<= DMA_MAX_TRANSFER_SIZE
)) {
4875 return dma_desc_cnt
* sizeof(struct packet_lin_dma
);
4878 static int gaudi_pin_memory_before_cs(struct hl_device
*hdev
,
4879 struct hl_cs_parser
*parser
,
4880 struct packet_lin_dma
*user_dma_pkt
,
4881 u64 addr
, enum dma_data_direction dir
)
4883 struct hl_userptr
*userptr
;
4886 if (hl_userptr_is_pinned(hdev
, addr
, le32_to_cpu(user_dma_pkt
->tsize
),
4887 parser
->job_userptr_list
, &userptr
))
4888 goto already_pinned
;
4890 userptr
= kzalloc(sizeof(*userptr
), GFP_KERNEL
);
4894 rc
= hl_pin_host_memory(hdev
, addr
, le32_to_cpu(user_dma_pkt
->tsize
),
4899 list_add_tail(&userptr
->job_node
, parser
->job_userptr_list
);
4901 rc
= hl_dma_map_sgtable(hdev
, userptr
->sgt
, dir
);
4903 dev_err(hdev
->dev
, "failed to map sgt with DMA region\n");
4907 userptr
->dma_mapped
= true;
4911 parser
->patched_cb_size
+=
4912 gaudi_get_dma_desc_list_size(hdev
, userptr
->sgt
);
4917 list_del(&userptr
->job_node
);
4918 hl_unpin_host_memory(hdev
, userptr
);
4924 static int gaudi_validate_dma_pkt_host(struct hl_device
*hdev
,
4925 struct hl_cs_parser
*parser
,
4926 struct packet_lin_dma
*user_dma_pkt
,
4929 enum dma_data_direction dir
;
4930 bool skip_host_mem_pin
= false, user_memset
;
4934 user_memset
= (le32_to_cpu(user_dma_pkt
->ctl
) &
4935 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK
) >>
4936 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT
;
4940 skip_host_mem_pin
= true;
4942 dev_dbg(hdev
->dev
, "DMA direction is HOST --> DEVICE\n");
4943 dir
= DMA_TO_DEVICE
;
4944 addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
4946 dev_dbg(hdev
->dev
, "DMA direction is DEVICE --> HOST\n");
4947 dir
= DMA_FROM_DEVICE
;
4948 addr
= (le64_to_cpu(user_dma_pkt
->dst_addr
) &
4949 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK
) >>
4950 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT
;
4953 if (skip_host_mem_pin
)
4954 parser
->patched_cb_size
+= sizeof(*user_dma_pkt
);
4956 rc
= gaudi_pin_memory_before_cs(hdev
, parser
, user_dma_pkt
,
4962 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device
*hdev
,
4963 struct hl_cs_parser
*parser
,
4964 struct packet_lin_dma
*user_dma_pkt
)
4966 bool src_in_host
= false;
4967 u64 dst_addr
= (le64_to_cpu(user_dma_pkt
->dst_addr
) &
4968 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK
) >>
4969 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT
;
4971 dev_dbg(hdev
->dev
, "DMA packet details:\n");
4972 dev_dbg(hdev
->dev
, "source == 0x%llx\n",
4973 le64_to_cpu(user_dma_pkt
->src_addr
));
4974 dev_dbg(hdev
->dev
, "destination == 0x%llx\n", dst_addr
);
4975 dev_dbg(hdev
->dev
, "size == %u\n", le32_to_cpu(user_dma_pkt
->tsize
));
4978 * Special handling for DMA with size 0. Bypass all validations
4979 * because no transactions will be done except for WR_COMP, which
4980 * is not a security issue
4982 if (!le32_to_cpu(user_dma_pkt
->tsize
)) {
4983 parser
->patched_cb_size
+= sizeof(*user_dma_pkt
);
4987 if (parser
->hw_queue_id
<= GAUDI_QUEUE_ID_DMA_0_3
)
4990 return gaudi_validate_dma_pkt_host(hdev
, parser
, user_dma_pkt
,
4994 static int gaudi_validate_load_and_exe_pkt(struct hl_device
*hdev
,
4995 struct hl_cs_parser
*parser
,
4996 struct packet_load_and_exe
*user_pkt
)
5000 cfg
= le32_to_cpu(user_pkt
->cfg
);
5002 if (cfg
& GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK
) {
5004 "User not allowed to use Load and Execute\n");
5008 parser
->patched_cb_size
+= sizeof(struct packet_load_and_exe
);
5013 static int gaudi_validate_cb(struct hl_device
*hdev
,
5014 struct hl_cs_parser
*parser
, bool is_mmu
)
5016 u32 cb_parsed_length
= 0;
5019 parser
->patched_cb_size
= 0;
5021 /* cb_user_size is more than 0 so loop will always be executed */
5022 while (cb_parsed_length
< parser
->user_cb_size
) {
5023 enum packet_id pkt_id
;
5025 struct gaudi_packet
*user_pkt
;
5027 user_pkt
= parser
->user_cb
->kernel_address
+ cb_parsed_length
;
5029 pkt_id
= (enum packet_id
) (
5030 (le64_to_cpu(user_pkt
->header
) &
5031 PACKET_HEADER_PACKET_ID_MASK
) >>
5032 PACKET_HEADER_PACKET_ID_SHIFT
);
5034 if (!validate_packet_id(pkt_id
)) {
5035 dev_err(hdev
->dev
, "Invalid packet id %u\n", pkt_id
);
5040 pkt_size
= gaudi_packet_sizes
[pkt_id
];
5041 cb_parsed_length
+= pkt_size
;
5042 if (cb_parsed_length
> parser
->user_cb_size
) {
5044 "packet 0x%x is out of CB boundary\n", pkt_id
);
5050 case PACKET_MSG_PROT
:
5052 "User not allowed to use MSG_PROT\n");
5057 dev_err(hdev
->dev
, "User not allowed to use CP_DMA\n");
5062 dev_err(hdev
->dev
, "User not allowed to use STOP\n");
5066 case PACKET_WREG_BULK
:
5068 "User not allowed to use WREG_BULK\n");
5072 case PACKET_LOAD_AND_EXE
:
5073 rc
= gaudi_validate_load_and_exe_pkt(hdev
, parser
,
5074 (struct packet_load_and_exe
*) user_pkt
);
5077 case PACKET_LIN_DMA
:
5078 parser
->contains_dma_pkt
= true;
5080 parser
->patched_cb_size
+= pkt_size
;
5082 rc
= gaudi_validate_dma_pkt_no_mmu(hdev
, parser
,
5083 (struct packet_lin_dma
*) user_pkt
);
5086 case PACKET_WREG_32
:
5087 case PACKET_MSG_LONG
:
5088 case PACKET_MSG_SHORT
:
5092 case PACKET_ARB_POINT
:
5093 parser
->patched_cb_size
+= pkt_size
;
5097 dev_err(hdev
->dev
, "Invalid packet header 0x%x\n",
5108 * The new CB should have space at the end for two MSG_PROT packets:
5109 * 1. Optional NOP padding for cacheline alignment
5110 * 2. A packet that will act as a completion packet
5111 * 3. A packet that will generate MSI interrupt
5113 if (parser
->completion
)
5114 parser
->patched_cb_size
+= gaudi_get_patched_cb_extra_size(
5115 parser
->patched_cb_size
);
5120 static int gaudi_patch_dma_packet(struct hl_device
*hdev
,
5121 struct hl_cs_parser
*parser
,
5122 struct packet_lin_dma
*user_dma_pkt
,
5123 struct packet_lin_dma
*new_dma_pkt
,
5124 u32
*new_dma_pkt_size
)
5126 struct hl_userptr
*userptr
;
5127 struct scatterlist
*sg
, *sg_next_iter
;
5128 u32 count
, dma_desc_cnt
, user_wrcomp_en_mask
, ctl
;
5130 dma_addr_t dma_addr
, dma_addr_next
;
5131 u64 device_memory_addr
, addr
;
5132 enum dma_data_direction dir
;
5133 struct sg_table
*sgt
;
5134 bool src_in_host
= false;
5135 bool skip_host_mem_pin
= false;
5138 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
5140 if (parser
->hw_queue_id
<= GAUDI_QUEUE_ID_DMA_0_3
)
5143 user_memset
= (ctl
& GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK
) >>
5144 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT
;
5147 addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
5148 device_memory_addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
5149 dir
= DMA_TO_DEVICE
;
5151 skip_host_mem_pin
= true;
5153 addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
5154 device_memory_addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
5155 dir
= DMA_FROM_DEVICE
;
5158 if ((!skip_host_mem_pin
) &&
5159 (!hl_userptr_is_pinned(hdev
, addr
,
5160 le32_to_cpu(user_dma_pkt
->tsize
),
5161 parser
->job_userptr_list
, &userptr
))) {
5162 dev_err(hdev
->dev
, "Userptr 0x%llx + 0x%x NOT mapped\n",
5163 addr
, user_dma_pkt
->tsize
);
5167 if ((user_memset
) && (dir
== DMA_TO_DEVICE
)) {
5168 memcpy(new_dma_pkt
, user_dma_pkt
, sizeof(*user_dma_pkt
));
5169 *new_dma_pkt_size
= sizeof(*user_dma_pkt
);
5173 user_wrcomp_en_mask
= ctl
& GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK
;
5178 for_each_sgtable_dma_sg(sgt
, sg
, count
) {
5179 len
= sg_dma_len(sg
);
5180 dma_addr
= sg_dma_address(sg
);
5185 while ((count
+ 1) < sgt
->nents
) {
5186 sg_next_iter
= sg_next(sg
);
5187 len_next
= sg_dma_len(sg_next_iter
);
5188 dma_addr_next
= sg_dma_address(sg_next_iter
);
5193 if ((dma_addr
+ len
== dma_addr_next
) &&
5194 (len
+ len_next
<= DMA_MAX_TRANSFER_SIZE
)) {
5203 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
5204 if (likely(dma_desc_cnt
))
5205 ctl
&= ~GAUDI_PKT_CTL_EB_MASK
;
5206 ctl
&= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK
;
5207 new_dma_pkt
->ctl
= cpu_to_le32(ctl
);
5208 new_dma_pkt
->tsize
= cpu_to_le32(len
);
5210 if (dir
== DMA_TO_DEVICE
) {
5211 new_dma_pkt
->src_addr
= cpu_to_le64(dma_addr
);
5212 new_dma_pkt
->dst_addr
= cpu_to_le64(device_memory_addr
);
5214 new_dma_pkt
->src_addr
= cpu_to_le64(device_memory_addr
);
5215 new_dma_pkt
->dst_addr
= cpu_to_le64(dma_addr
);
5219 device_memory_addr
+= len
;
5224 if (!dma_desc_cnt
) {
5226 "Error of 0 SG entries when patching DMA packet\n");
5230 /* Fix the last dma packet - wrcomp must be as user set it */
5232 new_dma_pkt
->ctl
|= cpu_to_le32(user_wrcomp_en_mask
);
5234 *new_dma_pkt_size
= dma_desc_cnt
* sizeof(struct packet_lin_dma
);
5239 static int gaudi_patch_cb(struct hl_device
*hdev
,
5240 struct hl_cs_parser
*parser
)
5242 u32 cb_parsed_length
= 0;
5243 u32 cb_patched_cur_length
= 0;
5246 /* cb_user_size is more than 0 so loop will always be executed */
5247 while (cb_parsed_length
< parser
->user_cb_size
) {
5248 enum packet_id pkt_id
;
5250 u32 new_pkt_size
= 0;
5251 struct gaudi_packet
*user_pkt
, *kernel_pkt
;
5253 user_pkt
= parser
->user_cb
->kernel_address
+ cb_parsed_length
;
5254 kernel_pkt
= parser
->patched_cb
->kernel_address
+
5255 cb_patched_cur_length
;
5257 pkt_id
= (enum packet_id
) (
5258 (le64_to_cpu(user_pkt
->header
) &
5259 PACKET_HEADER_PACKET_ID_MASK
) >>
5260 PACKET_HEADER_PACKET_ID_SHIFT
);
5262 if (!validate_packet_id(pkt_id
)) {
5263 dev_err(hdev
->dev
, "Invalid packet id %u\n", pkt_id
);
5268 pkt_size
= gaudi_packet_sizes
[pkt_id
];
5269 cb_parsed_length
+= pkt_size
;
5270 if (cb_parsed_length
> parser
->user_cb_size
) {
5272 "packet 0x%x is out of CB boundary\n", pkt_id
);
5278 case PACKET_LIN_DMA
:
5279 rc
= gaudi_patch_dma_packet(hdev
, parser
,
5280 (struct packet_lin_dma
*) user_pkt
,
5281 (struct packet_lin_dma
*) kernel_pkt
,
5283 cb_patched_cur_length
+= new_pkt_size
;
5286 case PACKET_MSG_PROT
:
5288 "User not allowed to use MSG_PROT\n");
5293 dev_err(hdev
->dev
, "User not allowed to use CP_DMA\n");
5298 dev_err(hdev
->dev
, "User not allowed to use STOP\n");
5302 case PACKET_WREG_32
:
5303 case PACKET_WREG_BULK
:
5304 case PACKET_MSG_LONG
:
5305 case PACKET_MSG_SHORT
:
5309 case PACKET_ARB_POINT
:
5310 case PACKET_LOAD_AND_EXE
:
5311 memcpy(kernel_pkt
, user_pkt
, pkt_size
);
5312 cb_patched_cur_length
+= pkt_size
;
5316 dev_err(hdev
->dev
, "Invalid packet header 0x%x\n",
5329 static int gaudi_parse_cb_mmu(struct hl_device
*hdev
,
5330 struct hl_cs_parser
*parser
)
5333 u32 patched_cb_size
;
5334 struct hl_cb
*user_cb
;
5338 * The new CB should have space at the end for two MSG_PROT packets:
5339 * 1. Optional NOP padding for cacheline alignment
5340 * 2. A packet that will act as a completion packet
5341 * 3. A packet that will generate MSI interrupt
5343 if (parser
->completion
)
5344 parser
->patched_cb_size
= parser
->user_cb_size
+
5345 gaudi_get_patched_cb_extra_size(parser
->user_cb_size
);
5347 parser
->patched_cb_size
= parser
->user_cb_size
;
5349 rc
= hl_cb_create(hdev
, &hdev
->kernel_mem_mgr
, hdev
->kernel_ctx
,
5350 parser
->patched_cb_size
, false, false,
5355 "Failed to allocate patched CB for DMA CS %d\n",
5360 parser
->patched_cb
= hl_cb_get(&hdev
->kernel_mem_mgr
, handle
);
5361 /* hl_cb_get should never fail */
5362 if (!parser
->patched_cb
) {
5363 dev_crit(hdev
->dev
, "DMA CB handle invalid 0x%llx\n", handle
);
5369 * We are protected from overflow because the check
5370 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5371 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5373 * There is no option to reach here without going through that check because:
5374 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5375 * an external queue.
5376 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5378 memcpy(parser
->patched_cb
->kernel_address
,
5379 parser
->user_cb
->kernel_address
,
5380 parser
->user_cb_size
);
5382 patched_cb_size
= parser
->patched_cb_size
;
5384 /* Validate patched CB instead of user CB */
5385 user_cb
= parser
->user_cb
;
5386 parser
->user_cb
= parser
->patched_cb
;
5387 rc
= gaudi_validate_cb(hdev
, parser
, true);
5388 parser
->user_cb
= user_cb
;
5391 hl_cb_put(parser
->patched_cb
);
5395 if (patched_cb_size
!= parser
->patched_cb_size
) {
5396 dev_err(hdev
->dev
, "user CB size mismatch\n");
5397 hl_cb_put(parser
->patched_cb
);
5404 * Always call cb destroy here because we still have 1 reference
5405 * to it by calling cb_get earlier. After the job will be completed,
5406 * cb_put will release it, but here we want to remove it from the
5409 hl_cb_destroy(&hdev
->kernel_mem_mgr
, handle
);
5414 static int gaudi_parse_cb_no_mmu(struct hl_device
*hdev
,
5415 struct hl_cs_parser
*parser
)
5420 rc
= gaudi_validate_cb(hdev
, parser
, false);
5425 rc
= hl_cb_create(hdev
, &hdev
->kernel_mem_mgr
, hdev
->kernel_ctx
,
5426 parser
->patched_cb_size
, false, false,
5430 "Failed to allocate patched CB for DMA CS %d\n", rc
);
5434 parser
->patched_cb
= hl_cb_get(&hdev
->kernel_mem_mgr
, handle
);
5435 /* hl_cb_get should never fail here */
5436 if (!parser
->patched_cb
) {
5437 dev_crit(hdev
->dev
, "DMA CB handle invalid 0x%llx\n", handle
);
5442 rc
= gaudi_patch_cb(hdev
, parser
);
5445 hl_cb_put(parser
->patched_cb
);
5449 * Always call cb destroy here because we still have 1 reference
5450 * to it by calling cb_get earlier. After the job will be completed,
5451 * cb_put will release it, but here we want to remove it from the
5454 hl_cb_destroy(&hdev
->kernel_mem_mgr
, handle
);
5458 hl_userptr_delete_list(hdev
, parser
->job_userptr_list
);
5462 static int gaudi_parse_cb_no_ext_queue(struct hl_device
*hdev
,
5463 struct hl_cs_parser
*parser
)
5465 struct asic_fixed_properties
*asic_prop
= &hdev
->asic_prop
;
5466 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5467 u32 nic_queue_offset
, nic_mask_q_id
;
5469 if ((parser
->hw_queue_id
>= GAUDI_QUEUE_ID_NIC_0_0
) &&
5470 (parser
->hw_queue_id
<= GAUDI_QUEUE_ID_NIC_9_3
)) {
5471 nic_queue_offset
= parser
->hw_queue_id
- GAUDI_QUEUE_ID_NIC_0_0
;
5472 nic_mask_q_id
= 1 << (HW_CAP_NIC_SHIFT
+ (nic_queue_offset
>> 2));
5474 if (!(gaudi
->hw_cap_initialized
& nic_mask_q_id
)) {
5475 dev_err(hdev
->dev
, "h/w queue %d is disabled\n", parser
->hw_queue_id
);
5480 /* For internal queue jobs just check if CB address is valid */
5481 if (hl_mem_area_inside_range((u64
) (uintptr_t) parser
->user_cb
,
5482 parser
->user_cb_size
,
5483 asic_prop
->sram_user_base_address
,
5484 asic_prop
->sram_end_address
))
5487 if (hl_mem_area_inside_range((u64
) (uintptr_t) parser
->user_cb
,
5488 parser
->user_cb_size
,
5489 asic_prop
->dram_user_base_address
,
5490 asic_prop
->dram_end_address
))
5493 /* PMMU and HPMMU addresses are equal, check only one of them */
5494 if (hl_mem_area_inside_range((u64
) (uintptr_t) parser
->user_cb
,
5495 parser
->user_cb_size
,
5496 asic_prop
->pmmu
.start_addr
,
5497 asic_prop
->pmmu
.end_addr
))
5501 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5502 parser
->user_cb
, parser
->user_cb_size
);
5507 static int gaudi_cs_parser(struct hl_device
*hdev
, struct hl_cs_parser
*parser
)
5509 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5511 if (parser
->queue_type
== QUEUE_TYPE_INT
)
5512 return gaudi_parse_cb_no_ext_queue(hdev
, parser
);
5514 if (gaudi
->hw_cap_initialized
& HW_CAP_MMU
)
5515 return gaudi_parse_cb_mmu(hdev
, parser
);
5517 return gaudi_parse_cb_no_mmu(hdev
, parser
);
5520 static void gaudi_add_end_of_cb_packets(struct hl_device
*hdev
, void *kernel_address
,
5521 u32 len
, u32 original_len
, u64 cq_addr
, u32 cq_val
,
5522 u32 msi_vec
, bool eb
)
5524 struct packet_msg_prot
*cq_pkt
;
5525 struct packet_nop
*cq_padding
;
5529 cq_padding
= kernel_address
+ original_len
;
5530 cq_pkt
= kernel_address
+ len
- (sizeof(struct packet_msg_prot
) * 2);
5532 while ((void *)cq_padding
< (void *)cq_pkt
) {
5533 cq_padding
->ctl
= cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_NOP
));
5537 tmp
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_MSG_PROT
);
5538 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
5541 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK
, 1);
5543 cq_pkt
->ctl
= cpu_to_le32(tmp
);
5544 cq_pkt
->value
= cpu_to_le32(cq_val
);
5545 cq_pkt
->addr
= cpu_to_le64(cq_addr
);
5549 tmp
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_MSG_PROT
);
5550 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
5551 cq_pkt
->ctl
= cpu_to_le32(tmp
);
5552 cq_pkt
->value
= cpu_to_le32(1);
5553 msi_addr
= hdev
->pdev
? mmPCIE_CORE_MSI_REQ
: mmPCIE_MSI_INTR_0
+ msi_vec
* 4;
5554 cq_pkt
->addr
= cpu_to_le64(CFG_BASE
+ msi_addr
);
5557 static void gaudi_update_eq_ci(struct hl_device
*hdev
, u32 val
)
5559 WREG32(mmCPU_IF_EQ_RD_OFFS
, val
);
5562 static int gaudi_memset_device_memory(struct hl_device
*hdev
, u64 addr
,
5565 struct packet_lin_dma
*lin_dma_pkt
;
5566 struct hl_cs_job
*job
;
5567 u32 cb_size
, ctl
, err_cause
;
5571 cb
= hl_cb_kernel_create(hdev
, PAGE_SIZE
, false);
5575 lin_dma_pkt
= cb
->kernel_address
;
5576 memset(lin_dma_pkt
, 0, sizeof(*lin_dma_pkt
));
5577 cb_size
= sizeof(*lin_dma_pkt
);
5579 ctl
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_LIN_DMA
);
5580 ctl
|= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK
, 1);
5581 ctl
|= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK
, 1);
5582 ctl
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
5583 ctl
|= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK
, 1);
5585 lin_dma_pkt
->ctl
= cpu_to_le32(ctl
);
5586 lin_dma_pkt
->src_addr
= cpu_to_le64(val
);
5587 lin_dma_pkt
->dst_addr
|= cpu_to_le64(addr
);
5588 lin_dma_pkt
->tsize
= cpu_to_le32(size
);
5590 job
= hl_cs_allocate_job(hdev
, QUEUE_TYPE_EXT
, true);
5592 dev_err(hdev
->dev
, "Failed to allocate a new job\n");
5597 /* Verify DMA is OK */
5598 err_cause
= RREG32(mmDMA0_CORE_ERR_CAUSE
);
5599 if (err_cause
&& !hdev
->init_done
) {
5601 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5603 WREG32(mmDMA0_CORE_ERR_CAUSE
, err_cause
);
5608 atomic_inc(&job
->user_cb
->cs_cnt
);
5609 job
->user_cb_size
= cb_size
;
5610 job
->hw_queue_id
= GAUDI_QUEUE_ID_DMA_0_0
;
5611 job
->patched_cb
= job
->user_cb
;
5612 job
->job_cb_size
= job
->user_cb_size
+ sizeof(struct packet_msg_prot
);
5614 hl_debugfs_add_job(hdev
, job
);
5616 rc
= gaudi_send_job_on_qman0(hdev
, job
);
5617 hl_debugfs_remove_job(hdev
, job
);
5619 atomic_dec(&cb
->cs_cnt
);
5621 /* Verify DMA is OK */
5622 err_cause
= RREG32(mmDMA0_CORE_ERR_CAUSE
);
5624 dev_err(hdev
->dev
, "DMA Failed, cause 0x%x\n", err_cause
);
5626 if (!hdev
->init_done
) {
5628 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5630 WREG32(mmDMA0_CORE_ERR_CAUSE
, err_cause
);
5636 hl_cb_destroy(&hdev
->kernel_mem_mgr
, cb
->buf
->handle
);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size, ctl;
	int i, rc;

	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %u bytes", SZ_2M);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
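/*
 * Illustrative sketch (not part of the original source): clearing one sync
 * object block this way builds a CB of NUM_OF_SOB_IN_BLOCK MSG_LONG packets
 * plus one MSG_PROT, e.g.
 *
 *	rc = gaudi_memset_registers(hdev,
 *			CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
 *			NUM_OF_SOB_IN_BLOCK, 0);
 *
 * where cb_size = NUM_OF_SOB_IN_BLOCK * sizeof(struct packet_msg_long) +
 * sizeof(struct packet_msg_prot), which must stay below SZ_2M.
 */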
static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
	u64 base_addr;
	u32 num_regs;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return -ENOMEM;
	}

	return 0;
}
static void gaudi_restore_dma_registers(struct hl_device *hdev)
{
	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		u64 sob_addr = CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
				(i * sob_delta);
		u32 dma_offset = i * DMA_CORE_OFFSET;

		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
				lower_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
				upper_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);

		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
		 * modified by the user for SRAM reduction
		 */
		if (i > 1)
			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
					0x00000001);
	}
}
static void gaudi_restore_qm_registers(struct hl_device *hdev)
{
	u32 qman_offset;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		qman_offset = i * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = i * TPC_QMAN_OFFSET;
		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
	}
}
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc;

	rc = gaudi_restore_sm_registers(hdev);
	if (rc)
		return rc;

	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);

	return 0;
}

static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}

static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
{
	u32 size = hdev->asic_prop.mmu_pgt_size +
			hdev->asic_prop.mmu_cache_mng_size;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_pgt_addr;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return gaudi_memset_device_memory(hdev, addr, size, 0);
}

static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
					void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	u32 qm_glbl_sts0, qm_cgm_sts;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!kernel_addr)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
		      IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
			      IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);

	return rc;
}
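/*
 * Worked example (illustrative, not part of the original source): a debugfs
 * read of 5 MB from device address A is served in SZ_2M chunks through the
 * coherent bounce buffer: 2 MB from A, 2 MB from A + 2M, and a final 1 MB from
 * A + 4M, each chunk DMA'd into kernel_addr and then memcpy'd into blob_addr.
 */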
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}

static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}

void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
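/*
 * Illustrative example (not part of the original source): for asid == 3 the
 * two RMW accesses first clear bits [10:0] of the register (the MMBP and ASID
 * fields) and then OR in 3, so a previous value of 0x7FF becomes 0x003 and the
 * engine issues its transactions as ASID 3 with MMU bypass disabled.
 */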
6051 static void gaudi_mmu_prepare(struct hl_device
*hdev
, u32 asid
)
6053 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
6055 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MMU
))
6058 if (asid
& ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK
) {
6059 dev_crit(hdev
->dev
, "asid %u is too big\n", asid
);
6063 gaudi_mmu_prepare_reg(hdev
, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6064 gaudi_mmu_prepare_reg(hdev
, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6065 gaudi_mmu_prepare_reg(hdev
, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6066 gaudi_mmu_prepare_reg(hdev
, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6067 gaudi_mmu_prepare_reg(hdev
, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6069 gaudi_mmu_prepare_reg(hdev
, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6070 gaudi_mmu_prepare_reg(hdev
, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6071 gaudi_mmu_prepare_reg(hdev
, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6072 gaudi_mmu_prepare_reg(hdev
, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6073 gaudi_mmu_prepare_reg(hdev
, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6075 gaudi_mmu_prepare_reg(hdev
, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6076 gaudi_mmu_prepare_reg(hdev
, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6077 gaudi_mmu_prepare_reg(hdev
, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6078 gaudi_mmu_prepare_reg(hdev
, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6079 gaudi_mmu_prepare_reg(hdev
, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6081 gaudi_mmu_prepare_reg(hdev
, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6082 gaudi_mmu_prepare_reg(hdev
, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6083 gaudi_mmu_prepare_reg(hdev
, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6084 gaudi_mmu_prepare_reg(hdev
, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6085 gaudi_mmu_prepare_reg(hdev
, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6087 gaudi_mmu_prepare_reg(hdev
, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6088 gaudi_mmu_prepare_reg(hdev
, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6089 gaudi_mmu_prepare_reg(hdev
, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6090 gaudi_mmu_prepare_reg(hdev
, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6091 gaudi_mmu_prepare_reg(hdev
, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6093 gaudi_mmu_prepare_reg(hdev
, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6094 gaudi_mmu_prepare_reg(hdev
, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6095 gaudi_mmu_prepare_reg(hdev
, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6096 gaudi_mmu_prepare_reg(hdev
, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6097 gaudi_mmu_prepare_reg(hdev
, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6099 gaudi_mmu_prepare_reg(hdev
, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6100 gaudi_mmu_prepare_reg(hdev
, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6101 gaudi_mmu_prepare_reg(hdev
, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6102 gaudi_mmu_prepare_reg(hdev
, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6103 gaudi_mmu_prepare_reg(hdev
, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6105 gaudi_mmu_prepare_reg(hdev
, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6106 gaudi_mmu_prepare_reg(hdev
, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6107 gaudi_mmu_prepare_reg(hdev
, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6108 gaudi_mmu_prepare_reg(hdev
, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6109 gaudi_mmu_prepare_reg(hdev
, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6111 gaudi_mmu_prepare_reg(hdev
, mmDMA0_CORE_NON_SECURE_PROPS
, asid
);
6112 gaudi_mmu_prepare_reg(hdev
, mmDMA1_CORE_NON_SECURE_PROPS
, asid
);
6113 gaudi_mmu_prepare_reg(hdev
, mmDMA2_CORE_NON_SECURE_PROPS
, asid
);
6114 gaudi_mmu_prepare_reg(hdev
, mmDMA3_CORE_NON_SECURE_PROPS
, asid
);
6115 gaudi_mmu_prepare_reg(hdev
, mmDMA4_CORE_NON_SECURE_PROPS
, asid
);
6116 gaudi_mmu_prepare_reg(hdev
, mmDMA5_CORE_NON_SECURE_PROPS
, asid
);
6117 gaudi_mmu_prepare_reg(hdev
, mmDMA6_CORE_NON_SECURE_PROPS
, asid
);
6118 gaudi_mmu_prepare_reg(hdev
, mmDMA7_CORE_NON_SECURE_PROPS
, asid
);
6120 gaudi_mmu_prepare_reg(hdev
, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6121 gaudi_mmu_prepare_reg(hdev
, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6122 gaudi_mmu_prepare_reg(hdev
, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6123 gaudi_mmu_prepare_reg(hdev
, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6124 gaudi_mmu_prepare_reg(hdev
, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6125 gaudi_mmu_prepare_reg(hdev
, mmTPC0_CFG_ARUSER_LO
, asid
);
6126 gaudi_mmu_prepare_reg(hdev
, mmTPC0_CFG_AWUSER_LO
, asid
);
6128 gaudi_mmu_prepare_reg(hdev
, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6129 gaudi_mmu_prepare_reg(hdev
, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6130 gaudi_mmu_prepare_reg(hdev
, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6131 gaudi_mmu_prepare_reg(hdev
, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6132 gaudi_mmu_prepare_reg(hdev
, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6133 gaudi_mmu_prepare_reg(hdev
, mmTPC1_CFG_ARUSER_LO
, asid
);
6134 gaudi_mmu_prepare_reg(hdev
, mmTPC1_CFG_AWUSER_LO
, asid
);
6136 gaudi_mmu_prepare_reg(hdev
, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6137 gaudi_mmu_prepare_reg(hdev
, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6138 gaudi_mmu_prepare_reg(hdev
, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6139 gaudi_mmu_prepare_reg(hdev
, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6140 gaudi_mmu_prepare_reg(hdev
, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6141 gaudi_mmu_prepare_reg(hdev
, mmTPC2_CFG_ARUSER_LO
, asid
);
6142 gaudi_mmu_prepare_reg(hdev
, mmTPC2_CFG_AWUSER_LO
, asid
);
6144 gaudi_mmu_prepare_reg(hdev
, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6145 gaudi_mmu_prepare_reg(hdev
, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6146 gaudi_mmu_prepare_reg(hdev
, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6147 gaudi_mmu_prepare_reg(hdev
, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6148 gaudi_mmu_prepare_reg(hdev
, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6149 gaudi_mmu_prepare_reg(hdev
, mmTPC3_CFG_ARUSER_LO
, asid
);
6150 gaudi_mmu_prepare_reg(hdev
, mmTPC3_CFG_AWUSER_LO
, asid
);
6152 gaudi_mmu_prepare_reg(hdev
, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6153 gaudi_mmu_prepare_reg(hdev
, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6154 gaudi_mmu_prepare_reg(hdev
, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6155 gaudi_mmu_prepare_reg(hdev
, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6156 gaudi_mmu_prepare_reg(hdev
, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6157 gaudi_mmu_prepare_reg(hdev
, mmTPC4_CFG_ARUSER_LO
, asid
);
6158 gaudi_mmu_prepare_reg(hdev
, mmTPC4_CFG_AWUSER_LO
, asid
);
6160 gaudi_mmu_prepare_reg(hdev
, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6161 gaudi_mmu_prepare_reg(hdev
, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6162 gaudi_mmu_prepare_reg(hdev
, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6163 gaudi_mmu_prepare_reg(hdev
, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6164 gaudi_mmu_prepare_reg(hdev
, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6165 gaudi_mmu_prepare_reg(hdev
, mmTPC5_CFG_ARUSER_LO
, asid
);
6166 gaudi_mmu_prepare_reg(hdev
, mmTPC5_CFG_AWUSER_LO
, asid
);
6168 gaudi_mmu_prepare_reg(hdev
, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6169 gaudi_mmu_prepare_reg(hdev
, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6170 gaudi_mmu_prepare_reg(hdev
, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6171 gaudi_mmu_prepare_reg(hdev
, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6172 gaudi_mmu_prepare_reg(hdev
, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6173 gaudi_mmu_prepare_reg(hdev
, mmTPC6_CFG_ARUSER_LO
, asid
);
6174 gaudi_mmu_prepare_reg(hdev
, mmTPC6_CFG_AWUSER_LO
, asid
);
6176 gaudi_mmu_prepare_reg(hdev
, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6177 gaudi_mmu_prepare_reg(hdev
, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6178 gaudi_mmu_prepare_reg(hdev
, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6179 gaudi_mmu_prepare_reg(hdev
, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6180 gaudi_mmu_prepare_reg(hdev
, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6181 gaudi_mmu_prepare_reg(hdev
, mmTPC7_CFG_ARUSER_LO
, asid
);
6182 gaudi_mmu_prepare_reg(hdev
, mmTPC7_CFG_AWUSER_LO
, asid
);
6184 gaudi_mmu_prepare_reg(hdev
, mmMME0_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6185 gaudi_mmu_prepare_reg(hdev
, mmMME0_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6186 gaudi_mmu_prepare_reg(hdev
, mmMME0_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6187 gaudi_mmu_prepare_reg(hdev
, mmMME0_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6188 gaudi_mmu_prepare_reg(hdev
, mmMME0_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6189 gaudi_mmu_prepare_reg(hdev
, mmMME2_QM_GLBL_NON_SECURE_PROPS_0
, asid
);
6190 gaudi_mmu_prepare_reg(hdev
, mmMME2_QM_GLBL_NON_SECURE_PROPS_1
, asid
);
6191 gaudi_mmu_prepare_reg(hdev
, mmMME2_QM_GLBL_NON_SECURE_PROPS_2
, asid
);
6192 gaudi_mmu_prepare_reg(hdev
, mmMME2_QM_GLBL_NON_SECURE_PROPS_3
, asid
);
6193 gaudi_mmu_prepare_reg(hdev
, mmMME2_QM_GLBL_NON_SECURE_PROPS_4
, asid
);
6195 gaudi_mmu_prepare_reg(hdev
, mmMME0_SBAB_ARUSER0
, asid
);
6196 gaudi_mmu_prepare_reg(hdev
, mmMME0_SBAB_ARUSER1
, asid
);
6197 gaudi_mmu_prepare_reg(hdev
, mmMME1_SBAB_ARUSER0
, asid
);
6198 gaudi_mmu_prepare_reg(hdev
, mmMME1_SBAB_ARUSER1
, asid
);
6199 gaudi_mmu_prepare_reg(hdev
, mmMME2_SBAB_ARUSER0
, asid
);
6200 gaudi_mmu_prepare_reg(hdev
, mmMME2_SBAB_ARUSER1
, asid
);
6201 gaudi_mmu_prepare_reg(hdev
, mmMME3_SBAB_ARUSER0
, asid
);
6202 gaudi_mmu_prepare_reg(hdev
, mmMME3_SBAB_ARUSER1
, asid
);
6203 gaudi_mmu_prepare_reg(hdev
, mmMME0_ACC_WBC
, asid
);
6204 gaudi_mmu_prepare_reg(hdev
, mmMME1_ACC_WBC
, asid
);
6205 gaudi_mmu_prepare_reg(hdev
, mmMME2_ACC_WBC
, asid
);
6206 gaudi_mmu_prepare_reg(hdev
, mmMME3_ACC_WBC
, asid
);
6208 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC0
) {
6209 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0
,
6211 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1
,
6213 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2
,
6215 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3
,
6217 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4
,
6221 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC1
) {
6222 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0
,
6224 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1
,
6226 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2
,
6228 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3
,
6230 gaudi_mmu_prepare_reg(hdev
, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4
,
6234 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC2
) {
6235 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0
,
6237 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1
,
6239 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2
,
6241 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3
,
6243 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4
,
6247 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC3
) {
6248 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0
,
6250 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1
,
6252 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2
,
6254 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3
,
6256 gaudi_mmu_prepare_reg(hdev
, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4
,
6260 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC4
) {
6261 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0
,
6263 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1
,
6265 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2
,
6267 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3
,
6269 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4
,
6273 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC5
) {
6274 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0
,
6276 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1
,
6278 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2
,
6280 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3
,
6282 gaudi_mmu_prepare_reg(hdev
, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4
,
6286 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC6
) {
6287 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0
,
6289 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1
,
6291 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2
,
6293 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3
,
6295 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4
,
6299 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC7
) {
6300 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0
,
6302 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1
,
6304 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2
,
6306 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3
,
6308 gaudi_mmu_prepare_reg(hdev
, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4
,
6312 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC8
) {
6313 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0
,
6315 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1
,
6317 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2
,
6319 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3
,
6321 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4
,
6325 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC9
) {
6326 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0
,
6328 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1
,
6330 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2
,
6332 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3
,
6334 gaudi_mmu_prepare_reg(hdev
, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4
,
6338 gaudi_mmu_prepare_reg(hdev
, mmPSOC_GLOBAL_CONF_TRACE_ARUSER
, asid
);
6339 gaudi_mmu_prepare_reg(hdev
, mmPSOC_GLOBAL_CONF_TRACE_AWUSER
, asid
);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_PROT + dma_offset,
			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}
6423 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device
*hdev
, u32 x_y
,
6424 bool is_write
, u16
*engine_id_1
,
6427 u32 dma_id
[2], dma_offset
, err_cause
[2], mask
, i
;
6429 mask
= is_write
? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK
:
6430 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK
;
6433 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0
:
6434 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1
:
6438 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0
:
6439 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1
:
6443 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0
:
6444 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1
:
6448 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0
:
6449 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1
:
6454 goto unknown_initiator
;
6457 for (i
= 0 ; i
< 2 ; i
++) {
6458 dma_offset
= dma_id
[i
] * DMA_CORE_OFFSET
;
6459 err_cause
[i
] = RREG32(mmDMA0_CORE_ERR_CAUSE
+ dma_offset
);
6463 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0
:
6464 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1
:
6465 if ((err_cause
[0] & mask
) && !(err_cause
[1] & mask
)) {
6466 *engine_id_1
= GAUDI_ENGINE_ID_DMA_0
;
6468 } else if (!(err_cause
[0] & mask
) && (err_cause
[1] & mask
)) {
6469 *engine_id_1
= GAUDI_ENGINE_ID_DMA_2
;
6472 *engine_id_1
= GAUDI_ENGINE_ID_DMA_0
;
6473 *engine_id_2
= GAUDI_ENGINE_ID_DMA_2
;
6474 return "DMA0 or DMA2";
6476 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0
:
6477 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1
:
6478 if ((err_cause
[0] & mask
) && !(err_cause
[1] & mask
)) {
6479 *engine_id_1
= GAUDI_ENGINE_ID_DMA_1
;
6481 } else if (!(err_cause
[0] & mask
) && (err_cause
[1] & mask
)) {
6482 *engine_id_1
= GAUDI_ENGINE_ID_DMA_3
;
6485 *engine_id_1
= GAUDI_ENGINE_ID_DMA_1
;
6486 *engine_id_2
= GAUDI_ENGINE_ID_DMA_3
;
6487 return "DMA1 or DMA3";
6489 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0
:
6490 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1
:
6491 if ((err_cause
[0] & mask
) && !(err_cause
[1] & mask
)) {
6492 *engine_id_1
= GAUDI_ENGINE_ID_DMA_4
;
6494 } else if (!(err_cause
[0] & mask
) && (err_cause
[1] & mask
)) {
6495 *engine_id_1
= GAUDI_ENGINE_ID_DMA_6
;
6498 *engine_id_1
= GAUDI_ENGINE_ID_DMA_4
;
6499 *engine_id_2
= GAUDI_ENGINE_ID_DMA_6
;
6500 return "DMA4 or DMA6";
6502 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0
:
6503 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1
:
6504 if ((err_cause
[0] & mask
) && !(err_cause
[1] & mask
)) {
6505 *engine_id_1
= GAUDI_ENGINE_ID_DMA_5
;
6507 } else if (!(err_cause
[0] & mask
) && (err_cause
[1] & mask
)) {
6508 *engine_id_1
= GAUDI_ENGINE_ID_DMA_7
;
6511 *engine_id_1
= GAUDI_ENGINE_ID_DMA_5
;
6512 *engine_id_2
= GAUDI_ENGINE_ID_DMA_7
;
6513 return "DMA5 or DMA7";
6518 return "unknown initiator";
6521 static const char *gaudi_get_razwi_initiator_name(struct hl_device
*hdev
, bool is_write
,
6522 u16
*engine_id_1
, u16
*engine_id_2
)
6524 u32 val
, x_y
, axi_id
;
6526 val
= is_write
? RREG32(mmMMU_UP_RAZWI_WRITE_ID
) :
6527 RREG32(mmMMU_UP_RAZWI_READ_ID
);
6528 x_y
= val
& ((RAZWI_INITIATOR_Y_MASK
<< RAZWI_INITIATOR_Y_SHIFT
) |
6529 (RAZWI_INITIATOR_X_MASK
<< RAZWI_INITIATOR_X_SHIFT
));
6530 axi_id
= val
& (RAZWI_INITIATOR_AXI_ID_MASK
<<
6531 RAZWI_INITIATOR_AXI_ID_SHIFT
);
6534 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0
:
6535 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC
)) {
6536 *engine_id_1
= GAUDI_ENGINE_ID_TPC_0
;
6539 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC
)) {
6540 *engine_id_1
= GAUDI_ENGINE_ID_NIC_0
;
6544 case RAZWI_INITIATOR_ID_X_Y_TPC1
:
6545 *engine_id_1
= GAUDI_ENGINE_ID_TPC_1
;
6547 case RAZWI_INITIATOR_ID_X_Y_MME0_0
:
6548 case RAZWI_INITIATOR_ID_X_Y_MME0_1
:
6549 *engine_id_1
= GAUDI_ENGINE_ID_MME_0
;
6551 case RAZWI_INITIATOR_ID_X_Y_MME1_0
:
6552 case RAZWI_INITIATOR_ID_X_Y_MME1_1
:
6553 *engine_id_1
= GAUDI_ENGINE_ID_MME_1
;
6555 case RAZWI_INITIATOR_ID_X_Y_TPC2
:
6556 *engine_id_1
= GAUDI_ENGINE_ID_TPC_2
;
6558 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC
:
6559 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC
)) {
6560 *engine_id_1
= GAUDI_ENGINE_ID_TPC_3
;
6563 /* PCI, CPU or PSOC does not have engine id*/
6564 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI
))
6566 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU
))
6568 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC
))
6571 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0
:
6572 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1
:
6573 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0
:
6574 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1
:
6575 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0
:
6576 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1
:
6577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0
:
6578 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1
:
6579 return gaudi_get_razwi_initiator_dma_name(hdev
, x_y
, is_write
,
6580 engine_id_1
, engine_id_2
);
6581 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2
:
6582 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC
)) {
6583 *engine_id_1
= GAUDI_ENGINE_ID_TPC_4
;
6586 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC
)) {
6587 *engine_id_1
= GAUDI_ENGINE_ID_NIC_1
;
6590 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT
)) {
6591 *engine_id_1
= GAUDI_ENGINE_ID_NIC_2
;
6595 case RAZWI_INITIATOR_ID_X_Y_TPC5
:
6596 *engine_id_1
= GAUDI_ENGINE_ID_TPC_5
;
6598 case RAZWI_INITIATOR_ID_X_Y_MME2_0
:
6599 case RAZWI_INITIATOR_ID_X_Y_MME2_1
:
6600 *engine_id_1
= GAUDI_ENGINE_ID_MME_2
;
6602 case RAZWI_INITIATOR_ID_X_Y_MME3_0
:
6603 case RAZWI_INITIATOR_ID_X_Y_MME3_1
:
6604 *engine_id_1
= GAUDI_ENGINE_ID_MME_3
;
6606 case RAZWI_INITIATOR_ID_X_Y_TPC6
:
6607 *engine_id_1
= GAUDI_ENGINE_ID_TPC_6
;
6609 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5
:
6610 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC
)) {
6611 *engine_id_1
= GAUDI_ENGINE_ID_TPC_7
;
6614 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC
)) {
6615 *engine_id_1
= GAUDI_ENGINE_ID_NIC_4
;
6618 if (axi_id
== RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT
)) {
6619 *engine_id_1
= GAUDI_ENGINE_ID_NIC_5
;
6628 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6630 (val
>> RAZWI_INITIATOR_Y_SHIFT
) & RAZWI_INITIATOR_Y_MASK
,
6631 (val
>> RAZWI_INITIATOR_X_SHIFT
) & RAZWI_INITIATOR_X_MASK
,
6632 (val
>> RAZWI_INITIATOR_AXI_ID_SHIFT
) &
6633 RAZWI_INITIATOR_AXI_ID_MASK
);
6635 return "unknown initiator";
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
						u16 *engine_id_2, bool *is_read, bool *is_write)
{
	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
		*is_write = true;
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
		*is_read = true;
	}
}
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
/*
 *  +-------------------+------------------------------------------------------+
 *  | Configuration Reg |                     Description                      |
 *  |      Address      |                                                      |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF34 memory wrappers 63:32                           |
 *  |                   |0xF38 memory wrappers 95:64                           |
 *  |                   |0xF3C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF44 memory wrappers 63:32                           |
 *  |                   |0xF48 memory wrappers 95:64                           |
 *  |                   |0xF4C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 */
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;

	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_addr += i * 4;
		err_word = RREG32(err_addr);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		return -EINVAL;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

	return 0;
}
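/*
 * Worked example (illustrative, not part of the original source): for a TPC
 * SERR event params->num_memories is 90, so num_mem_regs = 90 / 32 + 1 = 3 and
 * the scan walks three SERR indication registers; a single bit set at position
 * 5 of the second register yields *memory_wrapper_idx = 5 + 32 = 37.
 */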
/**
 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the cyclically decremented index
 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	u32 mask = q_len - 1;

	/*
	 * modular decrement is equivalent to adding (queue_size -1)
	 * later we take LSBs to make sure the value is in the
	 * range [0, queue_len - 1]
	 */
	return (idx + q_len - 1) & mask;
}
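/*
 * Illustrative example (not part of the original source): with q_len = 8 the
 * mask is 7, so gaudi_queue_idx_dec(0, 8) == 7 and gaudi_queue_idx_dec(5, 8)
 * == 4, i.e. the index walks backwards and wraps around the queue.
 */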
/**
 * gaudi_handle_sw_config_stream_data - print SW config stream data
 *
 * @hdev: pointer to the habanalabs device structure
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 */
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base, u64 event_mask)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
						stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
			(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
			(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
		stream, cq_ptr, size);

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
		hdev->captured_err_info.undef_opcode.cq_size = size;
		hdev->captured_err_info.undef_opcode.stream_id = stream;
	}
}
6822 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6824 * @hdev: pointer to the habanalabs device structure
6825 * @qid_base: first QID of the QMAN (out of 4 streams)
6826 * @stream: the QMAN's stream
6827 * @qman_base: base address of QMAN registers block
6828 * @event_mask: mask of the last events occurred
6829 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6831 static void gaudi_handle_last_pqes_on_err(struct hl_device
*hdev
, u32 qid_base
,
6832 u32 stream
, u64 qman_base
,
6836 u32 ci
, qm_ci_stream_off
, queue_len
;
6837 struct hl_hw_queue
*q
;
6838 u64 pq_ci
, addr
[PQ_FETCHER_CACHE_SIZE
];
6841 q
= &hdev
->kernel_queues
[qid_base
+ stream
];
6843 qm_ci_stream_off
= mmTPC0_QM_PQ_CI_1
- mmTPC0_QM_PQ_CI_0
;
6844 pq_ci
= qman_base
+ (mmTPC0_QM_PQ_CI_0
- mmTPC0_QM_BASE
) +
6845 stream
* qm_ci_stream_off
;
6847 queue_len
= (q
->queue_type
== QUEUE_TYPE_INT
) ?
6848 q
->int_queue_len
: HL_QUEUE_LENGTH
;
6850 hdev
->asic_funcs
->hw_queues_lock(hdev
);
6853 gaudi_handle_sw_config_stream_data(hdev
, stream
, qman_base
, event_mask
);
6857 /* we should start printing form ci -1 */
6858 ci
= gaudi_queue_idx_dec(ci
, queue_len
);
6859 memset(addr
, 0, sizeof(addr
));
6861 for (i
= 0; i
< PQ_FETCHER_CACHE_SIZE
; i
++) {
6865 bd
= q
->kernel_address
;
6868 len
= le32_to_cpu(bd
->len
);
6869 /* len 0 means uninitialized entry- break */
6873 addr
[i
] = le64_to_cpu(bd
->ptr
);
6875 dev_info(hdev
->dev
, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6876 stream
, ci
, addr
[i
], len
);
6878 /* get previous ci, wrap if needed */
6879 ci
= gaudi_queue_idx_dec(ci
, queue_len
);
6882 if (event_mask
& HL_NOTIFIER_EVENT_UNDEFINED_OPCODE
) {
6883 struct undefined_opcode_info
*undef_opcode
= &hdev
->captured_err_info
.undef_opcode
;
6884 u32 arr_idx
= undef_opcode
->cb_addr_streams_len
;
6887 undef_opcode
->timestamp
= ktime_get();
6888 undef_opcode
->engine_id
= gaudi_queue_id_to_engine_id
[qid_base
];
6891 memcpy(undef_opcode
->cb_addr_streams
[arr_idx
], addr
, sizeof(addr
));
6892 undef_opcode
->cb_addr_streams_len
++;
6895 hdev
->asic_funcs
->hw_queues_unlock(hdev
);
/**
 * handle_qman_data_on_err - extract QMAN data on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 *
 * This function attempts to extract as much data as possible on QMAN error.
 * On upper CP print the SW config stream data and last 8 PQEs.
 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
 */
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
				u32 stream, u64 qman_base, u64 event_mask)
{
	u32 i;

	if (stream != QMAN_STREAMS) {
		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
			qman_base, event_mask, true);
		return;
	}

	/* handle Lower-CP */
	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	for (i = 0; i < QMAN_STREAMS; i++)
		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
			qman_base, event_mask, false);
}
6930 static void gaudi_handle_qman_err_generic(struct hl_device
*hdev
,
6931 const char *qm_name
,
6936 u32 i
, j
, glbl_sts_val
, arb_err_val
, glbl_sts_clr_val
;
6937 u64 glbl_sts_addr
, arb_err_addr
;
6940 glbl_sts_addr
= qman_base
+ (mmTPC0_QM_GLBL_STS1_0
- mmTPC0_QM_BASE
);
6941 arb_err_addr
= qman_base
+ (mmTPC0_QM_ARB_ERR_CAUSE
- mmTPC0_QM_BASE
);
6943 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6944 for (i
= 0 ; i
< QMAN_STREAMS
+ 1 ; i
++) {
6945 glbl_sts_clr_val
= 0;
6946 glbl_sts_val
= RREG32(glbl_sts_addr
+ 4 * i
);
6951 if (i
== QMAN_STREAMS
)
6952 snprintf(reg_desc
, ARRAY_SIZE(reg_desc
), "LowerCP");
6954 snprintf(reg_desc
, ARRAY_SIZE(reg_desc
), "stream%u", i
);
6956 for (j
= 0 ; j
< GAUDI_NUM_OF_QM_ERR_CAUSE
; j
++) {
6957 if (glbl_sts_val
& BIT(j
)) {
6958 dev_err_ratelimited(hdev
->dev
,
6959 "%s %s. err cause: %s\n",
6961 gaudi_qman_error_cause
[j
]);
6962 glbl_sts_clr_val
|= BIT(j
);
6965 /* check for undefined opcode */
6966 if (glbl_sts_val
& TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK
&&
6967 hdev
->captured_err_info
.undef_opcode
.write_enable
) {
6968 memset(&hdev
->captured_err_info
.undef_opcode
, 0,
6969 sizeof(hdev
->captured_err_info
.undef_opcode
));
6971 hdev
->captured_err_info
.undef_opcode
.write_enable
= false;
6972 *event_mask
|= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE
;
6975 /* Write 1 clear errors */
6976 if (!hdev
->stop_on_err
)
6977 WREG32(glbl_sts_addr
+ 4 * i
, glbl_sts_clr_val
);
6979 handle_qman_data_on_err(hdev
, qid_base
, i
, qman_base
, *event_mask
);
6982 arb_err_val
= RREG32(arb_err_addr
);
6987 for (j
= 0 ; j
< GAUDI_NUM_OF_QM_ARB_ERR_CAUSE
; j
++) {
6988 if (arb_err_val
& BIT(j
)) {
6989 dev_err_ratelimited(hdev
->dev
,
6990 "%s ARB_ERR. err cause: %s\n",
6992 gaudi_qman_arb_error_cause
[j
]);
static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	/* Flip the bits as the enum is ordered in the opposite way */
	index = (index ^ 0x3) & 0x3;

	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: SOB Group %u overflow/underflow",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: AXI ID %u response error",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	default:
		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
				le32_to_cpu(sei_data->sei_log));
		break;
	}
}
7031 static void gaudi_handle_ecc_event(struct hl_device
*hdev
, u16 event_type
,
7032 struct hl_eq_ecc_data
*ecc_data
)
7034 struct ecc_info_extract_params params
;
7035 u64 ecc_address
= 0, ecc_syndrom
= 0;
7036 u8 index
, memory_wrapper_idx
= 0;
7037 bool extract_info_from_fw
;
7040 if (hdev
->asic_prop
.fw_security_enabled
) {
7041 extract_info_from_fw
= true;
7042 goto extract_ecc_info
;
7045 switch (event_type
) {
7046 case GAUDI_EVENT_PCIE_CORE_SERR
... GAUDI_EVENT_PCIE_PHY_DERR
:
7047 case GAUDI_EVENT_DMA0_SERR_ECC
... GAUDI_EVENT_MMU_DERR
:
7048 extract_info_from_fw
= true;
7050 case GAUDI_EVENT_TPC0_SERR
... GAUDI_EVENT_TPC7_SERR
:
7051 index
= event_type
- GAUDI_EVENT_TPC0_SERR
;
7052 params
.block_address
= mmTPC0_CFG_BASE
+ index
* TPC_CFG_OFFSET
;
7053 params
.num_memories
= 90;
7054 params
.derr
= false;
7055 extract_info_from_fw
= false;
7057 case GAUDI_EVENT_TPC0_DERR
... GAUDI_EVENT_TPC7_DERR
:
7058 index
= event_type
- GAUDI_EVENT_TPC0_DERR
;
7059 params
.block_address
=
7060 mmTPC0_CFG_BASE
+ index
* TPC_CFG_OFFSET
;
7061 params
.num_memories
= 90;
7063 extract_info_from_fw
= false;
7065 case GAUDI_EVENT_MME0_ACC_SERR
:
7066 case GAUDI_EVENT_MME1_ACC_SERR
:
7067 case GAUDI_EVENT_MME2_ACC_SERR
:
7068 case GAUDI_EVENT_MME3_ACC_SERR
:
7069 index
= (event_type
- GAUDI_EVENT_MME0_ACC_SERR
) / 4;
7070 params
.block_address
= mmMME0_ACC_BASE
+ index
* MME_ACC_OFFSET
;
7071 params
.num_memories
= 128;
7072 params
.derr
= false;
7073 extract_info_from_fw
= false;
7075 case GAUDI_EVENT_MME0_ACC_DERR
:
7076 case GAUDI_EVENT_MME1_ACC_DERR
:
7077 case GAUDI_EVENT_MME2_ACC_DERR
:
7078 case GAUDI_EVENT_MME3_ACC_DERR
:
7079 index
= (event_type
- GAUDI_EVENT_MME0_ACC_DERR
) / 4;
7080 params
.block_address
= mmMME0_ACC_BASE
+ index
* MME_ACC_OFFSET
;
7081 params
.num_memories
= 128;
7083 extract_info_from_fw
= false;
7085 case GAUDI_EVENT_MME0_SBAB_SERR
:
7086 case GAUDI_EVENT_MME1_SBAB_SERR
:
7087 case GAUDI_EVENT_MME2_SBAB_SERR
:
7088 case GAUDI_EVENT_MME3_SBAB_SERR
:
7089 index
= (event_type
- GAUDI_EVENT_MME0_SBAB_SERR
) / 4;
7090 params
.block_address
=
7091 mmMME0_SBAB_BASE
+ index
* MME_ACC_OFFSET
;
7092 params
.num_memories
= 33;
7093 params
.derr
= false;
7094 extract_info_from_fw
= false;
7096 case GAUDI_EVENT_MME0_SBAB_DERR
:
7097 case GAUDI_EVENT_MME1_SBAB_DERR
:
7098 case GAUDI_EVENT_MME2_SBAB_DERR
:
7099 case GAUDI_EVENT_MME3_SBAB_DERR
:
7100 index
= (event_type
- GAUDI_EVENT_MME0_SBAB_DERR
) / 4;
7101 params
.block_address
=
7102 mmMME0_SBAB_BASE
+ index
* MME_ACC_OFFSET
;
7103 params
.num_memories
= 33;
7105 extract_info_from_fw
= false;
7112 if (extract_info_from_fw
) {
7113 ecc_address
= le64_to_cpu(ecc_data
->ecc_address
);
7114 ecc_syndrom
= le64_to_cpu(ecc_data
->ecc_syndrom
);
7115 memory_wrapper_idx
= ecc_data
->memory_wrapper_idx
;
7117 rc
= gaudi_extract_ecc_info(hdev
, ¶ms
, &ecc_address
,
7118 &ecc_syndrom
, &memory_wrapper_idx
);
7124 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7125 ecc_address
, ecc_syndrom
, memory_wrapper_idx
);
7128 static void gaudi_handle_qman_err(struct hl_device
*hdev
, u16 event_type
, u64
*event_mask
)
7135 switch (event_type
) {
7136 case GAUDI_EVENT_TPC0_QM
... GAUDI_EVENT_TPC7_QM
:
7137 index
= event_type
- GAUDI_EVENT_TPC0_QM
;
7138 qid_base
= GAUDI_QUEUE_ID_TPC_0_0
+ index
* QMAN_STREAMS
;
7139 qman_base
= mmTPC0_QM_BASE
+ index
* TPC_QMAN_OFFSET
;
7140 snprintf(desc
, ARRAY_SIZE(desc
), "%s%d", "TPC_QM", index
);
7142 case GAUDI_EVENT_MME0_QM
... GAUDI_EVENT_MME2_QM
:
7143 if (event_type
== GAUDI_EVENT_MME0_QM
) {
7145 qid_base
= GAUDI_QUEUE_ID_MME_0_0
;
7146 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7148 qid_base
= GAUDI_QUEUE_ID_MME_1_0
;
7150 qman_base
= mmMME0_QM_BASE
+ index
* MME_QMAN_OFFSET
;
7151 snprintf(desc
, ARRAY_SIZE(desc
), "%s%d", "MME_QM", index
);
7153 case GAUDI_EVENT_DMA0_QM
... GAUDI_EVENT_DMA7_QM
:
7154 index
= event_type
- GAUDI_EVENT_DMA0_QM
;
7155 qid_base
= GAUDI_QUEUE_ID_DMA_0_0
+ index
* QMAN_STREAMS
;
7156 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7159 qman_base
= mmDMA0_QM_BASE
+ index
* DMA_QMAN_OFFSET
;
7160 snprintf(desc
, ARRAY_SIZE(desc
), "%s%d", "DMA_QM", index
);
7162 case GAUDI_EVENT_NIC0_QM0
:
7163 qid_base
= GAUDI_QUEUE_ID_NIC_0_0
;
7164 qman_base
= mmNIC0_QM0_BASE
;
7165 snprintf(desc
, ARRAY_SIZE(desc
), "NIC0_QM0");
7167 case GAUDI_EVENT_NIC0_QM1
:
7168 qid_base
= GAUDI_QUEUE_ID_NIC_1_0
;
7169 qman_base
= mmNIC0_QM1_BASE
;
7170 snprintf(desc
, ARRAY_SIZE(desc
), "NIC0_QM1");
7172 case GAUDI_EVENT_NIC1_QM0
:
7173 qid_base
= GAUDI_QUEUE_ID_NIC_2_0
;
7174 qman_base
= mmNIC1_QM0_BASE
;
7175 snprintf(desc
, ARRAY_SIZE(desc
), "NIC1_QM0");
7177 case GAUDI_EVENT_NIC1_QM1
:
7178 qid_base
= GAUDI_QUEUE_ID_NIC_3_0
;
7179 qman_base
= mmNIC1_QM1_BASE
;
7180 snprintf(desc
, ARRAY_SIZE(desc
), "NIC1_QM1");
7182 case GAUDI_EVENT_NIC2_QM0
:
7183 qid_base
= GAUDI_QUEUE_ID_NIC_4_0
;
7184 qman_base
= mmNIC2_QM0_BASE
;
7185 snprintf(desc
, ARRAY_SIZE(desc
), "NIC2_QM0");
7187 case GAUDI_EVENT_NIC2_QM1
:
7188 qid_base
= GAUDI_QUEUE_ID_NIC_5_0
;
7189 qman_base
= mmNIC2_QM1_BASE
;
7190 snprintf(desc
, ARRAY_SIZE(desc
), "NIC2_QM1");
7192 case GAUDI_EVENT_NIC3_QM0
:
7193 qid_base
= GAUDI_QUEUE_ID_NIC_6_0
;
7194 qman_base
= mmNIC3_QM0_BASE
;
7195 snprintf(desc
, ARRAY_SIZE(desc
), "NIC3_QM0");
7197 case GAUDI_EVENT_NIC3_QM1
:
7198 qid_base
= GAUDI_QUEUE_ID_NIC_7_0
;
7199 qman_base
= mmNIC3_QM1_BASE
;
7200 snprintf(desc
, ARRAY_SIZE(desc
), "NIC3_QM1");
7202 case GAUDI_EVENT_NIC4_QM0
:
7203 qid_base
= GAUDI_QUEUE_ID_NIC_8_0
;
7204 qman_base
= mmNIC4_QM0_BASE
;
7205 snprintf(desc
, ARRAY_SIZE(desc
), "NIC4_QM0");
7207 case GAUDI_EVENT_NIC4_QM1
:
7208 qid_base
= GAUDI_QUEUE_ID_NIC_9_0
;
7209 qman_base
= mmNIC4_QM1_BASE
;
7210 snprintf(desc
, ARRAY_SIZE(desc
), "NIC4_QM1");
7216 gaudi_handle_qman_err_generic(hdev
, desc
, qman_base
, qid_base
, event_mask
);
7219 static void gaudi_print_irq_info(struct hl_device
*hdev
, u16 event_type
,
7220 bool check_razwi
, u64
*event_mask
)
7222 bool is_read
= false, is_write
= false;
7223 u16 engine_id
[2], num_of_razwi_eng
= 0;
7229 * Init engine id by default as not valid and only if razwi initiated from engine with
7230 * engine id it will get valid value.
7232 engine_id
[0] = HL_RAZWI_NA_ENG_ID
;
7233 engine_id
[1] = HL_RAZWI_NA_ENG_ID
;
7235 gaudi_get_event_desc(event_type
, desc
, sizeof(desc
));
7236 dev_err_ratelimited(hdev
->dev
, "Received H/W interrupt %d [\"%s\"]\n",
7240 gaudi_print_and_get_razwi_info(hdev
, &engine_id
[0], &engine_id
[1], &is_read
,
7242 gaudi_print_and_get_mmu_error_info(hdev
, &razwi_addr
, event_mask
);
7245 razwi_flags
|= HL_RAZWI_READ
;
7247 razwi_flags
|= HL_RAZWI_WRITE
;
7249 if (engine_id
[0] != HL_RAZWI_NA_ENG_ID
) {
7250 if (engine_id
[1] != HL_RAZWI_NA_ENG_ID
)
7251 num_of_razwi_eng
= 2;
7253 num_of_razwi_eng
= 1;
7257 hl_handle_razwi(hdev
, razwi_addr
, engine_id
, num_of_razwi_eng
,
7258 razwi_flags
, event_mask
);
static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}
static void gaudi_print_fw_alive_info(struct hl_device *hdev,
					struct hl_eq_fw_alive *fw_alive)
{
	dev_err(hdev->dev,
		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
		le32_to_cpu(fw_alive->process_id),
		le32_to_cpu(fw_alive->thread_id),
		le64_to_cpu(fw_alive->uptime_seconds));
}
7282 static void gaudi_print_nic_axi_irq_info(struct hl_device
*hdev
, u16 event_type
,
7285 char desc
[64] = "", *type
;
7286 struct eq_nic_sei_event
*eq_nic_sei
= data
;
7287 u16 nic_id
= event_type
- GAUDI_EVENT_NIC_SEI_0
;
7289 switch (eq_nic_sei
->axi_error_cause
) {
7306 type
= "NON_AXI_ERR";
7312 dev_err(hdev
->dev
, "unknown NIC AXI cause %d\n",
7313 eq_nic_sei
->axi_error_cause
);
7318 snprintf(desc
, sizeof(desc
), "NIC%d_%s%d", nic_id
, type
,
7320 dev_err_ratelimited(hdev
->dev
, "Received H/W interrupt %d [\"%s\"]\n",
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	/* GAUDI doesn't support any reset except hard-reset */
	return 0;
}
7330 static int gaudi_hbm_read_interrupts(struct hl_device
*hdev
, int device
,
7331 struct hl_eq_hbm_ecc_data
*hbm_ecc_data
)
7333 u32 base
, val
, val2
, wr_par
, rd_par
, ca_par
, derr
, serr
, type
, ch
;
7336 if (hdev
->asic_prop
.fw_app_cpu_boot_dev_sts0
&
7337 CPU_BOOT_DEV_STS0_HBM_ECC_EN
) {
7338 if (!hbm_ecc_data
) {
7339 dev_err(hdev
->dev
, "No FW ECC data");
7343 wr_par
= FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK
,
7344 le32_to_cpu(hbm_ecc_data
->hbm_ecc_info
));
7345 rd_par
= FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK
,
7346 le32_to_cpu(hbm_ecc_data
->hbm_ecc_info
));
7347 ca_par
= FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK
,
7348 le32_to_cpu(hbm_ecc_data
->hbm_ecc_info
));
7349 derr
= FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK
,
7350 le32_to_cpu(hbm_ecc_data
->hbm_ecc_info
));
7351 serr
= FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK
,
7352 le32_to_cpu(hbm_ecc_data
->hbm_ecc_info
));
7353 type
= FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK
,
7354 le32_to_cpu(hbm_ecc_data
->hbm_ecc_info
));
7355 ch
= FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK
,
7356 le32_to_cpu(hbm_ecc_data
->hbm_ecc_info
));
7359 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7360 device
, ch
, wr_par
, rd_par
, ca_par
, serr
, derr
);
7362 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7363 device
, ch
, hbm_ecc_data
->first_addr
, type
,
7364 hbm_ecc_data
->sec_cont_cnt
, hbm_ecc_data
->sec_cnt
,
7365 hbm_ecc_data
->dec_cnt
);
7369 if (hdev
->asic_prop
.fw_security_enabled
) {
7370 dev_info(hdev
->dev
, "Cannot access MC regs for ECC data while security is enabled\n");
7374 base
= GAUDI_HBM_CFG_BASE
+ device
* GAUDI_HBM_CFG_OFFSET
;
7375 for (ch
= 0 ; ch
< GAUDI_HBM_CHANNELS
; ch
++) {
7376 val
= RREG32_MASK(base
+ ch
* 0x1000 + 0x06C, 0x0000FFFF);
7377 val
= (val
& 0xFF) | ((val
>> 8) & 0xFF);
7381 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7382 device
, ch
* 2, val
& 0x1, (val
>> 1) & 0x1,
7383 (val
>> 2) & 0x1, (val
>> 3) & 0x1,
7386 val2
= RREG32(base
+ ch
* 0x1000 + 0x060);
7388 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7390 RREG32(base
+ ch
* 0x1000 + 0x064),
7391 (val2
& 0x200) >> 9, (val2
& 0xFC00) >> 10,
7392 (val2
& 0xFF0000) >> 16,
7393 (val2
& 0xFF000000) >> 24);
7396 val
= RREG32_MASK(base
+ ch
* 0x1000 + 0x07C, 0x0000FFFF);
7397 val
= (val
& 0xFF) | ((val
>> 8) & 0xFF);
7401 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7402 device
, ch
* 2 + 1, val
& 0x1, (val
>> 1) & 0x1,
7403 (val
>> 2) & 0x1, (val
>> 3) & 0x1,
7406 val2
= RREG32(base
+ ch
* 0x1000 + 0x070);
7408 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7410 RREG32(base
+ ch
* 0x1000 + 0x074),
7411 (val2
& 0x200) >> 9, (val2
& 0xFC00) >> 10,
7412 (val2
& 0xFF0000) >> 16,
7413 (val2
& 0xFF000000) >> 24);
7416 /* Clear interrupts */
7417 RMWREG32(base
+ (ch
* 0x1000) + 0x060, 0x1C8, 0x1FF);
7418 RMWREG32(base
+ (ch
* 0x1000) + 0x070, 0x1C8, 0x1FF);
7419 WREG32(base
+ (ch
* 0x1000) + 0x06C, 0x1F1F);
7420 WREG32(base
+ (ch
* 0x1000) + 0x07C, 0x1F1F);
7421 RMWREG32(base
+ (ch
* 0x1000) + 0x060, 0x0, 0xF);
7422 RMWREG32(base
+ (ch
* 0x1000) + 0x070, 0x0, 0xF);
7425 val
= RREG32(base
+ 0x8F30);
7426 val2
= RREG32(base
+ 0x8F34);
7430 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7433 val
= RREG32(base
+ 0x8F40);
7434 val2
= RREG32(base
+ 0x8F44);
7438 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}
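/*
 * Illustrative example (not part of the original source): GAUDI_EVENT_HBM2_SPI_1
 * maps to device index 2, which the event handler then passes to
 * gaudi_hbm_read_interrupts() so that HBM's interrupt information is dumped.
 */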
7468 static bool gaudi_tpc_read_interrupts(struct hl_device
*hdev
, u8 tpc_id
,
7469 char *interrupt_name
)
7471 u32 tpc_offset
= tpc_id
* TPC_CFG_OFFSET
, tpc_interrupts_cause
, i
;
7472 bool soft_reset_required
= false;
7474 tpc_interrupts_cause
= RREG32(mmTPC0_CFG_TPC_INTR_CAUSE
+ tpc_offset
) &
7475 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK
;
7477 for (i
= 0 ; i
< GAUDI_NUM_OF_TPC_INTR_CAUSE
; i
++)
7478 if (tpc_interrupts_cause
& BIT(i
)) {
7479 dev_err_ratelimited(hdev
->dev
,
7480 "TPC%d_%s interrupt cause: %s\n",
7481 tpc_id
, interrupt_name
,
7482 gaudi_tpc_interrupts_cause
[i
]);
7483 /* If this is QM error, we need to soft-reset */
7485 soft_reset_required
= true;
7488 /* Clear interrupts */
7489 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE
+ tpc_offset
, 0);
7491 return soft_reset_required
;
7494 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type
)
7496 return (tpc_dec_event_type
- GAUDI_EVENT_TPC0_DEC
) >> 1;
7499 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type
)
7501 return (tpc_dec_event_type
- GAUDI_EVENT_TPC0_KRN_ERR
) / 6;
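/*
 * Note on the two helpers above: the arithmetic assumes DEC events are
 * allocated two event IDs per TPC engine (hence the shift by 1), while each
 * TPC owns a block of six consecutive KRN_ERR-related event IDs (hence the
 * division by 6). As an illustration, GAUDI_EVENT_TPC0_KRN_ERR + 3 still maps
 * to TPC 0, while GAUDI_EVENT_TPC0_DEC + 2 maps to TPC 1.
 */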
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_info_fw_err_info fw_err_info;
	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0, flags = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required, reset_direct = false;
	u8 cause;
	int rc;

	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		goto reset_device;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		/* In TPC DEC event, notify on TPC assertion. While there isn't
		 * a specific event for assertion yet, the FW generates TPC DEC event.
		 * The SW upper layer will inspect an internal mapped area to indicate
		 * if the event is a TPC Assertion or a "real" TPC DEC.
		 */
		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_qman_err(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		goto reset_device;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
		fw_err_info.event_id = event_type;
		fw_err_info.event_mask = &event_mask;
		hl_handle_fw_err(hdev, &fw_err_info);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	reset_required = true;

	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;

		/* notify on device unavailable while the reset triggered by fw */
		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
	} else if (hdev->hard_reset_on_fw_events) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	} else {
		reset_required = false;
	}

	if (reset_required) {
		/* escalate general hw errors to critical/fatal error */
		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
			hl_handle_critical_hw_err(hdev, event_type, &event_mask);

		hl_device_cond_reset(hdev, flags, event_mask);
	} else {
		hl_fw_unmask_irq(hdev, event_type);
		/* Notification on occurred event needs to be sent although reset is not executed */
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		hl_notifier_event_send_all(hdev, event_mask);
	}
}
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
					bool is_hard, u32 flags,
					u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
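/*
 * The register names above suggest how the hop0 table physical address is
 * split: MMU_HOP0_PA43_12 takes bits [43:12] (phys_addr >> 12 for a
 * 4KB-aligned table) and MMU_HOP0_PA49_44 takes bits [49:44], which is what
 * the corresponding shift macros encode.
 */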
static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	return 0;
}
8007 static bool gaudi_is_device_idle(struct hl_device
*hdev
, u64
*mask_arr
, u8 mask_len
,
8008 struct engines_data
*e
)
8010 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
8011 const char *fmt
= "%-5d%-9s%#-14x%#-12x%#x\n";
8012 const char *mme_slave_fmt
= "%-5d%-9s%-14s%-12s%#x\n";
8013 const char *nic_fmt
= "%-5d%-9s%#-14x%#x\n";
8014 unsigned long *mask
= (unsigned long *)mask_arr
;
8015 u32 qm_glbl_sts0
, qm_cgm_sts
, dma_core_sts0
, tpc_cfg_sts
, mme_arch_sts
;
8016 bool is_idle
= true, is_eng_idle
, is_slave
;
8018 int i
, dma_id
, port
;
8021 hl_engine_data_sprintf(e
,
8022 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8023 "--- ------- ------------ ---------- -------------\n");
8025 for (i
= 0 ; i
< DMA_NUMBER_OF_CHNLS
; i
++) {
8026 dma_id
= gaudi_dma_assignment
[i
];
8027 offset
= dma_id
* DMA_QMAN_OFFSET
;
8029 qm_glbl_sts0
= RREG32(mmDMA0_QM_GLBL_STS0
+ offset
);
8030 qm_cgm_sts
= RREG32(mmDMA0_QM_CGM_STS
+ offset
);
8031 dma_core_sts0
= RREG32(mmDMA0_CORE_STS0
+ offset
);
8032 is_eng_idle
= IS_QM_IDLE(qm_glbl_sts0
, qm_cgm_sts
) &&
8033 IS_DMA_IDLE(dma_core_sts0
);
8034 is_idle
&= is_eng_idle
;
8036 if (mask
&& !is_eng_idle
)
8037 set_bit(GAUDI_ENGINE_ID_DMA_0
+ dma_id
, mask
);
8039 hl_engine_data_sprintf(e
, fmt
, dma_id
,
8040 is_eng_idle
? "Y" : "N", qm_glbl_sts0
,
8041 qm_cgm_sts
, dma_core_sts0
);
8045 hl_engine_data_sprintf(e
,
8046 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8047 "--- ------- ------------ ---------- ----------\n");
8049 for (i
= 0 ; i
< TPC_NUMBER_OF_ENGINES
; i
++) {
8050 offset
= i
* TPC_QMAN_OFFSET
;
8051 qm_glbl_sts0
= RREG32(mmTPC0_QM_GLBL_STS0
+ offset
);
8052 qm_cgm_sts
= RREG32(mmTPC0_QM_CGM_STS
+ offset
);
8053 tpc_cfg_sts
= RREG32(mmTPC0_CFG_STATUS
+ offset
);
8054 is_eng_idle
= IS_QM_IDLE(qm_glbl_sts0
, qm_cgm_sts
) &&
8055 IS_TPC_IDLE(tpc_cfg_sts
);
8056 is_idle
&= is_eng_idle
;
8058 if (mask
&& !is_eng_idle
)
8059 set_bit(GAUDI_ENGINE_ID_TPC_0
+ i
, mask
);
8061 hl_engine_data_sprintf(e
, fmt
, i
,
8062 is_eng_idle
? "Y" : "N",
8063 qm_glbl_sts0
, qm_cgm_sts
, tpc_cfg_sts
);
8067 hl_engine_data_sprintf(e
,
8068 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8069 "--- ------- ------------ ---------- -----------\n");
8071 for (i
= 0 ; i
< MME_NUMBER_OF_ENGINES
; i
++) {
8072 offset
= i
* MME_QMAN_OFFSET
;
8073 mme_arch_sts
= RREG32(mmMME0_CTRL_ARCH_STATUS
+ offset
);
8074 is_eng_idle
= IS_MME_IDLE(mme_arch_sts
);
8076 /* MME 1 & 3 are slaves, no need to check their QMANs */
8079 qm_glbl_sts0
= RREG32(mmMME0_QM_GLBL_STS0
+ offset
);
8080 qm_cgm_sts
= RREG32(mmMME0_QM_CGM_STS
+ offset
);
8081 is_eng_idle
&= IS_QM_IDLE(qm_glbl_sts0
, qm_cgm_sts
);
8084 is_idle
&= is_eng_idle
;
8086 if (mask
&& !is_eng_idle
)
8087 set_bit(GAUDI_ENGINE_ID_MME_0
+ i
, mask
);
8090 hl_engine_data_sprintf(e
, fmt
, i
,
8091 is_eng_idle
? "Y" : "N",
8092 qm_glbl_sts0
, qm_cgm_sts
, mme_arch_sts
);
8094 hl_engine_data_sprintf(e
, mme_slave_fmt
, i
,
8095 is_eng_idle
? "Y" : "N", "-",
8101 hl_engine_data_sprintf(e
,
8102 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8103 "--- ------- ------------ ----------\n");
8105 for (i
= 0 ; i
< (NIC_NUMBER_OF_ENGINES
/ 2) ; i
++) {
8106 offset
= i
* NIC_MACRO_QMAN_OFFSET
;
8108 if (gaudi
->hw_cap_initialized
& BIT(HW_CAP_NIC_SHIFT
+ port
)) {
8109 qm_glbl_sts0
= RREG32(mmNIC0_QM0_GLBL_STS0
+ offset
);
8110 qm_cgm_sts
= RREG32(mmNIC0_QM0_CGM_STS
+ offset
);
8111 is_eng_idle
= IS_QM_IDLE(qm_glbl_sts0
, qm_cgm_sts
);
8112 is_idle
&= is_eng_idle
;
8114 if (mask
&& !is_eng_idle
)
8115 set_bit(GAUDI_ENGINE_ID_NIC_0
+ port
, mask
);
8117 hl_engine_data_sprintf(e
, nic_fmt
, port
,
8118 is_eng_idle
? "Y" : "N",
8119 qm_glbl_sts0
, qm_cgm_sts
);
8123 if (gaudi
->hw_cap_initialized
& BIT(HW_CAP_NIC_SHIFT
+ port
)) {
8124 qm_glbl_sts0
= RREG32(mmNIC0_QM1_GLBL_STS0
+ offset
);
8125 qm_cgm_sts
= RREG32(mmNIC0_QM1_CGM_STS
+ offset
);
8126 is_eng_idle
= IS_QM_IDLE(qm_glbl_sts0
, qm_cgm_sts
);
8127 is_idle
&= is_eng_idle
;
8129 if (mask
&& !is_eng_idle
)
8130 set_bit(GAUDI_ENGINE_ID_NIC_0
+ port
, mask
);
8132 hl_engine_data_sprintf(e
, nic_fmt
, port
,
8133 is_eng_idle
? "Y" : "N",
8134 qm_glbl_sts0
, qm_cgm_sts
);
8139 hl_engine_data_sprintf(e
, "\n");
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_monitor_dump(hdev, data);
}
8187 * this function should be used only during initialization and/or after reset,
8188 * when there are no active users.
8190 static int gaudi_run_tpc_kernel(struct hl_device
*hdev
, u64 tpc_kernel
, u32 tpc_id
)
8196 offset
= tpc_id
* (mmTPC1_CFG_STATUS
- mmTPC0_CFG_STATUS
);
8199 kernel_timeout
= GAUDI_PLDM_TPC_KERNEL_WAIT_USEC
;
8201 kernel_timeout
= HL_DEVICE_TIMEOUT_USEC
;
8203 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW
+ offset
,
8204 lower_32_bits(tpc_kernel
));
8205 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH
+ offset
,
8206 upper_32_bits(tpc_kernel
));
8208 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW
+ offset
,
8209 lower_32_bits(tpc_kernel
));
8210 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH
+ offset
,
8211 upper_32_bits(tpc_kernel
));
8212 /* set a valid LUT pointer, content is of no significance */
8213 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO
+ offset
,
8214 lower_32_bits(tpc_kernel
));
8215 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI
+ offset
,
8216 upper_32_bits(tpc_kernel
));
8218 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR
+ offset
,
8219 lower_32_bits(CFG_BASE
+
8220 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
));
8222 WREG32(mmTPC0_CFG_TPC_CMD
+ offset
,
8223 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT
|
8224 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT
));
8225 /* wait a bit for the engine to start executing */
8226 usleep_range(1000, 1500);
8228 /* wait until engine has finished executing */
8229 rc
= hl_poll_timeout(
8231 mmTPC0_CFG_STATUS
+ offset
,
8233 (status
& TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK
) ==
8234 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK
,
8240 "Timeout while waiting for TPC%d icache prefetch\n",
8245 WREG32(mmTPC0_CFG_TPC_EXECUTE
+ offset
,
8246 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT
);
8248 /* wait a bit for the engine to start executing */
8249 usleep_range(1000, 1500);
8251 /* wait until engine has finished executing */
8252 rc
= hl_poll_timeout(
8254 mmTPC0_CFG_STATUS
+ offset
,
8256 (status
& TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK
) ==
8257 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK
,
8263 "Timeout while waiting for TPC%d vector pipe\n",
8268 rc
= hl_poll_timeout(
8270 mmTPC0_CFG_WQ_INFLIGHT_CNTR
+ offset
,
8278 "Timeout while waiting for TPC%d kernel to execute\n",
8286 static int gaudi_internal_cb_pool_init(struct hl_device
*hdev
,
8289 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
8290 int min_alloc_order
, rc
, collective_cb_size
;
8292 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MMU
))
8295 hdev
->internal_cb_pool_virt_addr
= hl_asic_dma_alloc_coherent(hdev
,
8296 HOST_SPACE_INTERNAL_CB_SZ
,
8297 &hdev
->internal_cb_pool_dma_addr
,
8298 GFP_KERNEL
| __GFP_ZERO
);
8300 if (!hdev
->internal_cb_pool_virt_addr
)
8303 collective_cb_size
= sizeof(struct packet_msg_short
) * 5 +
8304 sizeof(struct packet_fence
);
8305 min_alloc_order
= ilog2(collective_cb_size
);
8307 hdev
->internal_cb_pool
= gen_pool_create(min_alloc_order
, -1);
8308 if (!hdev
->internal_cb_pool
) {
8310 "Failed to create internal CB pool\n");
8312 goto free_internal_cb_pool
;
8315 rc
= gen_pool_add(hdev
->internal_cb_pool
,
8316 (uintptr_t) hdev
->internal_cb_pool_virt_addr
,
8317 HOST_SPACE_INTERNAL_CB_SZ
, -1);
8320 "Failed to add memory to internal CB pool\n");
8322 goto destroy_internal_cb_pool
;
8325 hdev
->internal_cb_va_base
= hl_reserve_va_block(hdev
, ctx
,
8326 HL_VA_RANGE_TYPE_HOST
, HOST_SPACE_INTERNAL_CB_SZ
,
8327 HL_MMU_VA_ALIGNMENT_NOT_NEEDED
);
8329 if (!hdev
->internal_cb_va_base
) {
8331 goto destroy_internal_cb_pool
;
8334 mutex_lock(&hdev
->mmu_lock
);
8336 rc
= hl_mmu_map_contiguous(ctx
, hdev
->internal_cb_va_base
,
8337 hdev
->internal_cb_pool_dma_addr
,
8338 HOST_SPACE_INTERNAL_CB_SZ
);
8340 goto unreserve_internal_cb_pool
;
8342 rc
= hl_mmu_invalidate_cache(hdev
, false, MMU_OP_USERPTR
);
8344 goto unmap_internal_cb_pool
;
8346 mutex_unlock(&hdev
->mmu_lock
);
8350 unmap_internal_cb_pool
:
8351 hl_mmu_unmap_contiguous(ctx
, hdev
->internal_cb_va_base
,
8352 HOST_SPACE_INTERNAL_CB_SZ
);
8353 unreserve_internal_cb_pool
:
8354 mutex_unlock(&hdev
->mmu_lock
);
8355 hl_unreserve_va_block(hdev
, ctx
, hdev
->internal_cb_va_base
,
8356 HOST_SPACE_INTERNAL_CB_SZ
);
8357 destroy_internal_cb_pool
:
8358 gen_pool_destroy(hdev
->internal_cb_pool
);
8359 free_internal_cb_pool
:
8360 hl_asic_dma_free_coherent(hdev
, HOST_SPACE_INTERNAL_CB_SZ
, hdev
->internal_cb_pool_virt_addr
,
8361 hdev
->internal_cb_pool_dma_addr
);
8366 static void gaudi_internal_cb_pool_fini(struct hl_device
*hdev
,
8369 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
8371 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MMU
))
8374 mutex_lock(&hdev
->mmu_lock
);
8375 hl_mmu_unmap_contiguous(ctx
, hdev
->internal_cb_va_base
,
8376 HOST_SPACE_INTERNAL_CB_SZ
);
8377 hl_unreserve_va_block(hdev
, ctx
, hdev
->internal_cb_va_base
,
8378 HOST_SPACE_INTERNAL_CB_SZ
);
8379 hl_mmu_invalidate_cache(hdev
, true, MMU_OP_USERPTR
);
8380 mutex_unlock(&hdev
->mmu_lock
);
8382 gen_pool_destroy(hdev
->internal_cb_pool
);
8384 hl_asic_dma_free_coherent(hdev
, HOST_SPACE_INTERNAL_CB_SZ
, hdev
->internal_cb_pool_virt_addr
,
8385 hdev
->internal_cb_pool_dma_addr
);
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi_restore_user_registers(ctx->hdev);
	if (rc)
		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}

static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8560 static int gaudi_get_fence_addr(struct hl_device
*hdev
, u32 queue_id
, u64
*addr
)
8562 u32 offset
, nic_index
;
8565 case GAUDI_QUEUE_ID_DMA_0_0
:
8566 offset
= mmDMA0_QM_CP_FENCE2_RDATA_0
;
8568 case GAUDI_QUEUE_ID_DMA_0_1
:
8569 offset
= mmDMA0_QM_CP_FENCE2_RDATA_1
;
8571 case GAUDI_QUEUE_ID_DMA_0_2
:
8572 offset
= mmDMA0_QM_CP_FENCE2_RDATA_2
;
8574 case GAUDI_QUEUE_ID_DMA_0_3
:
8575 offset
= mmDMA0_QM_CP_FENCE2_RDATA_3
;
8577 case GAUDI_QUEUE_ID_DMA_1_0
:
8578 offset
= mmDMA1_QM_CP_FENCE2_RDATA_0
;
8580 case GAUDI_QUEUE_ID_DMA_1_1
:
8581 offset
= mmDMA1_QM_CP_FENCE2_RDATA_1
;
8583 case GAUDI_QUEUE_ID_DMA_1_2
:
8584 offset
= mmDMA1_QM_CP_FENCE2_RDATA_2
;
8586 case GAUDI_QUEUE_ID_DMA_1_3
:
8587 offset
= mmDMA1_QM_CP_FENCE2_RDATA_3
;
8589 case GAUDI_QUEUE_ID_DMA_5_0
:
8590 offset
= mmDMA5_QM_CP_FENCE2_RDATA_0
;
8592 case GAUDI_QUEUE_ID_DMA_5_1
:
8593 offset
= mmDMA5_QM_CP_FENCE2_RDATA_1
;
8595 case GAUDI_QUEUE_ID_DMA_5_2
:
8596 offset
= mmDMA5_QM_CP_FENCE2_RDATA_2
;
8598 case GAUDI_QUEUE_ID_DMA_5_3
:
8599 offset
= mmDMA5_QM_CP_FENCE2_RDATA_3
;
8601 case GAUDI_QUEUE_ID_TPC_7_0
:
8602 offset
= mmTPC7_QM_CP_FENCE2_RDATA_0
;
8604 case GAUDI_QUEUE_ID_TPC_7_1
:
8605 offset
= mmTPC7_QM_CP_FENCE2_RDATA_1
;
8607 case GAUDI_QUEUE_ID_TPC_7_2
:
8608 offset
= mmTPC7_QM_CP_FENCE2_RDATA_2
;
8610 case GAUDI_QUEUE_ID_TPC_7_3
:
8611 offset
= mmTPC7_QM_CP_FENCE2_RDATA_3
;
8613 case GAUDI_QUEUE_ID_NIC_0_0
:
8614 case GAUDI_QUEUE_ID_NIC_1_0
:
8615 case GAUDI_QUEUE_ID_NIC_2_0
:
8616 case GAUDI_QUEUE_ID_NIC_3_0
:
8617 case GAUDI_QUEUE_ID_NIC_4_0
:
8618 case GAUDI_QUEUE_ID_NIC_5_0
:
8619 case GAUDI_QUEUE_ID_NIC_6_0
:
8620 case GAUDI_QUEUE_ID_NIC_7_0
:
8621 case GAUDI_QUEUE_ID_NIC_8_0
:
8622 case GAUDI_QUEUE_ID_NIC_9_0
:
8623 nic_index
= (queue_id
- GAUDI_QUEUE_ID_NIC_0_0
) >> 2;
8624 offset
= mmNIC0_QM0_CP_FENCE2_RDATA_0
+
8625 (nic_index
>> 1) * NIC_MACRO_QMAN_OFFSET
+
8626 (nic_index
& 0x1) * NIC_ENGINE_QMAN_OFFSET
;
8628 case GAUDI_QUEUE_ID_NIC_0_1
:
8629 case GAUDI_QUEUE_ID_NIC_1_1
:
8630 case GAUDI_QUEUE_ID_NIC_2_1
:
8631 case GAUDI_QUEUE_ID_NIC_3_1
:
8632 case GAUDI_QUEUE_ID_NIC_4_1
:
8633 case GAUDI_QUEUE_ID_NIC_5_1
:
8634 case GAUDI_QUEUE_ID_NIC_6_1
:
8635 case GAUDI_QUEUE_ID_NIC_7_1
:
8636 case GAUDI_QUEUE_ID_NIC_8_1
:
8637 case GAUDI_QUEUE_ID_NIC_9_1
:
8638 nic_index
= (queue_id
- GAUDI_QUEUE_ID_NIC_0_1
) >> 2;
8639 offset
= mmNIC0_QM0_CP_FENCE2_RDATA_1
+
8640 (nic_index
>> 1) * NIC_MACRO_QMAN_OFFSET
+
8641 (nic_index
& 0x1) * NIC_ENGINE_QMAN_OFFSET
;
8643 case GAUDI_QUEUE_ID_NIC_0_2
:
8644 case GAUDI_QUEUE_ID_NIC_1_2
:
8645 case GAUDI_QUEUE_ID_NIC_2_2
:
8646 case GAUDI_QUEUE_ID_NIC_3_2
:
8647 case GAUDI_QUEUE_ID_NIC_4_2
:
8648 case GAUDI_QUEUE_ID_NIC_5_2
:
8649 case GAUDI_QUEUE_ID_NIC_6_2
:
8650 case GAUDI_QUEUE_ID_NIC_7_2
:
8651 case GAUDI_QUEUE_ID_NIC_8_2
:
8652 case GAUDI_QUEUE_ID_NIC_9_2
:
8653 nic_index
= (queue_id
- GAUDI_QUEUE_ID_NIC_0_2
) >> 2;
8654 offset
= mmNIC0_QM0_CP_FENCE2_RDATA_2
+
8655 (nic_index
>> 1) * NIC_MACRO_QMAN_OFFSET
+
8656 (nic_index
& 0x1) * NIC_ENGINE_QMAN_OFFSET
;
8658 case GAUDI_QUEUE_ID_NIC_0_3
:
8659 case GAUDI_QUEUE_ID_NIC_1_3
:
8660 case GAUDI_QUEUE_ID_NIC_2_3
:
8661 case GAUDI_QUEUE_ID_NIC_3_3
:
8662 case GAUDI_QUEUE_ID_NIC_4_3
:
8663 case GAUDI_QUEUE_ID_NIC_5_3
:
8664 case GAUDI_QUEUE_ID_NIC_6_3
:
8665 case GAUDI_QUEUE_ID_NIC_7_3
:
8666 case GAUDI_QUEUE_ID_NIC_8_3
:
8667 case GAUDI_QUEUE_ID_NIC_9_3
:
8668 nic_index
= (queue_id
- GAUDI_QUEUE_ID_NIC_0_3
) >> 2;
8669 offset
= mmNIC0_QM0_CP_FENCE2_RDATA_3
+
8670 (nic_index
>> 1) * NIC_MACRO_QMAN_OFFSET
+
8671 (nic_index
& 0x1) * NIC_ENGINE_QMAN_OFFSET
;
8677 *addr
= CFG_BASE
+ offset
;
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
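/*
 * The device timestamp is exposed as two 32-bit counter registers: the upper
 * half (CNTCVU) is read first and shifted into bits [63:32], then OR'd with
 * the lower half (CNTCVL) to form the 64-bit value returned above.
 */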
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
8812 static int gaudi_add_sync_to_engine_map_entry(
8813 struct hl_sync_to_engine_map
*map
, u32 reg_value
,
8814 enum hl_sync_engine_type engine_type
, u32 engine_id
)
8816 struct hl_sync_to_engine_map_entry
*entry
;
8818 /* Reg value represents a partial address of sync object,
8819 * it is used as unique identifier. For this we need to
8820 * clear the cutoff cfg base bits from the value.
8822 if (reg_value
== 0 || reg_value
== 0xffffffff)
8824 reg_value
-= lower_32_bits(CFG_BASE
);
8826 /* create a new hash entry */
8827 entry
= kzalloc(sizeof(*entry
), GFP_KERNEL
);
8830 entry
->engine_type
= engine_type
;
8831 entry
->engine_id
= engine_id
;
8832 entry
->sync_id
= reg_value
;
8833 hash_add(map
->tb
, &entry
->node
, reg_value
);
8838 static int gaudi_gen_sync_to_engine_map(struct hl_device
*hdev
,
8839 struct hl_sync_to_engine_map
*map
)
8841 struct hl_state_dump_specs
*sds
= &hdev
->state_dump_specs
;
8845 /* Iterate over TPC engines */
8846 for (i
= 0; i
< sds
->props
[SP_NUM_OF_TPC_ENGINES
]; ++i
) {
8848 reg_value
= RREG32(sds
->props
[SP_TPC0_CFG_SO
] +
8849 sds
->props
[SP_NEXT_TPC
] * i
);
8851 rc
= gaudi_add_sync_to_engine_map_entry(map
, reg_value
,
8854 goto free_sync_to_engine_map
;
8857 /* Iterate over MME engines */
8858 for (i
= 0; i
< sds
->props
[SP_NUM_OF_MME_ENGINES
]; ++i
) {
8859 for (j
= 0; j
< sds
->props
[SP_SUB_MME_ENG_NUM
]; ++j
) {
8861 reg_value
= RREG32(sds
->props
[SP_MME_CFG_SO
] +
8862 sds
->props
[SP_NEXT_MME
] * i
+
8865 rc
= gaudi_add_sync_to_engine_map_entry(
8866 map
, reg_value
, ENGINE_MME
,
8867 i
* sds
->props
[SP_SUB_MME_ENG_NUM
] + j
);
8869 goto free_sync_to_engine_map
;
8873 /* Iterate over DMA engines */
8874 for (i
= 0; i
< sds
->props
[SP_NUM_OF_DMA_ENGINES
]; ++i
) {
8875 reg_value
= RREG32(sds
->props
[SP_DMA_CFG_SO
] +
8876 sds
->props
[SP_DMA_QUEUES_OFFSET
] * i
);
8877 rc
= gaudi_add_sync_to_engine_map_entry(map
, reg_value
,
8880 goto free_sync_to_engine_map
;
8885 free_sync_to_engine_map
:
8886 hl_state_dump_free_sync_to_engine_map(map
);
static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
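/*
 * Worked example of the mapping above (illustrative values): assuming
 * MONITOR_MAX_SOBS is 8, a monitor armed with group id 2 and mask 0b11111100
 * has bits 0 and 1 cleared, so the loop reports sync objects 16 and 17
 * ("8 * group_id + cleared bit index") as the ones being monitored.
 */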
8926 static int gaudi_print_single_monitor(char **buf
, size_t *size
, size_t *offset
,
8927 struct hl_device
*hdev
,
8928 struct hl_mon_state_dump
*mon
)
8931 char scratch_buf1
[BIN_REG_STRING_SIZE
],
8932 scratch_buf2
[BIN_REG_STRING_SIZE
];
8933 char monitored_sobs
[MONITOR_SOB_STRING_SIZE
] = {0};
8935 name
= hl_state_dump_get_monitor_name(hdev
, mon
);
8939 gaudi_fill_sobs_from_mon(monitored_sobs
, mon
);
8941 return hl_snprintf_resize(
8943 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8945 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK
,
8947 hl_format_as_binary(
8948 scratch_buf1
, sizeof(scratch_buf1
),
8950 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK
,
8952 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK
,
8955 (((u64
)mon
->wr_addr_high
) << 32) | mon
->wr_addr_low
,
8956 hl_format_as_binary(
8957 scratch_buf2
, sizeof(scratch_buf2
),
8959 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK
,
8965 static int gaudi_print_fences_single_engine(
8966 struct hl_device
*hdev
, u64 base_offset
, u64 status_base_offset
,
8967 enum hl_sync_engine_type engine_type
, u32 engine_id
, char **buf
,
8968 size_t *size
, size_t *offset
)
8970 struct hl_state_dump_specs
*sds
= &hdev
->state_dump_specs
;
8971 int rc
= -ENOMEM
, i
;
8972 u32
*statuses
, *fences
;
8974 statuses
= kcalloc(sds
->props
[SP_ENGINE_NUM_OF_QUEUES
],
8975 sizeof(*statuses
), GFP_KERNEL
);
8979 fences
= kcalloc(sds
->props
[SP_ENGINE_NUM_OF_FENCES
] *
8980 sds
->props
[SP_ENGINE_NUM_OF_QUEUES
],
8981 sizeof(*fences
), GFP_KERNEL
);
8985 for (i
= 0; i
< sds
->props
[SP_ENGINE_NUM_OF_FENCES
]; ++i
)
8986 statuses
[i
] = RREG32(status_base_offset
+ i
* sizeof(u32
));
8988 for (i
= 0; i
< sds
->props
[SP_ENGINE_NUM_OF_FENCES
] *
8989 sds
->props
[SP_ENGINE_NUM_OF_QUEUES
]; ++i
)
8990 fences
[i
] = RREG32(base_offset
+ i
* sizeof(u32
));
8992 /* The actual print */
8993 for (i
= 0; i
< sds
->props
[SP_ENGINE_NUM_OF_QUEUES
]; ++i
) {
8995 u64 fence_cnt
, fence_rdata
;
8996 const char *engine_name
;
8998 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK
,
9003 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK
, statuses
[i
]);
9004 fence_cnt
= base_offset
+ CFG_BASE
+
9006 (i
+ fence_id
* sds
->props
[SP_ENGINE_NUM_OF_QUEUES
]);
9007 fence_rdata
= fence_cnt
- sds
->props
[SP_FENCE0_CNT_OFFSET
] +
9008 sds
->props
[SP_FENCE0_RDATA_OFFSET
];
9009 engine_name
= hl_sync_engine_to_string(engine_type
);
9011 rc
= hl_snprintf_resize(
9013 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9014 engine_name
, engine_id
,
9016 fence_cnt
, engine_name
, engine_id
, fence_id
, i
,
9017 fence_rdata
, engine_name
, engine_id
, fence_id
, i
,
9035 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs
= {
9036 .monitor_valid
= gaudi_monitor_valid
,
9037 .print_single_monitor
= gaudi_print_single_monitor
,
9038 .gen_sync_to_engine_map
= gaudi_gen_sync_to_engine_map
,
9039 .print_fences_single_engine
= gaudi_print_fences_single_engine
,
9042 static void gaudi_state_dump_init(struct hl_device
*hdev
)
9044 struct hl_state_dump_specs
*sds
= &hdev
->state_dump_specs
;
9047 for (i
= 0; i
< ARRAY_SIZE(gaudi_so_id_to_str
); ++i
)
9048 hash_add(sds
->so_id_to_str_tb
,
9049 &gaudi_so_id_to_str
[i
].node
,
9050 gaudi_so_id_to_str
[i
].id
);
9052 for (i
= 0; i
< ARRAY_SIZE(gaudi_monitor_id_to_str
); ++i
)
9053 hash_add(sds
->monitor_id_to_str_tb
,
9054 &gaudi_monitor_id_to_str
[i
].node
,
9055 gaudi_monitor_id_to_str
[i
].id
);
9057 sds
->props
= gaudi_state_dump_specs_props
;
9059 sds
->sync_namager_names
= gaudi_sync_manager_names
;
9061 sds
->funcs
= gaudi_state_dump_funcs
;
9064 static u32
*gaudi_get_stream_master_qid_arr(void)
9066 return gaudi_stream_master
;
9069 static int gaudi_set_dram_properties(struct hl_device
*hdev
)
9074 static int gaudi_set_binning_masks(struct hl_device
*hdev
)
9079 static void gaudi_check_if_razwi_happened(struct hl_device
*hdev
)
9083 static ssize_t
infineon_ver_show(struct device
*dev
, struct device_attribute
*attr
, char *buf
)
9085 struct hl_device
*hdev
= dev_get_drvdata(dev
);
9086 struct cpucp_info
*cpucp_info
;
9088 cpucp_info
= &hdev
->asic_prop
.cpucp_info
;
9090 return sprintf(buf
, "%#04x\n", le32_to_cpu(cpucp_info
->infineon_version
));
9093 static DEVICE_ATTR_RO(infineon_ver
);
9095 static struct attribute
*gaudi_vrm_dev_attrs
[] = {
9096 &dev_attr_infineon_ver
.attr
,
9100 static void gaudi_add_device_attr(struct hl_device
*hdev
, struct attribute_group
*dev_clk_attr_grp
,
9101 struct attribute_group
*dev_vrm_attr_grp
)
9103 hl_sysfs_add_dev_clk_attr(hdev
, dev_clk_attr_grp
);
9104 dev_vrm_attr_grp
->attrs
= gaudi_vrm_dev_attrs
;
9107 static int gaudi_send_device_activity(struct hl_device
*hdev
, bool open
)
9112 static const struct hl_asic_funcs gaudi_funcs
= {
9113 .early_init
= gaudi_early_init
,
9114 .early_fini
= gaudi_early_fini
,
9115 .late_init
= gaudi_late_init
,
9116 .late_fini
= gaudi_late_fini
,
9117 .sw_init
= gaudi_sw_init
,
9118 .sw_fini
= gaudi_sw_fini
,
9119 .hw_init
= gaudi_hw_init
,
9120 .hw_fini
= gaudi_hw_fini
,
9121 .halt_engines
= gaudi_halt_engines
,
9122 .suspend
= gaudi_suspend
,
9123 .resume
= gaudi_resume
,
9125 .ring_doorbell
= gaudi_ring_doorbell
,
9126 .pqe_write
= gaudi_pqe_write
,
9127 .asic_dma_alloc_coherent
= gaudi_dma_alloc_coherent
,
9128 .asic_dma_free_coherent
= gaudi_dma_free_coherent
,
9129 .scrub_device_mem
= gaudi_scrub_device_mem
,
9130 .scrub_device_dram
= gaudi_scrub_device_dram
,
9131 .get_int_queue_base
= gaudi_get_int_queue_base
,
9132 .test_queues
= gaudi_test_queues
,
9133 .asic_dma_pool_zalloc
= gaudi_dma_pool_zalloc
,
9134 .asic_dma_pool_free
= gaudi_dma_pool_free
,
9135 .cpu_accessible_dma_pool_alloc
= gaudi_cpu_accessible_dma_pool_alloc
,
9136 .cpu_accessible_dma_pool_free
= gaudi_cpu_accessible_dma_pool_free
,
9137 .dma_unmap_sgtable
= hl_asic_dma_unmap_sgtable
,
9138 .cs_parser
= gaudi_cs_parser
,
9139 .dma_map_sgtable
= hl_asic_dma_map_sgtable
,
9140 .add_end_of_cb_packets
= gaudi_add_end_of_cb_packets
,
9141 .update_eq_ci
= gaudi_update_eq_ci
,
9142 .context_switch
= gaudi_context_switch
,
9143 .restore_phase_topology
= gaudi_restore_phase_topology
,
9144 .debugfs_read_dma
= gaudi_debugfs_read_dma
,
9145 .add_device_attr
= gaudi_add_device_attr
,
9146 .handle_eqe
= gaudi_handle_eqe
,
9147 .get_events_stat
= gaudi_get_events_stat
,
9148 .read_pte
= gaudi_read_pte
,
9149 .write_pte
= gaudi_write_pte
,
9150 .mmu_invalidate_cache
= gaudi_mmu_invalidate_cache
,
9151 .mmu_invalidate_cache_range
= gaudi_mmu_invalidate_cache_range
,
9152 .mmu_prefetch_cache_range
= NULL
,
9153 .send_heartbeat
= gaudi_send_heartbeat
,
9154 .debug_coresight
= gaudi_debug_coresight
,
9155 .is_device_idle
= gaudi_is_device_idle
,
9156 .compute_reset_late_init
= gaudi_compute_reset_late_init
,
9157 .hw_queues_lock
= gaudi_hw_queues_lock
,
9158 .hw_queues_unlock
= gaudi_hw_queues_unlock
,
9159 .get_pci_id
= gaudi_get_pci_id
,
9160 .get_eeprom_data
= gaudi_get_eeprom_data
,
9161 .get_monitor_dump
= gaudi_get_monitor_dump
,
9162 .send_cpu_message
= gaudi_send_cpu_message
,
9163 .pci_bars_map
= gaudi_pci_bars_map
,
9164 .init_iatu
= gaudi_init_iatu
,
9167 .halt_coresight
= gaudi_halt_coresight
,
9168 .ctx_init
= gaudi_ctx_init
,
9169 .ctx_fini
= gaudi_ctx_fini
,
9170 .pre_schedule_cs
= gaudi_pre_schedule_cs
,
9171 .get_queue_id_for_cq
= gaudi_get_queue_id_for_cq
,
9172 .load_firmware_to_device
= gaudi_load_firmware_to_device
,
9173 .load_boot_fit_to_device
= gaudi_load_boot_fit_to_device
,
9174 .get_signal_cb_size
= gaudi_get_signal_cb_size
,
9175 .get_wait_cb_size
= gaudi_get_wait_cb_size
,
9176 .gen_signal_cb
= gaudi_gen_signal_cb
,
9177 .gen_wait_cb
= gaudi_gen_wait_cb
,
9178 .reset_sob
= gaudi_reset_sob
,
9179 .reset_sob_group
= gaudi_reset_sob_group
,
9180 .get_device_time
= gaudi_get_device_time
,
9181 .pb_print_security_errors
= NULL
,
9182 .collective_wait_init_cs
= gaudi_collective_wait_init_cs
,
9183 .collective_wait_create_jobs
= gaudi_collective_wait_create_jobs
,
9184 .get_dec_base_addr
= NULL
,
9185 .scramble_addr
= hl_mmu_scramble_addr
,
9186 .descramble_addr
= hl_mmu_descramble_addr
,
9187 .ack_protection_bits_errors
= gaudi_ack_protection_bits_errors
,
9188 .get_hw_block_id
= gaudi_get_hw_block_id
,
9189 .hw_block_mmap
= gaudi_block_mmap
,
9190 .enable_events_from_fw
= gaudi_enable_events_from_fw
,
9191 .ack_mmu_errors
= gaudi_ack_mmu_page_fault_or_access_error
,
9192 .map_pll_idx_to_fw_idx
= gaudi_map_pll_idx_to_fw_idx
,
9193 .init_firmware_preload_params
= gaudi_init_firmware_preload_params
,
9194 .init_firmware_loader
= gaudi_init_firmware_loader
,
9195 .init_cpu_scrambler_dram
= gaudi_init_scrambler_hbm
,
9196 .state_dump_init
= gaudi_state_dump_init
,
9197 .get_sob_addr
= gaudi_get_sob_addr
,
9198 .set_pci_memory_regions
= gaudi_set_pci_memory_regions
,
9199 .get_stream_master_qid_arr
= gaudi_get_stream_master_qid_arr
,
9200 .check_if_razwi_happened
= gaudi_check_if_razwi_happened
,
9201 .mmu_get_real_page_size
= hl_mmu_get_real_page_size
,
9202 .access_dev_mem
= hl_access_dev_mem
,
9203 .set_dram_bar_base
= gaudi_set_hbm_bar_base
,
9204 .send_device_activity
= gaudi_send_device_activity
,
9205 .set_dram_properties
= gaudi_set_dram_properties
,
9206 .set_binning_masks
= gaudi_set_binning_masks
,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}