// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 */

#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 */
#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
};
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_MSG_PROT:
	case PACKET_ARB_POINT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}
static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
	gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
struct ecc_info_extract_params {
	bool disable_clock_gating;
};
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
					u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
					u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
					struct hl_gen_wait_properties *prop);
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}
static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream are reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);
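
	/*
	 * Illustrative arithmetic for the two reservations above (the numbers
	 * here are assumptions chosen for the example, not values taken from
	 * this file): if NUMBER_OF_SOBS_IN_GRP were 11 and
	 * HL_MAX_SOBS_PER_MONITOR were 8, ALIGN(11, 8) would give 16 SOBs per
	 * group, so with QMAN_STREAMS = 4 and HL_RSVD_SOBS = 2 the first
	 * sync-stream SOB index would be 16 * 4 * 2 = 128. The monitor
	 * reservation follows the same pattern, using the per-queue counts
	 * stated in the comment above.
	 */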
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	/* disable fw security for now, set it in a later stage */
	prop->fw_security_disabled = true;
	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		return rc;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		return rc;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		return rc;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

	return rc;
}
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);

	hl_pci_fini(hdev);

	return 0;
}
/*
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_disabled) {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	} else {
		rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}
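
/*
 * Worked example of the PLL computation in gaudi_fetch_psoc_frequency() above
 * (hypothetical register values, assuming PLL_REF_CLK is expressed in MHz and
 * equals 50): with nf = 49, nr = 0 and od = 1,
 * pll_clk = 50 * (49 + 1) / ((0 + 1) * (1 + 1)) = 1250 MHz; with div_fctr = 4
 * and a divided-PLL select, freq = 1250 / (4 + 1) = 250 MHz.
 */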
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}
/**
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}
static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id + i) * 4, 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}
static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	prop->mstr_sob_mask[0] = 0;
	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[0] |= BIT(i);

	prop->mstr_sob_mask[1] = 0;
	master_monitor_sobs =
		NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs; i++) {
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[1] |= BIT(i);
	}

	/* Set collective engine bit */
	prop->mstr_sob_mask[1] |= BIT(i);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}
static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}
static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (hdev->asic_prop.fw_security_disabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
							q->pq_kernel_addr,
							q->pq_dma_addr);
	}
}
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (hdev->asic_prop.fw_security_disabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (hdev->asic_prop.fw_security_disabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}
static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}
/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}
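
/*
 * Example of the vector mapping implemented in gaudi_pci_irq_vector() above,
 * under the assumption (for illustration only) that GAUDI_EVENT_QUEUE_MSI_IDX
 * is 8 and NIC_NUMBER_OF_ENGINES is 10: completion-queue interrupts 0-7 map
 * directly to MSI vectors 0-7, the CPU event queue maps to vector 8, and any
 * later interrupt nr >= 8 is shifted to vector nr + 11 so that it lands after
 * the existing CPU and NIC vectors, as the comment above requires.
 */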
static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}
static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
					PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
						GAUDI_EVENT_QUEUE_MSI_IDX,
						true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!hdev->asic_prop.fw_security_disabled)
		return;

	if (hdev->asic_prop.fw_security_status_valid &&
			(hdev->asic_prop.fw_app_security_map &
					CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	if (!hdev->sram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!hdev->asic_prop.fw_security_disabled)
		return;

	if (hdev->asic_prop.fw_security_status_valid &&
			(hdev->asic_prop.fw_boot_cpu_security_map &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	if (!hdev->dram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
1967 static void gaudi_init_e2e(struct hl_device
*hdev
)
1969 if (!hdev
->asic_prop
.fw_security_disabled
)
1972 if (hdev
->asic_prop
.fw_security_status_valid
&&
1973 (hdev
->asic_prop
.fw_boot_cpu_security_map
&
1974 CPU_BOOT_DEV_STS0_E2E_CRED_EN
))
1977 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE
, 247 >> 3);
1978 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE
, 785 >> 3);
1979 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE
, 49);
1980 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE
, 101);
1982 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE
, 275 >> 3);
1983 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE
, 614 >> 3);
1984 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE
, 1);
1985 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE
, 39);
1987 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE
, 1);
1988 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE
, 1);
1989 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE
, 1);
1990 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE
, 32);
1992 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE
, 176 >> 3);
1993 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE
, 32 >> 3);
1994 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE
, 19);
1995 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE
, 32);
1997 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE
, 176 >> 3);
1998 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE
, 32 >> 3);
1999 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE
, 19);
2000 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE
, 32);
2002 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE
, 1);
2003 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE
, 1);
2004 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE
, 1);
2005 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE
, 32);
2007 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE
, 275 >> 3);
2008 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE
, 614 >> 3);
2009 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE
, 1);
2010 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE
, 39);
2012 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE
, 297 >> 3);
2013 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE
, 908 >> 3);
2014 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE
, 19);
2015 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE
, 19);
2017 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE
, 318 >> 3);
2018 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE
, 956 >> 3);
2019 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE
, 79);
2020 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE
, 163);
2022 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE
, 275 >> 3);
2023 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE
, 614 >> 3);
2024 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE
, 1);
2025 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE
, 39);
2027 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE
, 1);
2028 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE
, 1);
2029 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE
, 1);
2030 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE
, 32);
2032 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE
, 176 >> 3);
2033 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE
, 32 >> 3);
2034 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE
, 19);
2035 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE
, 32);
2037 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE
, 176 >> 3);
2038 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE
, 32 >> 3);
2039 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE
, 19);
2040 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE
, 32);
2042 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE
, 1);
2043 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE
, 1);
2044 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE
, 1);
2045 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE
, 32);
2047 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE
, 275 >> 3);
2048 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE
, 614 >> 3);
2049 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE
, 1);
2050 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE
, 39);
2052 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE
, 318 >> 3);
2053 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE
, 956 >> 3);
2054 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE
, 79);
2055 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE
, 79);
2057 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2058 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2059 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2060 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2062 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2063 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2064 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2065 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2067 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2068 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2069 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2070 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2072 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2073 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2074 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2075 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2077 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2078 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2079 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2080 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2082 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2083 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2084 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2085 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2087 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE
, 344 >> 3);
2088 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE
, 1000 >> 3);
2089 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE
, 162);
2090 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE
, 338);
2092 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE
, 344 >> 3);
2093 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE
, 1000 >> 3);
2094 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE
, 162);
2095 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE
, 338);
2097 if (!hdev
->dram_scrambler_enable
) {
2098 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0
, 0x21);
2099 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1
, 0x22);
2100 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18
, 0x1F);
2101 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3
, 0x20);
2103 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0
, 0x21);
2104 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1
, 0x22);
2105 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18
, 0x1F);
2106 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3
, 0x20);
2108 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0
, 0x21);
2109 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1
, 0x22);
2110 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18
, 0x1F);
2111 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3
, 0x20);
2113 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0
, 0x21);
2114 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1
, 0x22);
2115 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18
, 0x1F);
2116 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3
, 0x20);
2118 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0
, 0x21);
2119 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1
, 0x22);
2120 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18
, 0x1F);
2121 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3
, 0x20);
2123 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0
, 0x21);
2124 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1
, 0x22);
2125 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18
, 0x1F);
2126 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3
, 0x20);
2128 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0
, 0x21);
2129 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1
, 0x22);
2130 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18
, 0x1F);
2131 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3
, 0x20);
2133 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0
, 0x21);
2134 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1
, 0x22);
2135 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18
, 0x1F);
2136 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3
, 0x20);
2138 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0
, 0x21);
2139 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1
, 0x22);
2140 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18
, 0x1F);
2141 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3
, 0x20);
2143 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0
, 0x21);
2144 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1
, 0x22);
2145 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18
, 0x1F);
2146 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3
, 0x20);
2148 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0
, 0x21);
2149 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1
, 0x22);
2150 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18
, 0x1F);
2151 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3
, 0x20);
2153 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0
, 0x21);
2154 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1
, 0x22);
2155 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18
, 0x1F);
2156 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3
, 0x20);
2158 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0
, 0x21);
2159 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1
, 0x22);
2160 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18
, 0x1F);
2161 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3
, 0x20);
2163 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0
, 0x21);
2164 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1
, 0x22);
2165 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18
, 0x1F);
2166 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3
, 0x20);
2168 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0
, 0x21);
2169 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1
, 0x22);
2170 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18
, 0x1F);
2171 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3
, 0x20);
2173 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0
, 0x21);
2174 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1
, 0x22);
2175 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18
, 0x1F);
2176 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3
, 0x20);
2178 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0
, 0x21);
2179 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1
, 0x22);
2180 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18
, 0x1F);
2181 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3
, 0x20);
2183 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0
, 0x21);
2184 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1
, 0x22);
2185 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18
, 0x1F);
2186 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3
, 0x20);
2188 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0
, 0x21);
2189 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1
, 0x22);
2190 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18
, 0x1F);
2191 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3
, 0x20);
2193 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0
, 0x21);
2194 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1
, 0x22);
2195 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18
, 0x1F);
2196 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3
, 0x20);
2198 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0
, 0x21);
2199 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1
, 0x22);
2200 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18
, 0x1F);
2201 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3
, 0x20);
2203 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0
, 0x21);
2204 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1
, 0x22);
2205 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18
, 0x1F);
2206 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3
, 0x20);
2208 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0
, 0x21);
2209 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1
, 0x22);
2210 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18
, 0x1F);
2211 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3
, 0x20);
2213 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0
, 0x21);
2214 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1
, 0x22);
2215 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18
, 0x1F);
2216 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3
, 0x20);
2219 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN
,
2220 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2221 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN
,
2222 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2224 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN
,
2225 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2226 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN
,
2227 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2229 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN
,
2230 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2231 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN
,
2232 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2234 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN
,
2235 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2236 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN
,
2237 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2239 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN
,
2240 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2241 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN
,
2242 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2244 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN
,
2245 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2246 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN
,
2247 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2249 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN
,
2250 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2251 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN
,
2252 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2254 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN
,
2255 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2256 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN
,
2257 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2259 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN
,
2260 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2261 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN
,
2262 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2264 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN
,
2265 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2266 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN
,
2267 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2269 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN
,
2270 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2271 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN
,
2272 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2274 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN
,
2275 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2276 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN
,
2277 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2279 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN
,
2280 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2281 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN
,
2282 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2284 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN
,
2285 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2286 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN
,
2287 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2289 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN
,
2290 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2291 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN
,
2292 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2294 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN
,
2295 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT
);
2296 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN
,
2297 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT
);
2299 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN
,
2300 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2301 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN
,
2302 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2304 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN
,
2305 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2306 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN
,
2307 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2309 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN
,
2310 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2311 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN
,
2312 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2314 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN
,
2315 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2316 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN
,
2317 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2319 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN
,
2320 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2321 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN
,
2322 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2324 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN
,
2325 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2326 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN
,
2327 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2329 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN
,
2330 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2331 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN
,
2332 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2334 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN
,
2335 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT
);
2336 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN
,
2337 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT
);
2340 static void gaudi_init_hbm_cred(struct hl_device
*hdev
)
2342 uint32_t hbm0_wr
, hbm1_wr
, hbm0_rd
, hbm1_rd
;
2344 if (!hdev
->asic_prop
.fw_security_disabled
)
2347 if (hdev
->asic_prop
.fw_security_status_valid
&&
2348 (hdev
->asic_prop
.fw_boot_cpu_security_map
&
2349 CPU_BOOT_DEV_STS0_HBM_CRED_EN
))
2352 hbm0_wr
= 0x33333333;
2353 hbm0_rd
= 0x77777777;
2354 hbm1_wr
= 0x55555555;
2355 hbm1_rd
= 0xDDDDDDDD;
2357 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT
, hbm0_wr
);
2358 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT
, hbm1_wr
);
2359 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT
, hbm0_rd
);
2360 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT
, hbm1_rd
);
2362 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT
, hbm0_wr
);
2363 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT
, hbm1_wr
);
2364 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT
, hbm0_rd
);
2365 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT
, hbm1_rd
);
2367 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT
, hbm0_wr
);
2368 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT
, hbm1_wr
);
2369 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT
, hbm0_rd
);
2370 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT
, hbm1_rd
);
2372 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT
, hbm0_wr
);
2373 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT
, hbm1_wr
);
2374 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT
, hbm0_rd
);
2375 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT
, hbm1_rd
);
2377 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0
,
2378 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2379 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2380 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0
,
2381 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2382 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2383 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0
,
2384 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2385 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2386 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0
,
2387 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2388 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2390 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1
,
2391 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2392 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2393 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1
,
2394 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2395 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2396 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1
,
2397 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2398 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2399 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1
,
2400 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT
) |
2401 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT
));
2404 static void gaudi_init_golden_registers(struct hl_device
*hdev
)
2409 gaudi_init_e2e(hdev
);
2410 gaudi_init_hbm_cred(hdev
);
2412 for (tpc_id
= 0, tpc_offset
= 0;
2413 tpc_id
< TPC_NUMBER_OF_ENGINES
;
2414 tpc_id
++, tpc_offset
+= TPC_CFG_OFFSET
) {
2415 /* Mask all arithmetic interrupts from TPC */
2416 WREG32(mmTPC0_CFG_TPC_INTR_MASK
+ tpc_offset
, 0x8FFF);
2417 /* Set 16 cache lines */
2418 WREG32_FIELD(TPC0_CFG_MSS_CONFIG
, tpc_offset
,
2419 ICACHE_FETCH_LINE_NUM
, 2);
2422 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2423 for (i
= 0 ; i
< 128 ; i
+= 8)
2424 writeq(0, hdev
->pcie_bar
[SRAM_BAR_ID
] + i
);
2426 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD
, 3);
2427 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD
, 3);
2428 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD
, 3);
2429 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD
, 3);
2432 static void gaudi_init_pci_dma_qman(struct hl_device
*hdev
, int dma_id
,
2433 int qman_id
, dma_addr_t qman_pq_addr
)
2435 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
2436 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
2437 u32 q_off
, dma_qm_offset
;
2440 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
2442 mtr_base_en_lo
= lower_32_bits(CFG_BASE
+
2443 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2444 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
2445 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2446 so_base_en_lo
= lower_32_bits(CFG_BASE
+
2447 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2448 so_base_en_hi
= upper_32_bits(CFG_BASE
+
2449 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2450 mtr_base_ws_lo
= lower_32_bits(CFG_BASE
+
2451 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2452 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
2453 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2454 so_base_ws_lo
= lower_32_bits(CFG_BASE
+
2455 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2456 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
2457 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2459 q_off
= dma_qm_offset
+ qman_id
* 4;
2461 WREG32(mmDMA0_QM_PQ_BASE_LO_0
+ q_off
, lower_32_bits(qman_pq_addr
));
2462 WREG32(mmDMA0_QM_PQ_BASE_HI_0
+ q_off
, upper_32_bits(qman_pq_addr
));
2464 WREG32(mmDMA0_QM_PQ_SIZE_0
+ q_off
, ilog2(HL_QUEUE_LENGTH
));
2465 WREG32(mmDMA0_QM_PQ_PI_0
+ q_off
, 0);
2466 WREG32(mmDMA0_QM_PQ_CI_0
+ q_off
, 0);
2468 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
, QMAN_LDMA_SIZE_OFFSET
);
2469 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2470 QMAN_LDMA_SRC_OFFSET
);
2471 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2472 QMAN_LDMA_DST_OFFSET
);
2474 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
2475 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
2476 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
2477 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
2478 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0
+ q_off
, mtr_base_ws_lo
);
2479 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0
+ q_off
, mtr_base_ws_hi
);
2480 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0
+ q_off
, so_base_ws_lo
);
2481 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0
+ q_off
, so_base_ws_hi
);
2483 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0
+ q_off
, 0x100);
2485 /* The following configuration is needed only once per QMAN */
2487 /* Configure RAZWI IRQ */
2488 dma_qm_err_cfg
= PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
2489 if (hdev
->stop_on_err
) {
2491 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
2494 WREG32(mmDMA0_QM_GLBL_ERR_CFG
+ dma_qm_offset
, dma_qm_err_cfg
);
2495 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO
+ dma_qm_offset
,
2496 lower_32_bits(CFG_BASE
+
2497 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2498 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI
+ dma_qm_offset
,
2499 upper_32_bits(CFG_BASE
+
2500 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2501 WREG32(mmDMA0_QM_GLBL_ERR_WDATA
+ dma_qm_offset
,
2502 gaudi_irq_map_table
[GAUDI_EVENT_DMA0_QM
].cpu_id
+
2505 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN
+ dma_qm_offset
,
2506 QM_ARB_ERR_MSG_EN_MASK
);
2508 /* Increase ARB WDT to support streams architecture */
2509 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT
+ dma_qm_offset
,
2510 GAUDI_ARB_WDT_TIMEOUT
);
2512 WREG32(mmDMA0_QM_GLBL_PROT
+ dma_qm_offset
,
2513 QMAN_EXTERNAL_MAKE_TRUSTED
);
2515 WREG32(mmDMA0_QM_GLBL_CFG1
+ dma_qm_offset
, 0);
2519 static void gaudi_init_dma_core(struct hl_device
*hdev
, int dma_id
)
2521 u32 dma_offset
= dma_id
* DMA_CORE_OFFSET
;
2522 u32 dma_err_cfg
= 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT
;
2524 /* Set to maximum possible according to physical size */
2525 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND
+ dma_offset
, 0);
2526 WREG32(mmDMA0_CORE_RD_MAX_SIZE
+ dma_offset
, 0);
2528 /* WA for H/W bug H3-2116 */
2529 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND
+ dma_offset
, 15);
2531 /* STOP_ON bit implies no completion to operation in case of RAZWI */
2532 if (hdev
->stop_on_err
)
2533 dma_err_cfg
|= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT
;
2535 WREG32(mmDMA0_CORE_ERR_CFG
+ dma_offset
, dma_err_cfg
);
2536 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO
+ dma_offset
,
2537 lower_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2538 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI
+ dma_offset
,
2539 upper_32_bits(CFG_BASE
+ mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2540 WREG32(mmDMA0_CORE_ERRMSG_WDATA
+ dma_offset
,
2541 gaudi_irq_map_table
[GAUDI_EVENT_DMA0_CORE
].cpu_id
+ dma_id
);
2542 WREG32(mmDMA0_CORE_PROT
+ dma_offset
,
2543 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT
);
2544 /* If the channel is secured, it should be in MMU bypass mode */
2545 WREG32(mmDMA0_CORE_SECURE_PROPS
+ dma_offset
,
2546 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT
);
2547 WREG32(mmDMA0_CORE_CFG_0
+ dma_offset
, 1 << DMA0_CORE_CFG_0_EN_SHIFT
);
2550 static void gaudi_enable_qman(struct hl_device
*hdev
, int dma_id
,
2553 u32 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
2555 WREG32(mmDMA0_QM_GLBL_CFG0
+ dma_qm_offset
, enable_mask
);
2558 static void gaudi_init_pci_dma_qmans(struct hl_device
*hdev
)
2560 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
2561 struct hl_hw_queue
*q
;
2562 int i
, j
, dma_id
, cpu_skip
, nic_skip
, cq_id
= 0, q_idx
, msi_vec
= 0;
2564 if (gaudi
->hw_cap_initialized
& HW_CAP_PCI_DMA
)
2567 for (i
= 0 ; i
< PCI_DMA_NUMBER_OF_CHNLS
; i
++) {
2568 dma_id
= gaudi_dma_assignment
[i
];
2570 * For queues after the CPU Q need to add 1 to get the correct
2571 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2572 * order to get the correct MSI register.
2576 nic_skip
= NIC_NUMBER_OF_ENGINES
;
2582 for (j
= 0 ; j
< QMAN_STREAMS
; j
++) {
2583 q_idx
= 4 * dma_id
+ j
+ cpu_skip
;
2584 q
= &hdev
->kernel_queues
[q_idx
];
2586 q
->msi_vec
= nic_skip
+ cpu_skip
+ msi_vec
++;
2587 gaudi_init_pci_dma_qman(hdev
, dma_id
, j
,
2591 gaudi_init_dma_core(hdev
, dma_id
);
2593 gaudi_enable_qman(hdev
, dma_id
, PCI_DMA_QMAN_ENABLE
);
2596 gaudi
->hw_cap_initialized
|= HW_CAP_PCI_DMA
;
2599 static void gaudi_init_hbm_dma_qman(struct hl_device
*hdev
, int dma_id
,
2600 int qman_id
, u64 qman_base_addr
)
2602 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
2603 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
2604 u32 q_off
, dma_qm_offset
;
2607 dma_qm_offset
= dma_id
* DMA_QMAN_OFFSET
;
2609 mtr_base_en_lo
= lower_32_bits(CFG_BASE
+
2610 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2611 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
2612 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2613 so_base_en_lo
= lower_32_bits(CFG_BASE
+
2614 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2615 so_base_en_hi
= upper_32_bits(CFG_BASE
+
2616 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2617 mtr_base_ws_lo
= lower_32_bits(CFG_BASE
+
2618 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2619 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
2620 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2621 so_base_ws_lo
= lower_32_bits(CFG_BASE
+
2622 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2623 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
2624 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2626 q_off
= dma_qm_offset
+ qman_id
* 4;
2629 WREG32(mmDMA0_QM_PQ_BASE_LO_0
+ q_off
,
2630 lower_32_bits(qman_base_addr
));
2631 WREG32(mmDMA0_QM_PQ_BASE_HI_0
+ q_off
,
2632 upper_32_bits(qman_base_addr
));
2634 WREG32(mmDMA0_QM_PQ_SIZE_0
+ q_off
, ilog2(HBM_DMA_QMAN_LENGTH
));
2635 WREG32(mmDMA0_QM_PQ_PI_0
+ q_off
, 0);
2636 WREG32(mmDMA0_QM_PQ_CI_0
+ q_off
, 0);
2638 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2639 QMAN_CPDMA_SIZE_OFFSET
);
2640 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2641 QMAN_CPDMA_SRC_OFFSET
);
2642 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2643 QMAN_CPDMA_DST_OFFSET
);
2645 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2646 QMAN_LDMA_SIZE_OFFSET
);
2647 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2648 QMAN_LDMA_SRC_OFFSET
);
2649 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2650 QMAN_LDMA_DST_OFFSET
);
2652 /* Configure RAZWI IRQ */
2653 dma_qm_err_cfg
= HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
2654 if (hdev
->stop_on_err
) {
2656 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
2658 WREG32(mmDMA0_QM_GLBL_ERR_CFG
+ dma_qm_offset
, dma_qm_err_cfg
);
2660 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO
+ dma_qm_offset
,
2661 lower_32_bits(CFG_BASE
+
2662 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2663 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI
+ dma_qm_offset
,
2664 upper_32_bits(CFG_BASE
+
2665 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2666 WREG32(mmDMA0_QM_GLBL_ERR_WDATA
+ dma_qm_offset
,
2667 gaudi_irq_map_table
[GAUDI_EVENT_DMA0_QM
].cpu_id
+
2670 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN
+ dma_qm_offset
,
2671 QM_ARB_ERR_MSG_EN_MASK
);
2673 /* Increase ARB WDT to support streams architecture */
2674 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT
+ dma_qm_offset
,
2675 GAUDI_ARB_WDT_TIMEOUT
);
2677 WREG32(mmDMA0_QM_GLBL_CFG1
+ dma_qm_offset
, 0);
2678 WREG32(mmDMA0_QM_GLBL_PROT
+ dma_qm_offset
,
2679 QMAN_INTERNAL_MAKE_TRUSTED
);
2682 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
2683 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
2684 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
2685 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
2687 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2688 if (gaudi_dma_assignment
[dma_id
] == GAUDI_ENGINE_ID_DMA_5
) {
2689 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0
+ q_off
,
2691 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0
+ q_off
,
2693 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0
+ q_off
,
2695 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0
+ q_off
,
2700 static void gaudi_init_hbm_dma_qmans(struct hl_device
*hdev
)
2702 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
2703 struct gaudi_internal_qman_info
*q
;
2705 int i
, j
, dma_id
, internal_q_index
;
2707 if (gaudi
->hw_cap_initialized
& HW_CAP_HBM_DMA
)
2710 for (i
= 0 ; i
< HBM_DMA_NUMBER_OF_CHNLS
; i
++) {
2711 dma_id
= gaudi_dma_assignment
[GAUDI_HBM_DMA_1
+ i
];
2713 for (j
= 0 ; j
< QMAN_STREAMS
; j
++) {
2715 * Add the CPU queue in order to get the correct queue
2716 * number as all internal queue are placed after it
2718 internal_q_index
= dma_id
* QMAN_STREAMS
+ j
+ 1;
2720 q
= &gaudi
->internal_qmans
[internal_q_index
];
2721 qman_base_addr
= (u64
) q
->pq_dma_addr
;
2722 gaudi_init_hbm_dma_qman(hdev
, dma_id
, j
,
2726 /* Initializing lower CP for HBM DMA QMAN */
2727 gaudi_init_hbm_dma_qman(hdev
, dma_id
, 4, 0);
2729 gaudi_init_dma_core(hdev
, dma_id
);
2731 gaudi_enable_qman(hdev
, dma_id
, HBM_DMA_QMAN_ENABLE
);
2734 gaudi
->hw_cap_initialized
|= HW_CAP_HBM_DMA
;
2737 static void gaudi_init_mme_qman(struct hl_device
*hdev
, u32 mme_offset
,
2738 int qman_id
, u64 qman_base_addr
)
2740 u32 mtr_base_lo
, mtr_base_hi
;
2741 u32 so_base_lo
, so_base_hi
;
2745 mtr_base_lo
= lower_32_bits(CFG_BASE
+
2746 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2747 mtr_base_hi
= upper_32_bits(CFG_BASE
+
2748 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2749 so_base_lo
= lower_32_bits(CFG_BASE
+
2750 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2751 so_base_hi
= upper_32_bits(CFG_BASE
+
2752 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2754 q_off
= mme_offset
+ qman_id
* 4;
2757 WREG32(mmMME0_QM_PQ_BASE_LO_0
+ q_off
,
2758 lower_32_bits(qman_base_addr
));
2759 WREG32(mmMME0_QM_PQ_BASE_HI_0
+ q_off
,
2760 upper_32_bits(qman_base_addr
));
2762 WREG32(mmMME0_QM_PQ_SIZE_0
+ q_off
, ilog2(MME_QMAN_LENGTH
));
2763 WREG32(mmMME0_QM_PQ_PI_0
+ q_off
, 0);
2764 WREG32(mmMME0_QM_PQ_CI_0
+ q_off
, 0);
2766 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2767 QMAN_CPDMA_SIZE_OFFSET
);
2768 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2769 QMAN_CPDMA_SRC_OFFSET
);
2770 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2771 QMAN_CPDMA_DST_OFFSET
);
2773 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2774 QMAN_LDMA_SIZE_OFFSET
);
2775 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2776 QMAN_LDMA_SRC_OFFSET
);
2777 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2778 QMAN_LDMA_DST_OFFSET
);
2780 /* Configure RAZWI IRQ */
2781 mme_id
= mme_offset
/
2782 (mmMME1_QM_GLBL_CFG0
- mmMME0_QM_GLBL_CFG0
);
2784 mme_qm_err_cfg
= MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
2785 if (hdev
->stop_on_err
) {
2787 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
2789 WREG32(mmMME0_QM_GLBL_ERR_CFG
+ mme_offset
, mme_qm_err_cfg
);
2790 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO
+ mme_offset
,
2791 lower_32_bits(CFG_BASE
+
2792 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2793 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI
+ mme_offset
,
2794 upper_32_bits(CFG_BASE
+
2795 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2796 WREG32(mmMME0_QM_GLBL_ERR_WDATA
+ mme_offset
,
2797 gaudi_irq_map_table
[GAUDI_EVENT_MME0_QM
].cpu_id
+
2800 WREG32(mmMME0_QM_ARB_ERR_MSG_EN
+ mme_offset
,
2801 QM_ARB_ERR_MSG_EN_MASK
);
2803 /* Increase ARB WDT to support streams architecture */
2804 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT
+ mme_offset
,
2805 GAUDI_ARB_WDT_TIMEOUT
);
2807 WREG32(mmMME0_QM_GLBL_CFG1
+ mme_offset
, 0);
2808 WREG32(mmMME0_QM_GLBL_PROT
+ mme_offset
,
2809 QMAN_INTERNAL_MAKE_TRUSTED
);
2812 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_lo
);
2813 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_hi
);
2814 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_lo
);
2815 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_hi
);
2818 static void gaudi_init_mme_qmans(struct hl_device
*hdev
)
2820 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
2821 struct gaudi_internal_qman_info
*q
;
2824 int i
, internal_q_index
;
2826 if (gaudi
->hw_cap_initialized
& HW_CAP_MME
)
2830 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2831 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2834 mme_offset
= mmMME2_QM_GLBL_CFG0
- mmMME0_QM_GLBL_CFG0
;
2836 for (i
= 0 ; i
< MME_NUMBER_OF_QMANS
; i
++) {
2837 internal_q_index
= GAUDI_QUEUE_ID_MME_0_0
+ i
;
2838 q
= &gaudi
->internal_qmans
[internal_q_index
];
2839 qman_base_addr
= (u64
) q
->pq_dma_addr
;
2840 gaudi_init_mme_qman(hdev
, mme_offset
, (i
& 0x3),
2846 /* Initializing lower CP for MME QMANs */
2847 mme_offset
= mmMME2_QM_GLBL_CFG0
- mmMME0_QM_GLBL_CFG0
;
2848 gaudi_init_mme_qman(hdev
, mme_offset
, 4, 0);
2849 gaudi_init_mme_qman(hdev
, 0, 4, 0);
2851 WREG32(mmMME2_QM_GLBL_CFG0
, QMAN_MME_ENABLE
);
2852 WREG32(mmMME0_QM_GLBL_CFG0
, QMAN_MME_ENABLE
);
2854 gaudi
->hw_cap_initialized
|= HW_CAP_MME
;
2857 static void gaudi_init_tpc_qman(struct hl_device
*hdev
, u32 tpc_offset
,
2858 int qman_id
, u64 qman_base_addr
)
2860 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
2861 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
2865 mtr_base_en_lo
= lower_32_bits(CFG_BASE
+
2866 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2867 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
2868 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2869 so_base_en_lo
= lower_32_bits(CFG_BASE
+
2870 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2871 so_base_en_hi
= upper_32_bits(CFG_BASE
+
2872 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2873 mtr_base_ws_lo
= lower_32_bits(CFG_BASE
+
2874 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2875 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
2876 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
2877 so_base_ws_lo
= lower_32_bits(CFG_BASE
+
2878 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2879 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
2880 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2882 q_off
= tpc_offset
+ qman_id
* 4;
2884 tpc_id
= tpc_offset
/
2885 (mmTPC1_QM_GLBL_CFG0
- mmTPC0_QM_GLBL_CFG0
);
2888 WREG32(mmTPC0_QM_PQ_BASE_LO_0
+ q_off
,
2889 lower_32_bits(qman_base_addr
));
2890 WREG32(mmTPC0_QM_PQ_BASE_HI_0
+ q_off
,
2891 upper_32_bits(qman_base_addr
));
2893 WREG32(mmTPC0_QM_PQ_SIZE_0
+ q_off
, ilog2(TPC_QMAN_LENGTH
));
2894 WREG32(mmTPC0_QM_PQ_PI_0
+ q_off
, 0);
2895 WREG32(mmTPC0_QM_PQ_CI_0
+ q_off
, 0);
2897 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2898 QMAN_CPDMA_SIZE_OFFSET
);
2899 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2900 QMAN_CPDMA_SRC_OFFSET
);
2901 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2902 QMAN_CPDMA_DST_OFFSET
);
2904 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
2905 QMAN_LDMA_SIZE_OFFSET
);
2906 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
2907 QMAN_LDMA_SRC_OFFSET
);
2908 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
2909 QMAN_LDMA_DST_OFFSET
);
2911 /* Configure RAZWI IRQ */
2912 tpc_qm_err_cfg
= TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
2913 if (hdev
->stop_on_err
) {
2915 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
2918 WREG32(mmTPC0_QM_GLBL_ERR_CFG
+ tpc_offset
, tpc_qm_err_cfg
);
2919 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO
+ tpc_offset
,
2920 lower_32_bits(CFG_BASE
+
2921 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2922 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI
+ tpc_offset
,
2923 upper_32_bits(CFG_BASE
+
2924 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
2925 WREG32(mmTPC0_QM_GLBL_ERR_WDATA
+ tpc_offset
,
2926 gaudi_irq_map_table
[GAUDI_EVENT_TPC0_QM
].cpu_id
+
2929 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN
+ tpc_offset
,
2930 QM_ARB_ERR_MSG_EN_MASK
);
2932 /* Increase ARB WDT to support streams architecture */
2933 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT
+ tpc_offset
,
2934 GAUDI_ARB_WDT_TIMEOUT
);
2936 WREG32(mmTPC0_QM_GLBL_CFG1
+ tpc_offset
, 0);
2937 WREG32(mmTPC0_QM_GLBL_PROT
+ tpc_offset
,
2938 QMAN_INTERNAL_MAKE_TRUSTED
);
2941 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
2942 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
2943 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
2944 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
2946 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
2948 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0
+ q_off
,
2950 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0
+ q_off
,
2952 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0
+ q_off
,
2954 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0
+ q_off
,
2959 static void gaudi_init_tpc_qmans(struct hl_device
*hdev
)
2961 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
2962 struct gaudi_internal_qman_info
*q
;
2964 u32 so_base_hi
, tpc_offset
= 0;
2965 u32 tpc_delta
= mmTPC1_CFG_SM_BASE_ADDRESS_HIGH
-
2966 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH
;
2967 int i
, tpc_id
, internal_q_index
;
2969 if (gaudi
->hw_cap_initialized
& HW_CAP_TPC_MASK
)
2972 so_base_hi
= upper_32_bits(CFG_BASE
+
2973 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
2975 for (tpc_id
= 0 ; tpc_id
< TPC_NUMBER_OF_ENGINES
; tpc_id
++) {
2976 for (i
= 0 ; i
< QMAN_STREAMS
; i
++) {
2977 internal_q_index
= GAUDI_QUEUE_ID_TPC_0_0
+
2978 tpc_id
* QMAN_STREAMS
+ i
;
2979 q
= &gaudi
->internal_qmans
[internal_q_index
];
2980 qman_base_addr
= (u64
) q
->pq_dma_addr
;
2981 gaudi_init_tpc_qman(hdev
, tpc_offset
, i
,
2985 /* Initializing lower CP for TPC QMAN */
2986 gaudi_init_tpc_qman(hdev
, tpc_offset
, 4, 0);
2988 /* Enable the QMAN and TPC channel */
2989 WREG32(mmTPC0_QM_GLBL_CFG0
+ tpc_offset
,
2994 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH
+ tpc_id
* tpc_delta
,
2997 tpc_offset
+= mmTPC1_QM_GLBL_CFG0
- mmTPC0_QM_GLBL_CFG0
;
2999 gaudi
->hw_cap_initialized
|=
3000 FIELD_PREP(HW_CAP_TPC_MASK
, 1 << tpc_id
);
3004 static void gaudi_init_nic_qman(struct hl_device
*hdev
, u32 nic_offset
,
3005 int qman_id
, u64 qman_base_addr
, int nic_id
)
3007 u32 mtr_base_en_lo
, mtr_base_en_hi
, mtr_base_ws_lo
, mtr_base_ws_hi
;
3008 u32 so_base_en_lo
, so_base_en_hi
, so_base_ws_lo
, so_base_ws_hi
;
3012 mtr_base_en_lo
= lower_32_bits(CFG_BASE
+
3013 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3014 mtr_base_en_hi
= upper_32_bits(CFG_BASE
+
3015 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3016 so_base_en_lo
= lower_32_bits(CFG_BASE
+
3017 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3018 so_base_en_hi
= upper_32_bits(CFG_BASE
+
3019 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3020 mtr_base_ws_lo
= lower_32_bits(CFG_BASE
+
3021 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3022 mtr_base_ws_hi
= upper_32_bits(CFG_BASE
+
3023 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0
);
3024 so_base_ws_lo
= lower_32_bits(CFG_BASE
+
3025 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3026 so_base_ws_hi
= upper_32_bits(CFG_BASE
+
3027 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
);
3029 q_off
= nic_offset
+ qman_id
* 4;
3031 WREG32(mmNIC0_QM0_PQ_BASE_LO_0
+ q_off
, lower_32_bits(qman_base_addr
));
3032 WREG32(mmNIC0_QM0_PQ_BASE_HI_0
+ q_off
, upper_32_bits(qman_base_addr
));
3034 WREG32(mmNIC0_QM0_PQ_SIZE_0
+ q_off
, ilog2(NIC_QMAN_LENGTH
));
3035 WREG32(mmNIC0_QM0_PQ_PI_0
+ q_off
, 0);
3036 WREG32(mmNIC0_QM0_PQ_CI_0
+ q_off
, 0);
3038 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0
+ q_off
,
3039 QMAN_LDMA_SIZE_OFFSET
);
3040 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0
+ q_off
,
3041 QMAN_LDMA_SRC_OFFSET
);
3042 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0
+ q_off
,
3043 QMAN_LDMA_DST_OFFSET
);
3045 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0
+ q_off
, mtr_base_en_lo
);
3046 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0
+ q_off
, mtr_base_en_hi
);
3047 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0
+ q_off
, so_base_en_lo
);
3048 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0
+ q_off
, so_base_en_hi
);
3050 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3051 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0
+ q_off
, mtr_base_ws_lo
);
3052 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0
+ q_off
, mtr_base_ws_hi
);
3053 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0
+ q_off
, so_base_ws_lo
);
3054 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0
+ q_off
, so_base_ws_hi
);
3057 /* Configure RAZWI IRQ */
3058 nic_qm_err_cfg
= NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK
;
3059 if (hdev
->stop_on_err
) {
3061 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK
;
3064 WREG32(mmNIC0_QM0_GLBL_ERR_CFG
+ nic_offset
, nic_qm_err_cfg
);
3065 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO
+ nic_offset
,
3066 lower_32_bits(CFG_BASE
+
3067 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
3068 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI
+ nic_offset
,
3069 upper_32_bits(CFG_BASE
+
3070 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR
));
3071 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA
+ nic_offset
,
3072 gaudi_irq_map_table
[GAUDI_EVENT_NIC0_QM0
].cpu_id
+
3075 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN
+ nic_offset
,
3076 QM_ARB_ERR_MSG_EN_MASK
);
3078 /* Increase ARB WDT to support streams architecture */
3079 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT
+ nic_offset
,
3080 GAUDI_ARB_WDT_TIMEOUT
);
3082 WREG32(mmNIC0_QM0_GLBL_CFG1
+ nic_offset
, 0);
3083 WREG32(mmNIC0_QM0_GLBL_PROT
+ nic_offset
,
3084 QMAN_INTERNAL_MAKE_TRUSTED
);
3088 static void gaudi_init_nic_qmans(struct hl_device
*hdev
)
3090 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3091 struct gaudi_internal_qman_info
*q
;
3094 u32 nic_delta_between_qmans
=
3095 mmNIC0_QM1_GLBL_CFG0
- mmNIC0_QM0_GLBL_CFG0
;
3096 u32 nic_delta_between_nics
=
3097 mmNIC1_QM0_GLBL_CFG0
- mmNIC0_QM0_GLBL_CFG0
;
3098 int i
, nic_id
, internal_q_index
;
3100 if (!hdev
->nic_ports_mask
)
3103 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC_MASK
)
3106 dev_dbg(hdev
->dev
, "Initializing NIC QMANs\n");
3108 for (nic_id
= 0 ; nic_id
< NIC_NUMBER_OF_ENGINES
; nic_id
++) {
3109 if (!(hdev
->nic_ports_mask
& (1 << nic_id
))) {
3110 nic_offset
+= nic_delta_between_qmans
;
3112 nic_offset
-= (nic_delta_between_qmans
* 2);
3113 nic_offset
+= nic_delta_between_nics
;
3118 for (i
= 0 ; i
< QMAN_STREAMS
; i
++) {
3119 internal_q_index
= GAUDI_QUEUE_ID_NIC_0_0
+
3120 nic_id
* QMAN_STREAMS
+ i
;
3121 q
= &gaudi
->internal_qmans
[internal_q_index
];
3122 qman_base_addr
= (u64
) q
->pq_dma_addr
;
3123 gaudi_init_nic_qman(hdev
, nic_offset
, (i
& 0x3),
3124 qman_base_addr
, nic_id
);
3127 /* Enable the QMAN */
3128 WREG32(mmNIC0_QM0_GLBL_CFG0
+ nic_offset
, NIC_QMAN_ENABLE
);
3130 nic_offset
+= nic_delta_between_qmans
;
3132 nic_offset
-= (nic_delta_between_qmans
* 2);
3133 nic_offset
+= nic_delta_between_nics
;
3136 gaudi
->hw_cap_initialized
|= 1 << (HW_CAP_NIC_SHIFT
+ nic_id
);
3140 static void gaudi_disable_pci_dma_qmans(struct hl_device
*hdev
)
3142 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3144 if (!(gaudi
->hw_cap_initialized
& HW_CAP_PCI_DMA
))
3147 WREG32(mmDMA0_QM_GLBL_CFG0
, 0);
3148 WREG32(mmDMA1_QM_GLBL_CFG0
, 0);
3149 WREG32(mmDMA5_QM_GLBL_CFG0
, 0);
3152 static void gaudi_disable_hbm_dma_qmans(struct hl_device
*hdev
)
3154 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3156 if (!(gaudi
->hw_cap_initialized
& HW_CAP_HBM_DMA
))
3159 WREG32(mmDMA2_QM_GLBL_CFG0
, 0);
3160 WREG32(mmDMA3_QM_GLBL_CFG0
, 0);
3161 WREG32(mmDMA4_QM_GLBL_CFG0
, 0);
3162 WREG32(mmDMA6_QM_GLBL_CFG0
, 0);
3163 WREG32(mmDMA7_QM_GLBL_CFG0
, 0);
3166 static void gaudi_disable_mme_qmans(struct hl_device
*hdev
)
3168 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3170 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MME
))
3173 WREG32(mmMME2_QM_GLBL_CFG0
, 0);
3174 WREG32(mmMME0_QM_GLBL_CFG0
, 0);
3177 static void gaudi_disable_tpc_qmans(struct hl_device
*hdev
)
3179 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3183 if (!(gaudi
->hw_cap_initialized
& HW_CAP_TPC_MASK
))
3186 for (tpc_id
= 0 ; tpc_id
< TPC_NUMBER_OF_ENGINES
; tpc_id
++) {
3187 WREG32(mmTPC0_QM_GLBL_CFG0
+ tpc_offset
, 0);
3188 tpc_offset
+= mmTPC1_QM_GLBL_CFG0
- mmTPC0_QM_GLBL_CFG0
;
3192 static void gaudi_disable_nic_qmans(struct hl_device
*hdev
)
3194 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3195 u32 nic_mask
, nic_offset
= 0;
3196 u32 nic_delta_between_qmans
=
3197 mmNIC0_QM1_GLBL_CFG0
- mmNIC0_QM0_GLBL_CFG0
;
3198 u32 nic_delta_between_nics
=
3199 mmNIC1_QM0_GLBL_CFG0
- mmNIC0_QM0_GLBL_CFG0
;
3202 for (nic_id
= 0 ; nic_id
< NIC_NUMBER_OF_ENGINES
; nic_id
++) {
3203 nic_mask
= 1 << (HW_CAP_NIC_SHIFT
+ nic_id
);
3205 if (gaudi
->hw_cap_initialized
& nic_mask
)
3206 WREG32(mmNIC0_QM0_GLBL_CFG0
+ nic_offset
, 0);
3208 nic_offset
+= nic_delta_between_qmans
;
3210 nic_offset
-= (nic_delta_between_qmans
* 2);
3211 nic_offset
+= nic_delta_between_nics
;
3216 static void gaudi_stop_pci_dma_qmans(struct hl_device
*hdev
)
3218 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3220 if (!(gaudi
->hw_cap_initialized
& HW_CAP_PCI_DMA
))
3223 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3224 WREG32(mmDMA0_QM_GLBL_CFG1
, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3225 WREG32(mmDMA1_QM_GLBL_CFG1
, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3226 WREG32(mmDMA5_QM_GLBL_CFG1
, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3229 static void gaudi_stop_hbm_dma_qmans(struct hl_device
*hdev
)
3231 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3233 if (!(gaudi
->hw_cap_initialized
& HW_CAP_HBM_DMA
))
3236 /* Stop CPs of HBM DMA QMANs */
3238 WREG32(mmDMA2_QM_GLBL_CFG1
, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3239 WREG32(mmDMA3_QM_GLBL_CFG1
, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3240 WREG32(mmDMA4_QM_GLBL_CFG1
, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3241 WREG32(mmDMA6_QM_GLBL_CFG1
, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3242 WREG32(mmDMA7_QM_GLBL_CFG1
, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3245 static void gaudi_stop_mme_qmans(struct hl_device
*hdev
)
3247 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3249 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MME
))
3252 /* Stop CPs of MME QMANs */
3253 WREG32(mmMME2_QM_GLBL_CFG1
, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3254 WREG32(mmMME0_QM_GLBL_CFG1
, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3257 static void gaudi_stop_tpc_qmans(struct hl_device
*hdev
)
3259 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3261 if (!(gaudi
->hw_cap_initialized
& HW_CAP_TPC_MASK
))
3264 WREG32(mmTPC0_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3265 WREG32(mmTPC1_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3266 WREG32(mmTPC2_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3267 WREG32(mmTPC3_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3268 WREG32(mmTPC4_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3269 WREG32(mmTPC5_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3270 WREG32(mmTPC6_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3271 WREG32(mmTPC7_QM_GLBL_CFG1
, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT
);
3274 static void gaudi_stop_nic_qmans(struct hl_device
*hdev
)
3276 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3278 /* Stop upper CPs of QMANs */
3280 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC0
)
3281 WREG32(mmNIC0_QM0_GLBL_CFG1
,
3282 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3283 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3284 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3286 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC1
)
3287 WREG32(mmNIC0_QM1_GLBL_CFG1
,
3288 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3289 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3290 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3292 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC2
)
3293 WREG32(mmNIC1_QM0_GLBL_CFG1
,
3294 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3295 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3296 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3298 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC3
)
3299 WREG32(mmNIC1_QM1_GLBL_CFG1
,
3300 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3301 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3302 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3304 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC4
)
3305 WREG32(mmNIC2_QM0_GLBL_CFG1
,
3306 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3307 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3308 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3310 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC5
)
3311 WREG32(mmNIC2_QM1_GLBL_CFG1
,
3312 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3313 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3314 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3316 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC6
)
3317 WREG32(mmNIC3_QM0_GLBL_CFG1
,
3318 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3319 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3320 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3322 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC7
)
3323 WREG32(mmNIC3_QM1_GLBL_CFG1
,
3324 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3325 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3326 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3328 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC8
)
3329 WREG32(mmNIC4_QM0_GLBL_CFG1
,
3330 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3331 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3332 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3334 if (gaudi
->hw_cap_initialized
& HW_CAP_NIC9
)
3335 WREG32(mmNIC4_QM1_GLBL_CFG1
,
3336 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK
|
3337 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK
|
3338 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK
);
3341 static void gaudi_pci_dma_stall(struct hl_device
*hdev
)
3343 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3345 if (!(gaudi
->hw_cap_initialized
& HW_CAP_PCI_DMA
))
3348 WREG32(mmDMA0_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3349 WREG32(mmDMA1_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3350 WREG32(mmDMA5_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3353 static void gaudi_hbm_dma_stall(struct hl_device
*hdev
)
3355 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3357 if (!(gaudi
->hw_cap_initialized
& HW_CAP_HBM_DMA
))
3360 WREG32(mmDMA2_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3361 WREG32(mmDMA3_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3362 WREG32(mmDMA4_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3363 WREG32(mmDMA6_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3364 WREG32(mmDMA7_CORE_CFG_1
, 1 << DMA0_CORE_CFG_1_HALT_SHIFT
);
3367 static void gaudi_mme_stall(struct hl_device
*hdev
)
3369 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3371 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MME
))
3374 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3375 WREG32(mmMME0_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3376 WREG32(mmMME0_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3377 WREG32(mmMME0_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3378 WREG32(mmMME0_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3379 WREG32(mmMME1_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3380 WREG32(mmMME1_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3381 WREG32(mmMME1_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3382 WREG32(mmMME1_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3383 WREG32(mmMME2_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3384 WREG32(mmMME2_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3385 WREG32(mmMME2_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3386 WREG32(mmMME2_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3387 WREG32(mmMME3_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3388 WREG32(mmMME3_ACC_ACC_STALL
, 1 << MME_ACC_ACC_STALL_R_SHIFT
);
3389 WREG32(mmMME3_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3390 WREG32(mmMME3_SBAB_SB_STALL
, 1 << MME_SBAB_SB_STALL_R_SHIFT
);
3393 static void gaudi_tpc_stall(struct hl_device
*hdev
)
3395 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3397 if (!(gaudi
->hw_cap_initialized
& HW_CAP_TPC_MASK
))
3400 WREG32(mmTPC0_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3401 WREG32(mmTPC1_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3402 WREG32(mmTPC2_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3403 WREG32(mmTPC3_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3404 WREG32(mmTPC4_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3405 WREG32(mmTPC5_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3406 WREG32(mmTPC6_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3407 WREG32(mmTPC7_CFG_TPC_STALL
, 1 << TPC0_CFG_TPC_STALL_V_SHIFT
);
3410 static void gaudi_set_clock_gating(struct hl_device
*hdev
)
3412 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3417 /* In case we are during debug session, don't enable the clock gate
3418 * as it may interfere
3423 if (!hdev
->asic_prop
.fw_security_disabled
)
3426 for (i
= GAUDI_PCI_DMA_1
, qman_offset
= 0 ; i
< GAUDI_HBM_DMA_1
; i
++) {
3427 enable
= !!(hdev
->clock_gating_mask
&
3428 (BIT_ULL(gaudi_dma_assignment
[i
])));
3430 qman_offset
= gaudi_dma_assignment
[i
] * DMA_QMAN_OFFSET
;
3431 WREG32(mmDMA0_QM_CGM_CFG1
+ qman_offset
,
3432 enable
? QMAN_CGM1_PWR_GATE_EN
: 0);
3433 WREG32(mmDMA0_QM_CGM_CFG
+ qman_offset
,
3434 enable
? QMAN_UPPER_CP_CGM_PWR_GATE_EN
: 0);
3437 for (i
= GAUDI_HBM_DMA_1
; i
< GAUDI_DMA_MAX
; i
++) {
3438 enable
= !!(hdev
->clock_gating_mask
&
3439 (BIT_ULL(gaudi_dma_assignment
[i
])));
3441 qman_offset
= gaudi_dma_assignment
[i
] * DMA_QMAN_OFFSET
;
3442 WREG32(mmDMA0_QM_CGM_CFG1
+ qman_offset
,
3443 enable
? QMAN_CGM1_PWR_GATE_EN
: 0);
3444 WREG32(mmDMA0_QM_CGM_CFG
+ qman_offset
,
3445 enable
? QMAN_COMMON_CP_CGM_PWR_GATE_EN
: 0);
3448 enable
= !!(hdev
->clock_gating_mask
& (BIT_ULL(GAUDI_ENGINE_ID_MME_0
)));
3449 WREG32(mmMME0_QM_CGM_CFG1
, enable
? QMAN_CGM1_PWR_GATE_EN
: 0);
3450 WREG32(mmMME0_QM_CGM_CFG
, enable
? QMAN_COMMON_CP_CGM_PWR_GATE_EN
: 0);
3452 enable
= !!(hdev
->clock_gating_mask
& (BIT_ULL(GAUDI_ENGINE_ID_MME_2
)));
3453 WREG32(mmMME2_QM_CGM_CFG1
, enable
? QMAN_CGM1_PWR_GATE_EN
: 0);
3454 WREG32(mmMME2_QM_CGM_CFG
, enable
? QMAN_COMMON_CP_CGM_PWR_GATE_EN
: 0);
3456 for (i
= 0, qman_offset
= 0 ; i
< TPC_NUMBER_OF_ENGINES
; i
++) {
3457 enable
= !!(hdev
->clock_gating_mask
&
3458 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0
+ i
)));
3460 WREG32(mmTPC0_QM_CGM_CFG1
+ qman_offset
,
3461 enable
? QMAN_CGM1_PWR_GATE_EN
: 0);
3462 WREG32(mmTPC0_QM_CGM_CFG
+ qman_offset
,
3463 enable
? QMAN_COMMON_CP_CGM_PWR_GATE_EN
: 0);
3465 qman_offset
+= TPC_QMAN_OFFSET
;
3468 gaudi
->hw_cap_initialized
|= HW_CAP_CLK_GATE
;
3471 static void gaudi_disable_clock_gating(struct hl_device
*hdev
)
3473 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
3477 if (!hdev
->asic_prop
.fw_security_disabled
)
3480 for (i
= 0, qman_offset
= 0 ; i
< DMA_NUMBER_OF_CHANNELS
; i
++) {
3481 WREG32(mmDMA0_QM_CGM_CFG
+ qman_offset
, 0);
3482 WREG32(mmDMA0_QM_CGM_CFG1
+ qman_offset
, 0);
3484 qman_offset
+= (mmDMA1_QM_CGM_CFG
- mmDMA0_QM_CGM_CFG
);
3487 WREG32(mmMME0_QM_CGM_CFG
, 0);
3488 WREG32(mmMME0_QM_CGM_CFG1
, 0);
3489 WREG32(mmMME2_QM_CGM_CFG
, 0);
3490 WREG32(mmMME2_QM_CGM_CFG1
, 0);
3492 for (i
= 0, qman_offset
= 0 ; i
< TPC_NUMBER_OF_ENGINES
; i
++) {
3493 WREG32(mmTPC0_QM_CGM_CFG
+ qman_offset
, 0);
3494 WREG32(mmTPC0_QM_CGM_CFG1
+ qman_offset
, 0);
3496 qman_offset
+= (mmTPC1_QM_CGM_CFG
- mmTPC0_QM_CGM_CFG
);
3499 gaudi
->hw_cap_initialized
&= ~(HW_CAP_CLK_GATE
);
3502 static void gaudi_enable_timestamp(struct hl_device
*hdev
)
3504 /* Disable the timestamp counter */
3505 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
, 0);
3507 /* Zero the lower/upper parts of the 64-bit counter */
3508 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
+ 0xC, 0);
3509 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
+ 0x8, 0);
3511 /* Enable the counter */
3512 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
, 1);
3515 static void gaudi_disable_timestamp(struct hl_device
*hdev
)
3517 /* Disable the timestamp counter */
3518 WREG32(mmPSOC_TIMESTAMP_BASE
- CFG_BASE
, 0);
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	if (hdev->pldm)
		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

	gaudi_stop_nic_qmans(hdev);
	gaudi_stop_mme_qmans(hdev);
	gaudi_stop_tpc_qmans(hdev);
	gaudi_stop_hbm_dma_qmans(hdev);
	gaudi_stop_pci_dma_qmans(hdev);

	hdev->asic_funcs->disable_clock_gating(hdev);

	msleep(wait_timeout_ms);

	gaudi_pci_dma_stall(hdev);
	gaudi_hbm_dma_stall(hdev);
	gaudi_tpc_stall(hdev);
	gaudi_mme_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi_disable_nic_qmans(hdev);
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_tpc_qmans(hdev);
	gaudi_disable_hbm_dma_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	gaudi_disable_timestamp(hdev);

	gaudi_disable_msi(hdev);
}

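/*
 * MMU initialization: every ASID gets its own hop-0 table, carved out of the
 * MMU page-table area at mmu_pgt_addr + asid * mmu_hop_table_size, and the
 * STLB cache-invalidation base is pointed at the cache management page before
 * the MMU is enabled.
 */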
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			return rc;
		}
	}

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	WREG32(mmSTLB_HOP_CONFIGURATION,
			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;
}

static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	/* HBM scrambler must be initialized before pushing F/W to HBM */
	gaudi_init_scrambler_hbm(hdev);

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}

static int gaudi_read_device_fw_version(struct hl_device *hdev,
					enum hl_fw_component fwc)
{
	const char *name;
	u32 ver_off;
	char *dest;

	switch (fwc) {
	case FW_COMP_UBOOT:
		ver_off = RREG32(mmUBOOT_VER_OFFSET);
		dest = hdev->asic_prop.uboot_ver;
		name = "U-Boot";
		break;
	case FW_COMP_PREBOOT:
		ver_off = RREG32(mmPREBOOT_VER_OFFSET);
		dest = hdev->asic_prop.preboot_ver;
		name = "Preboot";
		break;
	default:
		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
		return -EIO;
	}

	ver_off &= ~((u32)SRAM_BASE_ADDR);

	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
		memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
							VERSION_MAX_LEN);
	} else {
		dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
								name, ver_off);
		strcpy(dest, "unavailable");
		return -EIO;
	}

	return 0;
}

static int gaudi_init_cpu(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (!hdev->cpu_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * The device CPU works with 40 bits addresses.
	 * This register sets the extension to 50 bits.
	 */
	if (hdev->asic_prop.fw_security_disabled)
		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);

	rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
			mmCPU_CMD_STATUS_TO_HOST,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			!hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);

	if (rc)
		return rc;

	gaudi->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq =
			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
			lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
			upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	if (gaudi->multi_msi_mode)
		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
	else
		WREG32(mmCPU_IF_QUEUE_INIT,
			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
		return -EIO;
	}

	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}

static void gaudi_pre_hw_init(struct hl_device *hdev)
{
	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	if (hdev->asic_prop.fw_security_disabled) {
		/* Set the access through PCI bars (Linux driver only) as
		 * secured
		 */
		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));

		/* Perform read to flush the waiting writes to ensure
		 * configuration was set in the device
		 */
		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
	}

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
}

static int gaudi_hw_init(struct hl_device *hdev)
{
	int rc;

	gaudi_pre_hw_init(hdev);

	gaudi_init_pci_dma_qmans(hdev);

	gaudi_init_hbm_dma_qmans(hdev);

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* In case the clock gating was enabled in preboot we need to disable
	 * it here before touching the MME/TPC registers.
	 * There is no need to take clk gating mutex because when this function
	 * runs, no other relevant code can run
	 */
	hdev->asic_funcs->disable_clock_gating(hdev);

	/* SRAM scrambler must be initialized after CPU is running from HBM */
	gaudi_init_scrambler_sram(hdev);

	/* This is here just in case we are working without CPU */
	gaudi_init_scrambler_hbm(hdev);

	gaudi_init_golden_registers(hdev);

	rc = gaudi_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi_init_security(hdev);

	gaudi_init_mme_qmans(hdev);

	gaudi_init_tpc_qmans(hdev);

	gaudi_init_nic_qmans(hdev);

	hdev->asic_funcs->set_clock_gating(hdev);

	gaudi_enable_timestamp(hdev);

	/* MSI must be enabled before CPU queues and NIC are initialized */
	rc = gaudi_enable_msi(hdev);
	if (rc)
		goto disable_queues;

	/* must be called after MSI was enabled */
	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
			rc);
		goto disable_msi;
	}

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_msi:
	gaudi_disable_msi(hdev);
disable_queues:
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	return rc;
}

static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, reset_timeout_ms, cpu_timeout_ms;

	if (!hard_reset) {
		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
		return;
	}

	if (hdev->pldm) {
		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
	}

	/* Set device to handle FLR by H/W as we will put the device CPU to
	 * halt mode
	 */
	if (hdev->asic_prop.fw_security_disabled &&
				!hdev->asic_prop.hard_reset_done_by_fw)
		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	/* I don't know what is the state of the CPU so make sure it is
	 * stopped in any means necessary
	 */
	if (hdev->asic_prop.hard_reset_done_by_fw)
		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
	else
		WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);

	if (hdev->asic_prop.fw_security_disabled &&
				!hdev->asic_prop.hard_reset_done_by_fw) {

		/* Configure the reset registers. Must be done as early as
		 * possible in case we fail during H/W initialization
		 */
		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
						(CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_TPC_7_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
						(CFG_RST_H_HBM_MASK |
						CFG_RST_H_TPC_7_MASK |
						CFG_RST_H_NIC_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_CPU_MASK |
						CFG_RST_H_MMU_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
						(CFG_RST_L_IF_MASK |
						CFG_RST_L_PSOC_MASK |
						CFG_RST_L_TPC_MASK));

		msleep(cpu_timeout_ms);

		/* Tell ASIC not to re-initialize PCIe */
		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);

		/* Restart BTL/BLR upon hard-reset */
		if (hdev->asic_prop.fw_security_disabled)
			WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);

		dev_info(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		dev_info(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. Need to wait until the reset is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
					HW_CAP_HBM | HW_CAP_PCI_DMA |
					HW_CAP_MME | HW_CAP_TPC_MASK |
					HW_CAP_HBM_DMA | HW_CAP_PLL |
					HW_CAP_NIC_MASK | HW_CAP_MMU |
					HW_CAP_SRAM_SCRAMBLER |
					HW_CAP_HBM_SCRAMBLER |
					HW_CAP_CLK_GATE);

	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
}

static int gaudi_suspend(struct hl_device *hdev)
{
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
	if (rc)
		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

	return rc;
}

static int gaudi_resume(struct hl_device *hdev)
{
	return gaudi_init_iatu(hdev);
}

static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

	return rc;
}

static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 db_reg_offset, db_value, dma_qm_offset, q_off;
	int dma_id;
	bool invalid_queue = false;

	switch (hw_queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_CPU_PQ:
		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
			db_reg_offset = mmCPU_IF_PF_PQ_PI;
		else
			invalid_queue = true;
		break;

	case GAUDI_QUEUE_ID_MME_0_0:
		db_reg_offset = mmMME2_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_MME_0_1:
		db_reg_offset = mmMME2_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_MME_0_2:
		db_reg_offset = mmMME2_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_MME_0_3:
		db_reg_offset = mmMME2_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_MME_1_0:
		db_reg_offset = mmMME0_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_MME_1_1:
		db_reg_offset = mmMME0_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_MME_1_2:
		db_reg_offset = mmMME0_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_MME_1_3:
		db_reg_offset = mmMME0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_0_0:
		db_reg_offset = mmTPC0_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_0_1:
		db_reg_offset = mmTPC0_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_0_2:
		db_reg_offset = mmTPC0_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_0_3:
		db_reg_offset = mmTPC0_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_TPC_1_0:
		db_reg_offset = mmTPC1_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_1_1:
		db_reg_offset = mmTPC1_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_1_2:
		db_reg_offset = mmTPC1_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_1_3:
		db_reg_offset = mmTPC1_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_TPC_2_0:
		db_reg_offset = mmTPC2_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_2_1:
		db_reg_offset = mmTPC2_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_2_2:
		db_reg_offset = mmTPC2_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_2_3:
		db_reg_offset = mmTPC2_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_TPC_3_0:
		db_reg_offset = mmTPC3_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_3_1:
		db_reg_offset = mmTPC3_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_3_2:
		db_reg_offset = mmTPC3_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_3_3:
		db_reg_offset = mmTPC3_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_TPC_4_0:
		db_reg_offset = mmTPC4_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_4_1:
		db_reg_offset = mmTPC4_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_4_2:
		db_reg_offset = mmTPC4_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_4_3:
		db_reg_offset = mmTPC4_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_TPC_5_0:
		db_reg_offset = mmTPC5_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_5_1:
		db_reg_offset = mmTPC5_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_5_2:
		db_reg_offset = mmTPC5_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_5_3:
		db_reg_offset = mmTPC5_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_TPC_6_0:
		db_reg_offset = mmTPC6_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_6_1:
		db_reg_offset = mmTPC6_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_6_2:
		db_reg_offset = mmTPC6_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_6_3:
		db_reg_offset = mmTPC6_QM_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		db_reg_offset = mmTPC7_QM_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		db_reg_offset = mmTPC7_QM_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		db_reg_offset = mmTPC7_QM_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		db_reg_offset = mmTPC7_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_NIC_0_0:
		db_reg_offset = mmNIC0_QM0_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
		db_reg_offset = mmNIC0_QM0_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
		db_reg_offset = mmNIC0_QM0_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
		db_reg_offset = mmNIC0_QM0_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_1_0:
		db_reg_offset = mmNIC0_QM1_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_1_1:
		db_reg_offset = mmNIC0_QM1_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_1_2:
		db_reg_offset = mmNIC0_QM1_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_1_3:
		db_reg_offset = mmNIC0_QM1_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_2_0:
		db_reg_offset = mmNIC1_QM0_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_2_1:
		db_reg_offset = mmNIC1_QM0_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_2_2:
		db_reg_offset = mmNIC1_QM0_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_2_3:
		db_reg_offset = mmNIC1_QM0_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_3_0:
		db_reg_offset = mmNIC1_QM1_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_3_1:
		db_reg_offset = mmNIC1_QM1_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_3_2:
		db_reg_offset = mmNIC1_QM1_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_3_3:
		db_reg_offset = mmNIC1_QM1_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_4_0:
		db_reg_offset = mmNIC2_QM0_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_4_1:
		db_reg_offset = mmNIC2_QM0_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_4_2:
		db_reg_offset = mmNIC2_QM0_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_4_3:
		db_reg_offset = mmNIC2_QM0_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_5_0:
		db_reg_offset = mmNIC2_QM1_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_5_1:
		db_reg_offset = mmNIC2_QM1_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_5_2:
		db_reg_offset = mmNIC2_QM1_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_5_3:
		db_reg_offset = mmNIC2_QM1_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_6_0:
		db_reg_offset = mmNIC3_QM0_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_6_1:
		db_reg_offset = mmNIC3_QM0_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_6_2:
		db_reg_offset = mmNIC3_QM0_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_6_3:
		db_reg_offset = mmNIC3_QM0_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_7_0:
		db_reg_offset = mmNIC3_QM1_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_7_1:
		db_reg_offset = mmNIC3_QM1_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_7_2:
		db_reg_offset = mmNIC3_QM1_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_7_3:
		db_reg_offset = mmNIC3_QM1_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_8_0:
		db_reg_offset = mmNIC4_QM0_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_8_1:
		db_reg_offset = mmNIC4_QM0_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_8_2:
		db_reg_offset = mmNIC4_QM0_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_8_3:
		db_reg_offset = mmNIC4_QM0_PQ_PI_3;
		break;
	case GAUDI_QUEUE_ID_NIC_9_0:
		db_reg_offset = mmNIC4_QM1_PQ_PI_0;
		break;
	case GAUDI_QUEUE_ID_NIC_9_1:
		db_reg_offset = mmNIC4_QM1_PQ_PI_1;
		break;
	case GAUDI_QUEUE_ID_NIC_9_2:
		db_reg_offset = mmNIC4_QM1_PQ_PI_2;
		break;
	case GAUDI_QUEUE_ID_NIC_9_3:
		db_reg_offset = mmNIC4_QM1_PQ_PI_3;
		break;

	default:
		invalid_queue = true;
	}

	if (invalid_queue) {
		/* Should never get here */
		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GAUDI_EVENT_PI_UPDATE);
}

static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	__le64 *pbd = (__le64 *) bd;

	/* The QMANs are on the host memory so a simple copy suffice */
	pqe[0] = pbd[0];
	pqe[1] = pbd[1];
}

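/*
 * Host memory convention: addresses handed to the device are offset by
 * HOST_PHYS_BASE, so every allocation path below adds it to the DMA handle
 * and every release path subtracts it before calling back into the DMA API.
 */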
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
		void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}

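/*
 * HBM scrubbing below memsets the user DRAM range in chunks of up to 2GB,
 * programming all DMA cores in parallel (MEM_SET commit with SRC cleared and
 * DST walking through DRAM) and then polling each core's STS0 busy bit
 * before moving to the next chunk.
 */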
static int gaudi_hbm_scrubbing(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = DRAM_BASE_ADDR_USER;
	u32 val;
	u32 chunk_size;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
						lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
						upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
						chunk_size);
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			if (cur_addr == prop->dram_end_address)
				break;
		}

		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				val,
				((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}

static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc = 0;
	u64 val = 0;

	if (!hdev->memory_scrub)
		return 0;

	if (!addr && !size) {
		/* Wait till device is idle */
		rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 /* dummy */,
				val /* dummy */,
				(hdev->asic_funcs->is_device_idle(hdev,
								NULL, NULL)),
						1000,
						HBM_SCRUBBING_TIMEOUT_US);
		if (rc) {
			dev_err(hdev->dev, "waiting for idle timeout\n");
			return -EIO;
		}

		/* Scrub SRAM */
		addr = prop->sram_user_base_address;
		size = hdev->pldm ? 0x10000 :
				(prop->sram_size - SRAM_USER_BASE_OFFSET);
		val = 0x7777777777777777ull;

		rc = gaudi_memset_device_memory(hdev, addr, size, val);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to clear SRAM in mem scrub all\n");
			return rc;
		}

		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);

		/* Scrub HBM using all DMA channels in parallel */
		rc = gaudi_hbm_scrubbing(hdev);
		if (rc)
			dev_err(hdev->dev,
				"Failed to clear HBM in mem scrub all\n");

		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}

static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;

	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	q = &gaudi->internal_qmans[queue_id];
	*dma_handle = q->pq_dma_addr;
	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;

	return q->pq_kernel_addr;
}

static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
						timeout, result);
}

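/*
 * The queue test below pushes a single MSG_PROT fence packet through the
 * tested H/W queue and waits for the engine to write GAUDI_QMAN0_FENCE_VAL
 * into a host scratch buffer; a timeout means the queue is not functional.
 */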
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp, timeout_usec;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;

	fence_val = GAUDI_QMAN0_FENCE_VAL;

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, timeout_usec, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}

static int gaudi_test_cpu_queue(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/*
	 * check capability here as send_cpu_message() won't update the result
	 * value if no capability
	 */
	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_test_cpu_queue(hdev);
}

static int gaudi_test_queues(struct hl_device *hdev)
{
	int i, rc, ret_val = 0;

	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
			rc = gaudi_test_queue(hdev, i);
			if (rc)
				ret_val = -EINVAL;
		}
	}

	rc = gaudi_test_cpu_queue(hdev);
	if (rc)
		ret_val = -EINVAL;

	return ret_val;
}

static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
		gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *kernel_addr;

	if (size > GAUDI_DMA_POOL_BLK_SIZE)
		return NULL;

	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
			dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;

	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}

static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
					size_t size, dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}

static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}

static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
		return -ENOMEM;

	/* Shift to the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address += HOST_PHYS_BASE;

	return 0;
}

static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	/* Cancel the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address -= HOST_PHYS_BASE;

	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}

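/*
 * gaudi_get_dma_desc_list_size() sizes the patched CB: adjacent scatter-gather
 * entries that are physically contiguous are merged as long as the combined
 * length stays within DMA_MAX_TRANSFER_SIZE, and each resulting descriptor
 * costs one packet_lin_dma in the patched CB.
 */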
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
					struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}

4828 static int gaudi_pin_memory_before_cs(struct hl_device
*hdev
,
4829 struct hl_cs_parser
*parser
,
4830 struct packet_lin_dma
*user_dma_pkt
,
4831 u64 addr
, enum dma_data_direction dir
)
4833 struct hl_userptr
*userptr
;
4836 if (hl_userptr_is_pinned(hdev
, addr
, le32_to_cpu(user_dma_pkt
->tsize
),
4837 parser
->job_userptr_list
, &userptr
))
4838 goto already_pinned
;
4840 userptr
= kzalloc(sizeof(*userptr
), GFP_ATOMIC
);
4844 rc
= hl_pin_host_memory(hdev
, addr
, le32_to_cpu(user_dma_pkt
->tsize
),
4849 list_add_tail(&userptr
->job_node
, parser
->job_userptr_list
);
4851 rc
= hdev
->asic_funcs
->asic_dma_map_sg(hdev
, userptr
->sgt
->sgl
,
4852 userptr
->sgt
->nents
, dir
);
4854 dev_err(hdev
->dev
, "failed to map sgt with DMA region\n");
4858 userptr
->dma_mapped
= true;
4862 parser
->patched_cb_size
+=
4863 gaudi_get_dma_desc_list_size(hdev
, userptr
->sgt
);
4868 hl_unpin_host_memory(hdev
, userptr
);
4874 static int gaudi_validate_dma_pkt_host(struct hl_device
*hdev
,
4875 struct hl_cs_parser
*parser
,
4876 struct packet_lin_dma
*user_dma_pkt
,
4879 enum dma_data_direction dir
;
4880 bool skip_host_mem_pin
= false, user_memset
;
4884 user_memset
= (le32_to_cpu(user_dma_pkt
->ctl
) &
4885 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK
) >>
4886 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT
;
4890 skip_host_mem_pin
= true;
4892 dev_dbg(hdev
->dev
, "DMA direction is HOST --> DEVICE\n");
4893 dir
= DMA_TO_DEVICE
;
4894 addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
4896 dev_dbg(hdev
->dev
, "DMA direction is DEVICE --> HOST\n");
4897 dir
= DMA_FROM_DEVICE
;
4898 addr
= (le64_to_cpu(user_dma_pkt
->dst_addr
) &
4899 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK
) >>
4900 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT
;
4903 if (skip_host_mem_pin
)
4904 parser
->patched_cb_size
+= sizeof(*user_dma_pkt
);
4906 rc
= gaudi_pin_memory_before_cs(hdev
, parser
, user_dma_pkt
,
4912 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device
*hdev
,
4913 struct hl_cs_parser
*parser
,
4914 struct packet_lin_dma
*user_dma_pkt
)
4916 bool src_in_host
= false;
4917 u64 dst_addr
= (le64_to_cpu(user_dma_pkt
->dst_addr
) &
4918 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK
) >>
4919 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT
;
4921 dev_dbg(hdev
->dev
, "DMA packet details:\n");
4922 dev_dbg(hdev
->dev
, "source == 0x%llx\n",
4923 le64_to_cpu(user_dma_pkt
->src_addr
));
4924 dev_dbg(hdev
->dev
, "destination == 0x%llx\n", dst_addr
);
4925 dev_dbg(hdev
->dev
, "size == %u\n", le32_to_cpu(user_dma_pkt
->tsize
));
4928 * Special handling for DMA with size 0. Bypass all validations
4929 * because no transactions will be done except for WR_COMP, which
4930 * is not a security issue
4932 if (!le32_to_cpu(user_dma_pkt
->tsize
)) {
4933 parser
->patched_cb_size
+= sizeof(*user_dma_pkt
);
4937 if (parser
->hw_queue_id
<= GAUDI_QUEUE_ID_DMA_0_3
)
4940 return gaudi_validate_dma_pkt_host(hdev
, parser
, user_dma_pkt
,
4944 static int gaudi_validate_load_and_exe_pkt(struct hl_device
*hdev
,
4945 struct hl_cs_parser
*parser
,
4946 struct packet_load_and_exe
*user_pkt
)
4950 cfg
= le32_to_cpu(user_pkt
->cfg
);
4952 if (cfg
& GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK
) {
4954 "User not allowed to use Load and Execute\n");
4958 parser
->patched_cb_size
+= sizeof(struct packet_load_and_exe
);
4963 static int gaudi_validate_cb(struct hl_device
*hdev
,
4964 struct hl_cs_parser
*parser
, bool is_mmu
)
4966 u32 cb_parsed_length
= 0;
4969 parser
->patched_cb_size
= 0;
4971 /* cb_user_size is more than 0 so loop will always be executed */
4972 while (cb_parsed_length
< parser
->user_cb_size
) {
4973 enum packet_id pkt_id
;
4975 struct gaudi_packet
*user_pkt
;
4977 user_pkt
= parser
->user_cb
->kernel_address
+ cb_parsed_length
;
4979 pkt_id
= (enum packet_id
) (
4980 (le64_to_cpu(user_pkt
->header
) &
4981 PACKET_HEADER_PACKET_ID_MASK
) >>
4982 PACKET_HEADER_PACKET_ID_SHIFT
);
4984 if (!validate_packet_id(pkt_id
)) {
4985 dev_err(hdev
->dev
, "Invalid packet id %u\n", pkt_id
);
4990 pkt_size
= gaudi_packet_sizes
[pkt_id
];
4991 cb_parsed_length
+= pkt_size
;
4992 if (cb_parsed_length
> parser
->user_cb_size
) {
4994 "packet 0x%x is out of CB boundary\n", pkt_id
);
5000 case PACKET_MSG_PROT
:
5002 "User not allowed to use MSG_PROT\n");
5007 dev_err(hdev
->dev
, "User not allowed to use CP_DMA\n");
5012 dev_err(hdev
->dev
, "User not allowed to use STOP\n");
5016 case PACKET_WREG_BULK
:
5018 "User not allowed to use WREG_BULK\n");
5022 case PACKET_LOAD_AND_EXE
:
5023 rc
= gaudi_validate_load_and_exe_pkt(hdev
, parser
,
5024 (struct packet_load_and_exe
*) user_pkt
);
5027 case PACKET_LIN_DMA
:
5028 parser
->contains_dma_pkt
= true;
5030 parser
->patched_cb_size
+= pkt_size
;
5032 rc
= gaudi_validate_dma_pkt_no_mmu(hdev
, parser
,
5033 (struct packet_lin_dma
*) user_pkt
);
5036 case PACKET_WREG_32
:
5037 case PACKET_MSG_LONG
:
5038 case PACKET_MSG_SHORT
:
5042 case PACKET_ARB_POINT
:
5043 parser
->patched_cb_size
+= pkt_size
;
5047 dev_err(hdev
->dev
, "Invalid packet header 0x%x\n",
5058 * The new CB should have space at the end for two MSG_PROT packets:
5059 * 1. A packet that will act as a completion packet
5060 * 2. A packet that will generate MSI-X interrupt
5062 parser
->patched_cb_size
+= sizeof(struct packet_msg_prot
) * 2;
5067 static int gaudi_patch_dma_packet(struct hl_device
*hdev
,
5068 struct hl_cs_parser
*parser
,
5069 struct packet_lin_dma
*user_dma_pkt
,
5070 struct packet_lin_dma
*new_dma_pkt
,
5071 u32
*new_dma_pkt_size
)
5073 struct hl_userptr
*userptr
;
5074 struct scatterlist
*sg
, *sg_next_iter
;
5075 u32 count
, dma_desc_cnt
, user_wrcomp_en_mask
, ctl
;
5077 dma_addr_t dma_addr
, dma_addr_next
;
5078 u64 device_memory_addr
, addr
;
5079 enum dma_data_direction dir
;
5080 struct sg_table
*sgt
;
5081 bool src_in_host
= false;
5082 bool skip_host_mem_pin
= false;
5085 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
5087 if (parser
->hw_queue_id
<= GAUDI_QUEUE_ID_DMA_0_3
)
5090 user_memset
= (ctl
& GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK
) >>
5091 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT
;
5094 addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
5095 device_memory_addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
5096 dir
= DMA_TO_DEVICE
;
5098 skip_host_mem_pin
= true;
5100 addr
= le64_to_cpu(user_dma_pkt
->dst_addr
);
5101 device_memory_addr
= le64_to_cpu(user_dma_pkt
->src_addr
);
5102 dir
= DMA_FROM_DEVICE
;
5105 if ((!skip_host_mem_pin
) &&
5106 (!hl_userptr_is_pinned(hdev
, addr
,
5107 le32_to_cpu(user_dma_pkt
->tsize
),
5108 parser
->job_userptr_list
, &userptr
))) {
5109 dev_err(hdev
->dev
, "Userptr 0x%llx + 0x%x NOT mapped\n",
5110 addr
, user_dma_pkt
->tsize
);
5114 if ((user_memset
) && (dir
== DMA_TO_DEVICE
)) {
5115 memcpy(new_dma_pkt
, user_dma_pkt
, sizeof(*user_dma_pkt
));
5116 *new_dma_pkt_size
= sizeof(*user_dma_pkt
);
5120 user_wrcomp_en_mask
= ctl
& GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK
;
5125 for_each_sg(sgt
->sgl
, sg
, sgt
->nents
, count
) {
5126 len
= sg_dma_len(sg
);
5127 dma_addr
= sg_dma_address(sg
);
5132 while ((count
+ 1) < sgt
->nents
) {
5133 sg_next_iter
= sg_next(sg
);
5134 len_next
= sg_dma_len(sg_next_iter
);
5135 dma_addr_next
= sg_dma_address(sg_next_iter
);
5140 if ((dma_addr
+ len
== dma_addr_next
) &&
5141 (len
+ len_next
<= DMA_MAX_TRANSFER_SIZE
)) {
5150 ctl
= le32_to_cpu(user_dma_pkt
->ctl
);
5151 if (likely(dma_desc_cnt
))
5152 ctl
&= ~GAUDI_PKT_CTL_EB_MASK
;
5153 ctl
&= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK
;
5154 new_dma_pkt
->ctl
= cpu_to_le32(ctl
);
5155 new_dma_pkt
->tsize
= cpu_to_le32(len
);
5157 if (dir
== DMA_TO_DEVICE
) {
5158 new_dma_pkt
->src_addr
= cpu_to_le64(dma_addr
);
5159 new_dma_pkt
->dst_addr
= cpu_to_le64(device_memory_addr
);
5161 new_dma_pkt
->src_addr
= cpu_to_le64(device_memory_addr
);
5162 new_dma_pkt
->dst_addr
= cpu_to_le64(dma_addr
);
5166 device_memory_addr
+= len
;
5171 if (!dma_desc_cnt
) {
5173 "Error of 0 SG entries when patching DMA packet\n");
5177 /* Fix the last dma packet - wrcomp must be as user set it */
5179 new_dma_pkt
->ctl
|= cpu_to_le32(user_wrcomp_en_mask
);
5181 *new_dma_pkt_size
= dma_desc_cnt
* sizeof(struct packet_lin_dma
);
5186 static int gaudi_patch_cb(struct hl_device
*hdev
,
5187 struct hl_cs_parser
*parser
)
5189 u32 cb_parsed_length
= 0;
5190 u32 cb_patched_cur_length
= 0;
5193 /* cb_user_size is more than 0 so loop will always be executed */
5194 while (cb_parsed_length
< parser
->user_cb_size
) {
5195 enum packet_id pkt_id
;
5197 u32 new_pkt_size
= 0;
5198 struct gaudi_packet
*user_pkt
, *kernel_pkt
;
5200 user_pkt
= parser
->user_cb
->kernel_address
+ cb_parsed_length
;
5201 kernel_pkt
= parser
->patched_cb
->kernel_address
+
5202 cb_patched_cur_length
;
5204 pkt_id
= (enum packet_id
) (
5205 (le64_to_cpu(user_pkt
->header
) &
5206 PACKET_HEADER_PACKET_ID_MASK
) >>
5207 PACKET_HEADER_PACKET_ID_SHIFT
);
5209 if (!validate_packet_id(pkt_id
)) {
5210 dev_err(hdev
->dev
, "Invalid packet id %u\n", pkt_id
);
5215 pkt_size
= gaudi_packet_sizes
[pkt_id
];
5216 cb_parsed_length
+= pkt_size
;
5217 if (cb_parsed_length
> parser
->user_cb_size
) {
5219 "packet 0x%x is out of CB boundary\n", pkt_id
);
5225 case PACKET_LIN_DMA
:
5226 rc
= gaudi_patch_dma_packet(hdev
, parser
,
5227 (struct packet_lin_dma
*) user_pkt
,
5228 (struct packet_lin_dma
*) kernel_pkt
,
5230 cb_patched_cur_length
+= new_pkt_size
;
5233 case PACKET_MSG_PROT
:
5235 "User not allowed to use MSG_PROT\n");
5240 dev_err(hdev
->dev
, "User not allowed to use CP_DMA\n");
5245 dev_err(hdev
->dev
, "User not allowed to use STOP\n");
5249 case PACKET_WREG_32
:
5250 case PACKET_WREG_BULK
:
5251 case PACKET_MSG_LONG
:
5252 case PACKET_MSG_SHORT
:
5256 case PACKET_ARB_POINT
:
5257 case PACKET_LOAD_AND_EXE
:
5258 memcpy(kernel_pkt
, user_pkt
, pkt_size
);
5259 cb_patched_cur_length
+= pkt_size
;
5263 dev_err(hdev
->dev
, "Invalid packet header 0x%x\n",
5276 static int gaudi_parse_cb_mmu(struct hl_device
*hdev
,
5277 struct hl_cs_parser
*parser
)
5279 u64 patched_cb_handle
;
5280 u32 patched_cb_size
;
5281 struct hl_cb
*user_cb
;
5285 * The new CB should have space at the end for two MSG_PROT pkt:
5286 * 1. A packet that will act as a completion packet
5287 * 2. A packet that will generate MSI interrupt
5289 parser
->patched_cb_size
= parser
->user_cb_size
+
5290 sizeof(struct packet_msg_prot
) * 2;
5292 rc
= hl_cb_create(hdev
, &hdev
->kernel_cb_mgr
, hdev
->kernel_ctx
,
5293 parser
->patched_cb_size
, false, false,
5294 &patched_cb_handle
);
5298 "Failed to allocate patched CB for DMA CS %d\n",
5303 patched_cb_handle
>>= PAGE_SHIFT
;
5304 parser
->patched_cb
= hl_cb_get(hdev
, &hdev
->kernel_cb_mgr
,
5305 (u32
) patched_cb_handle
);
5306 /* hl_cb_get should never fail here so use kernel WARN */
5307 WARN(!parser
->patched_cb
, "DMA CB handle invalid 0x%x\n",
5308 (u32
) patched_cb_handle
);
5309 if (!parser
->patched_cb
) {
5315 * The check that parser->user_cb_size <= parser->user_cb->size was done
5316 * in validate_queue_index().
5318 memcpy(parser
->patched_cb
->kernel_address
,
5319 parser
->user_cb
->kernel_address
,
5320 parser
->user_cb_size
);
5322 patched_cb_size
= parser
->patched_cb_size
;
5324 /* Validate patched CB instead of user CB */
5325 user_cb
= parser
->user_cb
;
5326 parser
->user_cb
= parser
->patched_cb
;
5327 rc
= gaudi_validate_cb(hdev
, parser
, true);
5328 parser
->user_cb
= user_cb
;
5331 hl_cb_put(parser
->patched_cb
);
5335 if (patched_cb_size
!= parser
->patched_cb_size
) {
5336 dev_err(hdev
->dev
, "user CB size mismatch\n");
5337 hl_cb_put(parser
->patched_cb
);
5344 * Always call cb destroy here because we still have 1 reference
5345 * to it by calling cb_get earlier. After the job will be completed,
5346 * cb_put will release it, but here we want to remove it from the
5349 hl_cb_destroy(hdev
, &hdev
->kernel_cb_mgr
,
5350 patched_cb_handle
<< PAGE_SHIFT
);
5355 static int gaudi_parse_cb_no_mmu(struct hl_device
*hdev
,
5356 struct hl_cs_parser
*parser
)
5358 u64 patched_cb_handle
;
5361 rc
= gaudi_validate_cb(hdev
, parser
, false);
5366 rc
= hl_cb_create(hdev
, &hdev
->kernel_cb_mgr
, hdev
->kernel_ctx
,
5367 parser
->patched_cb_size
, false, false,
5368 &patched_cb_handle
);
5371 "Failed to allocate patched CB for DMA CS %d\n", rc
);
5375 patched_cb_handle
>>= PAGE_SHIFT
;
5376 parser
->patched_cb
= hl_cb_get(hdev
, &hdev
->kernel_cb_mgr
,
5377 (u32
) patched_cb_handle
);
5378 /* hl_cb_get should never fail here so use kernel WARN */
5379 WARN(!parser
->patched_cb
, "DMA CB handle invalid 0x%x\n",
5380 (u32
) patched_cb_handle
);
5381 if (!parser
->patched_cb
) {
5386 rc
= gaudi_patch_cb(hdev
, parser
);
5389 hl_cb_put(parser
->patched_cb
);
5393 * Always call cb destroy here because we still have 1 reference
5394 * to it by calling cb_get earlier. After the job will be completed,
5395 * cb_put will release it, but here we want to remove it from the
5398 hl_cb_destroy(hdev
, &hdev
->kernel_cb_mgr
,
5399 patched_cb_handle
<< PAGE_SHIFT
);
5403 hl_userptr_delete_list(hdev
, parser
->job_userptr_list
);
5407 static int gaudi_parse_cb_no_ext_queue(struct hl_device
*hdev
,
5408 struct hl_cs_parser
*parser
)
5410 struct asic_fixed_properties
*asic_prop
= &hdev
->asic_prop
;
5411 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5412 u32 nic_mask_q_id
= 1 << (HW_CAP_NIC_SHIFT
+
5413 ((parser
->hw_queue_id
- GAUDI_QUEUE_ID_NIC_0_0
) >> 2));
5415 if ((parser
->hw_queue_id
>= GAUDI_QUEUE_ID_NIC_0_0
) &&
5416 (parser
->hw_queue_id
<= GAUDI_QUEUE_ID_NIC_9_3
) &&
5417 (!(gaudi
->hw_cap_initialized
& nic_mask_q_id
))) {
5418 dev_err(hdev
->dev
, "h/w queue %d is disabled\n",
5419 parser
->hw_queue_id
);
5423 /* For internal queue jobs just check if CB address is valid */
5424 if (hl_mem_area_inside_range((u64
) (uintptr_t) parser
->user_cb
,
5425 parser
->user_cb_size
,
5426 asic_prop
->sram_user_base_address
,
5427 asic_prop
->sram_end_address
))
5430 if (hl_mem_area_inside_range((u64
) (uintptr_t) parser
->user_cb
,
5431 parser
->user_cb_size
,
5432 asic_prop
->dram_user_base_address
,
5433 asic_prop
->dram_end_address
))
5436 /* PMMU and HPMMU addresses are equal, check only one of them */
5437 if (hl_mem_area_inside_range((u64
) (uintptr_t) parser
->user_cb
,
5438 parser
->user_cb_size
,
5439 asic_prop
->pmmu
.start_addr
,
5440 asic_prop
->pmmu
.end_addr
))
5444 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5445 parser
->user_cb
, parser
->user_cb_size
);
5450 static int gaudi_cs_parser(struct hl_device
*hdev
, struct hl_cs_parser
*parser
)
5452 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5454 if (parser
->queue_type
== QUEUE_TYPE_INT
)
5455 return gaudi_parse_cb_no_ext_queue(hdev
, parser
);
5457 if (gaudi
->hw_cap_initialized
& HW_CAP_MMU
)
5458 return gaudi_parse_cb_mmu(hdev
, parser
);
5460 return gaudi_parse_cb_no_mmu(hdev
, parser
);
5463 static void gaudi_add_end_of_cb_packets(struct hl_device
*hdev
,
5464 void *kernel_address
, u32 len
,
5465 u64 cq_addr
, u32 cq_val
, u32 msi_vec
,
5468 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5469 struct packet_msg_prot
*cq_pkt
;
5472 cq_pkt
= kernel_address
+ len
- (sizeof(struct packet_msg_prot
) * 2);
5474 tmp
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_MSG_PROT
);
5475 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
5478 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK
, 1);
5480 cq_pkt
->ctl
= cpu_to_le32(tmp
);
5481 cq_pkt
->value
= cpu_to_le32(cq_val
);
5482 cq_pkt
->addr
= cpu_to_le64(cq_addr
);
5486 tmp
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_MSG_PROT
);
5487 tmp
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
5488 cq_pkt
->ctl
= cpu_to_le32(tmp
);
5489 cq_pkt
->value
= cpu_to_le32(1);
5491 if (!gaudi
->multi_msi_mode
)
5494 cq_pkt
->addr
= cpu_to_le64(CFG_BASE
+ mmPCIE_MSI_INTR_0
+ msi_vec
* 4);
5497 static void gaudi_update_eq_ci(struct hl_device
*hdev
, u32 val
)
5499 WREG32(mmCPU_IF_EQ_RD_OFFS
, val
);
5502 static int gaudi_memset_device_memory(struct hl_device
*hdev
, u64 addr
,
5505 struct packet_lin_dma
*lin_dma_pkt
;
5506 struct hl_cs_job
*job
;
5507 u32 cb_size
, ctl
, err_cause
;
5511 cb
= hl_cb_kernel_create(hdev
, PAGE_SIZE
, false);
5515 lin_dma_pkt
= cb
->kernel_address
;
5516 memset(lin_dma_pkt
, 0, sizeof(*lin_dma_pkt
));
5517 cb_size
= sizeof(*lin_dma_pkt
);
5519 ctl
= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK
, PACKET_LIN_DMA
);
5520 ctl
|= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK
, 1);
5521 ctl
|= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK
, 1);
5522 ctl
|= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK
, 1);
5523 ctl
|= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK
, 1);
5525 lin_dma_pkt
->ctl
= cpu_to_le32(ctl
);
5526 lin_dma_pkt
->src_addr
= cpu_to_le64(val
);
5527 lin_dma_pkt
->dst_addr
|= cpu_to_le64(addr
);
5528 lin_dma_pkt
->tsize
= cpu_to_le32(size
);
5530 job
= hl_cs_allocate_job(hdev
, QUEUE_TYPE_EXT
, true);
5532 dev_err(hdev
->dev
, "Failed to allocate a new job\n");
5537 /* Verify DMA is OK */
5538 err_cause
= RREG32(mmDMA0_CORE_ERR_CAUSE
);
5539 if (err_cause
&& !hdev
->init_done
) {
5541 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5543 WREG32(mmDMA0_CORE_ERR_CAUSE
, err_cause
);
5548 atomic_inc(&job
->user_cb
->cs_cnt
);
5549 job
->user_cb_size
= cb_size
;
5550 job
->hw_queue_id
= GAUDI_QUEUE_ID_DMA_0_0
;
5551 job
->patched_cb
= job
->user_cb
;
5552 job
->job_cb_size
= job
->user_cb_size
+ sizeof(struct packet_msg_prot
);
5554 hl_debugfs_add_job(hdev
, job
);
5556 rc
= gaudi_send_job_on_qman0(hdev
, job
);
5557 hl_debugfs_remove_job(hdev
, job
);
5559 atomic_dec(&cb
->cs_cnt
);
5561 /* Verify DMA is OK */
5562 err_cause
= RREG32(mmDMA0_CORE_ERR_CAUSE
);
5564 dev_err(hdev
->dev
, "DMA Failed, cause 0x%x\n", err_cause
);
5566 if (!hdev
->init_done
) {
5568 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5570 WREG32(mmDMA0_CORE_ERR_CAUSE
, err_cause
);
5576 hl_cb_destroy(hdev
, &hdev
->kernel_cb_mgr
, cb
->id
<< PAGE_SHIFT
);
5581 static void gaudi_restore_sm_registers(struct hl_device
*hdev
)
5585 for (i
= 0 ; i
< NUM_OF_SOB_IN_BLOCK
<< 2 ; i
+= 4) {
5586 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
+ i
, 0);
5587 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0
+ i
, 0);
5588 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0
+ i
, 0);
5591 for (i
= 0 ; i
< NUM_OF_MONITORS_IN_BLOCK
<< 2 ; i
+= 4) {
5592 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0
+ i
, 0);
5593 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0
+ i
, 0);
5594 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0
+ i
, 0);
5597 i
= GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT
* 4;
5599 for (; i
< NUM_OF_SOB_IN_BLOCK
<< 2 ; i
+= 4)
5600 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0
+ i
, 0);
5602 i
= GAUDI_FIRST_AVAILABLE_W_S_MONITOR
* 4;
5604 for (; i
< NUM_OF_MONITORS_IN_BLOCK
<< 2 ; i
+= 4)
5605 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0
+ i
, 0);
5608 static void gaudi_restore_dma_registers(struct hl_device
*hdev
)
5610 u32 sob_delta
= mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1
-
5611 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
;
5614 for (i
= 0 ; i
< DMA_NUMBER_OF_CHANNELS
; i
++) {
5615 u64 sob_addr
= CFG_BASE
+
5616 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0
+
5618 u32 dma_offset
= i
* DMA_CORE_OFFSET
;
5620 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO
+ dma_offset
,
5621 lower_32_bits(sob_addr
));
5622 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI
+ dma_offset
,
5623 upper_32_bits(sob_addr
));
5624 WREG32(mmDMA0_CORE_WR_COMP_WDATA
+ dma_offset
, 0x80000001);
5626 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5627 * modified by the user for SRAM reduction
5630 WREG32(mmDMA0_CORE_WR_AWUSER_31_11
+ dma_offset
,
5635 static void gaudi_restore_qm_registers(struct hl_device
*hdev
)
5640 for (i
= 0 ; i
< DMA_NUMBER_OF_CHANNELS
; i
++) {
5641 qman_offset
= i
* DMA_QMAN_OFFSET
;
5642 WREG32(mmDMA0_QM_ARB_CFG_0
+ qman_offset
, 0);
5645 for (i
= 0 ; i
< MME_NUMBER_OF_MASTER_ENGINES
; i
++) {
5646 qman_offset
= i
* (mmMME2_QM_BASE
- mmMME0_QM_BASE
);
5647 WREG32(mmMME0_QM_ARB_CFG_0
+ qman_offset
, 0);
5650 for (i
= 0 ; i
< TPC_NUMBER_OF_ENGINES
; i
++) {
5651 qman_offset
= i
* TPC_QMAN_OFFSET
;
5652 WREG32(mmTPC0_QM_ARB_CFG_0
+ qman_offset
, 0);
5655 for (i
= 0 ; i
< NIC_NUMBER_OF_ENGINES
; i
++) {
5656 qman_offset
= (i
>> 1) * NIC_MACRO_QMAN_OFFSET
+
5657 (i
& 0x1) * NIC_ENGINE_QMAN_OFFSET
;
5658 WREG32(mmNIC0_QM0_ARB_CFG_0
+ qman_offset
, 0);
5662 static void gaudi_restore_user_registers(struct hl_device
*hdev
)
5664 gaudi_restore_sm_registers(hdev
);
5665 gaudi_restore_dma_registers(hdev
);
5666 gaudi_restore_qm_registers(hdev
);
5669 static int gaudi_context_switch(struct hl_device
*hdev
, u32 asid
)
5671 gaudi_restore_user_registers(hdev
);
5676 static int gaudi_mmu_clear_pgt_range(struct hl_device
*hdev
)
5678 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
5679 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5680 u64 addr
= prop
->mmu_pgt_addr
;
5681 u32 size
= prop
->mmu_pgt_size
+ MMU_CACHE_MNG_SIZE
;
5683 if (!(gaudi
->hw_cap_initialized
& HW_CAP_MMU
))
5686 return gaudi_memset_device_memory(hdev
, addr
, size
, 0);
5689 static void gaudi_restore_phase_topology(struct hl_device
*hdev
)
5694 static int gaudi_debugfs_read32(struct hl_device
*hdev
, u64 addr
, u32
*val
)
5696 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
5697 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5701 if ((addr
>= CFG_BASE
) && (addr
< CFG_BASE
+ CFG_SIZE
)) {
5703 if ((gaudi
->hw_cap_initialized
& HW_CAP_CLK_GATE
) &&
5704 (hdev
->clock_gating_mask
&
5705 GAUDI_CLK_GATE_DEBUGFS_MASK
)) {
5707 dev_err_ratelimited(hdev
->dev
,
5708 "Can't read register - clock gating is enabled!\n");
5711 *val
= RREG32(addr
- CFG_BASE
);
5714 } else if ((addr
>= SRAM_BASE_ADDR
) &&
5715 (addr
< SRAM_BASE_ADDR
+ SRAM_BAR_SIZE
)) {
5716 *val
= readl(hdev
->pcie_bar
[SRAM_BAR_ID
] +
5717 (addr
- SRAM_BASE_ADDR
));
5718 } else if (addr
< DRAM_PHYS_BASE
+ hdev
->asic_prop
.dram_size
) {
5719 u64 bar_base_addr
= DRAM_PHYS_BASE
+
5720 (addr
& ~(prop
->dram_pci_bar_size
- 0x1ull
));
5722 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
, bar_base_addr
);
5723 if (hbm_bar_addr
!= U64_MAX
) {
5724 *val
= readl(hdev
->pcie_bar
[HBM_BAR_ID
] +
5725 (addr
- bar_base_addr
));
5727 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
,
5730 if (hbm_bar_addr
== U64_MAX
)
5732 } else if (addr
>= HOST_PHYS_BASE
&& !iommu_present(&pci_bus_type
)) {
5733 *val
= *(u32
*) phys_to_virt(addr
- HOST_PHYS_BASE
);
5741 static int gaudi_debugfs_write32(struct hl_device
*hdev
, u64 addr
, u32 val
)
5743 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
5744 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5748 if ((addr
>= CFG_BASE
) && (addr
< CFG_BASE
+ CFG_SIZE
)) {
5750 if ((gaudi
->hw_cap_initialized
& HW_CAP_CLK_GATE
) &&
5751 (hdev
->clock_gating_mask
&
5752 GAUDI_CLK_GATE_DEBUGFS_MASK
)) {
5754 dev_err_ratelimited(hdev
->dev
,
5755 "Can't write register - clock gating is enabled!\n");
5758 WREG32(addr
- CFG_BASE
, val
);
5761 } else if ((addr
>= SRAM_BASE_ADDR
) &&
5762 (addr
< SRAM_BASE_ADDR
+ SRAM_BAR_SIZE
)) {
5763 writel(val
, hdev
->pcie_bar
[SRAM_BAR_ID
] +
5764 (addr
- SRAM_BASE_ADDR
));
5765 } else if (addr
< DRAM_PHYS_BASE
+ hdev
->asic_prop
.dram_size
) {
5766 u64 bar_base_addr
= DRAM_PHYS_BASE
+
5767 (addr
& ~(prop
->dram_pci_bar_size
- 0x1ull
));
5769 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
, bar_base_addr
);
5770 if (hbm_bar_addr
!= U64_MAX
) {
5771 writel(val
, hdev
->pcie_bar
[HBM_BAR_ID
] +
5772 (addr
- bar_base_addr
));
5774 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
,
5777 if (hbm_bar_addr
== U64_MAX
)
5779 } else if (addr
>= HOST_PHYS_BASE
&& !iommu_present(&pci_bus_type
)) {
5780 *(u32
*) phys_to_virt(addr
- HOST_PHYS_BASE
) = val
;
5788 static int gaudi_debugfs_read64(struct hl_device
*hdev
, u64 addr
, u64
*val
)
5790 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
5791 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5795 if ((addr
>= CFG_BASE
) && (addr
<= CFG_BASE
+ CFG_SIZE
- sizeof(u64
))) {
5797 if ((gaudi
->hw_cap_initialized
& HW_CAP_CLK_GATE
) &&
5798 (hdev
->clock_gating_mask
&
5799 GAUDI_CLK_GATE_DEBUGFS_MASK
)) {
5801 dev_err_ratelimited(hdev
->dev
,
5802 "Can't read register - clock gating is enabled!\n");
5805 u32 val_l
= RREG32(addr
- CFG_BASE
);
5806 u32 val_h
= RREG32(addr
+ sizeof(u32
) - CFG_BASE
);
5808 *val
= (((u64
) val_h
) << 32) | val_l
;
5811 } else if ((addr
>= SRAM_BASE_ADDR
) &&
5812 (addr
<= SRAM_BASE_ADDR
+ SRAM_BAR_SIZE
- sizeof(u64
))) {
5813 *val
= readq(hdev
->pcie_bar
[SRAM_BAR_ID
] +
5814 (addr
- SRAM_BASE_ADDR
));
5816 DRAM_PHYS_BASE
+ hdev
->asic_prop
.dram_size
- sizeof(u64
)) {
5817 u64 bar_base_addr
= DRAM_PHYS_BASE
+
5818 (addr
& ~(prop
->dram_pci_bar_size
- 0x1ull
));
5820 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
, bar_base_addr
);
5821 if (hbm_bar_addr
!= U64_MAX
) {
5822 *val
= readq(hdev
->pcie_bar
[HBM_BAR_ID
] +
5823 (addr
- bar_base_addr
));
5825 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
,
5828 if (hbm_bar_addr
== U64_MAX
)
5830 } else if (addr
>= HOST_PHYS_BASE
&& !iommu_present(&pci_bus_type
)) {
5831 *val
= *(u64
*) phys_to_virt(addr
- HOST_PHYS_BASE
);
5839 static int gaudi_debugfs_write64(struct hl_device
*hdev
, u64 addr
, u64 val
)
5841 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
5842 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5846 if ((addr
>= CFG_BASE
) && (addr
<= CFG_BASE
+ CFG_SIZE
- sizeof(u64
))) {
5848 if ((gaudi
->hw_cap_initialized
& HW_CAP_CLK_GATE
) &&
5849 (hdev
->clock_gating_mask
&
5850 GAUDI_CLK_GATE_DEBUGFS_MASK
)) {
5852 dev_err_ratelimited(hdev
->dev
,
5853 "Can't write register - clock gating is enabled!\n");
5856 WREG32(addr
- CFG_BASE
, lower_32_bits(val
));
5857 WREG32(addr
+ sizeof(u32
) - CFG_BASE
,
5858 upper_32_bits(val
));
5861 } else if ((addr
>= SRAM_BASE_ADDR
) &&
5862 (addr
<= SRAM_BASE_ADDR
+ SRAM_BAR_SIZE
- sizeof(u64
))) {
5863 writeq(val
, hdev
->pcie_bar
[SRAM_BAR_ID
] +
5864 (addr
- SRAM_BASE_ADDR
));
5866 DRAM_PHYS_BASE
+ hdev
->asic_prop
.dram_size
- sizeof(u64
)) {
5867 u64 bar_base_addr
= DRAM_PHYS_BASE
+
5868 (addr
& ~(prop
->dram_pci_bar_size
- 0x1ull
));
5870 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
, bar_base_addr
);
5871 if (hbm_bar_addr
!= U64_MAX
) {
5872 writeq(val
, hdev
->pcie_bar
[HBM_BAR_ID
] +
5873 (addr
- bar_base_addr
));
5875 hbm_bar_addr
= gaudi_set_hbm_bar_base(hdev
,
5878 if (hbm_bar_addr
== U64_MAX
)
5880 } else if (addr
>= HOST_PHYS_BASE
&& !iommu_present(&pci_bus_type
)) {
5881 *(u64
*) phys_to_virt(addr
- HOST_PHYS_BASE
) = val
;
5889 static u64
gaudi_read_pte(struct hl_device
*hdev
, u64 addr
)
5891 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5893 if (hdev
->hard_reset_pending
)
5896 return readq(hdev
->pcie_bar
[HBM_BAR_ID
] +
5897 (addr
- gaudi
->hbm_bar_cur_addr
));
5900 static void gaudi_write_pte(struct hl_device
*hdev
, u64 addr
, u64 val
)
5902 struct gaudi_device
*gaudi
= hdev
->asic_specific
;
5904 if (hdev
->hard_reset_pending
)
5907 writeq(val
, hdev
->pcie_bar
[HBM_BAR_ID
] +
5908 (addr
- gaudi
->hbm_bar_cur_addr
));
5911 void gaudi_mmu_prepare_reg(struct hl_device
*hdev
, u64 reg
, u32 asid
)
5913 /* mask to zero the MMBP and ASID bits */
5914 WREG32_AND(reg
, ~0x7FF);
5915 WREG32_OR(reg
, asid
);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		WARN(1, "asid %u is too big\n", asid);
		return;
	}

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);
}
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}
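/*
 * Flow summary (editor's note, not in the original source): the helper above
 * appends a MSG_PROT packet to the patched CB so that, once the DMA job
 * completes, the QMAN writes GAUDI_QMAN0_FENCE_VAL into the small fence
 * buffer allocated from the DMA pool. The driver then polls that buffer with
 * hl_poll_timeout_memory() instead of waiting for a completion-queue entry,
 * which is why the job is submitted with hl_hw_queue_send_cb_no_cmpl() and
 * the CI is advanced manually via hl_hw_queue_inc_ci_kernel().
 */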
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	snprintf(desc, size, gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
							u32 x_y, bool is_write)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
			DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA0";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA2";
		else
			return "DMA0 or DMA2";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA1";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA3";
		else
			return "DMA1 or DMA3";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA4";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA6";
		else
			return "DMA4 or DMA6";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA5";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA7";
		else
			return "DMA5 or DMA7";
	default:
		goto unknown_initiator;
	}

unknown_initiator:
	return "unknown initiator";
}
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
							bool is_write)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC0";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC0";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC3";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC1";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC2";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC7";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC5";
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}
static void gaudi_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
	}
}
static void gaudi_print_mmu_error_info(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev,
				"MMU access error on va 0x%llx\n", addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
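/*
 * Example of the VA reconstruction done above (illustrative only, assuming
 * the VA_49_32 field sits in the low bits of the capture register, as the
 * "<< 32" suggests): the capture register supplies bits 49:32 of the faulting
 * virtual address plus the valid bit, and the *_CAPTURE_VA register supplies
 * bits 31:0. With a high part of 0x1 and a low register of 0x2000, the
 * reported address is (0x1 << 32) | 0x2000 = 0x100002000.
 */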
/*
 *  +-------------------+------------------------------------------------------+
 *  | Configuration Reg |                     Description                      |
 *  |      Address      |                                                      |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF34 memory wrappers 63:32                           |
 *  |                   |0xF38 memory wrappers 95:64                           |
 *  |                   |0xF3C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF44 memory wrappers 63:32                           |
 *  |                   |0xF48 memory wrappers 95:64                           |
 *  |                   |0xF4C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 */
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;
	int rc = 0;

	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	if (params->disable_clock_gating) {
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);
	}

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_addr += i * 4;
		err_word = RREG32(err_addr);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		rc = -EINVAL;
		goto enable_clk_gate;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

enable_clk_gate:
	if (params->disable_clock_gating) {
		hdev->asic_funcs->set_clock_gating(hdev);

		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}
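/*
 * Worked example for the wrapper-index search above (illustrative only):
 * with params->num_memories == 90, num_mem_regs is 3. If the first two
 * SERR/DERR status words read as 0 and the third (i == 2) reads 0x10,
 * __ffs() returns 4 and the reported memory_wrapper_idx is 4 + 32 * 2 = 68.
 * That index is then written to GAUDI_ECC_MEM_SEL_OFFSET so the address and
 * syndrome registers reflect that specific wrapper.
 */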
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					  const char *qm_name,
					  u64 glbl_sts_addr,
					  u64 arb_err_addr)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	char reg_desc[32];

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}

		/* Write 1 clear errors */
		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
{
	u64 glbl_sts_addr, arb_err_addr;
	u8 index;
	char desc[32];

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		glbl_sts_addr =
			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
		arb_err_addr =
			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		index = event_type - GAUDI_EVENT_MME0_QM;
		glbl_sts_addr =
			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
		arb_err_addr =
			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		glbl_sts_addr =
			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
		arb_err_addr =
			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
}
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool razwi)
{
	char desc[64] = "";

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		gaudi_print_razwi_info(hdev);
		gaudi_print_mmu_error_info(hdev);
	}
}
static int gaudi_soft_reset_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
}
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int err = 0;

	if (!hdev->asic_prop.fw_security_disabled) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, type, wr_par, rd_par, ca_par, serr, derr);

		return 0;
	}

	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	val  = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val  = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return err;
}
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
		char *interrupt_name)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
	 * gating, and thus cannot be done in CPU-CP and should be done instead
	 * by the driver.
	 */

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return soft_reset_required;
}
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}
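/*
 * Editor's note on the arithmetic above (illustrative only): the divisors
 * assume the event enumeration interleaves per-TPC events, with two entries
 * per TPC between consecutive *_DEC events and six entries per TPC between
 * consecutive *_KRN_ERR events. E.g. assuming that spacing,
 * GAUDI_EVENT_TPC3_KRN_ERR - GAUDI_EVENT_TPC0_KRN_ERR == 18, and 18 / 6 == 3.
 */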
static void gaudi_print_clk_change_info(struct hl_device *hdev,
					u16 event_type)
{
	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}
static void gaudi_handle_eqe(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	u8 cause;
	bool reset_required;

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	u32 inv_data;
	u32 pi;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	mutex_lock(&hdev->mmu_cache_lock);

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache
	 * lines with mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
		(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}
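/*
 * Editor's note (illustrative only): the producer index written to
 * mmSTLB_CACHE_INV above is 8 bits wide, hence the "& 0xFF" - a current PI of
 * 255 wraps back to 0 on the next invalidation request. The poll then waits
 * for mmSTLB_INV_CONSUMER_INDEX to catch up to the newly written PI value.
 */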
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
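/*
 * Worked example (illustrative only, assuming the shift macros match their
 * names, i.e. 12 and 44): for a hop0 table at physical address
 * 0x0000002000001000, MMU_HOP0_PA43_12 receives 0x2000001 (bits 43:12) and
 * MMU_HOP0_PA49_44 receives 0x0 (bits 49:44). Writing 0x80000000 to MMU_BUSY
 * kicks off the update, and the poll waits for that bit to clear.
 */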
static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}
static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	if (hdev->card_type == cpucp_card_type_pci)
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
	else if (hdev->card_type == cpucp_card_type_pmc)
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

	hdev->max_power = prop->max_power_default;

	return 0;
}
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
					struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_MME_0 + i);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (hdev->nic_ports_mask & BIT(port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask)
				*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_NIC_0 + port);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (hdev->nic_ports_mask & BIT(port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask)
				*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_NIC_0 + port);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
					size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}
/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
		lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HOST_SPACE_INTERNAL_CB_SZ,
					&hdev->internal_cb_pool_dma_addr,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base)
		goto destroy_internal_cb_pool;

	mutex_lock(&ctx->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);

	return rc;
}
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}
static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	/* Gaudi will NEVER support more than a single compute context.
	 * Therefore, don't clear anything unless it is the compute context.
	 */
	if (hdev->compute_ctx != ctx)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}
static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
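/*
 * Illustrative note (an assumption based on the packet fields above, not
 * upstream documentation): the monitor watches a group of 8 consecutive sync
 * objects, so the packet encodes sob_base / 8 as the group id and an 8-bit
 * mask selecting objects within that group. For example, a hypothetical
 * sob_base of 368 maps to group 46, starting at object 0 inside the group.
 */
static u16 __maybe_unused gaudi_example_sob_group_id(u16 sob_base)
{
	/* e.g. sob_base = 368 -> group 46, first object within the group = 0 */
	return sob_base / 8;
}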
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
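/*
 * Illustrative note (an assumption summarizing the packet above): the FENCE
 * packet blocks the queue's CP on fence counter 2 until it reaches a target of
 * 1 and then decrements it by 1; the monitor payload programmed in
 * gaudi_add_mon_pkts() is what writes that counter. A minimal sketch of
 * appending a fence to a buffer at a given offset (helper name hypothetical):
 */
static u32 __maybe_unused gaudi_example_emit_fence(void *buf, u32 offset)
{
	return offset + gaudi_add_fence_pkt(buf + offset);
}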
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
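/*
 * Illustrative note (an assumption based on the comment above, not upstream
 * documentation): the ADDR field of a MSG_SHORT packet is an offset relative
 * to the base register selected by its BASE field (base 2 = W_S monitor base
 * here), which is why the absolute register address always has monitor_base
 * subtracted from it. Sketch for monitor mon_id's low payload-address
 * register (helper name hypothetical):
 */
static u16 __maybe_unused gaudi_example_mon_msg_addr_offset(u16 mon_id)
{
	u64 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	return (u16) ((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 +
			mon_id * 4) - monitor_base);
}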
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
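/*
 * Illustrative sketch (not part of the upstream driver): a wait CB generated
 * above is three monitor-config MSG_SHORT packets, one monitor-arm MSG_SHORT
 * packet and one FENCE packet, which matches gaudi_get_wait_cb_size() minus
 * the MSG_PROT packets the common code appends for completion. All field
 * values and the helper name below are hypothetical.
 */
static u32 __maybe_unused gaudi_example_build_wait_cb(struct hl_device *hdev,
		struct hl_cb *cb)
{
	struct hl_gen_wait_properties prop = {
		.data = cb,
		.q_idx = GAUDI_QUEUE_ID_DMA_0_0,
		.sob_base = 0,
		.sob_mask = 0x1,
		.sob_val = 1,
		.mon_id = 0,
		.size = 0,
	};

	/* Returns the number of bytes written into the CB */
	return gaudi_gen_wait_cb(hdev, &prop);
}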
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
		0);

	kref_init(&hw_sob->kref);
}
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
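/*
 * Illustrative sketch (not part of the upstream driver): when a 64-bit counter
 * is exposed as two 32-bit registers, reading high then low can race with a
 * low-word rollover. A common guard, shown here only as an alternative
 * pattern under that assumption, is to re-read the high word until it is
 * stable. The helper name is hypothetical.
 */
static u64 __maybe_unused gaudi_example_get_device_time_stable(struct hl_device *hdev)
{
	u32 hi, lo, hi2;

	do {
		hi = RREG32(mmPSOC_TIMESTAMP_CNTCVU);
		lo = RREG32(mmPSOC_TIMESTAMP_CNTCVL);
		hi2 = RREG32(mmPSOC_TIMESTAMP_CNTCVU);
	} while (hi != hi2);

	return ((u64) hi << 32) | lo;
}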
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs
};
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
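/*
 * Illustrative sketch (not part of the upstream driver): once
 * gaudi_set_asic_funcs() has run, ASIC-agnostic common code reaches the Gaudi
 * implementations only through the ops table, e.g. (helper name hypothetical):
 */
static u64 __maybe_unused gaudi_example_read_device_time(struct hl_device *hdev)
{
	/* Dispatches to gaudi_get_device_time() on Gaudi devices */
	return hdev->asic_funcs->get_device_time(hdev);
}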