WIP FPC-III support
[linux/fpc-iii.git] / drivers / misc / habanalabs / gaudi / gaudi.c
blob8c09e4466af8ce72eff7787b1986389f68bdb595
1 // SPDX-License-Identifier: GPL-2.0
3 /*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
24 * Gaudi security scheme:
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
37 * MMU is always enabled.
39 * QMAN DMA channels 0,1 (PCI DMAN):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is always not
44 * secured.
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
82 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
84 #define GAUDI_MAX_STRING_LEN 20
86 #define GAUDI_CB_POOL_CB_CNT 512
87 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
89 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
91 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
93 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
95 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
97 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
99 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
100 BIT(GAUDI_ENGINE_ID_MME_0) |\
101 BIT(GAUDI_ENGINE_ID_MME_2) |\
102 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
106 #define GAUDI_PLL_MAX 10
108 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
109 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
110 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
111 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
112 "gaudi cpu eq"
115 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
116 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
117 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
118 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
119 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
120 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
121 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
122 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
123 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
126 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
127 [0] = GAUDI_QUEUE_ID_DMA_0_0,
128 [1] = GAUDI_QUEUE_ID_DMA_0_1,
129 [2] = GAUDI_QUEUE_ID_DMA_0_2,
130 [3] = GAUDI_QUEUE_ID_DMA_0_3,
131 [4] = GAUDI_QUEUE_ID_DMA_1_0,
132 [5] = GAUDI_QUEUE_ID_DMA_1_1,
133 [6] = GAUDI_QUEUE_ID_DMA_1_2,
134 [7] = GAUDI_QUEUE_ID_DMA_1_3,
137 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
138 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
139 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
140 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
141 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
142 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
143 [PACKET_REPEAT] = sizeof(struct packet_repeat),
144 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
145 [PACKET_FENCE] = sizeof(struct packet_fence),
146 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
147 [PACKET_NOP] = sizeof(struct packet_nop),
148 [PACKET_STOP] = sizeof(struct packet_stop),
149 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
150 [PACKET_WAIT] = sizeof(struct packet_wait),
151 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
154 static inline bool validate_packet_id(enum packet_id id)
156 switch (id) {
157 case PACKET_WREG_32:
158 case PACKET_WREG_BULK:
159 case PACKET_MSG_LONG:
160 case PACKET_MSG_SHORT:
161 case PACKET_CP_DMA:
162 case PACKET_REPEAT:
163 case PACKET_MSG_PROT:
164 case PACKET_FENCE:
165 case PACKET_LIN_DMA:
166 case PACKET_NOP:
167 case PACKET_STOP:
168 case PACKET_ARB_POINT:
169 case PACKET_WAIT:
170 case PACKET_LOAD_AND_EXE:
171 return true;
172 default:
173 return false;
177 static const char * const
178 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
179 "tpc_address_exceed_slm",
180 "tpc_div_by_0",
181 "tpc_spu_mac_overflow",
182 "tpc_spu_addsub_overflow",
183 "tpc_spu_abs_overflow",
184 "tpc_spu_fp_dst_nan_inf",
185 "tpc_spu_fp_dst_denorm",
186 "tpc_vpu_mac_overflow",
187 "tpc_vpu_addsub_overflow",
188 "tpc_vpu_abs_overflow",
189 "tpc_vpu_fp_dst_nan_inf",
190 "tpc_vpu_fp_dst_denorm",
191 "tpc_assertions",
192 "tpc_illegal_instruction",
193 "tpc_pc_wrap_around",
194 "tpc_qm_sw_err",
195 "tpc_hbw_rresp_err",
196 "tpc_hbw_bresp_err",
197 "tpc_lbw_rresp_err",
198 "tpc_lbw_bresp_err"
201 static const char * const
202 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
203 "PQ AXI HBW error",
204 "CQ AXI HBW error",
205 "CP AXI HBW error",
206 "CP error due to undefined OPCODE",
207 "CP encountered STOP OPCODE",
208 "CP AXI LBW error",
209 "CP WRREG32 or WRBULK returned error",
210 "N/A",
211 "FENCE 0 inc over max value and clipped",
212 "FENCE 1 inc over max value and clipped",
213 "FENCE 2 inc over max value and clipped",
214 "FENCE 3 inc over max value and clipped",
215 "FENCE 0 dec under min value and clipped",
216 "FENCE 1 dec under min value and clipped",
217 "FENCE 2 dec under min value and clipped",
218 "FENCE 3 dec under min value and clipped"
221 static const char * const
222 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
223 "Choice push while full error",
224 "Choice Q watchdog error",
225 "MSG AXI LBW returned with error"
228 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
229 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
230 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
237 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
238 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
239 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
344 struct ecc_info_extract_params {
345 u64 block_address;
346 u32 num_memories;
347 bool derr;
348 bool disable_clock_gating;
351 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
352 u64 phys_addr);
353 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
354 struct hl_cs_job *job);
355 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
356 u32 size, u64 val);
357 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
358 u32 tpc_id);
359 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
360 static int gaudi_cpucp_info_get(struct hl_device *hdev);
361 static void gaudi_disable_clock_gating(struct hl_device *hdev);
362 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
363 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
364 u32 size, bool eb);
365 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
366 struct hl_gen_wait_properties *prop);
368 static inline enum hl_collective_mode
369 get_collective_mode(struct hl_device *hdev, u32 queue_id)
371 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
372 return HL_COLLECTIVE_MASTER;
374 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
375 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
376 return HL_COLLECTIVE_SLAVE;
378 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
379 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
380 return HL_COLLECTIVE_SLAVE;
382 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
383 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
384 return HL_COLLECTIVE_SLAVE;
386 return HL_COLLECTIVE_NOT_SUPPORTED;
389 static int gaudi_get_fixed_properties(struct hl_device *hdev)
391 struct asic_fixed_properties *prop = &hdev->asic_prop;
392 u32 num_sync_stream_queues = 0;
393 int i;
395 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
396 prop->hw_queues_props = kcalloc(prop->max_queues,
397 sizeof(struct hw_queue_properties),
398 GFP_KERNEL);
400 if (!prop->hw_queues_props)
401 return -ENOMEM;
403 for (i = 0 ; i < prop->max_queues ; i++) {
404 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
405 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
406 prop->hw_queues_props[i].driver_only = 0;
407 prop->hw_queues_props[i].supports_sync_stream = 1;
408 prop->hw_queues_props[i].cb_alloc_flags =
409 CB_ALLOC_KERNEL;
410 num_sync_stream_queues++;
411 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
412 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
413 prop->hw_queues_props[i].driver_only = 1;
414 prop->hw_queues_props[i].supports_sync_stream = 0;
415 prop->hw_queues_props[i].cb_alloc_flags =
416 CB_ALLOC_KERNEL;
417 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
418 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
419 prop->hw_queues_props[i].driver_only = 0;
420 prop->hw_queues_props[i].supports_sync_stream = 0;
421 prop->hw_queues_props[i].cb_alloc_flags =
422 CB_ALLOC_USER;
425 prop->hw_queues_props[i].collective_mode =
426 get_collective_mode(hdev, i);
429 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
430 prop->collective_first_sob = 0;
431 prop->collective_first_mon = 0;
433 /* 2 SOBs per internal queue stream are reserved for collective */
434 prop->sync_stream_first_sob =
435 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
436 * QMAN_STREAMS * HL_RSVD_SOBS;
438 /* 1 monitor per internal queue stream are reserved for collective
439 * 2 monitors per external queue stream are reserved for collective
441 prop->sync_stream_first_mon =
442 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
443 (NUMBER_OF_EXT_HW_QUEUES * 2);
445 prop->dram_base_address = DRAM_PHYS_BASE;
446 prop->dram_size = GAUDI_HBM_SIZE_32GB;
447 prop->dram_end_address = prop->dram_base_address +
448 prop->dram_size;
449 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
451 prop->sram_base_address = SRAM_BASE_ADDR;
452 prop->sram_size = SRAM_SIZE;
453 prop->sram_end_address = prop->sram_base_address +
454 prop->sram_size;
455 prop->sram_user_base_address = prop->sram_base_address +
456 SRAM_USER_BASE_OFFSET;
458 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
459 if (hdev->pldm)
460 prop->mmu_pgt_size = 0x800000; /* 8MB */
461 else
462 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
463 prop->mmu_pte_size = HL_PTE_SIZE;
464 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
465 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
466 prop->dram_page_size = PAGE_SIZE_2MB;
467 prop->dram_supports_virtual_memory = false;
469 prop->pmmu.hop0_shift = HOP0_SHIFT;
470 prop->pmmu.hop1_shift = HOP1_SHIFT;
471 prop->pmmu.hop2_shift = HOP2_SHIFT;
472 prop->pmmu.hop3_shift = HOP3_SHIFT;
473 prop->pmmu.hop4_shift = HOP4_SHIFT;
474 prop->pmmu.hop0_mask = HOP0_MASK;
475 prop->pmmu.hop1_mask = HOP1_MASK;
476 prop->pmmu.hop2_mask = HOP2_MASK;
477 prop->pmmu.hop3_mask = HOP3_MASK;
478 prop->pmmu.hop4_mask = HOP4_MASK;
479 prop->pmmu.start_addr = VA_HOST_SPACE_START;
480 prop->pmmu.end_addr =
481 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
482 prop->pmmu.page_size = PAGE_SIZE_4KB;
483 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
485 /* PMMU and HPMMU are the same except of page size */
486 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
487 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
489 /* shifts and masks are the same in PMMU and DMMU */
490 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
491 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
492 prop->dmmu.end_addr = VA_HOST_SPACE_END;
493 prop->dmmu.page_size = PAGE_SIZE_2MB;
495 prop->cfg_size = CFG_SIZE;
496 prop->max_asid = MAX_ASID;
497 prop->num_of_events = GAUDI_EVENT_SIZE;
498 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
500 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
502 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
503 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
505 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
506 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
508 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
509 CARD_NAME_MAX_LEN);
511 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
513 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
514 prop->sync_stream_first_sob +
515 (num_sync_stream_queues * HL_RSVD_SOBS);
516 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
517 prop->sync_stream_first_mon +
518 (num_sync_stream_queues * HL_RSVD_MONS);
520 /* disable fw security for now, set it in a later stage */
521 prop->fw_security_disabled = true;
522 prop->fw_security_status_valid = false;
523 prop->hard_reset_done_by_fw = false;
525 return 0;
528 static int gaudi_pci_bars_map(struct hl_device *hdev)
530 static const char * const name[] = {"SRAM", "CFG", "HBM"};
531 bool is_wc[3] = {false, false, true};
532 int rc;
534 rc = hl_pci_bars_map(hdev, name, is_wc);
535 if (rc)
536 return rc;
538 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
539 (CFG_BASE - SPI_FLASH_BASE_ADDR);
541 return 0;
544 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
546 struct gaudi_device *gaudi = hdev->asic_specific;
547 struct hl_inbound_pci_region pci_region;
548 u64 old_addr = addr;
549 int rc;
551 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
552 return old_addr;
554 /* Inbound Region 2 - Bar 4 - Point to HBM */
555 pci_region.mode = PCI_BAR_MATCH_MODE;
556 pci_region.bar = HBM_BAR_ID;
557 pci_region.addr = addr;
558 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
559 if (rc)
560 return U64_MAX;
562 if (gaudi) {
563 old_addr = gaudi->hbm_bar_cur_addr;
564 gaudi->hbm_bar_cur_addr = addr;
567 return old_addr;
570 static int gaudi_init_iatu(struct hl_device *hdev)
572 struct hl_inbound_pci_region inbound_region;
573 struct hl_outbound_pci_region outbound_region;
574 int rc;
576 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
577 inbound_region.mode = PCI_BAR_MATCH_MODE;
578 inbound_region.bar = SRAM_BAR_ID;
579 inbound_region.addr = SRAM_BASE_ADDR;
580 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
581 if (rc)
582 goto done;
584 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
585 inbound_region.mode = PCI_BAR_MATCH_MODE;
586 inbound_region.bar = CFG_BAR_ID;
587 inbound_region.addr = SPI_FLASH_BASE_ADDR;
588 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
589 if (rc)
590 goto done;
592 /* Inbound Region 2 - Bar 4 - Point to HBM */
593 inbound_region.mode = PCI_BAR_MATCH_MODE;
594 inbound_region.bar = HBM_BAR_ID;
595 inbound_region.addr = DRAM_PHYS_BASE;
596 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
597 if (rc)
598 goto done;
600 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
602 /* Outbound Region 0 - Point to Host */
603 outbound_region.addr = HOST_PHYS_BASE;
604 outbound_region.size = HOST_PHYS_SIZE;
605 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
607 done:
608 return rc;
611 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
613 return RREG32(mmHW_STATE);
616 static int gaudi_early_init(struct hl_device *hdev)
618 struct asic_fixed_properties *prop = &hdev->asic_prop;
619 struct pci_dev *pdev = hdev->pdev;
620 int rc;
622 rc = gaudi_get_fixed_properties(hdev);
623 if (rc) {
624 dev_err(hdev->dev, "Failed to get fixed properties\n");
625 return rc;
628 /* Check BAR sizes */
629 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
630 dev_err(hdev->dev,
631 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
632 SRAM_BAR_ID,
633 (unsigned long long) pci_resource_len(pdev,
634 SRAM_BAR_ID),
635 SRAM_BAR_SIZE);
636 rc = -ENODEV;
637 goto free_queue_props;
640 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
641 dev_err(hdev->dev,
642 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
643 CFG_BAR_ID,
644 (unsigned long long) pci_resource_len(pdev,
645 CFG_BAR_ID),
646 CFG_BAR_SIZE);
647 rc = -ENODEV;
648 goto free_queue_props;
651 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
653 rc = hl_pci_init(hdev);
654 if (rc)
655 goto free_queue_props;
657 /* Before continuing in the initialization, we need to read the preboot
658 * version to determine whether we run with a security-enabled firmware
660 rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
661 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
662 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
663 if (rc) {
664 if (hdev->reset_on_preboot_fail)
665 hdev->asic_funcs->hw_fini(hdev, true);
666 goto pci_fini;
669 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
670 dev_info(hdev->dev,
671 "H/W state is dirty, must reset before initializing\n");
672 hdev->asic_funcs->hw_fini(hdev, true);
675 return 0;
677 pci_fini:
678 hl_pci_fini(hdev);
679 free_queue_props:
680 kfree(hdev->asic_prop.hw_queues_props);
681 return rc;
684 static int gaudi_early_fini(struct hl_device *hdev)
686 kfree(hdev->asic_prop.hw_queues_props);
687 hl_pci_fini(hdev);
689 return 0;
693 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
695 * @hdev: pointer to hl_device structure
698 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
700 struct asic_fixed_properties *prop = &hdev->asic_prop;
701 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
702 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
703 int rc;
705 if (hdev->asic_prop.fw_security_disabled) {
706 /* Backward compatibility */
707 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
708 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
709 nr = RREG32(mmPSOC_CPU_PLL_NR);
710 nf = RREG32(mmPSOC_CPU_PLL_NF);
711 od = RREG32(mmPSOC_CPU_PLL_OD);
713 if (div_sel == DIV_SEL_REF_CLK ||
714 div_sel == DIV_SEL_DIVIDED_REF) {
715 if (div_sel == DIV_SEL_REF_CLK)
716 freq = PLL_REF_CLK;
717 else
718 freq = PLL_REF_CLK / (div_fctr + 1);
719 } else if (div_sel == DIV_SEL_PLL_CLK ||
720 div_sel == DIV_SEL_DIVIDED_PLL) {
721 pll_clk = PLL_REF_CLK * (nf + 1) /
722 ((nr + 1) * (od + 1));
723 if (div_sel == DIV_SEL_PLL_CLK)
724 freq = pll_clk;
725 else
726 freq = pll_clk / (div_fctr + 1);
727 } else {
728 dev_warn(hdev->dev,
729 "Received invalid div select value: %d",
730 div_sel);
731 freq = 0;
733 } else {
734 rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
736 if (rc)
737 return rc;
739 freq = pll_freq_arr[2];
742 prop->psoc_timestamp_frequency = freq;
743 prop->psoc_pci_pll_nr = nr;
744 prop->psoc_pci_pll_nf = nf;
745 prop->psoc_pci_pll_od = od;
746 prop->psoc_pci_pll_div_factor = div_fctr;
748 return 0;
751 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
752 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
754 struct asic_fixed_properties *prop = &hdev->asic_prop;
755 struct packet_lin_dma *init_tpc_mem_pkt;
756 struct hl_cs_job *job;
757 struct hl_cb *cb;
758 u64 dst_addr;
759 u32 cb_size, ctl;
760 u8 tpc_id;
761 int rc;
763 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
764 if (!cb)
765 return -EFAULT;
767 init_tpc_mem_pkt = cb->kernel_address;
768 cb_size = sizeof(*init_tpc_mem_pkt);
769 memset(init_tpc_mem_pkt, 0, cb_size);
771 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
773 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
774 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
775 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
776 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
778 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
780 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
781 dst_addr = (prop->sram_user_base_address &
782 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
783 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
784 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
786 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
787 if (!job) {
788 dev_err(hdev->dev, "Failed to allocate a new job\n");
789 rc = -ENOMEM;
790 goto release_cb;
793 job->id = 0;
794 job->user_cb = cb;
795 atomic_inc(&job->user_cb->cs_cnt);
796 job->user_cb_size = cb_size;
797 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
798 job->patched_cb = job->user_cb;
799 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
801 hl_debugfs_add_job(hdev, job);
803 rc = gaudi_send_job_on_qman0(hdev, job);
805 if (rc)
806 goto free_job;
808 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
809 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
810 if (rc)
811 break;
814 free_job:
815 hl_userptr_delete_list(hdev, &job->userptr_list);
816 hl_debugfs_remove_job(hdev, job);
817 kfree(job);
818 atomic_dec(&cb->cs_cnt);
820 release_cb:
821 hl_cb_put(cb);
822 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
824 return rc;
828 * gaudi_init_tpc_mem() - Initialize TPC memories.
829 * @hdev: Pointer to hl_device structure.
831 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
833 * Return: 0 for success, negative value for error.
835 static int gaudi_init_tpc_mem(struct hl_device *hdev)
837 const struct firmware *fw;
838 size_t fw_size;
839 void *cpu_addr;
840 dma_addr_t dma_handle;
841 int rc, count = 5;
843 again:
844 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
845 if (rc == -EINTR && count-- > 0) {
846 msleep(50);
847 goto again;
850 if (rc) {
851 dev_err(hdev->dev, "Failed to load firmware file %s\n",
852 GAUDI_TPC_FW_FILE);
853 goto out;
856 fw_size = fw->size;
857 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
858 &dma_handle, GFP_KERNEL | __GFP_ZERO);
859 if (!cpu_addr) {
860 dev_err(hdev->dev,
861 "Failed to allocate %zu of dma memory for TPC kernel\n",
862 fw_size);
863 rc = -ENOMEM;
864 goto out;
867 memcpy(cpu_addr, fw->data, fw_size);
869 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
871 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
872 dma_handle);
874 out:
875 release_firmware(fw);
876 return rc;
879 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
881 struct gaudi_device *gaudi = hdev->asic_specific;
882 struct gaudi_collective_properties *prop = &gaudi->collective_props;
883 struct hl_hw_queue *q;
884 u32 i, sob_id, sob_group_id, queue_id;
886 /* Iterate through SOB groups and assign a SOB for each slave queue */
887 sob_group_id =
888 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
889 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
891 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
892 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
893 q = &hdev->kernel_queues[queue_id + (4 * i)];
894 q->sync_stream_prop.collective_sob_id = sob_id + i;
897 /* Both DMA5 and TPC7 use the same resources since only a single
898 * engine need to participate in the reduction process
900 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
901 q = &hdev->kernel_queues[queue_id];
902 q->sync_stream_prop.collective_sob_id =
903 sob_id + NIC_NUMBER_OF_ENGINES;
905 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
906 q = &hdev->kernel_queues[queue_id];
907 q->sync_stream_prop.collective_sob_id =
908 sob_id + NIC_NUMBER_OF_ENGINES;
911 static void gaudi_sob_group_hw_reset(struct kref *ref)
913 struct gaudi_hw_sob_group *hw_sob_group =
914 container_of(ref, struct gaudi_hw_sob_group, kref);
915 struct hl_device *hdev = hw_sob_group->hdev;
916 int i;
918 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
919 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
920 (hw_sob_group->base_sob_id + i) * 4, 0);
922 kref_init(&hw_sob_group->kref);
925 static void gaudi_sob_group_reset_error(struct kref *ref)
927 struct gaudi_hw_sob_group *hw_sob_group =
928 container_of(ref, struct gaudi_hw_sob_group, kref);
929 struct hl_device *hdev = hw_sob_group->hdev;
931 dev_crit(hdev->dev,
932 "SOB release shouldn't be called here, base_sob_id: %d\n",
933 hw_sob_group->base_sob_id);
936 static int gaudi_collective_init(struct hl_device *hdev)
938 u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
939 struct gaudi_collective_properties *prop;
940 struct gaudi_device *gaudi;
942 gaudi = hdev->asic_specific;
943 prop = &gaudi->collective_props;
944 sob_id = hdev->asic_prop.collective_first_sob;
946 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
947 reserved_sobs_per_group =
948 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
950 /* Init SOB groups */
951 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
952 prop->hw_sob_group[i].hdev = hdev;
953 prop->hw_sob_group[i].base_sob_id = sob_id;
954 sob_id += reserved_sobs_per_group;
955 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
958 for (i = 0 ; i < QMAN_STREAMS; i++) {
959 prop->next_sob_group_val[i] = 1;
960 prop->curr_sob_group_idx[i] = 0;
961 gaudi_collective_map_sobs(hdev, i);
964 prop->mstr_sob_mask[0] = 0;
965 master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
966 for (i = 0 ; i < master_monitor_sobs ; i++)
967 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
968 prop->mstr_sob_mask[0] |= BIT(i);
970 prop->mstr_sob_mask[1] = 0;
971 master_monitor_sobs =
972 NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
973 for (i = 0 ; i < master_monitor_sobs; i++) {
974 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
975 prop->mstr_sob_mask[1] |= BIT(i);
978 /* Set collective engine bit */
979 prop->mstr_sob_mask[1] |= BIT(i);
981 return 0;
984 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
986 struct gaudi_device *gaudi = hdev->asic_specific;
987 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
989 kref_put(&cprop->hw_sob_group[sob_group].kref,
990 gaudi_sob_group_hw_reset);
993 static void gaudi_collective_master_init_job(struct hl_device *hdev,
994 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
996 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
997 struct gaudi_collective_properties *cprop;
998 struct hl_gen_wait_properties wait_prop;
999 struct hl_sync_stream_properties *prop;
1000 struct gaudi_device *gaudi;
1002 gaudi = hdev->asic_specific;
1003 cprop = &gaudi->collective_props;
1004 queue_id = job->hw_queue_id;
1005 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1007 master_sob_base =
1008 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1009 master_monitor = prop->collective_mstr_mon_id[0];
1011 dev_dbg(hdev->dev,
1012 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1013 master_sob_base, cprop->mstr_sob_mask[0],
1014 cprop->next_sob_group_val[stream],
1015 master_monitor, queue_id);
1017 wait_prop.data = (void *) job->patched_cb;
1018 wait_prop.sob_base = master_sob_base;
1019 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1020 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1021 wait_prop.mon_id = master_monitor;
1022 wait_prop.q_idx = queue_id;
1023 wait_prop.size = cb_size;
1024 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1026 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1027 master_monitor = prop->collective_mstr_mon_id[1];
1029 dev_dbg(hdev->dev,
1030 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1031 master_sob_base, cprop->mstr_sob_mask[1],
1032 cprop->next_sob_group_val[stream],
1033 master_monitor, queue_id);
1035 wait_prop.sob_base = master_sob_base;
1036 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1037 wait_prop.mon_id = master_monitor;
1038 wait_prop.size = cb_size;
1039 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1042 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1043 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1045 struct hl_gen_wait_properties wait_prop;
1046 struct hl_sync_stream_properties *prop;
1047 u32 queue_id, cb_size = 0;
1049 queue_id = job->hw_queue_id;
1050 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1052 /* Add to wait CBs using slave monitor */
1053 wait_prop.data = (void *) job->user_cb;
1054 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1055 wait_prop.sob_mask = 0x1;
1056 wait_prop.sob_val = cs_cmpl->sob_val;
1057 wait_prop.mon_id = prop->collective_slave_mon_id;
1058 wait_prop.q_idx = queue_id;
1059 wait_prop.size = cb_size;
1061 dev_dbg(hdev->dev,
1062 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1063 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1064 prop->collective_slave_mon_id, queue_id);
1066 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1068 dev_dbg(hdev->dev,
1069 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1070 prop->collective_sob_id, queue_id);
1072 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1073 prop->collective_sob_id, cb_size, false);
1076 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1078 struct hl_cs_compl *signal_cs_cmpl =
1079 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1080 struct hl_cs_compl *cs_cmpl =
1081 container_of(cs->fence, struct hl_cs_compl, base_fence);
1082 struct gaudi_collective_properties *cprop;
1083 u32 stream, queue_id, sob_group_offset;
1084 struct gaudi_device *gaudi;
1085 struct hl_device *hdev;
1086 struct hl_cs_job *job;
1087 struct hl_ctx *ctx;
1089 ctx = cs->ctx;
1090 hdev = ctx->hdev;
1091 gaudi = hdev->asic_specific;
1092 cprop = &gaudi->collective_props;
1094 /* copy the SOB id and value of the signal CS */
1095 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1096 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1098 /* Calculate the stream from collective master queue (1st job) */
1099 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1100 stream = job->hw_queue_id % 4;
1101 sob_group_offset =
1102 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1104 list_for_each_entry(job, &cs->job_list, cs_node) {
1105 queue_id = job->hw_queue_id;
1107 if (hdev->kernel_queues[queue_id].collective_mode ==
1108 HL_COLLECTIVE_MASTER)
1109 gaudi_collective_master_init_job(hdev, job, stream,
1110 sob_group_offset);
1111 else
1112 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1115 cs_cmpl->sob_group = sob_group_offset;
1117 /* Handle sob group kref and wraparound */
1118 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1119 cprop->next_sob_group_val[stream]++;
1121 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1123 * Decrement as we reached the max value.
1124 * The release function won't be called here as we've
1125 * just incremented the refcount.
1127 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1128 gaudi_sob_group_reset_error);
1129 cprop->next_sob_group_val[stream] = 1;
1130 /* only two SOBs are currently in use */
1131 cprop->curr_sob_group_idx[stream] =
1132 (cprop->curr_sob_group_idx[stream] + 1) &
1133 (HL_RSVD_SOBS - 1);
1135 gaudi_collective_map_sobs(hdev, stream);
1137 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1138 cprop->curr_sob_group_idx[stream], stream);
1141 /* Increment kref since all slave queues are now waiting on it */
1142 kref_get(&cs_cmpl->hw_sob->kref);
1144 * Must put the signal fence after the SOB refcnt increment so
1145 * the SOB refcnt won't turn 0 and reset the SOB before the
1146 * wait CS was submitted.
1148 mb();
1149 hl_fence_put(cs->signal_fence);
1150 cs->signal_fence = NULL;
1153 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1154 struct hl_ctx *ctx, struct hl_cs *cs,
1155 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1157 struct hw_queue_properties *hw_queue_prop;
1158 struct hl_cs_counters_atomic *cntr;
1159 struct hl_cs_job *job;
1160 struct hl_cb *cb;
1161 u32 cb_size;
1162 bool patched_cb;
1164 cntr = &hdev->aggregated_cs_counters;
1166 if (mode == HL_COLLECTIVE_MASTER) {
1167 /* CB size of collective master queue contains
1168 * 4 msg short packets for monitor 1 configuration
1169 * 1 fence packet
1170 * 4 msg short packets for monitor 2 configuration
1171 * 1 fence packet
1172 * 2 msg prot packets for completion and MSI-X
1174 cb_size = sizeof(struct packet_msg_short) * 8 +
1175 sizeof(struct packet_fence) * 2 +
1176 sizeof(struct packet_msg_prot) * 2;
1177 patched_cb = true;
1178 } else {
1179 /* CB size of collective slave queues contains
1180 * 4 msg short packets for monitor configuration
1181 * 1 fence packet
1182 * 1 additional msg short packet for sob signal
1184 cb_size = sizeof(struct packet_msg_short) * 5 +
1185 sizeof(struct packet_fence);
1186 patched_cb = false;
1189 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1190 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1191 if (!job) {
1192 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1193 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1194 dev_err(hdev->dev, "Failed to allocate a new job\n");
1195 return -ENOMEM;
1198 /* Allocate internal mapped CB for non patched CBs */
1199 cb = hl_cb_kernel_create(hdev, cb_size,
1200 hdev->mmu_enable && !patched_cb);
1201 if (!cb) {
1202 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1203 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1204 kfree(job);
1205 return -EFAULT;
1208 job->id = 0;
1209 job->cs = cs;
1210 job->user_cb = cb;
1211 atomic_inc(&job->user_cb->cs_cnt);
1212 job->user_cb_size = cb_size;
1213 job->hw_queue_id = queue_id;
1216 * No need in parsing, user CB is the patched CB.
1217 * We call hl_cb_destroy() out of two reasons - we don't need
1218 * the CB in the CB idr anymore and to decrement its refcount as
1219 * it was incremented inside hl_cb_kernel_create().
1221 if (patched_cb)
1222 job->patched_cb = job->user_cb;
1223 else
1224 job->patched_cb = NULL;
1226 job->job_cb_size = job->user_cb_size;
1227 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1229 /* increment refcount as for external queues we get completion */
1230 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1231 cs_get(cs);
1233 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1235 list_add_tail(&job->cs_node, &cs->job_list);
1237 hl_debugfs_add_job(hdev, job);
1239 return 0;
1242 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1243 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1244 u32 collective_engine_id)
1246 struct gaudi_device *gaudi = hdev->asic_specific;
1247 struct hw_queue_properties *hw_queue_prop;
1248 u32 queue_id, collective_queue, num_jobs;
1249 u32 stream, nic_queue, nic_idx = 0;
1250 bool skip;
1251 int i, rc;
1253 /* Verify wait queue id is configured as master */
1254 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1255 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1256 dev_err(hdev->dev,
1257 "Queue %d is not configured as collective master\n",
1258 wait_queue_id);
1259 return -EINVAL;
1262 /* Verify engine id is supported */
1263 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1264 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1265 dev_err(hdev->dev,
1266 "Collective wait does not support engine %u\n",
1267 collective_engine_id);
1268 return -EINVAL;
1271 stream = wait_queue_id % 4;
1273 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1274 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1275 else
1276 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1278 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1279 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1281 /* First job goes to the collective master queue, it will wait for
1282 * the collective slave queues to finish execution.
1283 * The synchronization is done using two monitors:
1284 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1285 * reduction engine (DMA5/TPC7).
1287 * Rest of the jobs goes to the collective slave queues which will
1288 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1290 for (i = 0 ; i < num_jobs ; i++) {
1291 if (i == 0) {
1292 queue_id = wait_queue_id;
1293 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1294 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1295 } else {
1296 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1297 if (gaudi->hw_cap_initialized &
1298 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1299 skip = false;
1300 else
1301 skip = true;
1303 queue_id = nic_queue;
1304 nic_queue += 4;
1305 nic_idx++;
1307 if (skip)
1308 continue;
1309 } else {
1310 queue_id = collective_queue;
1313 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1314 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1317 if (rc)
1318 return rc;
1321 return rc;
1324 static int gaudi_late_init(struct hl_device *hdev)
1326 struct gaudi_device *gaudi = hdev->asic_specific;
1327 int rc;
1329 rc = gaudi->cpucp_info_get(hdev);
1330 if (rc) {
1331 dev_err(hdev->dev, "Failed to get cpucp info\n");
1332 return rc;
1335 if ((hdev->card_type == cpucp_card_type_pci) &&
1336 (hdev->nic_ports_mask & 0x3)) {
1337 dev_info(hdev->dev,
1338 "PCI card detected, only 8 ports are enabled\n");
1339 hdev->nic_ports_mask &= ~0x3;
1341 /* Stop and disable unused NIC QMANs */
1342 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1343 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1344 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1346 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1347 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1348 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1350 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1351 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1353 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1356 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1357 if (rc) {
1358 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1359 return rc;
1362 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
1364 rc = gaudi_fetch_psoc_frequency(hdev);
1365 if (rc) {
1366 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1367 goto disable_pci_access;
1370 rc = gaudi_mmu_clear_pgt_range(hdev);
1371 if (rc) {
1372 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1373 goto disable_pci_access;
1376 rc = gaudi_init_tpc_mem(hdev);
1377 if (rc) {
1378 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1379 goto disable_pci_access;
1382 rc = gaudi_collective_init(hdev);
1383 if (rc) {
1384 dev_err(hdev->dev, "Failed to init collective\n");
1385 goto disable_pci_access;
1388 return 0;
1390 disable_pci_access:
1391 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1393 return rc;
1396 static void gaudi_late_fini(struct hl_device *hdev)
1398 const struct hwmon_channel_info **channel_info_arr;
1399 int i = 0;
1401 if (!hdev->hl_chip_info->info)
1402 return;
1404 channel_info_arr = hdev->hl_chip_info->info;
1406 while (channel_info_arr[i]) {
1407 kfree(channel_info_arr[i]->config);
1408 kfree(channel_info_arr[i]);
1409 i++;
1412 kfree(channel_info_arr);
1414 hdev->hl_chip_info->info = NULL;
1417 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1419 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1420 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1421 int i, j, rc = 0;
1424 * The device CPU works with 40-bits addresses, while bit 39 must be set
1425 * to '1' when accessing the host.
1426 * Bits 49:39 of the full host address are saved for a later
1427 * configuration of the HW to perform extension to 50 bits.
1428 * Because there is a single HW register that holds the extension bits,
1429 * these bits must be identical in all allocated range.
1432 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1433 virt_addr_arr[i] =
1434 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1435 HL_CPU_ACCESSIBLE_MEM_SIZE,
1436 &dma_addr_arr[i],
1437 GFP_KERNEL | __GFP_ZERO);
1438 if (!virt_addr_arr[i]) {
1439 rc = -ENOMEM;
1440 goto free_dma_mem_arr;
1443 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1444 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1445 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1446 break;
1449 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1450 dev_err(hdev->dev,
1451 "MSB of CPU accessible DMA memory are not identical in all range\n");
1452 rc = -EFAULT;
1453 goto free_dma_mem_arr;
1456 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1457 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1458 hdev->cpu_pci_msb_addr =
1459 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1461 if (hdev->asic_prop.fw_security_disabled)
1462 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1464 free_dma_mem_arr:
1465 for (j = 0 ; j < i ; j++)
1466 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1467 HL_CPU_ACCESSIBLE_MEM_SIZE,
1468 virt_addr_arr[j],
1469 dma_addr_arr[j]);
1471 return rc;
1474 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1476 struct gaudi_device *gaudi = hdev->asic_specific;
1477 struct gaudi_internal_qman_info *q;
1478 u32 i;
1480 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1481 q = &gaudi->internal_qmans[i];
1482 if (!q->pq_kernel_addr)
1483 continue;
1484 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1485 q->pq_kernel_addr,
1486 q->pq_dma_addr);
1490 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1492 struct gaudi_device *gaudi = hdev->asic_specific;
1493 struct gaudi_internal_qman_info *q;
1494 int rc, i;
1496 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1497 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1498 continue;
1500 q = &gaudi->internal_qmans[i];
1502 switch (i) {
1503 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1504 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1505 break;
1506 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1507 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1508 break;
1509 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1510 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1511 break;
1512 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1513 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1514 break;
1515 default:
1516 dev_err(hdev->dev, "Bad internal queue index %d", i);
1517 rc = -EINVAL;
1518 goto free_internal_qmans_pq_mem;
1521 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1522 hdev, q->pq_size,
1523 &q->pq_dma_addr,
1524 GFP_KERNEL | __GFP_ZERO);
1525 if (!q->pq_kernel_addr) {
1526 rc = -ENOMEM;
1527 goto free_internal_qmans_pq_mem;
1531 return 0;
1533 free_internal_qmans_pq_mem:
1534 gaudi_free_internal_qmans_pq_mem(hdev);
1535 return rc;
1538 static int gaudi_sw_init(struct hl_device *hdev)
1540 struct gaudi_device *gaudi;
1541 u32 i, event_id = 0;
1542 int rc;
1544 /* Allocate device structure */
1545 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1546 if (!gaudi)
1547 return -ENOMEM;
1549 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1550 if (gaudi_irq_map_table[i].valid) {
1551 if (event_id == GAUDI_EVENT_SIZE) {
1552 dev_err(hdev->dev,
1553 "Event array exceeds the limit of %u events\n",
1554 GAUDI_EVENT_SIZE);
1555 rc = -EINVAL;
1556 goto free_gaudi_device;
1559 gaudi->events[event_id++] =
1560 gaudi_irq_map_table[i].fc_id;
1564 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1566 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1568 hdev->asic_specific = gaudi;
1570 /* Create DMA pool for small allocations */
1571 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1572 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1573 if (!hdev->dma_pool) {
1574 dev_err(hdev->dev, "failed to create DMA pool\n");
1575 rc = -ENOMEM;
1576 goto free_gaudi_device;
1579 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1580 if (rc)
1581 goto free_dma_pool;
1583 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1584 if (!hdev->cpu_accessible_dma_pool) {
1585 dev_err(hdev->dev,
1586 "Failed to create CPU accessible DMA pool\n");
1587 rc = -ENOMEM;
1588 goto free_cpu_dma_mem;
1591 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1592 (uintptr_t) hdev->cpu_accessible_dma_mem,
1593 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1594 if (rc) {
1595 dev_err(hdev->dev,
1596 "Failed to add memory to CPU accessible DMA pool\n");
1597 rc = -EFAULT;
1598 goto free_cpu_accessible_dma_pool;
1601 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1602 if (rc)
1603 goto free_cpu_accessible_dma_pool;
1605 spin_lock_init(&gaudi->hw_queues_lock);
1606 mutex_init(&gaudi->clk_gate_mutex);
1608 hdev->supports_sync_stream = true;
1609 hdev->supports_coresight = true;
1611 return 0;
1613 free_cpu_accessible_dma_pool:
1614 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1615 free_cpu_dma_mem:
1616 if (hdev->asic_prop.fw_security_disabled)
1617 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1618 hdev->cpu_pci_msb_addr);
1619 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1620 HL_CPU_ACCESSIBLE_MEM_SIZE,
1621 hdev->cpu_accessible_dma_mem,
1622 hdev->cpu_accessible_dma_address);
1623 free_dma_pool:
1624 dma_pool_destroy(hdev->dma_pool);
1625 free_gaudi_device:
1626 kfree(gaudi);
1627 return rc;
1630 static int gaudi_sw_fini(struct hl_device *hdev)
1632 struct gaudi_device *gaudi = hdev->asic_specific;
1634 gaudi_free_internal_qmans_pq_mem(hdev);
1636 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1638 if (hdev->asic_prop.fw_security_disabled)
1639 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1640 hdev->cpu_pci_msb_addr);
1642 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1643 HL_CPU_ACCESSIBLE_MEM_SIZE,
1644 hdev->cpu_accessible_dma_mem,
1645 hdev->cpu_accessible_dma_address);
1647 dma_pool_destroy(hdev->dma_pool);
1649 mutex_destroy(&gaudi->clk_gate_mutex);
1651 kfree(gaudi);
1653 return 0;
1656 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1658 struct hl_device *hdev = arg;
1659 int i;
1661 if (hdev->disabled)
1662 return IRQ_HANDLED;
1664 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1665 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1667 hl_irq_handler_eq(irq, &hdev->event_queue);
1669 return IRQ_HANDLED;
1673 * For backward compatibility, new MSI interrupts should be set after the
1674 * existing CPU and NIC interrupts.
1676 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1677 bool cpu_eq)
1679 int msi_vec;
1681 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1682 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1683 GAUDI_EVENT_QUEUE_MSI_IDX);
1685 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1686 (nr + NIC_NUMBER_OF_ENGINES + 1);
1688 return pci_irq_vector(hdev->pdev, msi_vec);
1691 static int gaudi_enable_msi_single(struct hl_device *hdev)
1693 int rc, irq;
1695 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1697 irq = gaudi_pci_irq_vector(hdev, 0, false);
1698 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1699 "gaudi single msi", hdev);
1700 if (rc)
1701 dev_err(hdev->dev,
1702 "Failed to request single MSI IRQ\n");
1704 return rc;
1707 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1709 int cq_cnt = hdev->asic_prop.completion_queues_count;
1710 int rc, i, irq_cnt_init, irq;
1712 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1713 irq = gaudi_pci_irq_vector(hdev, i, false);
1714 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1715 &hdev->completion_queue[i]);
1716 if (rc) {
1717 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1718 goto free_irqs;
1722 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1723 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1724 &hdev->event_queue);
1725 if (rc) {
1726 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1727 goto free_irqs;
1730 return 0;
1732 free_irqs:
1733 for (i = 0 ; i < irq_cnt_init ; i++)
1734 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1735 &hdev->completion_queue[i]);
1736 return rc;
1739 static int gaudi_enable_msi(struct hl_device *hdev)
1741 struct gaudi_device *gaudi = hdev->asic_specific;
1742 int rc;
1744 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1745 return 0;
1747 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1748 PCI_IRQ_MSI);
1749 if (rc < 0) {
1750 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1751 return rc;
1754 if (rc < NUMBER_OF_INTERRUPTS) {
1755 gaudi->multi_msi_mode = false;
1756 rc = gaudi_enable_msi_single(hdev);
1757 } else {
1758 gaudi->multi_msi_mode = true;
1759 rc = gaudi_enable_msi_multi(hdev);
1762 if (rc)
1763 goto free_pci_irq_vectors;
1765 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1767 return 0;
1769 free_pci_irq_vectors:
1770 pci_free_irq_vectors(hdev->pdev);
1771 return rc;
1774 static void gaudi_sync_irqs(struct hl_device *hdev)
1776 struct gaudi_device *gaudi = hdev->asic_specific;
1777 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1779 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1780 return;
1782 /* Wait for all pending IRQs to be finished */
1783 if (gaudi->multi_msi_mode) {
1784 for (i = 0 ; i < cq_cnt ; i++)
1785 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1787 synchronize_irq(gaudi_pci_irq_vector(hdev,
1788 GAUDI_EVENT_QUEUE_MSI_IDX,
1789 true));
1790 } else {
1791 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1795 static void gaudi_disable_msi(struct hl_device *hdev)
1797 struct gaudi_device *gaudi = hdev->asic_specific;
1798 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1800 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1801 return;
1803 gaudi_sync_irqs(hdev);
1805 if (gaudi->multi_msi_mode) {
1806 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1807 true);
1808 free_irq(irq, &hdev->event_queue);
1810 for (i = 0 ; i < cq_cnt ; i++) {
1811 irq = gaudi_pci_irq_vector(hdev, i, false);
1812 free_irq(irq, &hdev->completion_queue[i]);
1814 } else {
1815 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1818 pci_free_irq_vectors(hdev->pdev);
1820 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1823 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1825 struct gaudi_device *gaudi = hdev->asic_specific;
1827 if (!hdev->asic_prop.fw_security_disabled)
1828 return;
1830 if (hdev->asic_prop.fw_security_status_valid &&
1831 (hdev->asic_prop.fw_app_security_map &
1832 CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
1833 return;
1835 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1836 return;
1838 if (!hdev->sram_scrambler_enable)
1839 return;
1841 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1842 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1843 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1844 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1845 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1846 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1847 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1848 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1849 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1850 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1851 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1852 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1853 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1854 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1855 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1856 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1858 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1859 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1860 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1861 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1862 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1863 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1864 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1865 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1866 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1867 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1868 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1869 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1870 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1871 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1872 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1873 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1875 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1876 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1877 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1878 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1879 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1880 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1881 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1882 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1883 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1884 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1885 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1886 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1887 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1888 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1889 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1890 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1892 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1895 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1897 struct gaudi_device *gaudi = hdev->asic_specific;
1899 if (!hdev->asic_prop.fw_security_disabled)
1900 return;
1902 if (hdev->asic_prop.fw_security_status_valid &&
1903 (hdev->asic_prop.fw_boot_cpu_security_map &
1904 CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
1905 return;
1907 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1908 return;
1910 if (!hdev->dram_scrambler_enable)
1911 return;
1913 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1914 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1915 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1916 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1917 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1918 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1919 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1920 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1921 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1922 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1923 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1924 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1925 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1926 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1927 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1928 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1930 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1931 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1932 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1933 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1934 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1935 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1936 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1937 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1938 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1939 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1940 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1941 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1942 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1943 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1944 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1945 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1947 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1948 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1949 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1950 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1951 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1952 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1953 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1954 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1955 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1956 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1957 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1958 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1959 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1960 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1961 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1962 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1964 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
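/*
 * The E2E (end-to-end) credit programming below sizes the outstanding
 * HBM and PCI read/write credits of every SIF/NIF router and DMA_IF
 * channel. The HBM values are written shifted right by 3, so the
 * registers presumably hold the credit count in units of 8; that is
 * inferred from the arithmetic here rather than from documentation.
 */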
1967 static void gaudi_init_e2e(struct hl_device *hdev)
1969 if (!hdev->asic_prop.fw_security_disabled)
1970 return;
1972 if (hdev->asic_prop.fw_security_status_valid &&
1973 (hdev->asic_prop.fw_boot_cpu_security_map &
1974 CPU_BOOT_DEV_STS0_E2E_CRED_EN))
1975 return;
1977 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1978 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1979 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1980 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1982 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1983 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1984 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1985 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1987 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1988 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1989 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1990 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1992 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1993 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1994 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1995 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1997 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1998 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1999 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2000 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2002 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2003 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2004 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2005 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2007 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2008 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2009 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2010 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2012 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2013 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2014 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2015 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2017 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2018 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2019 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2020 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2022 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2023 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2024 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2025 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2027 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2028 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2029 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2030 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2032 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2033 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2034 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2035 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2037 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2038 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2039 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2040 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2042 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2043 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2044 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2045 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2047 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2048 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2049 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2050 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2052 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2053 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2054 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2055 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2057 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2058 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2059 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2060 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2062 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2063 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2064 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2065 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2067 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2068 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2069 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2070 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2072 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2073 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2074 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2075 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2077 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2078 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2079 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2080 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2082 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2083 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2084 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2085 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2087 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2088 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2089 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2090 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2092 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2093 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2094 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2095 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2097 if (!hdev->dram_scrambler_enable) {
2098 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2099 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2100 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2101 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2103 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2104 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2105 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2106 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2108 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2109 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2110 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2111 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2113 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2114 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2115 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2116 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2118 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2119 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2120 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2121 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2123 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2124 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2125 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2126 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2128 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2129 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2130 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2131 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2133 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2134 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2135 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2136 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2138 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2139 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2140 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2141 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2143 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2144 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2145 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2146 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2148 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2149 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2150 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2151 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2153 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2154 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2155 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2156 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2158 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2159 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2160 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2161 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2163 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2164 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2165 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2166 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2168 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2169 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2170 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2171 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2173 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2174 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2175 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2176 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2178 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2179 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2180 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2181 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2183 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2184 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2185 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2186 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2188 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2189 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2190 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2191 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2193 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2194 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2195 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2196 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2198 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2199 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2200 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2201 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2203 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2204 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2205 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2206 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2208 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2209 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2210 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2211 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2213 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2214 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2215 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2216 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2219 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2220 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2221 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2222 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2224 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2225 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2226 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2227 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2229 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2230 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2231 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2232 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2234 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2235 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2236 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2237 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2239 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2240 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2241 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2242 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2244 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2245 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2246 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2247 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2249 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2250 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2251 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2252 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2254 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2255 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2256 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2257 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2259 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2260 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2261 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2262 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2264 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2265 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2266 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2267 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2269 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2270 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2271 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2272 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2274 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2275 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2276 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2277 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2279 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2280 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2281 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2282 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2284 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2285 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2286 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2287 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2289 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2290 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2291 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2292 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2294 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2295 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2296 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2297 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2299 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2300 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2301 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2302 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2304 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2305 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2306 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2307 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2309 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2310 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2311 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2312 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2314 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2315 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2316 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2317 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2319 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2320 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2321 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2322 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2324 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2325 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2326 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2327 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2329 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2330 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2331 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2332 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2334 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2335 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2336 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2337 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2340 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2342 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2344 if (!hdev->asic_prop.fw_security_disabled)
2345 return;
2347 if (hdev->asic_prop.fw_security_status_valid &&
2348 (hdev->asic_prop.fw_boot_cpu_security_map &
2349 CPU_BOOT_DEV_STS0_HBM_CRED_EN))
2350 return;
2352 hbm0_wr = 0x33333333;
2353 hbm0_rd = 0x77777777;
2354 hbm1_wr = 0x55555555;
2355 hbm1_rd = 0xDDDDDDDD;
2357 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2358 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2359 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2360 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2362 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2363 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2364 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2365 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2367 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2368 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2369 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2370 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2372 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2373 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2374 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2375 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2377 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2378 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2379 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2380 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2381 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2382 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2383 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2384 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2385 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2386 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2387 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2388 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2390 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2391 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2392 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2393 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2394 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2395 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2396 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2397 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2398 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2399 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2400 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2401 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
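/*
 * gaudi_init_golden_registers() applies the E2E and HBM credit setup
 * above, masks the TPC arithmetic interrupts, programs the TPC I-cache
 * fetch line count, zeroes the first 128 bytes of SRAM (16 quad-word
 * writes) that tensor DMA expects to be clear, and programs the MME
 * EUS rollup counters.
 */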
2404 static void gaudi_init_golden_registers(struct hl_device *hdev)
2406 u32 tpc_offset;
2407 int tpc_id, i;
2409 gaudi_init_e2e(hdev);
2410 gaudi_init_hbm_cred(hdev);
2412 for (tpc_id = 0, tpc_offset = 0;
2413 tpc_id < TPC_NUMBER_OF_ENGINES;
2414 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2415 /* Mask all arithmetic interrupts from TPC */
2416 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2417 /* Set 16 cache lines */
2418 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2419 ICACHE_FETCH_LINE_NUM, 2);
2422 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2423 for (i = 0 ; i < 128 ; i += 8)
2424 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2426 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2427 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2428 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2429 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
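/*
 * gaudi_init_pci_dma_qman() programs one stream of a PCI DMA QMAN:
 * PQ base/size/pointers, LDMA fetch offsets and the sync manager
 * message base addresses. For stream 0 it additionally routes QMAN
 * errors (RAZWI) to the GIC SETSPI register so they are reported as
 * an interrupt, enables arbitration error messages, raises the
 * arbitration watchdog and marks the external queue as trusted.
 */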
2432 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2433 int qman_id, dma_addr_t qman_pq_addr)
2435 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2436 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2437 u32 q_off, dma_qm_offset;
2438 u32 dma_qm_err_cfg;
2440 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2442 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2443 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2444 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2445 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2446 so_base_en_lo = lower_32_bits(CFG_BASE +
2447 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2448 so_base_en_hi = upper_32_bits(CFG_BASE +
2449 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2450 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2451 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2452 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2453 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2454 so_base_ws_lo = lower_32_bits(CFG_BASE +
2455 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2456 so_base_ws_hi = upper_32_bits(CFG_BASE +
2457 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2459 q_off = dma_qm_offset + qman_id * 4;
2461 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2462 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2464 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2465 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2466 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2468 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2469 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2470 QMAN_LDMA_SRC_OFFSET);
2471 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2472 QMAN_LDMA_DST_OFFSET);
2474 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2475 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2476 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2477 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2478 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2479 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2480 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2481 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2483 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2485 /* The following configuration is needed only once per QMAN */
2486 if (qman_id == 0) {
2487 /* Configure RAZWI IRQ */
2488 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2489 if (hdev->stop_on_err) {
2490 dma_qm_err_cfg |=
2491 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2494 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2495 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2496 lower_32_bits(CFG_BASE +
2497 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2498 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2499 upper_32_bits(CFG_BASE +
2500 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2501 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2502 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2503 dma_id);
2505 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2506 QM_ARB_ERR_MSG_EN_MASK);
2508 /* Increase ARB WDT to support streams architecture */
2509 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2510 GAUDI_ARB_WDT_TIMEOUT);
2512 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2513 QMAN_EXTERNAL_MAKE_TRUSTED);
2515 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2519 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2521 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2522 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2524 /* Set to maximum possible according to physical size */
2525 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2526 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2528 /* WA for H/W bug H3-2116 */
2529 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2531 /* The STOP_ON bit means the operation gets no completion in case of RAZWI */
2532 if (hdev->stop_on_err)
2533 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2535 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2536 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2537 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2538 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2539 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2540 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2541 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2542 WREG32(mmDMA0_CORE_PROT + dma_offset,
2543 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2544 /* If the channel is secured, it should be in MMU bypass mode */
2545 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2546 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2547 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2550 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2551 u32 enable_mask)
2553 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2555 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2558 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2560 struct gaudi_device *gaudi = hdev->asic_specific;
2561 struct hl_hw_queue *q;
2562 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2564 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2565 return;
2567 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2568 dma_id = gaudi_dma_assignment[i];
2569 /*
2570 * For queues after the CPU Q, we need to add 1 to get the correct
2571 * queue index. In addition, we need to add the CPU EQ and the NIC
2572 * IRQs in order to get the correct MSI register.
2573 */
2574 if (dma_id > 1) {
2575 cpu_skip = 1;
2576 nic_skip = NIC_NUMBER_OF_ENGINES;
2577 } else {
2578 cpu_skip = 0;
2579 nic_skip = 0;
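/*
 * Example of the index arithmetic below, derived from the code (the
 * PCI DMA channels are 0, 1 and 5, as the stop/stall helpers further
 * down suggest): for dma_id 1, stream 2 the kernel queue index is
 * 4 * 1 + 2 + 0 = 6 with no skip, while for dma_id 5, stream 0 the
 * CPU queue must be skipped, giving 4 * 5 + 0 + 1 = 21; the MSI
 * vector of the latter is likewise bumped past the CPU EQ and the
 * NIC interrupt entries.
 */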
2582 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2583 q_idx = 4 * dma_id + j + cpu_skip;
2584 q = &hdev->kernel_queues[q_idx];
2585 q->cq_id = cq_id++;
2586 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2587 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2588 q->bus_address);
2591 gaudi_init_dma_core(hdev, dma_id);
2593 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2596 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
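/*
 * Internal QMANs (HBM DMA, MME, TPC) follow a common pattern below:
 * qman_id 0-3 are the four upper-CP streams, each backed by a PQ
 * allocated by the driver and using the CPDMA fetch offsets, while
 * qman_id 4 is the lower CP, which has no PQ of its own, uses the
 * LDMA offsets and carries the per-QMAN error (RAZWI), arbitration
 * and protection configuration.
 */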
2599 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2600 int qman_id, u64 qman_base_addr)
2602 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2603 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2604 u32 q_off, dma_qm_offset;
2605 u32 dma_qm_err_cfg;
2607 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2609 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2610 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2611 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2612 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2613 so_base_en_lo = lower_32_bits(CFG_BASE +
2614 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2615 so_base_en_hi = upper_32_bits(CFG_BASE +
2616 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2617 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2618 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2619 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2620 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2621 so_base_ws_lo = lower_32_bits(CFG_BASE +
2622 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2623 so_base_ws_hi = upper_32_bits(CFG_BASE +
2624 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2626 q_off = dma_qm_offset + qman_id * 4;
2628 if (qman_id < 4) {
2629 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2630 lower_32_bits(qman_base_addr));
2631 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2632 upper_32_bits(qman_base_addr));
2634 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2635 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2636 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2638 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2639 QMAN_CPDMA_SIZE_OFFSET);
2640 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2641 QMAN_CPDMA_SRC_OFFSET);
2642 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2643 QMAN_CPDMA_DST_OFFSET);
2644 } else {
2645 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2646 QMAN_LDMA_SIZE_OFFSET);
2647 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2648 QMAN_LDMA_SRC_OFFSET);
2649 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2650 QMAN_LDMA_DST_OFFSET);
2652 /* Configure RAZWI IRQ */
2653 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2654 if (hdev->stop_on_err) {
2655 dma_qm_err_cfg |=
2656 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2658 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2660 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2661 lower_32_bits(CFG_BASE +
2662 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2663 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2664 upper_32_bits(CFG_BASE +
2665 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2666 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2667 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2668 dma_id);
2670 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2671 QM_ARB_ERR_MSG_EN_MASK);
2673 /* Increase ARB WDT to support streams architecture */
2674 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2675 GAUDI_ARB_WDT_TIMEOUT);
2677 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2678 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2679 QMAN_INTERNAL_MAKE_TRUSTED);
2682 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2683 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2684 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2685 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2687 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2688 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2689 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2690 mtr_base_ws_lo);
2691 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2692 mtr_base_ws_hi);
2693 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2694 so_base_ws_lo);
2695 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2696 so_base_ws_hi);
2700 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2702 struct gaudi_device *gaudi = hdev->asic_specific;
2703 struct gaudi_internal_qman_info *q;
2704 u64 qman_base_addr;
2705 int i, j, dma_id, internal_q_index;
2707 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2708 return;
2710 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2711 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2713 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2714 /*
2715 * Add the CPU queue in order to get the correct queue
2716 * number, as all internal queues are placed after it.
2717 */
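/*
 * For example, for the first HBM DMA channel (dma_id 2) and stream 0
 * this yields 2 * QMAN_STREAMS + 0 + 1 = 9 (assuming QMAN_STREAMS is
 * 4, as the 4 * dma_id arithmetic in the PCI DMA path suggests), i.e.
 * one slot past the plain dma_id * streams position, accounting for
 * the CPU queue.
 */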
2718 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2720 q = &gaudi->internal_qmans[internal_q_index];
2721 qman_base_addr = (u64) q->pq_dma_addr;
2722 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2723 qman_base_addr);
2726 /* Initializing lower CP for HBM DMA QMAN */
2727 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2729 gaudi_init_dma_core(hdev, dma_id);
2731 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2734 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2737 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2738 int qman_id, u64 qman_base_addr)
2740 u32 mtr_base_lo, mtr_base_hi;
2741 u32 so_base_lo, so_base_hi;
2742 u32 q_off, mme_id;
2743 u32 mme_qm_err_cfg;
2745 mtr_base_lo = lower_32_bits(CFG_BASE +
2746 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747 mtr_base_hi = upper_32_bits(CFG_BASE +
2748 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749 so_base_lo = lower_32_bits(CFG_BASE +
2750 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751 so_base_hi = upper_32_bits(CFG_BASE +
2752 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2754 q_off = mme_offset + qman_id * 4;
2756 if (qman_id < 4) {
2757 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2758 lower_32_bits(qman_base_addr));
2759 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2760 upper_32_bits(qman_base_addr));
2762 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2763 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2764 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2766 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2767 QMAN_CPDMA_SIZE_OFFSET);
2768 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2769 QMAN_CPDMA_SRC_OFFSET);
2770 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2771 QMAN_CPDMA_DST_OFFSET);
2772 } else {
2773 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2774 QMAN_LDMA_SIZE_OFFSET);
2775 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2776 QMAN_LDMA_SRC_OFFSET);
2777 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2778 QMAN_LDMA_DST_OFFSET);
2780 /* Configure RAZWI IRQ */
2781 mme_id = mme_offset /
2782 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2784 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2785 if (hdev->stop_on_err) {
2786 mme_qm_err_cfg |=
2787 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2789 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2790 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2791 lower_32_bits(CFG_BASE +
2792 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2793 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2794 upper_32_bits(CFG_BASE +
2795 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2796 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2797 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2798 mme_id);
2800 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2801 QM_ARB_ERR_MSG_EN_MASK);
2803 /* Increase ARB WDT to support streams architecture */
2804 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2805 GAUDI_ARB_WDT_TIMEOUT);
2807 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2808 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2809 QMAN_INTERNAL_MAKE_TRUSTED);
2812 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2813 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2814 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2815 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2818 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2820 struct gaudi_device *gaudi = hdev->asic_specific;
2821 struct gaudi_internal_qman_info *q;
2822 u64 qman_base_addr;
2823 u32 mme_offset;
2824 int i, internal_q_index;
2826 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2827 return;
2829 /*
2830 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2831 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2832 */
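/*
 * Concretely, the loop below programs streams 0-3 with mme_offset
 * pointing at the MME2 QMAN block and, once i reaches 3, resets
 * mme_offset to 0 so the remaining streams land in the MME0 QMAN
 * block, matching the mapping described above.
 */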
2834 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2836 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2837 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2838 q = &gaudi->internal_qmans[internal_q_index];
2839 qman_base_addr = (u64) q->pq_dma_addr;
2840 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2841 qman_base_addr);
2842 if (i == 3)
2843 mme_offset = 0;
2846 /* Initializing lower CP for MME QMANs */
2847 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2848 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2849 gaudi_init_mme_qman(hdev, 0, 4, 0);
2851 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2852 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2854 gaudi->hw_cap_initialized |= HW_CAP_MME;
2857 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2858 int qman_id, u64 qman_base_addr)
2860 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2861 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2862 u32 q_off, tpc_id;
2863 u32 tpc_qm_err_cfg;
2865 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2866 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2867 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2868 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2869 so_base_en_lo = lower_32_bits(CFG_BASE +
2870 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2871 so_base_en_hi = upper_32_bits(CFG_BASE +
2872 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2873 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2874 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2875 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2876 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2877 so_base_ws_lo = lower_32_bits(CFG_BASE +
2878 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2879 so_base_ws_hi = upper_32_bits(CFG_BASE +
2880 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2882 q_off = tpc_offset + qman_id * 4;
2884 tpc_id = tpc_offset /
2885 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2887 if (qman_id < 4) {
2888 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2889 lower_32_bits(qman_base_addr));
2890 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2891 upper_32_bits(qman_base_addr));
2893 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2894 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2895 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2897 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2898 QMAN_CPDMA_SIZE_OFFSET);
2899 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2900 QMAN_CPDMA_SRC_OFFSET);
2901 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2902 QMAN_CPDMA_DST_OFFSET);
2903 } else {
2904 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2905 QMAN_LDMA_SIZE_OFFSET);
2906 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2907 QMAN_LDMA_SRC_OFFSET);
2908 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2909 QMAN_LDMA_DST_OFFSET);
2911 /* Configure RAZWI IRQ */
2912 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2913 if (hdev->stop_on_err) {
2914 tpc_qm_err_cfg |=
2915 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2918 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2919 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2920 lower_32_bits(CFG_BASE +
2921 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2922 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2923 upper_32_bits(CFG_BASE +
2924 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2925 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2926 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2927 tpc_id);
2929 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2930 QM_ARB_ERR_MSG_EN_MASK);
2932 /* Increase ARB WDT to support streams architecture */
2933 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2934 GAUDI_ARB_WDT_TIMEOUT);
2936 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2937 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2938 QMAN_INTERNAL_MAKE_TRUSTED);
2941 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2942 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2943 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2944 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2946 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
2947 if (tpc_id == 6) {
2948 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2949 mtr_base_ws_lo);
2950 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2951 mtr_base_ws_hi);
2952 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2953 so_base_ws_lo);
2954 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2955 so_base_ws_hi);
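/*
 * gaudi_init_tpc_qmans() walks all TPC engines: for each one it
 * programs the four stream QMANs from the internal QMAN info array,
 * initializes the lower CP (qman_id 4, no PQ) on the last stream,
 * enables the TPC QMAN, and points the TPC SM base-address-high
 * register at the east-north sync manager objects.
 */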
2959 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2961 struct gaudi_device *gaudi = hdev->asic_specific;
2962 struct gaudi_internal_qman_info *q;
2963 u64 qman_base_addr;
2964 u32 so_base_hi, tpc_offset = 0;
2965 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2966 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2967 int i, tpc_id, internal_q_index;
2969 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2970 return;
2972 so_base_hi = upper_32_bits(CFG_BASE +
2973 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2975 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2976 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2977 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2978 tpc_id * QMAN_STREAMS + i;
2979 q = &gaudi->internal_qmans[internal_q_index];
2980 qman_base_addr = (u64) q->pq_dma_addr;
2981 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2982 qman_base_addr);
2984 if (i == 3) {
2985 /* Initializing lower CP for TPC QMAN */
2986 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2988 /* Enable the QMAN and TPC channel */
2989 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2990 QMAN_TPC_ENABLE);
2994 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2995 so_base_hi);
2997 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2999 gaudi->hw_cap_initialized |=
3000 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3004 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3005 int qman_id, u64 qman_base_addr, int nic_id)
3007 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3008 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3009 u32 q_off;
3010 u32 nic_qm_err_cfg;
3012 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3013 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3014 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3015 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3016 so_base_en_lo = lower_32_bits(CFG_BASE +
3017 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3018 so_base_en_hi = upper_32_bits(CFG_BASE +
3019 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3020 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3021 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3022 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3023 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3024 so_base_ws_lo = lower_32_bits(CFG_BASE +
3025 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3026 so_base_ws_hi = upper_32_bits(CFG_BASE +
3027 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3029 q_off = nic_offset + qman_id * 4;
3031 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3032 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3034 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3035 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3036 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3038 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3039 QMAN_LDMA_SIZE_OFFSET);
3040 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3041 QMAN_LDMA_SRC_OFFSET);
3042 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3043 QMAN_LDMA_DST_OFFSET);
3045 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3046 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3047 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3048 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3050 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3051 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3052 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3053 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3054 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3056 if (qman_id == 0) {
3057 /* Configure RAZWI IRQ */
3058 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3059 if (hdev->stop_on_err) {
3060 nic_qm_err_cfg |=
3061 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3064 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3065 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3066 lower_32_bits(CFG_BASE +
3067 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3068 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3069 upper_32_bits(CFG_BASE +
3070 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3071 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3072 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3073 nic_id);
3075 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3076 QM_ARB_ERR_MSG_EN_MASK);
3078 /* Increase ARB WDT to support streams architecture */
3079 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3080 GAUDI_ARB_WDT_TIMEOUT);
3082 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3083 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3084 QMAN_INTERNAL_MAKE_TRUSTED);
3088 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3090 struct gaudi_device *gaudi = hdev->asic_specific;
3091 struct gaudi_internal_qman_info *q;
3092 u64 qman_base_addr;
3093 u32 nic_offset = 0;
3094 u32 nic_delta_between_qmans =
3095 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3096 u32 nic_delta_between_nics =
3097 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3098 int i, nic_id, internal_q_index;
3100 if (!hdev->nic_ports_mask)
3101 return;
3103 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3104 return;
3106 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
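/*
 * Each NIC macro hosts two QMANs (QM0/QM1), so the offset bookkeeping
 * below advances by one QMAN stride per port and, after every odd
 * port, rewinds the two QMAN strides and jumps one NIC stride to the
 * next macro. Ports cleared in hdev->nic_ports_mask are skipped but
 * still advance the offsets.
 */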
3108 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3109 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3110 nic_offset += nic_delta_between_qmans;
3111 if (nic_id & 1) {
3112 nic_offset -= (nic_delta_between_qmans * 2);
3113 nic_offset += nic_delta_between_nics;
3115 continue;
3118 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3119 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3120 nic_id * QMAN_STREAMS + i;
3121 q = &gaudi->internal_qmans[internal_q_index];
3122 qman_base_addr = (u64) q->pq_dma_addr;
3123 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3124 qman_base_addr, nic_id);
3127 /* Enable the QMAN */
3128 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3130 nic_offset += nic_delta_between_qmans;
3131 if (nic_id & 1) {
3132 nic_offset -= (nic_delta_between_qmans * 2);
3133 nic_offset += nic_delta_between_nics;
3136 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3140 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3142 struct gaudi_device *gaudi = hdev->asic_specific;
3144 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3145 return;
3147 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3148 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3149 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3152 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3154 struct gaudi_device *gaudi = hdev->asic_specific;
3156 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3157 return;
3159 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3160 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3161 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3162 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3163 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3166 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3168 struct gaudi_device *gaudi = hdev->asic_specific;
3170 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3171 return;
3173 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3174 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3177 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3179 struct gaudi_device *gaudi = hdev->asic_specific;
3180 u32 tpc_offset = 0;
3181 int tpc_id;
3183 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3184 return;
3186 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3187 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3188 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3192 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3194 struct gaudi_device *gaudi = hdev->asic_specific;
3195 u32 nic_mask, nic_offset = 0;
3196 u32 nic_delta_between_qmans =
3197 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3198 u32 nic_delta_between_nics =
3199 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3200 int nic_id;
3202 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3203 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3205 if (gaudi->hw_cap_initialized & nic_mask)
3206 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3208 nic_offset += nic_delta_between_qmans;
3209 if (nic_id & 1) {
3210 nic_offset -= (nic_delta_between_qmans * 2);
3211 nic_offset += nic_delta_between_nics;
3216 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3218 struct gaudi_device *gaudi = hdev->asic_specific;
3220 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3221 return;
3223 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3224 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3225 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3226 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3229 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3231 struct gaudi_device *gaudi = hdev->asic_specific;
3233 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3234 return;
3236 /* Stop CPs of HBM DMA QMANs */
3238 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3239 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3240 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3241 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3242 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3245 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3247 struct gaudi_device *gaudi = hdev->asic_specific;
3249 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3250 return;
3252 /* Stop CPs of MME QMANs */
3253 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3254 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3257 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3259 struct gaudi_device *gaudi = hdev->asic_specific;
3261 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3262 return;
3264 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3265 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3266 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3267 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3268 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3269 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3270 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3271 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3274 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3276 struct gaudi_device *gaudi = hdev->asic_specific;
3278 /* Stop upper CPs of QMANs */
3280 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3281 WREG32(mmNIC0_QM0_GLBL_CFG1,
3282 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3283 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3284 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3286 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3287 WREG32(mmNIC0_QM1_GLBL_CFG1,
3288 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3289 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3290 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3292 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3293 WREG32(mmNIC1_QM0_GLBL_CFG1,
3294 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3295 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3296 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3298 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3299 WREG32(mmNIC1_QM1_GLBL_CFG1,
3300 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3301 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3302 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3304 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3305 WREG32(mmNIC2_QM0_GLBL_CFG1,
3306 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3307 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3308 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3310 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3311 WREG32(mmNIC2_QM1_GLBL_CFG1,
3312 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3313 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3314 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3316 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3317 WREG32(mmNIC3_QM0_GLBL_CFG1,
3318 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3319 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3320 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3322 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3323 WREG32(mmNIC3_QM1_GLBL_CFG1,
3324 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3325 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3326 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3328 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3329 WREG32(mmNIC4_QM0_GLBL_CFG1,
3330 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3331 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3332 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3334 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3335 WREG32(mmNIC4_QM1_GLBL_CFG1,
3336 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3337 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3338 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
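/*
 * The stall helpers below complement the QMAN stop routines above:
 * they halt the engine cores themselves (DMA core HALT bit, MME
 * ACC/SBAB stall, TPC stall) so the engines stop issuing new
 * transactions while the device is being halted.
 */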
3341 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3343 struct gaudi_device *gaudi = hdev->asic_specific;
3345 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3346 return;
3348 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3349 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3350 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3353 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3355 struct gaudi_device *gaudi = hdev->asic_specific;
3357 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3358 return;
3360 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3361 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3362 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3363 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3364 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3367 static void gaudi_mme_stall(struct hl_device *hdev)
3369 struct gaudi_device *gaudi = hdev->asic_specific;
3371 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3372 return;
3374 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3375 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3376 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3377 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3378 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3379 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3380 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3381 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3382 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3383 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3384 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3385 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3386 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3387 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3388 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3389 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3390 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3393 static void gaudi_tpc_stall(struct hl_device *hdev)
3395 struct gaudi_device *gaudi = hdev->asic_specific;
3397 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3398 return;
3400 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3401 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3402 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3403 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3404 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3405 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3406 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3407 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3410 static void gaudi_set_clock_gating(struct hl_device *hdev)
3412 struct gaudi_device *gaudi = hdev->asic_specific;
3413 u32 qman_offset;
3414 bool enable;
3415 int i;
3417 /* If a debug session is in progress, don't enable clock gating
3418 * as it may interfere with the debugger
3420 if (hdev->in_debug)
3421 return;
3423 if (!hdev->asic_prop.fw_security_disabled)
3424 return;
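/* Each engine's bit in hdev->clock_gating_mask selects whether clock
 * gating is enabled or cleared for that engine's QMAN below.
 */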
3426 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3427 enable = !!(hdev->clock_gating_mask &
3428 (BIT_ULL(gaudi_dma_assignment[i])));
3430 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3431 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3432 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3433 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3434 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3437 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3438 enable = !!(hdev->clock_gating_mask &
3439 (BIT_ULL(gaudi_dma_assignment[i])));
3441 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3442 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3443 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3444 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3445 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3448 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3449 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3450 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3452 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3453 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3454 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3456 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3457 enable = !!(hdev->clock_gating_mask &
3458 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3460 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3461 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3462 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3463 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3465 qman_offset += TPC_QMAN_OFFSET;
3468 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3471 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3473 struct gaudi_device *gaudi = hdev->asic_specific;
3474 u32 qman_offset;
3475 int i;
3477 if (!hdev->asic_prop.fw_security_disabled)
3478 return;
3480 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3481 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3482 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3484 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3487 WREG32(mmMME0_QM_CGM_CFG, 0);
3488 WREG32(mmMME0_QM_CGM_CFG1, 0);
3489 WREG32(mmMME2_QM_CGM_CFG, 0);
3490 WREG32(mmMME2_QM_CGM_CFG1, 0);
3492 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3493 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3494 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3496 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3499 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3502 static void gaudi_enable_timestamp(struct hl_device *hdev)
3504 /* Disable the timestamp counter */
3505 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3507 /* Zero the lower/upper parts of the 64-bit counter */
3508 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3509 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3511 /* Enable the counter */
3512 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3515 static void gaudi_disable_timestamp(struct hl_device *hdev)
3517 /* Disable the timestamp counter */
3518 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3521 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3523 u32 wait_timeout_ms;
3525 dev_info(hdev->dev,
3526 "Halting compute engines and disabling interrupts\n");
3528 if (hdev->pldm)
3529 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3530 else
3531 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
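/* Halt order: first stop the QMANs from fetching new work, disable clock
 * gating, stall the engine cores, then disable the QMANs, the timestamp
 * counter and MSI.
 */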
3533 gaudi_stop_nic_qmans(hdev);
3534 gaudi_stop_mme_qmans(hdev);
3535 gaudi_stop_tpc_qmans(hdev);
3536 gaudi_stop_hbm_dma_qmans(hdev);
3537 gaudi_stop_pci_dma_qmans(hdev);
3539 hdev->asic_funcs->disable_clock_gating(hdev);
3541 msleep(wait_timeout_ms);
3543 gaudi_pci_dma_stall(hdev);
3544 gaudi_hbm_dma_stall(hdev);
3545 gaudi_tpc_stall(hdev);
3546 gaudi_mme_stall(hdev);
3548 msleep(wait_timeout_ms);
3550 gaudi_disable_nic_qmans(hdev);
3551 gaudi_disable_mme_qmans(hdev);
3552 gaudi_disable_tpc_qmans(hdev);
3553 gaudi_disable_hbm_dma_qmans(hdev);
3554 gaudi_disable_pci_dma_qmans(hdev);
3556 gaudi_disable_timestamp(hdev);
3558 gaudi_disable_msi(hdev);
3561 static int gaudi_mmu_init(struct hl_device *hdev)
3563 struct asic_fixed_properties *prop = &hdev->asic_prop;
3564 struct gaudi_device *gaudi = hdev->asic_specific;
3565 u64 hop0_addr;
3566 int rc, i;
3568 if (!hdev->mmu_enable)
3569 return 0;
3571 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3572 return 0;
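/* Every ASID gets its own hop-0 page table, located at
 * mmu_pgt_addr + asid * mmu_hop_table_size.
 */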
3574 for (i = 0 ; i < prop->max_asid ; i++) {
3575 hop0_addr = prop->mmu_pgt_addr +
3576 (i * prop->mmu_hop_table_size);
3578 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3579 if (rc) {
3580 dev_err(hdev->dev,
3581 "failed to set hop0 addr for asid %d\n", i);
3582 goto err;
3586 /* Init the MMU cache management page */
3587 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3588 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3590 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3592 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3593 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3595 WREG32(mmSTLB_HOP_CONFIGURATION,
3596 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3599 * The H/W expects the first PI after init to be 1. After wraparound
3600 * we'll write 0.
3602 gaudi->mmu_cache_inv_pi = 1;
3604 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3606 return 0;
3608 err:
3609 return rc;
3612 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3614 void __iomem *dst;
3616 /* HBM scrambler must be initialized before pushing F/W to HBM */
3617 gaudi_init_scrambler_hbm(hdev);
3619 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3621 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3624 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3626 void __iomem *dst;
3628 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3630 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3633 static int gaudi_read_device_fw_version(struct hl_device *hdev,
3634 enum hl_fw_component fwc)
3636 const char *name;
3637 u32 ver_off;
3638 char *dest;
3640 switch (fwc) {
3641 case FW_COMP_UBOOT:
3642 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3643 dest = hdev->asic_prop.uboot_ver;
3644 name = "U-Boot";
3645 break;
3646 case FW_COMP_PREBOOT:
3647 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3648 dest = hdev->asic_prop.preboot_ver;
3649 name = "Preboot";
3650 break;
3651 default:
3652 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
3653 return -EIO;
3656 ver_off &= ~((u32)SRAM_BASE_ADDR);
3658 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3659 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3660 VERSION_MAX_LEN);
3661 } else {
3662 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3663 name, ver_off);
3664 strcpy(dest, "unavailable");
3665 return -EIO;
3668 return 0;
3671 static int gaudi_init_cpu(struct hl_device *hdev)
3673 struct gaudi_device *gaudi = hdev->asic_specific;
3674 int rc;
3676 if (!hdev->cpu_enable)
3677 return 0;
3679 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3680 return 0;
3683 * The device CPU works with 40-bit addresses.
3684 * This register extends the addressing to 50 bits.
3686 if (hdev->asic_prop.fw_security_disabled)
3687 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3689 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3690 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3691 mmCPU_CMD_STATUS_TO_HOST,
3692 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
3693 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3694 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3696 if (rc)
3697 return rc;
3699 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3701 return 0;
3704 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3706 struct gaudi_device *gaudi = hdev->asic_specific;
3707 struct hl_eq *eq;
3708 u32 status;
3709 struct hl_hw_queue *cpu_pq =
3710 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3711 int err;
3713 if (!hdev->cpu_queues_enable)
3714 return 0;
3716 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3717 return 0;
3719 eq = &hdev->event_queue;
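/* Handshake with the device CPU: publish the PQ/EQ/CQ base addresses and
 * sizes, kick the CPU through the GIC, then poll QUEUE_INIT until the CPU
 * reports PQ_INIT_STATUS_READY_FOR_HOST.
 */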
3721 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3722 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3724 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3725 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3727 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3728 lower_32_bits(hdev->cpu_accessible_dma_address));
3729 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3730 upper_32_bits(hdev->cpu_accessible_dma_address));
3732 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3733 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3734 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3736 /* Used for EQ CI */
3737 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3739 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3741 if (gaudi->multi_msi_mode)
3742 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3743 else
3744 WREG32(mmCPU_IF_QUEUE_INIT,
3745 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3747 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3749 err = hl_poll_timeout(
3750 hdev,
3751 mmCPU_IF_QUEUE_INIT,
3752 status,
3753 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3754 1000,
3755 cpu_timeout);
3757 if (err) {
3758 dev_err(hdev->dev,
3759 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3760 return -EIO;
3763 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3764 return 0;
3767 static void gaudi_pre_hw_init(struct hl_device *hdev)
3769 /* Perform read from the device to make sure device is up */
3770 RREG32(mmHW_STATE);
3772 if (hdev->asic_prop.fw_security_disabled) {
3773 /* Set the access through PCI bars (Linux driver only) as
3774 * secured
3776 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3777 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3778 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3780 /* Perform a read to flush the pending writes and ensure the
3781 * configuration has reached the device
3783 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3787 * Let's mark in the H/W that we have reached this point. We check
3788 * this value in the reset_before_init function to understand whether
3789 * we need to reset the chip before doing H/W init. This register is
3790 * cleared by the H/W upon H/W reset
3792 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3795 static int gaudi_hw_init(struct hl_device *hdev)
3797 int rc;
3799 gaudi_pre_hw_init(hdev);
3801 gaudi_init_pci_dma_qmans(hdev);
3803 gaudi_init_hbm_dma_qmans(hdev);
3805 rc = gaudi_init_cpu(hdev);
3806 if (rc) {
3807 dev_err(hdev->dev, "failed to initialize CPU\n");
3808 return rc;
3811 /* If clock gating was enabled by the preboot F/W, we need to disable
3812 * it here before touching the MME/TPC registers.
3813 * There is no need to take the clock gating mutex because no other
3814 * relevant code can run while this function runs
3816 hdev->asic_funcs->disable_clock_gating(hdev);
3818 /* SRAM scrambler must be initialized after CPU is running from HBM */
3819 gaudi_init_scrambler_sram(hdev);
3821 /* This is here just in case we are working without the device CPU */
3822 gaudi_init_scrambler_hbm(hdev);
3824 gaudi_init_golden_registers(hdev);
3826 rc = gaudi_mmu_init(hdev);
3827 if (rc)
3828 return rc;
3830 gaudi_init_security(hdev);
3832 gaudi_init_mme_qmans(hdev);
3834 gaudi_init_tpc_qmans(hdev);
3836 gaudi_init_nic_qmans(hdev);
3838 hdev->asic_funcs->set_clock_gating(hdev);
3840 gaudi_enable_timestamp(hdev);
3842 /* MSI must be enabled before CPU queues and NIC are initialized */
3843 rc = gaudi_enable_msi(hdev);
3844 if (rc)
3845 goto disable_queues;
3847 /* Must be called after MSI has been enabled */
3848 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3849 if (rc) {
3850 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3851 rc);
3852 goto disable_msi;
3855 /* Perform read from the device to flush all configuration */
3856 RREG32(mmHW_STATE);
3858 return 0;
3860 disable_msi:
3861 gaudi_disable_msi(hdev);
3862 disable_queues:
3863 gaudi_disable_mme_qmans(hdev);
3864 gaudi_disable_pci_dma_qmans(hdev);
3866 return rc;
3869 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3871 struct gaudi_device *gaudi = hdev->asic_specific;
3872 u32 status, reset_timeout_ms, cpu_timeout_ms;
3874 if (!hard_reset) {
3875 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3876 return;
3879 if (hdev->pldm) {
3880 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3881 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3882 } else {
3883 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3884 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3887 /* Set the device to handle FLR by H/W, as we are about to put the
3888 * device CPU into halt mode
3890 if (hdev->asic_prop.fw_security_disabled &&
3891 !hdev->asic_prop.hard_reset_done_by_fw)
3892 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3893 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3895 /* The state of the device CPU is unknown at this point, so make sure
3896 * it is stopped by any means necessary
3898 if (hdev->asic_prop.hard_reset_done_by_fw)
3899 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV);
3900 else
3901 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3903 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3905 if (hdev->asic_prop.fw_security_disabled &&
3906 !hdev->asic_prop.hard_reset_done_by_fw) {
3908 /* Configure the reset registers. Must be done as early as
3909 * possible in case we fail during H/W initialization
3911 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3912 (CFG_RST_H_DMA_MASK |
3913 CFG_RST_H_MME_MASK |
3914 CFG_RST_H_SM_MASK |
3915 CFG_RST_H_TPC_7_MASK));
3917 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3919 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3920 (CFG_RST_H_HBM_MASK |
3921 CFG_RST_H_TPC_7_MASK |
3922 CFG_RST_H_NIC_MASK |
3923 CFG_RST_H_SM_MASK |
3924 CFG_RST_H_DMA_MASK |
3925 CFG_RST_H_MME_MASK |
3926 CFG_RST_H_CPU_MASK |
3927 CFG_RST_H_MMU_MASK));
3929 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3930 (CFG_RST_L_IF_MASK |
3931 CFG_RST_L_PSOC_MASK |
3932 CFG_RST_L_TPC_MASK));
3934 msleep(cpu_timeout_ms);
3936 /* Tell ASIC not to re-initialize PCIe */
3937 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3939 /* Restart BTL/BLR upon hard-reset */
3940 if (hdev->asic_prop.fw_security_disabled)
3941 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3943 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3944 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3946 dev_info(hdev->dev,
3947 "Issued HARD reset command, going to wait %dms\n",
3948 reset_timeout_ms);
3949 } else {
3950 dev_info(hdev->dev,
3951 "Firmware performs HARD reset, going to wait %dms\n",
3952 reset_timeout_ms);
3956 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3957 * itself is in reset. Need to wait until the reset is deasserted
3959 msleep(reset_timeout_ms);
3961 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3962 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3963 dev_err(hdev->dev,
3964 "Timeout while waiting for device to reset 0x%x\n",
3965 status);
3967 if (gaudi) {
3968 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3969 HW_CAP_HBM | HW_CAP_PCI_DMA |
3970 HW_CAP_MME | HW_CAP_TPC_MASK |
3971 HW_CAP_HBM_DMA | HW_CAP_PLL |
3972 HW_CAP_NIC_MASK | HW_CAP_MMU |
3973 HW_CAP_SRAM_SCRAMBLER |
3974 HW_CAP_HBM_SCRAMBLER |
3975 HW_CAP_CLK_GATE);
3977 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3981 static int gaudi_suspend(struct hl_device *hdev)
3983 int rc;
3985 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3986 if (rc)
3987 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3989 return rc;
3992 static int gaudi_resume(struct hl_device *hdev)
3994 return gaudi_init_iatu(hdev);
3997 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3998 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4000 int rc;
4002 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4003 VM_DONTCOPY | VM_NORESERVE;
4005 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
4006 if (rc)
4007 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4009 return rc;
4012 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4014 struct gaudi_device *gaudi = hdev->asic_specific;
4015 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
4016 int dma_id;
4017 bool invalid_queue = false;
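/* Map the queue ID to its QMAN PQ_PI doorbell register. For DMA queues the
 * QMAN block is taken from gaudi_dma_assignment[] and the low two bits of
 * the queue ID select one of the QMAN's four PQs (each PI register is
 * 4 bytes apart); for DMA 2-7 the extra -1 accounts for the CPU PQ queue ID
 * that is interleaved between the DMA1 and DMA2 queue ID ranges.
 */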
4019 switch (hw_queue_id) {
4020 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4021 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4022 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4023 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4024 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4025 break;
4027 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4028 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4029 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4030 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4031 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4032 break;
4034 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4035 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4036 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4037 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4038 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4039 break;
4041 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4042 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4043 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4044 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4045 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4046 break;
4048 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4049 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4050 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4051 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4052 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4053 break;
4055 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4056 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4057 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4058 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4059 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4060 break;
4062 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4063 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4064 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4065 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4066 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4067 break;
4069 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4070 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4071 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4072 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4073 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4074 break;
4076 case GAUDI_QUEUE_ID_CPU_PQ:
4077 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4078 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4079 else
4080 invalid_queue = true;
4081 break;
4083 case GAUDI_QUEUE_ID_MME_0_0:
4084 db_reg_offset = mmMME2_QM_PQ_PI_0;
4085 break;
4087 case GAUDI_QUEUE_ID_MME_0_1:
4088 db_reg_offset = mmMME2_QM_PQ_PI_1;
4089 break;
4091 case GAUDI_QUEUE_ID_MME_0_2:
4092 db_reg_offset = mmMME2_QM_PQ_PI_2;
4093 break;
4095 case GAUDI_QUEUE_ID_MME_0_3:
4096 db_reg_offset = mmMME2_QM_PQ_PI_3;
4097 break;
4099 case GAUDI_QUEUE_ID_MME_1_0:
4100 db_reg_offset = mmMME0_QM_PQ_PI_0;
4101 break;
4103 case GAUDI_QUEUE_ID_MME_1_1:
4104 db_reg_offset = mmMME0_QM_PQ_PI_1;
4105 break;
4107 case GAUDI_QUEUE_ID_MME_1_2:
4108 db_reg_offset = mmMME0_QM_PQ_PI_2;
4109 break;
4111 case GAUDI_QUEUE_ID_MME_1_3:
4112 db_reg_offset = mmMME0_QM_PQ_PI_3;
4113 break;
4115 case GAUDI_QUEUE_ID_TPC_0_0:
4116 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4117 break;
4119 case GAUDI_QUEUE_ID_TPC_0_1:
4120 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4121 break;
4123 case GAUDI_QUEUE_ID_TPC_0_2:
4124 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4125 break;
4127 case GAUDI_QUEUE_ID_TPC_0_3:
4128 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4129 break;
4131 case GAUDI_QUEUE_ID_TPC_1_0:
4132 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4133 break;
4135 case GAUDI_QUEUE_ID_TPC_1_1:
4136 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4137 break;
4139 case GAUDI_QUEUE_ID_TPC_1_2:
4140 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4141 break;
4143 case GAUDI_QUEUE_ID_TPC_1_3:
4144 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4145 break;
4147 case GAUDI_QUEUE_ID_TPC_2_0:
4148 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4149 break;
4151 case GAUDI_QUEUE_ID_TPC_2_1:
4152 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4153 break;
4155 case GAUDI_QUEUE_ID_TPC_2_2:
4156 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4157 break;
4159 case GAUDI_QUEUE_ID_TPC_2_3:
4160 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4161 break;
4163 case GAUDI_QUEUE_ID_TPC_3_0:
4164 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4165 break;
4167 case GAUDI_QUEUE_ID_TPC_3_1:
4168 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4169 break;
4171 case GAUDI_QUEUE_ID_TPC_3_2:
4172 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4173 break;
4175 case GAUDI_QUEUE_ID_TPC_3_3:
4176 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4177 break;
4179 case GAUDI_QUEUE_ID_TPC_4_0:
4180 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4181 break;
4183 case GAUDI_QUEUE_ID_TPC_4_1:
4184 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4185 break;
4187 case GAUDI_QUEUE_ID_TPC_4_2:
4188 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4189 break;
4191 case GAUDI_QUEUE_ID_TPC_4_3:
4192 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4193 break;
4195 case GAUDI_QUEUE_ID_TPC_5_0:
4196 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4197 break;
4199 case GAUDI_QUEUE_ID_TPC_5_1:
4200 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4201 break;
4203 case GAUDI_QUEUE_ID_TPC_5_2:
4204 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4205 break;
4207 case GAUDI_QUEUE_ID_TPC_5_3:
4208 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4209 break;
4211 case GAUDI_QUEUE_ID_TPC_6_0:
4212 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4213 break;
4215 case GAUDI_QUEUE_ID_TPC_6_1:
4216 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4217 break;
4219 case GAUDI_QUEUE_ID_TPC_6_2:
4220 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4221 break;
4223 case GAUDI_QUEUE_ID_TPC_6_3:
4224 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4225 break;
4227 case GAUDI_QUEUE_ID_TPC_7_0:
4228 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4229 break;
4231 case GAUDI_QUEUE_ID_TPC_7_1:
4232 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4233 break;
4235 case GAUDI_QUEUE_ID_TPC_7_2:
4236 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4237 break;
4239 case GAUDI_QUEUE_ID_TPC_7_3:
4240 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4241 break;
4243 case GAUDI_QUEUE_ID_NIC_0_0:
4244 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4245 break;
4247 case GAUDI_QUEUE_ID_NIC_0_1:
4248 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4249 break;
4251 case GAUDI_QUEUE_ID_NIC_0_2:
4252 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4253 break;
4255 case GAUDI_QUEUE_ID_NIC_0_3:
4256 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4257 break;
4259 case GAUDI_QUEUE_ID_NIC_1_0:
4260 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4261 break;
4263 case GAUDI_QUEUE_ID_NIC_1_1:
4264 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4265 break;
4267 case GAUDI_QUEUE_ID_NIC_1_2:
4268 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4269 break;
4271 case GAUDI_QUEUE_ID_NIC_1_3:
4272 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4273 break;
4275 case GAUDI_QUEUE_ID_NIC_2_0:
4276 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4277 break;
4279 case GAUDI_QUEUE_ID_NIC_2_1:
4280 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4281 break;
4283 case GAUDI_QUEUE_ID_NIC_2_2:
4284 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4285 break;
4287 case GAUDI_QUEUE_ID_NIC_2_3:
4288 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4289 break;
4291 case GAUDI_QUEUE_ID_NIC_3_0:
4292 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4293 break;
4295 case GAUDI_QUEUE_ID_NIC_3_1:
4296 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4297 break;
4299 case GAUDI_QUEUE_ID_NIC_3_2:
4300 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4301 break;
4303 case GAUDI_QUEUE_ID_NIC_3_3:
4304 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4305 break;
4307 case GAUDI_QUEUE_ID_NIC_4_0:
4308 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4309 break;
4311 case GAUDI_QUEUE_ID_NIC_4_1:
4312 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4313 break;
4315 case GAUDI_QUEUE_ID_NIC_4_2:
4316 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4317 break;
4319 case GAUDI_QUEUE_ID_NIC_4_3:
4320 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4321 break;
4323 case GAUDI_QUEUE_ID_NIC_5_0:
4324 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4325 break;
4327 case GAUDI_QUEUE_ID_NIC_5_1:
4328 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4329 break;
4331 case GAUDI_QUEUE_ID_NIC_5_2:
4332 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4333 break;
4335 case GAUDI_QUEUE_ID_NIC_5_3:
4336 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4337 break;
4339 case GAUDI_QUEUE_ID_NIC_6_0:
4340 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4341 break;
4343 case GAUDI_QUEUE_ID_NIC_6_1:
4344 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4345 break;
4347 case GAUDI_QUEUE_ID_NIC_6_2:
4348 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4349 break;
4351 case GAUDI_QUEUE_ID_NIC_6_3:
4352 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4353 break;
4355 case GAUDI_QUEUE_ID_NIC_7_0:
4356 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4357 break;
4359 case GAUDI_QUEUE_ID_NIC_7_1:
4360 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4361 break;
4363 case GAUDI_QUEUE_ID_NIC_7_2:
4364 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4365 break;
4367 case GAUDI_QUEUE_ID_NIC_7_3:
4368 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4369 break;
4371 case GAUDI_QUEUE_ID_NIC_8_0:
4372 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4373 break;
4375 case GAUDI_QUEUE_ID_NIC_8_1:
4376 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4377 break;
4379 case GAUDI_QUEUE_ID_NIC_8_2:
4380 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4381 break;
4383 case GAUDI_QUEUE_ID_NIC_8_3:
4384 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4385 break;
4387 case GAUDI_QUEUE_ID_NIC_9_0:
4388 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4389 break;
4391 case GAUDI_QUEUE_ID_NIC_9_1:
4392 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4393 break;
4395 case GAUDI_QUEUE_ID_NIC_9_2:
4396 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4397 break;
4399 case GAUDI_QUEUE_ID_NIC_9_3:
4400 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4401 break;
4403 default:
4404 invalid_queue = true;
4407 if (invalid_queue) {
4408 /* Should never get here */
4409 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4410 hw_queue_id);
4411 return;
4414 db_value = pi;
4416 /* ring the doorbell */
4417 WREG32(db_reg_offset, db_value);
4419 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
4420 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4421 GAUDI_EVENT_PI_UPDATE);
4424 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4425 struct hl_bd *bd)
4427 __le64 *pbd = (__le64 *) bd;
4429 /* The QMANs are in host memory, so a simple copy suffices */
4430 pqe[0] = pbd[0];
4431 pqe[1] = pbd[1];
4434 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4435 dma_addr_t *dma_handle, gfp_t flags)
4437 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4438 dma_handle, flags);
4440 /* Shift to the device's base physical address of host memory */
4441 if (kernel_addr)
4442 *dma_handle += HOST_PHYS_BASE;
4444 return kernel_addr;
4447 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4448 void *cpu_addr, dma_addr_t dma_handle)
4450 /* Cancel the device's base physical address of host memory */
4451 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4453 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4456 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4458 struct asic_fixed_properties *prop = &hdev->asic_prop;
4459 u64 cur_addr = DRAM_BASE_ADDR_USER;
4460 u32 val;
4461 u32 chunk_size;
4462 int rc, dma_id;
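/* Scrub the DRAM in 2GB chunks, one chunk per DMA channel in parallel,
 * using the DMA core's memory-set commit mode, then poll each channel
 * until it is no longer busy.
 */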
4464 while (cur_addr < prop->dram_end_address) {
4465 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4466 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4468 chunk_size =
4469 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4471 dev_dbg(hdev->dev,
4472 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4473 cur_addr, cur_addr + chunk_size);
4475 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4476 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4477 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4478 lower_32_bits(cur_addr));
4479 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4480 upper_32_bits(cur_addr));
4481 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4482 chunk_size);
4483 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4484 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4485 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4487 cur_addr += chunk_size;
4489 if (cur_addr == prop->dram_end_address)
4490 break;
4493 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4494 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4496 rc = hl_poll_timeout(
4497 hdev,
4498 mmDMA0_CORE_STS0 + dma_offset,
4499 val,
4500 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4501 1000,
4502 HBM_SCRUBBING_TIMEOUT_US);
4504 if (rc) {
4505 dev_err(hdev->dev,
4506 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4507 dma_id);
4508 return -EIO;
4513 return 0;
4516 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4518 struct asic_fixed_properties *prop = &hdev->asic_prop;
4519 struct gaudi_device *gaudi = hdev->asic_specific;
4520 u64 idle_mask = 0;
4521 int rc = 0;
4522 u64 val = 0;
4524 if (!hdev->memory_scrub)
4525 return 0;
4527 if (!addr && !size) {
4528 /* Wait till device is idle */
4529 rc = hl_poll_timeout(
4530 hdev,
4531 mmDMA0_CORE_STS0/* dummy */,
4532 val/* dummy */,
4533 (hdev->asic_funcs->is_device_idle(hdev,
4534 &idle_mask, NULL)),
4535 1000,
4536 HBM_SCRUBBING_TIMEOUT_US);
4537 if (rc) {
4538 dev_err(hdev->dev, "waiting for idle timeout\n");
4539 return -EIO;
4542 /* Scrub SRAM */
4543 addr = prop->sram_user_base_address;
4544 size = hdev->pldm ? 0x10000 :
4545 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4546 val = 0x7777777777777777ull;
4548 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4549 if (rc) {
4550 dev_err(hdev->dev,
4551 "Failed to clear SRAM in mem scrub all\n");
4552 return rc;
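/* Clock gating is disabled while the driver drives the DMA cores directly
 * for the HBM scrub, and is restored once the scrub is done.
 */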
4555 mutex_lock(&gaudi->clk_gate_mutex);
4556 hdev->asic_funcs->disable_clock_gating(hdev);
4558 /* Scrub HBM using all DMA channels in parallel */
4559 rc = gaudi_hbm_scrubbing(hdev);
4560 if (rc)
4561 dev_err(hdev->dev,
4562 "Failed to clear HBM in mem scrub all\n");
4564 hdev->asic_funcs->set_clock_gating(hdev);
4565 mutex_unlock(&gaudi->clk_gate_mutex);
4568 return rc;
4571 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4572 u32 queue_id, dma_addr_t *dma_handle,
4573 u16 *queue_len)
4575 struct gaudi_device *gaudi = hdev->asic_specific;
4576 struct gaudi_internal_qman_info *q;
4578 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4579 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4580 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4581 return NULL;
4584 q = &gaudi->internal_qmans[queue_id];
4585 *dma_handle = q->pq_dma_addr;
4586 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4588 return q->pq_kernel_addr;
4591 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4592 u16 len, u32 timeout, u64 *result)
4594 struct gaudi_device *gaudi = hdev->asic_specific;
4596 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4597 if (result)
4598 *result = 0;
4599 return 0;
4602 if (!timeout)
4603 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4605 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4606 timeout, result);
4609 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4611 struct packet_msg_prot *fence_pkt;
4612 dma_addr_t pkt_dma_addr;
4613 u32 fence_val, tmp, timeout_usec;
4614 dma_addr_t fence_dma_addr;
4615 u32 *fence_ptr;
4616 int rc;
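/* Queue test: send a MSG_PROT packet through the tested queue that writes
 * a known fence value to a host buffer, then poll that buffer until the
 * value arrives or the timeout expires.
 */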
4618 if (hdev->pldm)
4619 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4620 else
4621 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4623 fence_val = GAUDI_QMAN0_FENCE_VAL;
4625 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4626 &fence_dma_addr);
4627 if (!fence_ptr) {
4628 dev_err(hdev->dev,
4629 "Failed to allocate memory for H/W queue %d testing\n",
4630 hw_queue_id);
4631 return -ENOMEM;
4634 *fence_ptr = 0;
4636 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4637 sizeof(struct packet_msg_prot),
4638 GFP_KERNEL, &pkt_dma_addr);
4639 if (!fence_pkt) {
4640 dev_err(hdev->dev,
4641 "Failed to allocate packet for H/W queue %d testing\n",
4642 hw_queue_id);
4643 rc = -ENOMEM;
4644 goto free_fence_ptr;
4647 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4648 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4649 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4651 fence_pkt->ctl = cpu_to_le32(tmp);
4652 fence_pkt->value = cpu_to_le32(fence_val);
4653 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4655 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4656 sizeof(struct packet_msg_prot),
4657 pkt_dma_addr);
4658 if (rc) {
4659 dev_err(hdev->dev,
4660 "Failed to send fence packet to H/W queue %d\n",
4661 hw_queue_id);
4662 goto free_pkt;
4665 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4666 1000, timeout_usec, true);
4668 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4670 if (rc == -ETIMEDOUT) {
4671 dev_err(hdev->dev,
4672 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4673 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4674 rc = -EIO;
4677 free_pkt:
4678 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4679 pkt_dma_addr);
4680 free_fence_ptr:
4681 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4682 fence_dma_addr);
4683 return rc;
4686 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4688 struct gaudi_device *gaudi = hdev->asic_specific;
4691 * Check the capability here, as send_cpu_message() won't update the
4692 * result value if the CPU queue capability is not set
4694 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4695 return 0;
4697 return hl_fw_test_cpu_queue(hdev);
4700 static int gaudi_test_queues(struct hl_device *hdev)
4702 int i, rc, ret_val = 0;
4704 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4705 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4706 rc = gaudi_test_queue(hdev, i);
4707 if (rc)
4708 ret_val = -EINVAL;
4712 rc = gaudi_test_cpu_queue(hdev);
4713 if (rc)
4714 ret_val = -EINVAL;
4716 return ret_val;
4719 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4720 gfp_t mem_flags, dma_addr_t *dma_handle)
4722 void *kernel_addr;
4724 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4725 return NULL;
4727 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4729 /* Shift to the device's base physical address of host memory */
4730 if (kernel_addr)
4731 *dma_handle += HOST_PHYS_BASE;
4733 return kernel_addr;
4736 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4737 dma_addr_t dma_addr)
4739 /* Cancel the device's base physical address of host memory */
4740 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4742 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4745 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4746 size_t size, dma_addr_t *dma_handle)
4748 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4751 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4752 size_t size, void *vaddr)
4754 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4757 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4758 int nents, enum dma_data_direction dir)
4760 struct scatterlist *sg;
4761 int i;
4763 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4764 return -ENOMEM;
4766 /* Shift to the device's base physical address of host memory */
4767 for_each_sg(sgl, sg, nents, i)
4768 sg->dma_address += HOST_PHYS_BASE;
4770 return 0;
4773 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4774 int nents, enum dma_data_direction dir)
4776 struct scatterlist *sg;
4777 int i;
4779 /* Cancel the device's base physical address of host memory */
4780 for_each_sg(sgl, sg, nents, i)
4781 sg->dma_address -= HOST_PHYS_BASE;
4783 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4786 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4787 struct sg_table *sgt)
4789 struct scatterlist *sg, *sg_next_iter;
4790 u32 count, dma_desc_cnt;
4791 u64 len, len_next;
4792 dma_addr_t addr, addr_next;
4794 dma_desc_cnt = 0;
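/* Count how many LIN_DMA packets the patched CB will need: contiguous SG
 * entries are merged as long as the combined length does not exceed
 * DMA_MAX_TRANSFER_SIZE.
 */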
4796 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4798 len = sg_dma_len(sg);
4799 addr = sg_dma_address(sg);
4801 if (len == 0)
4802 break;
4804 while ((count + 1) < sgt->nents) {
4805 sg_next_iter = sg_next(sg);
4806 len_next = sg_dma_len(sg_next_iter);
4807 addr_next = sg_dma_address(sg_next_iter);
4809 if (len_next == 0)
4810 break;
4812 if ((addr + len == addr_next) &&
4813 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4814 len += len_next;
4815 count++;
4816 sg = sg_next_iter;
4817 } else {
4818 break;
4822 dma_desc_cnt++;
4825 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4828 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4829 struct hl_cs_parser *parser,
4830 struct packet_lin_dma *user_dma_pkt,
4831 u64 addr, enum dma_data_direction dir)
4833 struct hl_userptr *userptr;
4834 int rc;
4836 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4837 parser->job_userptr_list, &userptr))
4838 goto already_pinned;
4840 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
4841 if (!userptr)
4842 return -ENOMEM;
4844 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4845 userptr);
4846 if (rc)
4847 goto free_userptr;
4849 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4851 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4852 userptr->sgt->nents, dir);
4853 if (rc) {
4854 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4855 goto unpin_memory;
4858 userptr->dma_mapped = true;
4859 userptr->dir = dir;
4861 already_pinned:
4862 parser->patched_cb_size +=
4863 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4865 return 0;
4867 unpin_memory:
4868 hl_unpin_host_memory(hdev, userptr);
4869 free_userptr:
4870 kfree(userptr);
4871 return rc;
4874 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4875 struct hl_cs_parser *parser,
4876 struct packet_lin_dma *user_dma_pkt,
4877 bool src_in_host)
4879 enum dma_data_direction dir;
4880 bool skip_host_mem_pin = false, user_memset;
4881 u64 addr;
4882 int rc = 0;
4884 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4885 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4886 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4888 if (src_in_host) {
4889 if (user_memset)
4890 skip_host_mem_pin = true;
4892 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4893 dir = DMA_TO_DEVICE;
4894 addr = le64_to_cpu(user_dma_pkt->src_addr);
4895 } else {
4896 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4897 dir = DMA_FROM_DEVICE;
4898 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4899 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4900 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4903 if (skip_host_mem_pin)
4904 parser->patched_cb_size += sizeof(*user_dma_pkt);
4905 else
4906 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4907 addr, dir);
4909 return rc;
4912 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4913 struct hl_cs_parser *parser,
4914 struct packet_lin_dma *user_dma_pkt)
4916 bool src_in_host = false;
4917 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4918 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4919 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4921 dev_dbg(hdev->dev, "DMA packet details:\n");
4922 dev_dbg(hdev->dev, "source == 0x%llx\n",
4923 le64_to_cpu(user_dma_pkt->src_addr));
4924 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4925 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4928 * Special handling for DMA with size 0. Bypass all validations
4929 * because no transactions will be done except for WR_COMP, which
4930 * is not a security issue
4932 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4933 parser->patched_cb_size += sizeof(*user_dma_pkt);
4934 return 0;
4937 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4938 src_in_host = true;
4940 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4941 src_in_host);
4944 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4945 struct hl_cs_parser *parser,
4946 struct packet_load_and_exe *user_pkt)
4948 u32 cfg;
4950 cfg = le32_to_cpu(user_pkt->cfg);
4952 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
4953 dev_err(hdev->dev,
4954 "User not allowed to use Load and Execute\n");
4955 return -EPERM;
4958 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
4960 return 0;
4963 static int gaudi_validate_cb(struct hl_device *hdev,
4964 struct hl_cs_parser *parser, bool is_mmu)
4966 u32 cb_parsed_length = 0;
4967 int rc = 0;
4969 parser->patched_cb_size = 0;
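/* Walk the user CB packet by packet: validate the packet ID, make sure no
 * packet crosses the CB boundary, reject privileged packets and accumulate
 * the size the patched CB will need.
 */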
4971 /* cb_user_size is greater than 0, so the loop always executes */
4972 while (cb_parsed_length < parser->user_cb_size) {
4973 enum packet_id pkt_id;
4974 u16 pkt_size;
4975 struct gaudi_packet *user_pkt;
4977 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
4979 pkt_id = (enum packet_id) (
4980 (le64_to_cpu(user_pkt->header) &
4981 PACKET_HEADER_PACKET_ID_MASK) >>
4982 PACKET_HEADER_PACKET_ID_SHIFT);
4984 if (!validate_packet_id(pkt_id)) {
4985 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4986 rc = -EINVAL;
4987 break;
4990 pkt_size = gaudi_packet_sizes[pkt_id];
4991 cb_parsed_length += pkt_size;
4992 if (cb_parsed_length > parser->user_cb_size) {
4993 dev_err(hdev->dev,
4994 "packet 0x%x is out of CB boundary\n", pkt_id);
4995 rc = -EINVAL;
4996 break;
4999 switch (pkt_id) {
5000 case PACKET_MSG_PROT:
5001 dev_err(hdev->dev,
5002 "User not allowed to use MSG_PROT\n");
5003 rc = -EPERM;
5004 break;
5006 case PACKET_CP_DMA:
5007 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5008 rc = -EPERM;
5009 break;
5011 case PACKET_STOP:
5012 dev_err(hdev->dev, "User not allowed to use STOP\n");
5013 rc = -EPERM;
5014 break;
5016 case PACKET_WREG_BULK:
5017 dev_err(hdev->dev,
5018 "User not allowed to use WREG_BULK\n");
5019 rc = -EPERM;
5020 break;
5022 case PACKET_LOAD_AND_EXE:
5023 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5024 (struct packet_load_and_exe *) user_pkt);
5025 break;
5027 case PACKET_LIN_DMA:
5028 parser->contains_dma_pkt = true;
5029 if (is_mmu)
5030 parser->patched_cb_size += pkt_size;
5031 else
5032 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5033 (struct packet_lin_dma *) user_pkt);
5034 break;
5036 case PACKET_WREG_32:
5037 case PACKET_MSG_LONG:
5038 case PACKET_MSG_SHORT:
5039 case PACKET_REPEAT:
5040 case PACKET_FENCE:
5041 case PACKET_NOP:
5042 case PACKET_ARB_POINT:
5043 parser->patched_cb_size += pkt_size;
5044 break;
5046 default:
5047 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5048 pkt_id);
5049 rc = -EINVAL;
5050 break;
5053 if (rc)
5054 break;
5058 * The new CB should have space at the end for two MSG_PROT packets:
5059 * 1. A packet that will act as a completion packet
5060 * 2. A packet that will generate an MSI-X interrupt
5062 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5064 return rc;
5067 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5068 struct hl_cs_parser *parser,
5069 struct packet_lin_dma *user_dma_pkt,
5070 struct packet_lin_dma *new_dma_pkt,
5071 u32 *new_dma_pkt_size)
5073 struct hl_userptr *userptr;
5074 struct scatterlist *sg, *sg_next_iter;
5075 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5076 u64 len, len_next;
5077 dma_addr_t dma_addr, dma_addr_next;
5078 u64 device_memory_addr, addr;
5079 enum dma_data_direction dir;
5080 struct sg_table *sgt;
5081 bool src_in_host = false;
5082 bool skip_host_mem_pin = false;
5083 bool user_memset;
5085 ctl = le32_to_cpu(user_dma_pkt->ctl);
5087 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5088 src_in_host = true;
5090 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5091 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5093 if (src_in_host) {
5094 addr = le64_to_cpu(user_dma_pkt->src_addr);
5095 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5096 dir = DMA_TO_DEVICE;
5097 if (user_memset)
5098 skip_host_mem_pin = true;
5099 } else {
5100 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5101 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5102 dir = DMA_FROM_DEVICE;
5105 if ((!skip_host_mem_pin) &&
5106 (!hl_userptr_is_pinned(hdev, addr,
5107 le32_to_cpu(user_dma_pkt->tsize),
5108 parser->job_userptr_list, &userptr))) {
5109 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5110 addr, le32_to_cpu(user_dma_pkt->tsize));
5111 return -EFAULT;
5114 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5115 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5116 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5117 return 0;
5120 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5122 sgt = userptr->sgt;
5123 dma_desc_cnt = 0;
5125 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5126 len = sg_dma_len(sg);
5127 dma_addr = sg_dma_address(sg);
5129 if (len == 0)
5130 break;
5132 while ((count + 1) < sgt->nents) {
5133 sg_next_iter = sg_next(sg);
5134 len_next = sg_dma_len(sg_next_iter);
5135 dma_addr_next = sg_dma_address(sg_next_iter);
5137 if (len_next == 0)
5138 break;
5140 if ((dma_addr + len == dma_addr_next) &&
5141 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5142 len += len_next;
5143 count++;
5144 sg = sg_next_iter;
5145 } else {
5146 break;
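/* Keep the engine barrier (EB) only on the first generated packet and drop
 * WR_COMP on all of them; the user's WR_COMP setting is restored on the
 * last packet below.
 */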
5150 ctl = le32_to_cpu(user_dma_pkt->ctl);
5151 if (likely(dma_desc_cnt))
5152 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5153 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5154 new_dma_pkt->ctl = cpu_to_le32(ctl);
5155 new_dma_pkt->tsize = cpu_to_le32(len);
5157 if (dir == DMA_TO_DEVICE) {
5158 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5159 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5160 } else {
5161 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5162 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5165 if (!user_memset)
5166 device_memory_addr += len;
5167 dma_desc_cnt++;
5168 new_dma_pkt++;
5171 if (!dma_desc_cnt) {
5172 dev_err(hdev->dev,
5173 "Error of 0 SG entries when patching DMA packet\n");
5174 return -EFAULT;
5177 /* Fix the last DMA packet - WR_COMP must be set as the user requested */
5178 new_dma_pkt--;
5179 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5181 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5183 return 0;
5186 static int gaudi_patch_cb(struct hl_device *hdev,
5187 struct hl_cs_parser *parser)
5189 u32 cb_parsed_length = 0;
5190 u32 cb_patched_cur_length = 0;
5191 int rc = 0;
5193 /* cb_user_size is greater than 0, so the loop always executes */
5194 while (cb_parsed_length < parser->user_cb_size) {
5195 enum packet_id pkt_id;
5196 u16 pkt_size;
5197 u32 new_pkt_size = 0;
5198 struct gaudi_packet *user_pkt, *kernel_pkt;
5200 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5201 kernel_pkt = parser->patched_cb->kernel_address +
5202 cb_patched_cur_length;
5204 pkt_id = (enum packet_id) (
5205 (le64_to_cpu(user_pkt->header) &
5206 PACKET_HEADER_PACKET_ID_MASK) >>
5207 PACKET_HEADER_PACKET_ID_SHIFT);
5209 if (!validate_packet_id(pkt_id)) {
5210 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5211 rc = -EINVAL;
5212 break;
5215 pkt_size = gaudi_packet_sizes[pkt_id];
5216 cb_parsed_length += pkt_size;
5217 if (cb_parsed_length > parser->user_cb_size) {
5218 dev_err(hdev->dev,
5219 "packet 0x%x is out of CB boundary\n", pkt_id);
5220 rc = -EINVAL;
5221 break;
5224 switch (pkt_id) {
5225 case PACKET_LIN_DMA:
5226 rc = gaudi_patch_dma_packet(hdev, parser,
5227 (struct packet_lin_dma *) user_pkt,
5228 (struct packet_lin_dma *) kernel_pkt,
5229 &new_pkt_size);
5230 cb_patched_cur_length += new_pkt_size;
5231 break;
5233 case PACKET_MSG_PROT:
5234 dev_err(hdev->dev,
5235 "User not allowed to use MSG_PROT\n");
5236 rc = -EPERM;
5237 break;
5239 case PACKET_CP_DMA:
5240 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5241 rc = -EPERM;
5242 break;
5244 case PACKET_STOP:
5245 dev_err(hdev->dev, "User not allowed to use STOP\n");
5246 rc = -EPERM;
5247 break;
5249 case PACKET_WREG_32:
5250 case PACKET_WREG_BULK:
5251 case PACKET_MSG_LONG:
5252 case PACKET_MSG_SHORT:
5253 case PACKET_REPEAT:
5254 case PACKET_FENCE:
5255 case PACKET_NOP:
5256 case PACKET_ARB_POINT:
5257 case PACKET_LOAD_AND_EXE:
5258 memcpy(kernel_pkt, user_pkt, pkt_size);
5259 cb_patched_cur_length += pkt_size;
5260 break;
5262 default:
5263 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5264 pkt_id);
5265 rc = -EINVAL;
5266 break;
5269 if (rc)
5270 break;
5273 return rc;
5276 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5277 struct hl_cs_parser *parser)
5279 u64 patched_cb_handle;
5280 u32 patched_cb_size;
5281 struct hl_cb *user_cb;
5282 int rc;
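/* MMU flow: copy the user CB into a kernel-allocated patched CB, extended
 * by the two trailing MSG_PROT packets, and validate the copy rather than
 * the user's buffer.
 */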
5285 * The new CB should have space at the end for two MSG_PROT packets:
5286 * 1. A packet that will act as a completion packet
5287 * 2. A packet that will generate an MSI interrupt
5289 parser->patched_cb_size = parser->user_cb_size +
5290 sizeof(struct packet_msg_prot) * 2;
5292 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5293 parser->patched_cb_size, false, false,
5294 &patched_cb_handle);
5296 if (rc) {
5297 dev_err(hdev->dev,
5298 "Failed to allocate patched CB for DMA CS %d\n",
5299 rc);
5300 return rc;
5303 patched_cb_handle >>= PAGE_SHIFT;
5304 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5305 (u32) patched_cb_handle);
5306 /* hl_cb_get should never fail here so use kernel WARN */
5307 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5308 (u32) patched_cb_handle);
5309 if (!parser->patched_cb) {
5310 rc = -EFAULT;
5311 goto out;
5315 * The check that parser->user_cb_size <= parser->user_cb->size was done
5316 * in validate_queue_index().
5318 memcpy(parser->patched_cb->kernel_address,
5319 parser->user_cb->kernel_address,
5320 parser->user_cb_size);
5322 patched_cb_size = parser->patched_cb_size;
5324 /* Validate patched CB instead of user CB */
5325 user_cb = parser->user_cb;
5326 parser->user_cb = parser->patched_cb;
5327 rc = gaudi_validate_cb(hdev, parser, true);
5328 parser->user_cb = user_cb;
5330 if (rc) {
5331 hl_cb_put(parser->patched_cb);
5332 goto out;
5335 if (patched_cb_size != parser->patched_cb_size) {
5336 dev_err(hdev->dev, "user CB size mismatch\n");
5337 hl_cb_put(parser->patched_cb);
5338 rc = -EINVAL;
5339 goto out;
5342 out:
5344 * Always call cb destroy here because we still hold one reference
5345 * to it from the earlier cb_get. After the job completes,
5346 * cb_put will release it, but here we want to remove it from the
5347 * idr
5349 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5350 patched_cb_handle << PAGE_SHIFT);
5352 return rc;
5355 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5356 struct hl_cs_parser *parser)
5358 u64 patched_cb_handle;
5359 int rc;
5361 rc = gaudi_validate_cb(hdev, parser, false);
5363 if (rc)
5364 goto free_userptr;
5366 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5367 parser->patched_cb_size, false, false,
5368 &patched_cb_handle);
5369 if (rc) {
5370 dev_err(hdev->dev,
5371 "Failed to allocate patched CB for DMA CS %d\n", rc);
5372 goto free_userptr;
5375 patched_cb_handle >>= PAGE_SHIFT;
5376 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5377 (u32) patched_cb_handle);
5378 /* hl_cb_get should never fail here so use kernel WARN */
5379 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5380 (u32) patched_cb_handle);
5381 if (!parser->patched_cb) {
5382 rc = -EFAULT;
5383 goto out;
5386 rc = gaudi_patch_cb(hdev, parser);
5388 if (rc)
5389 hl_cb_put(parser->patched_cb);
5391 out:
5393 * Always call cb destroy here because we still hold one reference
5394 * to it from the earlier cb_get. After the job completes,
5395 * cb_put will release it, but here we want to remove it from the
5396 * idr
5398 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5399 patched_cb_handle << PAGE_SHIFT);
5401 free_userptr:
5402 if (rc)
5403 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5404 return rc;
5407 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5408 struct hl_cs_parser *parser)
5410 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5411 struct gaudi_device *gaudi = hdev->asic_specific;
5412 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5413 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
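/* Each group of four NIC queues maps to one NIC engine capability bit;
 * jobs for NIC queues whose engine was not initialized are rejected.
 */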
5415 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5416 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5417 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5418 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5419 parser->hw_queue_id);
5420 return -EINVAL;
5423 /* For internal queue jobs just check if CB address is valid */
5424 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5425 parser->user_cb_size,
5426 asic_prop->sram_user_base_address,
5427 asic_prop->sram_end_address))
5428 return 0;
5430 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5431 parser->user_cb_size,
5432 asic_prop->dram_user_base_address,
5433 asic_prop->dram_end_address))
5434 return 0;
5436 /* PMMU and HPMMU addresses are equal, check only one of them */
5437 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5438 parser->user_cb_size,
5439 asic_prop->pmmu.start_addr,
5440 asic_prop->pmmu.end_addr))
5441 return 0;
5443 dev_err(hdev->dev,
5444 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5445 parser->user_cb, parser->user_cb_size);
5447 return -EFAULT;
5450 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5452 struct gaudi_device *gaudi = hdev->asic_specific;
5454 if (parser->queue_type == QUEUE_TYPE_INT)
5455 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5457 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5458 return gaudi_parse_cb_mmu(hdev, parser);
5459 else
5460 return gaudi_parse_cb_no_mmu(hdev, parser);
5463 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5464 void *kernel_address, u32 len,
5465 u64 cq_addr, u32 cq_val, u32 msi_vec,
5466 bool eb)
5468 struct gaudi_device *gaudi = hdev->asic_specific;
5469 struct packet_msg_prot *cq_pkt;
5470 u32 tmp;
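/* The two trailing MSG_PROT packets: the first writes the completion value
 * to the CQ address, the second triggers the interrupt by writing 1 to the
 * PCIE MSI register of the chosen vector (vector 0 when multi-MSI mode is
 * not in use).
 */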
5472 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5474 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5475 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5477 if (eb)
5478 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5480 cq_pkt->ctl = cpu_to_le32(tmp);
5481 cq_pkt->value = cpu_to_le32(cq_val);
5482 cq_pkt->addr = cpu_to_le64(cq_addr);
5484 cq_pkt++;
5486 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5487 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5488 cq_pkt->ctl = cpu_to_le32(tmp);
5489 cq_pkt->value = cpu_to_le32(1);
5491 if (!gaudi->multi_msi_mode)
5492 msi_vec = 0;
5494 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5497 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5499 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5502 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5503 u32 size, u64 val)
5505 struct packet_lin_dma *lin_dma_pkt;
5506 struct hl_cs_job *job;
5507 u32 cb_size, ctl, err_cause;
5508 struct hl_cb *cb;
5509 int rc;
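/* Build a single LIN_DMA packet in memset mode (src_addr carries the fill
 * value) and submit it as a kernel job on the DMA 0 queue through QMAN0.
 */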
5511 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5512 if (!cb)
5513 return -EFAULT;
5515 lin_dma_pkt = cb->kernel_address;
5516 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5517 cb_size = sizeof(*lin_dma_pkt);
5519 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5520 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5521 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5522 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5523 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5525 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5526 lin_dma_pkt->src_addr = cpu_to_le64(val);
5527 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5528 lin_dma_pkt->tsize = cpu_to_le32(size);
5530 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5531 if (!job) {
5532 dev_err(hdev->dev, "Failed to allocate a new job\n");
5533 rc = -ENOMEM;
5534 goto release_cb;
5537 /* Verify DMA is OK */
5538 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5539 if (err_cause && !hdev->init_done) {
5540 dev_dbg(hdev->dev,
5541 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5542 err_cause);
5543 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5546 job->id = 0;
5547 job->user_cb = cb;
5548 atomic_inc(&job->user_cb->cs_cnt);
5549 job->user_cb_size = cb_size;
5550 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5551 job->patched_cb = job->user_cb;
5552 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5554 hl_debugfs_add_job(hdev, job);
5556 rc = gaudi_send_job_on_qman0(hdev, job);
5557 hl_debugfs_remove_job(hdev, job);
5558 kfree(job);
5559 atomic_dec(&cb->cs_cnt);
5561 /* Verify DMA is OK */
5562 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5563 if (err_cause) {
5564 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5565 rc = -EIO;
5566 if (!hdev->init_done) {
5567 dev_dbg(hdev->dev,
5568 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5569 err_cause);
5570 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5574 release_cb:
5575 hl_cb_put(cb);
5576 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5578 return rc;
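/*
 * Clear the sync manager objects user space may have touched: all SOBs
 * and monitor status registers in the E_N, E_S and W_N blocks, and only
 * the entries from the first available SOB/monitor onwards in the W_S
 * block.
 */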
5581 static void gaudi_restore_sm_registers(struct hl_device *hdev)
5583 int i;
5585 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
5586 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5587 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5588 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5591 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
5592 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5593 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5594 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5597 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
5599 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
5600 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5602 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
5604 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
5605 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
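/*
 * Restore per-channel DMA core registers: point the completion write
 * back at the channel's SOB in the E_N sync manager block, restore the
 * completion write data, and for DMA 2-7 also restore WR_AWUSER_31_11.
 */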
5608 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5610 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5611 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5612 int i;
5614 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5615 u64 sob_addr = CFG_BASE +
5616 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5617 (i * sob_delta);
5618 u32 dma_offset = i * DMA_CORE_OFFSET;
5620 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5621 lower_32_bits(sob_addr));
5622 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5623 upper_32_bits(sob_addr));
5624 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5626 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5627 * modified by the user for SRAM reduction
5628 */
5629 if (i > 1)
5630 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5631 0x00000001);
5635 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5637 u32 qman_offset;
5638 int i;
5640 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5641 qman_offset = i * DMA_QMAN_OFFSET;
5642 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5645 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5646 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5647 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5650 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5651 qman_offset = i * TPC_QMAN_OFFSET;
5652 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5655 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5656 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5657 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5658 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5662 static void gaudi_restore_user_registers(struct hl_device *hdev)
5664 gaudi_restore_sm_registers(hdev);
5665 gaudi_restore_dma_registers(hdev);
5666 gaudi_restore_qm_registers(hdev);
5669 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5671 gaudi_restore_user_registers(hdev);
5673 return 0;
5676 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5678 struct asic_fixed_properties *prop = &hdev->asic_prop;
5679 struct gaudi_device *gaudi = hdev->asic_specific;
5680 u64 addr = prop->mmu_pgt_addr;
5681 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5683 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5684 return 0;
5686 return gaudi_memset_device_memory(hdev, addr, size, 0);
5689 static void gaudi_restore_phase_topology(struct hl_device *hdev)
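/*
 * The debugfs access helpers below route the address to the matching
 * aperture: CFG space through RREG32/WREG32 (rejected if the relevant
 * engines are clock gated), SRAM through its PCI BAR, HBM through the
 * sliding HBM BAR window, and host physical memory directly when no
 * IOMMU is present.
 */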
5694 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
5696 struct asic_fixed_properties *prop = &hdev->asic_prop;
5697 struct gaudi_device *gaudi = hdev->asic_specific;
5698 u64 hbm_bar_addr;
5699 int rc = 0;
5701 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5703 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5704 (hdev->clock_gating_mask &
5705 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5707 dev_err_ratelimited(hdev->dev,
5708 "Can't read register - clock gating is enabled!\n");
5709 rc = -EFAULT;
5710 } else {
5711 *val = RREG32(addr - CFG_BASE);
5714 } else if ((addr >= SRAM_BASE_ADDR) &&
5715 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5716 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
5717 (addr - SRAM_BASE_ADDR));
5718 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5719 u64 bar_base_addr = DRAM_PHYS_BASE +
5720 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5722 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5723 if (hbm_bar_addr != U64_MAX) {
5724 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
5725 (addr - bar_base_addr));
5727 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5728 hbm_bar_addr);
5730 if (hbm_bar_addr == U64_MAX)
5731 rc = -EIO;
5732 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5733 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
5734 } else {
5735 rc = -EFAULT;
5738 return rc;
5741 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
5743 struct asic_fixed_properties *prop = &hdev->asic_prop;
5744 struct gaudi_device *gaudi = hdev->asic_specific;
5745 u64 hbm_bar_addr;
5746 int rc = 0;
5748 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5750 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5751 (hdev->clock_gating_mask &
5752 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5754 dev_err_ratelimited(hdev->dev,
5755 "Can't write register - clock gating is enabled!\n");
5756 rc = -EFAULT;
5757 } else {
5758 WREG32(addr - CFG_BASE, val);
5761 } else if ((addr >= SRAM_BASE_ADDR) &&
5762 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5763 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
5764 (addr - SRAM_BASE_ADDR));
5765 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5766 u64 bar_base_addr = DRAM_PHYS_BASE +
5767 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5769 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5770 if (hbm_bar_addr != U64_MAX) {
5771 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
5772 (addr - bar_base_addr));
5774 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5775 hbm_bar_addr);
5777 if (hbm_bar_addr == U64_MAX)
5778 rc = -EIO;
5779 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5780 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5781 } else {
5782 rc = -EFAULT;
5785 return rc;
5788 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
5790 struct asic_fixed_properties *prop = &hdev->asic_prop;
5791 struct gaudi_device *gaudi = hdev->asic_specific;
5792 u64 hbm_bar_addr;
5793 int rc = 0;
5795 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
5797 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5798 (hdev->clock_gating_mask &
5799 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5801 dev_err_ratelimited(hdev->dev,
5802 "Can't read register - clock gating is enabled!\n");
5803 rc = -EFAULT;
5804 } else {
5805 u32 val_l = RREG32(addr - CFG_BASE);
5806 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
5808 *val = (((u64) val_h) << 32) | val_l;
5811 } else if ((addr >= SRAM_BASE_ADDR) &&
5812 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5813 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
5814 (addr - SRAM_BASE_ADDR));
5815 } else if (addr <=
5816 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5817 u64 bar_base_addr = DRAM_PHYS_BASE +
5818 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5820 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5821 if (hbm_bar_addr != U64_MAX) {
5822 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
5823 (addr - bar_base_addr));
5825 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5826 hbm_bar_addr);
5828 if (hbm_bar_addr == U64_MAX)
5829 rc = -EIO;
5830 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5831 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
5832 } else {
5833 rc = -EFAULT;
5836 return rc;
5839 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
5841 struct asic_fixed_properties *prop = &hdev->asic_prop;
5842 struct gaudi_device *gaudi = hdev->asic_specific;
5843 u64 hbm_bar_addr;
5844 int rc = 0;
5846 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
5848 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5849 (hdev->clock_gating_mask &
5850 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5852 dev_err_ratelimited(hdev->dev,
5853 "Can't write register - clock gating is enabled!\n");
5854 rc = -EFAULT;
5855 } else {
5856 WREG32(addr - CFG_BASE, lower_32_bits(val));
5857 WREG32(addr + sizeof(u32) - CFG_BASE,
5858 upper_32_bits(val));
5861 } else if ((addr >= SRAM_BASE_ADDR) &&
5862 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5863 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
5864 (addr - SRAM_BASE_ADDR));
5865 } else if (addr <=
5866 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5867 u64 bar_base_addr = DRAM_PHYS_BASE +
5868 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5870 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5871 if (hbm_bar_addr != U64_MAX) {
5872 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5873 (addr - bar_base_addr));
5875 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5876 hbm_bar_addr);
5878 if (hbm_bar_addr == U64_MAX)
5879 rc = -EIO;
5880 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5881 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5882 } else {
5883 rc = -EFAULT;
5886 return rc;
5889 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
5891 struct gaudi_device *gaudi = hdev->asic_specific;
5893 if (hdev->hard_reset_pending)
5894 return U64_MAX;
5896 return readq(hdev->pcie_bar[HBM_BAR_ID] +
5897 (addr - gaudi->hbm_bar_cur_addr));
5900 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
5902 struct gaudi_device *gaudi = hdev->asic_specific;
5904 if (hdev->hard_reset_pending)
5905 return;
5907 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5908 (addr - gaudi->hbm_bar_cur_addr));
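/*
 * Helper for gaudi_mmu_prepare(): clear the MMBP + ASID field (low 11
 * bits, hence the 0x7FF mask) of an engine register and OR in the new
 * ASID.
 */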
5911 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
5913 /* mask to zero the MMBP and ASID bits */
5914 WREG32_AND(reg, ~0x7FF);
5915 WREG32_OR(reg, asid);
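/*
 * Program the given ASID into the non-secure properties / user registers
 * of every engine that masters memory transactions: DMA QMANs and cores,
 * TPC, MME and the enabled NIC QMANs. Clock gating is disabled for the
 * duration of the register writes.
 */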
5918 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
5920 struct gaudi_device *gaudi = hdev->asic_specific;
5922 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5923 return;
5925 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
5926 WARN(1, "asid %u is too big\n", asid);
5927 return;
5930 mutex_lock(&gaudi->clk_gate_mutex);
5932 hdev->asic_funcs->disable_clock_gating(hdev);
5934 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5935 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5936 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5937 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5938 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5940 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5941 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5942 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5943 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5944 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5946 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5947 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5948 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5949 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5950 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5952 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5953 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5954 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5955 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5956 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5958 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
5959 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
5960 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
5961 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
5962 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
5964 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
5965 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
5966 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
5967 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
5968 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
5970 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
5971 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
5972 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5973 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5974 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
5976 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
5977 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
5978 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
5979 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
5980 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
5982 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
5983 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
5984 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
5985 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
5986 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
5987 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
5988 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
5989 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
5991 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5992 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5993 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5994 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5995 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5996 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
5997 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
5999 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6000 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6001 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6002 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6003 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6004 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6005 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6007 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6008 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6009 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6010 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6011 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6012 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6013 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6015 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6016 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6017 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6018 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6019 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6020 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6021 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6023 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6024 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6025 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6026 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6027 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6028 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6029 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6031 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6032 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6033 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6034 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6035 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6036 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6037 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6039 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6040 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6041 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6042 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6043 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6044 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6045 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6047 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6048 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6049 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6050 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6051 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6052 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6053 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6055 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6056 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6057 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6058 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6059 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6060 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6061 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6062 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6063 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6064 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6066 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6067 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6068 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6069 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6070 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6071 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6072 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6073 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6074 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6075 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6076 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6077 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6079 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
6080 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6081 asid);
6082 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6083 asid);
6084 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6085 asid);
6086 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6087 asid);
6088 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6089 asid);
6092 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
6093 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6094 asid);
6095 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6096 asid);
6097 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6098 asid);
6099 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6100 asid);
6101 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6102 asid);
6105 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
6106 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6107 asid);
6108 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6109 asid);
6110 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6111 asid);
6112 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6113 asid);
6114 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6115 asid);
6118 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
6119 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6120 asid);
6121 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6122 asid);
6123 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6124 asid);
6125 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6126 asid);
6127 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6128 asid);
6131 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
6132 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6133 asid);
6134 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6135 asid);
6136 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6137 asid);
6138 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6139 asid);
6140 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6141 asid);
6144 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
6145 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6146 asid);
6147 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6148 asid);
6149 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6150 asid);
6151 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6152 asid);
6153 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6154 asid);
6157 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
6158 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6159 asid);
6160 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6161 asid);
6162 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6163 asid);
6164 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6165 asid);
6166 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6167 asid);
6170 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
6171 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6172 asid);
6173 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6174 asid);
6175 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6176 asid);
6177 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6178 asid);
6179 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6180 asid);
6183 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
6184 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6185 asid);
6186 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6187 asid);
6188 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6189 asid);
6190 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6191 asid);
6192 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6193 asid);
6196 if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
6197 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6198 asid);
6199 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6200 asid);
6201 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6202 asid);
6203 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6204 asid);
6205 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6206 asid);
6209 hdev->asic_funcs->set_clock_gating(hdev);
6211 mutex_unlock(&gaudi->clk_gate_mutex);
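/*
 * Send a driver-internal job on QMAN0 (DMA channel 0). The device must be
 * idle; a fence MSG_PROT packet at the end of the patched CB writes
 * GAUDI_QMAN0_FENCE_VAL to a host fence buffer, the DMA core PROT bit is
 * set for the duration of the job, and the fence value is polled until
 * completion or timeout.
 */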
6214 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6215 struct hl_cs_job *job)
6217 struct packet_msg_prot *fence_pkt;
6218 u32 *fence_ptr;
6219 dma_addr_t fence_dma_addr;
6220 struct hl_cb *cb;
6221 u32 tmp, timeout, dma_offset;
6222 int rc;
6224 if (hdev->pldm)
6225 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6226 else
6227 timeout = HL_DEVICE_TIMEOUT_USEC;
6229 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
6230 dev_err_ratelimited(hdev->dev,
6231 "Can't send driver job on QMAN0 because the device is not idle\n");
6232 return -EBUSY;
6235 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6236 &fence_dma_addr);
6237 if (!fence_ptr) {
6238 dev_err(hdev->dev,
6239 "Failed to allocate fence memory for QMAN0\n");
6240 return -ENOMEM;
6243 cb = job->patched_cb;
6245 fence_pkt = cb->kernel_address +
6246 job->job_cb_size - sizeof(struct packet_msg_prot);
6248 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6249 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6250 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6252 fence_pkt->ctl = cpu_to_le32(tmp);
6253 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6254 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6256 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6258 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6260 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6261 job->job_cb_size, cb->bus_address);
6262 if (rc) {
6263 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6264 goto free_fence_ptr;
6267 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6268 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6269 timeout, true);
6271 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6273 if (rc == -ETIMEDOUT) {
6274 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6275 goto free_fence_ptr;
6278 free_fence_ptr:
6279 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6280 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6282 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6283 fence_dma_addr);
6284 return rc;
6287 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6289 if (event_type >= GAUDI_EVENT_SIZE)
6290 goto event_not_supported;
6292 if (!gaudi_irq_map_table[event_type].valid)
6293 goto event_not_supported;
6295 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6297 return;
6299 event_not_supported:
6300 snprintf(desc, size, "N/A");
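/*
 * Each DMA_IF RAZWI initiator ID is shared by two DMA channels. Read the
 * ERR_CAUSE register of both candidates and use the HBW read/write error
 * bit to decide which channel actually triggered the access.
 */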
6303 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6304 u32 x_y, bool is_write)
6306 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6308 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6309 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6311 switch (x_y) {
6312 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6313 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6314 dma_id[0] = 0;
6315 dma_id[1] = 2;
6316 break;
6317 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6318 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6319 dma_id[0] = 1;
6320 dma_id[1] = 3;
6321 break;
6322 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6323 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6324 dma_id[0] = 4;
6325 dma_id[1] = 6;
6326 break;
6327 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6328 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6329 dma_id[0] = 5;
6330 dma_id[1] = 7;
6331 break;
6332 default:
6333 goto unknown_initiator;
6336 for (i = 0 ; i < 2 ; i++) {
6337 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6338 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6341 switch (x_y) {
6342 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6343 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6344 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6345 return "DMA0";
6346 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6347 return "DMA2";
6348 else
6349 return "DMA0 or DMA2";
6350 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6351 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6352 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6353 return "DMA1";
6354 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6355 return "DMA3";
6356 else
6357 return "DMA1 or DMA3";
6358 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6359 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6360 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6361 return "DMA4";
6362 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6363 return "DMA6";
6364 else
6365 return "DMA4 or DMA6";
6366 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6367 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6368 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6369 return "DMA5";
6370 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6371 return "DMA7";
6372 else
6373 return "DMA5 or DMA7";
6376 unknown_initiator:
6377 return "unknown initiator";
6380 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6381 bool is_write)
6383 u32 val, x_y, axi_id;
6385 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6386 RREG32(mmMMU_UP_RAZWI_READ_ID);
6387 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6388 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6389 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6390 RAZWI_INITIATOR_AXI_ID_SHIFT);
6392 switch (x_y) {
6393 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6394 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6395 return "TPC0";
6396 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6397 return "NIC0";
6398 break;
6399 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6400 return "TPC1";
6401 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6402 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6403 return "MME0";
6404 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6405 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6406 return "MME1";
6407 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6408 return "TPC2";
6409 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6410 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6411 return "TPC3";
6412 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6413 return "PCI";
6414 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6415 return "CPU";
6416 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6417 return "PSOC";
6418 break;
6419 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6420 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6421 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6422 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6423 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6424 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6425 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6426 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6427 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6428 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6429 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6430 return "TPC4";
6431 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6432 return "NIC1";
6433 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6434 return "NIC2";
6435 break;
6436 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6437 return "TPC5";
6438 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6439 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6440 return "MME2";
6441 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6442 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6443 return "MME3";
6444 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6445 return "TPC6";
6446 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6447 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6448 return "TPC7";
6449 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6450 return "NIC4";
6451 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6452 return "NIC5";
6453 break;
6454 default:
6455 break;
6458 dev_err(hdev->dev,
6459 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6460 val,
6461 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6462 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6463 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6464 RAZWI_INITIATOR_AXI_ID_MASK);
6466 return "unknown initiator";
6469 static void gaudi_print_razwi_info(struct hl_device *hdev)
6471 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6472 dev_err_ratelimited(hdev->dev,
6473 "RAZWI event caused by illegal write of %s\n",
6474 gaudi_get_razwi_initiator_name(hdev, true));
6475 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6478 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6479 dev_err_ratelimited(hdev->dev,
6480 "RAZWI event caused by illegal read of %s\n",
6481 gaudi_get_razwi_initiator_name(hdev, false));
6482 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6486 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6488 struct gaudi_device *gaudi = hdev->asic_specific;
6489 u64 addr;
6490 u32 val;
6492 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6493 return;
6495 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6496 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6497 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6498 addr <<= 32;
6499 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6501 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6502 addr);
6504 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6507 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6508 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6509 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6510 addr <<= 32;
6511 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6513 dev_err_ratelimited(hdev->dev,
6514 "MMU access error on va 0x%llx\n", addr);
6516 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6520 /*
6521 * +-------------------+------------------------------------------------------+
6522 * | Configuration Reg | Description |
6523 * | Address | |
6524 * +-------------------+------------------------------------------------------+
6525 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6526 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6527 * | |0xF34 memory wrappers 63:32 |
6528 * | |0xF38 memory wrappers 95:64 |
6529 * | |0xF3C memory wrappers 127:96 |
6530 * +-------------------+------------------------------------------------------+
6531 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6532 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6533 * | |0xF44 memory wrappers 63:32 |
6534 * | |0xF48 memory wrappers 95:64 |
6535 * | |0xF4C memory wrappers 127:96 |
6536 * +-------------------+------------------------------------------------------+
6537 */
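/*
 * Scan the SERR/DERR indication registers described above for the single
 * memory wrapper that flagged an error, select it, read back the captured
 * ECC address and syndrome, and clear the indication.
 */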
6538 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6539 struct ecc_info_extract_params *params, u64 *ecc_address,
6540 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6542 struct gaudi_device *gaudi = hdev->asic_specific;
6543 u32 i, num_mem_regs, reg, err_bit;
6544 u64 err_addr, err_word = 0;
6545 int rc = 0;
6547 num_mem_regs = params->num_memories / 32 +
6548 ((params->num_memories % 32) ? 1 : 0);
6550 if (params->block_address >= CFG_BASE)
6551 params->block_address -= CFG_BASE;
6553 if (params->derr)
6554 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6555 else
6556 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6558 if (params->disable_clock_gating) {
6559 mutex_lock(&gaudi->clk_gate_mutex);
6560 hdev->asic_funcs->disable_clock_gating(hdev);
6563 /* Set invalid wrapper index */
6564 *memory_wrapper_idx = 0xFF;
6566 /* Iterate through memory wrappers, a single bit must be set */
6567 for (i = 0 ; i < num_mem_regs ; i++) {
6568 /* Indication registers are consecutive 32-bit words */
6569 err_word = RREG32(err_addr + i * 4);
6570 if (err_word) {
6571 err_bit = __ffs(err_word);
6572 *memory_wrapper_idx = err_bit + (32 * i);
6573 break;
6577 if (*memory_wrapper_idx == 0xFF) {
6578 dev_err(hdev->dev, "ECC error information cannot be found\n");
6579 rc = -EINVAL;
6580 goto enable_clk_gate;
6583 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6584 *memory_wrapper_idx);
6586 *ecc_address =
6587 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6588 *ecc_syndrom =
6589 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6591 /* Clear error indication */
6592 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6593 if (params->derr)
6594 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6595 else
6596 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6598 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6600 enable_clk_gate:
6601 if (params->disable_clock_gating) {
6602 hdev->asic_funcs->set_clock_gating(hdev);
6604 mutex_unlock(&gaudi->clk_gate_mutex);
6607 return rc;
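/*
 * Decode a QMAN error: scan GLBL_STS1 of every stream plus the lower CP,
 * print each set cause bit and clear it (write-1-to-clear), then report
 * any arbiter error causes.
 */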
6610 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6611 const char *qm_name,
6612 u64 glbl_sts_addr,
6613 u64 arb_err_addr)
6615 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6616 char reg_desc[32];
6618 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6619 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6620 glbl_sts_clr_val = 0;
6621 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6623 if (!glbl_sts_val)
6624 continue;
6626 if (i == QMAN_STREAMS)
6627 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6628 else
6629 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6631 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6632 if (glbl_sts_val & BIT(j)) {
6633 dev_err_ratelimited(hdev->dev,
6634 "%s %s. err cause: %s\n",
6635 qm_name, reg_desc,
6636 gaudi_qman_error_cause[j]);
6637 glbl_sts_clr_val |= BIT(j);
6641 /* Write 1 to clear errors */
6642 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6645 arb_err_val = RREG32(arb_err_addr);
6647 if (!arb_err_val)
6648 return;
6650 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6651 if (arb_err_val & BIT(j)) {
6652 dev_err_ratelimited(hdev->dev,
6653 "%s ARB_ERR. err cause: %s\n",
6654 qm_name,
6655 gaudi_qman_arb_error_cause[j]);
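/*
 * For ECC events, the error details (address, syndrome, wrapper index)
 * either arrive from the firmware in the EQ entry, or, for TPC and MME
 * blocks, are extracted directly from the block's ECC capture registers
 * by the driver.
 */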
6660 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6661 struct hl_eq_ecc_data *ecc_data)
6663 struct ecc_info_extract_params params;
6664 u64 ecc_address = 0, ecc_syndrom = 0;
6665 u8 index, memory_wrapper_idx = 0;
6666 bool extract_info_from_fw;
6667 int rc;
6669 switch (event_type) {
6670 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
6671 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
6672 extract_info_from_fw = true;
6673 break;
6674 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6675 index = event_type - GAUDI_EVENT_TPC0_SERR;
6676 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6677 params.num_memories = 90;
6678 params.derr = false;
6679 params.disable_clock_gating = true;
6680 extract_info_from_fw = false;
6681 break;
6682 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6683 index = event_type - GAUDI_EVENT_TPC0_DERR;
6684 params.block_address =
6685 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6686 params.num_memories = 90;
6687 params.derr = true;
6688 params.disable_clock_gating = true;
6689 extract_info_from_fw = false;
6690 break;
6691 case GAUDI_EVENT_MME0_ACC_SERR:
6692 case GAUDI_EVENT_MME1_ACC_SERR:
6693 case GAUDI_EVENT_MME2_ACC_SERR:
6694 case GAUDI_EVENT_MME3_ACC_SERR:
6695 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
6696 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6697 params.num_memories = 128;
6698 params.derr = false;
6699 params.disable_clock_gating = true;
6700 extract_info_from_fw = false;
6701 break;
6702 case GAUDI_EVENT_MME0_ACC_DERR:
6703 case GAUDI_EVENT_MME1_ACC_DERR:
6704 case GAUDI_EVENT_MME2_ACC_DERR:
6705 case GAUDI_EVENT_MME3_ACC_DERR:
6706 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
6707 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6708 params.num_memories = 128;
6709 params.derr = true;
6710 params.disable_clock_gating = true;
6711 extract_info_from_fw = false;
6712 break;
6713 case GAUDI_EVENT_MME0_SBAB_SERR:
6714 case GAUDI_EVENT_MME1_SBAB_SERR:
6715 case GAUDI_EVENT_MME2_SBAB_SERR:
6716 case GAUDI_EVENT_MME3_SBAB_SERR:
6717 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
6718 params.block_address =
6719 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6720 params.num_memories = 33;
6721 params.derr = false;
6722 params.disable_clock_gating = true;
6723 extract_info_from_fw = false;
6724 break;
6725 case GAUDI_EVENT_MME0_SBAB_DERR:
6726 case GAUDI_EVENT_MME1_SBAB_DERR:
6727 case GAUDI_EVENT_MME2_SBAB_DERR:
6728 case GAUDI_EVENT_MME3_SBAB_DERR:
6729 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
6730 params.block_address =
6731 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6732 params.num_memories = 33;
6733 params.derr = true;
6734 params.disable_clock_gating = true;
6735 extract_info_from_fw = false;
6736 break;
6737 default:
6738 return;
6741 if (extract_info_from_fw) {
6742 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6743 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6744 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6745 } else {
6746 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
6747 &ecc_syndrom, &memory_wrapper_idx);
6748 if (rc)
6749 return;
6752 dev_err(hdev->dev,
6753 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
6754 ecc_address, ecc_syndrom, memory_wrapper_idx);
6757 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
6759 u64 glbl_sts_addr, arb_err_addr;
6760 u8 index;
6761 char desc[32];
6763 switch (event_type) {
6764 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6765 index = event_type - GAUDI_EVENT_TPC0_QM;
6766 glbl_sts_addr =
6767 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
6768 arb_err_addr =
6769 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
6770 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
6771 break;
6772 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6773 index = event_type - GAUDI_EVENT_MME0_QM;
6774 glbl_sts_addr =
6775 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
6776 arb_err_addr =
6777 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
6778 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
6779 break;
6780 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
6781 index = event_type - GAUDI_EVENT_DMA0_QM;
6782 glbl_sts_addr =
6783 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
6784 arb_err_addr =
6785 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
6786 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
6787 break;
6788 case GAUDI_EVENT_NIC0_QM0:
6789 glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
6790 arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
6791 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
6792 break;
6793 case GAUDI_EVENT_NIC0_QM1:
6794 glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
6795 arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
6796 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
6797 break;
6798 case GAUDI_EVENT_NIC1_QM0:
6799 glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
6800 arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
6801 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
6802 break;
6803 case GAUDI_EVENT_NIC1_QM1:
6804 glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
6805 arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
6806 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
6807 break;
6808 case GAUDI_EVENT_NIC2_QM0:
6809 glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
6810 arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
6811 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
6812 break;
6813 case GAUDI_EVENT_NIC2_QM1:
6814 glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
6815 arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
6816 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
6817 break;
6818 case GAUDI_EVENT_NIC3_QM0:
6819 glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
6820 arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
6821 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
6822 break;
6823 case GAUDI_EVENT_NIC3_QM1:
6824 glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
6825 arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
6826 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
6827 break;
6828 case GAUDI_EVENT_NIC4_QM0:
6829 glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
6830 arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
6831 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
6832 break;
6833 case GAUDI_EVENT_NIC4_QM1:
6834 glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
6835 arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
6836 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
6837 break;
6838 default:
6839 return;
6842 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
6845 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
6846 bool razwi)
6848 char desc[64] = "";
6850 gaudi_get_event_desc(event_type, desc, sizeof(desc));
6851 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6852 event_type, desc);
6854 if (razwi) {
6855 gaudi_print_razwi_info(hdev);
6856 gaudi_print_mmu_error_info(hdev);
6860 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
6862 struct gaudi_device *gaudi = hdev->asic_specific;
6864 /* Unmask all IRQs since some could have been received
6865 * during the soft reset
6866 */
6867 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
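/*
 * Report HBM ECC/parity interrupts. When firmware security is enabled the
 * decoded information comes from the EQ entry; otherwise the driver reads
 * and clears the per-channel HBM configuration registers directly.
 */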
6870 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
6871 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
6873 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
6874 int err = 0;
6876 if (!hdev->asic_prop.fw_security_disabled) {
6877 if (!hbm_ecc_data) {
6878 dev_err(hdev->dev, "No FW ECC data");
6879 return 0;
6882 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
6883 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6884 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
6885 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6886 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
6887 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6888 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
6889 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6890 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
6891 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6892 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
6893 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6894 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
6895 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6897 dev_err(hdev->dev,
6898 "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6899 device, ch, type, wr_par, rd_par, ca_par, serr, derr);
6901 err = 1;
6903 return 0;
6906 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
6907 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
6908 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
6909 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6910 if (val) {
6911 err = 1;
6912 dev_err(hdev->dev,
6913 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6914 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
6915 (val >> 2) & 0x1, (val >> 3) & 0x1,
6916 (val >> 4) & 0x1);
6918 val2 = RREG32(base + ch * 0x1000 + 0x060);
6919 dev_err(hdev->dev,
6920 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
6921 device, ch * 2,
6922 RREG32(base + ch * 0x1000 + 0x064),
6923 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6924 (val2 & 0xFF0000) >> 16,
6925 (val2 & 0xFF000000) >> 24);
6928 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
6929 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6930 if (val) {
6931 err = 1;
6932 dev_err(hdev->dev,
6933 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6934 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
6935 (val >> 2) & 0x1, (val >> 3) & 0x1,
6936 (val >> 4) & 0x1);
6938 val2 = RREG32(base + ch * 0x1000 + 0x070);
6939 dev_err(hdev->dev,
6940 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
6941 device, ch * 2 + 1,
6942 RREG32(base + ch * 0x1000 + 0x074),
6943 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6944 (val2 & 0xFF0000) >> 16,
6945 (val2 & 0xFF000000) >> 24);
6948 /* Clear interrupts */
6949 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
6950 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
6951 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
6952 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
6953 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
6954 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
6957 val = RREG32(base + 0x8F30);
6958 val2 = RREG32(base + 0x8F34);
6959 if (val | val2) {
6960 err = 1;
6961 dev_err(hdev->dev,
6962 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
6963 device, val, val2);
6965 val = RREG32(base + 0x8F40);
6966 val2 = RREG32(base + 0x8F44);
6967 if (val | val2) {
6968 err = 1;
6969 dev_err(hdev->dev,
6970 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
6971 device, val, val2);
6974 return err;
6977 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
6979 switch (hbm_event_type) {
6980 case GAUDI_EVENT_HBM0_SPI_0:
6981 case GAUDI_EVENT_HBM0_SPI_1:
6982 return 0;
6983 case GAUDI_EVENT_HBM1_SPI_0:
6984 case GAUDI_EVENT_HBM1_SPI_1:
6985 return 1;
6986 case GAUDI_EVENT_HBM2_SPI_0:
6987 case GAUDI_EVENT_HBM2_SPI_1:
6988 return 2;
6989 case GAUDI_EVENT_HBM3_SPI_0:
6990 case GAUDI_EVENT_HBM3_SPI_1:
6991 return 3;
6992 default:
6993 break;
6996 /* Should never happen */
6997 return 0;
7000 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7001 char *interrupt_name)
7003 struct gaudi_device *gaudi = hdev->asic_specific;
7004 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7005 bool soft_reset_required = false;
7007 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7008 * gating, and thus cannot be done in CPU-CP and should be done instead
7009 * by the driver.
7010 */
7012 mutex_lock(&gaudi->clk_gate_mutex);
7014 hdev->asic_funcs->disable_clock_gating(hdev);
7016 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7017 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7019 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7020 if (tpc_interrupts_cause & BIT(i)) {
7021 dev_err_ratelimited(hdev->dev,
7022 "TPC%d_%s interrupt cause: %s\n",
7023 tpc_id, interrupt_name,
7024 gaudi_tpc_interrupts_cause[i]);
7025 /* If this is a QM error, we need to soft-reset */
7026 if (i == 15)
7027 soft_reset_required = true;
7030 /* Clear interrupts */
7031 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7033 hdev->asic_funcs->set_clock_gating(hdev);
7035 mutex_unlock(&gaudi->clk_gate_mutex);
7037 return soft_reset_required;
7040 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7042 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7045 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7047 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7050 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7051 u16 event_type)
7053 switch (event_type) {
7054 case GAUDI_EVENT_FIX_POWER_ENV_S:
7055 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7056 dev_info_ratelimited(hdev->dev,
7057 "Clock throttling due to power consumption\n");
7058 break;
7060 case GAUDI_EVENT_FIX_POWER_ENV_E:
7061 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7062 dev_info_ratelimited(hdev->dev,
7063 "Power envelop is safe, back to optimal clock\n");
7064 break;
7066 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7067 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7068 dev_info_ratelimited(hdev->dev,
7069 "Clock throttling due to overheating\n");
7070 break;
7072 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7073 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7074 dev_info_ratelimited(hdev->dev,
7075 "Thermal envelop is safe, back to optimal clock\n");
7076 break;
7078 default:
7079 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7080 event_type);
7081 break;
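/*
 * Main event-queue handler: update the event statistics and dispatch each
 * event type to its handler, triggering a hard reset where required or
 * re-arming (unmasking) the interrupt in the firmware otherwise.
 */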
7085 static void gaudi_handle_eqe(struct hl_device *hdev,
7086 struct hl_eq_entry *eq_entry)
7088 struct gaudi_device *gaudi = hdev->asic_specific;
7089 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7090 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7091 >> EQ_CTL_EVENT_TYPE_SHIFT);
7092 u8 cause;
7093 bool reset_required;
7095 gaudi->events_stat[event_type]++;
7096 gaudi->events_stat_aggregate[event_type]++;
7098 switch (event_type) {
7099 case GAUDI_EVENT_PCIE_CORE_DERR:
7100 case GAUDI_EVENT_PCIE_IF_DERR:
7101 case GAUDI_EVENT_PCIE_PHY_DERR:
7102 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7103 case GAUDI_EVENT_MME0_ACC_DERR:
7104 case GAUDI_EVENT_MME0_SBAB_DERR:
7105 case GAUDI_EVENT_MME1_ACC_DERR:
7106 case GAUDI_EVENT_MME1_SBAB_DERR:
7107 case GAUDI_EVENT_MME2_ACC_DERR:
7108 case GAUDI_EVENT_MME2_SBAB_DERR:
7109 case GAUDI_EVENT_MME3_ACC_DERR:
7110 case GAUDI_EVENT_MME3_SBAB_DERR:
7111 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7112 fallthrough;
7113 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7114 case GAUDI_EVENT_PSOC_MEM_DERR:
7115 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7116 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7117 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7118 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7119 case GAUDI_EVENT_MMU_DERR:
7120 gaudi_print_irq_info(hdev, event_type, true);
7121 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7122 if (hdev->hard_reset_on_fw_events)
7123 hl_device_reset(hdev, true, false);
7124 break;
7126 case GAUDI_EVENT_GIC500:
7127 case GAUDI_EVENT_AXI_ECC:
7128 case GAUDI_EVENT_L2_RAM_ECC:
7129 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7130 gaudi_print_irq_info(hdev, event_type, false);
7131 if (hdev->hard_reset_on_fw_events)
7132 hl_device_reset(hdev, true, false);
7133 break;
7135 case GAUDI_EVENT_HBM0_SPI_0:
7136 case GAUDI_EVENT_HBM1_SPI_0:
7137 case GAUDI_EVENT_HBM2_SPI_0:
7138 case GAUDI_EVENT_HBM3_SPI_0:
7139 gaudi_print_irq_info(hdev, event_type, false);
7140 gaudi_hbm_read_interrupts(hdev,
7141 gaudi_hbm_event_to_dev(event_type),
7142 &eq_entry->hbm_ecc_data);
7143 if (hdev->hard_reset_on_fw_events)
7144 hl_device_reset(hdev, true, false);
7145 break;
7147 case GAUDI_EVENT_HBM0_SPI_1:
7148 case GAUDI_EVENT_HBM1_SPI_1:
7149 case GAUDI_EVENT_HBM2_SPI_1:
7150 case GAUDI_EVENT_HBM3_SPI_1:
7151 gaudi_print_irq_info(hdev, event_type, false);
7152 gaudi_hbm_read_interrupts(hdev,
7153 gaudi_hbm_event_to_dev(event_type),
7154 &eq_entry->hbm_ecc_data);
7155 break;
7157 case GAUDI_EVENT_TPC0_DEC:
7158 case GAUDI_EVENT_TPC1_DEC:
7159 case GAUDI_EVENT_TPC2_DEC:
7160 case GAUDI_EVENT_TPC3_DEC:
7161 case GAUDI_EVENT_TPC4_DEC:
7162 case GAUDI_EVENT_TPC5_DEC:
7163 case GAUDI_EVENT_TPC6_DEC:
7164 case GAUDI_EVENT_TPC7_DEC:
7165 gaudi_print_irq_info(hdev, event_type, true);
7166 reset_required = gaudi_tpc_read_interrupts(hdev,
7167 tpc_dec_event_to_tpc_id(event_type),
7168 "AXI_SLV_DEC_Error");
7169 if (reset_required) {
7170 dev_err(hdev->dev, "hard reset required due to %s\n",
7171 gaudi_irq_map_table[event_type].name);
7173 if (hdev->hard_reset_on_fw_events)
7174 hl_device_reset(hdev, true, false);
7175 } else {
7176 hl_fw_unmask_irq(hdev, event_type);
7178 break;
7180 case GAUDI_EVENT_TPC0_KRN_ERR:
7181 case GAUDI_EVENT_TPC1_KRN_ERR:
7182 case GAUDI_EVENT_TPC2_KRN_ERR:
7183 case GAUDI_EVENT_TPC3_KRN_ERR:
7184 case GAUDI_EVENT_TPC4_KRN_ERR:
7185 case GAUDI_EVENT_TPC5_KRN_ERR:
7186 case GAUDI_EVENT_TPC6_KRN_ERR:
7187 case GAUDI_EVENT_TPC7_KRN_ERR:
7188 gaudi_print_irq_info(hdev, event_type, true);
7189 reset_required = gaudi_tpc_read_interrupts(hdev,
7190 tpc_krn_event_to_tpc_id(event_type),
7191 "KRN_ERR");
7192 if (reset_required) {
7193 dev_err(hdev->dev, "hard reset required due to %s\n",
7194 gaudi_irq_map_table[event_type].name);
7196 if (hdev->hard_reset_on_fw_events)
7197 hl_device_reset(hdev, true, false);
7198 } else {
7199 hl_fw_unmask_irq(hdev, event_type);
7201 break;
7203 case GAUDI_EVENT_PCIE_CORE_SERR:
7204 case GAUDI_EVENT_PCIE_IF_SERR:
7205 case GAUDI_EVENT_PCIE_PHY_SERR:
7206 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7207 case GAUDI_EVENT_MME0_ACC_SERR:
7208 case GAUDI_EVENT_MME0_SBAB_SERR:
7209 case GAUDI_EVENT_MME1_ACC_SERR:
7210 case GAUDI_EVENT_MME1_SBAB_SERR:
7211 case GAUDI_EVENT_MME2_ACC_SERR:
7212 case GAUDI_EVENT_MME2_SBAB_SERR:
7213 case GAUDI_EVENT_MME3_ACC_SERR:
7214 case GAUDI_EVENT_MME3_SBAB_SERR:
7215 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7216 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7217 case GAUDI_EVENT_PSOC_MEM_SERR:
7218 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7219 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7220 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7221 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7222 fallthrough;
7223 case GAUDI_EVENT_MMU_SERR:
7224 gaudi_print_irq_info(hdev, event_type, true);
7225 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7226 hl_fw_unmask_irq(hdev, event_type);
7227 break;
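/*
 * QMAN and engine errors (PCIe DEC, MME responses, MMU faults, RAZWI,
 * QM and DMA core errors): print the IRQ info, let
 * gaudi_handle_qman_err() report any QMAN error causes and unmask the
 * interrupt; no reset is performed for these events.
 */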
7229 case GAUDI_EVENT_PCIE_DEC:
7230 case GAUDI_EVENT_MME0_WBC_RSP:
7231 case GAUDI_EVENT_MME0_SBAB0_RSP:
7232 case GAUDI_EVENT_MME1_WBC_RSP:
7233 case GAUDI_EVENT_MME1_SBAB0_RSP:
7234 case GAUDI_EVENT_MME2_WBC_RSP:
7235 case GAUDI_EVENT_MME2_SBAB0_RSP:
7236 case GAUDI_EVENT_MME3_WBC_RSP:
7237 case GAUDI_EVENT_MME3_SBAB0_RSP:
7238 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7239 case GAUDI_EVENT_PSOC_AXI_DEC:
7240 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7241 case GAUDI_EVENT_MMU_PAGE_FAULT:
7242 case GAUDI_EVENT_MMU_WR_PERM:
7243 case GAUDI_EVENT_RAZWI_OR_ADC:
7244 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7245 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7246 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7247 fallthrough;
7248 case GAUDI_EVENT_NIC0_QM0:
7249 case GAUDI_EVENT_NIC0_QM1:
7250 case GAUDI_EVENT_NIC1_QM0:
7251 case GAUDI_EVENT_NIC1_QM1:
7252 case GAUDI_EVENT_NIC2_QM0:
7253 case GAUDI_EVENT_NIC2_QM1:
7254 case GAUDI_EVENT_NIC3_QM0:
7255 case GAUDI_EVENT_NIC3_QM1:
7256 case GAUDI_EVENT_NIC4_QM0:
7257 case GAUDI_EVENT_NIC4_QM1:
7258 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7259 gaudi_print_irq_info(hdev, event_type, true);
7260 gaudi_handle_qman_err(hdev, event_type);
7261 hl_fw_unmask_irq(hdev, event_type);
7262 break;
7264 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7265 gaudi_print_irq_info(hdev, event_type, true);
7266 if (hdev->hard_reset_on_fw_events)
7267 hl_device_reset(hdev, true, false);
7268 break;
7270 case GAUDI_EVENT_TPC0_BMON_SPMU:
7271 case GAUDI_EVENT_TPC1_BMON_SPMU:
7272 case GAUDI_EVENT_TPC2_BMON_SPMU:
7273 case GAUDI_EVENT_TPC3_BMON_SPMU:
7274 case GAUDI_EVENT_TPC4_BMON_SPMU:
7275 case GAUDI_EVENT_TPC5_BMON_SPMU:
7276 case GAUDI_EVENT_TPC6_BMON_SPMU:
7277 case GAUDI_EVENT_TPC7_BMON_SPMU:
7278 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7279 gaudi_print_irq_info(hdev, event_type, false);
7280 hl_fw_unmask_irq(hdev, event_type);
7281 break;
7283 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7284 gaudi_print_clk_change_info(hdev, event_type);
7285 hl_fw_unmask_irq(hdev, event_type);
7286 break;
7288 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7289 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7290 dev_err(hdev->dev,
7291 "Received high temp H/W interrupt %d (cause %d)\n",
7292 event_type, cause);
7293 break;
7295 default:
7296 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7297 event_type);
7298 break;
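/*
 * Return the ASIC event counters: either the aggregated statistics or
 * the current (clearable) statistics, along with the size in bytes of
 * the returned buffer.
 */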
7302 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7303 u32 *size)
7305 struct gaudi_device *gaudi = hdev->asic_specific;
7307 if (aggregate) {
7308 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7309 return gaudi->events_stat_aggregate;
7312 *size = (u32) sizeof(gaudi->events_stat);
7313 return gaudi->events_stat;
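/*
 * Full MMU cache invalidation (L0 + L1): kick the STLB invalidation
 * with an incremented producer index and poll mmSTLB_INV_PS until the
 * hardware reports completion. A timeout here is fatal and triggers a
 * hard reset.
 */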
7316 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
7317 u32 flags)
7319 struct gaudi_device *gaudi = hdev->asic_specific;
7320 u32 status, timeout_usec;
7321 int rc;
7323 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7324 hdev->hard_reset_pending)
7325 return 0;
7327 if (hdev->pldm)
7328 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7329 else
7330 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7332 mutex_lock(&hdev->mmu_cache_lock);
7334 /* L0 & L1 invalidation */
7335 WREG32(mmSTLB_INV_PS, 3);
7336 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7337 WREG32(mmSTLB_INV_PS, 2);
7339 rc = hl_poll_timeout(
7340 hdev,
7341 mmSTLB_INV_PS,
7342 status,
7343 !status,
7344 1000,
7345 timeout_usec);
7347 WREG32(mmSTLB_INV_SET, 0);
7349 mutex_unlock(&hdev->mmu_cache_lock);
7351 if (rc) {
7352 dev_err_ratelimited(hdev->dev,
7353 "MMU cache invalidation timeout\n");
7354 hl_device_reset(hdev, true, false);
7357 return rc;
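/*
 * Range-based MMU cache invalidation. The asid/va/size parameters are
 * currently unused (see the TODO below): the entire cache is
 * invalidated by bumping the STLB producer index and waiting for the
 * consumer index to catch up.
 */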
7360 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7361 bool is_hard, u32 asid, u64 va, u64 size)
7363 struct gaudi_device *gaudi = hdev->asic_specific;
7364 u32 status, timeout_usec;
7365 u32 inv_data;
7366 u32 pi;
7367 int rc;
7369 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7370 hdev->hard_reset_pending)
7371 return 0;
7373 mutex_lock(&hdev->mmu_cache_lock);
7375 if (hdev->pldm)
7376 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7377 else
7378 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7381 * TODO: currently invalidate entire L0 & L1 as in regular hard
7382 * invalidation. Need to apply invalidation of specific cache
7383 * lines with mask of ASID & VA & size.
7384 * Note that L1 will be flushed entirely in any case.
7387 /* L0 & L1 invalidation */
7388 inv_data = RREG32(mmSTLB_CACHE_INV);
7389 /* PI is 8 bit */
7390 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7391 WREG32(mmSTLB_CACHE_INV,
7392 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7394 rc = hl_poll_timeout(
7395 hdev,
7396 mmSTLB_INV_CONSUMER_INDEX,
7397 status,
7398 status == pi,
7399 1000,
7400 timeout_usec);
7402 mutex_unlock(&hdev->mmu_cache_lock);
7404 if (rc) {
7405 dev_err_ratelimited(hdev->dev,
7406 "MMU cache invalidation timeout\n");
7407 hl_device_reset(hdev, true, false);
7410 return rc;
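/*
 * Program the hop-0 page table address of an ASID into the MMU: write
 * the ASID and the split physical address (bits 43:12 and 49:44), set
 * the MMU busy bit and poll until the configuration is latched by the
 * hardware.
 */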
7413 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7414 u32 asid, u64 phys_addr)
7416 u32 status, timeout_usec;
7417 int rc;
7419 if (hdev->pldm)
7420 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7421 else
7422 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7424 WREG32(MMU_ASID, asid);
7425 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7426 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7427 WREG32(MMU_BUSY, 0x80000000);
7429 rc = hl_poll_timeout(
7430 hdev,
7431 MMU_BUSY,
7432 status,
7433 !(status & 0x80000000),
7434 1000,
7435 timeout_usec);
7437 if (rc) {
7438 dev_err(hdev->dev,
7439 "Timeout during MMU hop0 config of asid %d\n", asid);
7440 return rc;
7443 return 0;
7446 static int gaudi_send_heartbeat(struct hl_device *hdev)
7448 struct gaudi_device *gaudi = hdev->asic_specific;
7450 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7451 return 0;
7453 return hl_fw_send_heartbeat(hdev);
7456 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7458 struct gaudi_device *gaudi = hdev->asic_specific;
7459 struct asic_fixed_properties *prop = &hdev->asic_prop;
7460 int rc;
7462 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7463 return 0;
7465 rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
7466 if (rc)
7467 return rc;
7469 if (!strlen(prop->cpucp_info.card_name))
7470 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
7471 CARD_NAME_MAX_LEN);
7473 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
7475 if (hdev->card_type == cpucp_card_type_pci)
7476 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
7477 else if (hdev->card_type == cpucp_card_type_pmc)
7478 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
7480 hdev->max_power = prop->max_power_default;
7482 return 0;
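/*
 * Check whether all engines (DMA, TPC, MME, NIC) are idle by reading
 * their QMAN/core status registers. Clock gating is temporarily
 * disabled so these registers can be accessed. Each busy engine sets
 * its bit in @mask, and an optional debugfs seq_file receives a
 * formatted status table.
 */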
7485 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
7486 struct seq_file *s)
7488 struct gaudi_device *gaudi = hdev->asic_specific;
7489 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
7490 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
7491 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
7492 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
7493 bool is_idle = true, is_eng_idle, is_slave;
7494 u64 offset;
7495 int i, dma_id, port;
7497 mutex_lock(&gaudi->clk_gate_mutex);
7499 hdev->asic_funcs->disable_clock_gating(hdev);
7501 if (s)
7502 seq_puts(s,
7503 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
7504 "--- ------- ------------ ---------- -------------\n");
7506 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
7507 dma_id = gaudi_dma_assignment[i];
7508 offset = dma_id * DMA_QMAN_OFFSET;
7510 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
7511 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
7512 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
7513 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7514 IS_DMA_IDLE(dma_core_sts0);
7515 is_idle &= is_eng_idle;
7517 if (mask)
7518 *mask |= ((u64) !is_eng_idle) <<
7519 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
7520 if (s)
7521 seq_printf(s, fmt, dma_id,
7522 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
7523 qm_cgm_sts, dma_core_sts0);
7526 if (s)
7527 seq_puts(s,
7528 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
7529 "--- ------- ------------ ---------- ----------\n");
7531 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
7532 offset = i * TPC_QMAN_OFFSET;
7533 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
7534 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
7535 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
7536 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7537 IS_TPC_IDLE(tpc_cfg_sts);
7538 is_idle &= is_eng_idle;
7540 if (mask)
7541 *mask |= ((u64) !is_eng_idle) <<
7542 (GAUDI_ENGINE_ID_TPC_0 + i);
7543 if (s)
7544 seq_printf(s, fmt, i,
7545 is_eng_idle ? "Y" : "N",
7546 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7549 if (s)
7550 seq_puts(s,
7551 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
7552 "--- ------- ------------ ---------- -----------\n");
7554 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
7555 offset = i * MME_QMAN_OFFSET;
7556 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
7557 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
7559 /* MME 1 & 3 are slaves, no need to check their QMANs */
7560 is_slave = i % 2;
7561 if (!is_slave) {
7562 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
7563 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
7564 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7567 is_idle &= is_eng_idle;
7569 if (mask)
7570 *mask |= ((u64) !is_eng_idle) <<
7571 (GAUDI_ENGINE_ID_MME_0 + i);
7572 if (s) {
7573 if (!is_slave)
7574 seq_printf(s, fmt, i,
7575 is_eng_idle ? "Y" : "N",
7576 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
7577 else
7578 seq_printf(s, mme_slave_fmt, i,
7579 is_eng_idle ? "Y" : "N", "-",
7580 "-", mme_arch_sts);
7584 if (s)
7585 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7586 "--- ------- ------------ ----------\n");
7588 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
7589 offset = i * NIC_MACRO_QMAN_OFFSET;
7590 port = 2 * i;
7591 if (hdev->nic_ports_mask & BIT(port)) {
7592 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7593 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7594 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7595 is_idle &= is_eng_idle;
7597 if (mask)
7598 *mask |= ((u64) !is_eng_idle) <<
7599 (GAUDI_ENGINE_ID_NIC_0 + port);
7600 if (s)
7601 seq_printf(s, nic_fmt, port,
7602 is_eng_idle ? "Y" : "N",
7603 qm_glbl_sts0, qm_cgm_sts);
7606 port = 2 * i + 1;
7607 if (hdev->nic_ports_mask & BIT(port)) {
7608 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
7609 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
7610 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7611 is_idle &= is_eng_idle;
7613 if (mask)
7614 *mask |= ((u64) !is_eng_idle) <<
7615 (GAUDI_ENGINE_ID_NIC_0 + port);
7616 if (s)
7617 seq_printf(s, nic_fmt, port,
7618 is_eng_idle ? "Y" : "N",
7619 qm_glbl_sts0, qm_cgm_sts);
7623 if (s)
7624 seq_puts(s, "\n");
7626 hdev->asic_funcs->set_clock_gating(hdev);
7628 mutex_unlock(&gaudi->clk_gate_mutex);
7630 return is_idle;
7633 static void gaudi_hw_queues_lock(struct hl_device *hdev)
7634 __acquires(&gaudi->hw_queues_lock)
7636 struct gaudi_device *gaudi = hdev->asic_specific;
7638 spin_lock(&gaudi->hw_queues_lock);
7641 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
7642 __releases(&gaudi->hw_queues_lock)
7644 struct gaudi_device *gaudi = hdev->asic_specific;
7646 spin_unlock(&gaudi->hw_queues_lock);
7649 static u32 gaudi_get_pci_id(struct hl_device *hdev)
7651 return hdev->pdev->device;
7654 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
7655 size_t max_size)
7657 struct gaudi_device *gaudi = hdev->asic_specific;
7659 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7660 return 0;
7662 return hl_fw_get_eeprom_data(hdev, data, max_size);
7666 * this function should be used only during initialization and/or after reset,
7667 * when there are no active users.
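 * The TPC kernel is loaded by pointing the instruction cache base at
 * tpc_kernel, invalidating and prefetching the icache, then issuing
 * TPC_EXECUTE and polling until the vector pipe is empty and the
 * work-queue in-flight counter drops to zero.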
7669 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
7670 u32 tpc_id)
7672 struct gaudi_device *gaudi = hdev->asic_specific;
7673 u64 kernel_timeout;
7674 u32 status, offset;
7675 int rc;
7677 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
7679 if (hdev->pldm)
7680 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
7681 else
7682 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
7684 mutex_lock(&gaudi->clk_gate_mutex);
7686 hdev->asic_funcs->disable_clock_gating(hdev);
7688 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
7689 lower_32_bits(tpc_kernel));
7690 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
7691 upper_32_bits(tpc_kernel));
7693 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
7694 lower_32_bits(tpc_kernel));
7695 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
7696 upper_32_bits(tpc_kernel));
7697 /* set a valid LUT pointer, content is of no significance */
7698 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
7699 lower_32_bits(tpc_kernel));
7700 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
7701 upper_32_bits(tpc_kernel));
7703 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
7704 lower_32_bits(CFG_BASE +
7705 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
7707 WREG32(mmTPC0_CFG_TPC_CMD + offset,
7708 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
7709 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
7710 /* wait a bit for the engine to start executing */
7711 usleep_range(1000, 1500);
7713 /* wait until engine has finished executing */
7714 rc = hl_poll_timeout(
7715 hdev,
7716 mmTPC0_CFG_STATUS + offset,
7717 status,
7718 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7719 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7720 1000,
7721 kernel_timeout);
7723 if (rc) {
7724 dev_err(hdev->dev,
7725 "Timeout while waiting for TPC%d icache prefetch\n",
7726 tpc_id);
7727 hdev->asic_funcs->set_clock_gating(hdev);
7728 mutex_unlock(&gaudi->clk_gate_mutex);
7729 return -EIO;
7732 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
7733 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
7735 /* wait a bit for the engine to start executing */
7736 usleep_range(1000, 1500);
7738 /* wait until engine has finished executing */
7739 rc = hl_poll_timeout(
7740 hdev,
7741 mmTPC0_CFG_STATUS + offset,
7742 status,
7743 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7744 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7745 1000,
7746 kernel_timeout);
7748 if (rc) {
7749 dev_err(hdev->dev,
7750 "Timeout while waiting for TPC%d vector pipe\n",
7751 tpc_id);
7752 hdev->asic_funcs->set_clock_gating(hdev);
7753 mutex_unlock(&gaudi->clk_gate_mutex);
7754 return -EIO;
7757 rc = hl_poll_timeout(
7758 hdev,
7759 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
7760 status,
7761 (status == 0),
7762 1000,
7763 kernel_timeout);
7765 hdev->asic_funcs->set_clock_gating(hdev);
7766 mutex_unlock(&gaudi->clk_gate_mutex);
7768 if (rc) {
7769 dev_err(hdev->dev,
7770 "Timeout while waiting for TPC%d kernel to execute\n",
7771 tpc_id);
7772 return -EIO;
7775 return 0;
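/*
 * Set up the per-context internal command buffer pool: allocate a
 * coherent host buffer, create a gen_pool whose minimum allocation
 * order fits one collective CB, reserve a host VA block and map the
 * buffer through the device MMU so the engines can fetch from it.
 */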
7778 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
7779 struct hl_ctx *ctx)
7781 struct gaudi_device *gaudi = hdev->asic_specific;
7782 int min_alloc_order, rc, collective_cb_size;
7784 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7785 return 0;
7787 hdev->internal_cb_pool_virt_addr =
7788 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
7789 HOST_SPACE_INTERNAL_CB_SZ,
7790 &hdev->internal_cb_pool_dma_addr,
7791 GFP_KERNEL | __GFP_ZERO);
7793 if (!hdev->internal_cb_pool_virt_addr)
7794 return -ENOMEM;
7796 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
7797 sizeof(struct packet_fence);
7798 min_alloc_order = ilog2(collective_cb_size);
7800 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
7801 if (!hdev->internal_cb_pool) {
7802 dev_err(hdev->dev,
7803 "Failed to create internal CB pool\n");
7804 rc = -ENOMEM;
7805 goto free_internal_cb_pool;
7808 rc = gen_pool_add(hdev->internal_cb_pool,
7809 (uintptr_t) hdev->internal_cb_pool_virt_addr,
7810 HOST_SPACE_INTERNAL_CB_SZ, -1);
7811 if (rc) {
7812 dev_err(hdev->dev,
7813 "Failed to add memory to internal CB pool\n");
7814 rc = -EFAULT;
7815 goto destroy_internal_cb_pool;
7818 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
7819 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
7820 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
7822 if (!hdev->internal_cb_va_base)
7823 goto destroy_internal_cb_pool;
7825 mutex_lock(&ctx->mmu_lock);
7826 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
7827 hdev->internal_cb_pool_dma_addr,
7828 HOST_SPACE_INTERNAL_CB_SZ);
7830 hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
7831 mutex_unlock(&ctx->mmu_lock);
7833 if (rc)
7834 goto unreserve_internal_cb_pool;
7836 return 0;
7838 unreserve_internal_cb_pool:
7839 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
7840 HOST_SPACE_INTERNAL_CB_SZ);
7841 destroy_internal_cb_pool:
7842 gen_pool_destroy(hdev->internal_cb_pool);
7843 free_internal_cb_pool:
7844 hdev->asic_funcs->asic_dma_free_coherent(hdev,
7845 HOST_SPACE_INTERNAL_CB_SZ,
7846 hdev->internal_cb_pool_virt_addr,
7847 hdev->internal_cb_pool_dma_addr);
7849 return rc;
7852 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
7853 struct hl_ctx *ctx)
7855 struct gaudi_device *gaudi = hdev->asic_specific;
7857 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7858 return;
7860 mutex_lock(&ctx->mmu_lock);
7861 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
7862 HOST_SPACE_INTERNAL_CB_SZ);
7863 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
7864 HOST_SPACE_INTERNAL_CB_SZ);
7865 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
7866 mutex_unlock(&ctx->mmu_lock);
7868 gen_pool_destroy(hdev->internal_cb_pool);
7870 hdev->asic_funcs->asic_dma_free_coherent(hdev,
7871 HOST_SPACE_INTERNAL_CB_SZ,
7872 hdev->internal_cb_pool_virt_addr,
7873 hdev->internal_cb_pool_dma_addr);
7876 static int gaudi_ctx_init(struct hl_ctx *ctx)
7878 gaudi_mmu_prepare(ctx->hdev, ctx->asid);
7879 return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
7882 static void gaudi_ctx_fini(struct hl_ctx *ctx)
7884 struct hl_device *hdev = ctx->hdev;
7886 /* Gaudi will NEVER support more than a single compute context.
7887 * Therefore, don't clear anything unless it is the compute context
7889 if (hdev->compute_ctx != ctx)
7890 return;
7892 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
7895 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
7897 return gaudi_cq_assignment[cq_idx];
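/*
 * CB sizes for the signal/wait flow: a signal CB holds one MSG_SHORT
 * (the SOB increment), and a wait CB holds four MSG_SHORTs (three for
 * monitor setup, one to arm it) plus a FENCE packet. Both sizes also
 * reserve room for two trailing MSG_PROT packets that are appended
 * when the CB is finalized.
 */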
7900 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
7902 return sizeof(struct packet_msg_short) +
7903 sizeof(struct packet_msg_prot) * 2;
7906 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
7908 return sizeof(struct packet_msg_short) * 4 +
7909 sizeof(struct packet_fence) +
7910 sizeof(struct packet_msg_prot) * 2;
7913 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
7914 u32 size, bool eb)
7916 struct hl_cb *cb = (struct hl_cb *) data;
7917 struct packet_msg_short *pkt;
7918 u32 value, ctl, pkt_size = sizeof(*pkt);
7920 pkt = cb->kernel_address + size;
7921 memset(pkt, 0, pkt_size);
7923 /* Inc by 1, Mode ADD */
7924 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
7925 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
7927 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
7928 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
7929 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
7930 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
7931 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
7932 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7933 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
7935 pkt->value = cpu_to_le32(value);
7936 pkt->ctl = cpu_to_le32(ctl);
7938 return size + pkt_size;
7941 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
7942 u16 addr)
7944 u32 ctl, pkt_size = sizeof(*pkt);
7946 memset(pkt, 0, pkt_size);
7948 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
7949 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
7950 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
7951 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
7952 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7953 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
7955 pkt->value = cpu_to_le32(value);
7956 pkt->ctl = cpu_to_le32(ctl);
7958 return pkt_size;
7961 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
7962 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
7963 u16 sob_val, u16 mon_id)
7965 u64 monitor_base;
7966 u32 ctl, value, pkt_size = sizeof(*pkt);
7967 u16 msg_addr_offset;
7968 u8 mask;
7970 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
7971 dev_err(hdev->dev,
7972 "sob_base %u (mask %#x) is not valid\n",
7973 sob_base, sob_mask);
7974 return 0;
7978 * monitor_base should be the content of the base0 address registers,
7979 * so it will be added to the msg short offsets
7981 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
7983 msg_addr_offset =
7984 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
7985 monitor_base;
7987 memset(pkt, 0, pkt_size);
7989 /* Monitor config packet: bind the monitor to a sync object */
7990 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
7991 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
7992 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
7993 0); /* GREATER OR EQUAL */
7994 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
7996 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
7997 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
7998 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
7999 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8000 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
8001 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
8002 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
8004 pkt->value = cpu_to_le32(value);
8005 pkt->ctl = cpu_to_le32(ctl);
8007 return pkt_size;
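/*
 * Build a FENCE packet that waits on fence ID 2 (matching the
 * CP_FENCE2_RDATA registers used by gaudi_get_fence_addr() below) for
 * a target value of 1 and decrements it by 1 once reached.
 */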
8010 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8012 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8014 memset(pkt, 0, pkt_size);
8016 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8017 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8018 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8020 ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
8021 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
8022 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
8023 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
8025 pkt->cfg = cpu_to_le32(cfg);
8026 pkt->ctl = cpu_to_le32(ctl);
8028 return pkt_size;
8031 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8033 u32 offset, nic_index;
8035 switch (queue_id) {
8036 case GAUDI_QUEUE_ID_DMA_0_0:
8037 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8038 break;
8039 case GAUDI_QUEUE_ID_DMA_0_1:
8040 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8041 break;
8042 case GAUDI_QUEUE_ID_DMA_0_2:
8043 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8044 break;
8045 case GAUDI_QUEUE_ID_DMA_0_3:
8046 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8047 break;
8048 case GAUDI_QUEUE_ID_DMA_1_0:
8049 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8050 break;
8051 case GAUDI_QUEUE_ID_DMA_1_1:
8052 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8053 break;
8054 case GAUDI_QUEUE_ID_DMA_1_2:
8055 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8056 break;
8057 case GAUDI_QUEUE_ID_DMA_1_3:
8058 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8059 break;
8060 case GAUDI_QUEUE_ID_DMA_5_0:
8061 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8062 break;
8063 case GAUDI_QUEUE_ID_DMA_5_1:
8064 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8065 break;
8066 case GAUDI_QUEUE_ID_DMA_5_2:
8067 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8068 break;
8069 case GAUDI_QUEUE_ID_DMA_5_3:
8070 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8071 break;
8072 case GAUDI_QUEUE_ID_TPC_7_0:
8073 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8074 break;
8075 case GAUDI_QUEUE_ID_TPC_7_1:
8076 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8077 break;
8078 case GAUDI_QUEUE_ID_TPC_7_2:
8079 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8080 break;
8081 case GAUDI_QUEUE_ID_TPC_7_3:
8082 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8083 break;
8084 case GAUDI_QUEUE_ID_NIC_0_0:
8085 case GAUDI_QUEUE_ID_NIC_1_0:
8086 case GAUDI_QUEUE_ID_NIC_2_0:
8087 case GAUDI_QUEUE_ID_NIC_3_0:
8088 case GAUDI_QUEUE_ID_NIC_4_0:
8089 case GAUDI_QUEUE_ID_NIC_5_0:
8090 case GAUDI_QUEUE_ID_NIC_6_0:
8091 case GAUDI_QUEUE_ID_NIC_7_0:
8092 case GAUDI_QUEUE_ID_NIC_8_0:
8093 case GAUDI_QUEUE_ID_NIC_9_0:
8094 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8095 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8096 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8097 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8098 break;
8099 case GAUDI_QUEUE_ID_NIC_0_1:
8100 case GAUDI_QUEUE_ID_NIC_1_1:
8101 case GAUDI_QUEUE_ID_NIC_2_1:
8102 case GAUDI_QUEUE_ID_NIC_3_1:
8103 case GAUDI_QUEUE_ID_NIC_4_1:
8104 case GAUDI_QUEUE_ID_NIC_5_1:
8105 case GAUDI_QUEUE_ID_NIC_6_1:
8106 case GAUDI_QUEUE_ID_NIC_7_1:
8107 case GAUDI_QUEUE_ID_NIC_8_1:
8108 case GAUDI_QUEUE_ID_NIC_9_1:
8109 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8110 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8111 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8112 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8113 break;
8114 case GAUDI_QUEUE_ID_NIC_0_2:
8115 case GAUDI_QUEUE_ID_NIC_1_2:
8116 case GAUDI_QUEUE_ID_NIC_2_2:
8117 case GAUDI_QUEUE_ID_NIC_3_2:
8118 case GAUDI_QUEUE_ID_NIC_4_2:
8119 case GAUDI_QUEUE_ID_NIC_5_2:
8120 case GAUDI_QUEUE_ID_NIC_6_2:
8121 case GAUDI_QUEUE_ID_NIC_7_2:
8122 case GAUDI_QUEUE_ID_NIC_8_2:
8123 case GAUDI_QUEUE_ID_NIC_9_2:
8124 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8125 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8126 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8127 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8128 break;
8129 case GAUDI_QUEUE_ID_NIC_0_3:
8130 case GAUDI_QUEUE_ID_NIC_1_3:
8131 case GAUDI_QUEUE_ID_NIC_2_3:
8132 case GAUDI_QUEUE_ID_NIC_3_3:
8133 case GAUDI_QUEUE_ID_NIC_4_3:
8134 case GAUDI_QUEUE_ID_NIC_5_3:
8135 case GAUDI_QUEUE_ID_NIC_6_3:
8136 case GAUDI_QUEUE_ID_NIC_7_3:
8137 case GAUDI_QUEUE_ID_NIC_8_3:
8138 case GAUDI_QUEUE_ID_NIC_9_3:
8139 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8140 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8141 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8142 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8143 break;
8144 default:
8145 return -EINVAL;
8148 *addr = CFG_BASE + offset;
8150 return 0;
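/*
 * Emit the three monitor configuration MSG_SHORT packets: the low and
 * high halves of the payload address (the fence register) and the
 * payload data (1), so the monitor writes 1 to the fence address when
 * it fires.
 */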
8153 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8155 u64 monitor_base;
8156 u32 size = 0;
8157 u16 msg_addr_offset;
8160 * monitor_base should be the content of the base0 address registers,
8161 * so it will be added to the msg short offsets
8163 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8165 /* First monitor config packet: low address of the sync */
8166 msg_addr_offset =
8167 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8168 monitor_base;
8170 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8171 msg_addr_offset);
8173 /* Second monitor config packet: high address of the sync */
8174 msg_addr_offset =
8175 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8176 monitor_base;
8178 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8179 msg_addr_offset);
8182 * Third monitor config packet: the payload, i.e. what to write when the
8183 * sync triggers
8185 msg_addr_offset =
8186 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8187 monitor_base;
8189 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8191 return size;
8194 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8195 struct hl_gen_wait_properties *prop)
8197 struct hl_cb *cb = (struct hl_cb *) prop->data;
8198 void *buf = cb->kernel_address;
8199 u64 fence_addr = 0;
8200 u32 size = prop->size;
8202 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8203 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8204 prop->q_idx);
8205 return 0;
8208 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8209 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8210 prop->sob_mask, prop->sob_val, prop->mon_id);
8211 size += gaudi_add_fence_pkt(buf + size);
8213 return size;
8216 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8218 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8220 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8221 hw_sob->sob_id);
8223 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
8226 kref_init(&hw_sob->kref);
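/*
 * The boot firmware leaves a magic value in a non-reset scratch
 * register when the host (POWER9) supports full 64-bit DMA addressing;
 * otherwise fall back to the default 48-bit DMA mask.
 */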
8229 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8231 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8232 HL_POWER9_HOST_MAGIC) {
8233 hdev->power9_64bit_dma_enable = 1;
8234 hdev->dma_mask = 64;
8235 } else {
8236 hdev->power9_64bit_dma_enable = 0;
8237 hdev->dma_mask = 48;
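/*
 * The device timestamp is a free-running 64-bit counter exposed as two
 * 32-bit registers (upper and lower halves).
 */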
8241 static u64 gaudi_get_device_time(struct hl_device *hdev)
8243 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8245 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8248 static const struct hl_asic_funcs gaudi_funcs = {
8249 .early_init = gaudi_early_init,
8250 .early_fini = gaudi_early_fini,
8251 .late_init = gaudi_late_init,
8252 .late_fini = gaudi_late_fini,
8253 .sw_init = gaudi_sw_init,
8254 .sw_fini = gaudi_sw_fini,
8255 .hw_init = gaudi_hw_init,
8256 .hw_fini = gaudi_hw_fini,
8257 .halt_engines = gaudi_halt_engines,
8258 .suspend = gaudi_suspend,
8259 .resume = gaudi_resume,
8260 .cb_mmap = gaudi_cb_mmap,
8261 .ring_doorbell = gaudi_ring_doorbell,
8262 .pqe_write = gaudi_pqe_write,
8263 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
8264 .asic_dma_free_coherent = gaudi_dma_free_coherent,
8265 .scrub_device_mem = gaudi_scrub_device_mem,
8266 .get_int_queue_base = gaudi_get_int_queue_base,
8267 .test_queues = gaudi_test_queues,
8268 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
8269 .asic_dma_pool_free = gaudi_dma_pool_free,
8270 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
8271 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
8272 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
8273 .cs_parser = gaudi_cs_parser,
8274 .asic_dma_map_sg = gaudi_dma_map_sg,
8275 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
8276 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
8277 .update_eq_ci = gaudi_update_eq_ci,
8278 .context_switch = gaudi_context_switch,
8279 .restore_phase_topology = gaudi_restore_phase_topology,
8280 .debugfs_read32 = gaudi_debugfs_read32,
8281 .debugfs_write32 = gaudi_debugfs_write32,
8282 .debugfs_read64 = gaudi_debugfs_read64,
8283 .debugfs_write64 = gaudi_debugfs_write64,
8284 .add_device_attr = gaudi_add_device_attr,
8285 .handle_eqe = gaudi_handle_eqe,
8286 .set_pll_profile = gaudi_set_pll_profile,
8287 .get_events_stat = gaudi_get_events_stat,
8288 .read_pte = gaudi_read_pte,
8289 .write_pte = gaudi_write_pte,
8290 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
8291 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
8292 .send_heartbeat = gaudi_send_heartbeat,
8293 .set_clock_gating = gaudi_set_clock_gating,
8294 .disable_clock_gating = gaudi_disable_clock_gating,
8295 .debug_coresight = gaudi_debug_coresight,
8296 .is_device_idle = gaudi_is_device_idle,
8297 .soft_reset_late_init = gaudi_soft_reset_late_init,
8298 .hw_queues_lock = gaudi_hw_queues_lock,
8299 .hw_queues_unlock = gaudi_hw_queues_unlock,
8300 .get_pci_id = gaudi_get_pci_id,
8301 .get_eeprom_data = gaudi_get_eeprom_data,
8302 .send_cpu_message = gaudi_send_cpu_message,
8303 .pci_bars_map = gaudi_pci_bars_map,
8304 .init_iatu = gaudi_init_iatu,
8305 .rreg = hl_rreg,
8306 .wreg = hl_wreg,
8307 .halt_coresight = gaudi_halt_coresight,
8308 .ctx_init = gaudi_ctx_init,
8309 .ctx_fini = gaudi_ctx_fini,
8310 .get_clk_rate = gaudi_get_clk_rate,
8311 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
8312 .read_device_fw_version = gaudi_read_device_fw_version,
8313 .load_firmware_to_device = gaudi_load_firmware_to_device,
8314 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
8315 .get_signal_cb_size = gaudi_get_signal_cb_size,
8316 .get_wait_cb_size = gaudi_get_wait_cb_size,
8317 .gen_signal_cb = gaudi_gen_signal_cb,
8318 .gen_wait_cb = gaudi_gen_wait_cb,
8319 .reset_sob = gaudi_reset_sob,
8320 .reset_sob_group = gaudi_reset_sob_group,
8321 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
8322 .get_device_time = gaudi_get_device_time,
8323 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
8324 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs
8328 * gaudi_set_asic_funcs - set GAUDI function pointers
8330 * @hdev: pointer to hl_device structure
8333 void gaudi_set_asic_funcs(struct hl_device *hdev)
8335 hdev->asic_funcs = &gaudi_funcs;