2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/log2.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/mutex.h>
30 #include <linux/device.h>
32 #include "kfd_pm4_headers.h"
33 #include "kfd_pm4_headers_diq.h"
34 #include "kfd_kernel_queue.h"
36 #include "kfd_pm4_opcodes.h"
38 #include "kfd_dbgmgr.h"
39 #include "kfd_dbgdev.h"
40 #include "kfd_device_queue_manager.h"
41 #include "../../radeon/cik_reg.h"
43 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev
*dev
)
45 BUG_ON(!dev
|| !dev
->kfd2kgd
);
47 dev
->kfd2kgd
->address_watch_disable(dev
->kgd
);
50 static int dbgdev_diq_submit_ib(struct kfd_dbgdev
*dbgdev
,
51 unsigned int pasid
, uint64_t vmid0_address
,
52 uint32_t *packet_buff
, size_t size_in_bytes
)
54 struct pm4__release_mem
*rm_packet
;
55 struct pm4__indirect_buffer_pasid
*ib_packet
;
56 struct kfd_mem_obj
*mem_obj
;
57 size_t pq_packets_size_in_bytes
;
58 union ULARGE_INTEGER
*largep
;
59 union ULARGE_INTEGER addr
;
60 struct kernel_queue
*kq
;
62 unsigned int *ib_packet_buff
;
65 BUG_ON(!dbgdev
|| !dbgdev
->kq
|| !packet_buff
|| !size_in_bytes
);
69 pq_packets_size_in_bytes
= sizeof(struct pm4__release_mem
) +
70 sizeof(struct pm4__indirect_buffer_pasid
);
73 * We acquire a buffer from DIQ
74 * The receive packet buff will be sitting on the Indirect Buffer
75 * and in the PQ we put the IB packet + sync packet(s).
77 status
= kq
->ops
.acquire_packet_buffer(kq
,
78 pq_packets_size_in_bytes
/ sizeof(uint32_t),
81 pr_err("amdkfd: acquire_packet_buffer failed\n");
85 memset(ib_packet_buff
, 0, pq_packets_size_in_bytes
);
87 ib_packet
= (struct pm4__indirect_buffer_pasid
*) (ib_packet_buff
);
89 ib_packet
->header
.count
= 3;
90 ib_packet
->header
.opcode
= IT_INDIRECT_BUFFER_PASID
;
91 ib_packet
->header
.type
= PM4_TYPE_3
;
93 largep
= (union ULARGE_INTEGER
*) &vmid0_address
;
95 ib_packet
->bitfields2
.ib_base_lo
= largep
->u
.low_part
>> 2;
96 ib_packet
->bitfields3
.ib_base_hi
= largep
->u
.high_part
;
98 ib_packet
->control
= (1 << 23) | (1 << 31) |
99 ((size_in_bytes
/ sizeof(uint32_t)) & 0xfffff);
101 ib_packet
->bitfields5
.pasid
= pasid
;
104 * for now we use release mem for GPU-CPU synchronization
105 * Consider WaitRegMem + WriteData as a better alternative
106 * we get a GART allocations ( gpu/cpu mapping),
107 * for the sync variable, and wait until:
109 * (b) Sync var is written by CP to mem.
111 rm_packet
= (struct pm4__release_mem
*) (ib_packet_buff
+
112 (sizeof(struct pm4__indirect_buffer_pasid
) /
113 sizeof(unsigned int)));
115 status
= kfd_gtt_sa_allocate(dbgdev
->dev
, sizeof(uint64_t),
119 pr_err("amdkfd: Failed to allocate GART memory\n");
120 kq
->ops
.rollback_packet(kq
);
124 rm_state
= (uint64_t *) mem_obj
->cpu_ptr
;
126 *rm_state
= QUEUESTATE__ACTIVE_COMPLETION_PENDING
;
128 rm_packet
->header
.opcode
= IT_RELEASE_MEM
;
129 rm_packet
->header
.type
= PM4_TYPE_3
;
130 rm_packet
->header
.count
= sizeof(struct pm4__release_mem
) /
131 sizeof(unsigned int) - 2;
133 rm_packet
->bitfields2
.event_type
= CACHE_FLUSH_AND_INV_TS_EVENT
;
134 rm_packet
->bitfields2
.event_index
=
135 event_index___release_mem__end_of_pipe
;
137 rm_packet
->bitfields2
.cache_policy
= cache_policy___release_mem__lru
;
138 rm_packet
->bitfields2
.atc
= 0;
139 rm_packet
->bitfields2
.tc_wb_action_ena
= 1;
141 addr
.quad_part
= mem_obj
->gpu_addr
;
143 rm_packet
->bitfields4
.address_lo_32b
= addr
.u
.low_part
>> 2;
144 rm_packet
->address_hi
= addr
.u
.high_part
;
146 rm_packet
->bitfields3
.data_sel
=
147 data_sel___release_mem__send_64_bit_data
;
149 rm_packet
->bitfields3
.int_sel
=
150 int_sel___release_mem__send_data_after_write_confirm
;
152 rm_packet
->bitfields3
.dst_sel
=
153 dst_sel___release_mem__memory_controller
;
155 rm_packet
->data_lo
= QUEUESTATE__ACTIVE
;
157 kq
->ops
.submit_packet(kq
);
159 /* Wait till CP writes sync code: */
160 status
= amdkfd_fence_wait_timeout(
161 (unsigned int *) rm_state
,
162 QUEUESTATE__ACTIVE
, 1500);
164 kfd_gtt_sa_free(dbgdev
->dev
, mem_obj
);
169 static int dbgdev_register_nodiq(struct kfd_dbgdev
*dbgdev
)
174 * no action is needed in this case,
175 * just make sure diq will not be used
183 static int dbgdev_register_diq(struct kfd_dbgdev
*dbgdev
)
185 struct queue_properties properties
;
187 struct kernel_queue
*kq
= NULL
;
190 BUG_ON(!dbgdev
|| !dbgdev
->pqm
|| !dbgdev
->dev
);
192 status
= pqm_create_queue(dbgdev
->pqm
, dbgdev
->dev
, NULL
,
193 &properties
, 0, KFD_QUEUE_TYPE_DIQ
,
197 pr_err("amdkfd: Failed to create DIQ\n");
201 pr_debug("DIQ Created with queue id: %d\n", qid
);
203 kq
= pqm_get_kernel_queue(dbgdev
->pqm
, qid
);
206 pr_err("amdkfd: Error getting DIQ\n");
207 pqm_destroy_queue(dbgdev
->pqm
, qid
);
216 static int dbgdev_unregister_nodiq(struct kfd_dbgdev
*dbgdev
)
218 BUG_ON(!dbgdev
|| !dbgdev
->dev
);
220 /* disable watch address */
221 dbgdev_address_watch_disable_nodiq(dbgdev
->dev
);
225 static int dbgdev_unregister_diq(struct kfd_dbgdev
*dbgdev
)
227 /* todo - disable address watch */
230 BUG_ON(!dbgdev
|| !dbgdev
->pqm
|| !dbgdev
->kq
);
232 status
= pqm_destroy_queue(dbgdev
->pqm
,
233 dbgdev
->kq
->queue
->properties
.queue_id
);
239 static void dbgdev_address_watch_set_registers(
240 const struct dbg_address_watch_info
*adw_info
,
241 union TCP_WATCH_ADDR_H_BITS
*addrHi
,
242 union TCP_WATCH_ADDR_L_BITS
*addrLo
,
243 union TCP_WATCH_CNTL_BITS
*cntl
,
244 unsigned int index
, unsigned int vmid
)
246 union ULARGE_INTEGER addr
;
248 BUG_ON(!adw_info
|| !addrHi
|| !addrLo
|| !cntl
);
255 if (adw_info
->watch_mask
!= NULL
)
256 cntl
->bitfields
.mask
=
257 (uint32_t) (adw_info
->watch_mask
[index
] &
258 ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK
);
260 cntl
->bitfields
.mask
= ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK
;
262 addr
.quad_part
= (unsigned long long) adw_info
->watch_address
[index
];
264 addrHi
->bitfields
.addr
= addr
.u
.high_part
&
265 ADDRESS_WATCH_REG_ADDHIGH_MASK
;
266 addrLo
->bitfields
.addr
=
267 (addr
.u
.low_part
>> ADDRESS_WATCH_REG_ADDLOW_SHIFT
);
269 cntl
->bitfields
.mode
= adw_info
->watch_mode
[index
];
270 cntl
->bitfields
.vmid
= (uint32_t) vmid
;
271 /* for now assume it is an ATC address */
272 cntl
->u32All
|= ADDRESS_WATCH_REG_CNTL_ATC_BIT
;
274 pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl
->bitfields
.mask
);
275 pr_debug("\t\t%20s %08x\n", "set reg add high :",
276 addrHi
->bitfields
.addr
);
277 pr_debug("\t\t%20s %08x\n", "set reg add low :",
278 addrLo
->bitfields
.addr
);
281 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev
*dbgdev
,
282 struct dbg_address_watch_info
*adw_info
)
284 union TCP_WATCH_ADDR_H_BITS addrHi
;
285 union TCP_WATCH_ADDR_L_BITS addrLo
;
286 union TCP_WATCH_CNTL_BITS cntl
;
287 struct kfd_process_device
*pdd
;
290 BUG_ON(!dbgdev
|| !dbgdev
->dev
|| !adw_info
);
292 /* taking the vmid for that process on the safe way using pdd */
293 pdd
= kfd_get_process_device_data(dbgdev
->dev
,
296 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
304 if ((adw_info
->num_watch_points
> MAX_WATCH_ADDRESSES
) ||
305 (adw_info
->num_watch_points
== 0)) {
306 pr_err("amdkfd: num_watch_points is invalid\n");
310 if ((adw_info
->watch_mode
== NULL
) ||
311 (adw_info
->watch_address
== NULL
)) {
312 pr_err("amdkfd: adw_info fields are not valid\n");
316 for (i
= 0 ; i
< adw_info
->num_watch_points
; i
++) {
317 dbgdev_address_watch_set_registers(adw_info
, &addrHi
, &addrLo
,
318 &cntl
, i
, pdd
->qpd
.vmid
);
320 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
321 pr_debug("\t\t%20s %08x\n", "register index :", i
);
322 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd
->qpd
.vmid
);
323 pr_debug("\t\t%20s %08x\n", "Address Low is :",
324 addrLo
.bitfields
.addr
);
325 pr_debug("\t\t%20s %08x\n", "Address high is :",
326 addrHi
.bitfields
.addr
);
327 pr_debug("\t\t%20s %08x\n", "Address high is :",
328 addrHi
.bitfields
.addr
);
329 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
330 cntl
.bitfields
.mask
);
331 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
332 cntl
.bitfields
.mode
);
333 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
334 cntl
.bitfields
.vmid
);
335 pr_debug("\t\t%20s %08x\n", "Control atc is :",
337 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
339 pdd
->dev
->kfd2kgd
->address_watch_execute(
350 static int dbgdev_address_watch_diq(struct kfd_dbgdev
*dbgdev
,
351 struct dbg_address_watch_info
*adw_info
)
353 struct pm4__set_config_reg
*packets_vec
;
354 union TCP_WATCH_ADDR_H_BITS addrHi
;
355 union TCP_WATCH_ADDR_L_BITS addrLo
;
356 union TCP_WATCH_CNTL_BITS cntl
;
357 struct kfd_mem_obj
*mem_obj
;
358 unsigned int aw_reg_add_dword
;
359 uint32_t *packet_buff_uint
;
362 size_t ib_size
= sizeof(struct pm4__set_config_reg
) * 4;
363 /* we do not control the vmid in DIQ mode, just a place holder */
364 unsigned int vmid
= 0;
366 BUG_ON(!dbgdev
|| !dbgdev
->dev
|| !adw_info
);
372 if ((adw_info
->num_watch_points
> MAX_WATCH_ADDRESSES
) ||
373 (adw_info
->num_watch_points
== 0)) {
374 pr_err("amdkfd: num_watch_points is invalid\n");
378 if ((NULL
== adw_info
->watch_mode
) ||
379 (NULL
== adw_info
->watch_address
)) {
380 pr_err("amdkfd: adw_info fields are not valid\n");
384 status
= kfd_gtt_sa_allocate(dbgdev
->dev
, ib_size
, &mem_obj
);
387 pr_err("amdkfd: Failed to allocate GART memory\n");
391 packet_buff_uint
= mem_obj
->cpu_ptr
;
393 memset(packet_buff_uint
, 0, ib_size
);
395 packets_vec
= (struct pm4__set_config_reg
*) (packet_buff_uint
);
397 packets_vec
[0].header
.count
= 1;
398 packets_vec
[0].header
.opcode
= IT_SET_CONFIG_REG
;
399 packets_vec
[0].header
.type
= PM4_TYPE_3
;
400 packets_vec
[0].bitfields2
.vmid_shift
= ADDRESS_WATCH_CNTL_OFFSET
;
401 packets_vec
[0].bitfields2
.insert_vmid
= 1;
402 packets_vec
[1].ordinal1
= packets_vec
[0].ordinal1
;
403 packets_vec
[1].bitfields2
.insert_vmid
= 0;
404 packets_vec
[2].ordinal1
= packets_vec
[0].ordinal1
;
405 packets_vec
[2].bitfields2
.insert_vmid
= 0;
406 packets_vec
[3].ordinal1
= packets_vec
[0].ordinal1
;
407 packets_vec
[3].bitfields2
.vmid_shift
= ADDRESS_WATCH_CNTL_OFFSET
;
408 packets_vec
[3].bitfields2
.insert_vmid
= 1;
410 for (i
= 0; i
< adw_info
->num_watch_points
; i
++) {
411 dbgdev_address_watch_set_registers(adw_info
,
418 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
419 pr_debug("\t\t%20s %08x\n", "register index :", i
);
420 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid
);
421 pr_debug("\t\t%20s %p\n", "Add ptr is :",
422 adw_info
->watch_address
);
423 pr_debug("\t\t%20s %08llx\n", "Add is :",
424 adw_info
->watch_address
[i
]);
425 pr_debug("\t\t%20s %08x\n", "Address Low is :",
426 addrLo
.bitfields
.addr
);
427 pr_debug("\t\t%20s %08x\n", "Address high is :",
428 addrHi
.bitfields
.addr
);
429 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
430 cntl
.bitfields
.mask
);
431 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
432 cntl
.bitfields
.mode
);
433 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
434 cntl
.bitfields
.vmid
);
435 pr_debug("\t\t%20s %08x\n", "Control atc is :",
437 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
440 dbgdev
->dev
->kfd2kgd
->address_watch_get_offset(
443 ADDRESS_WATCH_REG_CNTL
);
445 aw_reg_add_dword
/= sizeof(uint32_t);
447 packets_vec
[0].bitfields2
.reg_offset
=
448 aw_reg_add_dword
- AMD_CONFIG_REG_BASE
;
450 packets_vec
[0].reg_data
[0] = cntl
.u32All
;
453 dbgdev
->dev
->kfd2kgd
->address_watch_get_offset(
456 ADDRESS_WATCH_REG_ADDR_HI
);
458 aw_reg_add_dword
/= sizeof(uint32_t);
460 packets_vec
[1].bitfields2
.reg_offset
=
461 aw_reg_add_dword
- AMD_CONFIG_REG_BASE
;
462 packets_vec
[1].reg_data
[0] = addrHi
.u32All
;
465 dbgdev
->dev
->kfd2kgd
->address_watch_get_offset(
468 ADDRESS_WATCH_REG_ADDR_LO
);
470 aw_reg_add_dword
/= sizeof(uint32_t);
472 packets_vec
[2].bitfields2
.reg_offset
=
473 aw_reg_add_dword
- AMD_CONFIG_REG_BASE
;
474 packets_vec
[2].reg_data
[0] = addrLo
.u32All
;
476 /* enable watch flag if address is not zero*/
477 if (adw_info
->watch_address
[i
] > 0)
478 cntl
.bitfields
.valid
= 1;
480 cntl
.bitfields
.valid
= 0;
483 dbgdev
->dev
->kfd2kgd
->address_watch_get_offset(
486 ADDRESS_WATCH_REG_CNTL
);
488 aw_reg_add_dword
/= sizeof(uint32_t);
490 packets_vec
[3].bitfields2
.reg_offset
=
491 aw_reg_add_dword
- AMD_CONFIG_REG_BASE
;
492 packets_vec
[3].reg_data
[0] = cntl
.u32All
;
494 status
= dbgdev_diq_submit_ib(
496 adw_info
->process
->pasid
,
502 pr_err("amdkfd: Failed to submit IB to DIQ\n");
507 kfd_gtt_sa_free(dbgdev
->dev
, mem_obj
);
511 static int dbgdev_wave_control_set_registers(
512 struct dbg_wave_control_info
*wac_info
,
513 union SQ_CMD_BITS
*in_reg_sq_cmd
,
514 union GRBM_GFX_INDEX_BITS
*in_reg_gfx_index
)
517 union SQ_CMD_BITS reg_sq_cmd
;
518 union GRBM_GFX_INDEX_BITS reg_gfx_index
;
519 struct HsaDbgWaveMsgAMDGen2
*pMsg
;
521 BUG_ON(!wac_info
|| !in_reg_sq_cmd
|| !in_reg_gfx_index
);
523 reg_sq_cmd
.u32All
= 0;
524 reg_gfx_index
.u32All
= 0;
525 pMsg
= &wac_info
->dbgWave_msg
.DbgWaveMsg
.WaveMsgInfoGen2
;
527 switch (wac_info
->mode
) {
528 /* Send command to single wave */
529 case HSA_DBG_WAVEMODE_SINGLE
:
531 * Limit access to the process waves only,
532 * by setting vmid check
534 reg_sq_cmd
.bits
.check_vmid
= 1;
535 reg_sq_cmd
.bits
.simd_id
= pMsg
->ui32
.SIMD
;
536 reg_sq_cmd
.bits
.wave_id
= pMsg
->ui32
.WaveId
;
537 reg_sq_cmd
.bits
.mode
= SQ_IND_CMD_MODE_SINGLE
;
539 reg_gfx_index
.bits
.sh_index
= pMsg
->ui32
.ShaderArray
;
540 reg_gfx_index
.bits
.se_index
= pMsg
->ui32
.ShaderEngine
;
541 reg_gfx_index
.bits
.instance_index
= pMsg
->ui32
.HSACU
;
545 /* Send command to all waves with matching VMID */
546 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS
:
548 reg_gfx_index
.bits
.sh_broadcast_writes
= 1;
549 reg_gfx_index
.bits
.se_broadcast_writes
= 1;
550 reg_gfx_index
.bits
.instance_broadcast_writes
= 1;
552 reg_sq_cmd
.bits
.mode
= SQ_IND_CMD_MODE_BROADCAST
;
556 /* Send command to all CU waves with matching VMID */
557 case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU
:
559 reg_sq_cmd
.bits
.check_vmid
= 1;
560 reg_sq_cmd
.bits
.mode
= SQ_IND_CMD_MODE_BROADCAST
;
562 reg_gfx_index
.bits
.sh_index
= pMsg
->ui32
.ShaderArray
;
563 reg_gfx_index
.bits
.se_index
= pMsg
->ui32
.ShaderEngine
;
564 reg_gfx_index
.bits
.instance_index
= pMsg
->ui32
.HSACU
;
572 switch (wac_info
->operand
) {
573 case HSA_DBG_WAVEOP_HALT
:
574 reg_sq_cmd
.bits
.cmd
= SQ_IND_CMD_CMD_HALT
;
577 case HSA_DBG_WAVEOP_RESUME
:
578 reg_sq_cmd
.bits
.cmd
= SQ_IND_CMD_CMD_RESUME
;
581 case HSA_DBG_WAVEOP_KILL
:
582 reg_sq_cmd
.bits
.cmd
= SQ_IND_CMD_CMD_KILL
;
585 case HSA_DBG_WAVEOP_DEBUG
:
586 reg_sq_cmd
.bits
.cmd
= SQ_IND_CMD_CMD_DEBUG
;
589 case HSA_DBG_WAVEOP_TRAP
:
590 if (wac_info
->trapId
< MAX_TRAPID
) {
591 reg_sq_cmd
.bits
.cmd
= SQ_IND_CMD_CMD_TRAP
;
592 reg_sq_cmd
.bits
.trap_id
= wac_info
->trapId
;
604 *in_reg_sq_cmd
= reg_sq_cmd
;
605 *in_reg_gfx_index
= reg_gfx_index
;
611 static int dbgdev_wave_control_diq(struct kfd_dbgdev
*dbgdev
,
612 struct dbg_wave_control_info
*wac_info
)
616 union SQ_CMD_BITS reg_sq_cmd
;
617 union GRBM_GFX_INDEX_BITS reg_gfx_index
;
618 struct kfd_mem_obj
*mem_obj
;
619 uint32_t *packet_buff_uint
;
620 struct pm4__set_config_reg
*packets_vec
;
621 size_t ib_size
= sizeof(struct pm4__set_config_reg
) * 3;
623 BUG_ON(!dbgdev
|| !wac_info
);
625 reg_sq_cmd
.u32All
= 0;
627 status
= dbgdev_wave_control_set_registers(wac_info
, ®_sq_cmd
,
630 pr_err("amdkfd: Failed to set wave control registers\n");
634 /* we do not control the VMID in DIQ,so reset it to a known value */
635 reg_sq_cmd
.bits
.vm_id
= 0;
637 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
639 pr_debug("\t\t mode is: %u\n", wac_info
->mode
);
640 pr_debug("\t\t operand is: %u\n", wac_info
->operand
);
641 pr_debug("\t\t trap id is: %u\n", wac_info
->trapId
);
642 pr_debug("\t\t msg value is: %u\n",
643 wac_info
->dbgWave_msg
.DbgWaveMsg
.WaveMsgInfoGen2
.Value
);
644 pr_debug("\t\t vmid is: N/A\n");
646 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd
.bitfields
.check_vmid
);
647 pr_debug("\t\t command is : %u\n", reg_sq_cmd
.bitfields
.cmd
);
648 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd
.bitfields
.queue_id
);
649 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd
.bitfields
.simd_id
);
650 pr_debug("\t\t mode is : %u\n", reg_sq_cmd
.bitfields
.mode
);
651 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd
.bitfields
.vm_id
);
652 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd
.bitfields
.wave_id
);
654 pr_debug("\t\t ibw is : %u\n",
655 reg_gfx_index
.bitfields
.instance_broadcast_writes
);
656 pr_debug("\t\t ii is : %u\n",
657 reg_gfx_index
.bitfields
.instance_index
);
658 pr_debug("\t\t sebw is : %u\n",
659 reg_gfx_index
.bitfields
.se_broadcast_writes
);
660 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index
.bitfields
.se_index
);
661 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index
.bitfields
.sh_index
);
662 pr_debug("\t\t sbw is : %u\n",
663 reg_gfx_index
.bitfields
.sh_broadcast_writes
);
665 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
667 status
= kfd_gtt_sa_allocate(dbgdev
->dev
, ib_size
, &mem_obj
);
670 pr_err("amdkfd: Failed to allocate GART memory\n");
674 packet_buff_uint
= mem_obj
->cpu_ptr
;
676 memset(packet_buff_uint
, 0, ib_size
);
678 packets_vec
= (struct pm4__set_config_reg
*) packet_buff_uint
;
679 packets_vec
[0].header
.count
= 1;
680 packets_vec
[0].header
.opcode
= IT_SET_UCONFIG_REG
;
681 packets_vec
[0].header
.type
= PM4_TYPE_3
;
682 packets_vec
[0].bitfields2
.reg_offset
=
683 GRBM_GFX_INDEX
/ (sizeof(uint32_t)) -
686 packets_vec
[0].bitfields2
.insert_vmid
= 0;
687 packets_vec
[0].reg_data
[0] = reg_gfx_index
.u32All
;
689 packets_vec
[1].header
.count
= 1;
690 packets_vec
[1].header
.opcode
= IT_SET_CONFIG_REG
;
691 packets_vec
[1].header
.type
= PM4_TYPE_3
;
692 packets_vec
[1].bitfields2
.reg_offset
= SQ_CMD
/ (sizeof(uint32_t)) -
695 packets_vec
[1].bitfields2
.vmid_shift
= SQ_CMD_VMID_OFFSET
;
696 packets_vec
[1].bitfields2
.insert_vmid
= 1;
697 packets_vec
[1].reg_data
[0] = reg_sq_cmd
.u32All
;
699 /* Restore the GRBM_GFX_INDEX register */
701 reg_gfx_index
.u32All
= 0;
702 reg_gfx_index
.bits
.sh_broadcast_writes
= 1;
703 reg_gfx_index
.bits
.instance_broadcast_writes
= 1;
704 reg_gfx_index
.bits
.se_broadcast_writes
= 1;
707 packets_vec
[2].ordinal1
= packets_vec
[0].ordinal1
;
708 packets_vec
[2].bitfields2
.reg_offset
=
709 GRBM_GFX_INDEX
/ (sizeof(uint32_t)) -
712 packets_vec
[2].bitfields2
.insert_vmid
= 0;
713 packets_vec
[2].reg_data
[0] = reg_gfx_index
.u32All
;
715 status
= dbgdev_diq_submit_ib(
717 wac_info
->process
->pasid
,
723 pr_err("amdkfd: Failed to submit IB to DIQ\n");
725 kfd_gtt_sa_free(dbgdev
->dev
, mem_obj
);
730 static int dbgdev_wave_control_nodiq(struct kfd_dbgdev
*dbgdev
,
731 struct dbg_wave_control_info
*wac_info
)
734 union SQ_CMD_BITS reg_sq_cmd
;
735 union GRBM_GFX_INDEX_BITS reg_gfx_index
;
736 struct kfd_process_device
*pdd
;
738 BUG_ON(!dbgdev
|| !dbgdev
->dev
|| !wac_info
);
740 reg_sq_cmd
.u32All
= 0;
742 /* taking the VMID for that process on the safe way using PDD */
743 pdd
= kfd_get_process_device_data(dbgdev
->dev
, wac_info
->process
);
746 pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
749 status
= dbgdev_wave_control_set_registers(wac_info
, ®_sq_cmd
,
752 pr_err("amdkfd: Failed to set wave control registers\n");
756 /* for non DIQ we need to patch the VMID: */
758 reg_sq_cmd
.bits
.vm_id
= pdd
->qpd
.vmid
;
760 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
762 pr_debug("\t\t mode is: %u\n", wac_info
->mode
);
763 pr_debug("\t\t operand is: %u\n", wac_info
->operand
);
764 pr_debug("\t\t trap id is: %u\n", wac_info
->trapId
);
765 pr_debug("\t\t msg value is: %u\n",
766 wac_info
->dbgWave_msg
.DbgWaveMsg
.WaveMsgInfoGen2
.Value
);
767 pr_debug("\t\t vmid is: %u\n", pdd
->qpd
.vmid
);
769 pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd
.bitfields
.check_vmid
);
770 pr_debug("\t\t command is : %u\n", reg_sq_cmd
.bitfields
.cmd
);
771 pr_debug("\t\t queue id is : %u\n", reg_sq_cmd
.bitfields
.queue_id
);
772 pr_debug("\t\t simd id is : %u\n", reg_sq_cmd
.bitfields
.simd_id
);
773 pr_debug("\t\t mode is : %u\n", reg_sq_cmd
.bitfields
.mode
);
774 pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd
.bitfields
.vm_id
);
775 pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd
.bitfields
.wave_id
);
777 pr_debug("\t\t ibw is : %u\n",
778 reg_gfx_index
.bitfields
.instance_broadcast_writes
);
779 pr_debug("\t\t ii is : %u\n",
780 reg_gfx_index
.bitfields
.instance_index
);
781 pr_debug("\t\t sebw is : %u\n",
782 reg_gfx_index
.bitfields
.se_broadcast_writes
);
783 pr_debug("\t\t se_ind is : %u\n", reg_gfx_index
.bitfields
.se_index
);
784 pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index
.bitfields
.sh_index
);
785 pr_debug("\t\t sbw is : %u\n",
786 reg_gfx_index
.bitfields
.sh_broadcast_writes
);
788 pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
790 return dbgdev
->dev
->kfd2kgd
->wave_control_execute(dbgdev
->dev
->kgd
,
791 reg_gfx_index
.u32All
,
795 int dbgdev_wave_reset_wavefronts(struct kfd_dev
*dev
, struct kfd_process
*p
)
799 union SQ_CMD_BITS reg_sq_cmd
;
800 union GRBM_GFX_INDEX_BITS reg_gfx_index
;
801 struct kfd_process_device
*pdd
;
802 struct dbg_wave_control_info wac_info
;
804 int first_vmid_to_scan
= 8;
805 int last_vmid_to_scan
= 15;
807 first_vmid_to_scan
= ffs(dev
->shared_resources
.compute_vmid_bitmap
) - 1;
808 temp
= dev
->shared_resources
.compute_vmid_bitmap
>> first_vmid_to_scan
;
809 last_vmid_to_scan
= first_vmid_to_scan
+ ffz(temp
);
811 reg_sq_cmd
.u32All
= 0;
814 wac_info
.mode
= HSA_DBG_WAVEMODE_BROADCAST_PROCESS
;
815 wac_info
.operand
= HSA_DBG_WAVEOP_KILL
;
817 pr_debug("Killing all process wavefronts\n");
819 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
820 * ATC_VMID15_PASID_MAPPING
821 * to check which VMID the current process is mapped to. */
823 for (vmid
= first_vmid_to_scan
; vmid
<= last_vmid_to_scan
; vmid
++) {
824 if (dev
->kfd2kgd
->get_atc_vmid_pasid_mapping_valid
826 if (dev
->kfd2kgd
->get_atc_vmid_pasid_mapping_valid
827 (dev
->kgd
, vmid
) == p
->pasid
) {
828 pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
835 if (vmid
> last_vmid_to_scan
) {
836 pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p
->pasid
);
840 /* taking the VMID for that process on the safe way using PDD */
841 pdd
= kfd_get_process_device_data(dev
, p
);
845 status
= dbgdev_wave_control_set_registers(&wac_info
, ®_sq_cmd
,
850 /* for non DIQ we need to patch the VMID: */
851 reg_sq_cmd
.bits
.vm_id
= vmid
;
853 dev
->kfd2kgd
->wave_control_execute(dev
->kgd
,
854 reg_gfx_index
.u32All
,
860 void kfd_dbgdev_init(struct kfd_dbgdev
*pdbgdev
, struct kfd_dev
*pdev
,
861 enum DBGDEV_TYPE type
)
863 BUG_ON(!pdbgdev
|| !pdev
);
867 pdbgdev
->type
= type
;
871 case DBGDEV_TYPE_NODIQ
:
872 pdbgdev
->dbgdev_register
= dbgdev_register_nodiq
;
873 pdbgdev
->dbgdev_unregister
= dbgdev_unregister_nodiq
;
874 pdbgdev
->dbgdev_wave_control
= dbgdev_wave_control_nodiq
;
875 pdbgdev
->dbgdev_address_watch
= dbgdev_address_watch_nodiq
;
877 case DBGDEV_TYPE_DIQ
:
879 pdbgdev
->dbgdev_register
= dbgdev_register_diq
;
880 pdbgdev
->dbgdev_unregister
= dbgdev_unregister_diq
;
881 pdbgdev
->dbgdev_wave_control
= dbgdev_wave_control_diq
;
882 pdbgdev
->dbgdev_address_watch
= dbgdev_address_watch_diq
;