/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_headers.h"
#include "kfd_pm4_opcodes.h"

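/*
 * Advance a runlist IB write pointer (in dwords) by increment_bytes,
 * asserting that the write stays inside the IB buffer.
 */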
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
				unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
	*wptr = temp;
}

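/* Build a PM4 type-3 packet header for the given opcode and packet size. */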
static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
{
	union PM4_MES_TYPE_3_HEADER header;

	header.u32all = 0;
	header.opcode = opcode;
	header.count = packet_size / sizeof(uint32_t) - 2;
	header.type = PM4_TYPE_3;

	return header.u32all;
}

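/*
 * Compute the runlist IB size needed for the current set of processes and
 * queues, and report whether the hardware scheduler is over-subscribed.
 */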
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				bool *over_subscription)
{
	unsigned int process_count, queue_count;

	BUG_ON(!pm || !rlib_size || !over_subscription);

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->queue_count;

	/* check if there is over subscription */
	*over_subscription = false;
	if ((process_count > 1) ||
		queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
		*over_subscription = true;
		pr_debug("kfd: over subscribed runlist\n");
	}

	/* calculate run list ib allocation size */
	*rlib_size = process_count * sizeof(struct pm4_map_process) +
		     queue_count * sizeof(struct pm4_map_queues);

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over subscription
	 */
	if (*over_subscription)
		*rlib_size += sizeof(struct pm4_runlist);

	pr_debug("kfd: runlist ib size %d\n", *rlib_size);
}

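/*
 * Allocate the runlist IB from the device GTT sub-allocator and return its
 * CPU address, GPU address and allocated size to the caller.
 */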
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				bool *is_over_subscription)
{
	int retval;

	BUG_ON(!pm);
	BUG_ON(pm->allocated == true);
	BUG_ON(is_over_subscription == NULL);

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
					&pm->ib_buffer_obj);
	if (retval != 0) {
		pr_err("kfd: failed to allocate runlist IB\n");
		return retval;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

	return retval;
}

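/* Write a PM4 RUN_LIST packet that points the hardware scheduler at a runlist IB. */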
static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
	struct pm4_runlist *packet;

	BUG_ON(!pm || !buffer || !ib);

	packet = (struct pm4_runlist *)buffer;

	memset(buffer, 0, sizeof(struct pm4_runlist));
	packet->header.u32all = build_pm4_header(IT_RUN_LIST,
						sizeof(struct pm4_runlist));

	packet->bitfields4.ib_size = ib_size_in_dwords;
	packet->bitfields4.chain = chain ? 1 : 0;
	packet->bitfields4.offload_polling = 0;
	packet->bitfields4.valid = 1;
	packet->ordinal2 = lower_32_bits(ib);
	packet->bitfields3.ib_base_hi = upper_32_bits(ib);

	return 0;
}

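/*
 * Write a PM4 MAP_PROCESS packet describing one process: its PASID, shared
 * memory apertures, GDS allocation and number of queues.
 */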
static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
				struct qcm_process_device *qpd)
{
	struct pm4_map_process *packet;
	struct queue *cur;
	uint32_t num_queues;

	BUG_ON(!pm || !buffer || !qpd);

	packet = (struct pm4_map_process *)buffer;

	pr_debug("kfd: In func %s\n", __func__);

	memset(buffer, 0, sizeof(struct pm4_map_process));

	packet->header.u32all = build_pm4_header(IT_MAP_PROCESS,
					sizeof(struct pm4_map_process));
	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
	packet->bitfields2.process_quantum = 1;
	packet->bitfields2.pasid = qpd->pqm->process->pasid;
	packet->bitfields3.page_table_base = qpd->page_table_base;
	packet->bitfields10.gds_size = qpd->gds_size;
	packet->bitfields10.num_gws = qpd->num_gws;
	packet->bitfields10.num_oac = qpd->num_oac;

	num_queues = 0;
	list_for_each_entry(cur, &qpd->queues_list, list)
		num_queues++;
	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;

	packet->sh_mem_config = qpd->sh_mem_config;
	packet->sh_mem_bases = qpd->sh_mem_bases;
	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;

	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

	return 0;
}

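/*
 * Write a PM4 MAP_QUEUES packet for a single queue, selecting the compute or
 * SDMA engine and pointing the scheduler at the queue's MQD and write pointer.
 */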
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
				struct queue *q, bool is_static)
{
	struct pm4_map_queues *packet;
	bool use_static = is_static;

	BUG_ON(!pm || !buffer || !q);

	pr_debug("kfd: In func %s\n", __func__);

	packet = (struct pm4_map_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_map_queues));

	packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
						sizeof(struct pm4_map_queues));
	packet->bitfields2.alloc_format =
				alloc_format__mes_map_queues__one_per_pipe;
	packet->bitfields2.num_queues = 1;
	packet->bitfields2.queue_sel =
		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots;

	packet->bitfields2.vidmem = (q->properties.is_interop) ?
			vidmem__mes_map_queues__uses_video_memory :
			vidmem__mes_map_queues__uses_no_video_memory;

	switch (q->properties.type) {
	case KFD_QUEUE_TYPE_COMPUTE:
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.engine_sel =
				engine_sel__mes_map_queues__compute;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel =
				engine_sel__mes_map_queues__sdma0;
		use_static = false; /* no static queues under SDMA */
		break;
	default:
		BUG();
		break;
	}

	packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
			q->properties.doorbell_off;

	packet->mes_map_queues_ordinals[0].bitfields3.is_static =
			(use_static == true) ? 1 : 0;

	packet->mes_map_queues_ordinals[0].mqd_addr_lo =
			lower_32_bits(q->gart_mqd_addr);

	packet->mes_map_queues_ordinals[0].mqd_addr_hi =
			upper_32_bits(q->gart_mqd_addr);

	packet->mes_map_queues_ordinals[0].wptr_addr_lo =
			lower_32_bits((uint64_t)q->properties.write_ptr);

	packet->mes_map_queues_ordinals[0].wptr_addr_hi =
			upper_32_bits((uint64_t)q->properties.write_ptr);

	return 0;
}

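/*
 * Build the complete runlist IB: one MAP_PROCESS packet per process, followed
 * by MAP_QUEUES packets for its active kernel and user queues, plus a chained
 * RUN_LIST packet at the end when the scheduler is over-subscribed.
 */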
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	int retval, proccesses_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	bool is_over_subscription;

	BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr);

	rl_wptr = retval = proccesses_mapped = 0;

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
				&alloc_size_bytes, &is_over_subscription);
	if (retval != 0)
		return retval;

	*rl_size_bytes = alloc_size_bytes;

	pr_debug("kfd: In func %s\n", __func__);
	pr_debug("kfd: building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->queue_count);

	/* build the run list ib packet */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;

		/* build map process packet */
		if (proccesses_mapped >= pm->dqm->processes_count) {
			pr_debug("kfd: not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval != 0)
			return retval;

		proccesses_mapped++;
		inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
				alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (kq->queue->properties.is_active != true)
				continue;

			pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
						kq->queue, qpd->is_debug);
			if (retval != 0)
				return retval;

			inc_wptr(&rl_wptr,
				sizeof(struct pm4_map_queues),
				alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (q->properties.is_active != true)
				continue;

			pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
						q, qpd->is_debug);
			if (retval != 0)
				return retval;

			inc_wptr(&rl_wptr,
				sizeof(struct pm4_map_queues),
				alloc_size_bytes);
		}
	}

	pr_debug("kfd: finished map process and queues to runlist\n");

	if (is_over_subscription)
		pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr,
				alloc_size_bytes / sizeof(uint32_t), true);

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return 0;
}

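/*
 * Initialize the packet manager: create the HIQ kernel queue used to submit
 * scheduler packets to the CP.
 */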
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (pm->priv_queue == NULL) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

void pm_uninit(struct packet_manager *pm)
{
	BUG_ON(!pm);

	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue);
}

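/*
 * Send a PM4 SET_RESOURCES packet on the HIQ, telling the hardware scheduler
 * which VMIDs, queues, GDS heap and GWS resources it may use.
 */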
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res)
{
	struct pm4_set_resources *packet;

	BUG_ON(!pm || !res);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&pm->lock);
	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
					sizeof(*packet) / sizeof(uint32_t),
					(unsigned int **)&packet);
	if (packet == NULL) {
		mutex_unlock(&pm->lock);
		pr_err("kfd: failed to allocate buffer on kernel queue\n");
		return -ENOMEM;
	}

	memset(packet, 0, sizeof(struct pm4_set_resources));
	packet->header.u32all = build_pm4_header(IT_SET_RESOURCES,
					sizeof(struct pm4_set_resources));

	packet->bitfields2.queue_type =
		queue_type__mes_set_resources__hsa_interface_queue_hiq;
	packet->bitfields2.vmid_mask = res->vmid_mask;
	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY;
	packet->bitfields7.oac_mask = res->oac_mask;
	packet->bitfields8.gds_heap_base = res->gds_heap_base;
	packet->bitfields8.gds_heap_size = res->gds_heap_size;

	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
	packet->gws_mask_hi = upper_32_bits(res->gws_mask);

	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
	packet->queue_mask_hi = upper_32_bits(res->queue_mask);

	pm->priv_queue->ops.submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return 0;
}

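/*
 * Build a runlist IB for the given queue list and submit a RUN_LIST packet
 * referencing it on the HIQ.
 */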
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	BUG_ON(!pm || !dqm_queues);

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
					&rl_ib_size);
	if (retval != 0)
		goto fail_create_runlist_ib;

	pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
					packet_size_dwords, &rl_buffer);
	if (retval != 0)
		goto fail_acquire_packet_buffer;

	retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
					rl_ib_size / sizeof(uint32_t), false);
	if (retval != 0)
		goto fail_create_runlist;

	pm->priv_queue->ops.submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	pm->priv_queue->ops.rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	if (pm->allocated == true)
		pm_release_ib(pm);
	return retval;
}

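/*
 * Submit a PM4 QUERY_STATUS packet asking the CP to write fence_value to
 * fence_address as a completion fence.
 */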
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			uint32_t fence_value)
{
	int retval;
	struct pm4_query_status *packet;

	BUG_ON(!pm || !fence_address);

	mutex_lock(&pm->lock);
	retval = pm->priv_queue->ops.acquire_packet_buffer(
			pm->priv_queue,
			sizeof(struct pm4_query_status) / sizeof(uint32_t),
			(unsigned int **)&packet);
	if (retval != 0)
		goto fail_acquire_packet_buffer;

	packet->header.u32all = build_pm4_header(IT_QUERY_STATUS,
					sizeof(struct pm4_query_status));

	packet->bitfields2.context_id = 0;
	packet->bitfields2.interrupt_sel =
			interrupt_sel__mes_query_status__completion_status;
	packet->bitfields2.command =
			command__mes_query_status__fence_only_after_write_ack;

	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
	packet->data_hi = upper_32_bits((uint64_t)fence_value);
	packet->data_lo = lower_32_bits((uint64_t)fence_value);

	pm->priv_queue->ops.submit_packet(pm->priv_queue);
	mutex_unlock(&pm->lock);

	return 0;

fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
	return retval;
}

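/*
 * Submit a PM4 UNMAP_QUEUES packet that preempts (or resets) queues selected
 * by the given filter: a single queue, all queues of a PASID, all active
 * queues, or dynamic queues only.
 */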
int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
			enum kfd_preempt_type_filter mode,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine)
{
	int retval;
	uint32_t *buffer;
	struct pm4_unmap_queues *packet;

	BUG_ON(!pm);

	mutex_lock(&pm->lock);
	retval = pm->priv_queue->ops.acquire_packet_buffer(
			pm->priv_queue,
			sizeof(struct pm4_unmap_queues) / sizeof(uint32_t),
			&buffer);
	if (retval != 0)
		goto err_acquire_packet_buffer;

	packet = (struct pm4_unmap_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_unmap_queues));
	pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
		mode, reset, type);
	packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
					sizeof(struct pm4_unmap_queues));
	switch (type) {
	case KFD_QUEUE_TYPE_COMPUTE:
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__compute;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
		break;
	default:
		BUG();
		break;
	}

	if (reset)
		packet->bitfields2.action =
			action__mes_unmap_queues__reset_queues;
	else
		packet->bitfields2.action =
			action__mes_unmap_queues__preempt_queues;

	switch (mode) {
	case KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
		packet->bitfields2.num_queues = 1;
		packet->bitfields3b.doorbell_offset0 = filter_param;
		break;
	case KFD_PREEMPT_TYPE_FILTER_BY_PASID:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
		packet->bitfields3a.pasid = filter_param;
		break;
	case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
		break;
	case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
		/* in this case, we do not preempt static queues */
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
		break;
	default:
		BUG();
		break;
	}

	pm->priv_queue->ops.submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);
	return 0;

err_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
	return retval;
}

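/* Free the runlist IB allocation, if any, and mark it as released. */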
void pm_release_ib(struct packet_manager *pm)
{
	BUG_ON(!pm);

	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}