/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
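
/*
 * Advance the runlist-IB write pointer (counted in dwords) by
 * increment_bytes, warning if the new position would run past the end of
 * the IB allocation.
 */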
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
				unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				bool *over_subscription)
{
	unsigned int process_count, queue_count, compute_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_dev *dev = pm->dqm->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->queue_count;
	compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
		pm->dqm->xgmi_sdma_queue_count;

	/* check if there is over subscription
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	*over_subscription = false;

	if (dev->max_proc_per_quantum > 1)
		max_proc_per_quantum = dev->max_proc_per_quantum;

	if ((process_count > max_proc_per_quantum) ||
	    compute_queue_count > get_queues_num(pm->dqm)) {
		*over_subscription = true;
		pr_debug("Over subscribed runlist\n");
	}

	map_queue_size = pm->pmf->map_queues_size;
	/* calculate run list ib allocation size */
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over subscription
	 */
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	pr_debug("runlist ib size %d\n", *rlib_size);
}
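
/*
 * Allocate the runlist IB in GTT, sized by pm_calc_rlib_size(), and hand
 * back its CPU and GPU addresses. Only one runlist IB may be outstanding
 * at a time; pm->allocated tracks that, and pm_release_ib() frees it.
 */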
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				bool *is_over_subscription)
{
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
					&pm->ib_buffer_obj);

	if (retval) {
		pr_err("Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}
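
/*
 * Build the runlist IB from the DQM's process list: for each process emit
 * a map-process packet followed by map-queues packets for its active
 * kernel and user queues. If the runlist is oversubscribed, terminate the
 * IB with a chained runlist packet pointing back at the IB itself so the
 * HWS keeps re-fetching it and time-slices the extra work.
 */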
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	bool is_over_subscription;

	rl_wptr = retval = processes_mapped = 0;

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
				&alloc_size_bytes, &is_over_subscription);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	pr_debug("Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->queue_count);

	/* build the run list ib packet */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* build map process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			pr_debug("Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
				alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						kq->queue,
						qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				pm->pmf->map_queues_size,
				alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						&rl_buffer[rl_wptr],
						q,
						qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				pm->pmf->map_queues_size,
				alloc_size_bytes);
		}
	}

	pr_debug("Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					*rl_gpu_addr,
					alloc_size_bytes / sizeof(uint32_t),
					true);
	}
	pm->is_over_subscription = is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}
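
/*
 * Bind the per-ASIC packet-writer table (VI-style PM4 for CIK/VI parts,
 * v9-style for Vega and newer) and create the HIQ kernel queue that the
 * packet manager submits through.
 */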
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->device_info->asic_family) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_RENOIR:
	case CHIP_ARCTURUS:
	case CHIP_NAVI10:
	case CHIP_NAVI12:
	case CHIP_NAVI14:
		pm->pmf = &kfd_v9_pm_funcs;
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dqm->dev->device_info->asic_family);
		return -EINVAL;
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}
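
/*
 * Tear down the packet manager. The hanging flag is passed through to the
 * kernel-queue teardown so it can avoid submitting further packets to a
 * scheduler that is already known to be unresponsive.
 */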
void pm_uninit(struct packet_manager *pm, bool hanging)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue, hanging);
}
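
/*
 * Send a set-resources packet over the HIQ describing which VMIDs, queues
 * and GDS/GWS/OA resources the hardware scheduler may hand out to
 * processes.
 */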
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res)
{
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
					size / sizeof(uint32_t),
					(unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}
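
/*
 * Build a fresh runlist IB for dqm_queues and submit a runlist packet
 * through the HIQ pointing the HWS at it. The IB stays allocated until
 * pm_release_ib(), since the scheduler continues fetching from it.
 */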
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
					&rl_ib_size);
	if (retval)
		goto fail_create_runlist_ib;

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = kq_acquire_packet_buffer(pm->priv_queue,
					packet_size_dwords, &rl_buffer);
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
					rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;

	kq_submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	pm_release_ib(pm);
	return retval;
}
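
/*
 * Ask the HWS to write fence_value to fence_address once it has processed
 * everything queued so far; callers then wait on the fence to know the
 * scheduler has caught up (used by the unmap path in the device queue
 * manager).
 */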
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			uint32_t fence_value)
{
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}
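
/*
 * Submit an unmap-queues packet. The filter selects which queues to evict
 * (a single queue, all queues of a PASID, or all queues); reset asks the
 * HWS to reset rather than preempt them, and sdma_engine qualifies SDMA
 * queue types.
 */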
int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine)
{
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
				       reset, sdma_engine);
	if (!retval)
		kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}
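
/*
 * Free the current runlist IB, if one is allocated; safe to call when no
 * runlist is outstanding.
 */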
void pm_release_ib(struct packet_manager *pm)
{
	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)
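
/*
 * debugfs: hex-dump the active runlist IB, 32 bytes per row in dword
 * groups, so the packet stream handed to the HWS can be inspected.
 */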
int pm_debugfs_runlist(struct seq_file *m, void *data)
{
	struct packet_manager *pm = data;

	mutex_lock(&pm->lock);

	if (!pm->allocated) {
		seq_puts(m, "  No active runlist\n");
		goto out;
	}

	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
	mutex_unlock(&pm->lock);
	return 0;
}
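
/*
 * debugfs: deliberately hang the HWS by submitting a garbage packet (all
 * 0x55 bytes) to the HIQ, used to exercise the driver's hang-detection
 * and reset paths.
 */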
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	uint32_t *buffer, size;
	int r = 0;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		buffer[0], buffer[1], buffer[2], buffer[3],
		buffer[4], buffer[5], buffer[6]);

out:
	mutex_unlock(&pm->lock);
	return r;
}

#endif