/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
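
/*
 * Packet manager for the KFD hardware scheduler (HWS): builds runlist
 * indirect buffers (IBs) describing all processes and queues, and sends
 * scheduler packets (runlist, set_resources, query_status, unmap_queues)
 * to the HWS over the HIQ kernel queue. The ASIC-specific packet formats
 * are supplied through pm->pmf, selected in pm_init().
 */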

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
                        unsigned int buffer_size_bytes)
{
        unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

        WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
             "Runlist IB overflow");
        *wptr = temp;
}
static void pm_calc_rlib_size(struct packet_manager *pm,
                                unsigned int *rlib_size,
                                bool *over_subscription)
{
        unsigned int process_count, queue_count, compute_queue_count;
        unsigned int map_queue_size;
        unsigned int max_proc_per_quantum = 1;
        struct kfd_dev *dev = pm->dqm->dev;

        process_count = pm->dqm->processes_count;
        queue_count = pm->dqm->queue_count;
        compute_queue_count = queue_count - pm->dqm->sdma_queue_count;

        /* check if there is over subscription
         * Note: the arbitration between the number of VMIDs and
         * hws_max_conc_proc has been done in
         * kgd2kfd_device_init().
         */
        *over_subscription = false;

        if (dev->max_proc_per_quantum > 1)
                max_proc_per_quantum = dev->max_proc_per_quantum;

        if ((process_count > max_proc_per_quantum) ||
            compute_queue_count > get_queues_num(pm->dqm)) {
                *over_subscription = true;
                pr_debug("Over subscribed runlist\n");
        }

        map_queue_size = pm->pmf->map_queues_size;
        /* calculate run list ib allocation size */
        *rlib_size = process_count * pm->pmf->map_process_size +
                     queue_count * map_queue_size;

        /*
         * Increase the allocation size in case we need a chained run list
         * when over subscription
         */
        if (*over_subscription)
                *rlib_size += pm->pmf->runlist_size;

        pr_debug("runlist ib size %d\n", *rlib_size);
}
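
/* Allocate and zero a GTT sub-allocation for the runlist IB, returning
 * its CPU pointer in *rl_buffer and its GPU address in *rl_gpu_buffer.
 */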
static int pm_allocate_runlist_ib(struct packet_manager *pm,
                                unsigned int **rl_buffer,
                                uint64_t *rl_gpu_buffer,
                                unsigned int *rl_buffer_size,
                                bool *is_over_subscription)
{
        int retval;

        if (WARN_ON(pm->allocated))
                return -EINVAL;

        pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

        mutex_lock(&pm->lock);

        retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
                                        &pm->ib_buffer_obj);
        if (retval) {
                pr_err("Failed to allocate runlist IB\n");
                goto out;
        }

        *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
        *rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

        memset(*rl_buffer, 0, *rl_buffer_size);
        pm->allocated = true;

out:
        mutex_unlock(&pm->lock);
        return retval;
}
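
/* Fill the runlist IB: one map_process packet per process, each followed
 * by map_queues packets for that process's active kernel and user queues.
 * An over-subscribed runlist is terminated with a chained runlist packet
 * pointing back at the IB itself, so the HWS cycles through it.
 */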
static int pm_create_runlist_ib(struct packet_manager *pm,
                                struct list_head *queues,
                                uint64_t *rl_gpu_addr,
                                size_t *rl_size_bytes)
{
        unsigned int alloc_size_bytes;
        unsigned int *rl_buffer, rl_wptr, i;
        int retval, processes_mapped;
        struct device_process_node *cur;
        struct qcm_process_device *qpd;
        struct queue *q;
        struct kernel_queue *kq;
        bool is_over_subscription;

        rl_wptr = retval = processes_mapped = 0;

        retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
                                &alloc_size_bytes, &is_over_subscription);
        if (retval)
                return retval;

        *rl_size_bytes = alloc_size_bytes;
        pm->ib_size_bytes = alloc_size_bytes;

        pr_debug("Building runlist ib process count: %d queues count %d\n",
                pm->dqm->processes_count, pm->dqm->queue_count);

        /* build the run list ib packet */
        list_for_each_entry(cur, queues, list) {
                qpd = cur->qpd;
                /* build map process packet */
                if (processes_mapped >= pm->dqm->processes_count) {
                        pr_debug("Not enough space left in runlist IB\n");
                        pm_release_ib(pm);
                        return -ENOMEM;
                }

                retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
                if (retval)
                        return retval;

                processes_mapped++;
                inc_wptr(&rl_wptr, pm->pmf->map_process_size,
                                alloc_size_bytes);

                list_for_each_entry(kq, &qpd->priv_queue_list, list) {
                        if (!kq->queue->properties.is_active)
                                continue;

                        pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
                                kq->queue->queue, qpd->is_debug);

                        retval = pm->pmf->map_queues(pm,
                                                &rl_buffer[rl_wptr],
                                                kq->queue,
                                                qpd->is_debug);
                        if (retval)
                                return retval;

                        inc_wptr(&rl_wptr,
                                pm->pmf->map_queues_size,
                                alloc_size_bytes);
                }

                list_for_each_entry(q, &qpd->queues_list, list) {
                        if (!q->properties.is_active)
                                continue;

                        pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
                                q->queue, qpd->is_debug);

                        retval = pm->pmf->map_queues(pm,
                                                &rl_buffer[rl_wptr],
                                                q,
                                                qpd->is_debug);
                        if (retval)
                                return retval;

                        inc_wptr(&rl_wptr,
                                pm->pmf->map_queues_size,
                                alloc_size_bytes);
                }
        }

        pr_debug("Finished map process and queues to runlist\n");

        if (is_over_subscription)
                retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
                                        *rl_gpu_addr,
                                        alloc_size_bytes / sizeof(uint32_t),
                                        true);

        for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
                pr_debug("0x%2X ", rl_buffer[i]);
        pr_debug("\n");

        return retval;
}
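
/* Select the ASIC-specific packet writing functions and create the HIQ
 * kernel queue used to submit packets to the HWS.
 */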
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
        switch (dqm->dev->device_info->asic_family) {
        case CHIP_KAVERI:
        case CHIP_HAWAII:
                /* PM4 packet structures on CIK are the same as on VI */
        case CHIP_CARRIZO:
        case CHIP_TONGA:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
                pm->pmf = &kfd_vi_pm_funcs;
                break;
        case CHIP_VEGA10:
        case CHIP_VEGA20:
        case CHIP_RAVEN:
                pm->pmf = &kfd_v9_pm_funcs;
                break;
        default:
                WARN(1, "Unexpected ASIC family %u",
                     dqm->dev->device_info->asic_family);
                return -EINVAL;
        }

        pm->dqm = dqm;
        mutex_init(&pm->lock);
        pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
        if (!pm->priv_queue) {
                mutex_destroy(&pm->lock);
                return -ENOMEM;
        }
        pm->allocated = false;

        return 0;
}
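
/* Tear down the HIQ kernel queue and the packet manager lock. */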
void pm_uninit(struct packet_manager *pm)
{
        mutex_destroy(&pm->lock);
        kernel_queue_uninit(pm->priv_queue);
}
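
/* Send a set_resources packet handing the scheduling resources described
 * by *res over to the HWS.
 */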
int pm_send_set_resources(struct packet_manager *pm,
                                struct scheduling_resources *res)
{
        uint32_t *buffer, size;
        int retval = 0;

        size = pm->pmf->set_resources_size;
        mutex_lock(&pm->lock);
        pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                                        size / sizeof(uint32_t),
                                        (unsigned int **)&buffer);
        if (!buffer) {
                pr_err("Failed to allocate buffer on kernel queue\n");
                retval = -ENOMEM;
                goto out;
        }

        retval = pm->pmf->set_resources(pm, buffer, res);
        if (!retval)
                pm->priv_queue->ops.submit_packet(pm->priv_queue);
        else
                pm->priv_queue->ops.rollback_packet(pm->priv_queue);

out:
        mutex_unlock(&pm->lock);

        return retval;
}
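
/* Build a runlist IB covering all queues in dqm_queues, then submit a
 * runlist packet over the HIQ that points the HWS at the new IB.
 */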
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
        uint64_t rl_gpu_ib_addr;
        uint32_t *rl_buffer;
        size_t rl_ib_size, packet_size_dwords;
        int retval;

        retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
                                        &rl_ib_size);
        if (retval)
                goto fail_create_runlist_ib;

        pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

        packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
        mutex_lock(&pm->lock);

        retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                                        packet_size_dwords, &rl_buffer);
        if (retval)
                goto fail_acquire_packet_buffer;

        retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
                                        rl_ib_size / sizeof(uint32_t), false);
        if (retval)
                goto fail_create_runlist;

        pm->priv_queue->ops.submit_packet(pm->priv_queue);

        mutex_unlock(&pm->lock);

        return retval;

fail_create_runlist:
        pm->priv_queue->ops.rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
        mutex_unlock(&pm->lock);
fail_create_runlist_ib:
        pm_release_ib(pm);
        return retval;
}
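
/* Ask the HWS to write fence_value to fence_address once it has finished
 * processing the packets submitted before this one.
 */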
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
                        uint32_t fence_value)
{
        uint32_t *buffer, size;
        int retval = 0;

        if (WARN_ON(!fence_address))
                return -EFAULT;

        size = pm->pmf->query_status_size;
        mutex_lock(&pm->lock);
        pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                        size / sizeof(uint32_t), (unsigned int **)&buffer);
        if (!buffer) {
                pr_err("Failed to allocate buffer on kernel queue\n");
                retval = -ENOMEM;
                goto out;
        }

        retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
        if (!retval)
                pm->priv_queue->ops.submit_packet(pm->priv_queue);
        else
                pm->priv_queue->ops.rollback_packet(pm->priv_queue);

out:
        mutex_unlock(&pm->lock);
        return retval;
}
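
/* Send an unmap_queues packet asking the HWS to preempt (or reset, if
 * requested) the queues selected by type, filter and filter_param.
 */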
int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
                        enum kfd_unmap_queues_filter filter,
                        uint32_t filter_param, bool reset,
                        unsigned int sdma_engine)
{
        uint32_t *buffer, size;
        int retval = 0;

        size = pm->pmf->unmap_queues_size;
        mutex_lock(&pm->lock);
        pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                        size / sizeof(uint32_t), (unsigned int **)&buffer);
        if (!buffer) {
                pr_err("Failed to allocate buffer on kernel queue\n");
                retval = -ENOMEM;
                goto out;
        }

        retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
                                       reset, sdma_engine);
        if (!retval)
                pm->priv_queue->ops.submit_packet(pm->priv_queue);
        else
                pm->priv_queue->ops.rollback_packet(pm->priv_queue);

out:
        mutex_unlock(&pm->lock);
        return retval;
}
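
/* Free the runlist IB allocation, if one is outstanding. */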
void pm_release_ib(struct packet_manager *pm)
{
        mutex_lock(&pm->lock);
        if (pm->allocated) {
                kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
                pm->allocated = false;
        }
        mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)
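
/* debugfs: hex-dump the currently allocated runlist IB, if any. */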
int pm_debugfs_runlist(struct seq_file *m, void *data)
{
        struct packet_manager *pm = data;

        mutex_lock(&pm->lock);

        if (!pm->allocated) {
                seq_puts(m, "  No active runlist\n");
                goto out;
        }

        seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
                     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
        mutex_unlock(&pm->lock);
        return 0;
}
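
/* debugfs: deliberately hang the HWS by submitting a packet of 0x55
 * filler bytes to the HIQ, e.g. to exercise GPU reset handling.
 */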
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
        uint32_t *buffer, size;
        int r = 0;

        size = pm->pmf->query_status_size;
        mutex_lock(&pm->lock);
        pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
                        size / sizeof(uint32_t), (unsigned int **)&buffer);
        if (!buffer) {
                pr_err("Failed to allocate buffer on kernel queue\n");
                r = -ENOMEM;
                goto out;
        }
        memset(buffer, 0x55, size);
        pm->priv_queue->ops.submit_packet(pm->priv_queue);

        pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
                buffer[0], buffer[1], buffer[2], buffer[3],
                buffer[4], buffer[5], buffer[6]);

out:
        mutex_unlock(&pm->lock);
        return r;
}

#endif