2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/mman.h>
25 #include <linux/slab.h>
27 #include <linux/idr.h>
/*
 * This extension supports kernel-level doorbell management for the
 * kernel queues, using the first doorbell page reserved for the kernel.
 */
/* Hands out per-process doorbell slice indices; index 0 is never allocated. */
static DEFINE_IDA(doorbell_ida);

/*
 * Smallest number of process slices found on any device initialised so far.
 * Stays 0 until kfd_doorbell_init() has succeeded in sizing a device.
 */
static unsigned int max_doorbell_slices;
/*
 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
 * receives 32-bit writes that are passed to queues as wptr values.
 * The doorbells are intended to be written by applications as part
 * of queueing work on user-mode queues.
 * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
 * We map the doorbell address space into user-mode when a process creates
 * its first queue on each device.
 * Although the mapping is done by KFD, it is equivalent to an mmap of
 * /dev/kfd with the particular device encoded in the mmap offset.
 * There will be other uses for mmap of /dev/kfd, so only a range of
 * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
 */
51 /* # of doorbell bytes allocated for each process. */
52 size_t kfd_doorbell_process_slice(struct kfd_dev
*kfd
)
54 return roundup(kfd
->device_info
->doorbell_size
*
55 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS
,
59 /* Doorbell calculations for device init. */
60 int kfd_doorbell_init(struct kfd_dev
*kfd
)
62 size_t doorbell_start_offset
;
63 size_t doorbell_aperture_size
;
64 size_t doorbell_process_limit
;
67 * We start with calculations in bytes because the input data might
68 * only be byte-aligned.
69 * Only after we have done the rounding can we assume any alignment.
72 doorbell_start_offset
=
73 roundup(kfd
->shared_resources
.doorbell_start_offset
,
74 kfd_doorbell_process_slice(kfd
));
76 doorbell_aperture_size
=
77 rounddown(kfd
->shared_resources
.doorbell_aperture_size
,
78 kfd_doorbell_process_slice(kfd
));
80 if (doorbell_aperture_size
> doorbell_start_offset
)
81 doorbell_process_limit
=
82 (doorbell_aperture_size
- doorbell_start_offset
) /
83 kfd_doorbell_process_slice(kfd
);
87 if (!max_doorbell_slices
||
88 doorbell_process_limit
< max_doorbell_slices
)
89 max_doorbell_slices
= doorbell_process_limit
;
91 kfd
->doorbell_base
= kfd
->shared_resources
.doorbell_physical_address
+
92 doorbell_start_offset
;
94 kfd
->doorbell_base_dw_offset
= doorbell_start_offset
/ sizeof(u32
);
96 kfd
->doorbell_kernel_ptr
= ioremap(kfd
->doorbell_base
,
97 kfd_doorbell_process_slice(kfd
));
99 if (!kfd
->doorbell_kernel_ptr
)
102 pr_debug("Doorbell initialization:\n");
103 pr_debug("doorbell base == 0x%08lX\n",
104 (uintptr_t)kfd
->doorbell_base
);
106 pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
107 kfd
->doorbell_base_dw_offset
);
109 pr_debug("doorbell_process_limit == 0x%08lX\n",
110 doorbell_process_limit
);
112 pr_debug("doorbell_kernel_offset == 0x%08lX\n",
113 (uintptr_t)kfd
->doorbell_base
);
115 pr_debug("doorbell aperture size == 0x%08lX\n",
116 kfd
->shared_resources
.doorbell_aperture_size
);
118 pr_debug("doorbell kernel address == %p\n", kfd
->doorbell_kernel_ptr
);
123 void kfd_doorbell_fini(struct kfd_dev
*kfd
)
125 if (kfd
->doorbell_kernel_ptr
)
126 iounmap(kfd
->doorbell_kernel_ptr
);
129 int kfd_doorbell_mmap(struct kfd_dev
*dev
, struct kfd_process
*process
,
130 struct vm_area_struct
*vma
)
135 * For simplicitly we only allow mapping of the entire doorbell
136 * allocation of a single device & process.
138 if (vma
->vm_end
- vma
->vm_start
!= kfd_doorbell_process_slice(dev
))
141 /* Calculate physical address of doorbell */
142 address
= kfd_get_process_doorbells(dev
, process
);
144 vma
->vm_flags
|= VM_IO
| VM_DONTCOPY
| VM_DONTEXPAND
| VM_NORESERVE
|
145 VM_DONTDUMP
| VM_PFNMAP
;
147 vma
->vm_page_prot
= pgprot_noncached(vma
->vm_page_prot
);
149 pr_debug("Mapping doorbell page\n"
150 " target user address == 0x%08llX\n"
151 " physical address == 0x%08llX\n"
152 " vm_flags == 0x%04lX\n"
153 " size == 0x%04lX\n",
154 (unsigned long long) vma
->vm_start
, address
, vma
->vm_flags
,
155 kfd_doorbell_process_slice(dev
));
158 return io_remap_pfn_range(vma
,
160 address
>> PAGE_SHIFT
,
161 kfd_doorbell_process_slice(dev
),
166 /* get kernel iomem pointer for a doorbell */
167 void __iomem
*kfd_get_kernel_doorbell(struct kfd_dev
*kfd
,
168 unsigned int *doorbell_off
)
172 mutex_lock(&kfd
->doorbell_mutex
);
173 inx
= find_first_zero_bit(kfd
->doorbell_available_index
,
174 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS
);
176 __set_bit(inx
, kfd
->doorbell_available_index
);
177 mutex_unlock(&kfd
->doorbell_mutex
);
179 if (inx
>= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS
)
182 inx
*= kfd
->device_info
->doorbell_size
/ sizeof(u32
);
185 * Calculating the kernel doorbell offset using the first
188 *doorbell_off
= kfd
->doorbell_base_dw_offset
+ inx
;
190 pr_debug("Get kernel queue doorbell\n"
191 " doorbell offset == 0x%08X\n"
192 " doorbell index == 0x%x\n",
195 return kfd
->doorbell_kernel_ptr
+ inx
;
198 void kfd_release_kernel_doorbell(struct kfd_dev
*kfd
, u32 __iomem
*db_addr
)
202 inx
= (unsigned int)(db_addr
- kfd
->doorbell_kernel_ptr
)
203 * sizeof(u32
) / kfd
->device_info
->doorbell_size
;
205 mutex_lock(&kfd
->doorbell_mutex
);
206 __clear_bit(inx
, kfd
->doorbell_available_index
);
207 mutex_unlock(&kfd
->doorbell_mutex
);
210 void write_kernel_doorbell(void __iomem
*db
, u32 value
)
214 pr_debug("Writing %d to doorbell address %p\n", value
, db
);
218 void write_kernel_doorbell64(void __iomem
*db
, u64 value
)
221 WARN(((unsigned long)db
& 7) != 0,
222 "Unaligned 64-bit doorbell");
223 writeq(value
, (u64 __iomem
*)db
);
224 pr_debug("writing %llu to doorbell address %p\n", value
, db
);
228 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev
*kfd
,
229 struct kfd_process
*process
,
230 unsigned int doorbell_id
)
233 * doorbell_base_dw_offset accounts for doorbells taken by KGD.
234 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
235 * the process's doorbells. The offset returned is in dword
236 * units regardless of the ASIC-dependent doorbell size.
238 return kfd
->doorbell_base_dw_offset
+
239 process
->doorbell_index
240 * kfd_doorbell_process_slice(kfd
) / sizeof(u32
) +
241 doorbell_id
* kfd
->device_info
->doorbell_size
/ sizeof(u32
);
244 uint64_t kfd_get_number_elems(struct kfd_dev
*kfd
)
246 uint64_t num_of_elems
= (kfd
->shared_resources
.doorbell_aperture_size
-
247 kfd
->shared_resources
.doorbell_start_offset
) /
248 kfd_doorbell_process_slice(kfd
) + 1;
254 phys_addr_t
kfd_get_process_doorbells(struct kfd_dev
*dev
,
255 struct kfd_process
*process
)
257 return dev
->doorbell_base
+
258 process
->doorbell_index
* kfd_doorbell_process_slice(dev
);
261 int kfd_alloc_process_doorbells(struct kfd_process
*process
)
263 int r
= ida_simple_get(&doorbell_ida
, 1, max_doorbell_slices
,
266 process
->doorbell_index
= r
;
271 void kfd_free_process_doorbells(struct kfd_process
*process
)
273 if (process
->doorbell_index
)
274 ida_simple_remove(&doorbell_ida
, process
->doorbell_index
);