1 //===------ API.cpp - Kernel Language (CUDA/HIP) entry points ----- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 #include "Shared/APITypes.h"
16 unsigned x
= 0, y
= 0, z
= 0;
19 struct __omp_kernel_t
{
22 size_t __shared_memory
;
27 static __omp_kernel_t __current_kernel
= {};
28 #pragma omp threadprivate(__current_kernel);
32 // TODO: There is little reason we need to keep these names or the way calls are
33 // issued. For now we do to avoid modifying Clang's CUDA codegen. Unclear when
34 // we actually need to push/pop configurations.
35 unsigned __llvmPushCallConfiguration(dim3 __grid_size
, dim3 __block_size
,
36 size_t __shared_memory
, void *__stream
) {
37 __omp_kernel_t
&__kernel
= __current_kernel
;
38 __kernel
.__grid_size
= __grid_size
;
39 __kernel
.__block_size
= __block_size
;
40 __kernel
.__shared_memory
= __shared_memory
;
41 __kernel
.__stream
= __stream
;
45 unsigned __llvmPopCallConfiguration(dim3
*__grid_size
, dim3
*__block_size
,
46 size_t *__shared_memory
, void *__stream
) {
47 __omp_kernel_t
&__kernel
= __current_kernel
;
48 *__grid_size
= __kernel
.__grid_size
;
49 *__block_size
= __kernel
.__block_size
;
50 *__shared_memory
= __kernel
.__shared_memory
;
51 *((void **)__stream
) = __kernel
.__stream
;
55 int __tgt_target_kernel(void *Loc
, int64_t DeviceId
, int32_t NumTeams
,
56 int32_t ThreadLimit
, const void *HostPtr
,
59 unsigned llvmLaunchKernel(const void *func
, dim3 gridDim
, dim3 blockDim
,
60 void *args
, size_t sharedMem
, void *stream
) {
61 KernelArgsTy Args
= {};
62 Args
.DynCGroupMem
= sharedMem
;
63 Args
.NumTeams
[0] = gridDim
.x
;
64 Args
.NumTeams
[1] = gridDim
.y
;
65 Args
.NumTeams
[2] = gridDim
.z
;
66 Args
.ThreadLimit
[0] = blockDim
.x
;
67 Args
.ThreadLimit
[1] = blockDim
.y
;
68 Args
.ThreadLimit
[2] = blockDim
.z
;
69 Args
.ArgPtrs
= reinterpret_cast<void **>(args
);
70 Args
.Flags
.IsCUDA
= true;
71 return __tgt_target_kernel(nullptr, 0, gridDim
.x
, blockDim
.x
, func
, &Args
);