1 //===---------------- Implementation of GPU utils ---------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
10 #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
12 #include "src/__support/macros/config.h"
13 #include "src/__support/macros/properties/architectures.h"
15 #if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
16 #include "amdgpu/utils.h"
17 #elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
18 #include "nvptx/utils.h"
20 #include "generic/utils.h"
23 namespace LIBC_NAMESPACE_DECL
{
/// Returns the zero-based index of the lowest set bit in \p lane_mask, i.e.
/// the identifier of the first lane marked active in the mask.
///
/// \param lane_mask Bitmask with one bit per lane.
/// \returns The position of the least significant set bit. If \p lane_mask is
///          zero, `__builtin_ffsll` reports 0 and the subtraction wraps, so
///          the result is the largest `uint64_t` value.
LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
  // `__builtin_ffsll` is one-based: it yields 1 for bit 0 and 0 for "no bit".
  const int one_based_position = __builtin_ffsll(lane_mask);
  return static_cast<uint64_t>(one_based_position - 1);
}
30 /// Conditional that is only true for a single thread in a lane.
31 LIBC_INLINE
bool is_first_lane(uint64_t lane_mask
) {
32 return gpu::get_lane_id() == get_first_lane_id(lane_mask
);
35 /// Gets the sum of all lanes inside the warp or wavefront.
36 LIBC_INLINE
uint32_t reduce(uint64_t lane_mask
, uint32_t x
) {
37 for (uint32_t step
= gpu::get_lane_size() / 2; step
> 0; step
/= 2) {
38 uint32_t index
= step
+ gpu::get_lane_id();
39 x
+= gpu::shuffle(lane_mask
, index
, x
);
41 return gpu::broadcast_value(lane_mask
, x
);
44 /// Gets the accumulator scan of the threads in the warp or wavefront.
45 LIBC_INLINE
uint32_t scan(uint64_t lane_mask
, uint32_t x
) {
46 for (uint32_t step
= 1; step
< gpu::get_lane_size(); step
*= 2) {
47 uint32_t index
= gpu::get_lane_id() - step
;
48 uint32_t bitmask
= gpu::get_lane_id() >= step
;
49 x
+= -bitmask
& gpu::shuffle(lane_mask
, index
, x
);
55 } // namespace LIBC_NAMESPACE_DECL
57 #endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H