[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / openmp / libomptarget / deviceRTLs / nvptx / src / target_impl.h
blobab471e3116327a712fcd848acb7ea6fd339abb1f
1 //===------------ target_impl.h - NVPTX OpenMP GPU options ------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Definitions of target specific functions
11 //===----------------------------------------------------------------------===//
12 #ifndef _TARGET_IMPL_H_
13 #define _TARGET_IMPL_H_
15 #include "nvptx_interface.h"
17 #include <stddef.h>
18 #include <stdint.h>
20 // subset of inttypes.h
21 #define PRId64 "ld"
22 #define PRIu64 "lu"
24 typedef uint32_t __kmpc_impl_lanemask_t;
26 #define INLINE inline __attribute__((always_inline))
27 #define NOINLINE __attribute__((noinline))
28 #define ALIGN(N) __attribute__((aligned(N)))
29 #define PLUGIN_ACCESSIBLE /* no annotation needed for cuda plugin */
31 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
33 INLINE constexpr const llvm::omp::GV &getGridValue() {
34 return llvm::omp::NVPTXGridValues;
37 ////////////////////////////////////////////////////////////////////////////////
38 // Kernel options
39 ////////////////////////////////////////////////////////////////////////////////
41 ////////////////////////////////////////////////////////////////////////////////
42 // The following def must match the absolute limit hardwired in the host RTL
43 // max number of threads per team
44 enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size }; // taken from the GV_Max_WG_Size field of the NVPTX grid values
45 enum { WARPSIZE = getGridValue().GV_Warp_Size }; // taken from the GV_Warp_Size field of the NVPTX grid values
47 // Maximum number of omp state objects per SM allocated statically in global
48 // memory.
49 #if __CUDA_ARCH__ >= 600
50 #define OMP_STATE_COUNT 32
51 #else
52 #define OMP_STATE_COUNT 16
53 #endif
55 #if !defined(MAX_SM)
56 #if __CUDA_ARCH__ >= 900
57 #error unsupported compute capability, define MAX_SM via LIBOMPTARGET_NVPTX_MAX_SM cmake option
58 #elif __CUDA_ARCH__ >= 800
59 // GA100 design has a maximum of 128 SMs but A100 product only has 108 SMs
60 // GA102 design has a maximum of 84 SMs
61 #define MAX_SM 108
62 #elif __CUDA_ARCH__ >= 700
63 #define MAX_SM 84
64 #elif __CUDA_ARCH__ >= 600
65 #define MAX_SM 56
66 #else
67 #define MAX_SM 16
68 #endif
69 #endif
71 #define OMP_ACTIVE_PARALLEL_LEVEL 128
73 // Data sharing related quantities, need to match what is used in the compiler.
74 enum DATA_SHARING_SIZES {
75 // The size reserved for data in a shared memory slot.
76 DS_Slot_Size = getGridValue().GV_Slot_Size,
77 // The slot size that should be reserved for a working warp.
78 DS_Worker_Warp_Slot_Size = getGridValue().warpSlotSize(),
79 // The maximum number of warps in use
80 DS_Max_Warp_Number = getGridValue().maxWarpNumber(),
83 enum : __kmpc_impl_lanemask_t {
84 __kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0
87 #define printf(...) // every printf(...) invocation after this point expands to nothing
89 #endif