1 // Verify the behavior of the denormal-fp-mode attributes in the way that
2 // rocm-device-libs should be built with. The bitcode should be compiled with
3 // denormal-fp-math-f32=dynamic, and should be replaced with the denormal mode
6 // Build the fake device library in the way rocm-device-libs should be built.
8 // RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math-f32=dynamic \
9 // RUN: -mcode-object-version=none -emit-llvm-bc \
10 // RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.f32.bc
12 // RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa -fdenormal-fp-math=dynamic \
13 // RUN: -mcode-object-version=none -emit-llvm-bc \
14 // RUN: %S/Inputs/ocml-sample.cl -o %t.dynamic.full.bc
18 // Check the default behavior with no denormal-fp-math arguments.
19 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
20 // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \
21 // RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE
24 // Check an explicit full ieee request
25 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
26 // RUN: -fdenormal-fp-math=ieee \
27 // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc \
28 // RUN: -emit-llvm %s -o - | FileCheck -implicit-check-not=denormal-fp-math %s --check-prefixes=CHECK,INTERNALIZE
31 // Check explicit f32-only flushing request
32 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
33 // RUN: -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \
34 // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
35 // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32
38 // Check explicit flush all request. Only the f32 component of the library is
39 // dynamic, so the linked functions should use IEEE as the base mode and the new
40 // functions preserve-sign.
41 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
42 // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign \
43 // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
44 // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,PSZ
47 // Check explicit f32-only, ieee-other flushing request
48 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
49 // RUN: -fcuda-is-device -fdenormal-fp-math=ieee -fdenormal-fp-math-f32=preserve-sign \
50 // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
51 // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF64-PSZF32
54 // Check inverse of normal usage. Requesting IEEE f32, with flushed f16/f64
55 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
56 // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \
57 // RUN: -mlink-builtin-bitcode %t.dynamic.f32.bc -emit-llvm %s -o - \
58 // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNF32
61 // Check backwards from the normal usage where both library components can be
63 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
64 // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \
65 // RUN: -mlink-builtin-bitcode %t.dynamic.full.bc -emit-llvm %s -o - \
66 // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,INTERNALIZE,IEEEF32-PSZF64-DYNFULL
70 // Check the case where no internalization is performed
71 // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
72 // RUN: -fcuda-is-device -fdenormal-fp-math=preserve-sign -fdenormal-fp-math-f32=ieee \
73 // RUN: -mlink-bitcode-file %t.dynamic.full.bc -emit-llvm %s -o - \
74 // RUN: | FileCheck -implicit-check-not=denormal-fp-math --enable-var-scope %s --check-prefixes=CHECK,NOINTERNALIZE,NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL
78 #define __device__ __attribute__((device))
79 #define __global__ __attribute__((global))
81 typedef _Float16 half;
84 __device__ half do_f16_stuff(half a, half b, half c);
85 __device__ float do_f32_stuff(float a, float b, float c);
87 // Currently all library functions are internalized. Check a weak function in
88 // case we ever choose to not internalize these. In that case, the safest thing
89 // to do would likely be to preserve the dynamic denormal-fp-math.
90 __attribute__((weak)) __device__ float weak_do_f32_stuff(float a, float b, float c);
91 __device__ double do_f64_stuff(double a, double b, double c);
94 // CHECK: kernel_f16({{.*}}) #[[$KERNELATTR:[0-9]+]]
95 __global__ void kernel_f16(float* out, float* a, float* b, float* c) {
97 out[id] = do_f16_stuff(a[id], b[id], c[id]);
100 // CHECK: kernel_f32({{.*}}) #[[$KERNELATTR]]
101 __global__ void kernel_f32(float* out, float* a, float* b, float* c) {
103 out[id] = do_f32_stuff(a[id], b[id], c[id]);
104 out[id] += weak_do_f32_stuff(a[id], b[id], c[id]);
107 // CHECK: kernel_f64({{.*}}) #[[$KERNELATTR]]
108 __global__ void kernel_f64(double* out, double* a, double* b, double* c) {
110 out[id] = do_f64_stuff(a[id], b[id], c[id]);
114 // INTERNALIZE: define internal {{(noundef )?}}half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]]
115 // INTERNALIZE: define internal {{(noundef )?}}float @do_f32_stuff({{.*}}) #[[$FUNCATTR]]
116 // INTERNALIZE: define internal {{(noundef )?}}double @do_f64_stuff({{.*}}) #[[$FUNCATTR]]
117 // INTERNALIZE: define internal {{(noundef )?}}float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]]
120 // NOINTERNALIZE: define dso_local {{(noundef )?}}half @do_f16_stuff({{.*}}) #[[$FUNCATTR:[0-9]+]]
121 // NOINTERNALIZE: define dso_local {{(noundef )?}}float @do_f32_stuff({{.*}}) #[[$FUNCATTR]]
122 // NOINTERNALIZE: define dso_local {{(noundef )?}}double @do_f64_stuff({{.*}}) #[[$FUNCATTR]]
123 // NOINTERNALIZE: define weak {{(noundef )?}}float @weak_do_f32_stuff({{.*}}) #[[$WEAK_FUNCATTR:[0-9]+]]
127 // We should not be littering call sites with the attribute
128 // Everything should use the default ieee with no explicit attribute
130 // FIXME: Should check-not "denormal-fp-math" within the denormal-fp-math-f32
133 // Default mode relies on the implicit check-not for the denormal-fp-math.
135 // PSZ: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign"
136 // PSZ-SAME: "target-cpu"="gfx803"
137 // PSZ: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
138 // PSZ-SAME: "target-cpu"="gfx803"
139 // PSZ: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
140 // PSZ-SAME: "target-cpu"="gfx803"
142 // FIXME: Should check-not "denormal-fp-math" within the line
143 // IEEEF64-PSZF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
144 // IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
145 // IEEEF64-PSZF32: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
146 // IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
147 // IEEEF64-PSZF32: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
148 // IEEEF64-PSZF32-SAME: "target-cpu"="gfx803"
150 // IEEEF32-PSZF64-DYNF32: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
151 // implicit check-not
152 // implicit check-not
155 // IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
156 // IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
157 // IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
158 // IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
159 // IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee"
160 // IEEEF32-PSZF64-DYNFULL-SAME: "target-cpu"="gfx803"
162 // -mlink-bitcode-file doesn't internalize or propagate attributes.
163 // NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$KERNELATTR]] = { {{.*}} "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" {{.*}} "target-cpu"="gfx803" {{.*}} }
164 // NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} }
165 // NOINTERNALIZE-IEEEF32-PSZF64-DYNFULL: #[[$WEAK_FUNCATTR]] = { {{.*}} "denormal-fp-math"="dynamic,dynamic" {{.*}} }