; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s
; A canonicalize of a raw loaded value cannot be folded away: the load may
; produce a denormal/sNaN that still needs canonicalizing.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-DENORM: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fmul already produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fsub produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fadd produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; sqrt produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; ceil produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; floor produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fma produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fmuladd produces a canonical result (mac when flushing, fma with denormals).
; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; A canonicalize of a canonicalize collapses to a single canonicalize.
; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
; GCN-FLUSH: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fpext produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}
; fpext produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}
; fptrunc produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}
; fptrunc to half produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}
; Vector fptrunc produces canonical results; only the pack sequence differs
; between VI (or) and GFX9 (lshl_or).
; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}
; fneg of a raw load is not canonical, so the canonicalize must remain
; (folded into the source modifier of the mul/max).
; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, -v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fneg of an already-canonical value (result of fadd) folds to a plain xor of
; the sign bit.
; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fabs of a raw load is not canonical, so the canonicalize must remain
; (folded into the source modifier of the mul/max).
; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fabs of an already-canonical value (result of fadd) folds to a plain and
; that clears the sign bit.
; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; sin produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; cos produces a canonical result, so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; f16 sin is expanded via f32 sin + cvt; the result is canonical so the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}
; f16 cos is expanded via f32 cos + cvt; the result is canonical so the
; canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}
; canonicalize of a constant qNaN folds to the canonical qNaN constant.
; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; minnum of a raw load: VI still needs the canonicalizing mul; GFX9's min is
; IEEE-compliant so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; minnum of an already-canonical value folds the canonicalize away everywhere.
; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; minnum with an sNaN operand (0x7f800001) can produce an sNaN, so the
; canonicalize must be kept.
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7f800001, v{{[0-9]+}}
; GCN-FLUSH: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN-DENORM: v_max_f32_e32 [[RESULT:v[0-9]+]], [[V0]], [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; minnum with a denormal constant (0x7fffff): on VI (flushing) the result may
; need flushing, so the canonicalizing mul remains; GFX9 folds it away.
; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
; GFX9: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; maxnum of a raw load: VI still needs the canonicalizing mul; GFX9's max is
; IEEE-compliant so the canonicalize folds away.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; maxnum of an already-canonical value folds the canonicalize away everywhere.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; f64 maxnum of an already-canonical value folds the canonicalize away.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}
; In a non-IEEE shader with exceptions enabled the canonicalize cannot be
; folded even after an fmul.
; GCN-LABEL: test_no_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN-EXCEPT: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_ps float @test_no_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}
; With nnan on the fmul the canonicalize folds away even in a non-IEEE shader.
; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}
; With "no-nans-fp-math" and denormals enabled (GFX9-DENORM), a loaded value
; is already canonical, so no mul/max is emitted; flushing targets still need it.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-DENORM-NOT: 1.0
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}
; f64 is never flushed, so with "no-nans-fp-math" the canonicalize of a loaded
; f64 folds away on all targets.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}
; With "no-nans-fp-math" the canonicalize of a loaded f16 folds away on all
; targets.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}
; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; CHECK: .amd_amdgpu_isa
; Intrinsic declarations used by the tests above.
declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.ceil.f32(float) #0
declare float @llvm.floor.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.sin.f32(float) #0
declare float @llvm.cos.f32(float) #0
declare half @llvm.sin.f16(half) #0
declare half @llvm.cos.f16(half) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }