1 ; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=ieee < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -denormal-fp-math-f32=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s
6 ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
7 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
8 ; GFX9: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; Canonicalizes a value that comes straight from memory: loads the float at
; %arg[workitem id], canonicalizes it, and stores it back to the same slot.
9 define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(ptr addrspace(1) %arg) {
10 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
11 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
12 %v = load float, ptr addrspace(1) %gep, align 4
13 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
14 store float %canonicalized, ptr addrspace(1) %gep, align 4
18 ; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
19 ; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
20 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of an fmul: the loaded float is multiplied
; by 15.0, the product is canonicalized and stored back to the load address.
22 define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(ptr addrspace(1) %arg) {
23 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
24 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
25 %load = load float, ptr addrspace(1) %gep, align 4
26 %v = fmul float %load, 15.0
27 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
28 store float %canonicalized, ptr addrspace(1) %gep, align 4
32 ; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_legacy_value_f32:
33 ; GCN: v_mul_legacy_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
36 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.amdgcn.fmul.legacy(load, 15.0);
; the canonicalized value is stored back to the load address.
37 define amdgpu_kernel void @test_fold_canonicalize_fmul_legacy_value_f32(ptr addrspace(1) %arg) {
38 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
39 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
40 %load = load float, ptr addrspace(1) %gep, align 4
41 %v = call float @llvm.amdgcn.fmul.legacy(float %load, float 15.0)
42 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
43 store float %canonicalized, ptr addrspace(1) %gep, align 4
47 ; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
48 ; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
51 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of an fsub: computes 15.0 - load,
; canonicalizes the difference and stores it back to the load address.
52 define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(ptr addrspace(1) %arg) {
53 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
54 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
55 %load = load float, ptr addrspace(1) %gep, align 4
56 %v = fsub float 15.0, %load
57 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
58 store float %canonicalized, ptr addrspace(1) %gep, align 4
62 ; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
63 ; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
66 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of an fadd: computes load + 15.0,
; canonicalizes the sum and stores it back to the load address.
67 define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(ptr addrspace(1) %arg) {
68 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
69 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
70 %load = load float, ptr addrspace(1) %gep, align 4
71 %v = fadd float %load, 15.0
72 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
73 store float %canonicalized, ptr addrspace(1) %gep, align 4
77 ; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
78 ; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
81 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of an afn-flagged llvm.sqrt.f32 call on the
; loaded float; the canonicalized value is stored back to the load address.
82 define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(ptr addrspace(1) %arg) {
83 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
84 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
85 %load = load float, ptr addrspace(1) %gep, align 4
86 %v = call afn float @llvm.sqrt.f32(float %load)
87 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
88 store float %canonicalized, ptr addrspace(1) %gep, align 4
92 ; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
93 ; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
96 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.ceil.f32 on the loaded float;
; the canonicalized value is stored back to the load address.
97 define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(ptr addrspace(1) %arg) {
98 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
99 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
100 %load = load float, ptr addrspace(1) %gep, align 4
101 %v = call float @llvm.ceil.f32(float %load)
102 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
103 store float %canonicalized, ptr addrspace(1) %gep, align 4
107 ; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
108 ; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
111 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.floor.f32 on the loaded float;
; the canonicalized value is stored back to the load address.
112 define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(ptr addrspace(1) %arg) {
113 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
114 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
115 %load = load float, ptr addrspace(1) %gep, align 4
116 %v = call float @llvm.floor.f32(float %load)
117 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
118 store float %canonicalized, ptr addrspace(1) %gep, align 4
122 ; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
123 ; GCN: s_mov_b32 [[SREG:s[0-9]+]], 0x41700000
124 ; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SREG]], [[SREG]]
127 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.fma.f32(load, 15.0, 15.0);
; the canonicalized value is stored back to the load address.
128 define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(ptr addrspace(1) %arg) {
129 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
130 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
131 %load = load float, ptr addrspace(1) %gep, align 4
132 %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
133 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
134 store float %canonicalized, ptr addrspace(1) %gep, align 4
138 ; GCN-LABEL: test_fold_canonicalize_fmad_ftz_value_f32:
139 ; GCN: v_mac_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+$}}
142 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.amdgcn.fmad.ftz.f32(load, 15.0, 15.0);
; the canonicalized value is stored back to the load address.
143 define amdgpu_kernel void @test_fold_canonicalize_fmad_ftz_value_f32(ptr addrspace(1) %arg) {
144 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
145 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
146 %load = load float, ptr addrspace(1) %gep, align 4
147 %v = call float @llvm.amdgcn.fmad.ftz.f32(float %load, float 15.0, float 15.0)
148 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
149 store float %canonicalized, ptr addrspace(1) %gep, align 4
153 ; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
154 ; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
155 ; GCN-DENORM: s_mov_b32 [[SREG:s[0-9]+]], 0x41700000
156 ; GCN-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SREG]], [[SREG]]
159 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.fmuladd.f32(load, 15.0, 15.0);
; the canonicalized value is stored back to the load address. The check lines
; before this function differ between the flush and denormal run configurations.
161 define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(ptr addrspace(1) %arg) {
162 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
163 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
164 %load = load float, ptr addrspace(1) %gep, align 4
165 %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
166 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
167 store float %canonicalized, ptr addrspace(1) %gep, align 4
171 ; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
172 ; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
173 ; VI: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
174 ; GFX9: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
178 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Applies llvm.canonicalize.f32 twice in a row to the loaded float (the second
; call consumes the first call's result) and stores the outcome back to the
; load address.
179 define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(ptr addrspace(1) %arg) {
180 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
181 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
182 %load = load float, ptr addrspace(1) %gep, align 4
183 %v = call float @llvm.canonicalize.f32(float %load)
184 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
185 store float %canonicalized, ptr addrspace(1) %gep, align 4
189 ; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
190 ; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
193 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
; Canonicalize input is an fpext: the float loaded from %arg[id] is extended to
; double, canonicalized as f64, and stored to %out[id].
194 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
195 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
196 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
197 %load = load float, ptr addrspace(1) %gep, align 4
198 %v = fpext float %load to double
199 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
200 %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
201 store double %canonicalized, ptr addrspace(1) %gep2, align 8
205 ; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
206 ; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
209 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is an fpext: the half loaded from %arg[id] is extended to
; float, canonicalized as f32, and stored to %out[id].
210 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
211 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
212 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
213 %load = load half, ptr addrspace(1) %gep, align 2
214 %v = fpext half %load to float
215 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
216 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
217 store float %canonicalized, ptr addrspace(1) %gep2, align 4
221 ; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
222 ; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
225 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Same half -> float fpext + canonicalize sequence as the non-flush variant,
; but the function carries attribute group #2.
; NOTE(review): attribute group #2 is declared outside the visible lines; the
; _flushf16 suffix suggests it changes the f16 denormal mode - confirm.
226 define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
227 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
228 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
229 %load = load half, ptr addrspace(1) %gep, align 2
230 %v = fpext half %load to float
231 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
232 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
233 store float %canonicalized, ptr addrspace(1) %gep2, align 4
237 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
238 ; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
241 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is an fptrunc: the double loaded from %arg[id] is truncated
; to float, canonicalized as f32, and stored to %out[id].
242 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
243 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
244 %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
245 %load = load double, ptr addrspace(1) %gep, align 8
246 %v = fptrunc double %load to float
247 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
248 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
249 store float %canonicalized, ptr addrspace(1) %gep2, align 4
253 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
254 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
257 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
; Canonicalize input is an fptrunc: the float loaded from %arg[id] is truncated
; to half, canonicalized as f16, and stored to %out[id].
258 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
259 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
260 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
261 %load = load float, ptr addrspace(1) %gep, align 4
262 %v = fptrunc float %load to half
263 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
264 %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
265 store half %canonicalized, ptr addrspace(1) %gep2, align 2
269 ; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
270 ; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
273 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
; Same float -> half fptrunc + canonicalize sequence as the non-flush variant,
; but the function carries attribute group #2.
; NOTE(review): attribute group #2 is declared outside the visible lines; the
; _flushf16 suffix suggests it changes the f16 denormal mode - confirm.
274 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #2 {
275 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
276 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
277 %load = load float, ptr addrspace(1) %gep, align 4
278 %v = fptrunc float %load to half
279 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
280 %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
281 store half %canonicalized, ptr addrspace(1) %gep2, align 2
285 ; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
286 ; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
287 ; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
288 ; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
289 ; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
290 ; GFX9: v_pack_b32_f16 [[V:v[0-9]+]], [[V1]], [[V0]]
293 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Vector form of the fptrunc case: loads <2 x float> from %arg[id], truncates to
; <2 x half>, canonicalizes with llvm.canonicalize.v2f16 and stores to %out[id].
294 define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) {
295 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
296 %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %arg, i32 %id
297 %load = load <2 x float>, ptr addrspace(1) %gep, align 8
298 %v = fptrunc <2 x float> %load to <2 x half>
299 %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
300 %gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %id
301 store <2 x half> %canonicalized, ptr addrspace(1) %gep2, align 4
305 ; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
306 ; VI: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
307 ; GFX9: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; fneg applied directly to a loaded value: the negated load is canonicalized and
; stored back to the load address. The check lines before this function expect
; the negation to appear as a source operand of the canonicalizing instruction.
308 define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(ptr addrspace(1) %arg) {
309 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
310 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
311 %load = load float, ptr addrspace(1) %gep, align 4
312 %v = fneg float %load
313 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
314 store float %canonicalized, ptr addrspace(1) %gep, align 4
318 ; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
319 ; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
322 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; fneg applied to the result of an FP operation: the loaded float goes through
; fadd with 0.0, the sum is negated with fneg, and the negated value is
; canonicalized and stored back to the load address. The check lines before
; this function expect a sign-bit xor whose result feeds the store.
323 define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(ptr addrspace(1) %arg) {
324 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
325 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
326 %load = load float, ptr addrspace(1) %gep, align 4
327 %v0 = fadd float %load, 0.0
; %v has to be defined before the canonicalize call uses it; it is the fneg of
; the fadd result, which is what distinguishes this test from the no_fold one.
328 %v = fneg float %v0
329 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
330 store float %canonicalized, ptr addrspace(1) %gep, align 4
334 ; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
335 ; VI: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
336 ; GFX9: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
; fabs applied directly to a loaded value: the absolute value of the load is
; canonicalized and stored back to the load address.
337 define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
338 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
339 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
340 %load = load float, ptr addrspace(1) %gep, align 4
341 %v = tail call float @llvm.fabs.f32(float %load)
342 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
343 store float %canonicalized, ptr addrspace(1) %gep, align 4
347 ; GCN-LABEL: test_no_fold_canonicalize_fcopysign_value_f32:
348 ; VI: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
349 ; GFX9: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
; Loads a float, builds copysign(canonicalize(load), %sign), then canonicalizes
; fabs(load) and stores that back to the load address.
; NOTE(review): %copysign is never used in the visible lines - the stored value
; depends only on fabs(%load), and the check lines before this function match
; the fabs form. Confirm whether %v was meant to take %copysign as its operand.
353 define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr addrspace(1) %arg, float %sign) {
354 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
355 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
356 %load = load float, ptr addrspace(1) %gep, align 4
357 %canon.load = tail call float @llvm.canonicalize.f32(float %load)
358 %copysign = call float @llvm.copysign.f32(float %canon.load, float %sign)
359 %v = tail call float @llvm.fabs.f32(float %load)
360 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
361 store float %canonicalized, ptr addrspace(1) %gep, align 4
365 ; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
366 ; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
369 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; fabs applied to the result of an FP operation: the loaded float goes through
; fadd with 0.0, then fabs, and the result is canonicalized and stored back to
; the load address.
370 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
371 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
372 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
373 %load = load float, ptr addrspace(1) %gep, align 4
374 %v0 = fadd float %load, 0.0
375 %v = tail call float @llvm.fabs.f32(float %v0)
376 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
377 store float %canonicalized, ptr addrspace(1) %gep, align 4
381 ; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
382 ; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
385 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.sin.f32 on the loaded float;
; the canonicalized value is stored back to the load address.
386 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(ptr addrspace(1) %arg) {
387 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
388 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
389 %load = load float, ptr addrspace(1) %gep, align 4
390 %v = tail call float @llvm.sin.f32(float %load)
391 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
392 store float %canonicalized, ptr addrspace(1) %gep, align 4
396 ; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
397 ; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
400 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is the result of llvm.cos.f32 on the loaded float;
; the canonicalized value is stored back to the load address.
401 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(ptr addrspace(1) %arg) {
402 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
403 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
404 %load = load float, ptr addrspace(1) %gep, align 4
405 %v = tail call float @llvm.cos.f32(float %load)
406 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
407 store float %canonicalized, ptr addrspace(1) %gep, align 4
411 ; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
412 ; GCN: v_sin_f16_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
415 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V0]]
; Half-precision variant: canonicalize input is the result of llvm.sin.f16 on
; the loaded half; the canonicalized value is stored back to the load address.
416 define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(ptr addrspace(1) %arg) {
417 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
418 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
419 %load = load half, ptr addrspace(1) %gep, align 2
420 %v = tail call half @llvm.sin.f16(half %load)
421 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
422 store half %canonicalized, ptr addrspace(1) %gep, align 2
426 ; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
427 ; GCN: v_cos_f16_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
430 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V0]]
; Half-precision variant: canonicalize input is the result of llvm.cos.f16 on
; the loaded half; the canonicalized value is stored back to the load address.
431 define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(ptr addrspace(1) %arg) {
432 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
433 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
434 %load = load half, ptr addrspace(1) %gep, align 2
435 %v = tail call half @llvm.cos.f16(half %load)
436 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
437 store half %canonicalized, ptr addrspace(1) %gep, align 2
441 ; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
442 ; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
445 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalizes a constant operand (written as 0x7FF8000000000000) with no load
; involved and stores the result to %arg[workitem id]. The check lines before
; this function expect the constant 0x7fc00000 to be materialized and stored.
446 define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(ptr addrspace(1) %arg) {
447 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
448 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
449 %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
450 store float %canonicalized, ptr addrspace(1) %gep, align 4
454 ; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
455 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
456 ; VI: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
457 ; GFX9: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
459 ; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, [[QUIET]]
463 ; GFX9: {{flat|global}}_store_dword v{{.+}}, [[V]]
; Canonicalize input is llvm.minnum.f32(load, 0.0) where the first operand comes
; straight from memory; the result is stored back to the load address.
464 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode(ptr addrspace(1) %arg) {
465 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
466 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
467 %load = load float, ptr addrspace(1) %gep, align 4
468 %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
469 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
470 store float %canonicalized, ptr addrspace(1) %gep, align 4
474 ; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode:
475 ; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
476 ; GCN-DENORM-NOT: v_max
477 ; GCN-DENORM-NOT: v_mul
479 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
480 ; GCN-DENORM-NOT: v_max
481 ; GCN-DENORM-NOT: v_mul
483 ; GFX9: {{flat|global}}_store_dword
; Same minnum(load, 0.0) + canonicalize sequence as the plain ieee_mode test,
; but the function carries attribute group #1.
; NOTE(review): attribute group #1 is declared outside the visible lines; the
; _nnan_ in the name suggests it enables a no-NaNs assumption - confirm.
484 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode(ptr addrspace(1) %arg) #1 {
485 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
486 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
487 %load = load float, ptr addrspace(1) %gep, align 4
488 %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
489 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
490 store float %canonicalized, ptr addrspace(1) %gep, align 4
494 ; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
495 ; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
498 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; minnum whose first operand is the result of an FP operation: the loaded float
; goes through fadd with 0.0 before minnum(., 0.0); the canonicalized result is
; stored back to the load address.
499 define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace(1) %arg) {
500 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
501 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
502 %load = load float, ptr addrspace(1) %gep, align 4
503 %v0 = fadd float %load, 0.0
504 %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
505 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
506 store float %canonicalized, ptr addrspace(1) %gep, align 4
510 ; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.
512 ; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
513 ; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
514 ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
515 ; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
; minnum of the loaded float with a constant whose bit pattern is i32 2139095041
; (0x7F800001); the result is canonicalized and stored back to the load address.
; The check lines before this function only expect a canonicalizing instruction
; on the loaded value.
516 define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
517 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
518 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
519 %load = load float, ptr addrspace(1) %gep, align 4
520 %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
521 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
522 store float %canonicalized, ptr addrspace(1) %gep, align 4
526 ; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
527 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
529 ; GFX9-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
530 ; GFX9-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[QUIET]]
532 ; GFX9-FLUSH: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
533 ; GFX9-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]]
535 ; VI-FLUSH: v_mul_f32_e32 [[QUIET_V0:v[0-9]+]], 1.0, [[VAL]]
536 ; VI-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_V0]]
538 ; VI-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[VAL]]
542 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
; minnum of the loaded float with a constant whose bit pattern is i32 8388607
; (0x7FFFFF); the result is canonicalized and stored back to the load address.
; The check lines before this function expect the constant operand to appear as
; 0x7fffff in the denormal runs and as 0 in the flush runs.
543 define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(ptr addrspace(1) %arg) {
544 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
545 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
546 %load = load float, ptr addrspace(1) %gep, align 4
547 %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
548 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
549 store float %canonicalized, ptr addrspace(1) %gep, align 4
553 ; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode:
554 ; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
556 ; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[VAL]]
558 ; VI-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
559 ; VI-FLUSH: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]]
561 ; VI-DENORM: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[VAL]]
565 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[RESULT]]
; Canonicalize input is llvm.maxnum.f32(load, 0.0) where the first operand comes
; straight from memory; the result is stored back to the load address.
566 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode(ptr addrspace(1) %arg) {
567 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
568 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
569 %load = load float, ptr addrspace(1) %gep, align 4
570 %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
571 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
572 store float %canonicalized, ptr addrspace(1) %gep, align 4
576 ; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
577 ; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
580 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
; maxnum whose first operand is the result of an FP operation: the loaded float
; goes through fadd with 0.0 before maxnum(., 0.0); the canonicalized result is
; stored back to the load address.
581 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(ptr addrspace(1) %arg) {
582 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
583 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
584 %load = load float, ptr addrspace(1) %gep, align 4
585 %v0 = fadd float %load, 0.0
586 %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
587 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
588 store float %canonicalized, ptr addrspace(1) %gep, align 4
592 ; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
593 ; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
596 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
; Double-precision variant of the maxnum test: load double, fadd with 0.0,
; maxnum(., 0.0), canonicalize as f64, store back to the load address.
597 define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(ptr addrspace(1) %arg) {
598 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
599 %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
600 %load = load double, ptr addrspace(1) %gep, align 8
601 %v0 = fadd double %load, 0.0
602 %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
603 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
604 store double %canonicalized, ptr addrspace(1) %gep, align 8
608 ; GCN-LABEL: test_fold_canonicalize_fmul_value_f32_no_ieee:
609 ; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; amdgpu_ps variant of the fmul test: multiplies the float argument by 15.0,
; canonicalizes the product and returns it (no memory access).
613 define amdgpu_ps float @test_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
615 %v = fmul float %arg, 15.0
616 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
617 ret float %canonicalized
620 ; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
621 ; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; Same as the amdgpu_ps fmul test, except the fmul carries the nnan
; fast-math flag; the canonicalized product is returned.
625 define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
627 %v = fmul nnan float %arg, 15.0
628 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
629 ret float %canonicalized
632 ; GCN-LABEL: {{^}}test_fold_canonicalize_fdiv_value_f32_no_ieee:
633 ; GCN: v_div_fixup_f32
; amdgpu_ps function that computes 15.0 / %arg0 with fdiv, canonicalizes the
; quotient and returns it.
637 define amdgpu_ps float @test_fold_canonicalize_fdiv_value_f32_no_ieee(float %arg0) {
639 %v = fdiv float 15.0, %arg0
640 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
641 ret float %canonicalized
644 ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
645 ; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
646 ; GFX9-DENORM: global_store_dword v{{[0-9]+}}, [[V]], s{{\[[0-9]+:[0-9]+\]}}
647 ; GFX9-DENORM-NOT: 1.0
648 ; GFX9-DENORM-NOT: v_max
649 ; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
650 ; GFX9-FLUSH: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; Canonicalizes a float loaded from %arg[id] and stores it to %out[id], in a
; function carrying attribute group #1.
; NOTE(review): attribute group #1 is declared outside the visible lines; the
; _nnan_ in the name suggests it enables a no-NaNs assumption - confirm.
651 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
652 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
653 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
654 %v = load float, ptr addrspace(1) %gep, align 4
655 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
656 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
657 store float %canonicalized, ptr addrspace(1) %gep2, align 4
661 ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
662 ; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
663 ; GCN: {{flat|global}}_store_dwordx2 v{{.+}}, [[V]]
; Double-precision variant: canonicalizes a double loaded from %arg[id] and
; stores it to %out[id], in a function carrying attribute group #1.
; NOTE(review): attribute group #1 is declared outside the visible lines -
; confirm its contents against the end of the file.
666 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
667 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
668 %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
669 %v = load double, ptr addrspace(1) %gep, align 8
670 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
671 %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
672 store double %canonicalized, ptr addrspace(1) %gep2, align 8
676 ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
677 ; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
680 ; GCN: {{flat|global}}_store_short v{{.+}}, [[V]]
; Half-precision variant: canonicalizes a half loaded from %arg[id] and stores
; it to %out[id], in a function carrying attribute group #1.
; NOTE(review): attribute group #1 is declared outside the visible lines -
; confirm its contents against the end of the file.
681 define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(ptr addrspace(1) %arg, ptr addrspace(1) %out) #1 {
682 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
683 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
684 %v = load half, ptr addrspace(1) %gep, align 2
685 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
686 %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
687 store half %canonicalized, ptr addrspace(1) %gep2, align 2
691 ; GCN-LABEL: {{^}}test_fold_canonicalize_select_value_f32:
; Canonicalize input is a select between two fadd results. Two volatile float
; loads read the same address, each is offset by a different constant (15.0 and
; 32.0), and a volatile i32 load from an undef pointer supplies the value
; compared against 0 to pick between them. The canonicalized selection is
; stored back to %arg[id].
697 define amdgpu_kernel void @test_fold_canonicalize_select_value_f32(ptr addrspace(1) %arg) {
698 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
699 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
700 %load0 = load volatile float, ptr addrspace(1) %gep, align 4
701 %load1 = load volatile float, ptr addrspace(1) %gep, align 4
702 %load2 = load volatile i32, ptr addrspace(1) undef, align 4
703 %v0 = fadd float %load0, 15.0
704 %v1 = fadd float %load1, 32.0
705 %cond = icmp eq i32 %load2, 0
706 %select = select i1 %cond, float %v0, float %v1
707 %canonicalized = tail call float @llvm.canonicalize.f32(float %select)
708 store float %canonicalized, ptr addrspace(1) %gep, align 4
712 ; Need to quiet the nan with a separate instruction since it will be
713 ; passed through the minnum.
714 ; FIXME: canonicalize doesn't work correctly without ieee_mode
716 ; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_no_ieee_mode:
719 ; GFX9: v_min_f32_e32 v0, v0, v1
720 ; GFX9-NEXT: ; return to shader
722 ; VI-FLUSH: v_min_f32_e32 v0, v0, v1
723 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
724 ; VI-FLUSH-NEXT: ; return
727 ; VI-DENORM: v_min_f32_e32 v0, v0, v1
728 ; VI-DENORM-NEXT: ; return
; amdgpu_ps function: canonicalizes llvm.minnum.f32 of its two float arguments
; and returns the result.
729 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %arg0, float %arg1) {
730 %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
731 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
732 ret float %canonicalized
; Canonicalize of an llvm.minnum.f32 result in a function with the default
; calling convention (no amdgpu_ps / amdgpu_kernel marker on the define).
; The GFX9 checks expect a bare v_min_f32 followed by the return; the VI
; checks expect both inputs to be multiplied by 1.0 before the v_min_f32.
; NOTE(review): presumably the default calling convention runs with IEEE
; mode enabled, as the test name suggests - confirm against the backend docs.
735 ; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_ieee_mode:
736 ; GFX9: v_min_f32_e32 v0, v0, v1
737 ; GFX9-NEXT: s_setpc_b64
739 ; VI-DAG: v_mul_f32_e32 v0, 1.0, v0
740 ; VI-DAG: v_mul_f32_e32 v1, 1.0, v1
741 ; VI: v_min_f32_e32 v0, v0, v1
743 ; VI-NEXT: s_setpc_b64
744 define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %arg1) {
745 %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
746 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
747 ret float %canonicalized
750 ; Canonicalizing flush necessary pre-gfx9
; amdgpu_ps variant that carries attribute group #1, which sets
; "no-nans-fp-math"="true". All targets are expected to emit v_min_f32;
; only the VI-FLUSH run additionally expects a v_mul_f32 by 1.0 right after.
751 ; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
752 ; GCN: v_min_f32_e32 v0, v0, v1
753 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
755 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(float %arg0, float %arg1) #1 {
756 %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
757 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
758 ret float %canonicalized
; Canonicalize of a <2 x half> assembled from two arithmetic results:
; element 0 is an fadd result and element 1 is an fmul result.
761 ; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_v2f16:
762 ; GFX9-DAG: v_add_f16_e32
763 ; GFX9-DAG: v_mul_f16_e32
766 define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
767 %lo = extractelement <2 x half> %vec, i32 0
768 %hi = extractelement <2 x half> %vec, i32 1
769 %lo.op = fadd half %lo, 1.0
; NOTE(review): %hi.op is computed from %lo, leaving %hi unused. It is not
; clear from this file whether that is intentional; the expected
; instructions above may depend on it, so verify before changing.
770 %hi.op = fmul half %lo, 4.0
771 %ins0 = insertelement <2 x half> undef, half %lo.op, i32 0
772 %ins1 = insertelement <2 x half> %ins0, half %hi.op, i32 1
773 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
774 ret <2 x half> %canonicalized
; Element 0 of the vector is replaced by an fadd result, while element 1
; still comes straight from the function argument %vec.
777 ; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon1_v2f16:
778 ; GFX9: v_add_f16_e32
780 define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %vec) {
781 %lo = extractelement <2 x half> %vec, i32 0
782 %lo.op = fadd half %lo, 1.0
783 %ins = insertelement <2 x half> %vec, half %lo.op, i32 0
784 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
785 ret <2 x half> %canonicalized
; Element 1 of the vector is replaced by an fadd result, while element 0
; still comes straight from the function argument %vec.
788 ; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon0_v2f16:
789 ; GFX9: v_add_f16_sdwa
791 define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %vec) {
792 %hi = extractelement <2 x half> %vec, i32 1
793 %hi.op = fadd half %hi, 1.0
794 %ins = insertelement <2 x half> %vec, half %hi.op, i32 1
795 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
796 ret <2 x half> %canonicalized
; Canonicalize of element 0 extracted from a vector fmul result. The GFX9
; checks expect a single scalar v_mul_f16 by 4.0 directly before the return.
799 ; GCN-LABEL: {{^}}v_test_canonicalize_extract_element_v2f16:
801 ; GFX9-NEXT: v_mul_f16_e32 v0, 4.0, v0
802 ; GFX9-NEXT: s_setpc_b64
803 define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
804 %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
805 %elt = extractelement <2 x half> %vec.op, i32 0
806 %canonicalized = call half @llvm.canonicalize.f16(half %elt)
807 ret half %canonicalized
; Canonicalize of an insertelement at a variable index %idx where both the
; base vector and the inserted scalar are fmul results.
810 ; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_v2f16:
811 ; GFX9: v_mul_f16_e32
815 define <2 x half> @v_test_canonicalize_insertelement_v2f16(<2 x half> %vec, half %val, i32 %idx) {
816 %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
817 %ins.op = fmul half %val, 8.0
818 %ins = insertelement <2 x half> %vec.op, half %ins.op, i32 %idx
819 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
820 ret <2 x half> %canonicalized
; The inserted scalar is an fmul result, but the base vector is the raw
; argument %vec. The GFX9 checks expect a v_pk_max_f16 of v0 with itself
; immediately before the return.
823 ; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_vec_v2f16:
825 ; GFX9: v_pk_max_f16 v0, v0, v0
826 ; GFX9-NEXT: s_setpc_b64
827 define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x half> %vec, half %val, i32 %idx) {
828 %ins.op = fmul half %val, 8.0
829 %ins = insertelement <2 x half> %vec, half %ins.op, i32 %idx
830 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
831 ret <2 x half> %canonicalized
; The base vector is an fmul result, but the inserted scalar is the raw
; argument %val. The GFX9 checks expect a v_pk_max_f16 of v0 with itself
; immediately before the return.
834 ; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_insval_v2f16:
836 ; GFX9: v_pk_max_f16 v0, v0, v0
837 ; GFX9-NEXT: s_setpc_b64
838 define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x half> %vec, half %val, i32 %idx) {
839 %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
840 %ins = insertelement <2 x half> %vec.op, half %val, i32 %idx
841 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
842 ret <2 x half> %canonicalized
; The result of llvm.amdgcn.cvt.pkrtz is passed straight to canonicalize.
; The checks expect v_cvt_pkrtz_f16_f32 to be directly followed by the return.
845 ; GCN-LABEL: {{^}}v_test_canonicalize_cvt_pkrtz:
847 ; GCN-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
848 ; GCN-NEXT: s_setpc_b64
849 define <2 x half> @v_test_canonicalize_cvt_pkrtz(float %a, float %b) {
850 %packed = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
851 %result = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %packed)
852 ret <2 x half> %result
; The result of llvm.amdgcn.cubeid is passed straight to canonicalize.
; The checks expect v_cubeid_f32 to be directly followed by the return.
855 ; GCN-LABEL: {{^}}v_test_canonicalize_cubeid:
857 ; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
858 ; GCN-NEXT: s_setpc_b64
859 define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
860 %cubeid = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
861 %result = call float @llvm.canonicalize.f32(float %cubeid)
862 ret float %result
; The result of llvm.amdgcn.frexp.mant.f32 is passed straight to
; canonicalize. The checks expect v_frexp_mant_f32 to be directly followed
; by the return.
865 ; GCN-LABEL: {{^}}v_test_canonicalize_frexp_mant:
867 ; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
868 ; GCN-NEXT: s_setpc_b64
869 define float @v_test_canonicalize_frexp_mant(float %a) {
870 %mant = call float @llvm.amdgcn.frexp.mant.f32(float %a)
871 %result = call float @llvm.canonicalize.f32(float %mant)
872 ret float %result
; The result of llvm.amdgcn.log.f32 is passed straight to canonicalize.
; The checks expect v_log_f32 to be directly followed by the return.
875 ; GCN-LABEL: {{^}}v_test_canonicalize_amdgcn_log:
877 ; GCN-NEXT: v_log_f32
878 ; GCN-NEXT: s_setpc_b64
879 define float @v_test_canonicalize_amdgcn_log(float %a) {
880 %log.val = call float @llvm.amdgcn.log.f32(float %a)
881 %result = call float @llvm.canonicalize.f32(float %log.val)
882 ret float %result
; The result of llvm.amdgcn.exp2.f32 is passed straight to canonicalize.
; The checks expect v_exp_f32 to be directly followed by the return.
885 ; GCN-LABEL: {{^}}v_test_canonicalize_amdgcn_exp2:
887 ; GCN-NEXT: v_exp_f32
888 ; GCN-NEXT: s_setpc_b64
889 define float @v_test_canonicalize_amdgcn_exp2(float %a) {
890 %exp2.val = call float @llvm.amdgcn.exp2.f32(float %a)
891 %result = call float @llvm.canonicalize.f32(float %exp2.val)
892 ret float %result
895 ; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
896 ; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
897 ; GCN: .amd_amdgpu_isa
; Intrinsic declarations. Every declaration below carries attribute group #0.
899 declare float @llvm.canonicalize.f32(float) #0
900 declare float @llvm.copysign.f32(float, float) #0
901 declare float @llvm.amdgcn.fmul.legacy(float, float) #0
902 declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float) #0
903 declare double @llvm.canonicalize.f64(double) #0
904 declare half @llvm.canonicalize.f16(half) #0
905 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
906 declare i32 @llvm.amdgcn.workitem.id.x() #0
907 declare float @llvm.sqrt.f32(float) #0
908 declare float @llvm.ceil.f32(float) #0
909 declare float @llvm.floor.f32(float) #0
910 declare float @llvm.fma.f32(float, float, float) #0
911 declare float @llvm.fmuladd.f32(float, float, float) #0
912 declare float @llvm.fabs.f32(float) #0
913 declare float @llvm.sin.f32(float) #0
914 declare float @llvm.cos.f32(float) #0
915 declare half @llvm.sin.f16(half) #0
916 declare half @llvm.cos.f16(half) #0
917 declare float @llvm.minnum.f32(float, float) #0
918 declare float @llvm.maxnum.f32(float, float) #0
919 declare double @llvm.maxnum.f64(double, double) #0
920 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
921 declare float @llvm.amdgcn.cubeid(float, float, float) #0
922 declare float @llvm.amdgcn.frexp.mant.f32(float) #0
923 declare float @llvm.amdgcn.log.f32(float) #0
924 declare float @llvm.amdgcn.exp2.f32(float) #0
; Attribute groups referenced by the definitions and declarations above.
; Group #0 marks a function as nounwind and readnone.
926 attributes #0 = { nounwind readnone }
; Group #1 enables the "no-nans-fp-math" function attribute.
927 attributes #1 = { "no-nans-fp-math"="true" }
; Group #2 sets "denormal-fp-math" to preserve-sign and overrides the f32
; mode to ieee through "denormal-fp-math-f32".
928 attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }