1 ; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,VI-FLUSH,GCN-FLUSH %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,VI-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM,GCN-NOEXCEPT %s
5 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH,GCN-NOEXCEPT %s
; Canonicalize of a raw loaded value cannot be folded away: a quieting
; instruction (mul by 1.0 when flushing, max with itself for denorm mode)
; must be emitted.
; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-DENORM: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of an fmul result folds away: only the mul and the store remain.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of amdgcn.fmul.legacy folds away; only the legacy mul remains.
; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_legacy_value_f32:
; GCN: v_mul_legacy_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmul_legacy_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.amdgcn.fmul.legacy(float %load, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of an fsub result folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of an fadd result folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of a sqrt result folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of a ceil result folds away.
; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of a floor result folds away.
; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of an fma result folds away.
; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
; GCN: s_mov_b32 [[SREG:s[0-9]+]], 0x41700000
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SREG]], [[SREG]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of amdgcn.fmad.ftz folds away.
; GCN-LABEL: test_fold_canonicalize_fmad_ftz_value_f32:
; GCN: s_mov_b32 [[SGPR:s[0-9]+]], 0x41700000
; GCN: v_mad_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SGPR]], [[SGPR]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmad_ftz_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.amdgcn.fmad.ftz.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of an fmuladd result folds away (mac in flush mode, fma in
; denorm mode).
; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM: s_mov_b32 [[SREG:s[0-9]+]], 0x41700000
; GCN-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, [[SREG]], [[SREG]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize(canonicalize(x)) collapses to a single quieting instruction.
; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
; GCN-FLUSH: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of an fpext result folds away; only the conversion remains.
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}
; canonicalize of an f16-to-f32 fpext folds away.
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}
; Same as above, but with f16 denormals flushed (attribute group #2).
; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16_flushf16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16_flushf16(half addrspace(1)* %arg, float addrspace(1)* %out) #2 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}
; canonicalize of an fptrunc (f64 to f32) result folds away.
; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}
; canonicalize of an fptrunc (f32 to f16) result folds away.
; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}
; Same as above with f16 denormals flushed (attribute group #2).
; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32_flushf16:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32_flushf16(float addrspace(1)* %arg, half addrspace(1)* %out) #2 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}
; Vector case: canonicalize of a v2f16 fptrunc folds away; only the
; conversion/packing sequence (sdwa+or on VI, and+lshl_or on GFX9) remains.
; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}
; fneg of a raw load: canonicalize is not folded; the negate is combined into
; the quieting mul/max instead.
; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fneg of an already-canonical value (result of fadd): canonicalize folds and
; the negate lowers to a plain sign-bit xor.
; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fabs of a raw load: canonicalize is not folded; the abs modifier is folded
; into the quieting mul/max instead.
; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; copysign feeds an intermediate canonicalize; the final stored value comes
; from fabs of the raw load, so a quieting instruction is still required.
; GCN-LABEL: test_no_fold_canonicalize_fcopysign_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(float addrspace(1)* %arg, float %sign) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %canon.load = tail call float @llvm.canonicalize.f32(float %load)
  %copysign = call float @llvm.copysign.f32(float %canon.load, float %sign)
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; fabs of an already-canonical value: canonicalize folds and the abs lowers to
; a plain sign-bit-clearing and.
; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of a sin result folds away.
; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; canonicalize of a cos result folds away.
; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; f16 sin is expanded through f32; canonicalize still folds into the result.
; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}
; f16 cos is expanded through f32; canonicalize still folds into the result.
; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}
; canonicalize of a constant qNaN folds to the canonical quiet-NaN immediate.
; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; minnum of a raw load needs its input quieted first in IEEE mode; the
; canonicalize then folds into the min result.
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
; GCN-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
; GCN-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, [[QUIET]]
; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_ieee_mode(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; With nnan (attribute group #1) no input quieting is needed in denorm mode;
; the flush path still emits a single quieting mul.
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode:
; VI-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GCN-DENORM-NOT: v_max
; GCN-DENORM-NOT: v_mul
; GCN: v_min_f32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
; GCN-DENORM-NOT: v_max
; GCN-DENORM-NOT: v_mul
; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32_nnan_ieee_mode(float addrspace(1)* %arg) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; minnum of an already-canonical value: canonicalize folds into the min.
; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.

; minnum with a constant sNaN operand simplifies away; the remaining
; canonicalize of the load becomes a single quieting mul/max.
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; minnum against a denormal constant (0x7fffff): denorm modes keep the
; constant, flush modes fold it to zero; quieting differs per subtarget.
; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
; GFX9-DENORM: v_max_f32_e32 [[QUIET:v[0-9]+]], [[VAL]], [[VAL]]
; GFX9-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[QUIET]]
; GFX9-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
; GFX9-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]]
; VI-FLUSH: v_mul_f32_e32 [[QUIET_V0:v[0-9]+]], 1.0, [[VAL]]
; VI-FLUSH: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET_V0]]
; VI-DENORM: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, [[VAL]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; maxnum of a raw load: GFX9 and VI-denorm fold canonicalize into the max;
; VI-flush needs a separate quieting mul first.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[VAL]]
; VI-FLUSH: v_mul_f32_e32 [[QUIET:v[0-9]+]], 1.0, [[VAL]]
; VI-FLUSH: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[QUIET]]
; VI-DENORM: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[VAL]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32_ieee_mode(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; maxnum of an already-canonical value: canonicalize folds into the max.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
; f64 variant: canonicalize folds into the 64-bit max.
; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}
; amdgpu_ps shader (non-IEEE mode): canonicalize of an fmul still folds.
; GCN-LABEL: test_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
define amdgpu_ps float @test_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}
; Same, with nnan on the fmul: still folds to just the mul.
; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}
; canonicalize of an fdiv result folds into the division expansion.
; GCN-LABEL: {{^}}test_fold_canonicalize_fdiv_value_f32_no_ieee:
; GCN: v_div_fixup_f32
define amdgpu_ps float @test_fold_canonicalize_fdiv_value_f32_no_ieee(float %arg0) {
  %v = fdiv float 15.0, %arg0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}
; With nnan (attribute group #1), GFX9-denorm can forward the loaded value
; without quieting; flush modes still need the mul by 1.0.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-DENORM-NOT: 1.0
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}
; f64 with nnan: loaded value is stored back with no quieting instruction.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}
; f16 with nnan: loaded value is stored back with no quieting instruction.
; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V]]
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}
; Both select operands are results of fadd, so canonicalize of the select
; folds away.
; GCN-LABEL: {{^}}test_fold_canonicalize_select_value_f32:
define amdgpu_kernel void @test_fold_canonicalize_select_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load0 = load volatile float, float addrspace(1)* %gep, align 4
  %load1 = load volatile float, float addrspace(1)* %gep, align 4
  %load2 = load volatile i32, i32 addrspace(1)* undef, align 4
  %v0 = fadd float %load0, 15.0
  %v1 = fadd float %load1, 32.0
  %cond = icmp eq i32 %load2, 0
  %select = select i1 %cond, float %v0, float %v1
  %canonicalized = tail call float @llvm.canonicalize.f32(float %select)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}
714 ; Need to quiet the nan with a separate instruction since it will be
715 ; passed through the minnum.
716 ; FIXME: canonicalize doesn't work correctly without ieee_mode
; amdgpu_ps entry points run without IEEE mode (per the test name / FIXME), so
; per the CHECK lines: gfx9 folds the canonicalize into v_min entirely; VI in
; flush mode still needs a trailing v_mul by 1.0 after the min, while VI in
; denorm mode folds it away.
718 ; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_no_ieee_mode:
721 ; GFX9: v_min_f32_e32 v0, v0, v1
722 ; GFX9-NEXT: ; return to shader
724 ; VI-FLUSH: v_min_f32_e32 v0, v0, v1
725 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
726 ; VI-FLUSH-NEXT: ; return
729 ; VI-DENORM: v_min_f32_e32 v0, v0, v1
730 ; VI-DENORM-NEXT: ; return
731 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode(float %arg0, float %arg1) {
732 %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
733 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
734 ret float %canonicalized
; Same pattern with the default calling convention (IEEE mode): gfx9 needs only
; the v_min; VI canonicalizes both inputs first — v_mul by 1.0 in flush mode,
; v_max of a register with itself in denorm mode — then the v_min result is
; returned directly (no post-min fixup), per the CHECK lines below.
737 ; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_ieee_mode:
738 ; GFX9: v_min_f32_e32 v0, v0, v1
739 ; GFX9-NEXT: s_setpc_b64
741 ; VI-FLUSH-DAG: v_mul_f32_e32 v0, 1.0, v0
742 ; VI-FLUSH-DAG: v_mul_f32_e32 v1, 1.0, v1
743 ; VI-FLUSH: v_min_f32_e32 v0, v0, v1
745 ; VI-DENORM-DAG: v_max_f32_e32 v0, v0, v0
746 ; VI-DENORM-DAG: v_max_f32_e32 v1, v1, v1
747 ; VI-DENORM: v_min_f32_e32 v0, v0, v1
749 ; VI-NEXT: s_setpc_b64
750 define float @test_fold_canonicalize_minnum_value_ieee_mode(float %arg0, float %arg1) {
751 %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
752 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
753 ret float %canonicalized
756 ; Canonicalizing flush necessary pre-gfx9
; With "no-nans-fp-math"="true" (attribute #1) no NaN quieting is required;
; only VI in flush mode keeps the v_mul-by-1.0 after the min (denormal flush),
; everything else is just the v_min.
757 ; GCN-LABEL: {{^}}test_fold_canonicalize_minnum_value_no_ieee_mode_nnan:
758 ; GCN: v_min_f32_e32 v0, v0, v1
759 ; VI-FLUSH-NEXT: v_mul_f32_e32 v0, 1.0, v0
761 define amdgpu_ps float @test_fold_canonicalize_minnum_value_no_ieee_mode_nnan(float %arg0, float %arg1) #1 {
762 %v = tail call float @llvm.minnum.f32(float %arg0, float %arg1)
763 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
764 ret float %canonicalized
; Canonicalize of a v2f16 built from two arithmetic results: both lanes come
; from canonicalizing ops (fadd/fmul), so no extra canonicalize instruction is
; expected beyond the add and mul themselves.
; NOTE(review): %hi is extracted but never used — %hi.op multiplies %lo, not
; %hi. Possibly a copy/paste slip in the test IR; confirm against upstream
; before "fixing", since the CHECK lines are baselined on this exact IR.
767 ; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_v2f16:
768 ; GFX9-DAG: v_add_f16_e32
769 ; GFX9-DAG: v_mul_f16_e32
772 define <2 x half> @v_test_canonicalize_build_vector_v2f16(<2 x half> %vec) {
773 %lo = extractelement <2 x half> %vec, i32 0
774 %hi = extractelement <2 x half> %vec, i32 1
775 %lo.op = fadd half %lo, 1.0
776 %hi.op = fmul half %lo, 4.0
777 %ins0 = insertelement <2 x half> undef, half %lo.op, i32 0
778 %ins1 = insertelement <2 x half> %ins0, half %hi.op, i32 1
779 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
780 ret <2 x half> %canonicalized
; Only lane 0 is replaced with a canonical (fadd) result; lane 1 keeps the raw
; incoming element, so the vector as a whole is not known canonical.
533-540 span reproduced below unchanged.
783 ; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon1_v2f16:
784 ; GFX9: v_add_f16_e32
786 define <2 x half> @v_test_canonicalize_build_vector_noncanon1_v2f16(<2 x half> %vec) {
787 %lo = extractelement <2 x half> %vec, i32 0
788 %lo.op = fadd half %lo, 1.0
789 %ins = insertelement <2 x half> %vec, half %lo.op, i32 0
790 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
791 ret <2 x half> %canonicalized
; Mirror of the previous test: lane 1 is replaced with a canonical fadd result
; (emitted as an sdwa add on gfx9), lane 0 keeps the raw incoming element.
794 ; GCN-LABEL: {{^}}v_test_canonicalize_build_vector_noncanon0_v2f16:
795 ; GFX9: v_add_f16_sdwa
797 define <2 x half> @v_test_canonicalize_build_vector_noncanon0_v2f16(<2 x half> %vec) {
798 %hi = extractelement <2 x half> %vec, i32 1
799 %hi.op = fadd half %hi, 1.0
800 %ins = insertelement <2 x half> %vec, half %hi.op, i32 1
801 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
802 ret <2 x half> %canonicalized
; Extracting a lane of an fmul result: the element is already canonical, so on
; gfx9 only the mul itself appears before the return.
805 ; GCN-LABEL: {{^}}v_test_canonicalize_extract_element_v2f16:
807 ; GFX9-NEXT: v_mul_f16_e32 v0, 4.0, v0
808 ; GFX9-NEXT: s_setpc_b64
809 define half @v_test_canonicalize_extract_element_v2f16(<2 x half> %vec) {
810 %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
811 %elt = extractelement <2 x half> %vec.op, i32 0
812 %canonicalized = call half @llvm.canonicalize.f16(half %elt)
813 ret half %canonicalized
; Insert with a variable index where BOTH the vector and the inserted scalar
; are fmul results (canonical), so no separate canonicalize op is expected.
816 ; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_v2f16:
817 ; GFX9: v_mul_f16_e32
821 define <2 x half> @v_test_canonicalize_insertelement_v2f16(<2 x half> %vec, half %val, i32 %idx) {
822 %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
823 %ins.op = fmul half %val, 8.0
824 %ins = insertelement <2 x half> %vec.op, half %ins.op, i32 %idx
825 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
826 ret <2 x half> %canonicalized
; The destination vector is the raw argument (not known canonical), so a
; v_pk_max_f16 of the vector with itself must canonicalize it on gfx9.
829 ; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_vec_v2f16:
831 ; GFX9: v_pk_max_f16 v0, v0, v0
832 ; GFX9-NEXT: s_setpc_b64
833 define <2 x half> @v_test_canonicalize_insertelement_noncanon_vec_v2f16(<2 x half> %vec, half %val, i32 %idx) {
834 %ins.op = fmul half %val, 8.0
835 %ins = insertelement <2 x half> %vec, half %ins.op, i32 %idx
836 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
837 ret <2 x half> %canonicalized
; Mirror case: the vector is canonical (fmul result) but the inserted scalar is
; the raw argument, so the packed max is still required on gfx9.
840 ; GCN-LABEL: {{^}}v_test_canonicalize_insertelement_noncanon_insval_v2f16:
842 ; GFX9: v_pk_max_f16 v0, v0, v0
843 ; GFX9-NEXT: s_setpc_b64
844 define <2 x half> @v_test_canonicalize_insertelement_noncanon_insval_v2f16(<2 x half> %vec, half %val, i32 %idx) {
845 %vec.op = fmul <2 x half> %vec, <half 4.0, half 4.0>
846 %ins = insertelement <2 x half> %vec.op, half %val, i32 %idx
847 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins)
848 ret <2 x half> %canonicalized
; The cvt_pkrtz intrinsic's result is treated as already canonical: the CHECK
; lines require the conversion to be immediately followed by the return.
851 ; GCN-LABEL: {{^}}v_test_canonicalize_cvt_pkrtz:
853 ; GCN-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
854 ; GCN-NEXT: s_setpc_b64
855 define <2 x half> @v_test_canonicalize_cvt_pkrtz(float %a, float %b) {
856 %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b)
857 %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %cvt)
858 ret <2 x half> %canonicalized
; cubeid's result is treated as canonical: the canonicalize folds away and only
; the v_cubeid_f32 remains before the return.
861 ; GCN-LABEL: {{^}}v_test_canonicalize_cubeid:
863 ; GCN-NEXT: v_cubeid_f32 v0, v0, v1, v2
864 ; GCN-NEXT: s_setpc_b64
865 define float @v_test_canonicalize_cubeid(float %a, float %b, float %c) {
866 %cvt = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
867 %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
868 ret float %canonicalized
; frexp_mant's result is treated as canonical: no extra instruction between the
; v_frexp_mant_f32 and the return.
871 ; GCN-LABEL: {{^}}v_test_canonicalize_frexp_mant:
873 ; GCN-NEXT: v_frexp_mant_f32_e32 v0, v0
874 ; GCN-NEXT: s_setpc_b64
875 define float @v_test_canonicalize_frexp_mant(float %a) {
876 %cvt = call float @llvm.amdgcn.frexp.mant.f32(float %a)
877 %canonicalized = call float @llvm.canonicalize.f32(float %cvt)
878 ret float %canonicalized
881 ; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
882 ; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
883 ; CHECK: .amd_amdgpu_isa
; Intrinsic declarations for the tests in this file. Attribute #0 marks them
; all nounwind readnone. Some declarations are used by tests outside this
; extract.
885 declare float @llvm.canonicalize.f32(float) #0
886 declare float @llvm.copysign.f32(float, float) #0
887 declare float @llvm.amdgcn.fmul.legacy(float, float) #0
888 declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float) #0
889 declare double @llvm.canonicalize.f64(double) #0
890 declare half @llvm.canonicalize.f16(half) #0
891 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
892 declare i32 @llvm.amdgcn.workitem.id.x() #0
893 declare float @llvm.sqrt.f32(float) #0
894 declare float @llvm.ceil.f32(float) #0
895 declare float @llvm.floor.f32(float) #0
896 declare float @llvm.fma.f32(float, float, float) #0
897 declare float @llvm.fmuladd.f32(float, float, float) #0
898 declare float @llvm.fabs.f32(float) #0
899 declare float @llvm.sin.f32(float) #0
900 declare float @llvm.cos.f32(float) #0
901 declare half @llvm.sin.f16(half) #0
902 declare half @llvm.cos.f16(half) #0
903 declare float @llvm.minnum.f32(float, float) #0
904 declare float @llvm.maxnum.f32(float, float) #0
905 declare double @llvm.maxnum.f64(double, double) #0
906 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
907 declare float @llvm.amdgcn.cubeid(float, float, float) #0
908 declare float @llvm.amdgcn.frexp.mant.f32(float) #0
; Attribute groups referenced above:
;   #0 — intrinsics: no unwinding, no memory effects.
;   #1 — test functions that rely on NaNs being impossible.
;   #2 — test functions compiled with f64/f16 denormals disabled.
910 attributes #0 = { nounwind readnone }
911 attributes #1 = { "no-nans-fp-math"="true" }
912 attributes #2 = { "target-features"="-fp64-fp16-denormals" }