1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX678,GFX6 %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX678,GFX8 %s
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
7 declare float @llvm.fabs.f32(float) #0
8 declare float @llvm.canonicalize.f32(float) #0
9 declare <2 x float> @llvm.canonicalize.v2f32(<2 x float>) #0
10 declare <3 x float> @llvm.canonicalize.v3f32(<3 x float>) #0
11 declare <4 x float> @llvm.canonicalize.v4f32(<4 x float>) #0
12 declare <8 x float> @llvm.canonicalize.v8f32(<8 x float>) #0
13 declare double @llvm.fabs.f64(double) #0
14 declare double @llvm.canonicalize.f64(double) #0
15 declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0
16 declare <3 x double> @llvm.canonicalize.v3f64(<3 x double>) #0
17 declare <4 x double> @llvm.canonicalize.v4f64(<4 x double>) #0
18 declare half @llvm.canonicalize.f16(half) #0
19 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
20 declare i32 @llvm.amdgcn.workitem.id.x() #0
; Loads a float from %out, canonicalizes it, and stores the result back to %out.
; The hawaii/fiji checks expect a multiply by 1.0 (v_mul_f32); the gfx900 and
; gfx1100 checks expect v_max_f32 of the loaded value with itself.
22 define amdgpu_kernel void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) #1 {
23 ; GFX678-LABEL: v_test_canonicalize_var_f32:
25 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
26 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
27 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
28 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
29 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
30 ; GFX678-NEXT: s_waitcnt vmcnt(0)
31 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
32 ; GFX678-NEXT: flat_store_dword v[0:1], v2
33 ; GFX678-NEXT: s_endpgm
35 ; GFX9-LABEL: v_test_canonicalize_var_f32:
37 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
38 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
39 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
40 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
41 ; GFX9-NEXT: s_waitcnt vmcnt(0)
42 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
43 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
46 ; GFX11-LABEL: v_test_canonicalize_var_f32:
48 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
49 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
50 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
51 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
52 ; GFX11-NEXT: s_waitcnt vmcnt(0)
53 ; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
54 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
56 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
57 ; GFX11-NEXT: s_endpgm
58 %val = load float, ptr addrspace(1) %out
59 %canonicalized = call float @llvm.canonicalize.f32(float %val)
60 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the float kernel argument %val and stores the result to %out.
; In every run the checks expect the canonicalizing instruction to read the
; argument directly from the scalar register it was loaded into (s2):
; v_mul_f32 by 1.0 on hawaii/fiji, v_max_f32 s2, s2 on gfx900/gfx1100.
64 define amdgpu_kernel void @s_test_canonicalize_var_f32(ptr addrspace(1) %out, float %val) #1 {
65 ; GFX6-LABEL: s_test_canonicalize_var_f32:
67 ; GFX6-NEXT: s_load_dword s2, s[4:5], 0x2
68 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
69 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
70 ; GFX6-NEXT: v_mul_f32_e64 v2, 1.0, s2
71 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
72 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
73 ; GFX6-NEXT: flat_store_dword v[0:1], v2
76 ; GFX8-LABEL: s_test_canonicalize_var_f32:
78 ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x8
79 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
80 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
81 ; GFX8-NEXT: v_mul_f32_e64 v2, 1.0, s2
82 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
83 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
84 ; GFX8-NEXT: flat_store_dword v[0:1], v2
87 ; GFX9-LABEL: s_test_canonicalize_var_f32:
89 ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
90 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
91 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
92 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
93 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
94 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
97 ; GFX11-LABEL: s_test_canonicalize_var_f32:
99 ; GFX11-NEXT: s_clause 0x1
100 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
101 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
102 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
103 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
104 ; GFX11-NEXT: v_max_f32_e64 v1, s2, s2
105 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
106 ; GFX11-NEXT: s_nop 0
107 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
108 ; GFX11-NEXT: s_endpgm
109 %canonicalized = call float @llvm.canonicalize.f32(float %val)
110 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes fabs(load %out) and stores the result back to %out.
; The checks expect no separate fabs instruction: the absolute value appears
; as a |x| source modifier on the canonicalizing v_mul_f32 / v_max_f32.
114 define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(ptr addrspace(1) %out) #1 {
115 ; GFX678-LABEL: v_test_canonicalize_fabs_var_f32:
117 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
118 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
119 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
120 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
121 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
122 ; GFX678-NEXT: s_waitcnt vmcnt(0)
123 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, |v2|
124 ; GFX678-NEXT: flat_store_dword v[0:1], v2
125 ; GFX678-NEXT: s_endpgm
127 ; GFX9-LABEL: v_test_canonicalize_fabs_var_f32:
129 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
130 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
131 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
132 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
133 ; GFX9-NEXT: s_waitcnt vmcnt(0)
134 ; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
135 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
136 ; GFX9-NEXT: s_endpgm
138 ; GFX11-LABEL: v_test_canonicalize_fabs_var_f32:
140 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
141 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
142 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
143 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
144 ; GFX11-NEXT: s_waitcnt vmcnt(0)
145 ; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
146 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
147 ; GFX11-NEXT: s_nop 0
148 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
149 ; GFX11-NEXT: s_endpgm
150 %val = load float, ptr addrspace(1) %out
151 %val.fabs = call float @llvm.fabs.f32(float %val)
152 %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs)
153 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes fneg(fabs(load %out)) and stores the result back to %out.
; hawaii/fiji checks expect the negation folded into the multiplier
; (v_mul_f32 by -1.0 with a |x| modifier); gfx900/gfx1100 checks expect
; v_max_f32 with -|x| source modifiers on both operands.
157 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(ptr addrspace(1) %out) #1 {
158 ; GFX678-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
160 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
161 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
162 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
163 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
164 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
165 ; GFX678-NEXT: s_waitcnt vmcnt(0)
166 ; GFX678-NEXT: v_mul_f32_e64 v2, -1.0, |v2|
167 ; GFX678-NEXT: flat_store_dword v[0:1], v2
168 ; GFX678-NEXT: s_endpgm
170 ; GFX9-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
172 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
173 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
174 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
175 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
176 ; GFX9-NEXT: s_waitcnt vmcnt(0)
177 ; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
178 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
179 ; GFX9-NEXT: s_endpgm
181 ; GFX11-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
183 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
184 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
185 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
186 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
187 ; GFX11-NEXT: s_waitcnt vmcnt(0)
188 ; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
189 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
190 ; GFX11-NEXT: s_nop 0
191 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
192 ; GFX11-NEXT: s_endpgm
193 %val = load float, ptr addrspace(1) %out
194 %val.fabs = call float @llvm.fabs.f32(float %val)
195 %val.fabs.fneg = fneg float %val.fabs
196 %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
197 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes fneg(load %out) and stores the result back to %out.
; hawaii/fiji checks expect a single v_mul_f32 by -1.0; gfx900/gfx1100 checks
; expect v_max_f32 with a negate source modifier on both operands.
201 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(ptr addrspace(1) %out) #1 {
202 ; GFX678-LABEL: v_test_canonicalize_fneg_var_f32:
204 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
205 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
206 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
207 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
208 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
209 ; GFX678-NEXT: s_waitcnt vmcnt(0)
210 ; GFX678-NEXT: v_mul_f32_e32 v2, -1.0, v2
211 ; GFX678-NEXT: flat_store_dword v[0:1], v2
212 ; GFX678-NEXT: s_endpgm
214 ; GFX9-LABEL: v_test_canonicalize_fneg_var_f32:
216 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
217 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
218 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
219 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
220 ; GFX9-NEXT: s_waitcnt vmcnt(0)
221 ; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1
222 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
223 ; GFX9-NEXT: s_endpgm
225 ; GFX11-LABEL: v_test_canonicalize_fneg_var_f32:
227 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
228 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
229 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
230 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
231 ; GFX11-NEXT: s_waitcnt vmcnt(0)
232 ; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1
233 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
234 ; GFX11-NEXT: s_nop 0
235 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
236 ; GFX11-NEXT: s_endpgm
237 %val = load float, ptr addrspace(1) %out
238 %val.fneg = fneg float %val
239 %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
240 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes an undef float and stores the result to %out.
; The checks expect the call to be folded away: no v_mul/v_max is emitted and
; the constant 0 is stored.
244 define amdgpu_kernel void @test_fold_canonicalize_undef_f32(ptr addrspace(1) %out) #1 {
245 ; GFX678-LABEL: test_fold_canonicalize_undef_f32:
247 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
248 ; GFX678-NEXT: v_mov_b32_e32 v2, 0
249 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
250 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
251 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
252 ; GFX678-NEXT: flat_store_dword v[0:1], v2
253 ; GFX678-NEXT: s_endpgm
255 ; GFX9-LABEL: test_fold_canonicalize_undef_f32:
257 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
258 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
259 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
260 ; GFX9-NEXT: global_store_dword v0, v0, s[0:1]
261 ; GFX9-NEXT: s_endpgm
263 ; GFX11-LABEL: test_fold_canonicalize_undef_f32:
265 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
266 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
267 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
268 ; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
269 ; GFX11-NEXT: s_nop 0
270 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
271 ; GFX11-NEXT: s_endpgm
272 %canonicalized = call float @llvm.canonicalize.f32(float undef)
273 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant +0.0 and stores the result to %out.
; The checks expect constant folding: the value 0 is stored with no
; canonicalizing v_mul/v_max instruction.
277 define amdgpu_kernel void @test_fold_canonicalize_p0_f32(ptr addrspace(1) %out) #1 {
278 ; GFX678-LABEL: test_fold_canonicalize_p0_f32:
280 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
281 ; GFX678-NEXT: v_mov_b32_e32 v2, 0
282 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
283 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
284 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
285 ; GFX678-NEXT: flat_store_dword v[0:1], v2
286 ; GFX678-NEXT: s_endpgm
288 ; GFX9-LABEL: test_fold_canonicalize_p0_f32:
290 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
291 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
292 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
293 ; GFX9-NEXT: global_store_dword v0, v0, s[0:1]
294 ; GFX9-NEXT: s_endpgm
296 ; GFX11-LABEL: test_fold_canonicalize_p0_f32:
298 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
299 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
300 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
301 ; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
302 ; GFX11-NEXT: s_nop 0
303 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
304 ; GFX11-NEXT: s_endpgm
305 %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
306 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant -0.0 and stores the result to %out.
; The checks expect constant folding, with the stored value materialized by
; v_bfrev_b32 of 1 (bit-reversed 1, i.e. only the sign bit set).
310 define amdgpu_kernel void @test_fold_canonicalize_n0_f32(ptr addrspace(1) %out) #1 {
311 ; GFX678-LABEL: test_fold_canonicalize_n0_f32:
313 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
314 ; GFX678-NEXT: v_bfrev_b32_e32 v2, 1
315 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
316 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
317 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
318 ; GFX678-NEXT: flat_store_dword v[0:1], v2
319 ; GFX678-NEXT: s_endpgm
321 ; GFX9-LABEL: test_fold_canonicalize_n0_f32:
323 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
324 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
325 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
326 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
327 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
328 ; GFX9-NEXT: s_endpgm
330 ; GFX11-LABEL: test_fold_canonicalize_n0_f32:
332 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
333 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
334 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
335 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
336 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
337 ; GFX11-NEXT: s_nop 0
338 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
339 ; GFX11-NEXT: s_endpgm
340 %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
341 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant 1.0 and stores the result to %out.
; The checks expect constant folding: 1.0 is moved into a VGPR and stored,
; with no canonicalizing v_mul/v_max instruction.
345 define amdgpu_kernel void @test_fold_canonicalize_p1_f32(ptr addrspace(1) %out) #1 {
346 ; GFX678-LABEL: test_fold_canonicalize_p1_f32:
348 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
349 ; GFX678-NEXT: v_mov_b32_e32 v2, 1.0
350 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
351 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
352 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
353 ; GFX678-NEXT: flat_store_dword v[0:1], v2
354 ; GFX678-NEXT: s_endpgm
356 ; GFX9-LABEL: test_fold_canonicalize_p1_f32:
358 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
359 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
360 ; GFX9-NEXT: v_mov_b32_e32 v1, 1.0
361 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
362 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
363 ; GFX9-NEXT: s_endpgm
365 ; GFX11-LABEL: test_fold_canonicalize_p1_f32:
367 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
368 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 1.0
369 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
370 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
371 ; GFX11-NEXT: s_nop 0
372 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
373 ; GFX11-NEXT: s_endpgm
374 %canonicalized = call float @llvm.canonicalize.f32(float 1.0)
375 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant -1.0 and stores the result to %out.
; The checks expect constant folding: -1.0 is moved into a VGPR and stored,
; with no canonicalizing v_mul/v_max instruction.
379 define amdgpu_kernel void @test_fold_canonicalize_n1_f32(ptr addrspace(1) %out) #1 {
380 ; GFX678-LABEL: test_fold_canonicalize_n1_f32:
382 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
383 ; GFX678-NEXT: v_mov_b32_e32 v2, -1.0
384 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
385 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
386 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
387 ; GFX678-NEXT: flat_store_dword v[0:1], v2
388 ; GFX678-NEXT: s_endpgm
390 ; GFX9-LABEL: test_fold_canonicalize_n1_f32:
392 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
393 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
394 ; GFX9-NEXT: v_mov_b32_e32 v1, -1.0
395 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
396 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
397 ; GFX9-NEXT: s_endpgm
399 ; GFX11-LABEL: test_fold_canonicalize_n1_f32:
401 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
402 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, -1.0
403 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
404 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
405 ; GFX11-NEXT: s_nop 0
406 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
407 ; GFX11-NEXT: s_endpgm
408 %canonicalized = call float @llvm.canonicalize.f32(float -1.0)
409 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant 16.0 and stores the result to %out.
; The checks expect constant folding: the 32-bit literal 0x41800000 (the
; encoding of 16.0) is moved into a VGPR and stored unchanged.
413 define amdgpu_kernel void @test_fold_canonicalize_literal_f32(ptr addrspace(1) %out) #1 {
414 ; GFX678-LABEL: test_fold_canonicalize_literal_f32:
416 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
417 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x41800000
418 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
419 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
420 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
421 ; GFX678-NEXT: flat_store_dword v[0:1], v2
422 ; GFX678-NEXT: s_endpgm
424 ; GFX9-LABEL: test_fold_canonicalize_literal_f32:
426 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
427 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
428 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x41800000
429 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
430 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
431 ; GFX9-NEXT: s_endpgm
433 ; GFX11-LABEL: test_fold_canonicalize_literal_f32:
435 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
436 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x41800000
437 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
438 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
439 ; GFX11-NEXT: s_nop 0
440 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
441 ; GFX11-NEXT: s_endpgm
442 %canonicalized = call float @llvm.canonicalize.f32(float 16.0)
443 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant with bit pattern 8388607 (0x7fffff) and stores the
; result to %out. The checks expect the constant to fold to 0.
; NOTE(review): attribute group #1 is defined outside this view; per the test
; name it presumably disables f32 denormals, so the input is flushed - confirm.
447 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #1 {
448 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
450 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
451 ; GFX678-NEXT: v_mov_b32_e32 v2, 0
452 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
453 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
454 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
455 ; GFX678-NEXT: flat_store_dword v[0:1], v2
456 ; GFX678-NEXT: s_endpgm
458 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
460 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
461 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
462 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
463 ; GFX9-NEXT: global_store_dword v0, v0, s[0:1]
464 ; GFX9-NEXT: s_endpgm
466 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
468 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
469 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
470 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
471 ; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
472 ; GFX11-NEXT: s_nop 0
473 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
474 ; GFX11-NEXT: s_endpgm
475 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
476 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant with bit pattern 8388607 (0x7fffff) and stores the
; result to %out. Unlike the #1 variant of this test, the checks expect the
; canonicalize to stay as a real instruction (v_mul_f32 by 1.0 or v_max_f32 of
; the constant with itself) instead of being folded to a constant.
; NOTE(review): attribute group #5 is defined outside this view; per the test
; name it presumably selects a dynamic denormal mode - confirm.
480 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic(ptr addrspace(1) %out) #5 {
481 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
483 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
484 ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
485 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
486 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
487 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
488 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
489 ; GFX678-NEXT: flat_store_dword v[0:1], v2
490 ; GFX678-NEXT: s_endpgm
492 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
494 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
495 ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
496 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
497 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
498 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
499 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
500 ; GFX9-NEXT: s_endpgm
502 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
504 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
505 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
506 ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
507 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
508 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
509 ; GFX11-NEXT: s_nop 0
510 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
511 ; GFX11-NEXT: s_endpgm
512 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
513 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant with bit pattern 8388607 (0x7fffff) and stores the
; result to %out. The checks expect the canonicalize to stay as a real
; instruction (v_mul_f32 by 1.0 or v_max_f32 of the constant with itself)
; instead of being folded to a constant.
; NOTE(review): attribute group #6 is defined outside this view; per the test
; name it presumably makes only the output denormal mode dynamic - confirm.
517 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out(ptr addrspace(1) %out) #6 {
518 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
520 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
521 ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
522 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
523 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
524 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
525 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
526 ; GFX678-NEXT: flat_store_dword v[0:1], v2
527 ; GFX678-NEXT: s_endpgm
529 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
531 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
532 ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
533 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
534 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
535 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
536 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
537 ; GFX9-NEXT: s_endpgm
539 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
541 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
542 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
543 ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
544 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
545 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
546 ; GFX11-NEXT: s_nop 0
547 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
548 ; GFX11-NEXT: s_endpgm
549 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
550 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant with bit pattern 8388607 (0x7fffff) and stores the
; result to %out. The checks expect the canonicalize to stay as a real
; instruction (v_mul_f32 by 1.0 or v_max_f32 of the constant with itself)
; instead of being folded to a constant.
; NOTE(review): attribute group #7 is defined outside this view; per the test
; name it presumably makes only the input denormal mode dynamic - confirm.
554 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in(ptr addrspace(1) %out) #7 {
555 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
557 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
558 ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
559 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
560 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
561 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
562 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
563 ; GFX678-NEXT: flat_store_dword v[0:1], v2
564 ; GFX678-NEXT: s_endpgm
566 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
568 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
569 ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
570 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
571 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
572 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
573 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
574 ; GFX9-NEXT: s_endpgm
576 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
578 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
579 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
580 ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
581 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
582 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
583 ; GFX11-NEXT: s_nop 0
584 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
585 ; GFX11-NEXT: s_endpgm
586 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
587 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant with bit pattern 8388607 (0x7fffff) and stores the
; result to %out. The checks expect constant folding that preserves the input:
; the literal 0x7fffff is stored unchanged.
; NOTE(review): attribute group #3 is defined outside this view; per the test
; name it presumably enables f32 denormals - confirm.
591 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #3 {
592 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
594 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
595 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fffff
596 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
597 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
598 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
599 ; GFX678-NEXT: flat_store_dword v[0:1], v2
600 ; GFX678-NEXT: s_endpgm
602 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
604 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
605 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
606 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fffff
607 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
608 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
609 ; GFX9-NEXT: s_endpgm
611 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
613 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
614 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fffff
615 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
616 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
617 ; GFX11-NEXT: s_nop 0
618 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
619 ; GFX11-NEXT: s_endpgm
620 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
621 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant with bit pattern 2155872255 (0x807fffff, the
; sign-bit-set counterpart of 0x7fffff) and stores the result to %out.
; The checks expect it to fold to a sign-bit-only value, materialized with
; v_bfrev_b32 of 1.
; NOTE(review): attribute group #1 is defined outside this view; per the test
; name it presumably disables f32 denormals - confirm.
625 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #1 {
626 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
628 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
629 ; GFX678-NEXT: v_bfrev_b32_e32 v2, 1
630 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
631 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
632 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
633 ; GFX678-NEXT: flat_store_dword v[0:1], v2
634 ; GFX678-NEXT: s_endpgm
636 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
638 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
639 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
640 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
641 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
642 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
643 ; GFX9-NEXT: s_endpgm
645 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
647 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
648 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
649 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
650 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
651 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
652 ; GFX11-NEXT: s_nop 0
653 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
654 ; GFX11-NEXT: s_endpgm
655 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
656 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the constant with bit pattern 2155872255 (0x807fffff) and
; stores the result to %out. The checks expect constant folding that preserves
; the input: the literal 0x807fffff is stored unchanged.
; NOTE(review): attribute group #3 is defined outside this view; per the test
; name it presumably enables f32 denormals - confirm.
660 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #3 {
661 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
663 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
664 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x807fffff
665 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
666 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
667 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
668 ; GFX678-NEXT: flat_store_dword v[0:1], v2
669 ; GFX678-NEXT: s_endpgm
671 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
673 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
674 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
675 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x807fffff
676 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
677 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
678 ; GFX9-NEXT: s_endpgm
680 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
682 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
683 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x807fffff
684 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
685 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
686 ; GFX11-NEXT: s_nop 0
687 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
688 ; GFX11-NEXT: s_endpgm
689 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
690 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the float constant written as 0x7FF8000000000000 (a quiet NaN)
; and stores the result to %out. The checks expect constant folding to the
; 32-bit pattern 0x7fc00000 with no canonicalizing v_mul/v_max instruction.
694 define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(ptr addrspace(1) %out) #1 {
695 ; GFX678-LABEL: test_fold_canonicalize_qnan_f32:
697 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
698 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
699 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
700 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
701 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
702 ; GFX678-NEXT: flat_store_dword v[0:1], v2
703 ; GFX678-NEXT: s_endpgm
705 ; GFX9-LABEL: test_fold_canonicalize_qnan_f32:
707 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
708 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
709 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
710 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
711 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
712 ; GFX9-NEXT: s_endpgm
714 ; GFX11-LABEL: test_fold_canonicalize_qnan_f32:
716 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
717 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
718 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
719 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
720 ; GFX11-NEXT: s_nop 0
721 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
722 ; GFX11-NEXT: s_endpgm
723 %canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
724 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the NaN whose bit pattern is i32 -1 (all bits set) and stores
; the result to %out. The checks expect constant folding to 0x7fc00000, i.e.
; the sign and payload bits of the input are not preserved.
728 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(ptr addrspace(1) %out) #1 {
729 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
731 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
732 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
733 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
734 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
735 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
736 ; GFX678-NEXT: flat_store_dword v[0:1], v2
737 ; GFX678-NEXT: s_endpgm
739 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
741 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
742 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
743 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
744 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
745 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
746 ; GFX9-NEXT: s_endpgm
748 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
750 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
751 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
752 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
753 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
754 ; GFX11-NEXT: s_nop 0
755 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
756 ; GFX11-NEXT: s_endpgm
757 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
758 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the NaN whose bit pattern is i32 -2 (0xfffffffe) and stores the
; result to %out. The checks expect constant folding to 0x7fc00000, i.e. the
; sign and payload bits of the input are not preserved.
762 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(ptr addrspace(1) %out) #1 {
763 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
765 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
766 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
767 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
768 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
769 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
770 ; GFX678-NEXT: flat_store_dword v[0:1], v2
771 ; GFX678-NEXT: s_endpgm
773 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
775 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
776 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
777 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
778 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
779 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
780 ; GFX9-NEXT: s_endpgm
782 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
784 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
785 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
786 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
787 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
788 ; GFX11-NEXT: s_nop 0
789 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
790 ; GFX11-NEXT: s_endpgm
791 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
792 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the signaling NaN with bit pattern 2139095041 (0x7f800001) and
; stores the result to %out. The checks expect constant folding to the quiet
; NaN pattern 0x7fc00000.
796 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(ptr addrspace(1) %out) #1 {
797 ; GFX678-LABEL: test_fold_canonicalize_snan0_value_f32:
799 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
800 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
801 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
802 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
803 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
804 ; GFX678-NEXT: flat_store_dword v[0:1], v2
805 ; GFX678-NEXT: s_endpgm
807 ; GFX9-LABEL: test_fold_canonicalize_snan0_value_f32:
809 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
810 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
811 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
812 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
813 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
814 ; GFX9-NEXT: s_endpgm
816 ; GFX11-LABEL: test_fold_canonicalize_snan0_value_f32:
818 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
819 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
820 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
821 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
822 ; GFX11-NEXT: s_nop 0
823 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
824 ; GFX11-NEXT: s_endpgm
825 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
826 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes the signaling NaN with bit pattern 2143289343 (0x7fbfffff) and
; stores the result to %out. The checks expect constant folding to the quiet
; NaN pattern 0x7fc00000.
830 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(ptr addrspace(1) %out) #1 {
831 ; GFX678-LABEL: test_fold_canonicalize_snan1_value_f32:
833 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
834 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
835 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
836 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
837 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
838 ; GFX678-NEXT: flat_store_dword v[0:1], v2
839 ; GFX678-NEXT: s_endpgm
841 ; GFX9-LABEL: test_fold_canonicalize_snan1_value_f32:
843 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
844 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
845 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
846 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
847 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
848 ; GFX9-NEXT: s_endpgm
850 ; GFX11-LABEL: test_fold_canonicalize_snan1_value_f32:
852 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
853 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
854 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
855 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
856 ; GFX11-NEXT: s_nop 0
857 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
858 ; GFX11-NEXT: s_endpgm
859 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
860 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold check: the operand is bit pattern 4286578689 (0xFF800001), an
; f32 NaN with the sign bit set, the quiet bit (bit 22) clear and payload 1.
; The expected result drops the sign: every target stores the immediate
; 0x7fc00000 and emits no canonicalize instruction.
864 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(ptr addrspace(1) %out) #1 {
865 ; GFX678-LABEL: test_fold_canonicalize_snan2_value_f32:
867 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
868 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
869 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
870 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
871 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
872 ; GFX678-NEXT: flat_store_dword v[0:1], v2
873 ; GFX678-NEXT: s_endpgm
875 ; GFX9-LABEL: test_fold_canonicalize_snan2_value_f32:
877 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
878 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
879 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
880 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
881 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
882 ; GFX9-NEXT: s_endpgm
884 ; GFX11-LABEL: test_fold_canonicalize_snan2_value_f32:
886 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
887 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
888 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
889 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
890 ; GFX11-NEXT: s_nop 0
891 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
892 ; GFX11-NEXT: s_endpgm
893 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
894 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold check: the operand is bit pattern 4290772991 (0xFFBFFFFF), an
; f32 NaN with the sign bit set, the quiet bit (bit 22) clear and all lower
; mantissa bits set. Every target is expected to store the immediate
; 0x7fc00000 without emitting a canonicalize instruction.
898 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(ptr addrspace(1) %out) #1 {
899 ; GFX678-LABEL: test_fold_canonicalize_snan3_value_f32:
901 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
902 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
903 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
904 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
905 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
906 ; GFX678-NEXT: flat_store_dword v[0:1], v2
907 ; GFX678-NEXT: s_endpgm
909 ; GFX9-LABEL: test_fold_canonicalize_snan3_value_f32:
911 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
912 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
913 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
914 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
915 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
916 ; GFX9-NEXT: s_endpgm
918 ; GFX11-LABEL: test_fold_canonicalize_snan3_value_f32:
920 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
921 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
922 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
923 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
924 ; GFX11-NEXT: s_nop 0
925 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
926 ; GFX11-NEXT: s_endpgm
927 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
928 store float %canonicalized, ptr addrspace(1) %out
; Canonicalize of a non-constant f64: the value is loaded from %out,
; canonicalized and stored back to the same address. On every target the
; canonicalize is expected to lower to v_max_f64 of the loaded value with
; itself.
932 define amdgpu_kernel void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) #1 {
933 ; GFX678-LABEL: v_test_canonicalize_var_f64:
935 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
936 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
937 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
938 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
939 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
940 ; GFX678-NEXT: s_waitcnt vmcnt(0)
941 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
942 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
943 ; GFX678-NEXT: s_endpgm
945 ; GFX9-LABEL: v_test_canonicalize_var_f64:
947 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
948 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
949 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
950 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
951 ; GFX9-NEXT: s_waitcnt vmcnt(0)
952 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
953 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
954 ; GFX9-NEXT: s_endpgm
956 ; GFX11-LABEL: v_test_canonicalize_var_f64:
958 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
959 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
960 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
961 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
962 ; GFX11-NEXT: s_waitcnt vmcnt(0)
963 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
964 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
965 ; GFX11-NEXT: s_nop 0
966 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
967 ; GFX11-NEXT: s_endpgm
968 %val = load double, ptr addrspace(1) %out
969 %canonicalized = call double @llvm.canonicalize.f64(double %val)
970 store double %canonicalized, ptr addrspace(1) %out
; Canonicalize of an f64 kernel argument (%val) rather than a loaded value.
; The expected v_max_f64 reads the scalar register pair s[2:3] for both
; operands. The hawaii and fiji runs have separate check blocks here because
; their register assignment differs.
974 define amdgpu_kernel void @s_test_canonicalize_var_f64(ptr addrspace(1) %out, double %val) #1 {
975 ; GFX6-LABEL: s_test_canonicalize_var_f64:
977 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
978 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
979 ; GFX6-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3]
980 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
981 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
982 ; GFX6-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
983 ; GFX6-NEXT: s_endpgm
985 ; GFX8-LABEL: s_test_canonicalize_var_f64:
987 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
988 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
989 ; GFX8-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
990 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
991 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
992 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
993 ; GFX8-NEXT: s_endpgm
995 ; GFX9-LABEL: s_test_canonicalize_var_f64:
997 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
998 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
999 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1000 ; GFX9-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
1001 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1002 ; GFX9-NEXT: s_endpgm
1004 ; GFX11-LABEL: s_test_canonicalize_var_f64:
1006 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
1007 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1008 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1009 ; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
1010 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1011 ; GFX11-NEXT: s_nop 0
1012 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1013 ; GFX11-NEXT: s_endpgm
1014 %canonicalized = call double @llvm.canonicalize.f64(double %val)
1015 store double %canonicalized, ptr addrspace(1) %out
; Canonicalize of fabs(loaded f64). No separate instruction is expected for
; the fabs: the checks require it to appear as the |...| source modifier on
; both operands of the v_max_f64 that implements the canonicalize.
1019 define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(ptr addrspace(1) %out) #1 {
1020 ; GFX678-LABEL: v_test_canonicalize_fabs_var_f64:
1022 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1023 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1024 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1025 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1026 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1027 ; GFX678-NEXT: s_waitcnt vmcnt(0)
1028 ; GFX678-NEXT: v_max_f64 v[2:3], |v[2:3]|, |v[2:3]|
1029 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1030 ; GFX678-NEXT: s_endpgm
1032 ; GFX9-LABEL: v_test_canonicalize_fabs_var_f64:
1034 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1035 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1036 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1037 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1038 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1039 ; GFX9-NEXT: v_max_f64 v[0:1], |v[0:1]|, |v[0:1]|
1040 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1041 ; GFX9-NEXT: s_endpgm
1043 ; GFX11-LABEL: v_test_canonicalize_fabs_var_f64:
1045 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1046 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1047 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1048 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1049 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1050 ; GFX11-NEXT: v_max_f64 v[0:1], |v[0:1]|, |v[0:1]|
1051 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1052 ; GFX11-NEXT: s_nop 0
1053 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1054 ; GFX11-NEXT: s_endpgm
1055 %val = load double, ptr addrspace(1) %out
1056 %val.fabs = call double @llvm.fabs.f64(double %val)
1057 %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs)
1058 store double %canonicalized, ptr addrspace(1) %out
; Canonicalize of fneg(fabs(loaded f64)). Both operations are expected to be
; absorbed as the combined -|...| source modifier on the two operands of the
; v_max_f64 that implements the canonicalize.
1062 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(ptr addrspace(1) %out) #1 {
1063 ; GFX678-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
1065 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1066 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1067 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1068 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1069 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1070 ; GFX678-NEXT: s_waitcnt vmcnt(0)
1071 ; GFX678-NEXT: v_max_f64 v[2:3], -|v[2:3]|, -|v[2:3]|
1072 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1073 ; GFX678-NEXT: s_endpgm
1075 ; GFX9-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
1077 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1078 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1079 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1080 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1081 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1082 ; GFX9-NEXT: v_max_f64 v[0:1], -|v[0:1]|, -|v[0:1]|
1083 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1084 ; GFX9-NEXT: s_endpgm
1086 ; GFX11-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
1088 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1089 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1090 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1091 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1092 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1093 ; GFX11-NEXT: v_max_f64 v[0:1], -|v[0:1]|, -|v[0:1]|
1094 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1095 ; GFX11-NEXT: s_nop 0
1096 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1097 ; GFX11-NEXT: s_endpgm
1098 %val = load double, ptr addrspace(1) %out
1099 %val.fabs = call double @llvm.fabs.f64(double %val)
1100 %val.fabs.fneg = fneg double %val.fabs
1101 %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
1102 store double %canonicalized, ptr addrspace(1) %out
; Canonicalize of fneg(loaded f64). The negation is expected to be absorbed as
; the "-" source modifier on both operands of the v_max_f64 that implements
; the canonicalize, with no separate negate instruction.
1106 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(ptr addrspace(1) %out) #1 {
1107 ; GFX678-LABEL: v_test_canonicalize_fneg_var_f64:
1109 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1110 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1111 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1112 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1113 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1114 ; GFX678-NEXT: s_waitcnt vmcnt(0)
1115 ; GFX678-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
1116 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1117 ; GFX678-NEXT: s_endpgm
1119 ; GFX9-LABEL: v_test_canonicalize_fneg_var_f64:
1121 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1122 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1123 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1124 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1125 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1126 ; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
1127 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1128 ; GFX9-NEXT: s_endpgm
1130 ; GFX11-LABEL: v_test_canonicalize_fneg_var_f64:
1132 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1133 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1134 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1135 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1136 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1137 ; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
1138 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1139 ; GFX11-NEXT: s_nop 0
1140 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1141 ; GFX11-NEXT: s_endpgm
1142 %val = load double, ptr addrspace(1) %out
1143 %val.fneg = fneg double %val
1144 %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
1145 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for +0.0: no v_max_f64 is expected. Both dwords of the
; stored f64 are zero; the high dword is produced by copying the zero that is
; already in v0.
1149 define amdgpu_kernel void @test_fold_canonicalize_p0_f64(ptr addrspace(1) %out) #1 {
1150 ; GFX678-LABEL: test_fold_canonicalize_p0_f64:
1152 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1153 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1154 ; GFX678-NEXT: v_mov_b32_e32 v1, v0
1155 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1156 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1157 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1158 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1159 ; GFX678-NEXT: s_endpgm
1161 ; GFX9-LABEL: test_fold_canonicalize_p0_f64:
1163 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1164 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1165 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
1166 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1167 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1168 ; GFX9-NEXT: s_endpgm
1170 ; GFX11-LABEL: test_fold_canonicalize_p0_f64:
1172 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1173 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1174 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1175 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
1176 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1177 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1178 ; GFX11-NEXT: s_nop 0
1179 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1180 ; GFX11-NEXT: s_endpgm
1181 %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
1182 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for -0.0: no v_max_f64 is expected and the sign is kept.
; The low dword is 0 and the high dword is built with v_bfrev_b32 of 1
; (bit-reversed 1, i.e. 0x80000000).
1186 define amdgpu_kernel void @test_fold_canonicalize_n0_f64(ptr addrspace(1) %out) #1 {
1187 ; GFX678-LABEL: test_fold_canonicalize_n0_f64:
1189 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1190 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1191 ; GFX678-NEXT: v_bfrev_b32_e32 v1, 1
1192 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1193 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1194 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1195 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1196 ; GFX678-NEXT: s_endpgm
1198 ; GFX9-LABEL: test_fold_canonicalize_n0_f64:
1200 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1201 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1202 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1203 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1204 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1205 ; GFX9-NEXT: s_endpgm
1207 ; GFX11-LABEL: test_fold_canonicalize_n0_f64:
1209 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1210 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1211 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
1212 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1213 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1214 ; GFX11-NEXT: s_nop 0
1215 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1216 ; GFX11-NEXT: s_endpgm
1217 %canonicalized = call double @llvm.canonicalize.f64(double -0.0)
1218 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for 1.0: no v_max_f64 is expected. The stored f64 is
; materialized as immediates, low dword 0 and high dword 0x3ff00000.
1222 define amdgpu_kernel void @test_fold_canonicalize_p1_f64(ptr addrspace(1) %out) #1 {
1223 ; GFX678-LABEL: test_fold_canonicalize_p1_f64:
1225 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1226 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1227 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1228 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1229 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1230 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1231 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1232 ; GFX678-NEXT: s_endpgm
1234 ; GFX9-LABEL: test_fold_canonicalize_p1_f64:
1236 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1237 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1238 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1239 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1240 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1241 ; GFX9-NEXT: s_endpgm
1243 ; GFX11-LABEL: test_fold_canonicalize_p1_f64:
1245 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1246 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff00000
1247 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1248 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1249 ; GFX11-NEXT: s_nop 0
1250 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1251 ; GFX11-NEXT: s_endpgm
1252 %canonicalized = call double @llvm.canonicalize.f64(double 1.0)
1253 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for -1.0: no v_max_f64 is expected. The stored f64 is
; materialized as immediates, low dword 0 and high dword 0xbff00000.
1257 define amdgpu_kernel void @test_fold_canonicalize_n1_f64(ptr addrspace(1) %out) #1 {
1258 ; GFX678-LABEL: test_fold_canonicalize_n1_f64:
1260 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1261 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1262 ; GFX678-NEXT: v_mov_b32_e32 v1, 0xbff00000
1263 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1264 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1265 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1266 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1267 ; GFX678-NEXT: s_endpgm
1269 ; GFX9-LABEL: test_fold_canonicalize_n1_f64:
1271 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1272 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1273 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xbff00000
1274 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1275 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1276 ; GFX9-NEXT: s_endpgm
1278 ; GFX11-LABEL: test_fold_canonicalize_n1_f64:
1280 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1281 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xbff00000
1282 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1283 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1284 ; GFX11-NEXT: s_nop 0
1285 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1286 ; GFX11-NEXT: s_endpgm
1287 %canonicalized = call double @llvm.canonicalize.f64(double -1.0)
1288 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for the literal 16.0: no v_max_f64 is expected. The
; stored f64 is materialized as immediates, low dword 0 and high dword
; 0x40300000.
1292 define amdgpu_kernel void @test_fold_canonicalize_literal_f64(ptr addrspace(1) %out) #1 {
1293 ; GFX678-LABEL: test_fold_canonicalize_literal_f64:
1295 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1296 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1297 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x40300000
1298 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1299 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1300 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1301 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1302 ; GFX678-NEXT: s_endpgm
1304 ; GFX9-LABEL: test_fold_canonicalize_literal_f64:
1306 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1307 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1308 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40300000
1309 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1310 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1311 ; GFX9-NEXT: s_endpgm
1313 ; GFX11-LABEL: test_fold_canonicalize_literal_f64:
1315 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1316 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40300000
1317 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1318 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1319 ; GFX11-NEXT: s_nop 0
1320 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1321 ; GFX11-NEXT: s_endpgm
1322 %canonicalized = call double @llvm.canonicalize.f64(double 16.0)
1323 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for bit pattern 4503599627370495 (0x000FFFFFFFFFFFFF),
; the largest positive f64 denormal, under attribute group #2. The expected
; output stores zero in both dwords, i.e. the constant is folded to +0.0.
; NOTE(review): attribute group #2 is defined outside this view; presumably it
; selects an f64 denormal-flushing mode - confirm against its definition.
1327 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #2 {
1328 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
1330 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1331 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1332 ; GFX678-NEXT: v_mov_b32_e32 v1, v0
1333 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1334 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1335 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1336 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1337 ; GFX678-NEXT: s_endpgm
1339 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
1341 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1342 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1343 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
1344 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1345 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1346 ; GFX9-NEXT: s_endpgm
1348 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
1350 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1351 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1352 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1353 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
1354 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1355 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1356 ; GFX11-NEXT: s_nop 0
1357 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1358 ; GFX11-NEXT: s_endpgm
1359 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
1360 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for bit pattern 4503599627370495 (0x000FFFFFFFFFFFFF),
; the largest positive f64 denormal, under attribute group #3. The expected
; output stores the constant unchanged: low dword -1 (0xffffffff), high dword
; 0xfffff.
; NOTE(review): attribute group #3 is defined outside this view; presumably it
; selects a denormal-preserving f64 mode - confirm against its definition.
1364 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #3 {
1365 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
1367 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1368 ; GFX678-NEXT: v_mov_b32_e32 v0, -1
1369 ; GFX678-NEXT: v_mov_b32_e32 v1, 0xfffff
1370 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1371 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1372 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1373 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1374 ; GFX678-NEXT: s_endpgm
1376 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
1378 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1379 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1380 ; GFX9-NEXT: v_mov_b32_e32 v0, -1
1381 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xfffff
1382 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1383 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1384 ; GFX9-NEXT: s_endpgm
1386 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
1388 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1389 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0xfffff
1390 ; GFX11-NEXT: v_mov_b32_e32 v0, -1
1391 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1392 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1393 ; GFX11-NEXT: s_nop 0
1394 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1395 ; GFX11-NEXT: s_endpgm
1396 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
1397 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for bit pattern 9227875636482146303
; (0x800FFFFFFFFFFFFF), the largest-magnitude negative f64 denormal, under
; attribute group #2. The expected output is -0.0: low dword 0 and high dword
; built with v_bfrev_b32 of 1 (0x80000000), so the sign survives the flush.
; NOTE(review): attribute group #2 is defined outside this view; presumably it
; selects an f64 denormal-flushing mode - confirm against its definition.
1401 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #2 {
1402 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
1404 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1405 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1406 ; GFX678-NEXT: v_bfrev_b32_e32 v1, 1
1407 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1408 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1409 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1410 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1411 ; GFX678-NEXT: s_endpgm
1413 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
1415 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1416 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1417 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1418 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1419 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1420 ; GFX9-NEXT: s_endpgm
1422 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
1424 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1425 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1426 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
1427 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1428 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1429 ; GFX11-NEXT: s_nop 0
1430 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1431 ; GFX11-NEXT: s_endpgm
1432 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
1433 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for bit pattern 9227875636482146303
; (0x800FFFFFFFFFFFFF), the largest-magnitude negative f64 denormal, under
; attribute group #3. The expected output stores the constant unchanged: low
; dword -1 (0xffffffff), high dword 0x800fffff.
; NOTE(review): attribute group #3 is defined outside this view; presumably it
; selects a denormal-preserving f64 mode - confirm against its definition.
1437 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #3 {
1438 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
1440 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1441 ; GFX678-NEXT: v_mov_b32_e32 v0, -1
1442 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x800fffff
1443 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1444 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1445 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1446 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1447 ; GFX678-NEXT: s_endpgm
1449 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
1451 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1452 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1453 ; GFX9-NEXT: v_mov_b32_e32 v0, -1
1454 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x800fffff
1455 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1456 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1457 ; GFX9-NEXT: s_endpgm
1459 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
1461 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1462 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x800fffff
1463 ; GFX11-NEXT: v_mov_b32_e32 v0, -1
1464 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1465 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1466 ; GFX11-NEXT: s_nop 0
1467 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1468 ; GFX11-NEXT: s_endpgm
1469 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
1470 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for the f64 constant 0x7FF8000000000000 (quiet NaN with
; an empty payload). No v_max_f64 is expected; the same bit pattern is stored
; back as immediates, low dword 0 and high dword 0x7ff80000.
1474 define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(ptr addrspace(1) %out) #1 {
1475 ; GFX678-LABEL: test_fold_canonicalize_qnan_f64:
1477 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1478 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1479 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1480 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1481 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1482 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1483 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1484 ; GFX678-NEXT: s_endpgm
1486 ; GFX9-LABEL: test_fold_canonicalize_qnan_f64:
1488 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1489 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1490 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1491 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1492 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1493 ; GFX9-NEXT: s_endpgm
1495 ; GFX11-LABEL: test_fold_canonicalize_qnan_f64:
1497 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1498 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1499 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1500 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1501 ; GFX11-NEXT: s_nop 0
1502 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1503 ; GFX11-NEXT: s_endpgm
1504 %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
1505 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for i64 -1 reinterpreted as f64 (all 64 bits set: a
; quiet NaN with the sign bit and every payload bit set). The expected output
; drops the sign and payload: low dword 0, high dword 0x7ff80000.
1509 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(ptr addrspace(1) %out) #1 {
1510 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
1512 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1513 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1514 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1515 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1516 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1517 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1518 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1519 ; GFX678-NEXT: s_endpgm
1521 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
1523 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1524 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1525 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1526 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1527 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1528 ; GFX9-NEXT: s_endpgm
1530 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
1532 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1533 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1534 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1535 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1536 ; GFX11-NEXT: s_nop 0
1537 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1538 ; GFX11-NEXT: s_endpgm
1539 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
1540 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for i64 -2 reinterpreted as f64 (0xFFFFFFFFFFFFFFFE: a
; quiet NaN with the sign bit set and a non-trivial payload). The expected
; output drops the sign and payload: low dword 0, high dword 0x7ff80000.
1544 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(ptr addrspace(1) %out) #1 {
1545 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
1547 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1548 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1549 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1550 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1551 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1552 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1553 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1554 ; GFX678-NEXT: s_endpgm
1556 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
1558 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1559 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1560 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1561 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1562 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1563 ; GFX9-NEXT: s_endpgm
1565 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
1567 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1568 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1569 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1570 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1571 ; GFX11-NEXT: s_nop 0
1572 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1573 ; GFX11-NEXT: s_endpgm
1574 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
1575 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for bit pattern 9218868437227405313
; (0x7FF0000000000001), a positive f64 NaN with the quiet bit (bit 51) clear
; and payload 1. No v_max_f64 is expected; the stored value is low dword 0 and
; high dword 0x7ff80000.
1579 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(ptr addrspace(1) %out) #1 {
1580 ; GFX678-LABEL: test_fold_canonicalize_snan0_value_f64:
1582 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1583 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1584 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1585 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1586 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1587 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1588 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1589 ; GFX678-NEXT: s_endpgm
1591 ; GFX9-LABEL: test_fold_canonicalize_snan0_value_f64:
1593 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1594 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1595 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1596 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1597 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1598 ; GFX9-NEXT: s_endpgm
1600 ; GFX11-LABEL: test_fold_canonicalize_snan0_value_f64:
1602 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1603 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1604 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1605 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1606 ; GFX11-NEXT: s_nop 0
1607 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1608 ; GFX11-NEXT: s_endpgm
1609 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
1610 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for bit pattern 9221120237041090559
; (0x7FF7FFFFFFFFFFFF), a positive f64 NaN with the quiet bit (bit 51) clear
; and every lower mantissa bit set. This is the f64 counterpart of the
; 0x7FBFFFFF operand used by test_fold_canonicalize_snan1_value_f32. The quiet
; bit must stay clear: 0x7FFFFFFFFFFFFFFF would be a quiet NaN and would not
; exercise the signaling-NaN input this test is named for. No v_max_f64 is
; expected; the stored value is low dword 0 and high dword 0x7ff80000.
1614 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(ptr addrspace(1) %out) #1 {
1615 ; GFX678-LABEL: test_fold_canonicalize_snan1_value_f64:
1617 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1618 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1619 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1620 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1621 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1622 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1623 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1624 ; GFX678-NEXT: s_endpgm
1626 ; GFX9-LABEL: test_fold_canonicalize_snan1_value_f64:
1628 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1629 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1630 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1631 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1632 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1633 ; GFX9-NEXT: s_endpgm
1635 ; GFX11-LABEL: test_fold_canonicalize_snan1_value_f64:
1637 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1638 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1639 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1640 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1641 ; GFX11-NEXT: s_nop 0
1642 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1643 ; GFX11-NEXT: s_endpgm
1644 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9221120237041090559 to double))
1645 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold check for bit pattern 18442240474082181121
; (0xFFF0000000000001), an f64 NaN with the sign bit set, the quiet bit
; (bit 51) clear and payload 1. The expected output drops the sign: low dword
; 0, high dword 0x7ff80000, with no v_max_f64 emitted.
1649 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(ptr addrspace(1) %out) #1 {
1650 ; GFX678-LABEL: test_fold_canonicalize_snan2_value_f64:
1652 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1653 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1654 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1655 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1656 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1657 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1658 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1659 ; GFX678-NEXT: s_endpgm
1661 ; GFX9-LABEL: test_fold_canonicalize_snan2_value_f64:
1663 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1664 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1665 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1666 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1667 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1668 ; GFX9-NEXT: s_endpgm
1670 ; GFX11-LABEL: test_fold_canonicalize_snan2_value_f64:
1672 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1673 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1674 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1675 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1676 ; GFX11-NEXT: s_nop 0
1677 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1678 ; GFX11-NEXT: s_endpgm
1679 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
1680 store double %canonicalized, ptr addrspace(1) %out
; Constant-folding test: llvm.canonicalize.f64 is applied to the all-ones bit
; pattern 0xFFFFFFFFFFFFFFFF (spelled as i64 18446744073709551615), which is a
; NaN encoding, and the result is stored to %out. For every target the
; expected code is just a 64-bit store of low dword 0 and high dword
; 0x7ff80000, with no canonicalizing arithmetic.
1684 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(ptr addrspace(1) %out) #1 {
1685 ; GFX678-LABEL: test_fold_canonicalize_snan3_value_f64:
1687 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1688 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1689 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1690 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1691 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1692 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1693 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1694 ; GFX678-NEXT: s_endpgm
1696 ; GFX9-LABEL: test_fold_canonicalize_snan3_value_f64:
1698 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1699 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1700 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1701 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1702 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1703 ; GFX9-NEXT: s_endpgm
1705 ; GFX11-LABEL: test_fold_canonicalize_snan3_value_f64:
1707 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1708 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1709 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1710 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1711 ; GFX11-NEXT: s_nop 0
1712 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1713 ; GFX11-NEXT: s_endpgm
1714 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
1715 store double %canonicalized, ptr addrspace(1) %out
; Per-workitem test: loads the double at %arg[workitem.id.x], canonicalizes it
; and stores the result to %out[workitem.id.x]. The hawaii and fiji checks
; expect the canonicalize to be a multiply by 1.0 (v_mul_f64); the gfx900 and
; gfx1100 checks expect v_max_f64 of the value with itself.
; NOTE(review): the name suggests attribute group #4 selects denormal flushing;
; the attribute definition is outside this excerpt, so confirm there.
1719 define amdgpu_kernel void @test_canonicalize_value_f64_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
1720 ; GFX6-LABEL: test_canonicalize_value_f64_flush:
1722 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1723 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1724 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1725 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
1726 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
1727 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1728 ; GFX6-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1729 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
1730 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
1731 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1732 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1733 ; GFX6-NEXT: v_mul_f64 v[0:1], 1.0, v[0:1]
1734 ; GFX6-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1735 ; GFX6-NEXT: s_endpgm
1737 ; GFX8-LABEL: test_canonicalize_value_f64_flush:
1739 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1740 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1741 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1742 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1743 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
1744 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1745 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
1746 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
1747 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v2
1748 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
1749 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1750 ; GFX8-NEXT: v_mul_f64 v[0:1], 1.0, v[0:1]
1751 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1752 ; GFX8-NEXT: s_endpgm
1754 ; GFX9-LABEL: test_canonicalize_value_f64_flush:
1756 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1757 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1758 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1759 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1760 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1761 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
1762 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
1763 ; GFX9-NEXT: s_endpgm
1765 ; GFX11-LABEL: test_canonicalize_value_f64_flush:
1767 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
1768 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
1769 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1770 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1771 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1772 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
1773 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
1774 ; GFX11-NEXT: s_nop 0
1775 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1776 ; GFX11-NEXT: s_endpgm
1777 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
1778 %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
1779 %v = load double, ptr addrspace(1) %gep, align 8
1780 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
1781 %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
1782 store double %canonicalized, ptr addrspace(1) %gep2, align 8
; Per-workitem test: loads the float at %arg[workitem.id.x], canonicalizes it
; and stores the result to %out[workitem.id.x]. The hawaii and fiji checks
; expect a multiply by 1.0 (v_mul_f32); the gfx900 and gfx1100 checks expect
; v_max_f32 of the value with itself.
; NOTE(review): the name suggests attribute group #4 selects denormal flushing;
; the attribute definition is outside this excerpt, so confirm there.
1786 define amdgpu_kernel void @test_canonicalize_value_f32_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
1787 ; GFX6-LABEL: test_canonicalize_value_f32_flush:
1789 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1790 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1791 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1792 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
1793 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
1794 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1795 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
1796 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
1797 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1798 ; GFX6-NEXT: v_mul_f32_e32 v3, 1.0, v0
1799 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
1800 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1801 ; GFX6-NEXT: flat_store_dword v[0:1], v3
1802 ; GFX6-NEXT: s_endpgm
1804 ; GFX8-LABEL: test_canonicalize_value_f32_flush:
1806 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1807 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1808 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1809 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1810 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
1811 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1812 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
1813 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
1814 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1815 ; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v0
1816 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
1817 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1818 ; GFX8-NEXT: flat_store_dword v[0:1], v3
1819 ; GFX8-NEXT: s_endpgm
1821 ; GFX9-LABEL: test_canonicalize_value_f32_flush:
1823 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1824 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1825 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1826 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
1827 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1828 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
1829 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
1830 ; GFX9-NEXT: s_endpgm
1832 ; GFX11-LABEL: test_canonicalize_value_f32_flush:
1834 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
1835 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1836 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1837 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
1838 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1839 ; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
1840 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
1841 ; GFX11-NEXT: s_nop 0
1842 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1843 ; GFX11-NEXT: s_endpgm
1844 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
1845 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
1846 %v = load float, ptr addrspace(1) %gep, align 4
1847 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
1848 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
1849 store float %canonicalized, ptr addrspace(1) %gep2, align 4
; Per-workitem test: loads the half at %arg[workitem.id.x], canonicalizes it
; and stores the result to %out[workitem.id.x]. Expected lowering per target:
;   hawaii  - convert to f32, multiply by 1.0, convert back to f16;
;   fiji    - v_mul_f16 by 1.0;
;   gfx900 and gfx1100 - v_max_f16 of the value with itself.
; NOTE(review): the name suggests attribute group #4 selects denormal flushing;
; the attribute definition is outside this excerpt, so confirm there.
1853 define amdgpu_kernel void @test_canonicalize_value_f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
1854 ; GFX6-LABEL: test_canonicalize_value_f16_flush:
1856 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1857 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v0
1858 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1859 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
1860 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
1861 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1862 ; GFX6-NEXT: flat_load_ushort v0, v[0:1]
1863 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
1864 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1865 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
1866 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
1867 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v0
1868 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
1869 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1870 ; GFX6-NEXT: flat_store_short v[0:1], v3
1871 ; GFX6-NEXT: s_endpgm
1873 ; GFX8-LABEL: test_canonicalize_value_f16_flush:
1875 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1876 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
1877 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1878 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1879 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
1880 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1881 ; GFX8-NEXT: flat_load_ushort v0, v[0:1]
1882 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
1883 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1884 ; GFX8-NEXT: v_mul_f16_e32 v3, 1.0, v0
1885 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
1886 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1887 ; GFX8-NEXT: flat_store_short v[0:1], v3
1888 ; GFX8-NEXT: s_endpgm
1890 ; GFX9-LABEL: test_canonicalize_value_f16_flush:
1892 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1893 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1894 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1895 ; GFX9-NEXT: global_load_ushort v1, v0, s[0:1]
1896 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1897 ; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
1898 ; GFX9-NEXT: global_store_short v0, v1, s[2:3]
1899 ; GFX9-NEXT: s_endpgm
1901 ; GFX11-LABEL: test_canonicalize_value_f16_flush:
1903 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
1904 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1905 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1906 ; GFX11-NEXT: global_load_u16 v1, v0, s[0:1]
1907 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1908 ; GFX11-NEXT: v_max_f16_e32 v1, v1, v1
1909 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
1910 ; GFX11-NEXT: s_nop 0
1911 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1912 ; GFX11-NEXT: s_endpgm
1913 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
1914 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
1915 %v = load half, ptr addrspace(1) %gep, align 2
1916 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
1917 %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
1918 store half %canonicalized, ptr addrspace(1) %gep2, align 2
; Per-workitem test: loads the <2 x half> at %arg[workitem.id.x],
; canonicalizes it and stores the result to %out[workitem.id.x].
; Expected lowering per target:
;   hawaii  - each half is converted to f32, multiplied by 1.0 and converted
;             back; the two results are re-packed with a shift and an OR;
;   fiji    - v_mul_f16 by 1.0 for the low half and an SDWA v_mul_f16 by the
;             constant 0x3c00 (f16 1.0) for the high half, combined with an OR;
;   gfx900 and gfx1100 - one packed v_pk_max_f16 of the value with itself.
; NOTE(review): the name suggests attribute group #4 selects denormal flushing;
; the attribute definition is outside this excerpt, so confirm there.
1923 define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
1924 ; GFX6-LABEL: test_canonicalize_value_v2f16_flush:
1926 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1927 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1928 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1929 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
1930 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
1931 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1932 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
1933 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
1934 ; GFX6-NEXT: s_waitcnt vmcnt(0)
1935 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
1936 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
1937 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
1938 ; GFX6-NEXT: v_mul_f32_e32 v1, 1.0, v1
1939 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
1940 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
1941 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
1942 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1943 ; GFX6-NEXT: v_or_b32_e32 v4, v1, v0
1944 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
1945 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
1946 ; GFX6-NEXT: flat_store_dword v[0:1], v4
1947 ; GFX6-NEXT: s_endpgm
1949 ; GFX8-LABEL: test_canonicalize_value_v2f16_flush:
1951 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1952 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1953 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1954 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
1955 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
1956 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1957 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
1958 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
1959 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
1960 ; GFX8-NEXT: s_waitcnt vmcnt(0)
1961 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1962 ; GFX8-NEXT: v_mul_f16_e32 v0, 1.0, v0
1963 ; GFX8-NEXT: v_or_b32_e32 v4, v0, v1
1964 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
1965 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
1966 ; GFX8-NEXT: flat_store_dword v[0:1], v4
1967 ; GFX8-NEXT: s_endpgm
1969 ; GFX9-LABEL: test_canonicalize_value_v2f16_flush:
1971 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1972 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1973 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1974 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
1975 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1976 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
1977 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
1978 ; GFX9-NEXT: s_endpgm
1980 ; GFX11-LABEL: test_canonicalize_value_v2f16_flush:
1982 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
1983 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1984 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1985 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
1986 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1987 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
1988 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
1989 ; GFX11-NEXT: s_nop 0
1990 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1991 ; GFX11-NEXT: s_endpgm
1992 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
1993 %gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i32 %id
1994 %v = load <2 x half>, ptr addrspace(1) %gep, align 4
1995 %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
1996 %gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %id
1997 store <2 x half> %canonicalized, ptr addrspace(1) %gep2, align 2
; Per-workitem test: loads the double at %arg[workitem.id.x], canonicalizes it
; and stores the result to %out[workitem.id.x]. All four targets (hawaii,
; fiji, gfx900, gfx1100) are expected to emit v_max_f64 of the value with
; itself.
; NOTE(review): the name suggests attribute group #3 keeps denormals enabled;
; the attribute definition is outside this excerpt, so confirm there.
2001 define amdgpu_kernel void @test_canonicalize_value_f64_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2002 ; GFX6-LABEL: test_canonicalize_value_f64_denorm:
2004 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2005 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2006 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2007 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2008 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2009 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2010 ; GFX6-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
2011 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
2012 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
2013 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2014 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2015 ; GFX6-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2016 ; GFX6-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2017 ; GFX6-NEXT: s_endpgm
2019 ; GFX8-LABEL: test_canonicalize_value_f64_denorm:
2021 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2022 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2023 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2024 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2025 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2026 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2027 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
2028 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
2029 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v2
2030 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2031 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2032 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2033 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2034 ; GFX8-NEXT: s_endpgm
2036 ; GFX9-LABEL: test_canonicalize_value_f64_denorm:
2038 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2039 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2040 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2041 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
2042 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2043 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2044 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
2045 ; GFX9-NEXT: s_endpgm
2047 ; GFX11-LABEL: test_canonicalize_value_f64_denorm:
2049 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2050 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2051 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2052 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
2053 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2054 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2055 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
2056 ; GFX11-NEXT: s_nop 0
2057 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2058 ; GFX11-NEXT: s_endpgm
2059 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2060 %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
2061 %v = load double, ptr addrspace(1) %gep, align 8
2062 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
2063 %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
2064 store double %canonicalized, ptr addrspace(1) %gep2, align 8
; Per-workitem test: loads the float at %arg[workitem.id.x], canonicalizes it
; and stores the result to %out[workitem.id.x]. The hawaii and fiji checks
; expect a multiply by 1.0 (v_mul_f32); the gfx900 and gfx1100 checks expect
; v_max_f32 of the value with itself.
; NOTE(review): the name suggests attribute group #3 keeps denormals enabled;
; the attribute definition is outside this excerpt, so confirm there.
2068 define amdgpu_kernel void @test_canonicalize_value_f32_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2069 ; GFX6-LABEL: test_canonicalize_value_f32_denorm:
2071 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2072 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2073 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2074 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2075 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2076 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2077 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
2078 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
2079 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2080 ; GFX6-NEXT: v_mul_f32_e32 v3, 1.0, v0
2081 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2082 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2083 ; GFX6-NEXT: flat_store_dword v[0:1], v3
2084 ; GFX6-NEXT: s_endpgm
2086 ; GFX8-LABEL: test_canonicalize_value_f32_denorm:
2088 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2089 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2090 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2091 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2092 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2093 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2094 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
2095 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2096 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2097 ; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v0
2098 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2099 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2100 ; GFX8-NEXT: flat_store_dword v[0:1], v3
2101 ; GFX8-NEXT: s_endpgm
2103 ; GFX9-LABEL: test_canonicalize_value_f32_denorm:
2105 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2106 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2107 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2108 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
2109 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2110 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2111 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
2112 ; GFX9-NEXT: s_endpgm
2114 ; GFX11-LABEL: test_canonicalize_value_f32_denorm:
2116 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2117 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2118 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2119 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
2120 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2121 ; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
2122 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
2123 ; GFX11-NEXT: s_nop 0
2124 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2125 ; GFX11-NEXT: s_endpgm
2126 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2127 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
2128 %v = load float, ptr addrspace(1) %gep, align 4
2129 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
2130 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
2131 store float %canonicalized, ptr addrspace(1) %gep2, align 4
2135 ; FIXME: Conversion to float should count as the canonicalize pre-gfx8
; Per-workitem test: loads the half at %arg[workitem.id.x], canonicalizes it
; and stores the result to %out[workitem.id.x]. Expected lowering per target:
;   hawaii  - convert to f32, multiply by 1.0, convert back to f16;
;   fiji, gfx900 and gfx1100 - v_max_f16 of the value with itself.
; NOTE(review): the name suggests attribute group #3 keeps denormals enabled;
; the attribute definition is outside this excerpt, so confirm there.
2136 define amdgpu_kernel void @test_canonicalize_value_f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2137 ; GFX6-LABEL: test_canonicalize_value_f16_denorm:
2139 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2140 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v0
2141 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2142 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2143 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2144 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2145 ; GFX6-NEXT: flat_load_ushort v0, v[0:1]
2146 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
2147 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2148 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
2149 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
2150 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v0
2151 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2152 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2153 ; GFX6-NEXT: flat_store_short v[0:1], v3
2154 ; GFX6-NEXT: s_endpgm
2156 ; GFX8-LABEL: test_canonicalize_value_f16_denorm:
2158 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2159 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
2160 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2161 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2162 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2163 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2164 ; GFX8-NEXT: flat_load_ushort v0, v[0:1]
2165 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2166 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2167 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v0
2168 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2169 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2170 ; GFX8-NEXT: flat_store_short v[0:1], v3
2171 ; GFX8-NEXT: s_endpgm
2173 ; GFX9-LABEL: test_canonicalize_value_f16_denorm:
2175 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2176 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2177 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2178 ; GFX9-NEXT: global_load_ushort v1, v0, s[0:1]
2179 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2180 ; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
2181 ; GFX9-NEXT: global_store_short v0, v1, s[2:3]
2182 ; GFX9-NEXT: s_endpgm
2184 ; GFX11-LABEL: test_canonicalize_value_f16_denorm:
2186 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2187 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2188 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2189 ; GFX11-NEXT: global_load_u16 v1, v0, s[0:1]
2190 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2191 ; GFX11-NEXT: v_max_f16_e32 v1, v1, v1
2192 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
2193 ; GFX11-NEXT: s_nop 0
2194 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2195 ; GFX11-NEXT: s_endpgm
2196 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2197 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
2198 %v = load half, ptr addrspace(1) %gep, align 2
2199 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
2200 %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
2201 store half %canonicalized, ptr addrspace(1) %gep2, align 2
; Per-workitem test: loads the <2 x half> at %arg[workitem.id.x],
; canonicalizes it and stores the result to %out[workitem.id.x].
; Expected lowering per target:
;   hawaii  - each half is converted to f32, multiplied by 1.0 and converted
;             back; the two results are re-packed with a shift and an OR;
;   fiji    - v_max_f16 for the low half and an SDWA v_max_f16 for the high
;             half, combined with an OR;
;   gfx900 and gfx1100 - one packed v_pk_max_f16 of the value with itself.
; NOTE(review): the name suggests attribute group #3 keeps denormals enabled;
; the attribute definition is outside this excerpt, so confirm there.
2207 define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2208 ; GFX6-LABEL: test_canonicalize_value_v2f16_denorm:
2210 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2211 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2212 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2213 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2214 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2215 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2216 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
2217 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
2218 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2219 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
2220 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2221 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
2222 ; GFX6-NEXT: v_mul_f32_e32 v1, 1.0, v1
2223 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
2224 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
2225 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
2226 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2227 ; GFX6-NEXT: v_or_b32_e32 v4, v1, v0
2228 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2229 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
2230 ; GFX6-NEXT: flat_store_dword v[0:1], v4
2231 ; GFX6-NEXT: s_endpgm
2233 ; GFX8-LABEL: test_canonicalize_value_v2f16_denorm:
2235 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2236 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2237 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2238 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2239 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2240 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2241 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
2242 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2243 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2244 ; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2245 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
2246 ; GFX8-NEXT: v_or_b32_e32 v3, v0, v3
2247 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2248 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2249 ; GFX8-NEXT: flat_store_dword v[0:1], v3
2250 ; GFX8-NEXT: s_endpgm
2252 ; GFX9-LABEL: test_canonicalize_value_v2f16_denorm:
2254 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2255 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2256 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2257 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
2258 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2259 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
2260 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
2261 ; GFX9-NEXT: s_endpgm
2263 ; GFX11-LABEL: test_canonicalize_value_v2f16_denorm:
2265 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2266 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2267 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2268 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
2269 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2270 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
2271 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
2272 ; GFX11-NEXT: s_nop 0
2273 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2274 ; GFX11-NEXT: s_endpgm
2275 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2276 %gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i32 %id
2277 %v = load <2 x half>, ptr addrspace(1) %gep, align 4
2278 %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
2279 %gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %id
2280 store <2 x half> %canonicalized, ptr addrspace(1) %gep2, align 2
; Vector test: loads the <2 x double> at %out[workitem.id.x], canonicalizes it
; and stores the result back to the base of %out (the store is not indexed by
; the workitem id). All four targets are expected to emit two v_max_f64
; instructions, one per element, each taking the element as both operands,
; followed by one 128-bit store.
2284 define amdgpu_kernel void @v_test_canonicalize_var_v2f64(ptr addrspace(1) %out) #1 {
2285 ; GFX6-LABEL: v_test_canonicalize_var_v2f64:
2287 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2288 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2289 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2290 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2291 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
2292 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2293 ; GFX6-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2294 ; GFX6-NEXT: v_mov_b32_e32 v5, s1
2295 ; GFX6-NEXT: v_mov_b32_e32 v4, s0
2296 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2297 ; GFX6-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2298 ; GFX6-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2299 ; GFX6-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2300 ; GFX6-NEXT: s_endpgm
2302 ; GFX8-LABEL: v_test_canonicalize_var_v2f64:
2304 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2305 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2306 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2307 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2308 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
2309 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2310 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2311 ; GFX8-NEXT: v_mov_b32_e32 v5, s1
2312 ; GFX8-NEXT: v_mov_b32_e32 v4, s0
2313 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2314 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2315 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2316 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2317 ; GFX8-NEXT: s_endpgm
2319 ; GFX9-LABEL: v_test_canonicalize_var_v2f64:
2321 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2322 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2323 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
2324 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2325 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
2326 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2327 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2328 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2329 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
2330 ; GFX9-NEXT: s_endpgm
2332 ; GFX11-LABEL: v_test_canonicalize_var_v2f64:
2334 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2335 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2336 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
2337 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2338 ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1]
2339 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2340 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2341 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2342 ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
2343 ; GFX11-NEXT: s_nop 0
2344 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2345 ; GFX11-NEXT: s_endpgm
2346 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2347 %gep = getelementptr <2 x double>, ptr addrspace(1) %out, i32 %tid
2348 %val = load <2 x double>, ptr addrspace(1) %gep
2349 %canonicalized = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %val)
2350 store <2 x double> %canonicalized, ptr addrspace(1) %out
; Non-kernel function: canonicalizes a <2 x float> argument and returns it.
; hawaii and fiji are expected to emit one v_mul_f32 by 1.0 per element,
; gfx900 one v_max_f32 per element, and gfx1100 a single v_dual_max_f32 pair
; covering both elements.
2355 define <2 x float> @v_test_canonicalize_v2f32_flush(<2 x float> %arg) #1 {
2356 ; GFX678-LABEL: v_test_canonicalize_v2f32_flush:
2358 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
2360 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
2361 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2363 ; GFX9-LABEL: v_test_canonicalize_v2f32_flush:
2365 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2366 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
2367 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2368 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2370 ; GFX11-LABEL: v_test_canonicalize_v2f32_flush:
2372 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2373 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
2374 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2375 %canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg)
2376 ret <2 x float> %canon
; Non-kernel function: canonicalizes a <3 x float> argument and returns it.
; hawaii and fiji are expected to emit one v_mul_f32 by 1.0 per element,
; gfx900 one v_max_f32 per element, and gfx1100 one v_dual_max_f32 pair for
; elements 0 and 1 plus a plain v_max_f32 for element 2.
2380 define <3 x float> @v_test_canonicalize_v3f32_flush(<3 x float> %arg) #1 {
2381 ; GFX678-LABEL: v_test_canonicalize_v3f32_flush:
2383 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2384 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
2385 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
2386 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
2387 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2389 ; GFX9-LABEL: v_test_canonicalize_v3f32_flush:
2391 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
2393 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2394 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
2395 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2397 ; GFX11-LABEL: v_test_canonicalize_v3f32_flush:
2399 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2400 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
2401 ; GFX11-NEXT: v_max_f32_e32 v2, v2, v2
2402 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2403 %canon = call <3 x float> @llvm.canonicalize.v3f32(<3 x float> %arg)
2404 ret <3 x float> %canon
; Non-kernel function: canonicalizes a <4 x float> argument and returns it.
; hawaii and fiji are expected to emit one v_mul_f32 by 1.0 per element,
; gfx900 one v_max_f32 per element, and gfx1100 two v_dual_max_f32 pairs.
2408 define <4 x float> @v_test_canonicalize_v4f32_flush(<4 x float> %arg) #1 {
2409 ; GFX678-LABEL: v_test_canonicalize_v4f32_flush:
2411 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2412 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
2413 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
2414 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
2415 ; GFX678-NEXT: v_mul_f32_e32 v3, 1.0, v3
2416 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2418 ; GFX9-LABEL: v_test_canonicalize_v4f32_flush:
2420 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2421 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
2422 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2423 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
2424 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
2425 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2427 ; GFX11-LABEL: v_test_canonicalize_v4f32_flush:
2429 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2430 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
2431 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
2432 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2433 %canon = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %arg)
2434 ret <4 x float> %canon
; Non-kernel function: canonicalizes an <8 x float> argument and returns it.
; hawaii and fiji are expected to emit one v_mul_f32 by 1.0 per element,
; gfx900 one v_max_f32 per element, and gfx1100 four v_dual_max_f32 pairs.
2438 define <8 x float> @v_test_canonicalize_v8f32_flush(<8 x float> %arg) #1 {
2439 ; GFX678-LABEL: v_test_canonicalize_v8f32_flush:
2441 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
2443 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
2444 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
2445 ; GFX678-NEXT: v_mul_f32_e32 v3, 1.0, v3
2446 ; GFX678-NEXT: v_mul_f32_e32 v4, 1.0, v4
2447 ; GFX678-NEXT: v_mul_f32_e32 v5, 1.0, v5
2448 ; GFX678-NEXT: v_mul_f32_e32 v6, 1.0, v6
2449 ; GFX678-NEXT: v_mul_f32_e32 v7, 1.0, v7
2450 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2452 ; GFX9-LABEL: v_test_canonicalize_v8f32_flush:
2454 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2455 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
2456 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2457 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
2458 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
2459 ; GFX9-NEXT: v_max_f32_e32 v4, v4, v4
2460 ; GFX9-NEXT: v_max_f32_e32 v5, v5, v5
2461 ; GFX9-NEXT: v_max_f32_e32 v6, v6, v6
2462 ; GFX9-NEXT: v_max_f32_e32 v7, v7, v7
2463 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2465 ; GFX11-LABEL: v_test_canonicalize_v8f32_flush:
2467 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2468 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
2469 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
2470 ; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
2471 ; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
2472 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2473 %canon = call <8 x float> @llvm.canonicalize.v8f32(<8 x float> %arg)
2474 ret <8 x float> %canon
; Canonicalize both lanes of a <2 x double> argument and return the result.
; All three check prefixes (GFX678, GFX9, GFX11) expect the same sequence:
; one v_max_f64 of a 64-bit register pair with itself per lane
; (v[0:1], then v[2:3]), followed by s_setpc_b64 s[30:31].
; NOTE(review): the function uses attribute group #1, which only sets the
; f32 denormal mode; presumably the f64 lowering does not depend on it -
; confirm.
2477 define <2 x double> @v_test_canonicalize_v2f64(<2 x double> %arg) #1 {
2478 ; GFX678-LABEL: v_test_canonicalize_v2f64:
2480 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2481 ; GFX678-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2482 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2483 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2485 ; GFX9-LABEL: v_test_canonicalize_v2f64:
2487 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2488 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2489 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2490 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2492 ; GFX11-LABEL: v_test_canonicalize_v2f64:
2494 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2495 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2496 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2497 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2498 %canon = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %arg)
2499 ret <2 x double> %canon
; Canonicalize all three lanes of a <3 x double> argument and return the
; result. This covers an odd (non-power-of-two) lane count.
; All three check prefixes (GFX678, GFX9, GFX11) expect the same sequence:
; one v_max_f64 of a 64-bit register pair with itself per lane
; (v[0:1], v[2:3], v[4:5]), followed by s_setpc_b64 s[30:31].
2502 define <3 x double> @v_test_canonicalize_v3f64(<3 x double> %arg) #1 {
2503 ; GFX678-LABEL: v_test_canonicalize_v3f64:
2505 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2506 ; GFX678-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2507 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2508 ; GFX678-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
2509 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2511 ; GFX9-LABEL: v_test_canonicalize_v3f64:
2513 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2514 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2515 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2516 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
2517 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2519 ; GFX11-LABEL: v_test_canonicalize_v3f64:
2521 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2522 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2523 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2524 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
2525 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2526 %canon = call <3 x double> @llvm.canonicalize.v3f64(<3 x double> %arg)
2527 ret <3 x double> %canon
; Canonicalize all four lanes of a <4 x double> argument and return the
; result.
; All three check prefixes (GFX678, GFX9, GFX11) expect the same sequence:
; one v_max_f64 of a 64-bit register pair with itself per lane
; (v[0:1], v[2:3], v[4:5], v[6:7]), followed by s_setpc_b64 s[30:31].
2530 define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
2531 ; GFX678-LABEL: v_test_canonicalize_v4f64:
2533 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2534 ; GFX678-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2535 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2536 ; GFX678-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
2537 ; GFX678-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
2538 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2540 ; GFX9-LABEL: v_test_canonicalize_v4f64:
2542 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2543 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2544 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2545 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
2546 ; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
2547 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2549 ; GFX11-LABEL: v_test_canonicalize_v4f64:
2551 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2552 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2553 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2554 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
2555 ; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
2556 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2557 %canon = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %arg)
2558 ret <4 x double> %canon
; Attribute groups referenced by the declarations and test functions in this
; file. Every group is nounwind; they differ in the denormal-handling string
; attribute they set. Each denormal value is a comma-separated pair of modes
; (presumably "output mode,input mode" as described in the LLVM LangRef -
; confirm).
;   #0: nounwind readnone; used by the intrinsic declarations at the top of
;       the file.
;   #1: f32-only mode "preserve-sign,preserve-sign".
;   #2: all-type mode "preserve-sign,preserve-sign".
;   #3: all-type mode "ieee,ieee".
;   #4: all-type mode "preserve-sign,preserve-sign".
;       NOTE(review): textually identical to #2; presumably both numbers are
;       kept so existing references stay valid - verify before merging them.
;   #5-#7: f32-only modes with "dynamic" in both positions, in the first
;       position only, and in the second position only, respectively.
2561 attributes #0 = { nounwind readnone }
2562 attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2563 attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
2564 attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
2565 attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
2566 attributes #5 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" }
2567 attributes #6 = { nounwind "denormal-fp-math-f32"="dynamic,ieee" }
2568 attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,dynamic" }