1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX678,GFX6 %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX678,GFX8 %s
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
8 declare float @llvm.fabs.f32(float) #0
9 declare float @llvm.canonicalize.f32(float) #0
10 declare <2 x float> @llvm.canonicalize.v2f32(<2 x float>) #0
11 declare <3 x float> @llvm.canonicalize.v3f32(<3 x float>) #0
12 declare <4 x float> @llvm.canonicalize.v4f32(<4 x float>) #0
13 declare <8 x float> @llvm.canonicalize.v8f32(<8 x float>) #0
14 declare double @llvm.fabs.f64(double) #0
15 declare double @llvm.canonicalize.f64(double) #0
16 declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0
17 declare <3 x double> @llvm.canonicalize.v3f64(<3 x double>) #0
18 declare <4 x double> @llvm.canonicalize.v4f64(<4 x double>) #0
19 declare half @llvm.canonicalize.f16(half) #0
20 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
21 declare i32 @llvm.amdgcn.workitem.id.x() #0
; Canonicalize an f32 that was loaded from memory.
; The kernel loads a float from %out, passes it through
; llvm.canonicalize.f32 and stores the result back to %out.
; Expected codegen per the checks below: a multiply by 1.0 (v_mul_f32) for the
; hawaii/fiji runs, v_max_f32 of the value with itself for gfx900 and gfx1100,
; and v_max_num_f32 of the value with itself for gfx1200.
23 define amdgpu_kernel void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) #1 {
24 ; GFX678-LABEL: v_test_canonicalize_var_f32:
26 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
27 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
28 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
29 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
30 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
31 ; GFX678-NEXT: s_waitcnt vmcnt(0)
32 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
33 ; GFX678-NEXT: flat_store_dword v[0:1], v2
34 ; GFX678-NEXT: s_endpgm
36 ; GFX9-LABEL: v_test_canonicalize_var_f32:
38 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
39 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
40 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
41 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
42 ; GFX9-NEXT: s_waitcnt vmcnt(0)
43 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
44 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
47 ; GFX11-LABEL: v_test_canonicalize_var_f32:
49 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
50 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
51 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
52 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
53 ; GFX11-NEXT: s_waitcnt vmcnt(0)
54 ; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
55 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
57 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
58 ; GFX11-NEXT: s_endpgm
60 ; GFX12-LABEL: v_test_canonicalize_var_f32:
62 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
63 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
64 ; GFX12-NEXT: s_wait_kmcnt 0x0
65 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
66 ; GFX12-NEXT: s_wait_loadcnt 0x0
67 ; GFX12-NEXT: v_max_num_f32_e32 v1, v1, v1
68 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
70 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
71 ; GFX12-NEXT: s_endpgm
72 %val = load float, ptr addrspace(1) %out
73 %canonicalized = call float @llvm.canonicalize.f32(float %val)
74 store float %canonicalized, ptr addrspace(1) %out
; Canonicalize an f32 that arrives as a kernel argument (%val), so the
; source operand of the canonicalize is a scalar register (s2) in the checks.
; The hawaii and fiji runs are checked under separate prefixes here because the
; printed kernarg offset of %val differs between them (0x2 vs 0x8). The
; gfx1200 run loads pointer and value with a single s_load_b96.
78 define amdgpu_kernel void @s_test_canonicalize_var_f32(ptr addrspace(1) %out, float %val) #1 {
79 ; GFX6-LABEL: s_test_canonicalize_var_f32:
81 ; GFX6-NEXT: s_load_dword s2, s[4:5], 0x2
82 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
83 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
84 ; GFX6-NEXT: v_mul_f32_e64 v2, 1.0, s2
85 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
86 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
87 ; GFX6-NEXT: flat_store_dword v[0:1], v2
90 ; GFX8-LABEL: s_test_canonicalize_var_f32:
92 ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x8
93 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
94 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
95 ; GFX8-NEXT: v_mul_f32_e64 v2, 1.0, s2
96 ; GFX8-NEXT: v_mov_b32_e32 v0, s0
97 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
98 ; GFX8-NEXT: flat_store_dword v[0:1], v2
101 ; GFX9-LABEL: s_test_canonicalize_var_f32:
103 ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
104 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
105 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
106 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
107 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
108 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
109 ; GFX9-NEXT: s_endpgm
111 ; GFX11-LABEL: s_test_canonicalize_var_f32:
113 ; GFX11-NEXT: s_clause 0x1
114 ; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
115 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
116 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
117 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
118 ; GFX11-NEXT: v_max_f32_e64 v1, s2, s2
119 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
120 ; GFX11-NEXT: s_nop 0
121 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
122 ; GFX11-NEXT: s_endpgm
124 ; GFX12-LABEL: s_test_canonicalize_var_f32:
126 ; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
127 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
128 ; GFX12-NEXT: s_wait_kmcnt 0x0
129 ; GFX12-NEXT: v_max_num_f32_e64 v1, s2, s2
130 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
131 ; GFX12-NEXT: s_nop 0
132 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
133 ; GFX12-NEXT: s_endpgm
134 %canonicalized = call float @llvm.canonicalize.f32(float %val)
135 store float %canonicalized, ptr addrspace(1) %out
; Canonicalize fabs(x) for an f32 loaded from %out.
; The checks expect the fabs to be folded into the canonicalizing instruction
; as an absolute-value source modifier (|v|) rather than emitted separately:
; v_mul_f32 1.0, |v2| for hawaii/fiji, and max of |v1| with |v1| on the
; gfx900, gfx1100 and gfx1200 runs.
139 define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(ptr addrspace(1) %out) #1 {
140 ; GFX678-LABEL: v_test_canonicalize_fabs_var_f32:
142 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
143 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
144 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
145 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
146 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
147 ; GFX678-NEXT: s_waitcnt vmcnt(0)
148 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, |v2|
149 ; GFX678-NEXT: flat_store_dword v[0:1], v2
150 ; GFX678-NEXT: s_endpgm
152 ; GFX9-LABEL: v_test_canonicalize_fabs_var_f32:
154 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
155 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
156 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
157 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
158 ; GFX9-NEXT: s_waitcnt vmcnt(0)
159 ; GFX9-NEXT: v_max_f32_e64 v1, |v1|, |v1|
160 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
161 ; GFX9-NEXT: s_endpgm
163 ; GFX11-LABEL: v_test_canonicalize_fabs_var_f32:
165 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
166 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
167 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
168 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
169 ; GFX11-NEXT: s_waitcnt vmcnt(0)
170 ; GFX11-NEXT: v_max_f32_e64 v1, |v1|, |v1|
171 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
172 ; GFX11-NEXT: s_nop 0
173 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
174 ; GFX11-NEXT: s_endpgm
176 ; GFX12-LABEL: v_test_canonicalize_fabs_var_f32:
178 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
179 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
180 ; GFX12-NEXT: s_wait_kmcnt 0x0
181 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
182 ; GFX12-NEXT: s_wait_loadcnt 0x0
183 ; GFX12-NEXT: v_max_num_f32_e64 v1, |v1|, |v1|
184 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
185 ; GFX12-NEXT: s_nop 0
186 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
187 ; GFX12-NEXT: s_endpgm
188 %val = load float, ptr addrspace(1) %out
189 %val.fabs = call float @llvm.fabs.f32(float %val)
190 %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs)
191 store float %canonicalized, ptr addrspace(1) %out
; Canonicalize -fabs(x) for an f32 loaded from %out.
; The checks expect both the fneg and the fabs to be absorbed into the
; canonicalizing instruction: a multiply of |v2| by -1.0 for hawaii/fiji, and a
; max whose two operands both carry the -|v1| source modifiers on the gfx900,
; gfx1100 and gfx1200 runs.
195 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(ptr addrspace(1) %out) #1 {
196 ; GFX678-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
198 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
199 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
200 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
201 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
202 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
203 ; GFX678-NEXT: s_waitcnt vmcnt(0)
204 ; GFX678-NEXT: v_mul_f32_e64 v2, -1.0, |v2|
205 ; GFX678-NEXT: flat_store_dword v[0:1], v2
206 ; GFX678-NEXT: s_endpgm
208 ; GFX9-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
210 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
211 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
212 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
213 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
214 ; GFX9-NEXT: s_waitcnt vmcnt(0)
215 ; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
216 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
217 ; GFX9-NEXT: s_endpgm
219 ; GFX11-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
221 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
222 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
223 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
224 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
225 ; GFX11-NEXT: s_waitcnt vmcnt(0)
226 ; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
227 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
228 ; GFX11-NEXT: s_nop 0
229 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
230 ; GFX11-NEXT: s_endpgm
232 ; GFX12-LABEL: v_test_canonicalize_fneg_fabs_var_f32:
234 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
235 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
236 ; GFX12-NEXT: s_wait_kmcnt 0x0
237 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
238 ; GFX12-NEXT: s_wait_loadcnt 0x0
239 ; GFX12-NEXT: v_max_num_f32_e64 v1, -|v1|, -|v1|
240 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
241 ; GFX12-NEXT: s_nop 0
242 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
243 ; GFX12-NEXT: s_endpgm
244 %val = load float, ptr addrspace(1) %out
245 %val.fabs = call float @llvm.fabs.f32(float %val)
246 %val.fabs.fneg = fneg float %val.fabs
247 %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg)
248 store float %canonicalized, ptr addrspace(1) %out
; Canonicalize -x for an f32 loaded from %out.
; The checks expect the fneg to be absorbed into the canonicalizing
; instruction: a multiply by -1.0 for hawaii/fiji, and a max with the negate
; source modifier on both operands (-v1, -v1) on the gfx900, gfx1100 and
; gfx1200 runs.
252 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(ptr addrspace(1) %out) #1 {
253 ; GFX678-LABEL: v_test_canonicalize_fneg_var_f32:
255 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
256 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
257 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
258 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
259 ; GFX678-NEXT: flat_load_dword v2, v[0:1]
260 ; GFX678-NEXT: s_waitcnt vmcnt(0)
261 ; GFX678-NEXT: v_mul_f32_e32 v2, -1.0, v2
262 ; GFX678-NEXT: flat_store_dword v[0:1], v2
263 ; GFX678-NEXT: s_endpgm
265 ; GFX9-LABEL: v_test_canonicalize_fneg_var_f32:
267 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
268 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
269 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
270 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
271 ; GFX9-NEXT: s_waitcnt vmcnt(0)
272 ; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1
273 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
274 ; GFX9-NEXT: s_endpgm
276 ; GFX11-LABEL: v_test_canonicalize_fneg_var_f32:
278 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
279 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
280 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
281 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
282 ; GFX11-NEXT: s_waitcnt vmcnt(0)
283 ; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1
284 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
285 ; GFX11-NEXT: s_nop 0
286 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
287 ; GFX11-NEXT: s_endpgm
289 ; GFX12-LABEL: v_test_canonicalize_fneg_var_f32:
291 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
292 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
293 ; GFX12-NEXT: s_wait_kmcnt 0x0
294 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
295 ; GFX12-NEXT: s_wait_loadcnt 0x0
296 ; GFX12-NEXT: v_max_num_f32_e64 v1, -v1, -v1
297 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
298 ; GFX12-NEXT: s_nop 0
299 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
300 ; GFX12-NEXT: s_endpgm
301 %val = load float, ptr addrspace(1) %out
302 %val.fneg = fneg float %val
303 %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg)
304 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize(undef).
; The checks expect no canonicalizing instruction at all: every run simply
; stores the constant 0 to %out.
308 define amdgpu_kernel void @test_fold_canonicalize_undef_f32(ptr addrspace(1) %out) #1 {
309 ; GFX678-LABEL: test_fold_canonicalize_undef_f32:
311 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
312 ; GFX678-NEXT: v_mov_b32_e32 v2, 0
313 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
314 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
315 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
316 ; GFX678-NEXT: flat_store_dword v[0:1], v2
317 ; GFX678-NEXT: s_endpgm
319 ; GFX9-LABEL: test_fold_canonicalize_undef_f32:
321 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
322 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
323 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
324 ; GFX9-NEXT: global_store_dword v0, v0, s[0:1]
325 ; GFX9-NEXT: s_endpgm
327 ; GFX11-LABEL: test_fold_canonicalize_undef_f32:
329 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
330 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
331 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
332 ; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
333 ; GFX11-NEXT: s_nop 0
334 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
335 ; GFX11-NEXT: s_endpgm
337 ; GFX12-LABEL: test_fold_canonicalize_undef_f32:
339 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
340 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
341 ; GFX12-NEXT: s_wait_kmcnt 0x0
342 ; GFX12-NEXT: global_store_b32 v0, v0, s[0:1]
343 ; GFX12-NEXT: s_nop 0
344 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
345 ; GFX12-NEXT: s_endpgm
346 %canonicalized = call float @llvm.canonicalize.f32(float undef)
347 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize(+0.0).
; The checks expect the call to disappear and the constant 0 to be stored
; directly to %out on every run.
351 define amdgpu_kernel void @test_fold_canonicalize_p0_f32(ptr addrspace(1) %out) #1 {
352 ; GFX678-LABEL: test_fold_canonicalize_p0_f32:
354 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
355 ; GFX678-NEXT: v_mov_b32_e32 v2, 0
356 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
357 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
358 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
359 ; GFX678-NEXT: flat_store_dword v[0:1], v2
360 ; GFX678-NEXT: s_endpgm
362 ; GFX9-LABEL: test_fold_canonicalize_p0_f32:
364 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
365 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
366 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
367 ; GFX9-NEXT: global_store_dword v0, v0, s[0:1]
368 ; GFX9-NEXT: s_endpgm
370 ; GFX11-LABEL: test_fold_canonicalize_p0_f32:
372 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
373 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
374 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
375 ; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
376 ; GFX11-NEXT: s_nop 0
377 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
378 ; GFX11-NEXT: s_endpgm
380 ; GFX12-LABEL: test_fold_canonicalize_p0_f32:
382 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
383 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
384 ; GFX12-NEXT: s_wait_kmcnt 0x0
385 ; GFX12-NEXT: global_store_b32 v0, v0, s[0:1]
386 ; GFX12-NEXT: s_nop 0
387 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
388 ; GFX12-NEXT: s_endpgm
389 %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
390 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize(-0.0).
; The checks expect the call to disappear; the stored constant is materialized
; with v_bfrev_b32 of 1 (the bit-reverse of 1, i.e. only the sign bit set), so
; the sign of the zero is kept.
394 define amdgpu_kernel void @test_fold_canonicalize_n0_f32(ptr addrspace(1) %out) #1 {
395 ; GFX678-LABEL: test_fold_canonicalize_n0_f32:
397 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
398 ; GFX678-NEXT: v_bfrev_b32_e32 v2, 1
399 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
400 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
401 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
402 ; GFX678-NEXT: flat_store_dword v[0:1], v2
403 ; GFX678-NEXT: s_endpgm
405 ; GFX9-LABEL: test_fold_canonicalize_n0_f32:
407 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
408 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
409 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
410 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
411 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
412 ; GFX9-NEXT: s_endpgm
414 ; GFX11-LABEL: test_fold_canonicalize_n0_f32:
416 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
417 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
418 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
419 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
420 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
421 ; GFX11-NEXT: s_nop 0
422 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
423 ; GFX11-NEXT: s_endpgm
425 ; GFX12-LABEL: test_fold_canonicalize_n0_f32:
427 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
428 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
429 ; GFX12-NEXT: v_bfrev_b32_e32 v1, 1
430 ; GFX12-NEXT: s_wait_kmcnt 0x0
431 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
432 ; GFX12-NEXT: s_nop 0
433 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
434 ; GFX12-NEXT: s_endpgm
435 %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
436 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize(1.0).
; The checks expect the call to disappear and 1.0 to be moved into a VGPR as an
; inline constant and stored. The gfx1100 and gfx1200 runs pair that move with
; the zero move in one v_dual_mov_b32.
440 define amdgpu_kernel void @test_fold_canonicalize_p1_f32(ptr addrspace(1) %out) #1 {
441 ; GFX678-LABEL: test_fold_canonicalize_p1_f32:
443 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
444 ; GFX678-NEXT: v_mov_b32_e32 v2, 1.0
445 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
446 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
447 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
448 ; GFX678-NEXT: flat_store_dword v[0:1], v2
449 ; GFX678-NEXT: s_endpgm
451 ; GFX9-LABEL: test_fold_canonicalize_p1_f32:
453 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
454 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
455 ; GFX9-NEXT: v_mov_b32_e32 v1, 1.0
456 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
457 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
458 ; GFX9-NEXT: s_endpgm
460 ; GFX11-LABEL: test_fold_canonicalize_p1_f32:
462 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
463 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 1.0
464 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
465 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
466 ; GFX11-NEXT: s_nop 0
467 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
468 ; GFX11-NEXT: s_endpgm
470 ; GFX12-LABEL: test_fold_canonicalize_p1_f32:
472 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
473 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 1.0
474 ; GFX12-NEXT: s_wait_kmcnt 0x0
475 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
476 ; GFX12-NEXT: s_nop 0
477 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
478 ; GFX12-NEXT: s_endpgm
479 %canonicalized = call float @llvm.canonicalize.f32(float 1.0)
480 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize(-1.0).
; The checks expect the call to disappear and -1.0 to be moved into a VGPR as
; an inline constant and stored to %out.
484 define amdgpu_kernel void @test_fold_canonicalize_n1_f32(ptr addrspace(1) %out) #1 {
485 ; GFX678-LABEL: test_fold_canonicalize_n1_f32:
487 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
488 ; GFX678-NEXT: v_mov_b32_e32 v2, -1.0
489 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
490 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
491 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
492 ; GFX678-NEXT: flat_store_dword v[0:1], v2
493 ; GFX678-NEXT: s_endpgm
495 ; GFX9-LABEL: test_fold_canonicalize_n1_f32:
497 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
498 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
499 ; GFX9-NEXT: v_mov_b32_e32 v1, -1.0
500 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
501 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
502 ; GFX9-NEXT: s_endpgm
504 ; GFX11-LABEL: test_fold_canonicalize_n1_f32:
506 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
507 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, -1.0
508 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
509 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
510 ; GFX11-NEXT: s_nop 0
511 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
512 ; GFX11-NEXT: s_endpgm
514 ; GFX12-LABEL: test_fold_canonicalize_n1_f32:
516 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
517 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, -1.0
518 ; GFX12-NEXT: s_wait_kmcnt 0x0
519 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
520 ; GFX12-NEXT: s_nop 0
521 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
522 ; GFX12-NEXT: s_endpgm
523 %canonicalized = call float @llvm.canonicalize.f32(float -1.0)
524 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize(16.0).
; The checks print this constant as the 32-bit literal 0x41800000 (the f32
; bit pattern of 16.0) rather than as a floating-point inline operand. The
; call itself is expected to disappear.
528 define amdgpu_kernel void @test_fold_canonicalize_literal_f32(ptr addrspace(1) %out) #1 {
529 ; GFX678-LABEL: test_fold_canonicalize_literal_f32:
531 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
532 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x41800000
533 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
534 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
535 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
536 ; GFX678-NEXT: flat_store_dword v[0:1], v2
537 ; GFX678-NEXT: s_endpgm
539 ; GFX9-LABEL: test_fold_canonicalize_literal_f32:
541 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
542 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
543 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x41800000
544 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
545 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
546 ; GFX9-NEXT: s_endpgm
548 ; GFX11-LABEL: test_fold_canonicalize_literal_f32:
550 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
551 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x41800000
552 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
553 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
554 ; GFX11-NEXT: s_nop 0
555 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
556 ; GFX11-NEXT: s_endpgm
558 ; GFX12-LABEL: test_fold_canonicalize_literal_f32:
560 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
561 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x41800000
562 ; GFX12-NEXT: s_wait_kmcnt 0x0
563 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
564 ; GFX12-NEXT: s_nop 0
565 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
566 ; GFX12-NEXT: s_endpgm
567 %canonicalized = call float @llvm.canonicalize.f32(float 16.0)
568 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize of the bit pattern 8388607 (0x7fffff), a positive
; value with a zero exponent field and all mantissa bits set.
; The checks expect the constant to be folded to 0 at compile time: every run
; stores 0 and emits no canonicalizing instruction.
; NOTE(review): attribute group #1 is defined outside this view; going by the
; test name it presumably disables f32 denormals - confirm against the
; attribute list at the end of the file.
572 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #1 {
573 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
575 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
576 ; GFX678-NEXT: v_mov_b32_e32 v2, 0
577 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
578 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
579 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
580 ; GFX678-NEXT: flat_store_dword v[0:1], v2
581 ; GFX678-NEXT: s_endpgm
583 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
585 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
586 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
587 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
588 ; GFX9-NEXT: global_store_dword v0, v0, s[0:1]
589 ; GFX9-NEXT: s_endpgm
591 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
593 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
594 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
595 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
596 ; GFX11-NEXT: global_store_b32 v0, v0, s[0:1]
597 ; GFX11-NEXT: s_nop 0
598 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
599 ; GFX11-NEXT: s_endpgm
601 ; GFX12-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32:
603 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
604 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
605 ; GFX12-NEXT: s_wait_kmcnt 0x0
606 ; GFX12-NEXT: global_store_b32 v0, v0, s[0:1]
607 ; GFX12-NEXT: s_nop 0
608 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
609 ; GFX12-NEXT: s_endpgm
610 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
611 store float %canonicalized, ptr addrspace(1) %out
; Same constant as the denormal0 test (bit pattern 8388607 = 0x7fffff), but
; under attribute group #5.
; Here the checks expect NO compile-time fold: the canonicalizing instruction
; is still emitted on the constant (v_mul_f32 1.0, s2 for hawaii/fiji, max of
; the value with itself on the newer targets), so the result is decided at run
; time.
; NOTE(review): attribute group #5 is defined outside this view; the test name
; suggests it selects a dynamic denormal mode - confirm.
615 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic(ptr addrspace(1) %out) #5 {
616 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
618 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
619 ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
620 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
621 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
622 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
623 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
624 ; GFX678-NEXT: flat_store_dword v[0:1], v2
625 ; GFX678-NEXT: s_endpgm
627 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
629 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
630 ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
631 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
632 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
633 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
634 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
635 ; GFX9-NEXT: s_endpgm
637 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
639 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
640 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
641 ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
642 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
643 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
644 ; GFX11-NEXT: s_nop 0
645 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
646 ; GFX11-NEXT: s_endpgm
648 ; GFX12-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
650 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
651 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
652 ; GFX12-NEXT: v_max_num_f32_e64 v1, 0x7fffff, 0x7fffff
653 ; GFX12-NEXT: s_wait_kmcnt 0x0
654 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
655 ; GFX12-NEXT: s_nop 0
656 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
657 ; GFX12-NEXT: s_endpgm
658 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
659 store float %canonicalized, ptr addrspace(1) %out
; Same constant (bit pattern 8388607 = 0x7fffff) under attribute group #6.
; As in the _dynamic variant, the checks expect the canonicalizing instruction
; to be kept on the constant instead of folding it at compile time.
; NOTE(review): attribute group #6 is defined outside this view; the "_out"
; suffix suggests only the output side of the denormal mode is dynamic -
; confirm.
663 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out(ptr addrspace(1) %out) #6 {
664 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
666 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
667 ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
668 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
669 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
670 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
671 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
672 ; GFX678-NEXT: flat_store_dword v[0:1], v2
673 ; GFX678-NEXT: s_endpgm
675 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
677 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
678 ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
679 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
680 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
681 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
682 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
683 ; GFX9-NEXT: s_endpgm
685 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
687 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
688 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
689 ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
690 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
691 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
692 ; GFX11-NEXT: s_nop 0
693 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
694 ; GFX11-NEXT: s_endpgm
696 ; GFX12-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
698 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
699 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
700 ; GFX12-NEXT: v_max_num_f32_e64 v1, 0x7fffff, 0x7fffff
701 ; GFX12-NEXT: s_wait_kmcnt 0x0
702 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
703 ; GFX12-NEXT: s_nop 0
704 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
705 ; GFX12-NEXT: s_endpgm
706 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
707 store float %canonicalized, ptr addrspace(1) %out
; Same constant (bit pattern 8388607 = 0x7fffff) under attribute group #7.
; As in the other dynamic variants, the checks expect the canonicalizing
; instruction to be kept on the constant instead of folding it at compile
; time.
; NOTE(review): attribute group #7 is defined outside this view; the "_in"
; suffix suggests only the input side of the denormal mode is dynamic -
; confirm.
711 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in(ptr addrspace(1) %out) #7 {
712 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
714 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
715 ; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
716 ; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
717 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
718 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
719 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
720 ; GFX678-NEXT: flat_store_dword v[0:1], v2
721 ; GFX678-NEXT: s_endpgm
723 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
725 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
726 ; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
727 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
728 ; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
729 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
730 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
731 ; GFX9-NEXT: s_endpgm
733 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
735 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
736 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
737 ; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
738 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
739 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
740 ; GFX11-NEXT: s_nop 0
741 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
742 ; GFX11-NEXT: s_endpgm
744 ; GFX12-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
746 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
747 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
748 ; GFX12-NEXT: v_max_num_f32_e64 v1, 0x7fffff, 0x7fffff
749 ; GFX12-NEXT: s_wait_kmcnt 0x0
750 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
751 ; GFX12-NEXT: s_nop 0
752 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
753 ; GFX12-NEXT: s_endpgm
754 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
755 store float %canonicalized, ptr addrspace(1) %out
; Same constant (bit pattern 8388607 = 0x7fffff) under attribute group #3.
; The checks expect a compile-time fold that PRESERVES the value: every run
; stores the literal 0x7fffff unchanged and emits no canonicalizing
; instruction.
; NOTE(review): attribute group #3 is defined outside this view; going by the
; test name it presumably enables f32 denormals - confirm.
759 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #3 {
760 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
762 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
763 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fffff
764 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
765 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
766 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
767 ; GFX678-NEXT: flat_store_dword v[0:1], v2
768 ; GFX678-NEXT: s_endpgm
770 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
772 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
773 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
774 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fffff
775 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
776 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
777 ; GFX9-NEXT: s_endpgm
779 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
781 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
782 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fffff
783 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
784 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
785 ; GFX11-NEXT: s_nop 0
786 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
787 ; GFX11-NEXT: s_endpgm
789 ; GFX12-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
791 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
792 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fffff
793 ; GFX12-NEXT: s_wait_kmcnt 0x0
794 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
795 ; GFX12-NEXT: s_nop 0
796 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
797 ; GFX12-NEXT: s_endpgm
798 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
799 store float %canonicalized, ptr addrspace(1) %out
; Constant-fold canonicalize of the bit pattern 2155872255 (0x807fffff), the
; sign-bit-set counterpart of the denormal0 constant.
; The checks expect a compile-time fold to a value with only the sign bit set,
; materialized with v_bfrev_b32 of 1 - the same encoding the -0.0 fold test
; expects - and no canonicalizing instruction.
; NOTE(review): attribute group #1 is defined outside this view; going by the
; test name it presumably disables f32 denormals - confirm.
803 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #1 {
804 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
806 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
807 ; GFX678-NEXT: v_bfrev_b32_e32 v2, 1
808 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
809 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
810 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
811 ; GFX678-NEXT: flat_store_dword v[0:1], v2
812 ; GFX678-NEXT: s_endpgm
814 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
816 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
817 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
818 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
819 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
820 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
821 ; GFX9-NEXT: s_endpgm
823 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
825 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
826 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
827 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
828 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
829 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
830 ; GFX11-NEXT: s_nop 0
831 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
832 ; GFX11-NEXT: s_endpgm
834 ; GFX12-LABEL: test_no_denormals_fold_canonicalize_denormal1_f32:
836 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
837 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
838 ; GFX12-NEXT: v_bfrev_b32_e32 v1, 1
839 ; GFX12-NEXT: s_wait_kmcnt 0x0
840 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
841 ; GFX12-NEXT: s_nop 0
842 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
843 ; GFX12-NEXT: s_endpgm
844 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
845 store float %canonicalized, ptr addrspace(1) %out
; Same sign-bit-set constant (bit pattern 2155872255 = 0x807fffff) under
; attribute group #3.
; The checks expect a compile-time fold that PRESERVES the value: every run
; stores the literal 0x807fffff unchanged and emits no canonicalizing
; instruction.
; NOTE(review): attribute group #3 is defined outside this view; going by the
; test name it presumably enables f32 denormals - confirm.
849 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f32(ptr addrspace(1) %out) #3 {
850 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
852 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
853 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x807fffff
854 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
855 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
856 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
857 ; GFX678-NEXT: flat_store_dword v[0:1], v2
858 ; GFX678-NEXT: s_endpgm
860 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
862 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
863 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
864 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x807fffff
865 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
866 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
867 ; GFX9-NEXT: s_endpgm
869 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
871 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
872 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x807fffff
873 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
874 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
875 ; GFX11-NEXT: s_nop 0
876 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
877 ; GFX11-NEXT: s_endpgm
879 ; GFX12-LABEL: test_denormals_fold_canonicalize_denormal1_f32:
881 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
882 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x807fffff
883 ; GFX12-NEXT: s_wait_kmcnt 0x0
884 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
885 ; GFX12-NEXT: s_nop 0
886 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
887 ; GFX12-NEXT: s_endpgm
888 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
889 store float %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f32 of a quiet NaN, written in the IR as
; the hex float literal 0x7FF8000000000000. Every check block expects the
; 32-bit constant 0x7fc00000 to be stored to %out, with no canonicalize
; instruction emitted at run time.
893 define amdgpu_kernel void @test_fold_canonicalize_qnan_f32(ptr addrspace(1) %out) #1 {
894 ; GFX678-LABEL: test_fold_canonicalize_qnan_f32:
896 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
897 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
898 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
899 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
900 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
901 ; GFX678-NEXT: flat_store_dword v[0:1], v2
902 ; GFX678-NEXT: s_endpgm
904 ; GFX9-LABEL: test_fold_canonicalize_qnan_f32:
906 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
907 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
908 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
909 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
910 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
911 ; GFX9-NEXT: s_endpgm
913 ; GFX11-LABEL: test_fold_canonicalize_qnan_f32:
915 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
916 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
917 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
918 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
919 ; GFX11-NEXT: s_nop 0
920 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
921 ; GFX11-NEXT: s_endpgm
923 ; GFX12-LABEL: test_fold_canonicalize_qnan_f32:
925 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
926 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
927 ; GFX12-NEXT: s_wait_kmcnt 0x0
928 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
929 ; GFX12-NEXT: s_nop 0
930 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
931 ; GFX12-NEXT: s_endpgm
932 %canonicalized = call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
933 store float %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f32 of the all-ones bit pattern (i32 -1,
; i.e. 0xFFFFFFFF: sign bit set, NaN with a full payload). Every check block
; expects the result to collapse to the constant 0x7fc00000, so neither the
; sign nor the payload of the input survives the fold.
937 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f32(ptr addrspace(1) %out) #1 {
938 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
940 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
941 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
942 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
943 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
944 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
945 ; GFX678-NEXT: flat_store_dword v[0:1], v2
946 ; GFX678-NEXT: s_endpgm
948 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
950 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
951 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
952 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
953 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
954 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
955 ; GFX9-NEXT: s_endpgm
957 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
959 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
960 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
961 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
962 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
963 ; GFX11-NEXT: s_nop 0
964 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
965 ; GFX11-NEXT: s_endpgm
967 ; GFX12-LABEL: test_fold_canonicalize_qnan_value_neg1_f32:
969 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
970 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
971 ; GFX12-NEXT: s_wait_kmcnt 0x0
972 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
973 ; GFX12-NEXT: s_nop 0
974 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
975 ; GFX12-NEXT: s_endpgm
976 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -1 to float))
977 store float %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f32 of the bit pattern i32 -2 (0xFFFFFFFE),
; a NaN encoding that differs from the neg1 variant only in the lowest payload
; bit. Every check block expects the constant 0x7fc00000 to be stored, the
; same result as for the other NaN inputs in this group.
981 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f32(ptr addrspace(1) %out) #1 {
982 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
984 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
985 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
986 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
987 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
988 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
989 ; GFX678-NEXT: flat_store_dword v[0:1], v2
990 ; GFX678-NEXT: s_endpgm
992 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
994 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
995 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
996 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
997 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
998 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
999 ; GFX9-NEXT: s_endpgm
1001 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
1003 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1004 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1005 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1006 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
1007 ; GFX11-NEXT: s_nop 0
1008 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1009 ; GFX11-NEXT: s_endpgm
1011 ; GFX12-LABEL: test_fold_canonicalize_qnan_value_neg2_f32:
1013 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1014 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1015 ; GFX12-NEXT: s_wait_kmcnt 0x0
1016 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1017 ; GFX12-NEXT: s_nop 0
1018 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1019 ; GFX12-NEXT: s_endpgm
1020 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 -2 to float))
1021 store float %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f32 of the bit pattern 2139095041
; (0x7F800001): positive sign, all-ones exponent, quiet bit clear, and only
; the lowest mantissa bit set, i.e. a signaling NaN. Every check block expects
; the quieted constant 0x7fc00000 to be stored.
1025 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f32(ptr addrspace(1) %out) #1 {
1026 ; GFX678-LABEL: test_fold_canonicalize_snan0_value_f32:
1028 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1029 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
1030 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1031 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1032 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1033 ; GFX678-NEXT: flat_store_dword v[0:1], v2
1034 ; GFX678-NEXT: s_endpgm
1036 ; GFX9-LABEL: test_fold_canonicalize_snan0_value_f32:
1038 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1039 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1040 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
1041 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1042 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
1043 ; GFX9-NEXT: s_endpgm
1045 ; GFX11-LABEL: test_fold_canonicalize_snan0_value_f32:
1047 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1048 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1049 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1050 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
1051 ; GFX11-NEXT: s_nop 0
1052 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1053 ; GFX11-NEXT: s_endpgm
1055 ; GFX12-LABEL: test_fold_canonicalize_snan0_value_f32:
1057 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1058 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1059 ; GFX12-NEXT: s_wait_kmcnt 0x0
1060 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1061 ; GFX12-NEXT: s_nop 0
1062 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1063 ; GFX12-NEXT: s_endpgm
1064 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2139095041 to float))
1065 store float %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f32 of the bit pattern 2143289343
; (0x7FBFFFFF): positive sign, all-ones exponent, quiet bit clear, and every
; other mantissa bit set, i.e. a signaling NaN with the largest payload.
; Every check block expects the constant 0x7fc00000 to be stored.
1069 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f32(ptr addrspace(1) %out) #1 {
1070 ; GFX678-LABEL: test_fold_canonicalize_snan1_value_f32:
1072 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1073 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
1074 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1075 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1076 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1077 ; GFX678-NEXT: flat_store_dword v[0:1], v2
1078 ; GFX678-NEXT: s_endpgm
1080 ; GFX9-LABEL: test_fold_canonicalize_snan1_value_f32:
1082 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1083 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1084 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
1085 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1086 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
1087 ; GFX9-NEXT: s_endpgm
1089 ; GFX11-LABEL: test_fold_canonicalize_snan1_value_f32:
1091 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1092 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1093 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1094 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
1095 ; GFX11-NEXT: s_nop 0
1096 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1097 ; GFX11-NEXT: s_endpgm
1099 ; GFX12-LABEL: test_fold_canonicalize_snan1_value_f32:
1101 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1102 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1103 ; GFX12-NEXT: s_wait_kmcnt 0x0
1104 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1105 ; GFX12-NEXT: s_nop 0
1106 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1107 ; GFX12-NEXT: s_endpgm
1108 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2143289343 to float))
1109 store float %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f32 of the bit pattern 4286578689
; (0xFF800001): the snan0 pattern with the sign bit set. Every check block
; expects the positive constant 0x7fc00000 to be stored, so the input sign is
; not carried into the folded result.
1113 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f32(ptr addrspace(1) %out) #1 {
1114 ; GFX678-LABEL: test_fold_canonicalize_snan2_value_f32:
1116 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1117 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
1118 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1119 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1120 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1121 ; GFX678-NEXT: flat_store_dword v[0:1], v2
1122 ; GFX678-NEXT: s_endpgm
1124 ; GFX9-LABEL: test_fold_canonicalize_snan2_value_f32:
1126 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1127 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1128 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
1129 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1130 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
1131 ; GFX9-NEXT: s_endpgm
1133 ; GFX11-LABEL: test_fold_canonicalize_snan2_value_f32:
1135 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1136 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1137 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1138 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
1139 ; GFX11-NEXT: s_nop 0
1140 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1141 ; GFX11-NEXT: s_endpgm
1143 ; GFX12-LABEL: test_fold_canonicalize_snan2_value_f32:
1145 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1146 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1147 ; GFX12-NEXT: s_wait_kmcnt 0x0
1148 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1149 ; GFX12-NEXT: s_nop 0
1150 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1151 ; GFX12-NEXT: s_endpgm
1152 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4286578689 to float))
1153 store float %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f32 of the bit pattern 4290772991
; (0xFFBFFFFF): the snan1 pattern with the sign bit set. Every check block
; expects the positive constant 0x7fc00000 to be stored, matching the other
; NaN inputs in this group.
1157 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f32(ptr addrspace(1) %out) #1 {
1158 ; GFX678-LABEL: test_fold_canonicalize_snan3_value_f32:
1160 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1161 ; GFX678-NEXT: v_mov_b32_e32 v2, 0x7fc00000
1162 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1163 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1164 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1165 ; GFX678-NEXT: flat_store_dword v[0:1], v2
1166 ; GFX678-NEXT: s_endpgm
1168 ; GFX9-LABEL: test_fold_canonicalize_snan3_value_f32:
1170 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1171 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1172 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7fc00000
1173 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1174 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
1175 ; GFX9-NEXT: s_endpgm
1177 ; GFX11-LABEL: test_fold_canonicalize_snan3_value_f32:
1179 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1180 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1181 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1182 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
1183 ; GFX11-NEXT: s_nop 0
1184 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1185 ; GFX11-NEXT: s_endpgm
1187 ; GFX12-LABEL: test_fold_canonicalize_snan3_value_f32:
1189 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1190 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc00000
1191 ; GFX12-NEXT: s_wait_kmcnt 0x0
1192 ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1193 ; GFX12-NEXT: s_nop 0
1194 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1195 ; GFX12-NEXT: s_endpgm
1196 %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 4290772991 to float))
1197 store float %canonicalized, ptr addrspace(1) %out
; Canonicalizes a run-time double: loads the value from %out, applies
; llvm.canonicalize.f64, and stores the result back to the same pointer.
; The check blocks expect the canonicalize to be emitted as a 64-bit max of
; the loaded VGPR pair with itself (v_max_f64, or v_max_num_f64 in the gfx1200
; run), placed between the load and the store.
1201 define amdgpu_kernel void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) #1 {
1202 ; GFX678-LABEL: v_test_canonicalize_var_f64:
1204 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1205 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1206 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1207 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1208 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1209 ; GFX678-NEXT: s_waitcnt vmcnt(0)
1210 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
1211 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1212 ; GFX678-NEXT: s_endpgm
1214 ; GFX9-LABEL: v_test_canonicalize_var_f64:
1216 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1217 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1218 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1219 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1220 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1221 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
1222 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1223 ; GFX9-NEXT: s_endpgm
1225 ; GFX11-LABEL: v_test_canonicalize_var_f64:
1227 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1228 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1229 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1230 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1231 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1232 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
1233 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1234 ; GFX11-NEXT: s_nop 0
1235 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1236 ; GFX11-NEXT: s_endpgm
1238 ; GFX12-LABEL: v_test_canonicalize_var_f64:
1240 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1241 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
1242 ; GFX12-NEXT: s_wait_kmcnt 0x0
1243 ; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1244 ; GFX12-NEXT: s_wait_loadcnt 0x0
1245 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
1246 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1247 ; GFX12-NEXT: s_nop 0
1248 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1249 ; GFX12-NEXT: s_endpgm
1250 %val = load double, ptr addrspace(1) %out
1251 %canonicalized = call double @llvm.canonicalize.f64(double %val)
1252 store double %canonicalized, ptr addrspace(1) %out
; Canonicalizes a double passed directly as the kernel argument %val and
; stores the result to %out. The check blocks expect a 64-bit max whose two
; source operands are both the SGPR pair s[2:3] holding the argument.
; The hawaii and fiji runs have separate check blocks here because they
; differ in which VGPR pairs hold the result and the store address.
1256 define amdgpu_kernel void @s_test_canonicalize_var_f64(ptr addrspace(1) %out, double %val) #1 {
1257 ; GFX6-LABEL: s_test_canonicalize_var_f64:
1259 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1260 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
1261 ; GFX6-NEXT: v_max_f64 v[2:3], s[2:3], s[2:3]
1262 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
1263 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
1264 ; GFX6-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1265 ; GFX6-NEXT: s_endpgm
1267 ; GFX8-LABEL: s_test_canonicalize_var_f64:
1269 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1270 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1271 ; GFX8-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
1272 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
1273 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
1274 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1275 ; GFX8-NEXT: s_endpgm
1277 ; GFX9-LABEL: s_test_canonicalize_var_f64:
1279 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1280 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1281 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1282 ; GFX9-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
1283 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1284 ; GFX9-NEXT: s_endpgm
1286 ; GFX11-LABEL: s_test_canonicalize_var_f64:
1288 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
1289 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1290 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1291 ; GFX11-NEXT: v_max_f64 v[0:1], s[2:3], s[2:3]
1292 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1293 ; GFX11-NEXT: s_nop 0
1294 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1295 ; GFX11-NEXT: s_endpgm
1297 ; GFX12-LABEL: s_test_canonicalize_var_f64:
1299 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
1300 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
1301 ; GFX12-NEXT: s_wait_kmcnt 0x0
1302 ; GFX12-NEXT: v_max_num_f64_e64 v[0:1], s[2:3], s[2:3]
1303 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1304 ; GFX12-NEXT: s_nop 0
1305 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1306 ; GFX12-NEXT: s_endpgm
1307 %canonicalized = call double @llvm.canonicalize.f64(double %val)
1308 store double %canonicalized, ptr addrspace(1) %out
; Canonicalizes fabs of a double loaded from %out and stores the result back
; to %out. The check blocks expect no separate fabs instruction: the absolute
; value appears as the |...| source modifier on both operands of the 64-bit
; max that implements the canonicalize.
1312 define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(ptr addrspace(1) %out) #1 {
1313 ; GFX678-LABEL: v_test_canonicalize_fabs_var_f64:
1315 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1316 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1317 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1318 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1319 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1320 ; GFX678-NEXT: s_waitcnt vmcnt(0)
1321 ; GFX678-NEXT: v_max_f64 v[2:3], |v[2:3]|, |v[2:3]|
1322 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1323 ; GFX678-NEXT: s_endpgm
1325 ; GFX9-LABEL: v_test_canonicalize_fabs_var_f64:
1327 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1328 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1329 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1330 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1331 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1332 ; GFX9-NEXT: v_max_f64 v[0:1], |v[0:1]|, |v[0:1]|
1333 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1334 ; GFX9-NEXT: s_endpgm
1336 ; GFX11-LABEL: v_test_canonicalize_fabs_var_f64:
1338 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1339 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1340 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1341 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1342 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1343 ; GFX11-NEXT: v_max_f64 v[0:1], |v[0:1]|, |v[0:1]|
1344 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1345 ; GFX11-NEXT: s_nop 0
1346 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1347 ; GFX11-NEXT: s_endpgm
1349 ; GFX12-LABEL: v_test_canonicalize_fabs_var_f64:
1351 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1352 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
1353 ; GFX12-NEXT: s_wait_kmcnt 0x0
1354 ; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1355 ; GFX12-NEXT: s_wait_loadcnt 0x0
1356 ; GFX12-NEXT: v_max_num_f64_e64 v[0:1], |v[0:1]|, |v[0:1]|
1357 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1358 ; GFX12-NEXT: s_nop 0
1359 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1360 ; GFX12-NEXT: s_endpgm
1361 %val = load double, ptr addrspace(1) %out
1362 %val.fabs = call double @llvm.fabs.f64(double %val)
1363 %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs)
1364 store double %canonicalized, ptr addrspace(1) %out
; Canonicalizes fneg(fabs(x)) of a double loaded from %out and stores the
; result back to %out. The check blocks expect both operations to appear as
; the combined -|...| source modifier on the two operands of the 64-bit max
; that implements the canonicalize, with no separate fabs or fneg instruction.
1368 define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(ptr addrspace(1) %out) #1 {
1369 ; GFX678-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
1371 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1372 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1373 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1374 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1375 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1376 ; GFX678-NEXT: s_waitcnt vmcnt(0)
1377 ; GFX678-NEXT: v_max_f64 v[2:3], -|v[2:3]|, -|v[2:3]|
1378 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1379 ; GFX678-NEXT: s_endpgm
1381 ; GFX9-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
1383 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1384 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1385 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1386 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1387 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1388 ; GFX9-NEXT: v_max_f64 v[0:1], -|v[0:1]|, -|v[0:1]|
1389 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1390 ; GFX9-NEXT: s_endpgm
1392 ; GFX11-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
1394 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1395 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1396 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1397 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1398 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1399 ; GFX11-NEXT: v_max_f64 v[0:1], -|v[0:1]|, -|v[0:1]|
1400 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1401 ; GFX11-NEXT: s_nop 0
1402 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1403 ; GFX11-NEXT: s_endpgm
1405 ; GFX12-LABEL: v_test_canonicalize_fneg_fabs_var_f64:
1407 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1408 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
1409 ; GFX12-NEXT: s_wait_kmcnt 0x0
1410 ; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1411 ; GFX12-NEXT: s_wait_loadcnt 0x0
1412 ; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -|v[0:1]|, -|v[0:1]|
1413 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1414 ; GFX12-NEXT: s_nop 0
1415 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1416 ; GFX12-NEXT: s_endpgm
1417 %val = load double, ptr addrspace(1) %out
1418 %val.fabs = call double @llvm.fabs.f64(double %val)
1419 %val.fabs.fneg = fneg double %val.fabs
1420 %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg)
1421 store double %canonicalized, ptr addrspace(1) %out
; Canonicalizes the negation of a double loaded from %out and stores the
; result back to %out. The check blocks expect the fneg to appear as the
; negate source modifier on both operands of the 64-bit max that implements
; the canonicalize, with no separate negate instruction.
1425 define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(ptr addrspace(1) %out) #1 {
1426 ; GFX678-LABEL: v_test_canonicalize_fneg_var_f64:
1428 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1429 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1430 ; GFX678-NEXT: v_mov_b32_e32 v0, s0
1431 ; GFX678-NEXT: v_mov_b32_e32 v1, s1
1432 ; GFX678-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
1433 ; GFX678-NEXT: s_waitcnt vmcnt(0)
1434 ; GFX678-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
1435 ; GFX678-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
1436 ; GFX678-NEXT: s_endpgm
1438 ; GFX9-LABEL: v_test_canonicalize_fneg_var_f64:
1440 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1441 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1442 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1443 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
1444 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1445 ; GFX9-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
1446 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1447 ; GFX9-NEXT: s_endpgm
1449 ; GFX11-LABEL: v_test_canonicalize_fneg_var_f64:
1451 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1452 ; GFX11-NEXT: v_mov_b32_e32 v2, 0
1453 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1454 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1455 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1456 ; GFX11-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
1457 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1458 ; GFX11-NEXT: s_nop 0
1459 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1460 ; GFX11-NEXT: s_endpgm
1462 ; GFX12-LABEL: v_test_canonicalize_fneg_var_f64:
1464 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1465 ; GFX12-NEXT: v_mov_b32_e32 v2, 0
1466 ; GFX12-NEXT: s_wait_kmcnt 0x0
1467 ; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1]
1468 ; GFX12-NEXT: s_wait_loadcnt 0x0
1469 ; GFX12-NEXT: v_max_num_f64_e64 v[0:1], -v[0:1], -v[0:1]
1470 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1471 ; GFX12-NEXT: s_nop 0
1472 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1473 ; GFX12-NEXT: s_endpgm
1474 %val = load double, ptr addrspace(1) %out
1475 %val.fneg = fneg double %val
1476 %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg)
1477 store double %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f64 of +0.0. The check blocks expect no
; max instruction: a zero is moved into one VGPR, copied into the next, and
; the pair is stored to %out as the 64-bit result. In the gfx900 and newer
; runs the zero in v0 also serves as the address offset operand of the store.
1481 define amdgpu_kernel void @test_fold_canonicalize_p0_f64(ptr addrspace(1) %out) #1 {
1482 ; GFX678-LABEL: test_fold_canonicalize_p0_f64:
1484 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1485 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1486 ; GFX678-NEXT: v_mov_b32_e32 v1, v0
1487 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1488 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1489 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1490 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1491 ; GFX678-NEXT: s_endpgm
1493 ; GFX9-LABEL: test_fold_canonicalize_p0_f64:
1495 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1496 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1497 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
1498 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1499 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1500 ; GFX9-NEXT: s_endpgm
1502 ; GFX11-LABEL: test_fold_canonicalize_p0_f64:
1504 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1505 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1506 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1507 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
1508 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1509 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1510 ; GFX11-NEXT: s_nop 0
1511 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1512 ; GFX11-NEXT: s_endpgm
1514 ; GFX12-LABEL: test_fold_canonicalize_p0_f64:
1516 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1517 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
1518 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1519 ; GFX12-NEXT: v_mov_b32_e32 v1, v0
1520 ; GFX12-NEXT: s_wait_kmcnt 0x0
1521 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1522 ; GFX12-NEXT: s_nop 0
1523 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1524 ; GFX12-NEXT: s_endpgm
1525 %canonicalized = call double @llvm.canonicalize.f64(double 0.0)
1526 store double %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f64 of -0.0. The check blocks expect the
; low dword to be 0 and the high dword to be built with v_bfrev_b32 of 1
; (bit-reverse of 1, i.e. 0x80000000, only the sign bit set), so the sign of
; zero is kept in the stored result.
1530 define amdgpu_kernel void @test_fold_canonicalize_n0_f64(ptr addrspace(1) %out) #1 {
1531 ; GFX678-LABEL: test_fold_canonicalize_n0_f64:
1533 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1534 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1535 ; GFX678-NEXT: v_bfrev_b32_e32 v1, 1
1536 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1537 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1538 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1539 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1540 ; GFX678-NEXT: s_endpgm
1542 ; GFX9-LABEL: test_fold_canonicalize_n0_f64:
1544 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1545 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1546 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1547 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1548 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1549 ; GFX9-NEXT: s_endpgm
1551 ; GFX11-LABEL: test_fold_canonicalize_n0_f64:
1553 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1554 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1555 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
1556 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1557 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1558 ; GFX11-NEXT: s_nop 0
1559 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1560 ; GFX11-NEXT: s_endpgm
1562 ; GFX12-LABEL: test_fold_canonicalize_n0_f64:
1564 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1565 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
1566 ; GFX12-NEXT: v_bfrev_b32_e32 v1, 1
1567 ; GFX12-NEXT: s_wait_kmcnt 0x0
1568 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1569 ; GFX12-NEXT: s_nop 0
1570 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1571 ; GFX12-NEXT: s_endpgm
1572 %canonicalized = call double @llvm.canonicalize.f64(double -0.0)
1573 store double %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f64 of 1.0. The check blocks expect the
; value to be materialized as two dwords, low 0 and high 0x3ff00000, and
; stored to %out without any max instruction.
1577 define amdgpu_kernel void @test_fold_canonicalize_p1_f64(ptr addrspace(1) %out) #1 {
1578 ; GFX678-LABEL: test_fold_canonicalize_p1_f64:
1580 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1581 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1582 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1583 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1584 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1585 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1586 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1587 ; GFX678-NEXT: s_endpgm
1589 ; GFX9-LABEL: test_fold_canonicalize_p1_f64:
1591 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1592 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1593 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1594 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1595 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1596 ; GFX9-NEXT: s_endpgm
1598 ; GFX11-LABEL: test_fold_canonicalize_p1_f64:
1600 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1601 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff00000
1602 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1603 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1604 ; GFX11-NEXT: s_nop 0
1605 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1606 ; GFX11-NEXT: s_endpgm
1608 ; GFX12-LABEL: test_fold_canonicalize_p1_f64:
1610 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1611 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff00000
1612 ; GFX12-NEXT: s_wait_kmcnt 0x0
1613 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1614 ; GFX12-NEXT: s_nop 0
1615 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1616 ; GFX12-NEXT: s_endpgm
1617 %canonicalized = call double @llvm.canonicalize.f64(double 1.0)
1618 store double %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f64 of -1.0. The check blocks expect the
; value to be materialized as two dwords, low 0 and high 0xbff00000 (the 1.0
; high dword with the sign bit set), and stored to %out without any max
; instruction.
1622 define amdgpu_kernel void @test_fold_canonicalize_n1_f64(ptr addrspace(1) %out) #1 {
1623 ; GFX678-LABEL: test_fold_canonicalize_n1_f64:
1625 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1626 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1627 ; GFX678-NEXT: v_mov_b32_e32 v1, 0xbff00000
1628 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1629 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1630 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1631 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1632 ; GFX678-NEXT: s_endpgm
1634 ; GFX9-LABEL: test_fold_canonicalize_n1_f64:
1636 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1637 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1638 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xbff00000
1639 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1640 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1641 ; GFX9-NEXT: s_endpgm
1643 ; GFX11-LABEL: test_fold_canonicalize_n1_f64:
1645 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1646 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xbff00000
1647 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1648 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1649 ; GFX11-NEXT: s_nop 0
1650 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1651 ; GFX11-NEXT: s_endpgm
1653 ; GFX12-LABEL: test_fold_canonicalize_n1_f64:
1655 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1656 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xbff00000
1657 ; GFX12-NEXT: s_wait_kmcnt 0x0
1658 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1659 ; GFX12-NEXT: s_nop 0
1660 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1661 ; GFX12-NEXT: s_endpgm
1662 %canonicalized = call double @llvm.canonicalize.f64(double -1.0)
1663 store double %canonicalized, ptr addrspace(1) %out
; Constant-folds llvm.canonicalize.f64 of 16.0. The check blocks expect the
; value to be materialized as two dwords, low 0 and high 0x40300000, with
; the high dword written as a 32-bit literal in the mov, and stored to %out
; without any max instruction.
1667 define amdgpu_kernel void @test_fold_canonicalize_literal_f64(ptr addrspace(1) %out) #1 {
1668 ; GFX678-LABEL: test_fold_canonicalize_literal_f64:
1670 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1671 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1672 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x40300000
1673 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1674 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1675 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1676 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1677 ; GFX678-NEXT: s_endpgm
1679 ; GFX9-LABEL: test_fold_canonicalize_literal_f64:
1681 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1682 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1683 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40300000
1684 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1685 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1686 ; GFX9-NEXT: s_endpgm
1688 ; GFX11-LABEL: test_fold_canonicalize_literal_f64:
1690 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1691 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40300000
1692 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1693 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1694 ; GFX11-NEXT: s_nop 0
1695 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1696 ; GFX11-NEXT: s_endpgm
1698 ; GFX12-LABEL: test_fold_canonicalize_literal_f64:
1700 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1701 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40300000
1702 ; GFX12-NEXT: s_wait_kmcnt 0x0
1703 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1704 ; GFX12-NEXT: s_nop 0
1705 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1706 ; GFX12-NEXT: s_endpgm
1707 %canonicalized = call double @llvm.canonicalize.f64(double 16.0)
1708 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test with a positive denormal input.
; i64 4503599627370495 is 0x000FFFFFFFFFFFFF (zero exponent, all-ones
; mantissa), the largest positive f64 denormal. The expected store is +0.0
; (both dwords zero), i.e. the fold flushes the denormal.
; NOTE(review): attribute group #2 is defined outside this chunk; presumably
; it selects the denormal-flushing mode -- confirm against its definition.
1712 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #2 {
1713 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
1715 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1716 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1717 ; GFX678-NEXT: v_mov_b32_e32 v1, v0
1718 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1719 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1720 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1721 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1722 ; GFX678-NEXT: s_endpgm
1724 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
1726 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1727 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1728 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
1729 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1730 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1731 ; GFX9-NEXT: s_endpgm
1733 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
1735 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1736 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1737 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1738 ; GFX11-NEXT: v_mov_b32_e32 v1, v0
1739 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1740 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1741 ; GFX11-NEXT: s_nop 0
1742 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1743 ; GFX11-NEXT: s_endpgm
1745 ; GFX12-LABEL: test_no_denormals_fold_canonicalize_denormal0_f64:
1747 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1748 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
1749 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1750 ; GFX12-NEXT: v_mov_b32_e32 v1, v0
1751 ; GFX12-NEXT: s_wait_kmcnt 0x0
1752 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1753 ; GFX12-NEXT: s_nop 0
1754 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1755 ; GFX12-NEXT: s_endpgm
1756 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
1757 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test with a positive denormal input that must be preserved.
; i64 4503599627370495 is 0x000FFFFFFFFFFFFF, the largest positive f64
; denormal. The expected store keeps the bits unchanged: low dword -1
; (0xffffffff), high dword 0xfffff.
; NOTE(review): attribute group #3 is defined outside this chunk; presumably
; it selects the denormal-preserving mode -- confirm against its definition.
1761 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f64(ptr addrspace(1) %out) #3 {
1762 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
1764 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1765 ; GFX678-NEXT: v_mov_b32_e32 v0, -1
1766 ; GFX678-NEXT: v_mov_b32_e32 v1, 0xfffff
1767 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1768 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1769 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1770 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1771 ; GFX678-NEXT: s_endpgm
1773 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
1775 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1776 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1777 ; GFX9-NEXT: v_mov_b32_e32 v0, -1
1778 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xfffff
1779 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1780 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1781 ; GFX9-NEXT: s_endpgm
1783 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
1785 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1786 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0xfffff
1787 ; GFX11-NEXT: v_mov_b32_e32 v0, -1
1788 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1789 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1790 ; GFX11-NEXT: s_nop 0
1791 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1792 ; GFX11-NEXT: s_endpgm
1794 ; GFX12-LABEL: test_denormals_fold_canonicalize_denormal0_f64:
1796 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1797 ; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0xfffff
1798 ; GFX12-NEXT: v_mov_b32_e32 v0, -1
1799 ; GFX12-NEXT: s_wait_kmcnt 0x0
1800 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1801 ; GFX12-NEXT: s_nop 0
1802 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1803 ; GFX12-NEXT: s_endpgm
1804 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 4503599627370495 to double))
1805 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test with a negative denormal input.
; i64 9227875636482146303 is 0x800FFFFFFFFFFFFF (sign bit set, zero exponent,
; all-ones mantissa). The expected store is -0.0: low dword 0 and high dword
; produced by v_bfrev_b32 of 1 (bit-reverse of 1 = 0x80000000), so the
; denormal is flushed while its sign is kept.
; NOTE(review): attribute group #2 is defined outside this chunk; presumably
; it selects the denormal-flushing mode -- confirm against its definition.
1809 define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #2 {
1810 ; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
1812 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1813 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1814 ; GFX678-NEXT: v_bfrev_b32_e32 v1, 1
1815 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1816 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1817 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1818 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1819 ; GFX678-NEXT: s_endpgm
1821 ; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
1823 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1824 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1825 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1826 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1827 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1828 ; GFX9-NEXT: s_endpgm
1830 ; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
1832 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1833 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1834 ; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
1835 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1836 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1837 ; GFX11-NEXT: s_nop 0
1838 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1839 ; GFX11-NEXT: s_endpgm
1841 ; GFX12-LABEL: test_no_denormals_fold_canonicalize_denormal1_f64:
1843 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1844 ; GFX12-NEXT: v_mov_b32_e32 v0, 0
1845 ; GFX12-NEXT: v_bfrev_b32_e32 v1, 1
1846 ; GFX12-NEXT: s_wait_kmcnt 0x0
1847 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1848 ; GFX12-NEXT: s_nop 0
1849 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1850 ; GFX12-NEXT: s_endpgm
1851 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
1852 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test with a negative denormal input that must be preserved.
; i64 9227875636482146303 is 0x800FFFFFFFFFFFFF. The expected store keeps the
; bits unchanged: low dword -1 (0xffffffff), high dword 0x800fffff.
; NOTE(review): attribute group #3 is defined outside this chunk; presumably
; it selects the denormal-preserving mode -- confirm against its definition.
1856 define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal1_f64(ptr addrspace(1) %out) #3 {
1857 ; GFX678-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
1859 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1860 ; GFX678-NEXT: v_mov_b32_e32 v0, -1
1861 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x800fffff
1862 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1863 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1864 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1865 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1866 ; GFX678-NEXT: s_endpgm
1868 ; GFX9-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
1870 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1871 ; GFX9-NEXT: v_mov_b32_e32 v2, 0
1872 ; GFX9-NEXT: v_mov_b32_e32 v0, -1
1873 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x800fffff
1874 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1875 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
1876 ; GFX9-NEXT: s_endpgm
1878 ; GFX11-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
1880 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1881 ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x800fffff
1882 ; GFX11-NEXT: v_mov_b32_e32 v0, -1
1883 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1884 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1885 ; GFX11-NEXT: s_nop 0
1886 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1887 ; GFX11-NEXT: s_endpgm
1889 ; GFX12-LABEL: test_denormals_fold_canonicalize_denormal1_f64:
1891 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1892 ; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, 0x800fffff
1893 ; GFX12-NEXT: v_mov_b32_e32 v0, -1
1894 ; GFX12-NEXT: s_wait_kmcnt 0x0
1895 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1896 ; GFX12-NEXT: s_nop 0
1897 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1898 ; GFX12-NEXT: s_endpgm
1899 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
1900 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test: the input 0x7FF8000000000000 is a quiet NaN with an
; empty payload. The expected store writes the same bits back (low dword 0,
; high dword 0x7ff80000).
1904 define amdgpu_kernel void @test_fold_canonicalize_qnan_f64(ptr addrspace(1) %out) #1 {
1905 ; GFX678-LABEL: test_fold_canonicalize_qnan_f64:
1907 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1908 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1909 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1910 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1911 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1912 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1913 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1914 ; GFX678-NEXT: s_endpgm
1916 ; GFX9-LABEL: test_fold_canonicalize_qnan_f64:
1918 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1919 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1920 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1921 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1922 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1923 ; GFX9-NEXT: s_endpgm
1925 ; GFX11-LABEL: test_fold_canonicalize_qnan_f64:
1927 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1928 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1929 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1930 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1931 ; GFX11-NEXT: s_nop 0
1932 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1933 ; GFX11-NEXT: s_endpgm
1935 ; GFX12-LABEL: test_fold_canonicalize_qnan_f64:
1937 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1938 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1939 ; GFX12-NEXT: s_wait_kmcnt 0x0
1940 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1941 ; GFX12-NEXT: s_nop 0
1942 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1943 ; GFX12-NEXT: s_endpgm
1944 %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
1945 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test: i64 -1 is 0xFFFFFFFFFFFFFFFF, a NaN with the sign bit
; and every payload bit set. The expected store is 0x7FF8000000000000
; (low dword 0, high dword 0x7ff80000): sign and payload are dropped.
1949 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg1_f64(ptr addrspace(1) %out) #1 {
1950 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
1952 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1953 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1954 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1955 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
1956 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
1957 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
1958 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
1959 ; GFX678-NEXT: s_endpgm
1961 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
1963 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1964 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1965 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
1966 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1967 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
1968 ; GFX9-NEXT: s_endpgm
1970 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
1972 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1973 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1974 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1975 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1976 ; GFX11-NEXT: s_nop 0
1977 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1978 ; GFX11-NEXT: s_endpgm
1980 ; GFX12-LABEL: test_fold_canonicalize_qnan_value_neg1_f64:
1982 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
1983 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
1984 ; GFX12-NEXT: s_wait_kmcnt 0x0
1985 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
1986 ; GFX12-NEXT: s_nop 0
1987 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1988 ; GFX12-NEXT: s_endpgm
1989 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
1990 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test: i64 -2 is 0xFFFFFFFFFFFFFFFE, a NaN with the sign bit
; set and the quiet bit (bit 51) set. The expected store is
; 0x7FF8000000000000 (low dword 0, high dword 0x7ff80000).
1994 define amdgpu_kernel void @test_fold_canonicalize_qnan_value_neg2_f64(ptr addrspace(1) %out) #1 {
1995 ; GFX678-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
1997 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1998 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
1999 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2000 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
2001 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
2002 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
2003 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2004 ; GFX678-NEXT: s_endpgm
2006 ; GFX9-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
2008 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2009 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2010 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2011 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2012 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
2013 ; GFX9-NEXT: s_endpgm
2015 ; GFX11-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
2017 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2018 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2019 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2020 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2021 ; GFX11-NEXT: s_nop 0
2022 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2023 ; GFX11-NEXT: s_endpgm
2025 ; GFX12-LABEL: test_fold_canonicalize_qnan_value_neg2_f64:
2027 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2028 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2029 ; GFX12-NEXT: s_wait_kmcnt 0x0
2030 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2031 ; GFX12-NEXT: s_nop 0
2032 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2033 ; GFX12-NEXT: s_endpgm
2034 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
2035 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test: i64 9218868437227405313 is 0x7FF0000000000001, a
; positive signaling NaN (all-ones exponent, quiet bit clear, payload 1).
; The expected store is the quiet NaN 0x7FF8000000000000 (low dword 0, high
; dword 0x7ff80000); the payload bit is not carried over.
2039 define amdgpu_kernel void @test_fold_canonicalize_snan0_value_f64(ptr addrspace(1) %out) #1 {
2040 ; GFX678-LABEL: test_fold_canonicalize_snan0_value_f64:
2042 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2043 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
2044 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2045 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
2046 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
2047 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
2048 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2049 ; GFX678-NEXT: s_endpgm
2051 ; GFX9-LABEL: test_fold_canonicalize_snan0_value_f64:
2053 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2054 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2055 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2056 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2057 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
2058 ; GFX9-NEXT: s_endpgm
2060 ; GFX11-LABEL: test_fold_canonicalize_snan0_value_f64:
2062 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2063 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2064 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2065 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2066 ; GFX11-NEXT: s_nop 0
2067 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2068 ; GFX11-NEXT: s_endpgm
2070 ; GFX12-LABEL: test_fold_canonicalize_snan0_value_f64:
2072 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2073 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2074 ; GFX12-NEXT: s_wait_kmcnt 0x0
2075 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2076 ; GFX12-NEXT: s_nop 0
2077 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2078 ; GFX12-NEXT: s_endpgm
2079 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
2080 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test: i64 9223372036854775807 is 0x7FFFFFFFFFFFFFFF, a
; positive NaN with every mantissa bit set. The expected store is
; 0x7FF8000000000000 (low dword 0, high dword 0x7ff80000).
; NOTE(review): the test name says "snan", but this bit pattern has the quiet
; bit (bit 51) set, so it encodes a quiet NaN. The expected output is the
; same either way; confirm whether a true signaling pattern was intended.
2084 define amdgpu_kernel void @test_fold_canonicalize_snan1_value_f64(ptr addrspace(1) %out) #1 {
2085 ; GFX678-LABEL: test_fold_canonicalize_snan1_value_f64:
2087 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2088 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
2089 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2090 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
2091 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
2092 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
2093 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2094 ; GFX678-NEXT: s_endpgm
2096 ; GFX9-LABEL: test_fold_canonicalize_snan1_value_f64:
2098 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2099 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2100 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2101 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2102 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
2103 ; GFX9-NEXT: s_endpgm
2105 ; GFX11-LABEL: test_fold_canonicalize_snan1_value_f64:
2107 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2108 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2109 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2110 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2111 ; GFX11-NEXT: s_nop 0
2112 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2113 ; GFX11-NEXT: s_endpgm
2115 ; GFX12-LABEL: test_fold_canonicalize_snan1_value_f64:
2117 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2118 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2119 ; GFX12-NEXT: s_wait_kmcnt 0x0
2120 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2121 ; GFX12-NEXT: s_nop 0
2122 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2123 ; GFX12-NEXT: s_endpgm
2124 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9223372036854775807 to double))
2125 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test: i64 18442240474082181121 is 0xFFF0000000000001, a
; negative signaling NaN (sign set, quiet bit clear, payload 1). The expected
; store is 0x7FF8000000000000 (low dword 0, high dword 0x7ff80000): the
; result is quiet and neither the sign nor the payload is carried over.
2129 define amdgpu_kernel void @test_fold_canonicalize_snan2_value_f64(ptr addrspace(1) %out) #1 {
2130 ; GFX678-LABEL: test_fold_canonicalize_snan2_value_f64:
2132 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2133 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
2134 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2135 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
2136 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
2137 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
2138 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2139 ; GFX678-NEXT: s_endpgm
2141 ; GFX9-LABEL: test_fold_canonicalize_snan2_value_f64:
2143 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2144 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2145 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2146 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2147 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
2148 ; GFX9-NEXT: s_endpgm
2150 ; GFX11-LABEL: test_fold_canonicalize_snan2_value_f64:
2152 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2153 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2154 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2155 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2156 ; GFX11-NEXT: s_nop 0
2157 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2158 ; GFX11-NEXT: s_endpgm
2160 ; GFX12-LABEL: test_fold_canonicalize_snan2_value_f64:
2162 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2163 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2164 ; GFX12-NEXT: s_wait_kmcnt 0x0
2165 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2166 ; GFX12-NEXT: s_nop 0
2167 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2168 ; GFX12-NEXT: s_endpgm
2169 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18442240474082181121 to double))
2170 store double %canonicalized, ptr addrspace(1) %out
; Constant-fold test: i64 18446744073709551615 is 0xFFFFFFFFFFFFFFFF (all
; bits set), a negative NaN with a full payload. The expected store is
; 0x7FF8000000000000 (low dword 0, high dword 0x7ff80000).
; NOTE(review): the test name says "snan", but an all-ones pattern has the
; quiet bit (bit 51) set, so it encodes a quiet NaN and is the same value as
; bitcasting i64 -1. Confirm whether a true signaling pattern was intended.
2174 define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f64(ptr addrspace(1) %out) #1 {
2175 ; GFX678-LABEL: test_fold_canonicalize_snan3_value_f64:
2177 ; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2178 ; GFX678-NEXT: v_mov_b32_e32 v0, 0
2179 ; GFX678-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2180 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
2181 ; GFX678-NEXT: v_mov_b32_e32 v3, s1
2182 ; GFX678-NEXT: v_mov_b32_e32 v2, s0
2183 ; GFX678-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2184 ; GFX678-NEXT: s_endpgm
2186 ; GFX9-LABEL: test_fold_canonicalize_snan3_value_f64:
2188 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2189 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2190 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x7ff80000
2191 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2192 ; GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
2193 ; GFX9-NEXT: s_endpgm
2195 ; GFX11-LABEL: test_fold_canonicalize_snan3_value_f64:
2197 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2198 ; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2199 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2200 ; GFX11-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2201 ; GFX11-NEXT: s_nop 0
2202 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2203 ; GFX11-NEXT: s_endpgm
2205 ; GFX12-LABEL: test_fold_canonicalize_snan3_value_f64:
2207 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2208 ; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7ff80000
2209 ; GFX12-NEXT: s_wait_kmcnt 0x0
2210 ; GFX12-NEXT: global_store_b64 v0, v[0:1], s[0:1]
2211 ; GFX12-NEXT: s_nop 0
2212 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2213 ; GFX12-NEXT: s_endpgm
2214 %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 18446744073709551615 to double))
2215 store double %canonicalized, ptr addrspace(1) %out
; Canonicalize of a non-constant double: each work item loads %arg[id],
; canonicalizes it and stores the result to %out[id], where id is the
; workitem x id. Nothing can be folded, so a real instruction is expected:
;   hawaii, fiji    : v_mul_f64 by 1.0
;   gfx900, gfx1100 : v_max_f64 of the value with itself
;   gfx1200         : v_max_num_f64 of the value with itself
; NOTE(review): attribute group #4 is defined outside this chunk; the "flush"
; suffix suggests a denormal-flushing mode -- confirm against its definition.
2219 define amdgpu_kernel void @test_canonicalize_value_f64_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
2220 ; GFX6-LABEL: test_canonicalize_value_f64_flush:
2222 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2223 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2224 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2225 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2226 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2227 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2228 ; GFX6-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
2229 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
2230 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
2231 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2232 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2233 ; GFX6-NEXT: v_mul_f64 v[0:1], 1.0, v[0:1]
2234 ; GFX6-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2235 ; GFX6-NEXT: s_endpgm
2237 ; GFX8-LABEL: test_canonicalize_value_f64_flush:
2239 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2240 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2241 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2242 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2243 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2244 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2245 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
2246 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
2247 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v2
2248 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2249 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2250 ; GFX8-NEXT: v_mul_f64 v[0:1], 1.0, v[0:1]
2251 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2252 ; GFX8-NEXT: s_endpgm
2254 ; GFX9-LABEL: test_canonicalize_value_f64_flush:
2256 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2257 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2258 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2259 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
2260 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2261 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2262 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
2263 ; GFX9-NEXT: s_endpgm
2265 ; GFX11-LABEL: test_canonicalize_value_f64_flush:
2267 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2268 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2269 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2270 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
2271 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2272 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2273 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
2274 ; GFX11-NEXT: s_nop 0
2275 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2276 ; GFX11-NEXT: s_endpgm
2278 ; GFX12-LABEL: test_canonicalize_value_f64_flush:
2280 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2281 ; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2282 ; GFX12-NEXT: s_wait_kmcnt 0x0
2283 ; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1]
2284 ; GFX12-NEXT: s_wait_loadcnt 0x0
2285 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
2286 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
2287 ; GFX12-NEXT: s_nop 0
2288 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2289 ; GFX12-NEXT: s_endpgm
2290 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2291 %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
2292 %v = load double, ptr addrspace(1) %gep, align 8
2293 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
2294 %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
2295 store double %canonicalized, ptr addrspace(1) %gep2, align 8
; Canonicalize of a non-constant float: each work item loads %arg[id],
; canonicalizes it and stores the result to %out[id], where id is the
; workitem x id. Nothing can be folded, so a real instruction is expected:
;   hawaii, fiji    : v_mul_f32 by 1.0
;   gfx900, gfx1100 : v_max_f32 of the value with itself
;   gfx1200         : v_max_num_f32 of the value with itself
; NOTE(review): attribute group #4 is defined outside this chunk; the "flush"
; suffix suggests a denormal-flushing mode -- confirm against its definition.
2299 define amdgpu_kernel void @test_canonicalize_value_f32_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
2300 ; GFX6-LABEL: test_canonicalize_value_f32_flush:
2302 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2303 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2304 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2305 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2306 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2307 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2308 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
2309 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
2310 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2311 ; GFX6-NEXT: v_mul_f32_e32 v3, 1.0, v0
2312 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2313 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2314 ; GFX6-NEXT: flat_store_dword v[0:1], v3
2315 ; GFX6-NEXT: s_endpgm
2317 ; GFX8-LABEL: test_canonicalize_value_f32_flush:
2319 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2320 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2321 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2322 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2323 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2324 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2325 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
2326 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2327 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2328 ; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v0
2329 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2330 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2331 ; GFX8-NEXT: flat_store_dword v[0:1], v3
2332 ; GFX8-NEXT: s_endpgm
2334 ; GFX9-LABEL: test_canonicalize_value_f32_flush:
2336 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2337 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2338 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2339 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
2340 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2341 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2342 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
2343 ; GFX9-NEXT: s_endpgm
2345 ; GFX11-LABEL: test_canonicalize_value_f32_flush:
2347 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2348 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2349 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2350 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
2351 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2352 ; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
2353 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
2354 ; GFX11-NEXT: s_nop 0
2355 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2356 ; GFX11-NEXT: s_endpgm
2358 ; GFX12-LABEL: test_canonicalize_value_f32_flush:
2360 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2361 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2362 ; GFX12-NEXT: s_wait_kmcnt 0x0
2363 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
2364 ; GFX12-NEXT: s_wait_loadcnt 0x0
2365 ; GFX12-NEXT: v_max_num_f32_e32 v1, v1, v1
2366 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
2367 ; GFX12-NEXT: s_nop 0
2368 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2369 ; GFX12-NEXT: s_endpgm
2370 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2371 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
2372 %v = load float, ptr addrspace(1) %gep, align 4
2373 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
2374 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
2375 store float %canonicalized, ptr addrspace(1) %gep2, align 4
; Canonicalize of a non-constant half: each work item loads %arg[id],
; canonicalizes it and stores the result to %out[id], where id is the
; workitem x id. Nothing can be folded, so real instructions are expected:
;   hawaii          : convert f16 to f32, v_mul_f32 by 1.0, convert back
;   fiji            : v_mul_f16 by 1.0
;   gfx900, gfx1100 : v_max_f16 of the value with itself
;   gfx1200         : v_max_num_f16 of the value with itself
; NOTE(review): attribute group #4 is defined outside this chunk; the "flush"
; suffix suggests a denormal-flushing mode -- confirm against its definition.
2379 define amdgpu_kernel void @test_canonicalize_value_f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
2380 ; GFX6-LABEL: test_canonicalize_value_f16_flush:
2382 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2383 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v0
2384 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2385 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2386 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2387 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2388 ; GFX6-NEXT: flat_load_ushort v0, v[0:1]
2389 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
2390 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2391 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
2392 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
2393 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v0
2394 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2395 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2396 ; GFX6-NEXT: flat_store_short v[0:1], v3
2397 ; GFX6-NEXT: s_endpgm
2399 ; GFX8-LABEL: test_canonicalize_value_f16_flush:
2401 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2402 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
2403 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2404 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2405 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2406 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2407 ; GFX8-NEXT: flat_load_ushort v0, v[0:1]
2408 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2409 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2410 ; GFX8-NEXT: v_mul_f16_e32 v3, 1.0, v0
2411 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2412 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2413 ; GFX8-NEXT: flat_store_short v[0:1], v3
2414 ; GFX8-NEXT: s_endpgm
2416 ; GFX9-LABEL: test_canonicalize_value_f16_flush:
2418 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2419 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2420 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2421 ; GFX9-NEXT: global_load_ushort v1, v0, s[0:1]
2422 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2423 ; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
2424 ; GFX9-NEXT: global_store_short v0, v1, s[2:3]
2425 ; GFX9-NEXT: s_endpgm
2427 ; GFX11-LABEL: test_canonicalize_value_f16_flush:
2429 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2430 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2431 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2432 ; GFX11-NEXT: global_load_u16 v1, v0, s[0:1]
2433 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2434 ; GFX11-NEXT: v_max_f16_e32 v1, v1, v1
2435 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
2436 ; GFX11-NEXT: s_nop 0
2437 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2438 ; GFX11-NEXT: s_endpgm
2440 ; GFX12-LABEL: test_canonicalize_value_f16_flush:
2442 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2443 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2444 ; GFX12-NEXT: s_wait_kmcnt 0x0
2445 ; GFX12-NEXT: global_load_u16 v1, v0, s[0:1]
2446 ; GFX12-NEXT: s_wait_loadcnt 0x0
2447 ; GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
2448 ; GFX12-NEXT: global_store_b16 v0, v1, s[2:3]
2449 ; GFX12-NEXT: s_nop 0
2450 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2451 ; GFX12-NEXT: s_endpgm
2452 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2453 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
2454 %v = load half, ptr addrspace(1) %gep, align 2
2455 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
2456 %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
2457 store half %canonicalized, ptr addrspace(1) %gep2, align 2
; Kernel test: each work-item loads one <2 x half> from %arg at index
; workitem.id.x, applies llvm.canonicalize.v2f16, and stores the result to the
; same index of %out. Uses attribute group #4 (see the attribute list at the
; end of the file).
; Expected selection per the checks below: on hawaii each half is converted to
; f32, multiplied by 1.0 and converted back; on fiji each half is multiplied by
; 1.0 in f16 (0x3c00 is the f16 encoding of 1.0); on gfx900 and newer a single
; packed f16 max of the value with itself is used.
2462 define amdgpu_kernel void @test_canonicalize_value_v2f16_flush(ptr addrspace(1) %arg, ptr addrspace(1) %out) #4 {
2463 ; GFX6-LABEL: test_canonicalize_value_v2f16_flush:
2465 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2466 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2467 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2468 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2469 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2470 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2471 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
2472 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
2473 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2474 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
2475 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2476 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
2477 ; GFX6-NEXT: v_mul_f32_e32 v1, 1.0, v1
2478 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
2479 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
2480 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
2481 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2482 ; GFX6-NEXT: v_or_b32_e32 v4, v1, v0
2483 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2484 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
2485 ; GFX6-NEXT: flat_store_dword v[0:1], v4
2486 ; GFX6-NEXT: s_endpgm
2488 ; GFX8-LABEL: test_canonicalize_value_v2f16_flush:
2490 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2491 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2492 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2493 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2494 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2495 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2496 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
2497 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x3c00
2498 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
2499 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2500 ; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2501 ; GFX8-NEXT: v_mul_f16_e32 v0, 1.0, v0
2502 ; GFX8-NEXT: v_or_b32_e32 v4, v0, v1
2503 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2504 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
2505 ; GFX8-NEXT: flat_store_dword v[0:1], v4
2506 ; GFX8-NEXT: s_endpgm
2508 ; GFX9-LABEL: test_canonicalize_value_v2f16_flush:
2510 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2511 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2512 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2513 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
2514 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2515 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
2516 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
2517 ; GFX9-NEXT: s_endpgm
2519 ; GFX11-LABEL: test_canonicalize_value_v2f16_flush:
2521 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2522 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2523 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2524 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
2525 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2526 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
2527 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
2528 ; GFX11-NEXT: s_nop 0
2529 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2530 ; GFX11-NEXT: s_endpgm
2532 ; GFX12-LABEL: test_canonicalize_value_v2f16_flush:
2534 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2535 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2536 ; GFX12-NEXT: s_wait_kmcnt 0x0
2537 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
2538 ; GFX12-NEXT: s_wait_loadcnt 0x0
2539 ; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
2540 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
2541 ; GFX12-NEXT: s_nop 0
2542 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2543 ; GFX12-NEXT: s_endpgm
2544 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2545 %gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i32 %id
2546 %v = load <2 x half>, ptr addrspace(1) %gep, align 4
2547 %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
2548 %gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %id
2549 store <2 x half> %canonicalized, ptr addrspace(1) %gep2, align 2
; Kernel test: each work-item loads one double from %arg at index
; workitem.id.x, applies llvm.canonicalize.f64, and stores the result to the
; same index of %out. Uses attribute group #3.
; The checks below expect an f64 max of the loaded value with itself on every
; tested target (spelled v_max_num_f64 on gfx1200).
2553 define amdgpu_kernel void @test_canonicalize_value_f64_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2554 ; GFX6-LABEL: test_canonicalize_value_f64_denorm:
2556 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2557 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2558 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2559 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2560 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2561 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2562 ; GFX6-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
2563 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
2564 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
2565 ; GFX6-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2566 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2567 ; GFX6-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2568 ; GFX6-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2569 ; GFX6-NEXT: s_endpgm
2571 ; GFX8-LABEL: test_canonicalize_value_f64_denorm:
2573 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2574 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2575 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2576 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2577 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2578 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2579 ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
2580 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
2581 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v2
2582 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
2583 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2584 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2585 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
2586 ; GFX8-NEXT: s_endpgm
2588 ; GFX9-LABEL: test_canonicalize_value_f64_denorm:
2590 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2591 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2592 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2593 ; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
2594 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2595 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2596 ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
2597 ; GFX9-NEXT: s_endpgm
2599 ; GFX11-LABEL: test_canonicalize_value_f64_denorm:
2601 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2602 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2603 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2604 ; GFX11-NEXT: global_load_b64 v[0:1], v2, s[0:1]
2605 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2606 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2607 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
2608 ; GFX11-NEXT: s_nop 0
2609 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2610 ; GFX11-NEXT: s_endpgm
2612 ; GFX12-LABEL: test_canonicalize_value_f64_denorm:
2614 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2615 ; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v0
2616 ; GFX12-NEXT: s_wait_kmcnt 0x0
2617 ; GFX12-NEXT: global_load_b64 v[0:1], v2, s[0:1]
2618 ; GFX12-NEXT: s_wait_loadcnt 0x0
2619 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
2620 ; GFX12-NEXT: global_store_b64 v2, v[0:1], s[2:3]
2621 ; GFX12-NEXT: s_nop 0
2622 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2623 ; GFX12-NEXT: s_endpgm
2624 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2625 %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
2626 %v = load double, ptr addrspace(1) %gep, align 8
2627 %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
2628 %gep2 = getelementptr inbounds double, ptr addrspace(1) %out, i32 %id
2629 store double %canonicalized, ptr addrspace(1) %gep2, align 8
; Kernel test: each work-item loads one float from %arg at index
; workitem.id.x, applies llvm.canonicalize.f32, and stores the result to the
; same index of %out. Uses attribute group #3.
; The checks below expect a multiply by 1.0 on hawaii and fiji, and an f32 max
; of the value with itself on gfx900 and newer (v_max_num_f32 on gfx1200).
2633 define amdgpu_kernel void @test_canonicalize_value_f32_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2634 ; GFX6-LABEL: test_canonicalize_value_f32_denorm:
2636 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2637 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2638 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2639 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2640 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2641 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2642 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
2643 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
2644 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2645 ; GFX6-NEXT: v_mul_f32_e32 v3, 1.0, v0
2646 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2647 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2648 ; GFX6-NEXT: flat_store_dword v[0:1], v3
2649 ; GFX6-NEXT: s_endpgm
2651 ; GFX8-LABEL: test_canonicalize_value_f32_denorm:
2653 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2654 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2655 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2656 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2657 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2658 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2659 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
2660 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2661 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2662 ; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v0
2663 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2664 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2665 ; GFX8-NEXT: flat_store_dword v[0:1], v3
2666 ; GFX8-NEXT: s_endpgm
2668 ; GFX9-LABEL: test_canonicalize_value_f32_denorm:
2670 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2671 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2672 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2673 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
2674 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2675 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2676 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
2677 ; GFX9-NEXT: s_endpgm
2679 ; GFX11-LABEL: test_canonicalize_value_f32_denorm:
2681 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2682 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2683 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2684 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
2685 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2686 ; GFX11-NEXT: v_max_f32_e32 v1, v1, v1
2687 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
2688 ; GFX11-NEXT: s_nop 0
2689 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2690 ; GFX11-NEXT: s_endpgm
2692 ; GFX12-LABEL: test_canonicalize_value_f32_denorm:
2694 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2695 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2696 ; GFX12-NEXT: s_wait_kmcnt 0x0
2697 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
2698 ; GFX12-NEXT: s_wait_loadcnt 0x0
2699 ; GFX12-NEXT: v_max_num_f32_e32 v1, v1, v1
2700 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
2701 ; GFX12-NEXT: s_nop 0
2702 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2703 ; GFX12-NEXT: s_endpgm
2704 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2705 %gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
2706 %v = load float, ptr addrspace(1) %gep, align 4
2707 %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
2708 %gep2 = getelementptr inbounds float, ptr addrspace(1) %out, i32 %id
2709 store float %canonicalized, ptr addrspace(1) %gep2, align 4
; Kernel test: each work-item loads one half from %arg at index
; workitem.id.x, applies llvm.canonicalize.f16, and stores the result to the
; same index of %out. Uses attribute group #3.
; The checks below expect convert-to-f32 / multiply by 1.0 / convert-to-f16 on
; hawaii, and an f16 max of the value with itself on fiji and newer
; (v_max_num_f16 on gfx1200).
; NOTE(review): the FIXME below presumably refers to the v_mul_f32 that sits
; between the two conversions in the hawaii output -- confirm.
2713 ; FIXME: Conversion to float should count as the canonicalize pre-gfx8
2714 define amdgpu_kernel void @test_canonicalize_value_f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2715 ; GFX6-LABEL: test_canonicalize_value_f16_denorm:
2717 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2718 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v0
2719 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2720 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2721 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2722 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2723 ; GFX6-NEXT: flat_load_ushort v0, v[0:1]
2724 ; GFX6-NEXT: v_mov_b32_e32 v1, s3
2725 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2726 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
2727 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
2728 ; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v0
2729 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2730 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2731 ; GFX6-NEXT: flat_store_short v[0:1], v3
2732 ; GFX6-NEXT: s_endpgm
2734 ; GFX8-LABEL: test_canonicalize_value_f16_denorm:
2736 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2737 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0
2738 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2739 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2740 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2741 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2742 ; GFX8-NEXT: flat_load_ushort v0, v[0:1]
2743 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2744 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2745 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v0
2746 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2747 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2748 ; GFX8-NEXT: flat_store_short v[0:1], v3
2749 ; GFX8-NEXT: s_endpgm
2751 ; GFX9-LABEL: test_canonicalize_value_f16_denorm:
2753 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2754 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2755 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2756 ; GFX9-NEXT: global_load_ushort v1, v0, s[0:1]
2757 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2758 ; GFX9-NEXT: v_max_f16_e32 v1, v1, v1
2759 ; GFX9-NEXT: global_store_short v0, v1, s[2:3]
2760 ; GFX9-NEXT: s_endpgm
2762 ; GFX11-LABEL: test_canonicalize_value_f16_denorm:
2764 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2765 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2766 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2767 ; GFX11-NEXT: global_load_u16 v1, v0, s[0:1]
2768 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2769 ; GFX11-NEXT: v_max_f16_e32 v1, v1, v1
2770 ; GFX11-NEXT: global_store_b16 v0, v1, s[2:3]
2771 ; GFX11-NEXT: s_nop 0
2772 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2773 ; GFX11-NEXT: s_endpgm
2775 ; GFX12-LABEL: test_canonicalize_value_f16_denorm:
2777 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2778 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2779 ; GFX12-NEXT: s_wait_kmcnt 0x0
2780 ; GFX12-NEXT: global_load_u16 v1, v0, s[0:1]
2781 ; GFX12-NEXT: s_wait_loadcnt 0x0
2782 ; GFX12-NEXT: v_max_num_f16_e32 v1, v1, v1
2783 ; GFX12-NEXT: global_store_b16 v0, v1, s[2:3]
2784 ; GFX12-NEXT: s_nop 0
2785 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2786 ; GFX12-NEXT: s_endpgm
2787 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2788 %gep = getelementptr inbounds half, ptr addrspace(1) %arg, i32 %id
2789 %v = load half, ptr addrspace(1) %gep, align 2
2790 %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
2791 %gep2 = getelementptr inbounds half, ptr addrspace(1) %out, i32 %id
2792 store half %canonicalized, ptr addrspace(1) %gep2, align 2
; Kernel test: each work-item loads one <2 x half> from %arg at index
; workitem.id.x, applies llvm.canonicalize.v2f16, and stores the result to the
; same index of %out. Uses attribute group #3.
; Expected selection per the checks below: on hawaii each half is converted to
; f32, multiplied by 1.0, converted back, and the halves are re-packed with a
; shift and an or; on fiji each half gets its own f16 max (the high half via
; the SDWA form) followed by an or; on gfx900 and newer a single packed f16
; max of the value with itself is used.
2798 define amdgpu_kernel void @test_canonicalize_value_v2f16_denorm(ptr addrspace(1) %arg, ptr addrspace(1) %out) #3 {
2799 ; GFX6-LABEL: test_canonicalize_value_v2f16_denorm:
2801 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2802 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2803 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2804 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2805 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v2
2806 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2807 ; GFX6-NEXT: flat_load_dword v0, v[0:1]
2808 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
2809 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2810 ; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0
2811 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2812 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
2813 ; GFX6-NEXT: v_mul_f32_e32 v1, 1.0, v1
2814 ; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
2815 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0
2816 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
2817 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2818 ; GFX6-NEXT: v_or_b32_e32 v4, v1, v0
2819 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v2
2820 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
2821 ; GFX6-NEXT: flat_store_dword v[0:1], v4
2822 ; GFX6-NEXT: s_endpgm
2824 ; GFX8-LABEL: test_canonicalize_value_v2f16_denorm:
2826 ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2827 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 2, v0
2828 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2829 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2830 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v2
2831 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2832 ; GFX8-NEXT: flat_load_dword v0, v[0:1]
2833 ; GFX8-NEXT: v_mov_b32_e32 v1, s3
2834 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2835 ; GFX8-NEXT: v_max_f16_sdwa v3, v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2836 ; GFX8-NEXT: v_max_f16_e32 v0, v0, v0
2837 ; GFX8-NEXT: v_or_b32_e32 v3, v0, v3
2838 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v2
2839 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2840 ; GFX8-NEXT: flat_store_dword v[0:1], v3
2841 ; GFX8-NEXT: s_endpgm
2843 ; GFX9-LABEL: test_canonicalize_value_v2f16_denorm:
2845 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
2846 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2847 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2848 ; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
2849 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2850 ; GFX9-NEXT: v_pk_max_f16 v1, v1, v1
2851 ; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
2852 ; GFX9-NEXT: s_endpgm
2854 ; GFX11-LABEL: test_canonicalize_value_v2f16_denorm:
2856 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2857 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2858 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2859 ; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
2860 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2861 ; GFX11-NEXT: v_pk_max_f16 v1, v1, v1
2862 ; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
2863 ; GFX11-NEXT: s_nop 0
2864 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2865 ; GFX11-NEXT: s_endpgm
2867 ; GFX12-LABEL: test_canonicalize_value_v2f16_denorm:
2869 ; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
2870 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
2871 ; GFX12-NEXT: s_wait_kmcnt 0x0
2872 ; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
2873 ; GFX12-NEXT: s_wait_loadcnt 0x0
2874 ; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1
2875 ; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
2876 ; GFX12-NEXT: s_nop 0
2877 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2878 ; GFX12-NEXT: s_endpgm
2879 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
2880 %gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i32 %id
2881 %v = load <2 x half>, ptr addrspace(1) %gep, align 4
2882 %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
2883 %gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %id
2884 store <2 x half> %canonicalized, ptr addrspace(1) %gep2, align 2
; Kernel test: each work-item loads one <2 x double> from %out at index
; workitem.id.x and applies llvm.canonicalize.v2f64. Uses attribute group #1.
; Note that the store targets %out itself, not %gep, so the result is written
; to the base address rather than to the per-work-item slot it was loaded from.
; The checks below expect one f64 max per element on every tested target
; (v_max_num_f64 on gfx1200).
2888 define amdgpu_kernel void @v_test_canonicalize_var_v2f64(ptr addrspace(1) %out) #1 {
2889 ; GFX6-LABEL: v_test_canonicalize_var_v2f64:
2891 ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2892 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2893 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
2894 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2895 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
2896 ; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2897 ; GFX6-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2898 ; GFX6-NEXT: v_mov_b32_e32 v5, s1
2899 ; GFX6-NEXT: v_mov_b32_e32 v4, s0
2900 ; GFX6-NEXT: s_waitcnt vmcnt(0)
2901 ; GFX6-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2902 ; GFX6-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2903 ; GFX6-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2904 ; GFX6-NEXT: s_endpgm
2906 ; GFX8-LABEL: v_test_canonicalize_var_v2f64:
2908 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2909 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2910 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
2911 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2912 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
2913 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
2914 ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
2915 ; GFX8-NEXT: v_mov_b32_e32 v5, s1
2916 ; GFX8-NEXT: v_mov_b32_e32 v4, s0
2917 ; GFX8-NEXT: s_waitcnt vmcnt(0)
2918 ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2919 ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2920 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
2921 ; GFX8-NEXT: s_endpgm
2923 ; GFX9-LABEL: v_test_canonicalize_var_v2f64:
2925 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
2926 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2927 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
2928 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2929 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
2930 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2931 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2932 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2933 ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
2934 ; GFX9-NEXT: s_endpgm
2936 ; GFX11-LABEL: v_test_canonicalize_var_v2f64:
2938 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2939 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2940 ; GFX11-NEXT: v_mov_b32_e32 v4, 0
2941 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2942 ; GFX11-NEXT: global_load_b128 v[0:3], v0, s[0:1]
2943 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2944 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
2945 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
2946 ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
2947 ; GFX11-NEXT: s_nop 0
2948 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2949 ; GFX11-NEXT: s_endpgm
2951 ; GFX12-LABEL: v_test_canonicalize_var_v2f64:
2953 ; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
2954 ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2955 ; GFX12-NEXT: v_mov_b32_e32 v4, 0
2956 ; GFX12-NEXT: s_wait_kmcnt 0x0
2957 ; GFX12-NEXT: global_load_b128 v[0:3], v0, s[0:1]
2958 ; GFX12-NEXT: s_wait_loadcnt 0x0
2959 ; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
2960 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
2961 ; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
2962 ; GFX12-NEXT: s_nop 0
2963 ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2964 ; GFX12-NEXT: s_endpgm
2965 %tid = call i32 @llvm.amdgcn.workitem.id.x()
2966 %gep = getelementptr <2 x double>, ptr addrspace(1) %out, i32 %tid
2967 %val = load <2 x double>, ptr addrspace(1) %gep
2968 %canonicalized = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %val)
2969 store <2 x double> %canonicalized, ptr addrspace(1) %out
; Non-kernel function test: canonicalizes a <2 x float> argument and returns
; it. Uses attribute group #1.
; The checks below expect one multiply by 1.0 per element on hawaii and fiji,
; one v_max_f32 per element on gfx900, and a single v_dual_max pair covering
; both elements on gfx1100 and gfx1200.
2974 define <2 x float> @v_test_canonicalize_v2f32_flush(<2 x float> %arg) #1 {
2975 ; GFX678-LABEL: v_test_canonicalize_v2f32_flush:
2977 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2978 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
2979 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
2980 ; GFX678-NEXT: s_setpc_b64 s[30:31]
2982 ; GFX9-LABEL: v_test_canonicalize_v2f32_flush:
2984 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2985 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
2986 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
2987 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2989 ; GFX11-LABEL: v_test_canonicalize_v2f32_flush:
2991 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2992 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
2993 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2995 ; GFX12-LABEL: v_test_canonicalize_v2f32_flush:
2997 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
2998 ; GFX12-NEXT: s_wait_expcnt 0x0
2999 ; GFX12-NEXT: s_wait_samplecnt 0x0
3000 ; GFX12-NEXT: s_wait_bvhcnt 0x0
3001 ; GFX12-NEXT: s_wait_kmcnt 0x0
3002 ; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
3003 ; GFX12-NEXT: s_setpc_b64 s[30:31]
3004 %canon = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %arg)
3005 ret <2 x float> %canon
; Non-kernel function test: canonicalizes a <3 x float> argument and returns
; it. Uses attribute group #1.
; The checks below expect one multiply by 1.0 per element on hawaii and fiji,
; one v_max_f32 per element on gfx900, and on gfx1100 / gfx1200 a v_dual_max
; pair for elements 0 and 1 plus a standalone max for element 2.
3009 define <3 x float> @v_test_canonicalize_v3f32_flush(<3 x float> %arg) #1 {
3010 ; GFX678-LABEL: v_test_canonicalize_v3f32_flush:
3012 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3013 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
3014 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
3015 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
3016 ; GFX678-NEXT: s_setpc_b64 s[30:31]
3018 ; GFX9-LABEL: v_test_canonicalize_v3f32_flush:
3020 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3021 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
3022 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
3023 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
3024 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3026 ; GFX11-LABEL: v_test_canonicalize_v3f32_flush:
3028 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3029 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
3030 ; GFX11-NEXT: v_max_f32_e32 v2, v2, v2
3031 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3033 ; GFX12-LABEL: v_test_canonicalize_v3f32_flush:
3035 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3036 ; GFX12-NEXT: s_wait_expcnt 0x0
3037 ; GFX12-NEXT: s_wait_samplecnt 0x0
3038 ; GFX12-NEXT: s_wait_bvhcnt 0x0
3039 ; GFX12-NEXT: s_wait_kmcnt 0x0
3040 ; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
3041 ; GFX12-NEXT: v_max_num_f32_e32 v2, v2, v2
3042 ; GFX12-NEXT: s_setpc_b64 s[30:31]
3043 %canon = call <3 x float> @llvm.canonicalize.v3f32(<3 x float> %arg)
3044 ret <3 x float> %canon
; Non-kernel function test: canonicalizes a <4 x float> argument and returns
; it. Uses attribute group #1.
; The checks below expect one multiply by 1.0 per element on hawaii and fiji,
; one v_max_f32 per element on gfx900, and two v_dual_max pairs on gfx1100 and
; gfx1200.
3048 define <4 x float> @v_test_canonicalize_v4f32_flush(<4 x float> %arg) #1 {
3049 ; GFX678-LABEL: v_test_canonicalize_v4f32_flush:
3051 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3052 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
3053 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
3054 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
3055 ; GFX678-NEXT: v_mul_f32_e32 v3, 1.0, v3
3056 ; GFX678-NEXT: s_setpc_b64 s[30:31]
3058 ; GFX9-LABEL: v_test_canonicalize_v4f32_flush:
3060 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3061 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
3062 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
3063 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
3064 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
3065 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3067 ; GFX11-LABEL: v_test_canonicalize_v4f32_flush:
3069 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3070 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
3071 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
3072 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3074 ; GFX12-LABEL: v_test_canonicalize_v4f32_flush:
3076 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3077 ; GFX12-NEXT: s_wait_expcnt 0x0
3078 ; GFX12-NEXT: s_wait_samplecnt 0x0
3079 ; GFX12-NEXT: s_wait_bvhcnt 0x0
3080 ; GFX12-NEXT: s_wait_kmcnt 0x0
3081 ; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
3082 ; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
3083 ; GFX12-NEXT: s_setpc_b64 s[30:31]
3084 %canon = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %arg)
3085 ret <4 x float> %canon
; Non-kernel function test: canonicalizes an <8 x float> argument and returns
; it. Uses attribute group #1.
; The checks below expect one multiply by 1.0 per element on hawaii and fiji,
; one v_max_f32 per element on gfx900, and four v_dual_max pairs on gfx1100
; and gfx1200.
3089 define <8 x float> @v_test_canonicalize_v8f32_flush(<8 x float> %arg) #1 {
3090 ; GFX678-LABEL: v_test_canonicalize_v8f32_flush:
3092 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3093 ; GFX678-NEXT: v_mul_f32_e32 v0, 1.0, v0
3094 ; GFX678-NEXT: v_mul_f32_e32 v1, 1.0, v1
3095 ; GFX678-NEXT: v_mul_f32_e32 v2, 1.0, v2
3096 ; GFX678-NEXT: v_mul_f32_e32 v3, 1.0, v3
3097 ; GFX678-NEXT: v_mul_f32_e32 v4, 1.0, v4
3098 ; GFX678-NEXT: v_mul_f32_e32 v5, 1.0, v5
3099 ; GFX678-NEXT: v_mul_f32_e32 v6, 1.0, v6
3100 ; GFX678-NEXT: v_mul_f32_e32 v7, 1.0, v7
3101 ; GFX678-NEXT: s_setpc_b64 s[30:31]
3103 ; GFX9-LABEL: v_test_canonicalize_v8f32_flush:
3105 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3106 ; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
3107 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
3108 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2
3109 ; GFX9-NEXT: v_max_f32_e32 v3, v3, v3
3110 ; GFX9-NEXT: v_max_f32_e32 v4, v4, v4
3111 ; GFX9-NEXT: v_max_f32_e32 v5, v5, v5
3112 ; GFX9-NEXT: v_max_f32_e32 v6, v6, v6
3113 ; GFX9-NEXT: v_max_f32_e32 v7, v7, v7
3114 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3116 ; GFX11-LABEL: v_test_canonicalize_v8f32_flush:
3118 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3119 ; GFX11-NEXT: v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
3120 ; GFX11-NEXT: v_dual_max_f32 v2, v2, v2 :: v_dual_max_f32 v3, v3, v3
3121 ; GFX11-NEXT: v_dual_max_f32 v4, v4, v4 :: v_dual_max_f32 v5, v5, v5
3122 ; GFX11-NEXT: v_dual_max_f32 v6, v6, v6 :: v_dual_max_f32 v7, v7, v7
3123 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3125 ; GFX12-LABEL: v_test_canonicalize_v8f32_flush:
3127 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3128 ; GFX12-NEXT: s_wait_expcnt 0x0
3129 ; GFX12-NEXT: s_wait_samplecnt 0x0
3130 ; GFX12-NEXT: s_wait_bvhcnt 0x0
3131 ; GFX12-NEXT: s_wait_kmcnt 0x0
3132 ; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v0 :: v_dual_max_num_f32 v1, v1, v1
3133 ; GFX12-NEXT: v_dual_max_num_f32 v2, v2, v2 :: v_dual_max_num_f32 v3, v3, v3
3134 ; GFX12-NEXT: v_dual_max_num_f32 v4, v4, v4 :: v_dual_max_num_f32 v5, v5, v5
3135 ; GFX12-NEXT: v_dual_max_num_f32 v6, v6, v6 :: v_dual_max_num_f32 v7, v7, v7
3136 ; GFX12-NEXT: s_setpc_b64 s[30:31]
3137 %canon = call <8 x float> @llvm.canonicalize.v8f32(<8 x float> %arg)
3138 ret <8 x float> %canon
; Non-kernel function test: canonicalizes a <2 x double> argument and returns
; it. Uses attribute group #1.
; The checks below expect one f64 max of each element with itself on every
; tested target (v_max_num_f64 on gfx1200).
3141 define <2 x double> @v_test_canonicalize_v2f64(<2 x double> %arg) #1 {
3142 ; GFX678-LABEL: v_test_canonicalize_v2f64:
3144 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3145 ; GFX678-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3146 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3147 ; GFX678-NEXT: s_setpc_b64 s[30:31]
3149 ; GFX9-LABEL: v_test_canonicalize_v2f64:
3151 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3152 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3153 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3154 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3156 ; GFX11-LABEL: v_test_canonicalize_v2f64:
3158 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3159 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3160 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3161 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3163 ; GFX12-LABEL: v_test_canonicalize_v2f64:
3165 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3166 ; GFX12-NEXT: s_wait_expcnt 0x0
3167 ; GFX12-NEXT: s_wait_samplecnt 0x0
3168 ; GFX12-NEXT: s_wait_bvhcnt 0x0
3169 ; GFX12-NEXT: s_wait_kmcnt 0x0
3170 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
3171 ; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
3172 ; GFX12-NEXT: s_setpc_b64 s[30:31]
3173 %canon = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %arg)
3174 ret <2 x double> %canon
; Non-kernel function test: canonicalizes a <3 x double> argument and returns
; it. Uses attribute group #1.
; The checks below expect one f64 max of each element with itself on every
; tested target (v_max_num_f64 on gfx1200).
3177 define <3 x double> @v_test_canonicalize_v3f64(<3 x double> %arg) #1 {
3178 ; GFX678-LABEL: v_test_canonicalize_v3f64:
3180 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3181 ; GFX678-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3182 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3183 ; GFX678-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
3184 ; GFX678-NEXT: s_setpc_b64 s[30:31]
3186 ; GFX9-LABEL: v_test_canonicalize_v3f64:
3188 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3189 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3190 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3191 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
3192 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3194 ; GFX11-LABEL: v_test_canonicalize_v3f64:
3196 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3197 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3198 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3199 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
3200 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3202 ; GFX12-LABEL: v_test_canonicalize_v3f64:
3204 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3205 ; GFX12-NEXT: s_wait_expcnt 0x0
3206 ; GFX12-NEXT: s_wait_samplecnt 0x0
3207 ; GFX12-NEXT: s_wait_bvhcnt 0x0
3208 ; GFX12-NEXT: s_wait_kmcnt 0x0
3209 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
3210 ; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
3211 ; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
3212 ; GFX12-NEXT: s_setpc_b64 s[30:31]
3213 %canon = call <3 x double> @llvm.canonicalize.v3f64(<3 x double> %arg)
3214 ret <3 x double> %canon
; Non-kernel function test: canonicalizes a <4 x double> argument and returns
; it. Uses attribute group #1.
; The checks below expect one f64 max of each element with itself on every
; tested target (v_max_num_f64 on gfx1200).
3217 define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
3218 ; GFX678-LABEL: v_test_canonicalize_v4f64:
3220 ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3221 ; GFX678-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3222 ; GFX678-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3223 ; GFX678-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
3224 ; GFX678-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
3225 ; GFX678-NEXT: s_setpc_b64 s[30:31]
3227 ; GFX9-LABEL: v_test_canonicalize_v4f64:
3229 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3230 ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3231 ; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3232 ; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
3233 ; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
3234 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3236 ; GFX11-LABEL: v_test_canonicalize_v4f64:
3238 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3239 ; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
3240 ; GFX11-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
3241 ; GFX11-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
3242 ; GFX11-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
3243 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3245 ; GFX12-LABEL: v_test_canonicalize_v4f64:
3247 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3248 ; GFX12-NEXT: s_wait_expcnt 0x0
3249 ; GFX12-NEXT: s_wait_samplecnt 0x0
3250 ; GFX12-NEXT: s_wait_bvhcnt 0x0
3251 ; GFX12-NEXT: s_wait_kmcnt 0x0
3252 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
3253 ; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
3254 ; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[4:5]
3255 ; GFX12-NEXT: v_max_num_f64_e32 v[6:7], v[6:7], v[6:7]
3256 ; GFX12-NEXT: s_setpc_b64 s[30:31]
3257 %canon = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %arg)
3258 ret <4 x double> %canon
; Attribute groups for this test.
; Per the LLVM Language Reference, the "denormal-fp-math" value is an
; "<output mode>,<input mode>" pair, and "denormal-fp-math-f32" is the same
; control restricted to the 32-bit float type.
; Groups #2 and #4 are textually identical.
3261 attributes #0 = { nounwind readnone }
3262 attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
3263 attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
3264 attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
3265 attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
3266 attributes #5 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" }
3267 attributes #6 = { nounwind "denormal-fp-math-f32"="dynamic,ieee" }
3268 attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,dynamic" }